1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
15#include "AArch64InstrInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
42#include "llvm/IR/Constants.h"
45#include "llvm/IR/IntrinsicsAArch64.h"
46#include "llvm/IR/Type.h"
47#include "llvm/Pass.h"
48#include "llvm/Support/Debug.h"
50#include <optional>
51
52#define DEBUG_TYPE "aarch64-isel"
53
54using namespace llvm;
55using namespace MIPatternMatch;
56using namespace AArch64GISelUtils;
57
58namespace llvm {
61}
62
63namespace {
64
65#define GET_GLOBALISEL_PREDICATE_BITSET
66#include "AArch64GenGlobalISel.inc"
67#undef GET_GLOBALISEL_PREDICATE_BITSET
68
69
70class AArch64InstructionSelector : public InstructionSelector {
71public:
72 AArch64InstructionSelector(const AArch64TargetMachine &TM,
73 const AArch64Subtarget &STI,
74 const AArch64RegisterBankInfo &RBI);
75
76 bool select(MachineInstr &I) override;
77 static const char *getName() { return DEBUG_TYPE; }
78
79 void setupMF(MachineFunction &MF, GISelValueTracking *VT,
80 CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI,
81 BlockFrequencyInfo *BFI) override {
82 InstructionSelector::setupMF(MF, VT, CoverageInfo, PSI, BFI);
83 MIB.setMF(MF);
84
85 // hasFnAttribute() is expensive to call on every BRCOND selection, so
86 // cache it here for each run of the selector.
87 ProduceNonFlagSettingCondBr =
88 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
89 MFReturnAddr = Register();
90
91 processPHIs(MF);
92 }
93
94private:
95 /// tblgen-erated 'select' implementation, used as the initial selector for
96 /// the patterns that don't require complex C++.
97 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
98
99 // A lowering phase that runs before any selection attempts.
100 // Returns true if the instruction was modified.
101 bool preISelLower(MachineInstr &I);
102
103 // An early selection function that runs before the selectImpl() call.
104 bool earlySelect(MachineInstr &I);
105
106 /// Save state that is shared between select calls, call select on \p I and
107 /// then restore the saved state. This can be used to recursively call select
108 /// within a select call.
109 bool selectAndRestoreState(MachineInstr &I);
110
111 // Do some preprocessing of G_PHIs before we begin selection.
112 void processPHIs(MachineFunction &MF);
113
114 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
115
116 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
117 bool contractCrossBankCopyIntoStore(MachineInstr &I,
118 MachineRegisterInfo &MRI);
119
120 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
121
122 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
123 MachineRegisterInfo &MRI) const;
124 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
125 MachineRegisterInfo &MRI) const;
126
127 ///@{
128 /// Helper functions for selectCompareBranch.
129 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
130 MachineIRBuilder &MIB) const;
131 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
132 MachineIRBuilder &MIB) const;
133 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
134 MachineIRBuilder &MIB) const;
135 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
136 MachineBasicBlock *DstMBB,
137 MachineIRBuilder &MIB) const;
138 ///@}
139
140 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
141 MachineRegisterInfo &MRI);
142
143 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
144 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
145
146 // Helper to generate an equivalent of scalar_to_vector into a new register,
147 // returned via 'Dst'.
148 MachineInstr *emitScalarToVector(unsigned EltSize,
149 const TargetRegisterClass *DstRC,
150 Register Scalar,
151 MachineIRBuilder &MIRBuilder) const;
152 /// Helper to narrow a vector that was widened by emitScalarToVector.
153 /// Copies the lowest part of a 128-bit or 64-bit vector to a 64-bit or
154 /// 32-bit vector, respectively.
155 MachineInstr *emitNarrowVector(Register DstReg, Register SrcReg,
156 MachineIRBuilder &MIRBuilder,
157 MachineRegisterInfo &MRI) const;
158
159 /// Emit a lane insert into \p DstReg, or a new vector register if
160 /// std::nullopt is provided.
161 ///
162 /// The lane inserted into is defined by \p LaneIdx. The vector source
163 /// register is given by \p SrcReg. The register containing the element is
164 /// given by \p EltReg.
165 MachineInstr *emitLaneInsert(std::optional<Register> DstReg, Register SrcReg,
166 Register EltReg, unsigned LaneIdx,
167 const RegisterBank &RB,
168 MachineIRBuilder &MIRBuilder) const;
169
170 /// Emit a sequence of instructions representing a constant \p CV for a
171 /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
172 ///
173 /// \returns the last instruction in the sequence on success, and nullptr
174 /// otherwise.
175 MachineInstr *emitConstantVector(Register Dst, Constant *CV,
176 MachineIRBuilder &MIRBuilder,
177 MachineRegisterInfo &MRI);
178
179 MachineInstr *tryAdvSIMDModImm8(Register Dst, unsigned DstSize, APInt Bits,
180 MachineIRBuilder &MIRBuilder);
181
182 MachineInstr *tryAdvSIMDModImm16(Register Dst, unsigned DstSize, APInt Bits,
183 MachineIRBuilder &MIRBuilder, bool Inv);
184
185 MachineInstr *tryAdvSIMDModImm32(Register Dst, unsigned DstSize, APInt Bits,
186 MachineIRBuilder &MIRBuilder, bool Inv);
187 MachineInstr *tryAdvSIMDModImm64(Register Dst, unsigned DstSize, APInt Bits,
188 MachineIRBuilder &MIRBuilder);
189 MachineInstr *tryAdvSIMDModImm321s(Register Dst, unsigned DstSize, APInt Bits,
190 MachineIRBuilder &MIRBuilder, bool Inv);
191 MachineInstr *tryAdvSIMDModImmFP(Register Dst, unsigned DstSize, APInt Bits,
192 MachineIRBuilder &MIRBuilder);
193
194 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
195 MachineRegisterInfo &MRI);
196 /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
197 /// SUBREG_TO_REG.
198 bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
199 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
202
203 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
204 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
205 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
206 bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
207
208 /// Helper function to select vector load intrinsics like
209 /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
210 /// \p Opc is the opcode that the selected instruction should use.
211 /// \p NumVecs is the number of vector destinations for the instruction.
212 /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
213 bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
214 MachineInstr &I);
215 bool selectVectorLoadLaneIntrinsic(unsigned Opc, unsigned NumVecs,
216 MachineInstr &I);
217 void selectVectorStoreIntrinsic(MachineInstr &I, unsigned NumVecs,
218 unsigned Opc);
219 bool selectVectorStoreLaneIntrinsic(MachineInstr &I, unsigned NumVecs,
220 unsigned Opc);
221 bool selectIntrinsicWithSideEffects(MachineInstr &I,
222 MachineRegisterInfo &MRI);
223 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
224 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
225 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
226 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
227 bool selectPtrAuthGlobalValue(MachineInstr &I,
228 MachineRegisterInfo &MRI) const;
229 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
230 bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
231 bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
232 void SelectTable(MachineInstr &I, MachineRegisterInfo &MRI, unsigned NumVecs,
233 unsigned Opc1, unsigned Opc2, bool isExt);
234
235 bool selectIndexedExtLoad(MachineInstr &I, MachineRegisterInfo &MRI);
236 bool selectIndexedLoad(MachineInstr &I, MachineRegisterInfo &MRI);
237 bool selectIndexedStore(GIndexedStore &I, MachineRegisterInfo &MRI);
238
239 unsigned emitConstantPoolEntry(const Constant *CPVal,
240 MachineFunction &MF) const;
241 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
242 MachineIRBuilder &MIRBuilder) const;
243
244 // Emit a vector concat operation.
245 MachineInstr *emitVectorConcat(std::optional<Register> Dst, Register Op1,
246 Register Op2,
247 MachineIRBuilder &MIRBuilder) const;
248
249 // Emit an integer compare between LHS and RHS, which checks for Predicate.
250 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
251 MachineOperand &Predicate,
252 MachineIRBuilder &MIRBuilder) const;
253
254 /// Emit a floating point comparison between \p LHS and \p RHS.
255 /// \p Pred if given is the intended predicate to use.
256 MachineInstr *
257 emitFPCompare(Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
258 std::optional<CmpInst::Predicate> = std::nullopt) const;
259
260 MachineInstr *
261 emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
262 std::initializer_list<llvm::SrcOp> SrcOps,
263 MachineIRBuilder &MIRBuilder,
264 const ComplexRendererFns &RenderFns = std::nullopt) const;
265 /// Helper function to emit an add or sub instruction.
266 ///
267 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
268 /// in a specific order.
269 ///
270 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
271 ///
272 /// \code
273 /// const std::array<std::array<unsigned, 2>, 4> Table {
274 /// {{AArch64::ADDXri, AArch64::ADDWri},
275 /// {AArch64::ADDXrs, AArch64::ADDWrs},
276 /// {AArch64::ADDXrr, AArch64::ADDWrr},
277 /// {AArch64::SUBXri, AArch64::SUBWri},
278 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
279 /// \endcode
280 ///
281 /// Each row in the table corresponds to a different addressing mode. Each
282 /// column corresponds to a different register size.
283 ///
284 /// \attention Rows must be structured as follows:
285 /// - Row 0: The ri opcode variants
286 /// - Row 1: The rs opcode variants
287 /// - Row 2: The rr opcode variants
288 /// - Row 3: The ri opcode variants for negative immediates
289 /// - Row 4: The rx opcode variants
290 ///
291 /// \attention Columns must be structured as follows:
292 /// - Column 0: The 64-bit opcode variants
293 /// - Column 1: The 32-bit opcode variants
294 ///
295 /// \p Dst is the destination register of the binop to emit.
296 /// \p LHS is the left-hand operand of the binop to emit.
297 /// \p RHS is the right-hand operand of the binop to emit.
298 MachineInstr *emitAddSub(
299 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
300 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
301 MachineIRBuilder &MIRBuilder) const;
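// For example, given the table shown above, rows are indexed by the matched
// addressing mode and columns by operand size: a 32-bit add of a 12-bit
// immediate would use Table[0][1] (AArch64::ADDWri), while a 64-bit
// register-register add would use Table[2][0] (AArch64::ADDXrr).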
302 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
303 MachineOperand &RHS,
304 MachineIRBuilder &MIRBuilder) const;
306 MachineIRBuilder &MIRBuilder) const;
308 MachineIRBuilder &MIRBuilder) const;
310 MachineIRBuilder &MIRBuilder) const;
312 MachineIRBuilder &MIRBuilder) const;
314 MachineIRBuilder &MIRBuilder) const;
316 MachineIRBuilder &MIRBuilder) const;
318 MachineIRBuilder &MIRBuilder) const;
321 MachineIRBuilder &MIRBuilder) const;
322 MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,
323 const RegisterBank &DstRB, LLT ScalarTy,
324 Register VecReg, unsigned LaneIdx,
325 MachineIRBuilder &MIRBuilder) const;
326 MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
327 AArch64CC::CondCode Pred,
328 MachineIRBuilder &MIRBuilder) const;
329 /// Emit a CSet for a FP compare.
330 ///
331 /// \p Dst is expected to be a 32-bit scalar register.
332 MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
333 MachineIRBuilder &MIRBuilder) const;
334
335 /// Emit an instruction that sets NZCV to the carry-in expected by \p I.
336 /// Might elide the instruction if the previous instruction already sets NZCV
337 /// correctly.
338 MachineInstr *emitCarryIn(MachineInstr &I, Register CarryReg);
339
340 /// Emit the overflow op for \p Opcode.
341 ///
342 /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
343 /// G_USUBO, etc.
344 std::pair<MachineInstr *, AArch64CC::CondCode>
345 emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
346 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
347
348 bool selectOverflowOp(MachineInstr &I, MachineRegisterInfo &MRI);
349
350 /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
351 /// In some cases this is even possible with OR operations in the expression.
353 MachineIRBuilder &MIB) const;
358 MachineIRBuilder &MIB) const;
360 bool Negate, Register CCOp,
362 MachineIRBuilder &MIB) const;
363
364 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
365 /// \p IsNegative is true if the test should be "not zero".
366 /// This will also optimize the test bit instruction when possible.
367 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
368 MachineBasicBlock *DstMBB,
369 MachineIRBuilder &MIB) const;
370
371 /// Emit a CB(N)Z instruction which branches to \p DestMBB.
372 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
373 MachineBasicBlock *DestMBB,
374 MachineIRBuilder &MIB) const;
375
376 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
377 // We use these manually instead of using the importer since it doesn't
378 // support SDNodeXForm.
379 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
380 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
381 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
382 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
383
384 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
385 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
386 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
387
388 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
389 unsigned Size) const;
390
391 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
392 return selectAddrModeUnscaled(Root, 1);
393 }
394 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
395 return selectAddrModeUnscaled(Root, 2);
396 }
397 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
398 return selectAddrModeUnscaled(Root, 4);
399 }
400 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
401 return selectAddrModeUnscaled(Root, 8);
402 }
403 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
404 return selectAddrModeUnscaled(Root, 16);
405 }
406
407 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
408 /// from complex pattern matchers like selectAddrModeIndexed().
409 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
410 MachineRegisterInfo &MRI) const;
411
412 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
413 unsigned Size) const;
414 template <int Width>
415 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
416 return selectAddrModeIndexed(Root, Width / 8);
417 }
418
419 std::optional<bool>
420 isWorthFoldingIntoAddrMode(const MachineInstr &MI,
421 const MachineRegisterInfo &MRI) const;
422
423 bool isWorthFoldingIntoExtendedReg(const MachineInstr &MI,
424 const MachineRegisterInfo &MRI,
425 bool IsAddrOperand) const;
426 ComplexRendererFns
427 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
428 unsigned SizeInBytes) const;
429
430 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
431 /// or not a shift + extend should be folded into an addressing mode. Returns
432 /// None when this is not profitable or possible.
433 ComplexRendererFns
434 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
435 MachineOperand &Offset, unsigned SizeInBytes,
436 bool WantsExt) const;
437 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
438 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
439 unsigned SizeInBytes) const;
440 template <int Width>
441 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
442 return selectAddrModeXRO(Root, Width / 8);
443 }
444
445 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
446 unsigned SizeInBytes) const;
447 template <int Width>
448 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
449 return selectAddrModeWRO(Root, Width / 8);
450 }
451
452 ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
453 bool AllowROR = false) const;
454
455 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
456 return selectShiftedRegister(Root);
457 }
458
459 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
460 return selectShiftedRegister(Root, true);
461 }
462
463 /// Given an extend instruction, determine the correct shift-extend type for
464 /// that instruction.
465 ///
466 /// If the instruction is going to be used in a load or store, pass
467 /// \p IsLoadStore = true.
468 AArch64_AM::ShiftExtendType
469 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
470 bool IsLoadStore = false) const;
471
472 /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
473 ///
474 /// \returns Either \p Reg if no change was necessary, or the new register
475 /// created by moving \p Reg.
476 ///
477 /// Note: This uses emitCopy right now.
478 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
479 MachineIRBuilder &MIB) const;
480
481 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
482
483 ComplexRendererFns selectExtractHigh(MachineOperand &Root) const;
484
485 ComplexRendererFns selectCVTFixedPointVec(MachineOperand &Root) const;
486 ComplexRendererFns
487 selectCVTFixedPosRecipOperandVec(MachineOperand &Root) const;
488 ComplexRendererFns
489 selectCVTFixedPointVecBase(const MachineOperand &Root,
490 bool isReciprocal = false) const;
491 void renderFixedPointXForm(MachineInstrBuilder &MIB, const MachineInstr &MI,
492 int OpIdx = -1) const;
493 void renderFixedPointRecipXForm(MachineInstrBuilder &MIB,
494 const MachineInstr &MI, int OpIdx = -1) const;
495
496 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
497 int OpIdx = -1) const;
498 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
499 int OpIdx = -1) const;
500 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
501 int OpIdx = -1) const;
502 void renderUbsanTrap(MachineInstrBuilder &MIB, const MachineInstr &MI,
503 int OpIdx) const;
504 void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
505 int OpIdx = -1) const;
506 void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
507 int OpIdx = -1) const;
508 void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
509 int OpIdx = -1) const;
510 void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
511 const MachineInstr &MI,
512 int OpIdx = -1) const;
513
514 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
515 void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
516
517 // Optimization methods.
518 bool tryOptSelect(GSelect &Sel);
519 bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
520 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
521 MachineOperand &Predicate,
522 MachineIRBuilder &MIRBuilder) const;
523
524 /// Return true if \p MI is a load or store of \p NumBytes bytes.
525 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
526
527 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
528 /// register zeroed out. In other words, the result of MI has been explicitly
529 /// zero extended.
530 bool isDef32(const MachineInstr &MI) const;
531
532 const AArch64TargetMachine &TM;
533 const AArch64Subtarget &STI;
534 const AArch64InstrInfo &TII;
535 const AArch64RegisterInfo &TRI;
536 const AArch64RegisterBankInfo &RBI;
537
538 bool ProduceNonFlagSettingCondBr = false;
539
540 // Some cached values used during selection.
541 // We use LR as a live-in register, and we keep track of it here as it can be
542 // clobbered by calls.
543 Register MFReturnAddr;
544
544
545 MachineIRBuilder MIB;
546
547#define GET_GLOBALISEL_PREDICATES_DECL
548#include "AArch64GenGlobalISel.inc"
549#undef GET_GLOBALISEL_PREDICATES_DECL
550
551// We declare the temporaries used by selectImpl() in the class to minimize the
552// cost of constructing placeholder values.
553#define GET_GLOBALISEL_TEMPORARIES_DECL
554#include "AArch64GenGlobalISel.inc"
555#undef GET_GLOBALISEL_TEMPORARIES_DECL
556};
557
558} // end anonymous namespace
559
560#define GET_GLOBALISEL_IMPL
561#include "AArch64GenGlobalISel.inc"
562#undef GET_GLOBALISEL_IMPL
563
564AArch64InstructionSelector::AArch64InstructionSelector(
565 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
566 const AArch64RegisterBankInfo &RBI)
567 : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
568 RBI(RBI),
569#define GET_GLOBALISEL_PREDICATES_INIT
570#include "AArch64GenGlobalISel.inc"
571#undef GET_GLOBALISEL_PREDICATES_INIT
572#define GET_GLOBALISEL_TEMPORARIES_INIT
573#include "AArch64GenGlobalISel.inc"
574#undef GET_GLOBALISEL_TEMPORARIES_INIT
575{
576}
577
578// FIXME: This should be target-independent, inferred from the types declared
579// for each class in the bank.
580//
581/// Given a register bank, and a type, return the smallest register class that
582/// can represent that combination.
583static const TargetRegisterClass *
584getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
585 bool GetAllRegSet = false) {
586 if (RB.getID() == AArch64::GPRRegBankID) {
587 if (Ty.getSizeInBits() <= 32)
588 return GetAllRegSet ? &AArch64::GPR32allRegClass
589 : &AArch64::GPR32RegClass;
590 if (Ty.getSizeInBits() == 64)
591 return GetAllRegSet ? &AArch64::GPR64allRegClass
592 : &AArch64::GPR64RegClass;
593 if (Ty.getSizeInBits() == 128)
594 return &AArch64::XSeqPairsClassRegClass;
595 return nullptr;
596 }
597
598 if (RB.getID() == AArch64::FPRRegBankID) {
599 switch (Ty.getSizeInBits()) {
600 case 8:
601 return &AArch64::FPR8RegClass;
602 case 16:
603 return &AArch64::FPR16RegClass;
604 case 32:
605 return &AArch64::FPR32RegClass;
606 case 64:
607 return &AArch64::FPR64RegClass;
608 case 128:
609 return &AArch64::FPR128RegClass;
610 }
611 return nullptr;
612 }
613
614 return nullptr;
615}
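// For example, a scalar s32 on the GPR bank maps to GPR32 (or GPR32all when
// GetAllRegSet is true), a 128-bit vector such as v2s64 on the FPR bank maps
// to FPR128, and any size/bank combination not handled above yields nullptr.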
616
617/// Given a register bank, and size in bits, return the smallest register class
618/// that can represent that combination.
619static const TargetRegisterClass *
620getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits,
621 bool GetAllRegSet = false) {
622 if (SizeInBits.isScalable()) {
623 assert(RB.getID() == AArch64::FPRRegBankID &&
624 "Expected FPR regbank for scalable type size");
625 return &AArch64::ZPRRegClass;
626 }
627
628 unsigned RegBankID = RB.getID();
629
630 if (RegBankID == AArch64::GPRRegBankID) {
631 assert(!SizeInBits.isScalable() && "Unexpected scalable register size");
632 if (SizeInBits <= 32)
633 return GetAllRegSet ? &AArch64::GPR32allRegClass
634 : &AArch64::GPR32RegClass;
635 if (SizeInBits == 64)
636 return GetAllRegSet ? &AArch64::GPR64allRegClass
637 : &AArch64::GPR64RegClass;
638 if (SizeInBits == 128)
639 return &AArch64::XSeqPairsClassRegClass;
640 }
641
642 if (RegBankID == AArch64::FPRRegBankID) {
643 if (SizeInBits.isScalable()) {
644 assert(SizeInBits == TypeSize::getScalable(128) &&
645 "Unexpected scalable register size");
646 return &AArch64::ZPRRegClass;
647 }
648
649 switch (SizeInBits) {
650 default:
651 return nullptr;
652 case 8:
653 return &AArch64::FPR8RegClass;
654 case 16:
655 return &AArch64::FPR16RegClass;
656 case 32:
657 return &AArch64::FPR32RegClass;
658 case 64:
659 return &AArch64::FPR64RegClass;
660 case 128:
661 return &AArch64::FPR128RegClass;
662 }
663 }
664
665 return nullptr;
666}
667
668/// Returns the correct subregister to use for a given register class.
669static bool getSubRegForClass(const TargetRegisterClass *RC,
670 const TargetRegisterInfo &TRI, unsigned &SubReg) {
671 switch (TRI.getRegSizeInBits(*RC)) {
672 case 8:
673 SubReg = AArch64::bsub;
674 break;
675 case 16:
676 SubReg = AArch64::hsub;
677 break;
678 case 32:
679 if (RC != &AArch64::FPR32RegClass)
680 SubReg = AArch64::sub_32;
681 else
682 SubReg = AArch64::ssub;
683 break;
684 case 64:
685 SubReg = AArch64::dsub;
686 break;
687 default:
688 LLVM_DEBUG(
689 dbgs() << "Couldn't find appropriate subregister for register class.");
690 return false;
691 }
692
693 return true;
694}
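// For example, FPR32 maps to AArch64::ssub, GPR32 maps to AArch64::sub_32,
// and FPR64 maps to AArch64::dsub; classes whose size is not 8, 16, 32, or 64
// bits are rejected and the function returns false.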
695
696/// Returns the minimum size the given register bank can hold.
697static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
698 switch (RB.getID()) {
699 case AArch64::GPRRegBankID:
700 return 32;
701 case AArch64::FPRRegBankID:
702 return 8;
703 default:
704 llvm_unreachable("Tried to get minimum size for unknown register bank.");
705 }
706}
707
708/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
709/// Helper function for functions like createDTuple and createQTuple.
710///
711/// \p RegClassIDs - The list of register class IDs available for some tuple of
712/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
713/// expected to contain between 2 and 4 tuple classes.
714///
715/// \p SubRegs - The list of subregister classes associated with each register
716/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
717/// subregister class. The index of each subregister class is expected to
718/// correspond with the index of each register class.
719///
720/// \returns Either the destination register of REG_SEQUENCE instruction that
721/// was created, or the 0th element of \p Regs if \p Regs contains a single
722/// element.
723static Register createTuple(ArrayRef<Register> Regs,
724 const unsigned RegClassIDs[],
725 const unsigned SubRegs[], MachineIRBuilder &MIB) {
726 unsigned NumRegs = Regs.size();
727 if (NumRegs == 1)
728 return Regs[0];
729 assert(NumRegs >= 2 && NumRegs <= 4 &&
730 "Only support between two and 4 registers in a tuple!");
731 const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
732 auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
733 auto RegSequence =
734 MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
735 for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
736 RegSequence.addUse(Regs[I]);
737 RegSequence.addImm(SubRegs[I]);
738 }
739 return RegSequence.getReg(0);
740}
741
742/// Create a tuple of D-registers using the registers in \p Regs.
743static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
744 static const unsigned RegClassIDs[] = {
745 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
746 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
747 AArch64::dsub2, AArch64::dsub3};
748 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
749}
750
751/// Create a tuple of Q-registers using the registers in \p Regs.
752static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
753 static const unsigned RegClassIDs[] = {
754 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
755 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
756 AArch64::qsub2, AArch64::qsub3};
757 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
758}
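// For example, createQTuple with two registers emits a REG_SEQUENCE that
// places the first register in qsub0 and the second in qsub1 of a QQ-class
// register, while a single-register input is simply returned unchanged.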
759
760static std::optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
761 auto &MI = *Root.getParent();
762 auto &MBB = *MI.getParent();
763 auto &MF = *MBB.getParent();
764 auto &MRI = MF.getRegInfo();
765 uint64_t Immed;
766 if (Root.isImm())
767 Immed = Root.getImm();
768 else if (Root.isCImm())
769 Immed = Root.getCImm()->getZExtValue();
770 else if (Root.isReg()) {
771 auto ValAndVReg =
772 getIConstantVRegValWithLookThrough(Root.getReg(), MRI);
773 if (!ValAndVReg)
774 return std::nullopt;
775 Immed = ValAndVReg->Value.getSExtValue();
776 } else
777 return std::nullopt;
778 return Immed;
779}
780
781/// Check whether \p I is a currently unsupported binary operation:
782/// - it has an unsized type
783/// - an operand is not a vreg
784/// - its operands are not all in the same register bank
785/// These are checks that should someday live in the verifier, but right now,
786/// these are mostly limitations of the aarch64 selector.
787static bool unsupportedBinOp(const MachineInstr &I,
788 const AArch64RegisterBankInfo &RBI,
789 const MachineRegisterInfo &MRI,
790 const AArch64RegisterInfo &TRI) {
791 LLT Ty = MRI.getType(I.getOperand(0).getReg());
792 if (!Ty.isValid()) {
793 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
794 return true;
795 }
796
797 const RegisterBank *PrevOpBank = nullptr;
798 for (auto &MO : I.operands()) {
799 // FIXME: Support non-register operands.
800 if (!MO.isReg()) {
801 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
802 return true;
803 }
804
805 // FIXME: Can generic operations have physical registers operands? If
806 // so, this will need to be taught about that, and we'll need to get the
807 // bank out of the minimal class for the register.
808 // Either way, this needs to be documented (and possibly verified).
809 if (!MO.getReg().isVirtual()) {
810 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
811 return true;
812 }
813
814 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
815 if (!OpBank) {
816 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
817 return true;
818 }
819
820 if (PrevOpBank && OpBank != PrevOpBank) {
821 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
822 return true;
823 }
824 PrevOpBank = OpBank;
825 }
826 return false;
827}
828
829/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
830/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
831/// and of size \p OpSize.
832/// \returns \p GenericOpc if the combination is unsupported.
833static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
834 unsigned OpSize) {
835 switch (RegBankID) {
836 case AArch64::GPRRegBankID:
837 if (OpSize == 32) {
838 switch (GenericOpc) {
839 case TargetOpcode::G_SHL:
840 return AArch64::LSLVWr;
841 case TargetOpcode::G_LSHR:
842 return AArch64::LSRVWr;
843 case TargetOpcode::G_ASHR:
844 return AArch64::ASRVWr;
845 default:
846 return GenericOpc;
847 }
848 } else if (OpSize == 64) {
849 switch (GenericOpc) {
850 case TargetOpcode::G_PTR_ADD:
851 return AArch64::ADDXrr;
852 case TargetOpcode::G_SHL:
853 return AArch64::LSLVXr;
854 case TargetOpcode::G_LSHR:
855 return AArch64::LSRVXr;
856 case TargetOpcode::G_ASHR:
857 return AArch64::ASRVXr;
858 default:
859 return GenericOpc;
860 }
861 }
862 break;
863 case AArch64::FPRRegBankID:
864 switch (OpSize) {
865 case 32:
866 switch (GenericOpc) {
867 case TargetOpcode::G_FADD:
868 return AArch64::FADDSrr;
869 case TargetOpcode::G_FSUB:
870 return AArch64::FSUBSrr;
871 case TargetOpcode::G_FMUL:
872 return AArch64::FMULSrr;
873 case TargetOpcode::G_FDIV:
874 return AArch64::FDIVSrr;
875 default:
876 return GenericOpc;
877 }
878 case 64:
879 switch (GenericOpc) {
880 case TargetOpcode::G_FADD:
881 return AArch64::FADDDrr;
882 case TargetOpcode::G_FSUB:
883 return AArch64::FSUBDrr;
884 case TargetOpcode::G_FMUL:
885 return AArch64::FMULDrr;
886 case TargetOpcode::G_FDIV:
887 return AArch64::FDIVDrr;
888 case TargetOpcode::G_OR:
889 return AArch64::ORRv8i8;
890 default:
891 return GenericOpc;
892 }
893 }
894 break;
895 }
896 return GenericOpc;
897}
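// For example, selectBinaryOp(TargetOpcode::G_SHL, AArch64::GPRRegBankID, 64)
// yields AArch64::LSLVXr and G_FADD on the 32-bit FPR bank yields
// AArch64::FADDSrr; any combination not covered above returns the generic
// opcode unchanged, signalling that it is unsupported.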
898
899/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
900/// appropriate for the (value) register bank \p RegBankID and of memory access
901/// size \p OpSize. This returns the variant with the base+unsigned-immediate
902/// addressing mode (e.g., LDRXui).
903/// \returns \p GenericOpc if the combination is unsupported.
904static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
905 unsigned OpSize) {
906 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
907 switch (RegBankID) {
908 case AArch64::GPRRegBankID:
909 switch (OpSize) {
910 case 8:
911 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
912 case 16:
913 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
914 case 32:
915 return isStore ? AArch64::STRWui : AArch64::LDRWui;
916 case 64:
917 return isStore ? AArch64::STRXui : AArch64::LDRXui;
918 }
919 break;
920 case AArch64::FPRRegBankID:
921 switch (OpSize) {
922 case 8:
923 return isStore ? AArch64::STRBui : AArch64::LDRBui;
924 case 16:
925 return isStore ? AArch64::STRHui : AArch64::LDRHui;
926 case 32:
927 return isStore ? AArch64::STRSui : AArch64::LDRSui;
928 case 64:
929 return isStore ? AArch64::STRDui : AArch64::LDRDui;
930 case 128:
931 return isStore ? AArch64::STRQui : AArch64::LDRQui;
932 }
933 break;
934 }
935 return GenericOpc;
936}
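// For example, a 64-bit G_LOAD on the GPR bank selects AArch64::LDRXui and a
// 128-bit G_STORE on the FPR bank selects AArch64::STRQui, both using the
// base plus scaled unsigned immediate addressing mode.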
937
938/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
939/// to \p *To.
940///
941/// E.g "To = COPY SrcReg:SubReg"
942static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
943 const RegisterBankInfo &RBI, Register SrcReg,
944 const TargetRegisterClass *To, unsigned SubReg) {
945 assert(SrcReg.isValid() && "Expected a valid source register?");
946 assert(To && "Destination register class cannot be null");
947 assert(SubReg && "Expected a valid subregister");
948
949 MachineIRBuilder MIB(I);
950 auto SubRegCopy =
951 MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, {}, SubReg);
952 MachineOperand &RegOp = I.getOperand(1);
953 RegOp.setReg(SubRegCopy.getReg(0));
954
955 // It's possible that the destination register won't be constrained. Make
956 // sure that happens.
957 if (!I.getOperand(0).getReg().isPhysical())
958 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
959
960 return true;
961}
962
963/// Helper function to get the source and destination register classes for a
964/// copy. Returns a std::pair containing the source register class for the
965/// copy, and the destination register class for the copy. If a register class
966/// cannot be determined, then it will be nullptr.
967static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
968getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
969 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
970 const RegisterBankInfo &RBI) {
971 Register DstReg = I.getOperand(0).getReg();
972 Register SrcReg = I.getOperand(1).getReg();
973 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
974 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
975
976 TypeSize DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
977 TypeSize SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
978
979 // Special casing for cross-bank copies of s1s. We can technically represent
980 // a 1-bit value with any size of register. The minimum size for a GPR is 32
981 // bits. So, we need to put the FPR on 32 bits as well.
982 //
983 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
984 // then we can pull it into the helpers that get the appropriate class for a
985 // register bank. Or make a new helper that carries along some constraint
986 // information.
987 if (SrcRegBank != DstRegBank &&
988 (DstSize == TypeSize::getFixed(1) && SrcSize == TypeSize::getFixed(1)))
989 SrcSize = DstSize = TypeSize::getFixed(32);
990
991 return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
992 getMinClassForRegBank(DstRegBank, DstSize, true)};
993}
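// For example, a cross-bank copy of an s1 from the FPR bank to the GPR bank
// is treated as a 32-bit copy, so this returns {FPR32, GPR32all} rather than
// classes sized for a single bit.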
994
995// FIXME: We need some sort of API in RBI/TRI to allow generic code to
996// constrain operands of simple instructions given a TargetRegisterClass
997// and LLT
998static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI,
999 const RegisterBankInfo &RBI) {
1000 for (MachineOperand &MO : I.operands()) {
1001 if (!MO.isReg())
1002 continue;
1003 Register Reg = MO.getReg();
1004 if (!Reg)
1005 continue;
1006 if (Reg.isPhysical())
1007 continue;
1008 LLT Ty = MRI.getType(Reg);
1009 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
1010 const TargetRegisterClass *RC =
1011 dyn_cast_if_present<const TargetRegisterClass *>(RegClassOrBank);
1012 if (!RC) {
1013 const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
1014 RC = getRegClassForTypeOnBank(Ty, RB);
1015 if (!RC) {
1016 LLVM_DEBUG(
1017 dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
1018 break;
1019 }
1020 }
1021 RBI.constrainGenericRegister(Reg, *RC, MRI);
1022 }
1023
1024 return true;
1025}
1026
1027static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
1028 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
1029 const RegisterBankInfo &RBI) {
1030 Register DstReg = I.getOperand(0).getReg();
1031 Register SrcReg = I.getOperand(1).getReg();
1032 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
1033 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
1034
1035 // Find the correct register classes for the source and destination registers.
1036 const TargetRegisterClass *SrcRC;
1037 const TargetRegisterClass *DstRC;
1038 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
1039
1040 if (!DstRC) {
1041 LLVM_DEBUG(dbgs() << "Unexpected dest size "
1042 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
1043 return false;
1044 }
1045
1046 // Is this a copy? If so, then we may need to insert a subregister copy.
1047 if (I.isCopy()) {
1048 // Yes. Check if there's anything to fix up.
1049 if (!SrcRC) {
1050 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
1051 return false;
1052 }
1053
1054 const TypeSize SrcSize = TRI.getRegSizeInBits(*SrcRC);
1055 const TypeSize DstSize = TRI.getRegSizeInBits(*DstRC);
1056 unsigned SubReg;
1057
1058 // If the source bank doesn't support a subregister copy small enough,
1059 // then we first need to copy to the destination bank.
1060 if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
1061 const TargetRegisterClass *DstTempRC =
1062 getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
1063 getSubRegForClass(DstRC, TRI, SubReg);
1064
1065 MachineIRBuilder MIB(I);
1066 auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
1067 copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
1068 } else if (SrcSize > DstSize) {
1069 // If the source register is bigger than the destination we need to
1070 // perform a subregister copy.
1071 const TargetRegisterClass *SubRegRC =
1072 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1073 getSubRegForClass(SubRegRC, TRI, SubReg);
1074 copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
1075 } else if (DstSize > SrcSize) {
1076 // If the destination register is bigger than the source we need to do
1077 // a promotion using SUBREG_TO_REG.
1078 const TargetRegisterClass *PromotionRC =
1079 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1080 getSubRegForClass(SrcRC, TRI, SubReg);
1081
1082 Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
1083 BuildMI(*I.getParent(), I, I.getDebugLoc(),
1084 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
1085 .addUse(SrcReg)
1086 .addImm(SubReg);
1087 MachineOperand &RegOp = I.getOperand(1);
1088 RegOp.setReg(PromoteReg);
1089 }
1090
1091 // If the destination is a physical register, then there's nothing to
1092 // change, so we're done.
1093 if (DstReg.isPhysical())
1094 return true;
1095 }
1096
1097 // No need to constrain SrcReg. It will get constrained when we hit another
1098 // of its uses or defs. Copies do not have constraints.
1099 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1100 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
1101 << " operand\n");
1102 return false;
1103 }
1104
1105 // If this is a GPR ZEXT that we want to just reduce down into a copy.
1106 // The sizes will be mismatched with the source < 32b but that's ok.
1107 if (I.getOpcode() == TargetOpcode::G_ZEXT) {
1108 I.setDesc(TII.get(AArch64::COPY));
1109 assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
1110 return selectCopy(I, TII, MRI, TRI, RBI);
1111 }
1112
1113 I.setDesc(TII.get(AArch64::COPY));
1114 return true;
1115}
1116
1117MachineInstr *
1118AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1119 Register False, AArch64CC::CondCode CC,
1120 MachineIRBuilder &MIB) const {
1121 MachineRegisterInfo &MRI = *MIB.getMRI();
1122 assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
1123 RBI.getRegBank(True, MRI, TRI)->getID() &&
1124 "Expected both select operands to have the same regbank?");
1125 LLT Ty = MRI.getType(True);
1126 if (Ty.isVector())
1127 return nullptr;
1128 const unsigned Size = Ty.getSizeInBits();
1129 assert((Size == 32 || Size == 64) &&
1130 "Expected 32 bit or 64 bit select only?");
1131 const bool Is32Bit = Size == 32;
1132 if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1133 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1134 auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1135 constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1136 return &*FCSel;
1137 }
1138
1139 // By default, we'll try and emit a CSEL.
1140 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1141 bool Optimized = false;
1142 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1143 &Optimized](Register &Reg, Register &OtherReg,
1144 bool Invert) {
1145 if (Optimized)
1146 return false;
1147
1148 // Attempt to fold:
1149 //
1150 // %sub = G_SUB 0, %x
1151 // %select = G_SELECT cc, %reg, %sub
1152 //
1153 // Into:
1154 // %select = CSNEG %reg, %x, cc
1155 Register MatchReg;
1156 if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
1157 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1158 Reg = MatchReg;
1159 if (Invert) {
1160 CC = AArch64CC::getInvertedCondCode(CC);
1161 std::swap(Reg, OtherReg);
1162 }
1163 return true;
1164 }
1165
1166 // Attempt to fold:
1167 //
1168 // %xor = G_XOR %x, -1
1169 // %select = G_SELECT cc, %reg, %xor
1170 //
1171 // Into:
1172 // %select = CSINV %reg, %x, cc
1173 if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
1174 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1175 Reg = MatchReg;
1176 if (Invert) {
1177 CC = AArch64CC::getInvertedCondCode(CC);
1178 std::swap(Reg, OtherReg);
1179 }
1180 return true;
1181 }
1182
1183 // Attempt to fold:
1184 //
1185 // %add = G_ADD %x, 1
1186 // %select = G_SELECT cc, %reg, %add
1187 //
1188 // Into:
1189 // %select = CSINC %reg, %x, cc
1190 if (mi_match(Reg, MRI,
1191 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
1192 m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
1193 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1194 Reg = MatchReg;
1195 if (Invert) {
1196 CC = AArch64CC::getInvertedCondCode(CC);
1197 std::swap(Reg, OtherReg);
1198 }
1199 return true;
1200 }
1201
1202 return false;
1203 };
1204
1205 // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1206 // true/false values are constants.
1207 // FIXME: All of these patterns already exist in tablegen. We should be
1208 // able to import these.
1209 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1210 &Optimized]() {
1211 if (Optimized)
1212 return false;
1213 auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
1214 auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
1215 if (!TrueCst && !FalseCst)
1216 return false;
1217
1218 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1219 if (TrueCst && FalseCst) {
1220 int64_t T = TrueCst->Value.getSExtValue();
1221 int64_t F = FalseCst->Value.getSExtValue();
1222
1223 if (T == 0 && F == 1) {
1224 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1225 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1226 True = ZReg;
1227 False = ZReg;
1228 return true;
1229 }
1230
1231 if (T == 0 && F == -1) {
1232 // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
1233 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1234 True = ZReg;
1235 False = ZReg;
1236 return true;
1237 }
1238 }
1239
1240 if (TrueCst) {
1241 int64_t T = TrueCst->Value.getSExtValue();
1242 if (T == 1) {
1243 // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1244 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1245 True = False;
1246 False = ZReg;
1247 CC = AArch64CC::getInvertedCondCode(CC);
1248 return true;
1249 }
1250
1251 if (T == -1) {
1252 // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1253 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1254 True = False;
1255 False = ZReg;
1256 CC = AArch64CC::getInvertedCondCode(CC);
1257 return true;
1258 }
1259 }
1260
1261 if (FalseCst) {
1262 int64_t F = FalseCst->Value.getSExtValue();
1263 if (F == 1) {
1264 // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1265 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1266 False = ZReg;
1267 return true;
1268 }
1269
1270 if (F == -1) {
1271 // G_SELECT cc, t, -1 -> CSINC t, zreg, cc
1272 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1273 False = ZReg;
1274 return true;
1275 }
1276 }
1277 return false;
1278 };
1279
1280 Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1281 Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1282 Optimized |= TryOptSelectCst();
1283 auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1284 constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1285 return &*SelectInst;
1286}
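// A couple of concrete cases handled above: G_SELECT cc, 0, 1 becomes
// CSINC Wd, WZR, WZR, cc (yielding 0 when cc holds and 1 otherwise), and a
// false operand defined by G_SUB 0, %x folds into CSNEG so no separate
// negation instruction is needed.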
1287
1288static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P,
1289 Register RHS = Register(),
1290 MachineRegisterInfo *MRI = nullptr) {
1291 switch (P) {
1292 default:
1293 llvm_unreachable("Unknown condition code!");
1294 case CmpInst::ICMP_NE:
1295 return AArch64CC::NE;
1296 case CmpInst::ICMP_EQ:
1297 return AArch64CC::EQ;
1298 case CmpInst::ICMP_SGT:
1299 return AArch64CC::GT;
1300 case CmpInst::ICMP_SGE:
1301 if (RHS && MRI) {
1302 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, *MRI);
1303 if (ValAndVReg && ValAndVReg->Value == 0)
1304 return AArch64CC::PL;
1305 }
1306 return AArch64CC::GE;
1307 case CmpInst::ICMP_SLT:
1308 if (RHS && MRI) {
1309 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, *MRI);
1310 if (ValAndVReg && ValAndVReg->Value == 0)
1311 return AArch64CC::MI;
1312 }
1313 return AArch64CC::LT;
1314 case CmpInst::ICMP_SLE:
1315 return AArch64CC::LE;
1316 case CmpInst::ICMP_UGT:
1317 return AArch64CC::HI;
1318 case CmpInst::ICMP_UGE:
1319 return AArch64CC::HS;
1320 case CmpInst::ICMP_ULT:
1321 return AArch64CC::LO;
1322 case CmpInst::ICMP_ULE:
1323 return AArch64CC::LS;
1324 }
1325}
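// The optional RHS/MRI arguments let comparisons against zero use the sign
// flag directly: e.g. "icmp sge x, 0" only asks whether x is non-negative, so
// it maps to PL (plus or zero) instead of the generic GE.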
1326
1327/// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
1328static void changeFPCCToORAArch64CC(CmpInst::Predicate CC,
1329 AArch64CC::CondCode &CondCode,
1330 AArch64CC::CondCode &CondCode2) {
1331 CondCode2 = AArch64CC::AL;
1332 switch (CC) {
1333 default:
1334 llvm_unreachable("Unknown FP condition!");
1335 case CmpInst::FCMP_OEQ:
1336 CondCode = AArch64CC::EQ;
1337 break;
1338 case CmpInst::FCMP_OGT:
1339 CondCode = AArch64CC::GT;
1340 break;
1341 case CmpInst::FCMP_OGE:
1342 CondCode = AArch64CC::GE;
1343 break;
1344 case CmpInst::FCMP_OLT:
1345 CondCode = AArch64CC::MI;
1346 break;
1347 case CmpInst::FCMP_OLE:
1348 CondCode = AArch64CC::LS;
1349 break;
1350 case CmpInst::FCMP_ONE:
1351 CondCode = AArch64CC::MI;
1352 CondCode2 = AArch64CC::GT;
1353 break;
1354 case CmpInst::FCMP_ORD:
1355 CondCode = AArch64CC::VC;
1356 break;
1357 case CmpInst::FCMP_UNO:
1358 CondCode = AArch64CC::VS;
1359 break;
1360 case CmpInst::FCMP_UEQ:
1361 CondCode = AArch64CC::EQ;
1362 CondCode2 = AArch64CC::VS;
1363 break;
1364 case CmpInst::FCMP_UGT:
1365 CondCode = AArch64CC::HI;
1366 break;
1367 case CmpInst::FCMP_UGE:
1368 CondCode = AArch64CC::PL;
1369 break;
1370 case CmpInst::FCMP_ULT:
1371 CondCode = AArch64CC::LT;
1372 break;
1373 case CmpInst::FCMP_ULE:
1374 CondCode = AArch64CC::LE;
1375 break;
1376 case CmpInst::FCMP_UNE:
1377 CondCode = AArch64CC::NE;
1378 break;
1379 }
1380}
1381
1382/// Convert an IR fp condition code to an AArch64 CC.
1383/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1384/// should be AND'ed instead of OR'ed.
1385static void changeFCMPPredToAArch64CC(CmpInst::Predicate CC,
1386 AArch64CC::CondCode &CondCode,
1387 AArch64CC::CondCode &CondCode2) {
1388 CondCode2 = AArch64CC::AL;
1389 switch (CC) {
1390 default:
1391 changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
1392 assert(CondCode2 == AArch64CC::AL);
1393 break;
1394 case CmpInst::FCMP_ONE:
1395 // (a one b)
1396 // == ((a olt b) || (a ogt b))
1397 // == ((a ord b) && (a une b))
1398 CondCode = AArch64CC::VC;
1399 CondCode2 = AArch64CC::NE;
1400 break;
1401 case CmpInst::FCMP_UEQ:
1402 // (a ueq b)
1403 // == ((a uno b) || (a oeq b))
1404 // == ((a ule b) && (a uge b))
1405 CondCode = AArch64CC::PL;
1406 CondCode2 = AArch64CC::LE;
1407 break;
1408 }
1409}
1410
1411/// Return a register which can be used as a bit to test in a TB(N)Z.
1412static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1413 MachineRegisterInfo &MRI) {
1414 assert(Reg.isValid() && "Expected valid register!");
1415 bool HasZext = false;
1416 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1417 unsigned Opc = MI->getOpcode();
1418
1419 if (!MI->getOperand(0).isReg() ||
1420 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1421 break;
1422
1423 // (tbz (any_ext x), b) -> (tbz x, b) and
1424 // (tbz (zext x), b) -> (tbz x, b) if we don't use the extended bits.
1425 //
1426 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1427 // on the truncated x is the same as the bit number on x.
1428 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1429 Opc == TargetOpcode::G_TRUNC) {
1430 if (Opc == TargetOpcode::G_ZEXT)
1431 HasZext = true;
1432
1433 Register NextReg = MI->getOperand(1).getReg();
1434 // Did we find something worth folding?
1435 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1436 break;
1437 TypeSize InSize = MRI.getType(NextReg).getSizeInBits();
1438 if (Bit >= InSize)
1439 break;
1440
1441 // NextReg is worth folding. Keep looking.
1442 Reg = NextReg;
1443 continue;
1444 }
1445
1446 // Attempt to find a suitable operation with a constant on one side.
1447 std::optional<uint64_t> C;
1448 Register TestReg;
1449 switch (Opc) {
1450 default:
1451 break;
1452 case TargetOpcode::G_AND:
1453 case TargetOpcode::G_XOR: {
1454 TestReg = MI->getOperand(1).getReg();
1455 Register ConstantReg = MI->getOperand(2).getReg();
1456 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1457 if (!VRegAndVal) {
1458 // AND commutes, check the other side for a constant.
1459 // FIXME: Can we canonicalize the constant so that it's always on the
1460 // same side at some point earlier?
1461 std::swap(ConstantReg, TestReg);
1462 VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1463 }
1464 if (VRegAndVal) {
1465 if (HasZext)
1466 C = VRegAndVal->Value.getZExtValue();
1467 else
1468 C = VRegAndVal->Value.getSExtValue();
1469 }
1470 break;
1471 }
1472 case TargetOpcode::G_ASHR:
1473 case TargetOpcode::G_LSHR:
1474 case TargetOpcode::G_SHL: {
1475 TestReg = MI->getOperand(1).getReg();
1476 auto VRegAndVal =
1477 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1478 if (VRegAndVal)
1479 C = VRegAndVal->Value.getSExtValue();
1480 break;
1481 }
1482 }
1483
1484 // Didn't find a constant or viable register. Bail out of the loop.
1485 if (!C || !TestReg.isValid())
1486 break;
1487
1488 // We found a suitable instruction with a constant. Check to see if we can
1489 // walk through the instruction.
1490 Register NextReg;
1491 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1492 switch (Opc) {
1493 default:
1494 break;
1495 case TargetOpcode::G_AND:
1496 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1497 if ((*C >> Bit) & 1)
1498 NextReg = TestReg;
1499 break;
1500 case TargetOpcode::G_SHL:
1501 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1502 // the type of the register.
1503 if (*C <= Bit && (Bit - *C) < TestRegSize) {
1504 NextReg = TestReg;
1505 Bit = Bit - *C;
1506 }
1507 break;
1508 case TargetOpcode::G_ASHR:
1509 // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1510 // in x
1511 NextReg = TestReg;
1512 Bit = Bit + *C;
1513 if (Bit >= TestRegSize)
1514 Bit = TestRegSize - 1;
1515 break;
1516 case TargetOpcode::G_LSHR:
1517 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1518 if ((Bit + *C) < TestRegSize) {
1519 NextReg = TestReg;
1520 Bit = Bit + *C;
1521 }
1522 break;
1523 case TargetOpcode::G_XOR:
1524 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1525 // appropriate.
1526 //
1527 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1528 //
1529 // tbz x', b -> tbnz x, b
1530 //
1531 // Because x' only has the b-th bit set if x does not.
1532 if ((*C >> Bit) & 1)
1533 Invert = !Invert;
1534 NextReg = TestReg;
1535 break;
1536 }
1537
1538 // Check if we found anything worth folding.
1539 if (!NextReg.isValid())
1540 return Reg;
1541 Reg = NextReg;
1542 }
1543
1544 return Reg;
1545}
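// Worked example: asking for bit 3 of a value defined by %y = G_SHL %x, 2 is
// rewritten by the loop above into bit 1 of %x (provided %y has a single
// non-debug use), so the eventual TB(N)Z can test %x directly.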
1546
1547MachineInstr *AArch64InstructionSelector::emitTestBit(
1548 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1549 MachineIRBuilder &MIB) const {
1550 assert(TestReg.isValid());
1551 assert(ProduceNonFlagSettingCondBr &&
1552 "Cannot emit TB(N)Z with speculation tracking!");
1553 MachineRegisterInfo &MRI = *MIB.getMRI();
1554
1555 // Attempt to optimize the test bit by walking over instructions.
1556 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1557 LLT Ty = MRI.getType(TestReg);
1558 unsigned Size = Ty.getSizeInBits();
1559 assert(!Ty.isVector() && "Expected a scalar!");
1560 assert(Bit < 64 && "Bit is too large!");
1561
1562 // When the test register is a 64-bit register, we have to narrow to make
1563 // TBNZW work.
1564 bool UseWReg = Bit < 32;
1565 unsigned NecessarySize = UseWReg ? 32 : 64;
1566 if (Size != NecessarySize)
1567 TestReg = moveScalarRegClass(
1568 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1569 MIB);
1570
1571 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1572 {AArch64::TBZW, AArch64::TBNZW}};
1573 unsigned Opc = OpcTable[UseWReg][IsNegative];
1574 auto TestBitMI =
1575 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1576 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1577 return &*TestBitMI;
1578}
1579
1580bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1581 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1582 MachineIRBuilder &MIB) const {
1583 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1584 // Given something like this:
1585 //
1586 // %x = ...Something...
1587 // %one = G_CONSTANT i64 1
1588 // %zero = G_CONSTANT i64 0
1589 // %and = G_AND %x, %one
1590 // %cmp = G_ICMP intpred(ne), %and, %zero
1591 // %cmp_trunc = G_TRUNC %cmp
1592 // G_BRCOND %cmp_trunc, %bb.3
1593 //
1594 // We want to try and fold the AND into the G_BRCOND and produce either a
1595 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1596 //
1597 // In this case, we'd get
1598 //
1599 // TBNZ %x %bb.3
1600 //
1601
1602 // Check if the AND has a constant on its RHS which we can use as a mask.
1603 // If it's a power of 2, then it's the same as checking a specific bit.
1604 // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1605 auto MaybeBit = getIConstantVRegValWithLookThrough(
1606 AndInst.getOperand(2).getReg(), *MIB.getMRI());
1607 if (!MaybeBit)
1608 return false;
1609
1610 int32_t Bit = MaybeBit->Value.exactLogBase2();
1611 if (Bit < 0)
1612 return false;
1613
1614 Register TestReg = AndInst.getOperand(1).getReg();
1615
1616 // Emit a TB(N)Z.
1617 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1618 return true;
1619}
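// For example, %and = G_AND %x, 8 feeding an eq/ne compare against zero
// becomes TBZ/TBNZ %x, #3, since 8 is a power of two selecting bit 3.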
1620
1621MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1622 bool IsNegative,
1623 MachineBasicBlock *DestMBB,
1624 MachineIRBuilder &MIB) const {
1625 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1626 MachineRegisterInfo &MRI = *MIB.getMRI();
1627 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1628 AArch64::GPRRegBankID &&
1629 "Expected GPRs only?");
1630 auto Ty = MRI.getType(CompareReg);
1631 unsigned Width = Ty.getSizeInBits();
1632 assert(!Ty.isVector() && "Expected scalar only?");
1633 assert(Width <= 64 && "Expected width to be at most 64?");
1634 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1635 {AArch64::CBNZW, AArch64::CBNZX}};
1636 unsigned Opc = OpcTable[IsNegative][Width == 64];
1637 auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1638 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1639 return &*BranchMI;
1640}
1641
1642bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1643 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1644 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1645 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1646 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1647 // totally clean. Some of them require two branches to implement.
1648 auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1649 emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1650 Pred);
1651 AArch64CC::CondCode CC1, CC2;
1652 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
1653 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1654 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1655 if (CC2 != AArch64CC::AL)
1656 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1657 I.eraseFromParent();
1658 return true;
1659}
1660
1661bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1662 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1663 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1664 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1665 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1666 //
1667 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1668 // instructions will not be produced, as they are conditional branch
1669 // instructions that do not set flags.
1670 if (!ProduceNonFlagSettingCondBr)
1671 return false;
1672
1673 MachineRegisterInfo &MRI = *MIB.getMRI();
1674 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1675 auto Pred =
1676 static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1677 Register LHS = ICmp.getOperand(2).getReg();
1678 Register RHS = ICmp.getOperand(3).getReg();
1679
1680 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1681 auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1682 MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1683
1684 // When we can emit a TB(N)Z, prefer that.
1685 //
1686 // Handle non-commutative condition codes first.
1687 // Note that we don't want to do this when we have a G_AND because it can
1688 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
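// (The G_AND case is instead handled in the equality path below, where the
// AND can fold directly into the TB(N)Z's bit test.)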
1689 if (VRegAndVal && !AndInst) {
1690 int64_t C = VRegAndVal->Value.getSExtValue();
1691
1692 // For a signed greater-than comparison against -1, we only need to test that
1693 // the msb is zero.
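// (x s> -1) holds exactly when x s>= 0, i.e. when the sign bit is clear, so
// passing IsNegative == false to emitTestBit emits the TBZ form.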
1694 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1695 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1696 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1697 I.eraseFromParent();
1698 return true;
1699 }
1700
1701 // For a signed less-than comparison against zero, we only need to test that
1702 // the msb is not zero.
1703 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1704 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1705 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1706 I.eraseFromParent();
1707 return true;
1708 }
1709
1710 // Inversely, if we have a signed greater-than-or-equal comparison to zero,
1711 // we can test if the msb is zero.
1712 if (C == 0 && Pred == CmpInst::ICMP_SGE) {
1713 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1714 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1715 I.eraseFromParent();
1716 return true;
1717 }
1718 }
1719
1720 // Attempt to handle commutative condition codes. Right now, that's only
1721 // eq/ne.
1722 if (ICmpInst::isEquality(Pred)) {
1723 if (!VRegAndVal) {
1724 std::swap(RHS, LHS);
1725 VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1726 AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1727 }
1728
1729 if (VRegAndVal && VRegAndVal->Value == 0) {
1730 // If there's a G_AND feeding into this branch, try to fold it away by
1731 // emitting a TB(N)Z instead.
1732 //
1733 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1734 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1735 // would be redundant.
1736 if (AndInst &&
1737 tryOptAndIntoCompareBranch(
1738 *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1739 I.eraseFromParent();
1740 return true;
1741 }
1742
1743 // Otherwise, try to emit a CB(N)Z instead.
1744 auto LHSTy = MRI.getType(LHS);
1745 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1746 emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1747 I.eraseFromParent();
1748 return true;
1749 }
1750 }
1751 }
1752
1753 return false;
1754}
1755
1756bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1757 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1758 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1759 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1760 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1761 return true;
1762
1763 // Couldn't optimize. Emit a compare + a Bcc.
1764 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1765 auto &PredOp = ICmp.getOperand(1);
1766 emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1767 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1768 static_cast<CmpInst::Predicate>(PredOp.getPredicate()),
1769 ICmp.getOperand(3).getReg(), MIB.getMRI());
1770 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1771 I.eraseFromParent();
1772 return true;
1773}
1774
1775bool AArch64InstructionSelector::selectCompareBranch(
1776 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
1777 Register CondReg = I.getOperand(0).getReg();
1778 MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1779 // Try to select the G_BRCOND using whatever is feeding the condition if
1780 // possible.
1781 unsigned CCMIOpc = CCMI->getOpcode();
1782 if (CCMIOpc == TargetOpcode::G_FCMP)
1783 return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1784 if (CCMIOpc == TargetOpcode::G_ICMP)
1785 return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1786
1787 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1788 // instructions will not be produced, as they are conditional branch
1789 // instructions that do not set flags.
1790 if (ProduceNonFlagSettingCondBr) {
1791 emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1792 I.getOperand(1).getMBB(), MIB);
1793 I.eraseFromParent();
1794 return true;
1795 }
1796
1797 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1798 auto TstMI =
1799 MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1800 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
1801 auto Bcc = MIB.buildInstr(AArch64::Bcc)
1802 .addImm(AArch64CC::NE)
1803 .addMBB(I.getOperand(1).getMBB());
1804 I.eraseFromParent();
1805 constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1806 return true;
1807}
1808
1809/// Returns the element immediate value of a vector shift operand if found.
1810/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1811static std::optional<int64_t> getVectorShiftImm(Register Reg,
1812 MachineRegisterInfo &MRI) {
1813 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1814 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1815 return getAArch64VectorSplatScalar(*OpMI, MRI);
1816}
1817
1818/// Matches and returns the shift immediate value for a SHL instruction given
1819/// a shift operand.
1820static std::optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg,
1821 MachineRegisterInfo &MRI) {
1822 std::optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1823 if (!ShiftImm)
1824 return std::nullopt;
1825 // Check the immediate is in range for a SHL.
1826 int64_t Imm = *ShiftImm;
1827 if (Imm < 0)
1828 return std::nullopt;
1829 switch (SrcTy.getElementType().getSizeInBits()) {
1830 default:
1831 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1832 return std::nullopt;
1833 case 8:
1834 if (Imm > 7)
1835 return std::nullopt;
1836 break;
1837 case 16:
1838 if (Imm > 15)
1839 return std::nullopt;
1840 break;
1841 case 32:
1842 if (Imm > 31)
1843 return std::nullopt;
1844 break;
1845 case 64:
1846 if (Imm > 63)
1847 return std::nullopt;
1848 break;
1849 }
1850 return Imm;
1851}
1852
1853bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
1854 MachineRegisterInfo &MRI) {
1855 assert(I.getOpcode() == TargetOpcode::G_SHL);
1856 Register DstReg = I.getOperand(0).getReg();
1857 const LLT Ty = MRI.getType(DstReg);
1858 Register Src1Reg = I.getOperand(1).getReg();
1859 Register Src2Reg = I.getOperand(2).getReg();
1860
1861 if (!Ty.isVector())
1862 return false;
1863
1864 // Check if we have a vector of constants on RHS that we can select as the
1865 // immediate form.
1866 std::optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1867
1868 unsigned Opc = 0;
1869 if (Ty == LLT::fixed_vector(2, 64)) {
1870 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1871 } else if (Ty == LLT::fixed_vector(4, 32)) {
1872 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1873 } else if (Ty == LLT::fixed_vector(2, 32)) {
1874 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1875 } else if (Ty == LLT::fixed_vector(4, 16)) {
1876 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1877 } else if (Ty == LLT::fixed_vector(8, 16)) {
1878 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1879 } else if (Ty == LLT::fixed_vector(16, 8)) {
1880 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1881 } else if (Ty == LLT::fixed_vector(8, 8)) {
1882 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1883 } else {
1884 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1885 return false;
1886 }
1887
1888 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1889 if (ImmVal)
1890 Shl.addImm(*ImmVal);
1891 else
1892 Shl.addUse(Src2Reg);
1893 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1894 I.eraseFromParent();
1895 return true;
1896}
1897
1898bool AArch64InstructionSelector::selectVectorAshrLshr(
1899 MachineInstr &I, MachineRegisterInfo &MRI) {
1900 assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1901 I.getOpcode() == TargetOpcode::G_LSHR);
1902 Register DstReg = I.getOperand(0).getReg();
1903 const LLT Ty = MRI.getType(DstReg);
1904 Register Src1Reg = I.getOperand(1).getReg();
1905 Register Src2Reg = I.getOperand(2).getReg();
1906
1907 if (!Ty.isVector())
1908 return false;
1909
1910 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1911
1912 // We expect the immediate case to be lowered in the PostLegalCombiner to
1913 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1914
1915 // There is no vector shift-right-by-register instruction; instead, the
1916 // shift-left-by-register instructions take a signed shift amount, where a
1917 // negative value specifies a right shift.
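// For example, a variable G_ASHR becomes a NEG of the shift amount followed by
// an SSHL by that negated amount; G_LSHR uses USHL in the same way.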
1918
1919 unsigned Opc = 0;
1920 unsigned NegOpc = 0;
1921 const TargetRegisterClass *RC =
1922 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
1923 if (Ty == LLT::fixed_vector(2, 64)) {
1924 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1925 NegOpc = AArch64::NEGv2i64;
1926 } else if (Ty == LLT::fixed_vector(4, 32)) {
1927 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1928 NegOpc = AArch64::NEGv4i32;
1929 } else if (Ty == LLT::fixed_vector(2, 32)) {
1930 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1931 NegOpc = AArch64::NEGv2i32;
1932 } else if (Ty == LLT::fixed_vector(4, 16)) {
1933 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1934 NegOpc = AArch64::NEGv4i16;
1935 } else if (Ty == LLT::fixed_vector(8, 16)) {
1936 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1937 NegOpc = AArch64::NEGv8i16;
1938 } else if (Ty == LLT::fixed_vector(16, 8)) {
1939 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1940 NegOpc = AArch64::NEGv16i8;
1941 } else if (Ty == LLT::fixed_vector(8, 8)) {
1942 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1943 NegOpc = AArch64::NEGv8i8;
1944 } else {
1945 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1946 return false;
1947 }
1948
1949 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1950 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1951 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1952 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1953 I.eraseFromParent();
1954 return true;
1955}
1956
1957bool AArch64InstructionSelector::selectVaStartAAPCS(
1958 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1959
1960 if (STI.isCallingConvWin64(MF.getFunction().getCallingConv(),
1961 MF.getFunction().isVarArg()))
1962 return false;
1963
1964 // The layout of the va_list struct is specified in the AArch64 Procedure Call
1965 // Standard, section 10.1.5.
1966
1967 const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1968 const unsigned PtrSize = STI.isTargetILP32() ? 4 : 8;
1969 const auto *PtrRegClass =
1970 STI.isTargetILP32() ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
1971
1972 const MCInstrDesc &MCIDAddAddr =
1973 TII.get(STI.isTargetILP32() ? AArch64::ADDWri : AArch64::ADDXri);
1974 const MCInstrDesc &MCIDStoreAddr =
1975 TII.get(STI.isTargetILP32() ? AArch64::STRWui : AArch64::STRXui);
1976
1977 /*
1978 * typedef struct va_list {
1979 * void * stack; // next stack param
1980 * void * gr_top; // end of GP arg reg save area
1981 * void * vr_top; // end of FP/SIMD arg reg save area
1982 * int gr_offs; // offset from gr_top to next GP register arg
1983 * int vr_offs; // offset from vr_top to next FP/SIMD register arg
1984 * } va_list;
1985 */
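// With 8-byte pointers the fields live at byte offsets 0, 8, 16, 24 and 28
// (0, 4, 8, 12 and 16 on ILP32), giving the 32 (20) byte total asserted below.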
1986 const auto VAList = I.getOperand(0).getReg();
1987
1988 // Our current offset in bytes from the va_list struct (VAList).
1989 unsigned OffsetBytes = 0;
1990
1991 // Helper function to store (FrameIndex + Imm) to VAList at offset OffsetBytes
1992 // and increment OffsetBytes by PtrSize.
1993 const auto PushAddress = [&](const int FrameIndex, const int64_t Imm) {
1994 const Register Top = MRI.createVirtualRegister(PtrRegClass);
1995 auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), MCIDAddAddr)
1996 .addDef(Top)
1997 .addFrameIndex(FrameIndex)
1998 .addImm(Imm)
1999 .addImm(0);
2000 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2001
2002 const auto *MMO = *I.memoperands_begin();
2003 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), MCIDStoreAddr)
2004 .addUse(Top)
2005 .addUse(VAList)
2006 .addImm(OffsetBytes / PtrSize)
2007 .addMemOperand(MF.getMachineMemOperand(
2008 MMO->getPointerInfo().getWithOffset(OffsetBytes),
2009 MachineMemOperand::MOStore, PtrSize, MMO->getBaseAlign()));
2010 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2011
2012 OffsetBytes += PtrSize;
2013 };
2014
2015 // void* stack at offset 0
2016 PushAddress(FuncInfo->getVarArgsStackIndex(), 0);
2017
2018 // void* gr_top at offset 8 (4 on ILP32)
2019 const unsigned GPRSize = FuncInfo->getVarArgsGPRSize();
2020 PushAddress(FuncInfo->getVarArgsGPRIndex(), GPRSize);
2021
2022 // void* vr_top at offset 16 (8 on ILP32)
2023 const unsigned FPRSize = FuncInfo->getVarArgsFPRSize();
2024 PushAddress(FuncInfo->getVarArgsFPRIndex(), FPRSize);
2025
2026 // Helper function to store a 4-byte integer constant to VAList at offset
2027 // OffsetBytes, and increment OffsetBytes by 4.
2028 const auto PushIntConstant = [&](const int32_t Value) {
2029 constexpr int IntSize = 4;
2030 const Register Temp = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2031 auto MIB =
2032 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::MOVi32imm))
2033 .addDef(Temp)
2034 .addImm(Value);
2035 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2036
2037 const auto *MMO = *I.memoperands_begin();
2038 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRWui))
2039 .addUse(Temp)
2040 .addUse(VAList)
2041 .addImm(OffsetBytes / IntSize)
2042 .addMemOperand(MF.getMachineMemOperand(
2043 MMO->getPointerInfo().getWithOffset(OffsetBytes),
2044 MachineMemOperand::MOStore, IntSize, MMO->getBaseAlign()));
2045 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2046 OffsetBytes += IntSize;
2047 };
2048
2049 // int gr_offs at offset 24 (12 on ILP32)
2050 PushIntConstant(-static_cast<int32_t>(GPRSize));
2051
2052 // int vr_offs at offset 28 (16 on ILP32)
2053 PushIntConstant(-static_cast<int32_t>(FPRSize));
2054
2055 assert(OffsetBytes == (STI.isTargetILP32() ? 20 : 32) && "Unexpected offset");
2056
2057 I.eraseFromParent();
2058 return true;
2059}
2060
2061bool AArch64InstructionSelector::selectVaStartDarwin(
2062 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
2063 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
2064 Register ListReg = I.getOperand(0).getReg();
2065
2066 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2067
2068 int FrameIdx = FuncInfo->getVarArgsStackIndex();
2069 if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64(
2070 MF.getFunction().getCallingConv(), MF.getFunction().isVarArg())) {
2071 FrameIdx = FuncInfo->getVarArgsGPRSize() > 0
2072 ? FuncInfo->getVarArgsGPRIndex()
2073 : FuncInfo->getVarArgsStackIndex();
2074 }
2075
2076 auto MIB =
2077 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
2078 .addDef(ArgsAddrReg)
2079 .addFrameIndex(FrameIdx)
2080 .addImm(0)
2081 .addImm(0);
2082
2083 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2084
2085 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
2086 .addUse(ArgsAddrReg)
2087 .addUse(ListReg)
2088 .addImm(0)
2089 .addMemOperand(*I.memoperands_begin());
2090
2091 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2092 I.eraseFromParent();
2093 return true;
2094}
2095
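/// Materialize an absolute 64-bit symbol address (used for the large code
/// model) as a MOVZ of bits 0-15 followed by MOVKs for bits 16-31, 32-47 and
/// 48-63, using the MO_G0..MO_G3 relocation flags.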
2096void AArch64InstructionSelector::materializeLargeCMVal(
2097 MachineInstr &I, const Value *V, unsigned OpFlags) {
2098 MachineBasicBlock &MBB = *I.getParent();
2099 MachineFunction &MF = *MBB.getParent();
2100 MachineRegisterInfo &MRI = MF.getRegInfo();
2101
2102 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
2103 MovZ->addOperand(MF, I.getOperand(1));
2104 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
2105 AArch64II::MO_NC);
2106 MovZ->addOperand(MF, MachineOperand::CreateImm(0));
2107 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
2108
2109 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
2110 Register ForceDstReg) {
2111 Register DstReg = ForceDstReg
2112 ? ForceDstReg
2113 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2114 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
2115 if (auto *GV = dyn_cast<GlobalValue>(V)) {
2116 MovI->addOperand(MF, MachineOperand::CreateGA(
2117 GV, MovZ->getOperand(1).getOffset(), Flags));
2118 } else {
2119 MovI->addOperand(
2120 MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
2121 MovZ->getOperand(1).getOffset(), Flags));
2122 }
2123 MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
2124 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
2125 return DstReg;
2126 };
2127 Register DstReg = BuildMovK(MovZ.getReg(0),
2128 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
2129 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
2130 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
2131}
2132
2133bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
2134 MachineBasicBlock &MBB = *I.getParent();
2135 MachineFunction &MF = *MBB.getParent();
2136 MachineRegisterInfo &MRI = MF.getRegInfo();
2137
2138 switch (I.getOpcode()) {
2139 case TargetOpcode::G_CONSTANT: {
2140 Register DefReg = I.getOperand(0).getReg();
2141 const LLT DefTy = MRI.getType(DefReg);
2142 if (!DefTy.isPointer())
2143 return false;
2144 const unsigned PtrSize = DefTy.getSizeInBits();
2145 if (PtrSize != 32 && PtrSize != 64)
2146 return false;
2147 // Convert pointer typed constants to integers so TableGen can select.
2148 MRI.setType(DefReg, LLT::integer(PtrSize));
2149 return true;
2150 }
2151 case TargetOpcode::G_STORE: {
2152 bool Changed = contractCrossBankCopyIntoStore(I, MRI);
2153 MachineOperand &SrcOp = I.getOperand(0);
2154 if (MRI.getType(SrcOp.getReg()).isPointer()) {
2155 // Allow matching with imported patterns for stores of pointers. Unlike
2156 // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
2157 // and constrain.
2158 auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
2159 Register NewSrc = Copy.getReg(0);
2160 SrcOp.setReg(NewSrc);
2161 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
2162 Changed = true;
2163 }
2164 return Changed;
2165 }
2166 case TargetOpcode::G_PTR_ADD: {
2167 // If Checked Pointer Arithmetic (FEAT_CPA) is present, preserve the pointer
2168 // arithmetic semantics instead of falling back to regular arithmetic.
2169 const auto &TL = STI.getTargetLowering();
2170 if (TL->shouldPreservePtrArith(MF.getFunction(), EVT()))
2171 return false;
2172 return convertPtrAddToAdd(I, MRI);
2173 }
2174 case TargetOpcode::G_LOAD: {
2175 // For scalar loads of pointers, we try to convert the dest type from p0
2176 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
2177 // conversion, this should be ok because all users should have been
2178 // selected already, so the type doesn't matter for them.
2179 Register DstReg = I.getOperand(0).getReg();
2180 const LLT DstTy = MRI.getType(DstReg);
2181 if (!DstTy.isPointer())
2182 return false;
2183 MRI.setType(DstReg, LLT::scalar(64));
2184 return true;
2185 }
2186 case AArch64::G_DUP: {
2187 // Convert the type from p0 to s64 to help selection.
2188 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2189 if (!DstTy.isPointerVector())
2190 return false;
2191 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
2192 MRI.setType(I.getOperand(0).getReg(),
2193 DstTy.changeElementType(LLT::scalar(64)));
2194 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2195 I.getOperand(1).setReg(NewSrc.getReg(0));
2196 return true;
2197 }
2198 case AArch64::G_INSERT_VECTOR_ELT: {
2199 // Convert the type from p0 to s64 to help selection.
2200 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2201 LLT SrcVecTy = MRI.getType(I.getOperand(1).getReg());
2202 if (!SrcVecTy.isPointerVector())
2203 return false;
2204 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(2).getReg());
2205 MRI.setType(I.getOperand(1).getReg(),
2206 DstTy.changeElementType(LLT::scalar(64)));
2207 MRI.setType(I.getOperand(0).getReg(),
2208 DstTy.changeElementType(LLT::scalar(64)));
2209 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2210 I.getOperand(2).setReg(NewSrc.getReg(0));
2211 return true;
2212 }
2213 case TargetOpcode::G_UITOFP:
2214 case TargetOpcode::G_SITOFP: {
2215 // If both source and destination regbanks are FPR, then convert the opcode
2216 // to G_SITOF so that the importer can select it to an fpr variant.
2217 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
2218 // copy.
2219 Register SrcReg = I.getOperand(1).getReg();
2220 LLT SrcTy = MRI.getType(SrcReg);
2221 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2222 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
2223 return false;
2224
2225 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
2226 // Need to add a copy to change the type so that the existing patterns can
2227 // match when there is an integer on an FPR bank.
2228 if (SrcTy.getScalarType().isInteger()) {
2229 auto Copy = MIB.buildCopy(DstTy, SrcReg);
2230 I.getOperand(1).setReg(Copy.getReg(0));
2231 MRI.setRegClass(Copy.getReg(0),
2232 getRegClassForTypeOnBank(
2233 SrcTy, RBI.getRegBank(AArch64::FPRRegBankID)));
2234 }
2235 if (I.getOpcode() == TargetOpcode::G_SITOFP)
2236 I.setDesc(TII.get(AArch64::G_SITOF));
2237 else
2238 I.setDesc(TII.get(AArch64::G_UITOF));
2239 return true;
2240 }
2241 return false;
2242 }
2243 default:
2244 return false;
2245 }
2246}
2247
2248/// This lowering tries to look for G_PTR_ADD instructions and then converts
2249/// them to a standard G_ADD with a COPY on the source.
2250///
2251/// The motivation behind this is to expose the add semantics to the imported
2252/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
2253 /// because the selector works bottom up, uses before defs. By the time we
2254 /// end up trying to select a G_PTR_ADD, we will already have tried to fold it
2255 /// into the users' addressing modes and evidently failed.
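/// For example:
///   %dst:gpr(p0) = G_PTR_ADD %base(p0), %off(s64)
/// is rewritten to:
///   %intbase:gpr(s64) = G_PTRTOINT %base(p0)
///   %dst(s64) = G_ADD %intbase, %off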
2256bool AArch64InstructionSelector::convertPtrAddToAdd(
2257 MachineInstr &I, MachineRegisterInfo &MRI) {
2258 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
2259 Register DstReg = I.getOperand(0).getReg();
2260 Register AddOp1Reg = I.getOperand(1).getReg();
2261 const LLT PtrTy = MRI.getType(DstReg);
2262 if (PtrTy.getAddressSpace() != 0)
2263 return false;
2264
2265 const LLT CastPtrTy = PtrTy.isVector()
2266 ? LLT::fixed_vector(2, 64)
2267 : LLT::integer(64);
2268 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
2269 // Set regbanks on the registers.
2270 if (PtrTy.isVector())
2271 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2272 else
2273 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2274
2275 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2276 // %dst(intty) = G_ADD %intbase, off
2277 I.setDesc(TII.get(TargetOpcode::G_ADD));
2278 MRI.setType(DstReg, CastPtrTy);
2279 I.getOperand(1).setReg(PtrToInt.getReg(0));
2280 if (!select(*PtrToInt)) {
2281 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
2282 return false;
2283 }
2284
2285 // Also take the opportunity here to try to do some optimization.
2286 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
2287 Register NegatedReg;
2288 if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
2289 return true;
2290 I.getOperand(2).setReg(NegatedReg);
2291 I.setDesc(TII.get(TargetOpcode::G_SUB));
2292 return true;
2293}
2294
2295bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
2296 MachineRegisterInfo &MRI) {
2297 // We try to match the immediate variant of LSL, which is actually an alias
2298 // for a special case of UBFM. Otherwise, we fall back to the imported
2299 // selector which will match the register variant.
2300 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2301 const auto &MO = I.getOperand(2);
2302 auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
2303 if (!VRegAndVal)
2304 return false;
2305
2306 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2307 if (DstTy.isVector())
2308 return false;
2309 bool Is64Bit = DstTy.getSizeInBits() == 64;
2310 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2311 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2312
2313 if (!Imm1Fn || !Imm2Fn)
2314 return false;
2315
2316 auto NewI =
2317 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2318 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2319
2320 for (auto &RenderFn : *Imm1Fn)
2321 RenderFn(NewI);
2322 for (auto &RenderFn : *Imm2Fn)
2323 RenderFn(NewI);
2324
2325 I.eraseFromParent();
2327 return true;
2328}
2329
2330bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2331 MachineInstr &I, MachineRegisterInfo &MRI) {
2332 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2333 // If we're storing a scalar, it doesn't matter what register bank that
2334 // scalar is on. All that matters is the size.
2335 //
2336 // So, if we see something like this (with a 32-bit scalar as an example):
2337 //
2338 // %x:gpr(s32) = ... something ...
2339 // %y:fpr(s32) = COPY %x:gpr(s32)
2340 // G_STORE %y:fpr(s32)
2341 //
2342 // We can fix this up into something like this:
2343 //
2344 // G_STORE %x:gpr(s32)
2345 //
2346 // And then continue the selection process normally.
2347 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2348 if (!DefDstReg.isValid())
2349 return false;
2350 LLT DefDstTy = MRI.getType(DefDstReg);
2351 Register StoreSrcReg = I.getOperand(0).getReg();
2352 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2353
2354 // If we get something strange like a physical register, then we shouldn't
2355 // go any further.
2356 if (!DefDstTy.isValid())
2357 return false;
2358
2359 // Are the source and dst types the same size?
2360 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2361 return false;
2362
2363 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2364 RBI.getRegBank(DefDstReg, MRI, TRI))
2365 return false;
2366
2367 // We have a cross-bank copy, which is entering a store. Let's fold it.
2368 I.getOperand(0).setReg(DefDstReg);
2369 return true;
2370}
2371
2372bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2373 assert(I.getParent() && "Instruction should be in a basic block!");
2374 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2375
2376 MachineBasicBlock &MBB = *I.getParent();
2377 MachineFunction &MF = *MBB.getParent();
2378 MachineRegisterInfo &MRI = MF.getRegInfo();
2379
2380 switch (I.getOpcode()) {
2381 case AArch64::G_DUP: {
2382 // Before selecting a DUP instruction, check if it is better selected as a
2383 // MOV or load from a constant pool.
2384 Register Src = I.getOperand(1).getReg();
2385 auto ValAndVReg = getAnyConstantVRegValWithLookThrough(
2386 Src, MRI, /*LookThroughInstrs=*/true, /*LookThroughAnyExt=*/true);
2387 if (!ValAndVReg)
2388 return false;
2389 LLVMContext &Ctx = MF.getFunction().getContext();
2390 Register Dst = I.getOperand(0).getReg();
2391 auto *CV = ConstantDataVector::getSplat(
2392 MRI.getType(Dst).getNumElements(),
2393 ConstantInt::get(
2394 Type::getIntNTy(Ctx, MRI.getType(Dst).getScalarSizeInBits()),
2395 ValAndVReg->Value.trunc(MRI.getType(Dst).getScalarSizeInBits())));
2396 if (!emitConstantVector(Dst, CV, MIB, MRI))
2397 return false;
2398 I.eraseFromParent();
2399 return true;
2400 }
2401 case TargetOpcode::G_SEXT:
2402 // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
2403 // over a normal extend.
2404 if (selectUSMovFromExtend(I, MRI))
2405 return true;
2406 return false;
2407 case TargetOpcode::G_BR:
2408 return false;
2409 case TargetOpcode::G_SHL:
2410 return earlySelectSHL(I, MRI);
2411 case TargetOpcode::G_CONSTANT: {
2412 bool IsZero = false;
2413 if (I.getOperand(1).isCImm())
2414 IsZero = I.getOperand(1).getCImm()->isZero();
2415 else if (I.getOperand(1).isImm())
2416 IsZero = I.getOperand(1).getImm() == 0;
2417
2418 if (!IsZero)
2419 return false;
2420
2421 Register DefReg = I.getOperand(0).getReg();
2422 LLT Ty = MRI.getType(DefReg);
2423 if (Ty.getSizeInBits() == 64) {
2424 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2425 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2426 } else if (Ty.getSizeInBits() <= 32) {
2427 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2428 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2429 } else
2430 return false;
2431
2432 I.setDesc(TII.get(TargetOpcode::COPY));
2433 return true;
2434 }
2435
2436 case TargetOpcode::G_ADD: {
2437 // Check if this is being fed by a G_ICMP on either side.
2438 //
2439 // (cmp pred, x, y) + z
2440 //
2441 // In the above case, when the cmp is true, we increment z by 1. So, we can
2442 // fold the add into the cset for the cmp by using cinc.
2443 //
2444 // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
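// For example, %z + (icmp eq %x, %y) selects to a compare followed by
// CSINC %dst, %z, %z, ne (the CINC %dst, %z, eq alias): %z is incremented
// exactly when the compare is true.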
2445 Register AddDst = I.getOperand(0).getReg();
2446 Register AddLHS = I.getOperand(1).getReg();
2447 Register AddRHS = I.getOperand(2).getReg();
2448 // Only handle scalars.
2449 LLT Ty = MRI.getType(AddLHS);
2450 if (Ty.isVector())
2451 return false;
2452 // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
2453 // bits.
2454 unsigned Size = Ty.getSizeInBits();
2455 if (Size != 32 && Size != 64)
2456 return false;
2457 auto MatchCmp = [&](Register Reg) -> MachineInstr * {
2458 if (!MRI.hasOneNonDBGUse(Reg))
2459 return nullptr;
2460 // If the LHS of the add is 32 bits, then we want to fold a 32-bit
2461 // compare.
2462 if (Size == 32)
2463 return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
2464 // We model scalar compares using 32-bit destinations right now.
2465 // If it's a 64-bit compare, it'll have 64-bit sources.
2466 Register ZExt;
2467 if (!mi_match(Reg, MRI,
2468 m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt))))))
2469 return nullptr;
2470 auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
2471 if (!Cmp ||
2472 MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
2473 return nullptr;
2474 return Cmp;
2475 };
2476 // Try to match
2477 // z + (cmp pred, x, y)
2478 MachineInstr *Cmp = MatchCmp(AddRHS);
2479 if (!Cmp) {
2480 // (cmp pred, x, y) + z
2481 std::swap(AddLHS, AddRHS);
2482 Cmp = MatchCmp(AddRHS);
2483 if (!Cmp)
2484 return false;
2485 }
2486 auto &PredOp = Cmp->getOperand(1);
2488 emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
2489 /*RHS=*/Cmp->getOperand(3), PredOp, MIB);
2490 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
2491 const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
2492 CmpInst::getInversePredicate(Pred), Cmp->getOperand(3).getReg(), &MRI);
2493 emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
2494 I.eraseFromParent();
2495 return true;
2496 }
2497 case TargetOpcode::G_OR: {
2498 // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2499 // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2500 // shifting and masking that we can replace with a BFI (encoded as a BFM).
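// For example, with Size == 32 and ShiftImm == 8, (%a << 8) | (%b & 0xff) is
// emitted as a BFMWri with Immr = 24 and Imms = 23 (the BFI #8, #24 alias):
// the low 24 bits of %a are inserted above the low 8 bits of %b.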
2501 Register Dst = I.getOperand(0).getReg();
2502 LLT Ty = MRI.getType(Dst);
2503
2504 if (!Ty.isScalar())
2505 return false;
2506
2507 unsigned Size = Ty.getSizeInBits();
2508 if (Size != 32 && Size != 64)
2509 return false;
2510
2511 Register ShiftSrc;
2512 int64_t ShiftImm;
2513 Register MaskSrc;
2514 int64_t MaskImm;
2515 if (!mi_match(
2516 Dst, MRI,
2517 m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
2518 m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
2519 return false;
2520
2521 if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2522 return false;
2523
2524 int64_t Immr = Size - ShiftImm;
2525 int64_t Imms = Size - ShiftImm - 1;
2526 unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2527 emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2528 I.eraseFromParent();
2529 return true;
2530 }
2531 case TargetOpcode::G_FENCE: {
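// A single-thread fence (scope operand == 0) only needs a compiler barrier.
// Otherwise an acquire-only fence (ordering == 4) can use DMB ISHLD (0x9);
// anything stronger needs a full DMB ISH (0xb).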
2532 if (I.getOperand(1).getImm() == 0)
2533 BuildMI(MBB, I, MIMetadata(I), TII.get(TargetOpcode::MEMBARRIER));
2534 else
2535 BuildMI(MBB, I, MIMetadata(I), TII.get(AArch64::DMB))
2536 .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
2537 I.eraseFromParent();
2538 return true;
2539 }
2540 default:
2541 return false;
2542 }
2543}
2544
2545bool AArch64InstructionSelector::select(MachineInstr &I) {
2546 assert(I.getParent() && "Instruction should be in a basic block!");
2547 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2548
2549 MachineBasicBlock &MBB = *I.getParent();
2550 MachineFunction &MF = *MBB.getParent();
2551 MachineRegisterInfo &MRI = MF.getRegInfo();
2552
2553 const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
2554 if (Subtarget->requiresStrictAlign()) {
2555 // We don't support this feature yet.
2556 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2557 return false;
2558 }
2559
2560 MIB.setInstrAndDebugLoc(I);
2561
2562 unsigned Opcode = I.getOpcode();
2563 // G_PHI requires same handling as PHI
2564 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2565 // Certain non-generic instructions also need some special handling.
2566
2567 if (Opcode == TargetOpcode::LOAD_STACK_GUARD) {
2568 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2569 return true;
2570 }
2571
2572 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2573 const Register DefReg = I.getOperand(0).getReg();
2574 const LLT DefTy = MRI.getType(DefReg);
2575
2576 const RegClassOrRegBank &RegClassOrBank =
2577 MRI.getRegClassOrRegBank(DefReg);
2578
2579 const TargetRegisterClass *DefRC =
2580 dyn_cast<const TargetRegisterClass *>(RegClassOrBank);
2581 if (!DefRC) {
2582 if (!DefTy.isValid()) {
2583 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2584 return false;
2585 }
2586 const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
2587 DefRC = getRegClassForTypeOnBank(DefTy, RB);
2588 if (!DefRC) {
2589 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2590 return false;
2591 }
2592 }
2593
2594 I.setDesc(TII.get(TargetOpcode::PHI));
2595
2596 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2597 }
2598
2599 if (I.isCopy())
2600 return selectCopy(I, TII, MRI, TRI, RBI);
2601
2602 if (I.isDebugInstr())
2603 return selectDebugInstr(I, MRI, RBI);
2604
2605 return true;
2606 }
2607
2608
2609 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2610 LLVM_DEBUG(
2611 dbgs() << "Generic instruction has unexpected implicit operands\n");
2612 return false;
2613 }
2614
2615 // Try to do some lowering before we start instruction selecting. These
2616 // lowerings are purely transformations on the input G_MIR and so selection
2617 // must continue after any modification of the instruction.
2618 if (preISelLower(I)) {
2619 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2620 }
2621
2622 // There may be patterns that the importer cannot handle optimally: it still
2623 // selects them, but to a suboptimal sequence, so our custom C++ selection
2624 // code later never gets a chance to work on them. Therefore, we make an early
2625 // selection attempt here to give priority to certain selection routines
2626 // over the imported ones.
2627 if (earlySelect(I))
2628 return true;
2629
2630 if (selectImpl(I, *CoverageInfo))
2631 return true;
2632
2633 LLT Ty =
2634 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2635
2636 switch (Opcode) {
2637 case TargetOpcode::G_SBFX:
2638 case TargetOpcode::G_UBFX: {
2639 static const unsigned OpcTable[2][2] = {
2640 {AArch64::UBFMWri, AArch64::UBFMXri},
2641 {AArch64::SBFMWri, AArch64::SBFMXri}};
2642 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2643 unsigned Size = Ty.getSizeInBits();
2644 unsigned Opc = OpcTable[IsSigned][Size == 64];
2645 auto Cst1 =
2646 getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
2647 assert(Cst1 && "Should have gotten a constant for src 1?");
2648 auto Cst2 =
2649 getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
2650 assert(Cst2 && "Should have gotten a constant for src 2?");
2651 auto LSB = Cst1->Value.getZExtValue();
2652 auto Width = Cst2->Value.getZExtValue();
2653 auto BitfieldInst =
2654 MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
2655 .addImm(LSB)
2656 .addImm(LSB + Width - 1);
2657 I.eraseFromParent();
2658 constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
2659 return true;
2660 }
2661 case TargetOpcode::G_BRCOND:
2662 return selectCompareBranch(I, MF, MRI);
2663
2664 case TargetOpcode::G_BRINDIRECT: {
2665 const Function &Fn = MF.getFunction();
2666 if (std::optional<uint16_t> BADisc =
2667 STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(Fn)) {
2668 auto MI = MIB.buildInstr(AArch64::BRA, {}, {I.getOperand(0).getReg()});
2669 MI.addImm(AArch64PACKey::IA);
2670 MI.addImm(*BADisc);
2671 MI.addReg(/*AddrDisc=*/AArch64::XZR);
2672 I.eraseFromParent();
2673 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
2674 return true;
2675 }
2676 I.setDesc(TII.get(AArch64::BR));
2677 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2678 return true;
2679 }
2680
2681 case TargetOpcode::G_BRJT:
2682 return selectBrJT(I, MRI);
2683
2684 case AArch64::G_ADD_LOW: {
2685 // This op may have been separated from its ADRP companion by the localizer
2686 // or some other code motion pass. Given that many CPUs will try to
2687 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2688 // which will later be expanded into an ADRP+ADD pair after scheduling.
2689 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2690 if (BaseMI->getOpcode() != AArch64::ADRP) {
2691 I.setDesc(TII.get(AArch64::ADDXri));
2692 I.addOperand(MachineOperand::CreateImm(0));
2693 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2694 return true;
2695 }
2696 assert(TM.getCodeModel() == CodeModel::Small &&
2697 "Expected small code model");
2698 auto Op1 = BaseMI->getOperand(1);
2699 auto Op2 = I.getOperand(2);
2700 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2701 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2702 Op1.getTargetFlags())
2703 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2704 Op2.getTargetFlags());
2705 I.eraseFromParent();
2706 constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2707 return true;
2708 }
2709
2710 case TargetOpcode::G_FCONSTANT: {
2711 const Register DefReg = I.getOperand(0).getReg();
2712 const LLT DefTy = MRI.getType(DefReg);
2713 const unsigned DefSize = DefTy.getSizeInBits();
2714 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2715
2716 const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB);
2717 // For 16 and 128-bit values, and for 32/64-bit fpimms that aren't legal, emit a constant pool load.
2718 switch (DefSize) {
2719 default:
2720 llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
2721 case 32:
2722 case 64: {
2723 bool OptForSize = shouldOptForSize(&MF);
2724 const auto &TLI = MF.getSubtarget().getTargetLowering();
2725 // If TLI says that this fpimm is illegal, then we'll expand to a
2726 // constant pool load.
2727 if (TLI->isFPImmLegal(I.getOperand(1).getFPImm()->getValueAPF(),
2728 EVT::getFloatingPointVT(DefSize), OptForSize))
2729 break;
2730 [[fallthrough]];
2731 }
2732 case 16:
2733 case 128: {
2734 auto *FPImm = I.getOperand(1).getFPImm();
2735 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2736 if (!LoadMI) {
2737 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2738 return false;
2739 }
2740 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2741 I.eraseFromParent();
2742 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2743 }
2744 }
2745
2746 assert((DefSize == 32 || DefSize == 64) && "Unexpected const def size");
2747 // Either emit a FMOV, or emit a copy to emit a normal mov.
2748 const Register DefGPRReg = MRI.createVirtualRegister(
2749 DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2750 MachineOperand &RegOp = I.getOperand(0);
2751 RegOp.setReg(DefGPRReg);
2752 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2753 MIB.buildCopy({DefReg}, {DefGPRReg});
2754
2755 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2756 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2757 return false;
2758 }
2759
2760 MachineOperand &ImmOp = I.getOperand(1);
2761 ImmOp.ChangeToImmediate(
2762 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2763
2764 const unsigned MovOpc =
2765 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2766 I.setDesc(TII.get(MovOpc));
2767 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2768 return true;
2769 }
2770 case TargetOpcode::G_EXTRACT: {
2771 Register DstReg = I.getOperand(0).getReg();
2772 Register SrcReg = I.getOperand(1).getReg();
2773 LLT SrcTy = MRI.getType(SrcReg);
2774 LLT DstTy = MRI.getType(DstReg);
2775 (void)DstTy;
2776 unsigned SrcSize = SrcTy.getSizeInBits();
2777
2778 if (SrcTy.getSizeInBits() > 64) {
2779 // This should be an extract of an s128, which is like a vector extract.
2780 if (SrcTy.getSizeInBits() != 128)
2781 return false;
2782 // Only support extracting 64 bits from an s128 at the moment.
2783 if (DstTy.getSizeInBits() != 64)
2784 return false;
2785
2786 unsigned Offset = I.getOperand(2).getImm();
2787 if (Offset % 64 != 0)
2788 return false;
2789
2790 // Check we have the right regbank always.
2791 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2792 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2793 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2794
2795 if (SrcRB.getID() == AArch64::GPRRegBankID) {
2796 auto NewI =
2797 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2798 .addUse(SrcReg, {},
2799 Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2800 constrainOperandRegClass(MF, TRI, MRI, TII, RBI, *NewI,
2801 AArch64::GPR64RegClass, NewI->getOperand(0));
2802 I.eraseFromParent();
2803 return true;
2804 }
2805
2806 // Emit the same code as a vector extract.
2807 // Offset must be a multiple of 64.
2808 unsigned LaneIdx = Offset / 64;
2809 MachineInstr *Extract = emitExtractVectorElt(
2810 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2811 if (!Extract)
2812 return false;
2813 I.eraseFromParent();
2814 return true;
2815 }
2816
2817 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2818 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2819 Ty.getSizeInBits() - 1);
2820
2821 if (SrcSize < 64) {
2822 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2823 "unexpected G_EXTRACT types");
2824 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2825 return true;
2826 }
2827
2828 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2829 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2830 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2831 .addReg(DstReg, {}, AArch64::sub_32);
2832 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2833 AArch64::GPR32RegClass, MRI);
2834 I.getOperand(0).setReg(DstReg);
2835
2836 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2837 return true;
2838 }
2839
2840 case TargetOpcode::G_INSERT: {
2841 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2842 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2843 unsigned DstSize = DstTy.getSizeInBits();
2844 // Larger inserts are vectors; same-size ones should be something else by
2845 // now (split up or turned into COPYs).
2846 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2847 return false;
2848
2849 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2850 unsigned LSB = I.getOperand(3).getImm();
2851 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2852 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2853 MachineInstrBuilder(MF, I).addImm(Width - 1);
2854
2855 if (DstSize < 64) {
2856 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2857 "unexpected G_INSERT types");
2858 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2859 return true;
2860 }
2861
2862 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2863 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2864 TII.get(AArch64::SUBREG_TO_REG))
2865 .addDef(SrcReg)
2866 .addUse(I.getOperand(2).getReg())
2867 .addImm(AArch64::sub_32);
2868 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2869 AArch64::GPR32RegClass, MRI);
2870 I.getOperand(2).setReg(SrcReg);
2871
2872 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2873 return true;
2874 }
2875 case TargetOpcode::G_FRAME_INDEX: {
2876 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2877 if (Ty != LLT::pointer(0, 64)) {
2878 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2879 << ", expected: " << LLT::pointer(0, 64) << '\n');
2880 return false;
2881 }
2882 I.setDesc(TII.get(AArch64::ADDXri));
2883
2884 // MOs for a #0 shifted immediate.
2885 I.addOperand(MachineOperand::CreateImm(0));
2886 I.addOperand(MachineOperand::CreateImm(0));
2887
2888 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2889 return true;
2890 }
2891
2892 case TargetOpcode::G_GLOBAL_VALUE: {
2893 const GlobalValue *GV = nullptr;
2894 unsigned OpFlags;
2895 if (I.getOperand(1).isSymbol()) {
2896 OpFlags = I.getOperand(1).getTargetFlags();
2897 // Currently only used by "RtLibUseGOT".
2898 assert(OpFlags == AArch64II::MO_GOT);
2899 } else {
2900 GV = I.getOperand(1).getGlobal();
2901 if (GV->isThreadLocal()) {
2902 // We don't support instructions with emulated TLS variables yet
2903 if (TM.useEmulatedTLS())
2904 return false;
2905 return selectTLSGlobalValue(I, MRI);
2906 }
2907 OpFlags = STI.ClassifyGlobalReference(GV, TM);
2908 }
2909
2910 if (OpFlags & AArch64II::MO_GOT) {
2911 bool IsGOTSigned = MF.getInfo<AArch64FunctionInfo>()->hasELFSignedGOT();
2912 I.setDesc(TII.get(IsGOTSigned ? AArch64::LOADgotAUTH : AArch64::LOADgot));
2913 I.getOperand(1).setTargetFlags(OpFlags);
2914 I.addImplicitDefUseOperands(MF);
2915 } else if (TM.getCodeModel() == CodeModel::Large &&
2916 !TM.isPositionIndependent()) {
2917 // Materialize the global using movz/movk instructions.
2918 materializeLargeCMVal(I, GV, OpFlags);
2919 I.eraseFromParent();
2920 return true;
2921 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2922 I.setDesc(TII.get(AArch64::ADR));
2923 I.getOperand(1).setTargetFlags(OpFlags);
2924 } else {
2925 I.setDesc(TII.get(AArch64::MOVaddr));
2926 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2927 MachineInstrBuilder MIB(MF, I);
2928 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2929 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2930 }
2931 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2932 return true;
2933 }
2934
2935 case TargetOpcode::G_PTRAUTH_GLOBAL_VALUE:
2936 return selectPtrAuthGlobalValue(I, MRI);
2937
2938 case TargetOpcode::G_ZEXTLOAD:
2939 case TargetOpcode::G_LOAD:
2940 case TargetOpcode::G_STORE: {
2941 GLoadStore &LdSt = cast<GLoadStore>(I);
2942 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2943 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
2944
2945 // Can only handle AddressSpace 0, 64-bit pointers.
2946 if (PtrTy != LLT::pointer(0, 64)) {
2947 return false;
2948 }
2949
2950 uint64_t MemSizeInBytes = LdSt.getMemSize().getValue();
2951 unsigned MemSizeInBits = LdSt.getMemSizeInBits().getValue();
2952 AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
2953
2954 // Need special instructions for atomics that affect ordering.
2955 if (isStrongerThanMonotonic(Order)) {
2956 assert(!isa<GZExtLoad>(LdSt));
2957 assert(MemSizeInBytes <= 8 &&
2958 "128-bit atomics should already be custom-legalized");
2959
2960 if (isa<GLoad>(LdSt)) {
2961 static constexpr unsigned LDAPROpcodes[] = {
2962 AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
2963 static constexpr unsigned LDAROpcodes[] = {
2964 AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
2965 ArrayRef<unsigned> Opcodes =
2966 STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
2967 ? LDAPROpcodes
2968 : LDAROpcodes;
2969 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2970 } else {
2971 static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2972 AArch64::STLRW, AArch64::STLRX};
2973 Register ValReg = LdSt.getReg(0);
2974 if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
2975 // Emit a subreg copy of 32 bits.
2976 Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2977 MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
2978 .addReg(I.getOperand(0).getReg(), {}, AArch64::sub_32);
2979 I.getOperand(0).setReg(NewVal);
2980 }
2981 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2982 }
2983 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2984 return true;
2985 }
2986
2987#ifndef NDEBUG
2988 const Register PtrReg = LdSt.getPointerReg();
2989 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2990 // Check that the pointer register is valid.
2991 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2992 "Load/Store pointer operand isn't a GPR");
2993 assert(MRI.getType(PtrReg).isPointer() &&
2994 "Load/Store pointer operand isn't a pointer");
2995#endif
2996
2997 const Register ValReg = LdSt.getReg(0);
2998 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2999 LLT ValTy = MRI.getType(ValReg);
3000
3001 // The code below doesn't support truncating stores, so we need to split it
3002 // again.
3003 if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
3004 unsigned SubReg;
3005 LLT MemTy = LdSt.getMMO().getMemoryType();
3006 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
3007 if (!getSubRegForClass(RC, TRI, SubReg))
3008 return false;
3009
3010 // Generate a subreg copy.
3011 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
3012 .addReg(ValReg, {}, SubReg)
3013 .getReg(0);
3014 RBI.constrainGenericRegister(Copy, *RC, MRI);
3015 LdSt.getOperand(0).setReg(Copy);
3016 } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
3017 // If this is an any-extending load from the FPR bank, split it into a regular
3018 // load + extend.
3019 if (RB.getID() == AArch64::FPRRegBankID) {
3020 unsigned SubReg;
3021 LLT MemTy = LdSt.getMMO().getMemoryType();
3022 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
3023 if (!getSubRegForClass(RC, TRI, SubReg))
3024 return false;
3025 Register OldDst = LdSt.getReg(0);
3026 Register NewDst =
3027 MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
3028 LdSt.getOperand(0).setReg(NewDst);
3029 MRI.setRegBank(NewDst, RB);
3030 // Generate a SUBREG_TO_REG to extend it.
3031 MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
3032 MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
3033 .addUse(NewDst)
3034 .addImm(SubReg);
3035 auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB);
3036 RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
3037 MIB.setInstr(LdSt);
3038 ValTy = MemTy; // This is no longer an extending load.
3039 }
3040 }
3041
3042 // Helper lambda for partially selecting I. Either returns the original
3043 // instruction with an updated opcode, or a new instruction.
3044 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
3045 bool IsStore = isa<GStore>(I);
3046 const unsigned NewOpc =
3047 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
3048 if (NewOpc == I.getOpcode())
3049 return nullptr;
3050 // Check if we can fold anything into the addressing mode.
3051 auto AddrModeFns =
3052 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
3053 if (!AddrModeFns) {
3054 // Can't fold anything. Use the original instruction.
3055 I.setDesc(TII.get(NewOpc));
3056 I.addOperand(MachineOperand::CreateImm(0));
3057 return &I;
3058 }
3059
3060 // Folded something. Create a new instruction and return it.
3061 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
3062 Register CurValReg = I.getOperand(0).getReg();
3063 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
3064 NewInst.cloneMemRefs(I);
3065 for (auto &Fn : *AddrModeFns)
3066 Fn(NewInst);
3067 I.eraseFromParent();
3068 return &*NewInst;
3069 };
3070
3071 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
3072 if (!LoadStore)
3073 return false;
3074
3075 // If we're storing a 0, use WZR/XZR.
3076 if (Opcode == TargetOpcode::G_STORE) {
3077 auto CVal = getIConstantVRegValWithLookThrough(
3078 LoadStore->getOperand(0).getReg(), MRI);
3079 if (CVal && CVal->Value == 0) {
3080 switch (LoadStore->getOpcode()) {
3081 case AArch64::STRWui:
3082 case AArch64::STRHHui:
3083 case AArch64::STRBBui:
3084 LoadStore->getOperand(0).setReg(AArch64::WZR);
3085 break;
3086 case AArch64::STRXui:
3087 LoadStore->getOperand(0).setReg(AArch64::XZR);
3088 break;
3089 }
3090 }
3091 }
3092
3093 if (IsZExtLoad || (Opcode == TargetOpcode::G_LOAD &&
3094 ValTy == LLT::scalar(64) && MemSizeInBits == 32)) {
3095 // The any/zextload from a smaller type to i32 should be handled by the
3096 // importer.
3097 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
3098 return false;
3099 // If we have an extending load then change the load's type to be a
3100 // narrower reg and zero_extend with SUBREG_TO_REG.
3101 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3102 Register DstReg = LoadStore->getOperand(0).getReg();
3103 LoadStore->getOperand(0).setReg(LdReg);
3104
3105 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
3106 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
3107 .addUse(LdReg)
3108 .addImm(AArch64::sub_32);
3109 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3110 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
3111 MRI);
3112 }
3113 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3114 return true;
3115 }
3116
3117 case TargetOpcode::G_INDEXED_ZEXTLOAD:
3118 case TargetOpcode::G_INDEXED_SEXTLOAD:
3119 return selectIndexedExtLoad(I, MRI);
3120 case TargetOpcode::G_INDEXED_LOAD:
3121 return selectIndexedLoad(I, MRI);
3122 case TargetOpcode::G_INDEXED_STORE:
3123 return selectIndexedStore(cast<GIndexedStore>(I), MRI);
3124
3125 case TargetOpcode::G_LSHR:
3126 case TargetOpcode::G_ASHR:
3127 if (MRI.getType(I.getOperand(0).getReg()).isVector())
3128 return selectVectorAshrLshr(I, MRI);
3129 [[fallthrough]];
3130 case TargetOpcode::G_SHL:
3131 if (Opcode == TargetOpcode::G_SHL &&
3132 MRI.getType(I.getOperand(0).getReg()).isVector())
3133 return selectVectorSHL(I, MRI);
3134
3135 // These shifts were legalized to have 64-bit shift amounts because we
3136 // want to take advantage of the selection patterns that assume the
3137 // immediates are s64s. However, selectBinaryOp will assume both operands
3138 // have the same bit size.
3139 {
3140 Register SrcReg = I.getOperand(1).getReg();
3141 Register ShiftReg = I.getOperand(2).getReg();
3142 const LLT ShiftTy = MRI.getType(ShiftReg);
3143 const LLT SrcTy = MRI.getType(SrcReg);
3144 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
3145 ShiftTy.getSizeInBits() == 64) {
3146 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
3147 // Insert a subregister copy to implement a 64->32 trunc
3148 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
3149 .addReg(ShiftReg, {}, AArch64::sub_32);
3150 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
3151 I.getOperand(2).setReg(Trunc.getReg(0));
3152 }
3153 }
3154 [[fallthrough]];
3155 case TargetOpcode::G_OR: {
3156 // Reject the various things we don't support yet.
3157 if (unsupportedBinOp(I, RBI, MRI, TRI))
3158 return false;
3159
3160 const unsigned OpSize = Ty.getSizeInBits();
3161
3162 const Register DefReg = I.getOperand(0).getReg();
3163 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
3164
3165 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
3166 if (NewOpc == I.getOpcode())
3167 return false;
3168
3169 I.setDesc(TII.get(NewOpc));
3170 // FIXME: Should the type be always reset in setDesc?
3171
3172 // Now that we selected an opcode, we need to constrain the register
3173 // operands to use appropriate classes.
3173 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3174
3175 return true;
3176 }
3177
3178 case TargetOpcode::G_PTR_ADD: {
3179 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
3180 I.eraseFromParent();
3181 return true;
3182 }
3183
3184 case TargetOpcode::G_SADDE:
3185 case TargetOpcode::G_UADDE:
3186 case TargetOpcode::G_SSUBE:
3187 case TargetOpcode::G_USUBE:
3188 case TargetOpcode::G_SADDO:
3189 case TargetOpcode::G_UADDO:
3190 case TargetOpcode::G_SSUBO:
3191 case TargetOpcode::G_USUBO:
3192 return selectOverflowOp(I, MRI);
3193
3194 case TargetOpcode::G_PTRMASK: {
3195 Register MaskReg = I.getOperand(2).getReg();
3196 std::optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
3197 // TODO: Implement arbitrary cases
3198 if (!MaskVal || !isShiftedMask_64(*MaskVal))
3199 return false;
3200
3201 uint64_t Mask = *MaskVal;
3202 I.setDesc(TII.get(AArch64::ANDXri));
3203 I.getOperand(2).ChangeToImmediate(
3204 AArch64_AM::encodeLogicalImmediate(Mask, 64));
3205
3206 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3207 return true;
3208 }
3209 case TargetOpcode::G_PTRTOINT:
3210 case TargetOpcode::G_TRUNC: {
3211 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3212 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3213
3214 const Register DstReg = I.getOperand(0).getReg();
3215 const Register SrcReg = I.getOperand(1).getReg();
3216
3217 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3218 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3219
3220 if (DstRB.getID() != SrcRB.getID()) {
3221 LLVM_DEBUG(
3222 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
3223 return false;
3224 }
3225
3226 if (DstRB.getID() == AArch64::GPRRegBankID) {
3227 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3228 if (!DstRC)
3229 return false;
3230
3231 const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB);
3232 if (!SrcRC)
3233 return false;
3234
3235 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
3236 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
3237 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
3238 return false;
3239 }
3240
3241 if (DstRC == SrcRC) {
3242 // Nothing to be done
3243 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
3244 SrcTy == LLT::scalar(64)) {
3245 llvm_unreachable("TableGen can import this case");
3246 return false;
3247 } else if (DstRC == &AArch64::GPR32RegClass &&
3248 SrcRC == &AArch64::GPR64RegClass) {
3249 I.getOperand(1).setSubReg(AArch64::sub_32);
3250 } else {
3251 LLVM_DEBUG(
3252 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3253 return false;
3254 }
3255
3256 I.setDesc(TII.get(TargetOpcode::COPY));
3257 return true;
3258 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3259 if (DstTy == LLT::fixed_vector(4, 16) &&
3260 SrcTy == LLT::fixed_vector(4, 32)) {
3261 I.setDesc(TII.get(AArch64::XTNv4i16));
3262 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3263 return true;
3264 }
3265
3266 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3267 MachineInstr *Extract = emitExtractVectorElt(
3268 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
3269 if (!Extract)
3270 return false;
3271 I.eraseFromParent();
3272 return true;
3273 }
3274
3275 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3276 if (Opcode == TargetOpcode::G_PTRTOINT) {
3277 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
3278 I.setDesc(TII.get(TargetOpcode::COPY));
3279 return selectCopy(I, TII, MRI, TRI, RBI);
3280 }
3281 }
3282
3283 return false;
3284 }
3285
3286 case TargetOpcode::G_ANYEXT: {
3287 if (selectUSMovFromExtend(I, MRI))
3288 return true;
3289
3290 const Register DstReg = I.getOperand(0).getReg();
3291 const Register SrcReg = I.getOperand(1).getReg();
3292
3293 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
3294 if (RBDst.getID() != AArch64::GPRRegBankID) {
3295 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
3296 << ", expected: GPR\n");
3297 return false;
3298 }
3299
3300 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
3301 if (RBSrc.getID() != AArch64::GPRRegBankID) {
3302 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
3303 << ", expected: GPR\n");
3304 return false;
3305 }
3306
3307 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
3308
3309 if (DstSize == 0) {
3310 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
3311 return false;
3312 }
3313
3314 if (DstSize != 64 && DstSize > 32) {
3315 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
3316 << ", expected: 32 or 64\n");
3317 return false;
3318 }
3319 // At this point G_ANYEXT is just like a plain COPY, but we need
3320 // to explicitly form the 64-bit value when the destination is 64 bits wide.
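 // SUBREG_TO_REG re-expresses the 32-bit source as the sub_32 lane of a
 // 64-bit register without emitting a real instruction; an anyext places no
 // requirement on what the upper 32 bits hold.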
3321 if (DstSize > 32) {
3322 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3323 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
3324 .addDef(ExtSrc)
3325 .addUse(SrcReg)
3326 .addImm(AArch64::sub_32);
3327 I.getOperand(1).setReg(ExtSrc);
3328 }
3329 return selectCopy(I, TII, MRI, TRI, RBI);
3330 }
3331
3332 case TargetOpcode::G_ZEXT:
3333 case TargetOpcode::G_SEXT_INREG:
3334 case TargetOpcode::G_SEXT: {
3335 if (selectUSMovFromExtend(I, MRI))
3336 return true;
3337
3338 unsigned Opcode = I.getOpcode();
3339 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3340 const Register DefReg = I.getOperand(0).getReg();
3341 Register SrcReg = I.getOperand(1).getReg();
3342 const LLT DstTy = MRI.getType(DefReg);
3343 const LLT SrcTy = MRI.getType(SrcReg);
3344 unsigned DstSize = DstTy.getSizeInBits();
3345 unsigned SrcSize = SrcTy.getSizeInBits();
3346
3347 // SEXT_INREG has the same src reg size as the dst; the size of the value
3348 // to be extended is encoded in the immediate.
3349 if (Opcode == TargetOpcode::G_SEXT_INREG)
3350 SrcSize = I.getOperand(2).getImm();
3351
3352 if (DstTy.isVector())
3353 return false; // Should be handled by imported patterns.
3354
3355 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3356 AArch64::GPRRegBankID &&
3357 "Unexpected ext regbank");
3358
3359 MachineInstr *ExtI;
3360
3361 // First, check whether we're extending the result of a load with a dest type
3362 // smaller than 32 bits; in that case this zext is redundant. GPR32 is the
3363 // smallest GPR register on AArch64, and all narrower loads automatically
3364 // zero-extend the upper bits. E.g.
3365 // %v(s8) = G_LOAD %p, :: (load 1)
3366 // %v2(s32) = G_ZEXT %v(s8)
3367 if (!IsSigned) {
3368 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
3369 bool IsGPR =
3370 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3371 if (LoadMI && IsGPR) {
3372 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3373 unsigned BytesLoaded = MemOp->getSize().getValue();
3374 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3375 return selectCopy(I, TII, MRI, TRI, RBI);
3376 }
3377
3378 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3379 // + SUBREG_TO_REG.
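 // Any 32-bit register write on AArch64 zeroes bits [63:32] of the full X
 // register, so the ORRWrs mov already yields the zero-extended value and
 // SUBREG_TO_REG merely re-expresses it at 64 bits.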
3380 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3381 Register SubregToRegSrc =
3382 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3383 const Register ZReg = AArch64::WZR;
3384 MIB.buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg})
3385 .addImm(0);
3386
3387 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3388 .addUse(SubregToRegSrc)
3389 .addImm(AArch64::sub_32);
3390
3391 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3392 MRI)) {
3393 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3394 return false;
3395 }
3396
3397 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3398 MRI)) {
3399 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3400 return false;
3401 }
3402
3403 I.eraseFromParent();
3404 return true;
3405 }
3406 }
3407
3408 if (DstSize == 64) {
3409 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3410 // FIXME: Can we avoid manually doing this?
3411 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3412 MRI)) {
3413 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3414 << " operand\n");
3415 return false;
3416 }
3417 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
3418 {&AArch64::GPR64RegClass}, {})
3419 .addUse(SrcReg)
3420 .addImm(AArch64::sub_32)
3421 .getReg(0);
3422 }
3423
3424 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3425 {DefReg}, {SrcReg})
3426 .addImm(0)
3427 .addImm(SrcSize - 1);
3428 } else if (DstSize <= 32) {
3429 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3430 {DefReg}, {SrcReg})
3431 .addImm(0)
3432 .addImm(SrcSize - 1);
3433 } else {
3434 return false;
3435 }
3436
3437 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
3438 I.eraseFromParent();
3439 return true;
3440 }
3441
3442 case TargetOpcode::G_FREEZE:
3443 return selectCopy(I, TII, MRI, TRI, RBI);
3444
3445 case TargetOpcode::G_INTTOPTR:
3446 // The importer is currently unable to import pointer types since they
3447 // didn't exist in SelectionDAG.
3448 return selectCopy(I, TII, MRI, TRI, RBI);
3449
3450 case TargetOpcode::G_BITCAST:
3451 // Imported SelectionDAG rules can handle every bitcast except those that
3452 // bitcast from a type to the same type. Ideally, these shouldn't occur
3453 // but we might not run an optimizer that deletes them. The other exception
3454 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3455 // of them.
3456 return selectCopy(I, TII, MRI, TRI, RBI);
3457
3458 case TargetOpcode::G_SELECT: {
3459 auto &Sel = cast<GSelect>(I);
3460 const Register CondReg = Sel.getCondReg();
3461 const Register TReg = Sel.getTrueReg();
3462 const Register FReg = Sel.getFalseReg();
3463
3464 if (tryOptSelect(Sel))
3465 return true;
3466
3467 // Make sure to use an unused vreg instead of wzr, so that the peephole
3468 // optimizations will be able to optimize these.
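 // ANDSWri against the logical immediate 1 tests bit 0 of the condition and
 // sets NZCV accordingly, so the conditional select emitted by emitSelect can
 // key off the NE condition code.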
3469 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3470 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3471 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3472 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3473 if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
3474 return false;
3475 Sel.eraseFromParent();
3476 return true;
3477 }
3478 case TargetOpcode::G_ICMP: {
3479 if (Ty.isVector())
3480 return false;
3481
3482 if (Ty != LLT::scalar(32)) {
3483 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3484 << ", expected: " << LLT::scalar(32) << '\n');
3485 return false;
3486 }
3487
3488 auto &PredOp = I.getOperand(1);
3489 emitIntegerCompare(I.getOperand(2), I.getOperand(3), PredOp, MIB);
3490 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
3491 const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
3492 CmpInst::getInversePredicate(Pred), I.getOperand(3).getReg(), &MRI);
3493 emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
3494 /*Src2=*/AArch64::WZR, InvCC, MIB);
3495 I.eraseFromParent();
3496 return true;
3497 }
3498
3499 case TargetOpcode::G_FCMP: {
3500 CmpInst::Predicate Pred =
3501 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3502 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
3503 Pred) ||
3504 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
3505 return false;
3506 I.eraseFromParent();
3507 return true;
3508 }
3509 case TargetOpcode::G_VASTART:
3510 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3511 : selectVaStartAAPCS(I, MF, MRI);
3512 case TargetOpcode::G_INTRINSIC:
3513 return selectIntrinsic(I, MRI);
3514 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3515 return selectIntrinsicWithSideEffects(I, MRI);
3516 case TargetOpcode::G_IMPLICIT_DEF: {
3517 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3518 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3519 const Register DstReg = I.getOperand(0).getReg();
3520 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3521 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3522 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3523 return true;
3524 }
3525 case TargetOpcode::G_BLOCK_ADDR: {
3526 Function *BAFn = I.getOperand(1).getBlockAddress()->getFunction();
3527 if (std::optional<uint16_t> BADisc =
3528 STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(*BAFn)) {
3529 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X16}, {});
3530 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
3531 MIB.buildInstr(AArch64::MOVaddrPAC)
3532 .addBlockAddress(I.getOperand(1).getBlockAddress())
3533 .addImm(AArch64PACKey::IA)
3534 .addReg(/*AddrDisc=*/AArch64::XZR)
3535 .addImm(*BADisc)
3536 .constrainAllUses(TII, TRI, RBI);
3537 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X16));
3538 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
3539 AArch64::GPR64RegClass, MRI);
3540 I.eraseFromParent();
3541 return true;
3542 }
3543 if (TM.getCodeModel() == CodeModel::Large && !TM.isPositionIndependent()) {
3544 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3545 I.eraseFromParent();
3546 return true;
3547 } else {
3548 I.setDesc(TII.get(AArch64::MOVaddrBA));
3549 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3550 I.getOperand(0).getReg())
3551 .addBlockAddress(I.getOperand(1).getBlockAddress(),
3552 /* Offset */ 0, AArch64II::MO_PAGE)
3553 .addBlockAddress(
3554 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3555 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3556 I.eraseFromParent();
3557 constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3558 return true;
3559 }
3560 }
3561 case AArch64::G_DUP: {
3562 // When the scalar of G_DUP is an s8/s16 gpr, it can't be selected by the
3563 // imported patterns. Do it manually here. Avoiding the generation of s16 gpr
3564 // values is difficult because at RBS we may end up pessimizing the fpr case
3565 // if we decided to add an anyextend to fix this. Manual selection is the most
3566 // robust solution for now.
3567 if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3568 AArch64::GPRRegBankID)
3569 return false; // We expect the fpr regbank case to be imported.
3570 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3571 if (VecTy == LLT::fixed_vector(8, 8))
3572 I.setDesc(TII.get(AArch64::DUPv8i8gpr));
3573 else if (VecTy == LLT::fixed_vector(16, 8))
3574 I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3575 else if (VecTy == LLT::fixed_vector(4, 16))
3576 I.setDesc(TII.get(AArch64::DUPv4i16gpr));
3577 else if (VecTy == LLT::fixed_vector(8, 16))
3578 I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3579 else
3580 return false;
3581 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3582 return true;
3583 }
3584 case TargetOpcode::G_BUILD_VECTOR:
3585 return selectBuildVector(I, MRI);
3586 case TargetOpcode::G_MERGE_VALUES:
3587 return selectMergeValues(I, MRI);
3588 case TargetOpcode::G_UNMERGE_VALUES:
3589 return selectUnmergeValues(I, MRI);
3590 case TargetOpcode::G_SHUFFLE_VECTOR:
3591 return selectShuffleVector(I, MRI);
3592 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3593 return selectExtractElt(I, MRI);
3594 case TargetOpcode::G_CONCAT_VECTORS:
3595 return selectConcatVectors(I, MRI);
3596 case TargetOpcode::G_JUMP_TABLE:
3597 return selectJumpTable(I, MRI);
3598 case TargetOpcode::G_MEMCPY:
3599 case TargetOpcode::G_MEMCPY_INLINE:
3600 case TargetOpcode::G_MEMMOVE:
3601 case TargetOpcode::G_MEMSET:
3602 assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
3603 return selectMOPS(I, MRI);
3604 }
3605
3606 return false;
3607}
3608
3609bool AArch64InstructionSelector::selectAndRestoreState(MachineInstr &I) {
3610 MachineIRBuilderState OldMIBState = MIB.getState();
3611 bool Success = select(I);
3612 MIB.setState(OldMIBState);
3613 return Success;
3614}
3615
3616bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
3617 MachineRegisterInfo &MRI) {
3618 unsigned Mopcode;
3619 switch (GI.getOpcode()) {
3620 case TargetOpcode::G_MEMCPY:
3621 case TargetOpcode::G_MEMCPY_INLINE:
3622 Mopcode = AArch64::MOPSMemoryCopyPseudo;
3623 break;
3624 case TargetOpcode::G_MEMMOVE:
3625 Mopcode = AArch64::MOPSMemoryMovePseudo;
3626 break;
3627 case TargetOpcode::G_MEMSET:
3628 // For tagged memset see llvm.aarch64.mops.memset.tag
3629 Mopcode = AArch64::MOPSMemorySetPseudo;
3630 break;
3631 }
3632
3633 auto &DstPtr = GI.getOperand(0);
3634 auto &SrcOrVal = GI.getOperand(1);
3635 auto &Size = GI.getOperand(2);
3636
3637 // Create copies of the registers that can be clobbered.
3638 const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
3639 const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
3640 const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg());
3641
3642 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3643 const auto &SrcValRegClass =
3644 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3645
3646 // Constrain to specific registers
3647 RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
3648 RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
3649 RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);
3650
3651 MIB.buildCopy(DstPtrCopy, DstPtr);
3652 MIB.buildCopy(SrcValCopy, SrcOrVal);
3653 MIB.buildCopy(SizeCopy, Size);
3654
3655 // New instruction uses the copied registers because it must update them.
3656 // The defs are not used since they don't exist in G_MEM*. They are still
3657 // tied.
3658 // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE
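 // i.e. the G_MEM* opcodes take (dst, src/val, size), whereas the set pseudo
 // below takes (dst, size, value); the copy/move pseudos keep (dst, src, size).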
3659 Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
3660 Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3661 if (IsSet) {
3662 MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
3663 {DstPtrCopy, SizeCopy, SrcValCopy});
3664 } else {
3665 Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
3666 MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
3667 {DstPtrCopy, SrcValCopy, SizeCopy});
3668 }
3669
3670 GI.eraseFromParent();
3671 return true;
3672}
3673
3674bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3675 MachineRegisterInfo &MRI) {
3676 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3677 Register JTAddr = I.getOperand(0).getReg();
3678 unsigned JTI = I.getOperand(1).getIndex();
3679 Register Index = I.getOperand(2).getReg();
3680
3681 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3682
3683 // With aarch64-jump-table-hardening, we only expand the jump table dispatch
3684 // sequence later, to guarantee the integrity of the intermediate values.
3685 if (MF->getFunction().hasFnAttribute("aarch64-jump-table-hardening")) {
3686 CodeModel::Model CM = TM.getCodeModel();
3687 if (STI.isTargetMachO()) {
3688 if (CM != CodeModel::Small && CM != CodeModel::Large)
3689 report_fatal_error("Unsupported code-model for hardened jump-table");
3690 } else {
3691 // Note that COFF support would likely also need JUMP_TABLE_DEBUG_INFO.
3692 assert(STI.isTargetELF() &&
3693 "jump table hardening only supported on MachO/ELF");
3694 if (CM != CodeModel::Small)
3695 report_fatal_error("Unsupported code-model for hardened jump-table");
3696 }
3697
3698 MIB.buildCopy({AArch64::X16}, I.getOperand(2).getReg());
3699 MIB.buildInstr(AArch64::BR_JumpTable)
3700 .addJumpTableIndex(I.getOperand(1).getIndex());
3701 I.eraseFromParent();
3702 return true;
3703 }
3704
3705 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3706 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3707
3708 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3709 {TargetReg, ScratchReg}, {JTAddr, Index})
3710 .addJumpTableIndex(JTI);
3711 // Save the jump table info.
3712 MIB.buildInstr(TargetOpcode::JUMP_TABLE_DEBUG_INFO, {},
3713 {static_cast<int64_t>(JTI)});
3714 // Build the indirect branch.
3715 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3716 I.eraseFromParent();
3717 constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3718 return true;
3719}
3720
3721bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3722 MachineRegisterInfo &MRI) {
3723 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3724 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3725
3726 Register DstReg = I.getOperand(0).getReg();
3727 unsigned JTI = I.getOperand(1).getIndex();
3728 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
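 // Illustratively, the eventual expansion is:
 //   adrp xN, <jump-table>@PAGE
 //   add  xN, xN, <jump-table>@PAGEOFF
 // (the exact relocation spelling depends on the object format).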
3729 auto MovMI =
3730 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3731 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3732 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3733 I.eraseFromParent();
3734 constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3735 return true;
3736}
3737
3738bool AArch64InstructionSelector::selectTLSGlobalValue(
3739 MachineInstr &I, MachineRegisterInfo &MRI) {
3740 if (!STI.isTargetMachO())
3741 return false;
3742 MachineFunction &MF = *I.getParent()->getParent();
3743 MF.getFrameInfo().setAdjustsStack(true);
3744
3745 const auto &GlobalOp = I.getOperand(1);
3746 assert(GlobalOp.getOffset() == 0 &&
3747 "Shouldn't have an offset on TLS globals!");
3748 const GlobalValue &GV = *GlobalOp.getGlobal();
3749
3750 auto LoadGOT =
3751 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3752 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3753
3754 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3755 {LoadGOT.getReg(0)})
3756 .addImm(0);
3757
3758 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3759 // TLS calls preserve all registers except those that absolutely must be
3760 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3761 // silly).
3762 unsigned Opcode = getBLRCallOpcode(MF);
3763
3764 // With ptrauth-calls, the tlv access thunk pointer is authenticated (IA, 0).
3765 if (MF.getFunction().hasFnAttribute("ptrauth-calls")) {
3766 assert(Opcode == AArch64::BLR);
3767 Opcode = AArch64::BLRAAZ;
3768 }
3769
3770 MIB.buildInstr(Opcode, {}, {Load})
3771 .addUse(AArch64::X0, RegState::Implicit)
3772 .addDef(AArch64::X0, RegState::Implicit)
3773 .addRegMask(TRI.getTLSCallPreservedMask());
3774
3775 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3776 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3777 MRI);
3778 I.eraseFromParent();
3779 return true;
3780}
3781
3782MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3783 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3784 MachineIRBuilder &MIRBuilder) const {
3785 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3786
3787 auto BuildFn = [&](unsigned SubregIndex) {
3788 auto Ins =
3789 MIRBuilder
3790 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3791 .addImm(SubregIndex);
3792 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3793 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3794 return &*Ins;
3795 };
3796
3797 switch (EltSize) {
3798 case 8:
3799 return BuildFn(AArch64::bsub);
3800 case 16:
3801 return BuildFn(AArch64::hsub);
3802 case 32:
3803 return BuildFn(AArch64::ssub);
3804 case 64:
3805 return BuildFn(AArch64::dsub);
3806 default:
3807 return nullptr;
3808 }
3809}
3810
3811MachineInstr *
3812AArch64InstructionSelector::emitNarrowVector(Register DstReg, Register SrcReg,
3813 MachineIRBuilder &MIB,
3814 MachineRegisterInfo &MRI) const {
3815 LLT DstTy = MRI.getType(DstReg);
3816 const TargetRegisterClass *RC =
3817 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(SrcReg, MRI, TRI));
3818 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3819 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3820 return nullptr;
3821 }
3822 unsigned SubReg = 0;
3823 if (!getSubRegForClass(RC, TRI, SubReg))
3824 return nullptr;
3825 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3826 LLVM_DEBUG(dbgs() << "Unsupported destination size! ("
3827 << DstTy.getSizeInBits() << "\n");
3828 return nullptr;
3829 }
3830 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3831 .addReg(SrcReg, {}, SubReg);
3832 RBI.constrainGenericRegister(DstReg, *RC, MRI);
3833 return Copy;
3834}
3835
3836bool AArch64InstructionSelector::selectMergeValues(
3837 MachineInstr &I, MachineRegisterInfo &MRI) {
3838 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3839 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3840 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3841 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3842 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3843
3844 if (I.getNumOperands() != 3)
3845 return false;
3846
3847 // Merging 2 s64s into an s128.
3848 if (DstTy == LLT::scalar(128)) {
3849 if (SrcTy.getSizeInBits() != 64)
3850 return false;
3851 Register DstReg = I.getOperand(0).getReg();
3852 Register Src1Reg = I.getOperand(1).getReg();
3853 Register Src2Reg = I.getOperand(2).getReg();
3854 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3855 MachineInstr *InsMI = emitLaneInsert(std::nullopt, Tmp.getReg(0), Src1Reg,
3856 /* LaneIdx */ 0, RB, MIB);
3857 if (!InsMI)
3858 return false;
3859 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
3860 Src2Reg, /* LaneIdx */ 1, RB, MIB);
3861 if (!Ins2MI)
3862 return false;
3863 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3864 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
3865 I.eraseFromParent();
3866 return true;
3867 }
3868
3869 if (RB.getID() != AArch64::GPRRegBankID)
3870 return false;
3871
3872 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3873 return false;
3874
3875 auto *DstRC = &AArch64::GPR64RegClass;
3876 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
3877 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3878 TII.get(TargetOpcode::SUBREG_TO_REG))
3879 .addDef(SubToRegDef)
3880 .addUse(I.getOperand(1).getReg())
3881 .addImm(AArch64::sub_32);
3882 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
3883 // Need to anyext the second scalar before we can use bfm
3884 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3885 TII.get(TargetOpcode::SUBREG_TO_REG))
3886 .addDef(SubToRegDef2)
3887 .addUse(I.getOperand(2).getReg())
3888 .addImm(AArch64::sub_32);
3889 MachineInstr &BFM =
3890 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
3891 .addDef(I.getOperand(0).getReg())
3892 .addUse(SubToRegDef)
3893 .addUse(SubToRegDef2)
3894 .addImm(32)
3895 .addImm(31);
3896 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
3897 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
3898 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
3899 I.eraseFromParent();
3900 return true;
3901}
3902
3903static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3904 const unsigned EltSize) {
3905 // Choose a lane copy opcode and subregister based off of the size of the
3906 // vector's elements.
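 // For example, s32 elements use DUPi32 for non-zero lanes, while lane 0 is
 // read directly through the ssub subregister.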
3907 switch (EltSize) {
3908 case 8:
3909 CopyOpc = AArch64::DUPi8;
3910 ExtractSubReg = AArch64::bsub;
3911 break;
3912 case 16:
3913 CopyOpc = AArch64::DUPi16;
3914 ExtractSubReg = AArch64::hsub;
3915 break;
3916 case 32:
3917 CopyOpc = AArch64::DUPi32;
3918 ExtractSubReg = AArch64::ssub;
3919 break;
3920 case 64:
3921 CopyOpc = AArch64::DUPi64;
3922 ExtractSubReg = AArch64::dsub;
3923 break;
3924 default:
3925 // Unknown size, bail out.
3926 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
3927 return false;
3928 }
3929 return true;
3930}
3931
3932MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3933 std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
3934 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
3935 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3936 unsigned CopyOpc = 0;
3937 unsigned ExtractSubReg = 0;
3938 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
3939 LLVM_DEBUG(
3940 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
3941 return nullptr;
3942 }
3943
3944 const TargetRegisterClass *DstRC =
3945 getRegClassForTypeOnBank(ScalarTy, DstRB, true);
3946 if (!DstRC) {
3947 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
3948 return nullptr;
3949 }
3950
3951 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
3952 const LLT &VecTy = MRI.getType(VecReg);
3953 const TargetRegisterClass *VecRC =
3954 getRegClassForTypeOnBank(VecTy, VecRB, true);
3955 if (!VecRC) {
3956 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3957 return nullptr;
3958 }
3959
3960 // The register that we're going to copy into.
3961 Register InsertReg = VecReg;
3962 if (!DstReg)
3963 DstReg = MRI.createVirtualRegister(DstRC);
3964 // If the lane index is 0, we just use a subregister COPY.
3965 if (LaneIdx == 0) {
3966 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
3967 .addReg(VecReg, {}, ExtractSubReg);
3968 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3969 return &*Copy;
3970 }
3971
3972 // Lane copies require 128-bit wide registers. If we're dealing with an
3973 // unpacked vector, then we need to move up to that width. Insert an implicit
3974 // def and a subregister insert to get us there.
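 // e.g. a 64-bit <2 x s32> source is first placed into the low half of an
 // FPR128 via IMPLICIT_DEF + INSERT_SUBREG before DUPi32 can index its lanes.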
3975 if (VecTy.getSizeInBits() != 128) {
3976 MachineInstr *ScalarToVector = emitScalarToVector(
3977 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
3978 if (!ScalarToVector)
3979 return nullptr;
3980 InsertReg = ScalarToVector->getOperand(0).getReg();
3981 }
3982
3983 MachineInstr *LaneCopyMI =
3984 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
3985 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
3986
3987 // Make sure that we actually constrain the initial copy.
3988 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3989 return LaneCopyMI;
3990}
3991
3992bool AArch64InstructionSelector::selectExtractElt(
3993 MachineInstr &I, MachineRegisterInfo &MRI) {
3994 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
3995 "unexpected opcode!");
3996 Register DstReg = I.getOperand(0).getReg();
3997 const LLT NarrowTy = MRI.getType(DstReg);
3998 const Register SrcReg = I.getOperand(1).getReg();
3999 const LLT WideTy = MRI.getType(SrcReg);
4000 (void)WideTy;
4001 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
4002 "source register size too small!");
4003 assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
4004
4005 // Need the lane index to determine the correct copy opcode.
4006 MachineOperand &LaneIdxOp = I.getOperand(2);
4007 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
4008
4009 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4010 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
4011 return false;
4012 }
4013
4014 // Find the index to extract from.
4015 auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
4016 if (!VRegAndVal)
4017 return false;
4018 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4019
4020
4021 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
4022 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
4023 LaneIdx, MIB);
4024 if (!Extract)
4025 return false;
4026
4027 I.eraseFromParent();
4028 return true;
4029}
4030
4031bool AArch64InstructionSelector::selectSplitVectorUnmerge(
4032 MachineInstr &I, MachineRegisterInfo &MRI) {
4033 unsigned NumElts = I.getNumOperands() - 1;
4034 Register SrcReg = I.getOperand(NumElts).getReg();
4035 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4036 const LLT SrcTy = MRI.getType(SrcReg);
4037
4038 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
4039 if (SrcTy.getSizeInBits() > 128) {
4040 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
4041 return false;
4042 }
4043
4044 // We implement a split vector operation by treating the sub-vectors as
4045 // scalars and extracting them.
4046 const RegisterBank &DstRB =
4047 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
4048 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4049 Register Dst = I.getOperand(OpIdx).getReg();
4050 MachineInstr *Extract =
4051 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
4052 if (!Extract)
4053 return false;
4054 }
4055 I.eraseFromParent();
4056 return true;
4057}
4058
4059bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
4060 MachineRegisterInfo &MRI) {
4061 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4062 "unexpected opcode");
4063
4064 // TODO: Handle unmerging into GPRs and from scalars to scalars.
4065 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
4066 AArch64::FPRRegBankID ||
4067 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
4068 AArch64::FPRRegBankID) {
4069 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
4070 "currently unsupported.\n");
4071 return false;
4072 }
4073
4074 // The last operand is the vector source register, and every other operand is
4075 // a register to unpack into.
4076 unsigned NumElts = I.getNumOperands() - 1;
4077 Register SrcReg = I.getOperand(NumElts).getReg();
4078 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4079 const LLT WideTy = MRI.getType(SrcReg);
4080
4081 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
4082 "source register size too small!");
4083
4084 if (!NarrowTy.isScalar())
4085 return selectSplitVectorUnmerge(I, MRI);
4086
4087 // Choose a lane copy opcode and subregister based off of the size of the
4088 // vector's elements.
4089 unsigned CopyOpc = 0;
4090 unsigned ExtractSubReg = 0;
4091 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
4092 return false;
4093
4094 // Set up for the lane copies.
4095 MachineBasicBlock &MBB = *I.getParent();
4096
4097 // Stores the registers we'll be copying from.
4098 SmallVector<Register, 4> InsertRegs;
4099
4100 // We'll use the first register twice, so we only need NumElts-1 registers.
4101 unsigned NumInsertRegs = NumElts - 1;
4102
4103 // If our elements fit into exactly 128 bits, then we can copy from the source
4104 // directly. Otherwise, we need to do a bit of setup with some subregister
4105 // inserts.
4106 if (NarrowTy.getSizeInBits() * NumElts == 128) {
4107 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
4108 } else {
4109 // No. We have to perform subregister inserts. For each insert, create an
4110 // implicit def and a subregister insert, and save the register we create.
4111 // For scalar sources, treat as a pseudo-vector of NarrowTy elements.
4112 unsigned EltSize = WideTy.isVector() ? WideTy.getScalarSizeInBits()
4113 : NarrowTy.getSizeInBits();
4114 const TargetRegisterClass *RC = getRegClassForTypeOnBank(
4115 LLT::fixed_vector(NumElts, EltSize), *RBI.getRegBank(SrcReg, MRI, TRI));
4116 unsigned SubReg = 0;
4117 bool Found = getSubRegForClass(RC, TRI, SubReg);
4118 (void)Found;
4119 assert(Found && "expected to find last operand's subreg idx");
4120 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4121 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4122 MachineInstr &ImpDefMI =
4123 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
4124 ImpDefReg);
4125
4126 // Now, create the subregister insert from SrcReg.
4127 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4128 MachineInstr &InsMI =
4129 *BuildMI(MBB, I, I.getDebugLoc(),
4130 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4131 .addUse(ImpDefReg)
4132 .addUse(SrcReg)
4133 .addImm(SubReg);
4134
4135 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
4136 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
4137
4138 // Save the register so that we can copy from it after.
4139 InsertRegs.push_back(InsertReg);
4140 }
4141 }
4142
4143 // Now that we've created any necessary subregister inserts, we can
4144 // create the copies.
4145 //
4146 // Perform the first copy separately as a subregister copy.
4147 Register CopyTo = I.getOperand(0).getReg();
4148 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4149 .addReg(InsertRegs[0], {}, ExtractSubReg);
4150 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
4151
4152 // Now, perform the remaining copies as vector lane copies.
4153 unsigned LaneIdx = 1;
4154 for (Register InsReg : InsertRegs) {
4155 Register CopyTo = I.getOperand(LaneIdx).getReg();
4156 MachineInstr &CopyInst =
4157 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
4158 .addUse(InsReg)
4159 .addImm(LaneIdx);
4160 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
4161 ++LaneIdx;
4162 }
4163
4164 // Separately constrain the first copy's destination. Because of the
4165 // limitation in constrainOperandRegClass, we can't guarantee that this will
4166 // actually be constrained. So, do it ourselves using the second operand.
4167 const TargetRegisterClass *RC =
4168 MRI.getRegClassOrNull(I.getOperand(1).getReg());
4169 if (!RC) {
4170 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
4171 return false;
4172 }
4173
4174 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
4175 I.eraseFromParent();
4176 return true;
4177}
4178
4179bool AArch64InstructionSelector::selectConcatVectors(
4180 MachineInstr &I, MachineRegisterInfo &MRI) {
4181 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4182 "Unexpected opcode");
4183 Register Dst = I.getOperand(0).getReg();
4184 Register Op1 = I.getOperand(1).getReg();
4185 Register Op2 = I.getOperand(2).getReg();
4186 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4187 if (!ConcatMI)
4188 return false;
4189 I.eraseFromParent();
4190 return true;
4191}
4192
4193unsigned
4194AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4195 MachineFunction &MF) const {
4196 Type *CPTy = CPVal->getType();
4197 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
4198
4199 MachineConstantPool *MCP = MF.getConstantPool();
4200 return MCP->getConstantPoolIndex(CPVal, Alignment);
4201}
4202
4203MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4204 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4205 const TargetRegisterClass *RC;
4206 unsigned Opc;
4207 bool IsTiny = TM.getCodeModel() == CodeModel::Tiny;
4208 unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
4209 switch (Size) {
4210 case 16:
4211 RC = &AArch64::FPR128RegClass;
4212 Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
4213 break;
4214 case 8:
4215 RC = &AArch64::FPR64RegClass;
4216 Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
4217 break;
4218 case 4:
4219 RC = &AArch64::FPR32RegClass;
4220 Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
4221 break;
4222 case 2:
4223 RC = &AArch64::FPR16RegClass;
4224 Opc = AArch64::LDRHui;
4225 break;
4226 default:
4227 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4228 << *CPVal->getType());
4229 return nullptr;
4230 }
4231
4232 MachineInstr *LoadMI = nullptr;
4233 auto &MF = MIRBuilder.getMF();
4234 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4235 if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) {
4236 // Use load(literal) for tiny code model.
4237 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {}).addConstantPoolIndex(CPIdx);
4238 } else {
4239 auto Adrp =
4240 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4241 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
4242
4243 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {Adrp})
4244 .addConstantPoolIndex(
4245 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4246
4247 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
4248 }
4249
4250 MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4251 LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
4252 MachineMemOperand::MOLoad,
4253 Size, Align(Size)));
4254 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
4255 return LoadMI;
4256}
4257
4258/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
4259/// size and RB.
4260static std::pair<unsigned, unsigned>
4261getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4262 unsigned Opc, SubregIdx;
4263 if (RB.getID() == AArch64::GPRRegBankID) {
4264 if (EltSize == 8) {
4265 Opc = AArch64::INSvi8gpr;
4266 SubregIdx = AArch64::bsub;
4267 } else if (EltSize == 16) {
4268 Opc = AArch64::INSvi16gpr;
4269 SubregIdx = AArch64::ssub;
4270 } else if (EltSize == 32) {
4271 Opc = AArch64::INSvi32gpr;
4272 SubregIdx = AArch64::ssub;
4273 } else if (EltSize == 64) {
4274 Opc = AArch64::INSvi64gpr;
4275 SubregIdx = AArch64::dsub;
4276 } else {
4277 llvm_unreachable("invalid elt size!");
4278 }
4279 } else {
4280 if (EltSize == 8) {
4281 Opc = AArch64::INSvi8lane;
4282 SubregIdx = AArch64::bsub;
4283 } else if (EltSize == 16) {
4284 Opc = AArch64::INSvi16lane;
4285 SubregIdx = AArch64::hsub;
4286 } else if (EltSize == 32) {
4287 Opc = AArch64::INSvi32lane;
4288 SubregIdx = AArch64::ssub;
4289 } else if (EltSize == 64) {
4290 Opc = AArch64::INSvi64lane;
4291 SubregIdx = AArch64::dsub;
4292 } else {
4293 llvm_unreachable("invalid elt size!");
4294 }
4295 }
4296 return std::make_pair(Opc, SubregIdx);
4297}
4298
4299MachineInstr *AArch64InstructionSelector::emitInstr(
4300 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4301 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4302 const ComplexRendererFns &RenderFns) const {
4303 assert(Opcode && "Expected an opcode?");
4304 assert(!isPreISelGenericOpcode(Opcode) &&
4305 "Function should only be used to produce selected instructions!");
4306 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4307 if (RenderFns)
4308 for (auto &Fn : *RenderFns)
4309 Fn(MI);
4310 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4311 return &*MI;
4312}
4313
4314MachineInstr *AArch64InstructionSelector::emitAddSub(
4315 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4316 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4317 MachineIRBuilder &MIRBuilder) const {
4318 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4319 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4320 auto Ty = MRI.getType(LHS.getReg());
4321 assert(!Ty.isVector() && "Expected a scalar or pointer?");
4322 unsigned Size = Ty.getSizeInBits();
4323 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4324 bool Is32Bit = Size == 32;
4325
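 // The OpcTable rows are: [0] immediate, [1] shifted register, [2] plain
 // register, [3] negated immediate (using the inverse opcode), and
 // [4] extended register; the more specific forms are tried first.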
4326 // INSTRri form with positive arithmetic immediate.
4327 if (auto Fns = selectArithImmed(RHS))
4328 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4329 MIRBuilder, Fns);
4330
4331 // INSTRri form with negative arithmetic immediate.
4332 if (auto Fns = selectNegArithImmed(RHS))
4333 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4334 MIRBuilder, Fns);
4335
4336 // INSTRrx form.
4337 if (auto Fns = selectArithExtendedRegister(RHS))
4338 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4339 MIRBuilder, Fns);
4340
4341 // INSTRrs form.
4342 if (auto Fns = selectShiftedRegister(RHS))
4343 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4344 MIRBuilder, Fns);
4345 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4346 MIRBuilder);
4347}
4348
4349MachineInstr *
4350AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4351 MachineOperand &RHS,
4352 MachineIRBuilder &MIRBuilder) const {
4353 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4354 {{AArch64::ADDXri, AArch64::ADDWri},
4355 {AArch64::ADDXrs, AArch64::ADDWrs},
4356 {AArch64::ADDXrr, AArch64::ADDWrr},
4357 {AArch64::SUBXri, AArch64::SUBWri},
4358 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4359 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4360}
4361
4362MachineInstr *
4363AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4364 MachineOperand &RHS,
4365 MachineIRBuilder &MIRBuilder) const {
4366 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4367 {{AArch64::ADDSXri, AArch64::ADDSWri},
4368 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4369 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4370 {AArch64::SUBSXri, AArch64::SUBSWri},
4371 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4372 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4373}
4374
4375MachineInstr *
4376AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4377 MachineOperand &RHS,
4378 MachineIRBuilder &MIRBuilder) const {
4379 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4380 {{AArch64::SUBSXri, AArch64::SUBSWri},
4381 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4382 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4383 {AArch64::ADDSXri, AArch64::ADDSWri},
4384 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4385 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4386}
4387
4388MachineInstr *
4389AArch64InstructionSelector::emitADCS(Register Dst, MachineOperand &LHS,
4390 MachineOperand &RHS,
4391 MachineIRBuilder &MIRBuilder) const {
4392 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4393 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4394 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4395 static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr};
4396 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4397}
4398
4399MachineInstr *
4400AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS,
4401 MachineOperand &RHS,
4402 MachineIRBuilder &MIRBuilder) const {
4403 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4404 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4405 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4406 static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr};
4407 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4408}
4409
4410MachineInstr *
4411AArch64InstructionSelector::emitCMP(MachineOperand &LHS, MachineOperand &RHS,
4412 MachineIRBuilder &MIRBuilder) const {
4413 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4414 bool Is32Bit = MRI.getType(LHS.getReg()).getSizeInBits() == 32;
4415 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4416 return emitSUBS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4417}
4418
4419MachineInstr *
4420AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4421 MachineIRBuilder &MIRBuilder) const {
4422 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4423 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4424 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4425 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4426}
4427
4428MachineInstr *
4429AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4430 MachineIRBuilder &MIRBuilder) const {
4431 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4432 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4433 LLT Ty = MRI.getType(LHS.getReg());
4434 unsigned RegSize = Ty.getSizeInBits();
4435 bool Is32Bit = (RegSize == 32);
4436 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4437 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4438 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4439 // ANDS needs a logical immediate for its immediate form. Check if we can
4440 // fold one in.
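 // e.g. a test against 0x1 becomes ANDSWri with encodeLogicalImmediate(1, 32),
 // mirroring the G_SELECT condition test above.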
4441 if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4442 int64_t Imm = ValAndVReg->Value.getSExtValue();
4443
4444 if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
4445 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4446 TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
4447 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4448 return &*TstMI;
4449 }
4450 }
4451
4452 if (auto Fns = selectLogicalShiftedRegister(RHS))
4453 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4454 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4455}
4456
4457MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4458 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4459 MachineIRBuilder &MIRBuilder) const {
4460 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4461 assert(Predicate.isPredicate() && "Expected predicate?");
4462 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4463 LLT CmpTy = MRI.getType(LHS.getReg());
4464 assert(!CmpTy.isVector() && "Expected scalar or pointer");
4465 unsigned Size = CmpTy.getSizeInBits();
4466 (void)Size;
4467 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4468 // Fold the compare into a cmn or tst if possible.
4469 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4470 return FoldCmp;
4471 return emitCMP(LHS, RHS, MIRBuilder);
4472}
4473
4474MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4475 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4476 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4477#ifndef NDEBUG
4478 LLT Ty = MRI.getType(Dst);
4479 assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4480 "Expected a 32-bit scalar register?");
4481#endif
4482 const Register ZReg = AArch64::WZR;
4483 AArch64CC::CondCode CC1, CC2;
4484 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
4485 auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
4486 if (CC2 == AArch64CC::AL)
4487 return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
4488 MIRBuilder);
4489 const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4490 Register Def1Reg = MRI.createVirtualRegister(RC);
4491 Register Def2Reg = MRI.createVirtualRegister(RC);
4492 auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
4493 emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
4494 emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
4495 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4496 constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4497 return &*OrMI;
4498}
4499
4500MachineInstr *AArch64InstructionSelector::emitFPCompare(
4501 Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
4502 std::optional<CmpInst::Predicate> Pred) const {
4503 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4504 LLT Ty = MRI.getType(LHS);
4505 if (Ty.isVector())
4506 return nullptr;
4507 unsigned OpSize = Ty.getSizeInBits();
4508 assert(OpSize == 16 || OpSize == 32 || OpSize == 64);
4509
4510 // If this is a compare against +0.0, then we don't have
4511 // to explicitly materialize a constant.
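 // The FCMP*ri forms compare directly against #0.0, so no FMOV of a zero
 // constant needs to be materialized in that case.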
4512 const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4513 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4514
4515 auto IsEqualityPred = [](CmpInst::Predicate P) {
4516 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4517 P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4518 };
4519 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4520 // Try commuting the operands.
4521 const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4522 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4523 ShouldUseImm = true;
4524 std::swap(LHS, RHS);
4525 }
4526 }
4527 unsigned CmpOpcTbl[2][3] = {
4528 {AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr},
4529 {AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}};
4530 unsigned CmpOpc =
4531 CmpOpcTbl[ShouldUseImm][OpSize == 16 ? 0 : (OpSize == 32 ? 1 : 2)];
4532
4533 // Partially build the compare. Decide if we need to add a use for the
4534 // third operand based off whether or not we're comparing against 0.0.
4535 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4536 CmpMI.setMIFlags(MachineInstr::NoFPExcept);
4537 if (!ShouldUseImm)
4538 CmpMI.addUse(RHS);
4539 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4540 return &*CmpMI;
4541}
4542
4543MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4544 std::optional<Register> Dst, Register Op1, Register Op2,
4545 MachineIRBuilder &MIRBuilder) const {
4546 // We implement a vector concat by:
4547 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4548 // 2. Insert the upper vector into the destination's upper element
4549 // TODO: some of this code is common with G_BUILD_VECTOR handling.
4550 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4551
4552 const LLT Op1Ty = MRI.getType(Op1);
4553 const LLT Op2Ty = MRI.getType(Op2);
4554
4555 if (Op1Ty != Op2Ty) {
4556 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4557 return nullptr;
4558 }
4559 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4560
4561 if (Op1Ty.getSizeInBits() >= 128) {
4562 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4563 return nullptr;
4564 }
4565
4566 // At the moment we just support 64 bit vector concats.
4567 if (Op1Ty.getSizeInBits() != 64) {
4568 LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
4569 return nullptr;
4570 }
4571
4572 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4573 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4574 const TargetRegisterClass *DstRC =
4575 getRegClassForTypeOnBank(Op1Ty.multiplyElements(2), FPRBank);
4576
4577 MachineInstr *WidenedOp1 =
4578 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4579 MachineInstr *WidenedOp2 =
4580 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4581 if (!WidenedOp1 || !WidenedOp2) {
4582 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4583 return nullptr;
4584 }
4585
4586 // Now do the insert of the upper element.
4587 unsigned InsertOpc, InsSubRegIdx;
4588 std::tie(InsertOpc, InsSubRegIdx) =
4589 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4590
4591 if (!Dst)
4592 Dst = MRI.createVirtualRegister(DstRC);
4593 auto InsElt =
4594 MIRBuilder
4595 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4596 .addImm(1) /* Lane index */
4597 .addUse(WidenedOp2->getOperand(0).getReg())
4598 .addImm(0);
4599 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4600 return &*InsElt;
4601}
4602
4603MachineInstr *
4604AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
4605 Register Src2, AArch64CC::CondCode Pred,
4606 MachineIRBuilder &MIRBuilder) const {
4607 auto &MRI = *MIRBuilder.getMRI();
4608 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
4609 // If we used a register class, then this won't necessarily have an LLT.
4610 // Compute the size based off whether or not we have a class or bank.
4611 unsigned Size;
4612 if (const auto *RC = dyn_cast<const TargetRegisterClass *>(RegClassOrBank))
4613 Size = TRI.getRegSizeInBits(*RC);
4614 else
4615 Size = MRI.getType(Dst).getSizeInBits();
4616 // Some opcodes use s1.
4617 assert(Size <= 64 && "Expected 64 bits or less only!");
4618 static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4619 unsigned Opc = OpcTable[Size == 64];
4620 auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
4621 constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
4622 return &*CSINC;
4623}
4624
4625MachineInstr *AArch64InstructionSelector::emitCarryIn(MachineInstr &I,
4626 Register CarryReg) {
4627 MachineRegisterInfo *MRI = MIB.getMRI();
4628 unsigned Opcode = I.getOpcode();
4629
4630 // If the instruction is a SUB, we need to negate the carry,
4631 // because borrowing is indicated by carry-flag == 0.
4632 bool NeedsNegatedCarry =
4633 (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE);
4634
4635 // If the previous instruction will already produce the correct carry, do not
4636 // emit a carry generating instruction. E.g. for G_UADDE/G_USUBE sequences
4637 // generated during legalization of wide add/sub. This optimization depends on
4638 // these sequences not being interrupted by other instructions.
4639 // We have to select the previous instruction before the carry-using
4640 // instruction is deleted by the calling function, otherwise the previous
4641 // instruction might become dead and would get deleted.
4642 MachineInstr *SrcMI = MRI->getVRegDef(CarryReg);
4643 if (SrcMI == I.getPrevNode()) {
4644 if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(SrcMI)) {
4645 bool ProducesNegatedCarry = CarrySrcMI->isSub();
4646 if (NeedsNegatedCarry == ProducesNegatedCarry &&
4647 CarrySrcMI->isUnsigned() &&
4648 CarrySrcMI->getCarryOutReg() == CarryReg &&
4649 selectAndRestoreState(*SrcMI))
4650 return nullptr;
4651 }
4652 }
4653
4654 Register DeadReg = MRI->createVirtualRegister(&AArch64::GPR32RegClass);
4655
4656 if (NeedsNegatedCarry) {
4657 // (0 - Carry) sets !C in NZCV when Carry == 1
4658 Register ZReg = AArch64::WZR;
4659 return emitInstr(AArch64::SUBSWrr, {DeadReg}, {ZReg, CarryReg}, MIB);
4660 }
4661
4662 // (Carry - 1) sets !C in NZCV when Carry == 0
4663 auto Fns = select12BitValueWithLeftShift(1);
4664 return emitInstr(AArch64::SUBSWri, {DeadReg}, {CarryReg}, MIB, Fns);
4665}
4666
4667bool AArch64InstructionSelector::selectOverflowOp(MachineInstr &I,
4668 MachineRegisterInfo &MRI) {
4669 auto &CarryMI = cast<GAddSubCarryOut>(I);
4670
4671 if (auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(&I)) {
4672 // Set NZCV carry according to carry-in VReg
4673 emitCarryIn(I, CarryInMI->getCarryInReg());
4674 }
4675
4676 // Emit the operation and get the correct condition code.
4677 auto OpAndCC = emitOverflowOp(I.getOpcode(), CarryMI.getDstReg(),
4678 CarryMI.getLHS(), CarryMI.getRHS(), MIB);
4679
4680 Register CarryOutReg = CarryMI.getCarryOutReg();
4681
4682 // Don't convert carry-out to VReg if it is never used
4683 if (!MRI.use_nodbg_empty(CarryOutReg)) {
4684 // Now, put the overflow result in the register given by the first operand
4685 // to the overflow op. CSINC increments the result when the predicate is
4686 // false, so to get the increment when it's true, we need to use the
4687 // inverse. In this case, we want to increment when carry is set.
4688 Register ZReg = AArch64::WZR;
4689 emitCSINC(/*Dst=*/CarryOutReg, /*Src1=*/ZReg, /*Src2=*/ZReg,
4690 getInvertedCondCode(OpAndCC.second), MIB);
4691 }
4692
4693 I.eraseFromParent();
4694 return true;
4695}
4696
4697std::pair<MachineInstr *, AArch64CC::CondCode>
4698AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4699 MachineOperand &LHS,
4700 MachineOperand &RHS,
4701 MachineIRBuilder &MIRBuilder) const {
4702 switch (Opcode) {
4703 default:
4704 llvm_unreachable("Unexpected opcode!");
4705 case TargetOpcode::G_SADDO:
4706 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4707 case TargetOpcode::G_UADDO:
4708 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4709 case TargetOpcode::G_SSUBO:
4710 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4711 case TargetOpcode::G_USUBO:
4712 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4713 case TargetOpcode::G_SADDE:
4714 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4715 case TargetOpcode::G_UADDE:
4716 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4717 case TargetOpcode::G_SSUBE:
4718 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4719 case TargetOpcode::G_USUBE:
4720 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4721 }
4722}
4723
4724/// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
4725/// expressed as a conjunction.
4726/// \param CanNegate Set to true if we can negate the whole sub-tree just by
4727/// changing the conditions on the CMP tests.
4728/// (this means we can call emitConjunctionRec() with
4729/// Negate==true on this sub-tree)
4730/// \param MustBeFirst Set to true if this subtree needs to be negated and we
4731/// cannot do the negation naturally. We are required to
4732/// emit the subtree first in this case.
4733/// \param WillNegate Is true if we are called when the result of this
4734/// subexpression must be negated. This happens when the
4735/// outer expression is an OR. We can use this fact to know
4736/// that we have a double negation (or (or ...) ...) that
4737/// can be implemented for free.
4738static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
4739 bool WillNegate, MachineRegisterInfo &MRI,
4740 unsigned Depth = 0) {
4741 if (!MRI.hasOneNonDBGUse(Val))
4742 return false;
4743 MachineInstr *ValDef = MRI.getVRegDef(Val);
4744 unsigned Opcode = ValDef->getOpcode();
4745 if (isa<GAnyCmp>(ValDef)) {
4746 CanNegate = true;
4747 MustBeFirst = false;
4748 return true;
4749 }
4750 // Protect against exponential runtime and stack overflow.
4751 if (Depth > 6)
4752 return false;
4753 if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
4754 bool IsOR = Opcode == TargetOpcode::G_OR;
4755 Register O0 = ValDef->getOperand(1).getReg();
4756 Register O1 = ValDef->getOperand(2).getReg();
4757 bool CanNegateL;
4758 bool MustBeFirstL;
4759 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, MRI, Depth + 1))
4760 return false;
4761 bool CanNegateR;
4762 bool MustBeFirstR;
4763 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, MRI, Depth + 1))
4764 return false;
4765
4766 if (MustBeFirstL && MustBeFirstR)
4767 return false;
4768
4769 if (IsOR) {
4770 // For an OR expression we need to be able to naturally negate at least
4771 // one side or we cannot do the transformation at all.
4772 if (!CanNegateL && !CanNegateR)
4773 return false;
4774 // If the result of the OR will be negated and we can naturally negate
4775 // the leaves, then this sub-tree as a whole negates naturally.
4776 CanNegate = WillNegate && CanNegateL && CanNegateR;
4777 // If we cannot naturally negate the whole sub-tree, then this must be
4778 // emitted first.
4779 MustBeFirst = !CanNegate;
4780 } else {
4781 assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
4782 // We cannot naturally negate an AND operation.
4783 CanNegate = false;
4784 MustBeFirst = MustBeFirstL || MustBeFirstR;
4785 }
4786 return true;
4787 }
4788 return false;
4789}
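// As an illustrative sketch (the register names here are made up), a chain
// such as
//   %a:gpr(s1) = G_ICMP intpred(eq), %p, %q
//   %b:gpr(s1) = G_ICMP intpred(slt), %x, %y
//   %c:gpr(s1) = G_AND %a, %b
// satisfies canEmitConjunction, and emitConjunction below lowers it to
// roughly a plain compare for one leaf followed by a conditional compare
// (CCMP) for the other, leaving OutCC as the condition to test for the AND.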
4790
4791MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
4792 Register LHS, Register RHS, CmpInst::Predicate CC,
4793 AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
4794 MachineIRBuilder &MIB) const {
4795 auto &MRI = *MIB.getMRI();
4796 LLT OpTy = MRI.getType(LHS);
4797 unsigned CCmpOpc;
4798 std::optional<ValueAndVReg> C;
4799 if (CmpInst::isIntPredicate(CC)) {
4800 assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
4801 C = getIConstantVRegValWithLookThrough(RHS, MRI);
4802 if (!C || C->Value.sgt(31) || C->Value.slt(-31))
4803 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
4804 else if (C->Value.ule(31))
4805 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
4806 else
4807 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMNWi : AArch64::CCMNXi;
4808 } else {
4809 assert(OpTy.getSizeInBits() == 16 || OpTy.getSizeInBits() == 32 ||
4810 OpTy.getSizeInBits() == 64);
4811 switch (OpTy.getSizeInBits()) {
4812 case 16:
4813 assert(STI.hasFullFP16() && "Expected Full FP16 for fp16 comparisons");
4814 CCmpOpc = AArch64::FCCMPHrr;
4815 break;
4816 case 32:
4817 CCmpOpc = AArch64::FCCMPSrr;
4818 break;
4819 case 64:
4820 CCmpOpc = AArch64::FCCMPDrr;
4821 break;
4822 default:
4823 return nullptr;
4824 }
4825 }
4826 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
4827 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
4828 auto CCmp =
4829 MIB.buildInstr(CCmpOpc, {}, {LHS});
4830 if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
4831 CCmp.addImm(C->Value.getZExtValue());
4832 else if (CCmpOpc == AArch64::CCMNWi || CCmpOpc == AArch64::CCMNXi)
4833 CCmp.addImm(C->Value.abs().getZExtValue());
4834 else
4835 CCmp.addReg(RHS);
4836 CCmp.addImm(NZCV).addImm(Predicate);
4838 return &*CCmp;
4839}
4840
4841MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
4842 Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
4843 AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
4844 // We're at a tree leaf, produce a conditional comparison operation.
4845 auto &MRI = *MIB.getMRI();
4846 MachineInstr *ValDef = MRI.getVRegDef(Val);
4847 unsigned Opcode = ValDef->getOpcode();
4848 if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
4849 Register LHS = Cmp->getLHSReg();
4850 Register RHS = Cmp->getRHSReg();
4851 CmpInst::Predicate CC = Cmp->getCond();
4852 if (Negate)
4853 CC = CmpInst::getInversePredicate(CC);
4854 if (isa<GICmp>(Cmp)) {
4855 OutCC = changeICMPPredToAArch64CC(CC, RHS, MIB.getMRI());
4856 } else {
4857 // Handle special FP cases.
4858 AArch64CC::CondCode ExtraCC;
4859 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
4860 // Some floating point conditions can't be tested with a single condition
4861 // code. Construct an additional comparison in this case.
4862 if (ExtraCC != AArch64CC::AL) {
4863 MachineInstr *ExtraCmp;
4864 if (!CCOp)
4865 ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC);
4866 else
4867 ExtraCmp =
4868 emitConditionalComparison(LHS, RHS, CC, Predicate, ExtraCC, MIB);
4869 CCOp = ExtraCmp->getOperand(0).getReg();
4870 Predicate = ExtraCC;
4871 }
4872 }
4873
4874 // Produce a normal comparison if we are first in the chain
4875 if (!CCOp) {
4876 if (isa<GICmp>(Cmp))
4877 return emitCMP(Cmp->getOperand(2), Cmp->getOperand(3), MIB);
4878 return emitFPCompare(Cmp->getOperand(2).getReg(),
4879 Cmp->getOperand(3).getReg(), MIB);
4880 }
4881 // Otherwise produce a ccmp.
4882 return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB);
4883 }
4884 assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");
4885
4886 bool IsOR = Opcode == TargetOpcode::G_OR;
4887
4888 Register LHS = ValDef->getOperand(1).getReg();
4889 bool CanNegateL;
4890 bool MustBeFirstL;
4891 bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR, MRI);
4892 assert(ValidL && "Valid conjunction/disjunction tree");
4893 (void)ValidL;
4894
4895 Register RHS = ValDef->getOperand(2).getReg();
4896 bool CanNegateR;
4897 bool MustBeFirstR;
4898 bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR, MRI);
4899 assert(ValidR && "Valid conjunction/disjunction tree");
4900 (void)ValidR;
4901
4902 // Swap sub-tree that must come first to the right side.
4903 if (MustBeFirstL) {
4904 assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
4905 std::swap(LHS, RHS);
4906 std::swap(CanNegateL, CanNegateR);
4907 std::swap(MustBeFirstL, MustBeFirstR);
4908 }
4909
4910 bool NegateR;
4911 bool NegateAfterR;
4912 bool NegateL;
4913 bool NegateAfterAll;
4914 if (Opcode == TargetOpcode::G_OR) {
4915 // Swap the sub-tree that we can negate naturally to the left.
4916 if (!CanNegateL) {
4917 assert(CanNegateR && "at least one side must be negatable");
4918 assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
4919 assert(!Negate);
4920 std::swap(LHS, RHS);
4921 NegateR = false;
4922 NegateAfterR = true;
4923 } else {
4924 // Negate the left sub-tree if possible, otherwise negate the result.
4925 NegateR = CanNegateR;
4926 NegateAfterR = !CanNegateR;
4927 }
4928 NegateL = true;
4929 NegateAfterAll = !Negate;
4930 } else {
4931 assert(Opcode == TargetOpcode::G_AND &&
4932 "Valid conjunction/disjunction tree");
4933 assert(!Negate && "Valid conjunction/disjunction tree");
4934
4935 NegateL = false;
4936 NegateR = false;
4937 NegateAfterR = false;
4938 NegateAfterAll = false;
4939 }
4940
4941 // Emit sub-trees.
4942 AArch64CC::CondCode RHSCC;
4943 MachineInstr *CmpR =
4944 emitConjunctionRec(RHS, RHSCC, NegateR, CCOp, Predicate, MIB);
4945 if (NegateAfterR)
4946 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
4947 MachineInstr *CmpL = emitConjunctionRec(
4948 LHS, OutCC, NegateL, CmpR->getOperand(0).getReg(), RHSCC, MIB);
4949 if (NegateAfterAll)
4950 OutCC = AArch64CC::getInvertedCondCode(OutCC);
4951 return CmpL;
4952}
4953
4954MachineInstr *AArch64InstructionSelector::emitConjunction(
4955 Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {
4956 bool DummyCanNegate;
4957 bool DummyMustBeFirst;
4958 if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false,
4959 *MIB.getMRI()))
4960 return nullptr;
4961 return emitConjunctionRec(Val, OutCC, false, Register(), AArch64CC::AL, MIB);
4962}
4963
4964bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
4965 MachineInstr &CondMI) {
4966 AArch64CC::CondCode AArch64CC;
4967 MachineInstr *ConjMI = emitConjunction(SelI.getCondReg(), AArch64CC, MIB);
4968 if (!ConjMI)
4969 return false;
4970
4971 emitSelect(SelI.getReg(0), SelI.getTrueReg(), SelI.getFalseReg(), AArch64CC, MIB);
4972 SelI.eraseFromParent();
4973 return true;
4974}
4975
4976bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
4977 MachineRegisterInfo &MRI = *MIB.getMRI();
4978 // We want to recognize this pattern:
4979 //
4980 // $z = G_FCMP pred, $x, $y
4981 // ...
4982 // $w = G_SELECT $z, $a, $b
4983 //
4984 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
4985 // some copies/truncs in between.)
4986 //
4987 // If we see this, then we can emit something like this:
4988 //
4989 // fcmp $x, $y
4990 // fcsel $w, $a, $b, pred
4991 //
4992 // Rather than emitting both of the rather long sequences in the standard
4993 // G_FCMP/G_SELECT select methods.
4994
4995 // First, check if the condition is defined by a compare.
4996 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
4997
4998 // We can only fold if all of the defs have one use.
4999 Register CondDefReg = CondDef->getOperand(0).getReg();
5000 if (!MRI.hasOneNonDBGUse(CondDefReg)) {
5001 // Unless it's another select.
5002 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
5003 if (CondDef == &UI)
5004 continue;
5005 if (UI.getOpcode() != TargetOpcode::G_SELECT)
5006 return false;
5007 }
5008 }
5009
5010 // Is the condition defined by a compare?
5011 unsigned CondOpc = CondDef->getOpcode();
5012 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
5013 if (tryOptSelectConjunction(I, *CondDef))
5014 return true;
5015 return false;
5016 }
5017
5019 if (CondOpc == TargetOpcode::G_ICMP) {
5020 auto &PredOp = CondDef->getOperand(1);
5021 emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3), PredOp,
5022 MIB);
5023 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
5024 CondCode =
5025 changeICMPPredToAArch64CC(Pred, CondDef->getOperand(3).getReg(), &MRI);
5026 } else {
5027 // Get the condition code for the select.
5028 auto Pred =
5029 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
5030 AArch64CC::CondCode CondCode2;
5031 changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
5032
5033 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
5034 // instructions to emit the comparison.
5035 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
5036 // unnecessary.
5037 if (CondCode2 != AArch64CC::AL)
5038 return false;
5039
5040 if (!emitFPCompare(CondDef->getOperand(2).getReg(),
5041 CondDef->getOperand(3).getReg(), MIB)) {
5042 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
5043 return false;
5044 }
5045 }
5046
5047 // Emit the select.
5048 emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
5049 I.getOperand(3).getReg(), CondCode, MIB);
5050 I.eraseFromParent();
5051 return true;
5052}
5053
5054MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
5055 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
5056 MachineIRBuilder &MIRBuilder) const {
5057 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
5058 "Unexpected MachineOperand");
5059 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5060 // We want to find this sort of thing:
5061 // x = G_SUB 0, y
5062 // G_ICMP z, x
5063 //
5064 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
5065 // e.g:
5066 //
5067 // cmn z, y
5068
5069 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
5070 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
5071 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
5072 auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
5073
5074 // Given this:
5075 //
5076 // x = G_SUB 0, y
5077 // G_ICMP z, x
5078 //
5079 // Produce this:
5080 //
5081 // cmn z, y
5082 if (isCMN(RHSDef, P, MRI))
5083 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
5084
5085 // Same idea here, but with the LHS of the compare instead:
5086 //
5087 // Given this:
5088 //
5089 // x = G_SUB 0, y
5090 // G_ICMP x, z
5091 //
5092 // Produce this:
5093 //
5094 // cmn y, z
5095 //
5096 // But be careful! We need to swap the predicate!
5097 if (isCMN(LHSDef, P, MRI)) {
5098 if (!CmpInst::isEquality(P)) {
5099 P = CmpInst::getSwappedPredicate(P);
5100 Predicate.setPredicate(P);
5101 }
5102 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
5103 }
5104
5105 // Given this:
5106 //
5107 // z = G_AND x, y
5108 // G_ICMP z, 0
5109 //
5110 // Produce this if the compare is signed:
5111 //
5112 // tst x, y
5113 if (!CmpInst::isUnsigned(P) && LHSDef &&
5114 LHSDef->getOpcode() == TargetOpcode::G_AND) {
5115 // Make sure that the RHS is 0.
5116 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
5117 if (!ValAndVReg || ValAndVReg->Value != 0)
5118 return nullptr;
5119
5120 return emitTST(LHSDef->getOperand(1),
5121 LHSDef->getOperand(2), MIRBuilder);
5122 }
5123
5124 return nullptr;
5125}
5126
5127bool AArch64InstructionSelector::selectShuffleVector(
5128 MachineInstr &I, MachineRegisterInfo &MRI) {
5129 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5130 Register Src1Reg = I.getOperand(1).getReg();
5131 Register Src2Reg = I.getOperand(2).getReg();
5132 ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
5133
5134 MachineBasicBlock &MBB = *I.getParent();
5135 MachineFunction &MF = *MBB.getParent();
5136 LLVMContext &Ctx = MF.getFunction().getContext();
5137
5138 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
5139
5141 for (int Val : Mask) {
5142 // For now, we'll just assume any undef indexes are 0. This should be
5143 // optimized in the future, e.g. to select DUP etc.
5144 Val = Val < 0 ? 0 : Val;
5145 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
5146 unsigned Offset = Byte + Val * BytesPerElt;
5147 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
5148 }
5149 }
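// For example (illustrative): for a <4 x s32> shuffle with mask <0, 4, 1, 5>,
// BytesPerElt is 4 and the TBL index bytes built above are
//   0..3, 16..19, 4..7, 20..23
// i.e. lane 4 of the concatenated two-register source starts at byte 16.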
5150
5151 // Use a constant pool to load the index vector for TBL.
5152 Constant *CPVal = ConstantVector::get(CstIdxs);
5153 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB);
5154 if (!IndexLoad) {
5155 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
5156 return false;
5157 }
5158
5159 if (DstTy.getSizeInBits() != 128) {
5160 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
5161 // This case can be done with TBL1.
5162 MachineInstr *Concat =
5163 emitVectorConcat(std::nullopt, Src1Reg, Src2Reg, MIB);
5164 if (!Concat) {
5165 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
5166 return false;
5167 }
5168
5169 // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
5170 IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
5171 IndexLoad->getOperand(0).getReg(), MIB);
5172
5173 auto TBL1 = MIB.buildInstr(
5174 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
5175 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
5177
5178 auto Copy =
5179 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
5180 .addReg(TBL1.getReg(0), {}, AArch64::dsub);
5181 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
5182 I.eraseFromParent();
5183 return true;
5184 }
5185
5186 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
5187 // Q registers for regalloc.
5188 SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
5189 auto RegSeq = createQTuple(Regs, MIB);
5190 auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
5191 {RegSeq, IndexLoad->getOperand(0)});
5193 I.eraseFromParent();
5194 return true;
5195}
5196
5197MachineInstr *AArch64InstructionSelector::emitLaneInsert(
5198 std::optional<Register> DstReg, Register SrcReg, Register EltReg,
5199 unsigned LaneIdx, const RegisterBank &RB,
5200 MachineIRBuilder &MIRBuilder) const {
5201 MachineInstr *InsElt = nullptr;
5202 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5203 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5204
5205 // Create a register to define with the insert if one wasn't passed in.
5206 if (!DstReg)
5207 DstReg = MRI.createVirtualRegister(DstRC);
5208
5209 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
5210 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
5211
5212 if (RB.getID() == AArch64::FPRRegBankID) {
5213 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
5214 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5215 .addImm(LaneIdx)
5216 .addUse(InsSub->getOperand(0).getReg())
5217 .addImm(0);
5218 } else {
5219 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5220 .addImm(LaneIdx)
5221 .addUse(EltReg);
5222 }
5223
5225 return InsElt;
5226}
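// For instance (roughly): inserting a 32-bit GPR element into lane 1 of a
// 128-bit vector takes the GPR path above and produces something like
//   %dst:fpr128 = INSvi32gpr %src, 1, %elt
// whereas an FPR element is first widened with emitScalarToVector and then
// inserted with the corresponding INSvi*lane instruction.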
5227
5228bool AArch64InstructionSelector::selectUSMovFromExtend(
5229 MachineInstr &MI, MachineRegisterInfo &MRI) {
5230 if (MI.getOpcode() != TargetOpcode::G_SEXT &&
5231 MI.getOpcode() != TargetOpcode::G_ZEXT &&
5232 MI.getOpcode() != TargetOpcode::G_ANYEXT)
5233 return false;
5234 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
5235 const Register DefReg = MI.getOperand(0).getReg();
5236 const LLT DstTy = MRI.getType(DefReg);
5237 unsigned DstSize = DstTy.getSizeInBits();
5238
5239 if (DstSize != 32 && DstSize != 64)
5240 return false;
5241
5242 MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,
5243 MI.getOperand(1).getReg(), MRI);
5244 int64_t Lane;
5245 if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane)))
5246 return false;
5247 Register Src0 = Extract->getOperand(1).getReg();
5248
5249 const LLT VecTy = MRI.getType(Src0);
5250 if (VecTy.isScalableVector())
5251 return false;
5252
5253 if (VecTy.getSizeInBits() != 128) {
5254 const MachineInstr *ScalarToVector = emitScalarToVector(
5255 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
5256 assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
5257 Src0 = ScalarToVector->getOperand(0).getReg();
5258 }
5259
5260 unsigned Opcode;
5261 if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
5262 Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
5263 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
5264 Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
5265 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
5266 Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
5267 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
5268 Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
5269 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
5270 Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
5271 else
5272 llvm_unreachable("Unexpected type combo for S/UMov!");
5273
5274 // We may need to generate one of these, depending on the type and sign of the
5275 // input:
5276 // DstReg = SMOV Src0, Lane;
5277 // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
5278 MachineInstr *ExtI = nullptr;
5279 if (DstSize == 64 && !IsSigned) {
5280 Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
5281 MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
5282 ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
5283 .addUse(NewReg)
5284 .addImm(AArch64::sub_32);
5285 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
5286 } else
5287 ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
5288
5290 MI.eraseFromParent();
5291 return true;
5292}
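// Illustrative example: sign-extending lane 1 of a <8 x s16> vector to s32,
//   %x:gpr(s32) = G_SEXT (G_EXTRACT_VECTOR_ELT %vec:fpr(<8 x s16>), 1)
// is selected above as roughly
//   %x = SMOVvi16to32 %vec, 1
// while the unsigned 64-bit case goes through UMOVvi16 plus a SUBREG_TO_REG.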
5293
5294MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(
5295 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5296 unsigned int Op;
5297 if (DstSize == 128) {
5298 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5299 return nullptr;
5300 Op = AArch64::MOVIv16b_ns;
5301 } else {
5302 Op = AArch64::MOVIv8b_ns;
5303 }
5304
5305 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5306
5307 if (AArch64_AM::isAdvSIMDModImmType9(Val)) {
5308 Val = AArch64_AM::encodeAdvSIMDModImmType9(Val);
5309 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5310 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5311 return &*Mov;
5312 }
5313 return nullptr;
5314}
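// E.g. (illustrative): a splat of the 8-bit value 0x2A across a 128-bit
// vector is materialized here as a single MOVIv16b_ns with immediate 42,
// replicating the byte into every lane; the 64-bit form uses MOVIv8b_ns.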
5315
5316MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16(
5317 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5318 bool Inv) {
5319
5320 unsigned int Op;
5321 if (DstSize == 128) {
5322 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5323 return nullptr;
5324 Op = Inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16;
5325 } else {
5326 Op = Inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16;
5327 }
5328
5329 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5330 uint64_t Shift;
5331
5332 if (AArch64_AM::isAdvSIMDModImmType5(Val)) {
5333 Val = AArch64_AM::encodeAdvSIMDModImmType5(Val);
5334 Shift = 0;
5335 } else if (AArch64_AM::isAdvSIMDModImmType6(Val)) {
5336 Val = AArch64_AM::encodeAdvSIMDModImmType6(Val);
5337 Shift = 8;
5338 } else
5339 return nullptr;
5340
5341 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5342 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5343 return &*Mov;
5344}
5345
5346MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32(
5347 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5348 bool Inv) {
5349
5350 unsigned int Op;
5351 if (DstSize == 128) {
5352 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5353 return nullptr;
5354 Op = Inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32;
5355 } else {
5356 Op = Inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32;
5357 }
5358
5359 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5360 uint64_t Shift;
5361
5362 if ((AArch64_AM::isAdvSIMDModImmType1(Val))) {
5363 Val = AArch64_AM::encodeAdvSIMDModImmType1(Val);
5364 Shift = 0;
5365 } else if ((AArch64_AM::isAdvSIMDModImmType2(Val))) {
5366 Val = AArch64_AM::encodeAdvSIMDModImmType2(Val);
5367 Shift = 8;
5368 } else if ((AArch64_AM::isAdvSIMDModImmType3(Val))) {
5369 Val = AArch64_AM::encodeAdvSIMDModImmType3(Val);
5370 Shift = 16;
5371 } else if ((AArch64_AM::isAdvSIMDModImmType4(Val))) {
5372 Val = AArch64_AM::encodeAdvSIMDModImmType4(Val);
5373 Shift = 24;
5374 } else
5375 return nullptr;
5376
5377 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5378 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5379 return &*Mov;
5380}
5381
5382MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64(
5383 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5384
5385 unsigned int Op;
5386 if (DstSize == 128) {
5387 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5388 return nullptr;
5389 Op = AArch64::MOVIv2d_ns;
5390 } else {
5391 Op = AArch64::MOVID;
5392 }
5393
5394 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5395 if (AArch64_AM::isAdvSIMDModImmType10(Val)) {
5396 Val = AArch64_AM::encodeAdvSIMDModImmType10(Val);
5397 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5398 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5399 return &*Mov;
5400 }
5401 return nullptr;
5402}
5403
5404MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s(
5405 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5406 bool Inv) {
5407
5408 unsigned int Op;
5409 if (DstSize == 128) {
5410 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5411 return nullptr;
5412 Op = Inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl;
5413 } else {
5414 Op = Inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl;
5415 }
5416
5417 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5418 uint64_t Shift;
5419
5420 if (AArch64_AM::isAdvSIMDModImmType7(Val)) {
5421 Val = AArch64_AM::encodeAdvSIMDModImmType7(Val);
5422 Shift = 264;
5423 } else if (AArch64_AM::isAdvSIMDModImmType8(Val)) {
5424 Val = AArch64_AM::encodeAdvSIMDModImmType8(Val);
5425 Shift = 272;
5426 } else
5427 return nullptr;
5428
5429 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5430 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5431 return &*Mov;
5432}
5433
5434MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
5435 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5436
5437 unsigned int Op;
5438 bool IsWide = false;
5439 if (DstSize == 128) {
5440 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5441 return nullptr;
5442 Op = AArch64::FMOVv4f32_ns;
5443 IsWide = true;
5444 } else {
5445 Op = AArch64::FMOVv2f32_ns;
5446 }
5447
5448 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5449
5450 if (AArch64_AM::isAdvSIMDModImmType11(Val)) {
5451 Val = AArch64_AM::encodeAdvSIMDModImmType11(Val);
5452 } else if (IsWide && AArch64_AM::isAdvSIMDModImmType12(Val)) {
5453 Val = AArch64_AM::encodeAdvSIMDModImmType12(Val);
5454 Op = AArch64::FMOVv2f64_ns;
5455 } else
5456 return nullptr;
5457
5458 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5459 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5460 return &*Mov;
5461}
5462
5463bool AArch64InstructionSelector::selectIndexedExtLoad(
5464 MachineInstr &MI, MachineRegisterInfo &MRI) {
5465 auto &ExtLd = cast<GIndexedAnyExtLoad>(MI);
5466 Register Dst = ExtLd.getDstReg();
5467 Register WriteBack = ExtLd.getWritebackReg();
5468 Register Base = ExtLd.getBaseReg();
5469 Register Offset = ExtLd.getOffsetReg();
5470 LLT Ty = MRI.getType(Dst);
5471 assert(Ty.getSizeInBits() <= 64); // Only for scalar GPRs.
5472 unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();
5473 bool IsPre = ExtLd.isPre();
5474 bool IsSExt = isa<GIndexedSExtLoad>(ExtLd);
5475 unsigned InsertIntoSubReg = 0;
5476 bool IsDst64 = Ty.getSizeInBits() == 64;
5477
5478 // Zero/sign-extended results should live on the GPR bank, but we can also
5479 // handle any-extending and zero-extending loads into an FPR, so long as
5480 // they are scalar.
5480 bool IsFPR = RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID;
5481 if ((IsSExt && IsFPR) || Ty.isVector())
5482 return false;
5483
5484 unsigned Opc = 0;
5485 LLT NewLdDstTy;
5486 LLT s32 = LLT::scalar(32);
5487 LLT s64 = LLT::scalar(64);
5488
5489 if (MemSizeBits == 8) {
5490 if (IsSExt) {
5491 if (IsDst64)
5492 Opc = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
5493 else
5494 Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
5495 NewLdDstTy = IsDst64 ? s64 : s32;
5496 } else if (IsFPR) {
5497 Opc = IsPre ? AArch64::LDRBpre : AArch64::LDRBpost;
5498 InsertIntoSubReg = AArch64::bsub;
5499 NewLdDstTy = LLT::scalar(MemSizeBits);
5500 } else {
5501 Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
5502 InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
5503 NewLdDstTy = s32;
5504 }
5505 } else if (MemSizeBits == 16) {
5506 if (IsSExt) {
5507 if (IsDst64)
5508 Opc = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
5509 else
5510 Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
5511 NewLdDstTy = IsDst64 ? s64 : s32;
5512 } else if (IsFPR) {
5513 Opc = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
5514 InsertIntoSubReg = AArch64::hsub;
5515 NewLdDstTy = LLT::scalar(MemSizeBits);
5516 } else {
5517 Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
5518 InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
5519 NewLdDstTy = s32;
5520 }
5521 } else if (MemSizeBits == 32) {
5522 if (IsSExt) {
5523 Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
5524 NewLdDstTy = s64;
5525 } else if (IsFPR) {
5526 Opc = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
5527 InsertIntoSubReg = AArch64::ssub;
5528 NewLdDstTy = LLT::scalar(MemSizeBits);
5529 } else {
5530 Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
5531 InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
5532 NewLdDstTy = s32;
5533 }
5534 } else {
5535 llvm_unreachable("Unexpected size for indexed load");
5536 }
5537
5538 auto Cst = getIConstantVRegVal(Offset, MRI);
5539 if (!Cst)
5540 return false; // Shouldn't happen, but just in case.
5541
5542 auto LdMI = MIB.buildInstr(Opc, {WriteBack, NewLdDstTy}, {Base})
5543 .addImm(Cst->getSExtValue());
5544 LdMI.cloneMemRefs(ExtLd);
5546 // Make sure to select the load with the MemTy as the dest type, and then
5547 // insert into a larger reg if needed.
5548 if (InsertIntoSubReg) {
5549 // Generate a SUBREG_TO_REG.
5550 auto SubToReg = MIB.buildInstr(TargetOpcode::SUBREG_TO_REG, {Dst}, {})
5551 .addUse(LdMI.getReg(1))
5552 .addImm(InsertIntoSubReg);
5553 RBI.constrainGenericRegister(
5554 SubToReg.getReg(0),
5555 *getRegClassForTypeOnBank(MRI.getType(Dst),
5556 *RBI.getRegBank(Dst, MRI, TRI)),
5557 MRI);
5558 } else {
5559 auto Copy = MIB.buildCopy(Dst, LdMI.getReg(1));
5560 selectCopy(*Copy, TII, MRI, TRI, RBI);
5561 }
5562 MI.eraseFromParent();
5563
5564 return true;
5565}
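// As a rough example (made-up virtual registers): a pre-indexed
// sign-extending load of an s8 into an s32, i.e.
//   %val:gpr(s32), %wb:gpr(p0) = G_INDEXED_SEXTLOAD %base(p0), %off, pre
// with a constant offset of 1 is selected above as
//   %wb, %val = LDRSBWpre %base, 1
// while the zero/any-extending GPR forms go through the LDRBB* opcodes plus
// an optional SUBREG_TO_REG when the destination is 64 bits.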
5566
5567bool AArch64InstructionSelector::selectIndexedLoad(MachineInstr &MI,
5568 MachineRegisterInfo &MRI) {
5569 auto &Ld = cast<GIndexedLoad>(MI);
5570 Register Dst = Ld.getDstReg();
5571 Register WriteBack = Ld.getWritebackReg();
5572 Register Base = Ld.getBaseReg();
5573 Register Offset = Ld.getOffsetReg();
5574 assert(MRI.getType(Dst).getSizeInBits() <= 128 &&
5575 "Unexpected type for indexed load");
5576 unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();
5577
5578 if (MemSize < MRI.getType(Dst).getSizeInBytes())
5579 return selectIndexedExtLoad(MI, MRI);
5580
5581 unsigned Opc = 0;
5582 if (Ld.isPre()) {
5583 static constexpr unsigned GPROpcodes[] = {
5584 AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre,
5585 AArch64::LDRXpre};
5586 static constexpr unsigned FPROpcodes[] = {
5587 AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre,
5588 AArch64::LDRQpre};
5589 Opc = (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5590 ? FPROpcodes[Log2_32(MemSize)]
5591 : GPROpcodes[Log2_32(MemSize)];
5592 ;
5593 } else {
5594 static constexpr unsigned GPROpcodes[] = {
5595 AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
5596 AArch64::LDRXpost};
5597 static constexpr unsigned FPROpcodes[] = {
5598 AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
5599 AArch64::LDRDpost, AArch64::LDRQpost};
5600 Opc = (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5601 ? FPROpcodes[Log2_32(MemSize)]
5602 : GPROpcodes[Log2_32(MemSize)];
5603 ;
5604 }
5605 auto Cst = getIConstantVRegVal(Offset, MRI);
5606 if (!Cst)
5607 return false; // Shouldn't happen, but just in case.
5608 auto LdMI =
5609 MIB.buildInstr(Opc, {WriteBack, Dst}, {Base}).addImm(Cst->getSExtValue());
5610 LdMI.cloneMemRefs(Ld);
5612 MI.eraseFromParent();
5613 return true;
5614}
5615
5616bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I,
5617 MachineRegisterInfo &MRI) {
5618 Register Dst = I.getWritebackReg();
5619 Register Val = I.getValueReg();
5620 Register Base = I.getBaseReg();
5621 Register Offset = I.getOffsetReg();
5622 assert(MRI.getType(Val).getSizeInBits() <= 128 &&
5623 "Unexpected type for indexed store");
5624
5625 LocationSize MemSize = I.getMMO().getSize();
5626 unsigned MemSizeInBytes = MemSize.getValue();
5627
5628 assert(MemSizeInBytes && MemSizeInBytes <= 16 &&
5629 "Unexpected indexed store size");
5630 unsigned MemSizeLog2 = Log2_32(MemSizeInBytes);
5631
5632 unsigned Opc = 0;
5633 if (I.isPre()) {
5634 static constexpr unsigned GPROpcodes[] = {
5635 AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre,
5636 AArch64::STRXpre};
5637 static constexpr unsigned FPROpcodes[] = {
5638 AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre,
5639 AArch64::STRQpre};
5640
5641 if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5642 Opc = FPROpcodes[MemSizeLog2];
5643 else
5644 Opc = GPROpcodes[MemSizeLog2];
5645 } else {
5646 static constexpr unsigned GPROpcodes[] = {
5647 AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
5648 AArch64::STRXpost};
5649 static constexpr unsigned FPROpcodes[] = {
5650 AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,
5651 AArch64::STRDpost, AArch64::STRQpost};
5652
5653 if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5654 Opc = FPROpcodes[MemSizeLog2];
5655 else
5656 Opc = GPROpcodes[MemSizeLog2];
5657 }
5658
5659 auto Cst = getIConstantVRegVal(Offset, MRI);
5660 if (!Cst)
5661 return false; // Shouldn't happen, but just in case.
5662 auto Str =
5663 MIB.buildInstr(Opc, {Dst}, {Val, Base}).addImm(Cst->getSExtValue());
5664 Str.cloneMemRefs(I);
5666 I.eraseFromParent();
5667 return true;
5668}
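// Similarly (sketch only): a post-indexed store of a 64-bit GPR value with a
// constant offset of 8 becomes roughly
//   %wb = STRXpost %val, %base, 8
// where %wb is the updated base returned by the G_INDEXED_STORE.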
5669
5670MachineInstr *
5671AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
5672 MachineIRBuilder &MIRBuilder,
5673 MachineRegisterInfo &MRI) {
5674 LLT DstTy = MRI.getType(Dst);
5675 unsigned DstSize = DstTy.getSizeInBits();
5676 assert((DstSize == 64 || DstSize == 128) &&
5677 "Unexpected vector constant size");
5678
5679 if (CV->isNullValue()) {
5680 if (DstSize == 128) {
5681 auto Mov =
5682 MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
5684 return &*Mov;
5685 }
5686
5687 if (DstSize == 64) {
5688 auto Mov =
5689 MIRBuilder
5690 .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
5691 .addImm(0);
5692 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
5693 .addReg(Mov.getReg(0), {}, AArch64::dsub);
5694 RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
5695 return &*Copy;
5696 }
5697 }
5698
5699 if (Constant *SplatValue = CV->getSplatValue()) {
5700 APInt SplatValueAsInt =
5701 isa<ConstantFP>(SplatValue)
5702 ? cast<ConstantFP>(SplatValue)->getValueAPF().bitcastToAPInt()
5703 : SplatValue->getUniqueInteger();
5704 APInt DefBits = APInt::getSplat(
5705 DstSize, SplatValueAsInt.trunc(DstTy.getScalarSizeInBits()));
5706 auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * {
5707 MachineInstr *NewOp;
5708 bool Inv = false;
5709 if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, DefBits, MIRBuilder)) ||
5710 (NewOp =
5711 tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5712 (NewOp =
5713 tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5714 (NewOp =
5715 tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5716 (NewOp = tryAdvSIMDModImm8(Dst, DstSize, DefBits, MIRBuilder)) ||
5717 (NewOp = tryAdvSIMDModImmFP(Dst, DstSize, DefBits, MIRBuilder)))
5718 return NewOp;
5719
5720 DefBits = ~DefBits;
5721 Inv = true;
5722 if ((NewOp =
5723 tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5724 (NewOp =
5725 tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5726 (NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)))
5727 return NewOp;
5728 return nullptr;
5729 };
5730
5731 if (auto *NewOp = TryMOVIWithBits(DefBits))
5732 return NewOp;
5733
5734 // See if a fneg of the constant can be materialized with a MOVI, etc
5735 auto TryWithFNeg = [&](APInt DefBits, int NumBits,
5736 unsigned NegOpc) -> MachineInstr * {
5737 // FNegate each sub-element of the constant
5738 APInt Neg = APInt::getHighBitsSet(NumBits, 1).zext(DstSize);
5739 APInt NegBits(DstSize, 0);
5740 unsigned NumElts = DstSize / NumBits;
5741 for (unsigned i = 0; i < NumElts; i++)
5742 NegBits |= Neg << (NumBits * i);
5743 NegBits = DefBits ^ NegBits;
5744
5745 // Try to create the new constants with MOVI, and if so generate a fneg
5746 // for it.
5747 if (auto *NewOp = TryMOVIWithBits(NegBits)) {
5748 Register NewDst = MRI.createVirtualRegister(
5749 DstSize == 64 ? &AArch64::FPR64RegClass : &AArch64::FPR128RegClass);
5750 NewOp->getOperand(0).setReg(NewDst);
5751 return MIRBuilder.buildInstr(NegOpc, {Dst}, {NewDst});
5752 }
5753 return nullptr;
5754 };
5755 MachineInstr *R;
5756 if ((R = TryWithFNeg(DefBits, 32,
5757 DstSize == 64 ? AArch64::FNEGv2f32
5758 : AArch64::FNEGv4f32)) ||
5759 (R = TryWithFNeg(DefBits, 64,
5760 DstSize == 64 ? AArch64::FNEGDr
5761 : AArch64::FNEGv2f64)) ||
5762 (STI.hasFullFP16() &&
5763 (R = TryWithFNeg(DefBits, 16,
5764 DstSize == 64 ? AArch64::FNEGv4f16
5765 : AArch64::FNEGv8f16))))
5766 return R;
5767 }
5768
5769 auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
5770 if (!CPLoad) {
5771 LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
5772 return nullptr;
5773 }
5774
5775 auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
5776 RBI.constrainGenericRegister(
5777 Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
5778 return &*Copy;
5779}
5780
5781bool AArch64InstructionSelector::tryOptConstantBuildVec(
5782 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
5783 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5784 unsigned DstSize = DstTy.getSizeInBits();
5785 assert(DstSize <= 128 && "Unexpected build_vec type!");
5786 if (DstSize < 32)
5787 return false;
5788 // Check if we're building a constant vector, in which case we want to
5789 // generate a constant pool load instead of a vector insert sequence.
5790 SmallVector<Constant *, 16> Csts;
5791 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
5792 Register OpReg = I.getOperand(Idx).getReg();
5793 if (auto AnyConst = getAnyConstantVRegValWithLookThrough(
5794 OpReg, MRI, /*LookThroughInstrs=*/true,
5795 /*LookThroughAnyExt=*/true)) {
5796 MachineInstr *DefMI = MRI.getVRegDef(AnyConst->VReg);
5797
5798 if (DefMI->getOpcode() == TargetOpcode::G_CONSTANT) {
5799 Csts.emplace_back(
5800 ConstantInt::get(MIB.getMF().getFunction().getContext(),
5801 std::move(AnyConst->Value)));
5802 continue;
5803 }
5804
5805 if (DefMI->getOpcode() == TargetOpcode::G_FCONSTANT) {
5806 Csts.emplace_back(
5807 const_cast<ConstantFP *>(DefMI->getOperand(1).getFPImm()));
5808 continue;
5809 }
5810 }
5811 return false;
5812 }
5813 Constant *CV = ConstantVector::get(Csts);
5814 if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
5815 return false;
5816 I.eraseFromParent();
5817 return true;
5818}
5819
5820bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
5821 MachineInstr &I, MachineRegisterInfo &MRI) {
5822 // Given:
5823 // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
5824 //
5825 // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
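 // For instance (illustrative): with a single defined s64 element on the FPR
 // bank,
 //   %vec:fpr(<2 x s64>) = G_BUILD_VECTOR %elt:fpr(s64), %undef
 // can be selected as roughly
 //   %vec = SUBREG_TO_REG %elt, %subreg.dsub
 // instead of a full insert sequence.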
5826 Register Dst = I.getOperand(0).getReg();
5827 Register EltReg = I.getOperand(1).getReg();
5828 LLT EltTy = MRI.getType(EltReg);
5829 // If the index isn't on the same bank as its elements, then this can't be a
5830 // SUBREG_TO_REG.
5831 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5832 const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
5833 if (EltRB != DstRB)
5834 return false;
5835 if (any_of(drop_begin(I.operands(), 2), [&MRI](const MachineOperand &Op) {
5836 return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(), MRI);
5837 }))
5838 return false;
5839 unsigned SubReg;
5840 const TargetRegisterClass *EltRC = getRegClassForTypeOnBank(EltTy, EltRB);
5841 if (!EltRC)
5842 return false;
5843 const TargetRegisterClass *DstRC =
5844 getRegClassForTypeOnBank(MRI.getType(Dst), DstRB);
5845 if (!DstRC)
5846 return false;
5847 if (!getSubRegForClass(EltRC, TRI, SubReg))
5848 return false;
5849 auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
5850 .addUse(EltReg)
5851 .addImm(SubReg);
5852 I.eraseFromParent();
5853 constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);
5854 return RBI.constrainGenericRegister(Dst, *DstRC, MRI);
5855}
5856
5857bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
5858 MachineRegisterInfo &MRI) {
5859 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5860 // Until we port more of the optimized selections, for now just use a vector
5861 // insert sequence.
5862 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5863 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
5864 unsigned EltSize = EltTy.getSizeInBits();
5865
5866 if (tryOptConstantBuildVec(I, DstTy, MRI))
5867 return true;
5868 if (tryOptBuildVecToSubregToReg(I, MRI))
5869 return true;
5870
5871 if (EltSize != 8 && EltSize != 16 && EltSize != 32 && EltSize != 64)
5872 return false; // Don't support all element types yet.
5873 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
5874
5875 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5876 MachineInstr *ScalarToVec =
5877 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
5878 I.getOperand(1).getReg(), MIB);
5879 if (!ScalarToVec)
5880 return false;
5881
5882 Register DstVec = ScalarToVec->getOperand(0).getReg();
5883 unsigned DstSize = DstTy.getSizeInBits();
5884
5885 // Keep track of the last MI we inserted. Later on, we might be able to save
5886 // a copy using it.
5887 MachineInstr *PrevMI = ScalarToVec;
5888 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
5889 // Note that if we don't do a subregister copy, we can end up making an
5890 // extra register.
5891 Register OpReg = I.getOperand(i).getReg();
5892 // Do not emit inserts for undefs
5893 if (!getOpcodeDef<GImplicitDef>(OpReg, MRI)) {
5894 PrevMI = &*emitLaneInsert(std::nullopt, DstVec, OpReg, i - 1, RB, MIB);
5895 DstVec = PrevMI->getOperand(0).getReg();
5896 }
5897 }
5898
5899 // If DstTy's size in bits is less than 128, then emit a subregister copy
5900 // from DstVec to the last register we've defined.
5901 if (DstSize < 128) {
5902 // Force this to be FPR using the destination vector.
5903 const TargetRegisterClass *RC =
5904 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5905 if (!RC)
5906 return false;
5907 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5908 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5909 return false;
5910 }
5911
5912 unsigned SubReg = 0;
5913 if (!getSubRegForClass(RC, TRI, SubReg))
5914 return false;
5915 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5916 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
5917 << "\n");
5918 return false;
5919 }
5920
5921 Register Reg = MRI.createVirtualRegister(RC);
5922 Register DstReg = I.getOperand(0).getReg();
5923
5924 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, {}, SubReg);
5925 MachineOperand &RegOp = I.getOperand(1);
5926 RegOp.setReg(Reg);
5927 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5928 } else {
5929 // We either have a vector with all elements (except the first one) undef or
5930 // at least one non-undef non-first element. In the first case, we need to
5931 // constrain the output register ourselves as we may have generated an
5932 // INSERT_SUBREG operation which is a generic operation for which the
5933 // output regclass cannot be automatically chosen.
5934 //
5935 // In the second case, there is no need to do this as it may generate an
5936 // instruction like INSvi32gpr where the regclass can be automatically
5937 // chosen.
5938 //
5939 // Also, we save a copy by re-using the destination register on the final
5940 // insert.
5941 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
5943
5944 Register DstReg = PrevMI->getOperand(0).getReg();
5945 if (PrevMI == ScalarToVec && DstReg.isVirtual()) {
5946 const TargetRegisterClass *RC =
5947 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5948 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5949 }
5950 }
5951
5952 I.eraseFromParent();
5953 return true;
5954}
5955
5956bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
5957 unsigned NumVecs,
5958 MachineInstr &I) {
5959 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5960 assert(Opc && "Expected an opcode?");
5961 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5962 auto &MRI = *MIB.getMRI();
5963 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5964 unsigned Size = Ty.getSizeInBits();
5965 assert((Size == 64 || Size == 128) &&
5966 "Destination must be 64 bits or 128 bits?");
5967 unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
5968 auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg();
5969 assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
5970 auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr});
5971 Load.cloneMemRefs(I);
5973 Register SelectedLoadDst = Load->getOperand(0).getReg();
5974 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
5975 auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {})
5976 .addReg(SelectedLoadDst, {}, SubReg + Idx);
5977 // Emit the subreg copies and immediately select them.
5978 // FIXME: We should refactor our copy code into an emitCopy helper and
5979 // clean up uses of this pattern elsewhere in the selector.
5980 selectCopy(*Vec, TII, MRI, TRI, RBI);
5981 }
5982 return true;
5983}
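// E.g. (rough sketch): an @llvm.aarch64.neon.ld2 of <4 x s32> selects
// LD2Twov4s, which defines a QQ register tuple; the loop above then copies
// qsub0 and qsub1 out of the tuple into the two destination registers.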
5984
5985bool AArch64InstructionSelector::selectVectorLoadLaneIntrinsic(
5986 unsigned Opc, unsigned NumVecs, MachineInstr &I) {
5987 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5988 assert(Opc && "Expected an opcode?");
5989 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5990 auto &MRI = *MIB.getMRI();
5991 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5992 bool Narrow = Ty.getSizeInBits() == 64;
5993
5994 auto FirstSrcRegIt = I.operands_begin() + NumVecs + 1;
5995 SmallVector<Register, 4> Regs(NumVecs);
5996 std::transform(FirstSrcRegIt, FirstSrcRegIt + NumVecs, Regs.begin(),
5997 [](auto MO) { return MO.getReg(); });
5998
5999 if (Narrow) {
6000 transform(Regs, Regs.begin(), [this](Register Reg) {
6001 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
6002 ->getOperand(0)
6003 .getReg();
6004 });
6005 Ty = Ty.multiplyElements(2);
6006 }
6007
6008 Register Tuple = createQTuple(Regs, MIB);
6009 auto LaneNo = getIConstantVRegVal((FirstSrcRegIt + NumVecs)->getReg(), MRI);
6010 if (!LaneNo)
6011 return false;
6012
6013 Register Ptr = (FirstSrcRegIt + NumVecs + 1)->getReg();
6014 auto Load = MIB.buildInstr(Opc, {Ty}, {})
6015 .addReg(Tuple)
6016 .addImm(LaneNo->getZExtValue())
6017 .addReg(Ptr);
6018 Load.cloneMemRefs(I);
6020 Register SelectedLoadDst = Load->getOperand(0).getReg();
6021 unsigned SubReg = AArch64::qsub0;
6022 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
6023 auto Vec = MIB.buildInstr(TargetOpcode::COPY,
6024 {Narrow ? DstOp(&AArch64::FPR128RegClass)
6025 : DstOp(I.getOperand(Idx).getReg())},
6026 {})
6027 .addReg(SelectedLoadDst, {}, SubReg + Idx);
6028 Register WideReg = Vec.getReg(0);
6029 // Emit the subreg copies and immediately select them.
6030 selectCopy(*Vec, TII, MRI, TRI, RBI);
6031 if (Narrow &&
6032 !emitNarrowVector(I.getOperand(Idx).getReg(), WideReg, MIB, MRI))
6033 return false;
6034 }
6035 return true;
6036}
6037
6038void AArch64InstructionSelector::selectVectorStoreIntrinsic(MachineInstr &I,
6039 unsigned NumVecs,
6040 unsigned Opc) {
6041 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
6042 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6043 Register Ptr = I.getOperand(1 + NumVecs).getReg();
6044
6045 SmallVector<Register, 2> Regs(NumVecs);
6046 std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
6047 Regs.begin(), [](auto MO) { return MO.getReg(); });
6048
6049 Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
6050 : createDTuple(Regs, MIB);
6051 auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});
6052 Store.cloneMemRefs(I);
6054}
6055
6056bool AArch64InstructionSelector::selectVectorStoreLaneIntrinsic(
6057 MachineInstr &I, unsigned NumVecs, unsigned Opc) {
6058 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
6059 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6060 bool Narrow = Ty.getSizeInBits() == 64;
6061
6062 SmallVector<Register, 2> Regs(NumVecs);
6063 std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
6064 Regs.begin(), [](auto MO) { return MO.getReg(); });
6065
6066 if (Narrow)
6067 transform(Regs, Regs.begin(), [this](Register Reg) {
6068 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
6069 ->getOperand(0)
6070 .getReg();
6071 });
6072
6073 Register Tuple = createQTuple(Regs, MIB);
6074
6075 auto LaneNo = getIConstantVRegVal(I.getOperand(1 + NumVecs).getReg(), MRI);
6076 if (!LaneNo)
6077 return false;
6078 Register Ptr = I.getOperand(1 + NumVecs + 1).getReg();
6079 auto Store = MIB.buildInstr(Opc, {}, {})
6080 .addReg(Tuple)
6081 .addImm(LaneNo->getZExtValue())
6082 .addReg(Ptr);
6083 Store.cloneMemRefs(I);
6085 return true;
6086}
6087
6088bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
6089 MachineInstr &I, MachineRegisterInfo &MRI) {
6090 // Find the intrinsic ID.
6091 unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
6092
6093 const LLT S8 = LLT::scalar(8);
6094 const LLT S16 = LLT::scalar(16);
6095 const LLT S32 = LLT::scalar(32);
6096 const LLT S64 = LLT::scalar(64);
6097 const LLT P0 = LLT::pointer(0, 64);
6098 // Select the instruction.
6099 switch (IntrinID) {
6100 default:
6101 return false;
6102 case Intrinsic::aarch64_ldxp:
6103 case Intrinsic::aarch64_ldaxp: {
6104 auto NewI = MIB.buildInstr(
6105 IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
6106 {I.getOperand(0).getReg(), I.getOperand(1).getReg()},
6107 {I.getOperand(3)});
6108 NewI.cloneMemRefs(I);
6110 break;
6111 }
6112 case Intrinsic::aarch64_neon_ld1x2: {
6113 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6114 unsigned Opc = 0;
6115 if (Ty == LLT::fixed_vector(8, S8))
6116 Opc = AArch64::LD1Twov8b;
6117 else if (Ty == LLT::fixed_vector(16, S8))
6118 Opc = AArch64::LD1Twov16b;
6119 else if (Ty == LLT::fixed_vector(4, S16))
6120 Opc = AArch64::LD1Twov4h;
6121 else if (Ty == LLT::fixed_vector(8, S16))
6122 Opc = AArch64::LD1Twov8h;
6123 else if (Ty == LLT::fixed_vector(2, S32))
6124 Opc = AArch64::LD1Twov2s;
6125 else if (Ty == LLT::fixed_vector(4, S32))
6126 Opc = AArch64::LD1Twov4s;
6127 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6128 Opc = AArch64::LD1Twov2d;
6129 else if (Ty == S64 || Ty == P0)
6130 Opc = AArch64::LD1Twov1d;
6131 else
6132 llvm_unreachable("Unexpected type for ld1x2!");
6133 selectVectorLoadIntrinsic(Opc, 2, I);
6134 break;
6135 }
6136 case Intrinsic::aarch64_neon_ld1x3: {
6137 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6138 unsigned Opc = 0;
6139 if (Ty == LLT::fixed_vector(8, S8))
6140 Opc = AArch64::LD1Threev8b;
6141 else if (Ty == LLT::fixed_vector(16, S8))
6142 Opc = AArch64::LD1Threev16b;
6143 else if (Ty == LLT::fixed_vector(4, S16))
6144 Opc = AArch64::LD1Threev4h;
6145 else if (Ty == LLT::fixed_vector(8, S16))
6146 Opc = AArch64::LD1Threev8h;
6147 else if (Ty == LLT::fixed_vector(2, S32))
6148 Opc = AArch64::LD1Threev2s;
6149 else if (Ty == LLT::fixed_vector(4, S32))
6150 Opc = AArch64::LD1Threev4s;
6151 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6152 Opc = AArch64::LD1Threev2d;
6153 else if (Ty == S64 || Ty == P0)
6154 Opc = AArch64::LD1Threev1d;
6155 else
6156 llvm_unreachable("Unexpected type for ld1x3!");
6157 selectVectorLoadIntrinsic(Opc, 3, I);
6158 break;
6159 }
6160 case Intrinsic::aarch64_neon_ld1x4: {
6161 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6162 unsigned Opc = 0;
6163 if (Ty == LLT::fixed_vector(8, S8))
6164 Opc = AArch64::LD1Fourv8b;
6165 else if (Ty == LLT::fixed_vector(16, S8))
6166 Opc = AArch64::LD1Fourv16b;
6167 else if (Ty == LLT::fixed_vector(4, S16))
6168 Opc = AArch64::LD1Fourv4h;
6169 else if (Ty == LLT::fixed_vector(8, S16))
6170 Opc = AArch64::LD1Fourv8h;
6171 else if (Ty == LLT::fixed_vector(2, S32))
6172 Opc = AArch64::LD1Fourv2s;
6173 else if (Ty == LLT::fixed_vector(4, S32))
6174 Opc = AArch64::LD1Fourv4s;
6175 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6176 Opc = AArch64::LD1Fourv2d;
6177 else if (Ty == S64 || Ty == P0)
6178 Opc = AArch64::LD1Fourv1d;
6179 else
6180 llvm_unreachable("Unexpected type for ld1x4!");
6181 selectVectorLoadIntrinsic(Opc, 4, I);
6182 break;
6183 }
6184 case Intrinsic::aarch64_neon_ld2: {
6185 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6186 unsigned Opc = 0;
6187 if (Ty == LLT::fixed_vector(8, S8))
6188 Opc = AArch64::LD2Twov8b;
6189 else if (Ty == LLT::fixed_vector(16, S8))
6190 Opc = AArch64::LD2Twov16b;
6191 else if (Ty == LLT::fixed_vector(4, S16))
6192 Opc = AArch64::LD2Twov4h;
6193 else if (Ty == LLT::fixed_vector(8, S16))
6194 Opc = AArch64::LD2Twov8h;
6195 else if (Ty == LLT::fixed_vector(2, S32))
6196 Opc = AArch64::LD2Twov2s;
6197 else if (Ty == LLT::fixed_vector(4, S32))
6198 Opc = AArch64::LD2Twov4s;
6199 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6200 Opc = AArch64::LD2Twov2d;
6201 else if (Ty == S64 || Ty == P0)
6202 Opc = AArch64::LD1Twov1d;
6203 else
6204 llvm_unreachable("Unexpected type for ld2!");
6205 selectVectorLoadIntrinsic(Opc, 2, I);
6206 break;
6207 }
6208 case Intrinsic::aarch64_neon_ld2lane: {
6209 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6210 unsigned Opc;
6211 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6212 Opc = AArch64::LD2i8;
6213 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6214 Opc = AArch64::LD2i16;
6215 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6216 Opc = AArch64::LD2i32;
6217 else if (Ty == LLT::fixed_vector(2, S64) ||
6218 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6219 Opc = AArch64::LD2i64;
6220 else
6221 llvm_unreachable("Unexpected type for ld2lane!");
6222 if (!selectVectorLoadLaneIntrinsic(Opc, 2, I))
6223 return false;
6224 break;
6225 }
6226 case Intrinsic::aarch64_neon_ld2r: {
6227 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6228 unsigned Opc = 0;
6229 if (Ty == LLT::fixed_vector(8, S8))
6230 Opc = AArch64::LD2Rv8b;
6231 else if (Ty == LLT::fixed_vector(16, S8))
6232 Opc = AArch64::LD2Rv16b;
6233 else if (Ty == LLT::fixed_vector(4, S16))
6234 Opc = AArch64::LD2Rv4h;
6235 else if (Ty == LLT::fixed_vector(8, S16))
6236 Opc = AArch64::LD2Rv8h;
6237 else if (Ty == LLT::fixed_vector(2, S32))
6238 Opc = AArch64::LD2Rv2s;
6239 else if (Ty == LLT::fixed_vector(4, S32))
6240 Opc = AArch64::LD2Rv4s;
6241 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6242 Opc = AArch64::LD2Rv2d;
6243 else if (Ty == S64 || Ty == P0)
6244 Opc = AArch64::LD2Rv1d;
6245 else
6246 llvm_unreachable("Unexpected type for ld2r!");
6247 selectVectorLoadIntrinsic(Opc, 2, I);
6248 break;
6249 }
6250 case Intrinsic::aarch64_neon_ld3: {
6251 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6252 unsigned Opc = 0;
6253 if (Ty == LLT::fixed_vector(8, S8))
6254 Opc = AArch64::LD3Threev8b;
6255 else if (Ty == LLT::fixed_vector(16, S8))
6256 Opc = AArch64::LD3Threev16b;
6257 else if (Ty == LLT::fixed_vector(4, S16))
6258 Opc = AArch64::LD3Threev4h;
6259 else if (Ty == LLT::fixed_vector(8, S16))
6260 Opc = AArch64::LD3Threev8h;
6261 else if (Ty == LLT::fixed_vector(2, S32))
6262 Opc = AArch64::LD3Threev2s;
6263 else if (Ty == LLT::fixed_vector(4, S32))
6264 Opc = AArch64::LD3Threev4s;
6265 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6266 Opc = AArch64::LD3Threev2d;
6267 else if (Ty == S64 || Ty == P0)
6268 Opc = AArch64::LD1Threev1d;
6269 else
6270 llvm_unreachable("Unexpected type for ld3!");
6271 selectVectorLoadIntrinsic(Opc, 3, I);
6272 break;
6273 }
6274 case Intrinsic::aarch64_neon_ld3lane: {
6275 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6276 unsigned Opc;
6277 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6278 Opc = AArch64::LD3i8;
6279 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6280 Opc = AArch64::LD3i16;
6281 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6282 Opc = AArch64::LD3i32;
6283 else if (Ty == LLT::fixed_vector(2, S64) ||
6284 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6285 Opc = AArch64::LD3i64;
6286 else
6287 llvm_unreachable("Unexpected type for ld3lane!");
6288 if (!selectVectorLoadLaneIntrinsic(Opc, 3, I))
6289 return false;
6290 break;
6291 }
6292 case Intrinsic::aarch64_neon_ld3r: {
6293 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6294 unsigned Opc = 0;
6295 if (Ty == LLT::fixed_vector(8, S8))
6296 Opc = AArch64::LD3Rv8b;
6297 else if (Ty == LLT::fixed_vector(16, S8))
6298 Opc = AArch64::LD3Rv16b;
6299 else if (Ty == LLT::fixed_vector(4, S16))
6300 Opc = AArch64::LD3Rv4h;
6301 else if (Ty == LLT::fixed_vector(8, S16))
6302 Opc = AArch64::LD3Rv8h;
6303 else if (Ty == LLT::fixed_vector(2, S32))
6304 Opc = AArch64::LD3Rv2s;
6305 else if (Ty == LLT::fixed_vector(4, S32))
6306 Opc = AArch64::LD3Rv4s;
6307 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6308 Opc = AArch64::LD3Rv2d;
6309 else if (Ty == S64 || Ty == P0)
6310 Opc = AArch64::LD3Rv1d;
6311 else
6312 llvm_unreachable("Unexpected type for ld3r!");
6313 selectVectorLoadIntrinsic(Opc, 3, I);
6314 break;
6315 }
6316 case Intrinsic::aarch64_neon_ld4: {
6317 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6318 unsigned Opc = 0;
6319 if (Ty == LLT::fixed_vector(8, S8))
6320 Opc = AArch64::LD4Fourv8b;
6321 else if (Ty == LLT::fixed_vector(16, S8))
6322 Opc = AArch64::LD4Fourv16b;
6323 else if (Ty == LLT::fixed_vector(4, S16))
6324 Opc = AArch64::LD4Fourv4h;
6325 else if (Ty == LLT::fixed_vector(8, S16))
6326 Opc = AArch64::LD4Fourv8h;
6327 else if (Ty == LLT::fixed_vector(2, S32))
6328 Opc = AArch64::LD4Fourv2s;
6329 else if (Ty == LLT::fixed_vector(4, S32))
6330 Opc = AArch64::LD4Fourv4s;
6331 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6332 Opc = AArch64::LD4Fourv2d;
6333 else if (Ty == S64 || Ty == P0)
6334 Opc = AArch64::LD1Fourv1d;
6335 else
6336 llvm_unreachable("Unexpected type for ld4!");
6337 selectVectorLoadIntrinsic(Opc, 4, I);
6338 break;
6339 }
6340 case Intrinsic::aarch64_neon_ld4lane: {
6341 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6342 unsigned Opc;
6343 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6344 Opc = AArch64::LD4i8;
6345 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6346 Opc = AArch64::LD4i16;
6347 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6348 Opc = AArch64::LD4i32;
6349 else if (Ty == LLT::fixed_vector(2, S64) ||
6350 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6351 Opc = AArch64::LD4i64;
6352 else
6353 llvm_unreachable("Unexpected type for ld4lane!");
6354 if (!selectVectorLoadLaneIntrinsic(Opc, 4, I))
6355 return false;
6356 break;
6357 }
6358 case Intrinsic::aarch64_neon_ld4r: {
6359 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6360 unsigned Opc = 0;
6361 if (Ty == LLT::fixed_vector(8, S8))
6362 Opc = AArch64::LD4Rv8b;
6363 else if (Ty == LLT::fixed_vector(16, S8))
6364 Opc = AArch64::LD4Rv16b;
6365 else if (Ty == LLT::fixed_vector(4, S16))
6366 Opc = AArch64::LD4Rv4h;
6367 else if (Ty == LLT::fixed_vector(8, S16))
6368 Opc = AArch64::LD4Rv8h;
6369 else if (Ty == LLT::fixed_vector(2, S32))
6370 Opc = AArch64::LD4Rv2s;
6371 else if (Ty == LLT::fixed_vector(4, S32))
6372 Opc = AArch64::LD4Rv4s;
6373 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6374 Opc = AArch64::LD4Rv2d;
6375 else if (Ty == S64 || Ty == P0)
6376 Opc = AArch64::LD4Rv1d;
6377 else
6378 llvm_unreachable("Unexpected type for ld4r!");
6379 selectVectorLoadIntrinsic(Opc, 4, I);
6380 break;
6381 }
6382 case Intrinsic::aarch64_neon_st1x2: {
6383 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6384 unsigned Opc;
6385 if (Ty == LLT::fixed_vector(8, S8))
6386 Opc = AArch64::ST1Twov8b;
6387 else if (Ty == LLT::fixed_vector(16, S8))
6388 Opc = AArch64::ST1Twov16b;
6389 else if (Ty == LLT::fixed_vector(4, S16))
6390 Opc = AArch64::ST1Twov4h;
6391 else if (Ty == LLT::fixed_vector(8, S16))
6392 Opc = AArch64::ST1Twov8h;
6393 else if (Ty == LLT::fixed_vector(2, S32))
6394 Opc = AArch64::ST1Twov2s;
6395 else if (Ty == LLT::fixed_vector(4, S32))
6396 Opc = AArch64::ST1Twov4s;
6397 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6398 Opc = AArch64::ST1Twov2d;
6399 else if (Ty == S64 || Ty == P0)
6400 Opc = AArch64::ST1Twov1d;
6401 else
6402 llvm_unreachable("Unexpected type for st1x2!");
6403 selectVectorStoreIntrinsic(I, 2, Opc);
6404 break;
6405 }
6406 case Intrinsic::aarch64_neon_st1x3: {
6407 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6408 unsigned Opc;
6409 if (Ty == LLT::fixed_vector(8, S8))
6410 Opc = AArch64::ST1Threev8b;
6411 else if (Ty == LLT::fixed_vector(16, S8))
6412 Opc = AArch64::ST1Threev16b;
6413 else if (Ty == LLT::fixed_vector(4, S16))
6414 Opc = AArch64::ST1Threev4h;
6415 else if (Ty == LLT::fixed_vector(8, S16))
6416 Opc = AArch64::ST1Threev8h;
6417 else if (Ty == LLT::fixed_vector(2, S32))
6418 Opc = AArch64::ST1Threev2s;
6419 else if (Ty == LLT::fixed_vector(4, S32))
6420 Opc = AArch64::ST1Threev4s;
6421 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6422 Opc = AArch64::ST1Threev2d;
6423 else if (Ty == S64 || Ty == P0)
6424 Opc = AArch64::ST1Threev1d;
6425 else
6426 llvm_unreachable("Unexpected type for st1x3!");
6427 selectVectorStoreIntrinsic(I, 3, Opc);
6428 break;
6429 }
6430 case Intrinsic::aarch64_neon_st1x4: {
6431 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6432 unsigned Opc;
6433 if (Ty == LLT::fixed_vector(8, S8))
6434 Opc = AArch64::ST1Fourv8b;
6435 else if (Ty == LLT::fixed_vector(16, S8))
6436 Opc = AArch64::ST1Fourv16b;
6437 else if (Ty == LLT::fixed_vector(4, S16))
6438 Opc = AArch64::ST1Fourv4h;
6439 else if (Ty == LLT::fixed_vector(8, S16))
6440 Opc = AArch64::ST1Fourv8h;
6441 else if (Ty == LLT::fixed_vector(2, S32))
6442 Opc = AArch64::ST1Fourv2s;
6443 else if (Ty == LLT::fixed_vector(4, S32))
6444 Opc = AArch64::ST1Fourv4s;
6445 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6446 Opc = AArch64::ST1Fourv2d;
6447 else if (Ty == S64 || Ty == P0)
6448 Opc = AArch64::ST1Fourv1d;
6449 else
6450 llvm_unreachable("Unexpected type for st1x4!");
6451 selectVectorStoreIntrinsic(I, 4, Opc);
6452 break;
6453 }
6454 case Intrinsic::aarch64_neon_st2: {
6455 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6456 unsigned Opc;
6457 if (Ty == LLT::fixed_vector(8, S8))
6458 Opc = AArch64::ST2Twov8b;
6459 else if (Ty == LLT::fixed_vector(16, S8))
6460 Opc = AArch64::ST2Twov16b;
6461 else if (Ty == LLT::fixed_vector(4, S16))
6462 Opc = AArch64::ST2Twov4h;
6463 else if (Ty == LLT::fixed_vector(8, S16))
6464 Opc = AArch64::ST2Twov8h;
6465 else if (Ty == LLT::fixed_vector(2, S32))
6466 Opc = AArch64::ST2Twov2s;
6467 else if (Ty == LLT::fixed_vector(4, S32))
6468 Opc = AArch64::ST2Twov4s;
6469 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6470 Opc = AArch64::ST2Twov2d;
6471 else if (Ty == S64 || Ty == P0)
6472 Opc = AArch64::ST1Twov1d;
6473 else
6474 llvm_unreachable("Unexpected type for st2!");
6475 selectVectorStoreIntrinsic(I, 2, Opc);
6476 break;
6477 }
6478 case Intrinsic::aarch64_neon_st3: {
6479 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6480 unsigned Opc;
6481 if (Ty == LLT::fixed_vector(8, S8))
6482 Opc = AArch64::ST3Threev8b;
6483 else if (Ty == LLT::fixed_vector(16, S8))
6484 Opc = AArch64::ST3Threev16b;
6485 else if (Ty == LLT::fixed_vector(4, S16))
6486 Opc = AArch64::ST3Threev4h;
6487 else if (Ty == LLT::fixed_vector(8, S16))
6488 Opc = AArch64::ST3Threev8h;
6489 else if (Ty == LLT::fixed_vector(2, S32))
6490 Opc = AArch64::ST3Threev2s;
6491 else if (Ty == LLT::fixed_vector(4, S32))
6492 Opc = AArch64::ST3Threev4s;
6493 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6494 Opc = AArch64::ST3Threev2d;
6495 else if (Ty == S64 || Ty == P0)
6496 Opc = AArch64::ST1Threev1d;
6497 else
6498 llvm_unreachable("Unexpected type for st3!");
6499 selectVectorStoreIntrinsic(I, 3, Opc);
6500 break;
6501 }
6502 case Intrinsic::aarch64_neon_st4: {
6503 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6504 unsigned Opc;
6505 if (Ty == LLT::fixed_vector(8, S8))
6506 Opc = AArch64::ST4Fourv8b;
6507 else if (Ty == LLT::fixed_vector(16, S8))
6508 Opc = AArch64::ST4Fourv16b;
6509 else if (Ty == LLT::fixed_vector(4, S16))
6510 Opc = AArch64::ST4Fourv4h;
6511 else if (Ty == LLT::fixed_vector(8, S16))
6512 Opc = AArch64::ST4Fourv8h;
6513 else if (Ty == LLT::fixed_vector(2, S32))
6514 Opc = AArch64::ST4Fourv2s;
6515 else if (Ty == LLT::fixed_vector(4, S32))
6516 Opc = AArch64::ST4Fourv4s;
6517 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6518 Opc = AArch64::ST4Fourv2d;
6519 else if (Ty == S64 || Ty == P0)
6520 Opc = AArch64::ST1Fourv1d;
6521 else
6522 llvm_unreachable("Unexpected type for st4!");
6523 selectVectorStoreIntrinsic(I, 4, Opc);
6524 break;
6525 }
6526 case Intrinsic::aarch64_neon_st2lane: {
6527 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6528 unsigned Opc;
6529 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6530 Opc = AArch64::ST2i8;
6531 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6532 Opc = AArch64::ST2i16;
6533 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6534 Opc = AArch64::ST2i32;
6535 else if (Ty == LLT::fixed_vector(2, S64) ||
6536 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6537 Opc = AArch64::ST2i64;
6538 else
6539 llvm_unreachable("Unexpected type for st2lane!");
6540 if (!selectVectorStoreLaneIntrinsic(I, 2, Opc))
6541 return false;
6542 break;
6543 }
6544 case Intrinsic::aarch64_neon_st3lane: {
6545 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6546 unsigned Opc;
6547 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6548 Opc = AArch64::ST3i8;
6549 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6550 Opc = AArch64::ST3i16;
6551 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6552 Opc = AArch64::ST3i32;
6553 else if (Ty == LLT::fixed_vector(2, S64) ||
6554 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6555 Opc = AArch64::ST3i64;
6556 else
6557 llvm_unreachable("Unexpected type for st3lane!");
6558 if (!selectVectorStoreLaneIntrinsic(I, 3, Opc))
6559 return false;
6560 break;
6561 }
6562 case Intrinsic::aarch64_neon_st4lane: {
6563 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6564 unsigned Opc;
6565 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6566 Opc = AArch64::ST4i8;
6567 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6568 Opc = AArch64::ST4i16;
6569 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6570 Opc = AArch64::ST4i32;
6571 else if (Ty == LLT::fixed_vector(2, S64) ||
6572 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6573 Opc = AArch64::ST4i64;
6574 else
6575 llvm_unreachable("Unexpected type for st4lane!");
6576 if (!selectVectorStoreLaneIntrinsic(I, 4, Opc))
6577 return false;
6578 break;
6579 }
6580 case Intrinsic::aarch64_mops_memset_tag: {
6581 // Transform
6582 // %dst:gpr(p0) = \
6583 // G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.mops.memset.tag), \
6584 // %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64)
6585 // where %dst is updated, into
6586 // (%Rd:GPR64common, %Rn:GPR64) = \
6587 // MOPSMemorySetTaggingPseudo \
6588 // %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64
6589 // where Rd and Rn are tied.
6590 // It is expected that %val has been extended to s64 in legalization.
6591 // Note that the order of the size/value operands is swapped.
6592
6593 Register DstDef = I.getOperand(0).getReg();
6594 // I.getOperand(1) is the intrinsic function
6595 Register DstUse = I.getOperand(2).getReg();
6596 Register ValUse = I.getOperand(3).getReg();
6597 Register SizeUse = I.getOperand(4).getReg();
6598
6599 // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one.
6600 // Therefore an additional virtual register is required for the updated size
6601 // operand. This value is not accessible via the semantics of the intrinsic.
6602 Register SizeDef = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
6603
6604 auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo,
6605 {DstDef, SizeDef}, {DstUse, SizeUse, ValUse});
6606 Memset.cloneMemRefs(I);
6607 constrainSelectedInstRegOperands(*Memset, TII, TRI, RBI);
6608 break;
6609 }
6610 case Intrinsic::ptrauth_resign_load_relative: {
6611 Register DstReg = I.getOperand(0).getReg();
6612 Register ValReg = I.getOperand(2).getReg();
6613 uint64_t AUTKey = I.getOperand(3).getImm();
6614 Register AUTDisc = I.getOperand(4).getReg();
6615 uint64_t PACKey = I.getOperand(5).getImm();
6616 Register PACDisc = I.getOperand(6).getReg();
6617 int64_t Addend = I.getOperand(7).getImm();
6618
6619 Register AUTAddrDisc = AUTDisc;
6620 uint16_t AUTConstDiscC = 0;
6621 std::tie(AUTConstDiscC, AUTAddrDisc) =
6622 extractPtrauthBlendDiscriminators(AUTDisc, MRI);
6623
6624 Register PACAddrDisc = PACDisc;
6625 uint16_t PACConstDiscC = 0;
6626 std::tie(PACConstDiscC, PACAddrDisc) =
6627 extractPtrauthBlendDiscriminators(PACDisc, MRI);
6628
6629 MIB.buildCopy({AArch64::X16}, {ValReg});
6630
6631 MIB.buildInstr(AArch64::AUTRELLOADPAC)
6632 .addImm(AUTKey)
6633 .addImm(AUTConstDiscC)
6634 .addUse(AUTAddrDisc)
6635 .addImm(PACKey)
6636 .addImm(PACConstDiscC)
6637 .addUse(PACAddrDisc)
6638 .addImm(Addend)
6639 .constrainAllUses(TII, TRI, RBI);
6640 MIB.buildCopy({DstReg}, Register(AArch64::X16));
6641
6642 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6643 I.eraseFromParent();
6644 return true;
6645 }
6646 }
6647
6648 I.eraseFromParent();
6649 return true;
6650}
6651
6652bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
6653 MachineRegisterInfo &MRI) {
6654 unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
6655
6656 switch (IntrinID) {
6657 default:
6658 break;
6659 case Intrinsic::ptrauth_resign: {
6660 Register DstReg = I.getOperand(0).getReg();
6661 Register ValReg = I.getOperand(2).getReg();
6662 uint64_t AUTKey = I.getOperand(3).getImm();
6663 Register AUTDisc = I.getOperand(4).getReg();
6664 uint64_t PACKey = I.getOperand(5).getImm();
6665 Register PACDisc = I.getOperand(6).getReg();
6666
6667 Register AUTAddrDisc = AUTDisc;
6668 uint16_t AUTConstDiscC = 0;
6669 std::tie(AUTConstDiscC, AUTAddrDisc) =
6670 extractPtrauthBlendDiscriminators(AUTDisc, MRI);
6671
6672 Register PACAddrDisc = PACDisc;
6673 uint16_t PACConstDiscC = 0;
6674 std::tie(PACConstDiscC, PACAddrDisc) =
6675 extractPtrauthBlendDiscriminators(PACDisc, MRI);
6676
6677 MIB.buildCopy({AArch64::X16}, {ValReg});
6678 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6679 MIB.buildInstr(AArch64::AUTPAC)
6680 .addImm(AUTKey)
6681 .addImm(AUTConstDiscC)
6682 .addUse(AUTAddrDisc)
6683 .addImm(PACKey)
6684 .addImm(PACConstDiscC)
6685 .addUse(PACAddrDisc)
6686 .constrainAllUses(TII, TRI, RBI);
6687 MIB.buildCopy({DstReg}, Register(AArch64::X16));
6688
6689 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6690 I.eraseFromParent();
6691 return true;
6692 }
6693 case Intrinsic::ptrauth_auth: {
6694 Register DstReg = I.getOperand(0).getReg();
6695 Register ValReg = I.getOperand(2).getReg();
6696 uint64_t AUTKey = I.getOperand(3).getImm();
6697 Register AUTDisc = I.getOperand(4).getReg();
6698
6699 Register AUTAddrDisc = AUTDisc;
6700 uint16_t AUTConstDiscC = 0;
6701 std::tie(AUTConstDiscC, AUTAddrDisc) =
6702 extractPtrauthBlendDiscriminators(AUTDisc, MRI);
6703
6704 if (STI.isX16X17Safer()) {
6705 MIB.buildCopy({AArch64::X16}, {ValReg});
6706 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6707 MIB.buildInstr(AArch64::AUTx16x17)
6708 .addImm(AUTKey)
6709 .addImm(AUTConstDiscC)
6710 .addUse(AUTAddrDisc)
6711 .constrainAllUses(TII, TRI, RBI);
6712 MIB.buildCopy({DstReg}, Register(AArch64::X16));
6713 } else {
6714 Register ScratchReg =
6715 MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
6716 MIB.buildInstr(AArch64::AUTxMxN)
6717 .addDef(DstReg)
6718 .addDef(ScratchReg)
6719 .addUse(ValReg)
6720 .addImm(AUTKey)
6721 .addImm(AUTConstDiscC)
6722 .addUse(AUTAddrDisc)
6723 .constrainAllUses(TII, TRI, RBI);
6724 }
6725
6726 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6727 I.eraseFromParent();
6728 return true;
6729 }
6730 case Intrinsic::frameaddress:
6731 case Intrinsic::returnaddress: {
6732 MachineFunction &MF = *I.getParent()->getParent();
6733 MachineFrameInfo &MFI = MF.getFrameInfo();
6734
6735 unsigned Depth = I.getOperand(2).getImm();
6736 Register DstReg = I.getOperand(0).getReg();
6737 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6738
6739 if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
6740 if (!MFReturnAddr) {
6741 // Insert the copy from LR/X30 into the entry block, before it can be
6742 // clobbered by anything.
6743 MFI.setReturnAddressIsTaken(true);
6744 MFReturnAddr = getFunctionLiveInPhysReg(
6745 MF, TII, AArch64::LR, AArch64::GPR64RegClass, I.getDebugLoc());
6746 }
6747
6748 if (STI.hasPAuth()) {
6749 MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
6750 } else {
6751 MIB.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
6752 MIB.buildInstr(AArch64::XPACLRI);
6753 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
6754 }
6755
6756 I.eraseFromParent();
6757 return true;
6758 }
6759
6760 MFI.setFrameAddressIsTaken(true);
6761 Register FrameAddr(AArch64::FP);
6762 while (Depth--) {
6763 Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
6764 auto Ldr =
6765 MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
6766 constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI);
6767 FrameAddr = NextFrame;
6768 }
6769
6770 if (IntrinID == Intrinsic::frameaddress)
6771 MIB.buildCopy({DstReg}, {FrameAddr});
6772 else {
6773 MFI.setReturnAddressIsTaken(true);
6774
6775 if (STI.hasPAuth()) {
6776 Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
6777 MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
6778 MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
6779 } else {
6780 MIB.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr})
6781 .addImm(1);
6782 MIB.buildInstr(AArch64::XPACLRI);
6783 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
6784 }
6785 }
6786
6787 I.eraseFromParent();
6788 return true;
6789 }
6790 case Intrinsic::aarch64_neon_tbl2:
6791 SelectTable(I, MRI, 2, AArch64::TBLv8i8Two, AArch64::TBLv16i8Two, false);
6792 return true;
6793 case Intrinsic::aarch64_neon_tbl3:
6794 SelectTable(I, MRI, 3, AArch64::TBLv8i8Three, AArch64::TBLv16i8Three,
6795 false);
6796 return true;
6797 case Intrinsic::aarch64_neon_tbl4:
6798 SelectTable(I, MRI, 4, AArch64::TBLv8i8Four, AArch64::TBLv16i8Four, false);
6799 return true;
6800 case Intrinsic::aarch64_neon_tbx2:
6801 SelectTable(I, MRI, 2, AArch64::TBXv8i8Two, AArch64::TBXv16i8Two, true);
6802 return true;
6803 case Intrinsic::aarch64_neon_tbx3:
6804 SelectTable(I, MRI, 3, AArch64::TBXv8i8Three, AArch64::TBXv16i8Three, true);
6805 return true;
6806 case Intrinsic::aarch64_neon_tbx4:
6807 SelectTable(I, MRI, 4, AArch64::TBXv8i8Four, AArch64::TBXv16i8Four, true);
6808 return true;
6809 case Intrinsic::swift_async_context_addr:
6810 auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()},
6811 {Register(AArch64::FP)})
6812 .addImm(8)
6813 .addImm(0);
6814 constrainSelectedInstRegOperands(*Sub, TII, TRI, RBI);
6815
6816 MachineFunction *MF = I.getParent()->getParent();
6817 MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
6818 I.eraseFromParent();
6819 return true;
6820 }
6821 return false;
6822}
6823
6824// G_PTRAUTH_GLOBAL_VALUE lowering
6825//
6826// We have 3 lowering alternatives to choose from:
6827// - MOVaddrPAC: similar to MOVaddr, with added PAC.
6828// If the GV doesn't need a GOT load (i.e., is locally defined)
6829// materialize the pointer using adrp+add+pac. See LowerMOVaddrPAC.
6830//
6831// - LOADgotPAC: similar to LOADgot, with added PAC.
6832// If the GV needs a GOT load, materialize the pointer using the usual
6833 // GOT adrp+ldr, +pac. Pointers in the GOT are assumed to be unsigned; the GOT
6834 // section is assumed to be read-only (for example, via the relro mechanism). See
6835// LowerMOVaddrPAC.
6836//
6837// - LOADauthptrstatic: similar to LOADgot, but use a
6838// special stub slot instead of a GOT slot.
6839// Load a signed pointer for symbol 'sym' from a stub slot named
6840// 'sym$auth_ptr$key$disc' filled by dynamic linker during relocation
6841// resolving. This usually lowers to adrp+ldr, but also emits an entry into
6842// .data with an
6843// @AUTH relocation. See LowerLOADauthptrstatic.
6844//
6845 // All 3 are pseudos that are expanded late into longer sequences: this lets us
6846// provide integrity guarantees on the to-be-signed intermediate values.
6847//
6848// LOADauthptrstatic is undesirable because it requires a large section filled
6849// with often similarly-signed pointers, making it a good harvesting target.
6850// Thus, it's only used for ptrauth references to extern_weak to avoid null
6851// checks.
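//
// In short, mirroring the selection logic below: a non-extern_weak reference
// that needs no GOT load is lowered to MOVaddrPAC, a non-extern_weak reference
// that does need one is lowered to LOADgotPAC, and extern_weak references are
// lowered to LOADauthptrstatic.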
6852
6853bool AArch64InstructionSelector::selectPtrAuthGlobalValue(
6854 MachineInstr &I, MachineRegisterInfo &MRI) const {
6855 Register DefReg = I.getOperand(0).getReg();
6856 Register Addr = I.getOperand(1).getReg();
6857 uint64_t Key = I.getOperand(2).getImm();
6858 Register AddrDisc = I.getOperand(3).getReg();
6859 uint64_t Disc = I.getOperand(4).getImm();
6860 int64_t Offset = 0;
6861
6862 if (Key > AArch64PACKey::LAST)
6863 report_fatal_error("key in ptrauth global out of range [0, " +
6864 Twine((int)AArch64PACKey::LAST) + "]");
6865
6866 // Blend only works if the integer discriminator is 16-bit wide.
6867 if (!isUInt<16>(Disc))
6869 "constant discriminator in ptrauth global out of range [0, 0xffff]");
6870
6871 // Choosing between 3 lowering alternatives is target-specific.
6872 if (!STI.isTargetELF() && !STI.isTargetMachO())
6873 report_fatal_error("ptrauth global lowering only supported on MachO/ELF");
6874
6875 if (!MRI.hasOneDef(Addr))
6876 return false;
6877
6878 // First match any offset we take from the real global.
6879 const MachineInstr *DefMI = &*MRI.def_instr_begin(Addr);
6880 if (DefMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
6881 Register OffsetReg = DefMI->getOperand(2).getReg();
6882 if (!MRI.hasOneDef(OffsetReg))
6883 return false;
6884 const MachineInstr &OffsetMI = *MRI.def_instr_begin(OffsetReg);
6885 if (OffsetMI.getOpcode() != TargetOpcode::G_CONSTANT)
6886 return false;
6887
6888 Addr = DefMI->getOperand(1).getReg();
6889 if (!MRI.hasOneDef(Addr))
6890 return false;
6891
6892 DefMI = &*MRI.def_instr_begin(Addr);
6893 Offset = OffsetMI.getOperand(1).getCImm()->getSExtValue();
6894 }
6895
6896 // We should be left with a genuine unauthenticated GlobalValue.
6897 const GlobalValue *GV;
6898 if (DefMI->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) {
6899 GV = DefMI->getOperand(1).getGlobal();
6900 Offset += DefMI->getOperand(1).getOffset();
6901 } else if (DefMI->getOpcode() == AArch64::G_ADD_LOW) {
6902 GV = DefMI->getOperand(2).getGlobal();
6903 Offset += DefMI->getOperand(2).getOffset();
6904 } else {
6905 return false;
6906 }
6907
6908 MachineIRBuilder MIB(I);
6909
6910 // Classify the reference to determine whether it needs a GOT load.
6911 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
6912 const bool NeedsGOTLoad = ((OpFlags & AArch64II::MO_GOT) != 0);
6913 assert(((OpFlags & (~AArch64II::MO_GOT)) == 0) &&
6914 "unsupported non-GOT op flags on ptrauth global reference");
6915 assert((!GV->hasExternalWeakLinkage() || NeedsGOTLoad) &&
6916 "unsupported non-GOT reference to weak ptrauth global");
6917
6918 std::optional<APInt> AddrDiscVal = getIConstantVRegVal(AddrDisc, MRI);
6919 bool HasAddrDisc = !AddrDiscVal || *AddrDiscVal != 0;
6920
6921 // Non-extern_weak:
6922 // - No GOT load needed -> MOVaddrPAC
6923 // - GOT load for non-extern_weak -> LOADgotPAC
6924 // Note that we disallow extern_weak refs to avoid null checks later.
6925 if (!GV->hasExternalWeakLinkage()) {
6926 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X16}, {});
6927 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6928 MIB.buildInstr(NeedsGOTLoad ? AArch64::LOADgotPAC : AArch64::MOVaddrPAC)
6929 .addGlobalAddress(GV, Offset)
6930 .addImm(Key)
6931 .addReg(HasAddrDisc ? AddrDisc : AArch64::XZR)
6932 .addImm(Disc)
6933 .constrainAllUses(TII, TRI, RBI);
6934 MIB.buildCopy(DefReg, Register(AArch64::X16));
6935 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
6936 I.eraseFromParent();
6937 return true;
6938 }
6939
6940 // extern_weak -> LOADauthptrstatic
6941
6942 // Offsets and extern_weak don't mix well: ptrauth aside, you'd get the
6943 // offset alone as a pointer if the symbol wasn't available, which would
6944 // probably break null checks in users. Ptrauth complicates things further:
6945 // error out.
6946 if (Offset != 0)
6948 "unsupported non-zero offset in weak ptrauth global reference");
6949
6950 if (HasAddrDisc)
6951 report_fatal_error("unsupported weak addr-div ptrauth global");
6952
6953 MIB.buildInstr(AArch64::LOADauthptrstatic, {DefReg}, {})
6954 .addGlobalAddress(GV, Offset)
6955 .addImm(Key)
6956 .addImm(Disc);
6957 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
6958
6959 I.eraseFromParent();
6960 return true;
6961}
6962
6963void AArch64InstructionSelector::SelectTable(MachineInstr &I,
6964 MachineRegisterInfo &MRI,
6965 unsigned NumVec, unsigned Opc1,
6966 unsigned Opc2, bool isExt) {
6967 Register DstReg = I.getOperand(0).getReg();
6968 unsigned Opc = MRI.getType(DstReg) == LLT::fixed_vector(8, 8) ? Opc1 : Opc2;
6969
6970 // Create the REG_SEQUENCE
6971 SmallVector<Register, 4> Regs;
6972 for (unsigned i = 0; i < NumVec; i++)
6973 Regs.push_back(I.getOperand(i + 2 + isExt).getReg());
6974 Register RegSeq = createQTuple(Regs, MIB);
6975
6976 Register IdxReg = I.getOperand(2 + NumVec + isExt).getReg();
6977 MachineInstrBuilder Instr;
6978 if (isExt) {
6979 Register Reg = I.getOperand(2).getReg();
6980 Instr = MIB.buildInstr(Opc, {DstReg}, {Reg, RegSeq, IdxReg});
6981 } else
6982 Instr = MIB.buildInstr(Opc, {DstReg}, {RegSeq, IdxReg});
6983 constrainSelectedInstRegOperands(*Instr, TII, TRI, RBI);
6984 I.eraseFromParent();
6985}
6986
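// The selectShiftA_*/selectShiftB_* helpers below render the two immediates
// needed when a constant shift is selected as a bitfield-move style
// instruction (an assumption about the consuming tablegen patterns): ShiftA
// yields (BitWidth - Amt) & (BitWidth - 1) and ShiftB yields
// (BitWidth - 1) - Amt. For example, a 32-bit left shift by 3 renders 29 for
// ShiftA and 28 for ShiftB.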
6987InstructionSelector::ComplexRendererFns
6988AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
6989 auto MaybeImmed = getImmedFromMO(Root);
6990 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6991 return std::nullopt;
6992 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
6993 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6994}
6995
6996InstructionSelector::ComplexRendererFns
6997AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
6998 auto MaybeImmed = getImmedFromMO(Root);
6999 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
7000 return std::nullopt;
7001 uint64_t Enc = 31 - *MaybeImmed;
7002 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
7003}
7004
7005InstructionSelector::ComplexRendererFns
7006AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
7007 auto MaybeImmed = getImmedFromMO(Root);
7008 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
7009 return std::nullopt;
7010 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
7011 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
7012}
7013
7014InstructionSelector::ComplexRendererFns
7015AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
7016 auto MaybeImmed = getImmedFromMO(Root);
7017 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
7018 return std::nullopt;
7019 uint64_t Enc = 63 - *MaybeImmed;
7020 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
7021}
7022
7023/// Helper to select an immediate value that can be represented as a 12-bit
7024/// value shifted left by either 0 or 12. If it is possible to do so, return
7025/// the immediate and shift value. If not, return std::nullopt.
7026///
7027/// Used by selectArithImmed and selectNegArithImmed.
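///
/// For example, 0x123 is rendered as {imm = 0x123, shift = LSL #0} and
/// 0x123000 as {imm = 0x123, shift = LSL #12}, while a value like 0x123456
/// (nonzero low 12 bits with higher bits also set) cannot be represented and
/// yields std::nullopt.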
7028InstructionSelector::ComplexRendererFns
7029AArch64InstructionSelector::select12BitValueWithLeftShift(
7030 uint64_t Immed) const {
7031 unsigned ShiftAmt;
7032 if (Immed >> 12 == 0) {
7033 ShiftAmt = 0;
7034 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
7035 ShiftAmt = 12;
7036 Immed = Immed >> 12;
7037 } else
7038 return std::nullopt;
7039
7040 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
7041 return {{
7042 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
7043 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
7044 }};
7045}
7046
7047/// SelectArithImmed - Select an immediate value that can be represented as
7048/// a 12-bit value shifted left by either 0 or 12. If so, return true with
7049/// Val set to the 12-bit value and Shift set to the shifter operand.
7050InstructionSelector::ComplexRendererFns
7051AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
7052 // This function is called from the addsub_shifted_imm ComplexPattern,
7053 // which lists [imm] as the list of opcodes it's interested in; however,
7054 // we still need to check whether the operand is actually an immediate
7055 // here because the ComplexPattern opcode list is only used in
7056 // root-level opcode matching.
7057 auto MaybeImmed = getImmedFromMO(Root);
7058 if (MaybeImmed == std::nullopt)
7059 return std::nullopt;
7060 return select12BitValueWithLeftShift(*MaybeImmed);
7061}
7062
7063/// SelectNegArithImmed - As above, but negates the value before trying to
7064/// select it.
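///
/// For example, an add or compare against -5 can instead be selected with the
/// immediate +5 (the consuming pattern is expected to flip the operation,
/// e.g. cmp to cmn), so the negated value is what gets rendered here.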
7065InstructionSelector::ComplexRendererFns
7066AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
7067 // We need a register here, because we need to know if we have a 64 or 32
7068 // bit immediate.
7069 if (!Root.isReg())
7070 return std::nullopt;
7071 auto MaybeImmed = getImmedFromMO(Root);
7072 if (MaybeImmed == std::nullopt)
7073 return std::nullopt;
7074 uint64_t Immed = *MaybeImmed;
7075
7076 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
7077 // have the opposite effect on the C flag, so this pattern mustn't match under
7078 // those circumstances.
7079 if (Immed == 0)
7080 return std::nullopt;
7081
7082 // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
7083 // the root.
7084 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7085 if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
7086 Immed = ~((uint32_t)Immed) + 1;
7087 else
7088 Immed = ~Immed + 1ULL;
7089
7090 if (Immed & 0xFFFFFFFFFF000000ULL)
7091 return std::nullopt;
7092
7093 Immed &= 0xFFFFFFULL;
7094 return select12BitValueWithLeftShift(Immed);
7095}
7096
7097/// Checks if we are sure that folding MI into load/store addressing mode is
7098/// beneficial or not.
7099///
7100/// Returns:
7101/// - true if folding MI would be beneficial.
7102/// - false if folding MI would be bad.
7103/// - std::nullopt if it is not sure whether folding MI is beneficial.
7104///
7105/// \p MI can be the offset operand of G_PTR_ADD, e.g. G_SHL in the example:
7106///
7107/// %13:gpr(s64) = G_CONSTANT i64 1
7108/// %8:gpr(s64) = G_SHL %6, %13(s64)
7109/// %9:gpr(p0) = G_PTR_ADD %0, %8(s64)
7110/// %12:gpr(s32) = G_LOAD %9(p0) :: (load (s16))
7111std::optional<bool> AArch64InstructionSelector::isWorthFoldingIntoAddrMode(
7112 const MachineInstr &MI, const MachineRegisterInfo &MRI) const {
7113 if (MI.getOpcode() == AArch64::G_SHL) {
7114 // Address operands with shifts are free, except when running on subtargets
7115 // with AddrLSLSlow14.
7116 if (const auto ValAndVReg = getIConstantVRegValWithLookThrough(
7117 MI.getOperand(2).getReg(), MRI)) {
7118 const APInt ShiftVal = ValAndVReg->Value;
7119
7120 // Don't fold if we know this will be slow.
7121 return !(STI.hasAddrLSLSlow14() && (ShiftVal == 1 || ShiftVal == 4));
7122 }
7123 }
7124 return std::nullopt;
7125}
7126
7127/// Return true if it is worth folding MI into an extended register. That is,
7128/// if it's safe to pull it into the addressing mode of a load or store as a
7129/// shift.
7130/// \p IsAddrOperand whether the def of MI is used as an address operand
7131/// (e.g. feeding into an LDR/STR).
7132bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
7133 const MachineInstr &MI, const MachineRegisterInfo &MRI,
7134 bool IsAddrOperand) const {
7135
7136 // Always fold if there is one use, or if we're optimizing for size.
7137 Register DefReg = MI.getOperand(0).getReg();
7138 if (MRI.hasOneNonDBGUse(DefReg) ||
7139 MI.getParent()->getParent()->getFunction().hasOptSize())
7140 return true;
7141
7142 if (IsAddrOperand) {
7143 // If we are already sure that folding MI is good or bad, return the result.
7144 if (const auto Worth = isWorthFoldingIntoAddrMode(MI, MRI))
7145 return *Worth;
7146
7147 // Fold G_PTR_ADD if its offset operand can be folded
7148 if (MI.getOpcode() == AArch64::G_PTR_ADD) {
7149 MachineInstr *OffsetInst =
7150 getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
7151
7152 // Note, we already know G_PTR_ADD is used by at least two instructions.
7153 // If we are also sure about whether folding is beneficial or not,
7154 // return the result.
7155 if (const auto Worth = isWorthFoldingIntoAddrMode(*OffsetInst, MRI))
7156 return *Worth;
7157 }
7158 }
7159
7160 // FIXME: Consider checking HasALULSLFast as appropriate.
7161
7162 // We have a fastpath, so folding a shift in and potentially computing it
7163 // many times may be beneficial. Check if this is only used in memory ops.
7164 // If it is, then we should fold.
7165 return all_of(MRI.use_nodbg_instructions(DefReg),
7166 [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
7167}
7168
7169InstructionSelector::ComplexRendererFns
7170AArch64InstructionSelector::selectExtendedSHL(
7171 MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
7172 unsigned SizeInBytes, bool WantsExt) const {
7173 assert(Base.isReg() && "Expected base to be a register operand");
7174 assert(Offset.isReg() && "Expected offset to be a register operand");
7175
7176 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7177 MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
7178
7179 unsigned OffsetOpc = OffsetInst->getOpcode();
7180 bool LookedThroughZExt = false;
7181 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
7182 // Try to look through a ZEXT.
7183 if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
7184 return std::nullopt;
7185
7186 OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg());
7187 OffsetOpc = OffsetInst->getOpcode();
7188 LookedThroughZExt = true;
7189
7190 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
7191 return std::nullopt;
7192 }
7193 // Make sure that the memory op is a valid size.
7194 int64_t LegalShiftVal = Log2_32(SizeInBytes);
7195 if (LegalShiftVal == 0)
7196 return std::nullopt;
7197 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))
7198 return std::nullopt;
7199
7200 // Now, try to find the specific G_CONSTANT. Start by assuming that the
7201 // register we will offset is the LHS, and the register containing the
7202 // constant is the RHS.
7203 Register OffsetReg = OffsetInst->getOperand(1).getReg();
7204 Register ConstantReg = OffsetInst->getOperand(2).getReg();
7205 auto ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
7206 if (!ValAndVReg) {
7207 // We didn't get a constant on the RHS. If the opcode is a shift, then
7208 // we're done.
7209 if (OffsetOpc == TargetOpcode::G_SHL)
7210 return std::nullopt;
7211
7212 // If we have a G_MUL, we can use either register. Try looking at the RHS.
7213 std::swap(OffsetReg, ConstantReg);
7214 ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
7215 if (!ValAndVReg)
7216 return std::nullopt;
7217 }
7218
7219 // The value must fit into 3 bits, and must be positive. Make sure that is
7220 // true.
7221 int64_t ImmVal = ValAndVReg->Value.getSExtValue();
7222
7223 // Since we're going to pull this into a shift, the constant value must be
7224 // a power of 2. If we got a multiply, then we need to check this.
7225 if (OffsetOpc == TargetOpcode::G_MUL) {
7226 if (!llvm::has_single_bit<uint32_t>(ImmVal))
7227 return std::nullopt;
7228
7229 // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
7230 ImmVal = Log2_32(ImmVal);
7231 }
7232
7233 if ((ImmVal & 0x7) != ImmVal)
7234 return std::nullopt;
7235
7236 // We are only allowed to shift by LegalShiftVal. This shift value is built
7237 // into the instruction, so we can't just use whatever we want.
7238 if (ImmVal != LegalShiftVal)
7239 return std::nullopt;
7240
7241 unsigned SignExtend = 0;
7242 if (WantsExt) {
7243 // Check if the offset is defined by an extend, unless we looked through a
7244 // G_ZEXT earlier.
7245 if (!LookedThroughZExt) {
7246 MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
7247 auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
7248 if (Ext == AArch64_AM::InvalidShiftExtend)
7249 return std::nullopt;
7250
7251 SignExtend = AArch64_AM::isSignExtendShiftType(Ext) ? 1 : 0;
7252 // We only support SXTW for signed extension here.
7253 if (SignExtend && Ext != AArch64_AM::SXTW)
7254 return std::nullopt;
7255 OffsetReg = ExtInst->getOperand(1).getReg();
7256 }
7257
7258 // Need a 32-bit wide register here.
7259 MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
7260 OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
7261 }
7262
7263 // We can use the LHS of the GEP as the base, and the LHS of the shift as an
7264 // offset. Signify that we are shifting by setting the shift flag to 1.
7265 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
7266 [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
7267 [=](MachineInstrBuilder &MIB) {
7268 // Need to add both immediates here to make sure that they are both
7269 // added to the instruction.
7270 MIB.addImm(SignExtend);
7271 MIB.addImm(1);
7272 }}};
7273}
7274
7275/// This is used for computing addresses like this:
7276///
7277/// ldr x1, [x2, x3, lsl #3]
7278///
7279/// Where x2 is the base register, and x3 is an offset register. The shift-left
7280/// is a constant value specific to this load instruction. That is, we'll never
7281/// see anything other than a 3 here (which corresponds to the size of the
7282/// element being loaded.)
7283InstructionSelector::ComplexRendererFns
7284AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
7285 MachineOperand &Root, unsigned SizeInBytes) const {
7286 if (!Root.isReg())
7287 return std::nullopt;
7288 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7289
7290 // We want to find something like this:
7291 //
7292 // val = G_CONSTANT LegalShiftVal
7293 // shift = G_SHL off_reg val
7294 // ptr = G_PTR_ADD base_reg shift
7295 // x = G_LOAD ptr
7296 //
7297 // And fold it into this addressing mode:
7298 //
7299 // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
7300
7301 // Check if we can find the G_PTR_ADD.
7302 MachineInstr *PtrAdd =
7303 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7304 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))
7305 return std::nullopt;
7306
7307 // Now, try to match an opcode which will match our specific offset.
7308 // We want a G_SHL or a G_MUL.
7309 MachineInstr *OffsetInst =
7310 getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
7311 return selectExtendedSHL(Root, PtrAdd->getOperand(1),
7312 OffsetInst->getOperand(0), SizeInBytes,
7313 /*WantsExt=*/false);
7314}
7315
7316/// This is used for computing addresses like this:
7317///
7318/// ldr x1, [x2, x3]
7319///
7320/// Where x2 is the base register, and x3 is an offset register.
7321///
7322/// When possible (or profitable) to fold a G_PTR_ADD into the address
7323/// calculation, this will do so. Otherwise, it will return std::nullopt.
7324InstructionSelector::ComplexRendererFns
7325AArch64InstructionSelector::selectAddrModeRegisterOffset(
7326 MachineOperand &Root) const {
7327 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7328
7329 // We need a GEP.
7330 MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
7331 if (Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
7332 return std::nullopt;
7333
7334 // If this is used more than once, let's not bother folding.
7335 // TODO: Check if they are memory ops. If they are, then we can still fold
7336 // without having to recompute anything.
7337 if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
7338 return std::nullopt;
7339
7340 // Base is the GEP's LHS, offset is its RHS.
7341 return {{[=](MachineInstrBuilder &MIB) {
7342 MIB.addUse(Gep->getOperand(1).getReg());
7343 },
7344 [=](MachineInstrBuilder &MIB) {
7345 MIB.addUse(Gep->getOperand(2).getReg());
7346 },
7347 [=](MachineInstrBuilder &MIB) {
7348 // Need to add both immediates here to make sure that they are both
7349 // added to the instruction.
7350 MIB.addImm(0);
7351 MIB.addImm(0);
7352 }}};
7353}
7354
7355/// This is intended to be equivalent to selectAddrModeXRO in
7356/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
7357InstructionSelector::ComplexRendererFns
7358AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
7359 unsigned SizeInBytes) const {
7360 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7361 if (!Root.isReg())
7362 return std::nullopt;
7363 MachineInstr *PtrAdd =
7364 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7365 if (!PtrAdd)
7366 return std::nullopt;
7367
7368 // Check for immediates which cannot be encoded in the [base + imm]
7369 // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
7370 // end up with code like:
7371 //
7372 // mov x0, wide
7373 // add x1 base, x0
7374 // ldr x2, [x1, x0]
7375 //
7376 // In this situation, we can use the [base, xreg] addressing mode to save an
7377 // add/sub:
7378 //
7379 // mov x0, wide
7380 // ldr x2, [base, x0]
7381 auto ValAndVReg =
7382 getIConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
7383 if (ValAndVReg) {
7384 unsigned Scale = Log2_32(SizeInBytes);
7385 int64_t ImmOff = ValAndVReg->Value.getSExtValue();
7386
7387 // Skip immediates that can be selected in the load/store addressing
7388 // mode.
7389 if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
7390 ImmOff < (0x1000 << Scale))
7391 return std::nullopt;
7392
7393 // Helper lambda to decide whether or not it is preferable to emit an add.
7394 auto isPreferredADD = [](int64_t ImmOff) {
7395 // Constants in [0x0, 0xfff] can be encoded in an add.
7396 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
7397 return true;
7398
7399 // Can it be encoded in an add lsl #12?
7400 if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
7401 return false;
7402
7403 // It can be encoded in an add lsl #12, but we may not want to. If it is
7404 // possible to select this as a single movz, then prefer that. A single
7405 // movz is faster than an add with a shift.
7406 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
7407 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
7408 };
7409
7410 // If the immediate can be encoded in a single add/sub, then bail out.
7411 if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
7412 return std::nullopt;
7413 }
7414
7415 // Try to fold shifts into the addressing mode.
7416 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
7417 if (AddrModeFns)
7418 return AddrModeFns;
7419
7420 // If that doesn't work, see if it's possible to fold in registers from
7421 // a GEP.
7422 return selectAddrModeRegisterOffset(Root);
7423}
7424
7425/// This is used for computing addresses like this:
7426///
7427/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
7428///
7429/// Where we have a 64-bit base register, a 32-bit offset register, and an
7430/// extend (which may or may not be signed).
7431InstructionSelector::ComplexRendererFns
7432AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
7433 unsigned SizeInBytes) const {
7434 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7435
7436 MachineInstr *PtrAdd =
7437 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7438 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))
7439 return std::nullopt;
7440
7441 MachineOperand &LHS = PtrAdd->getOperand(1);
7442 MachineOperand &RHS = PtrAdd->getOperand(2);
7443 MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
7444
7445 // The first case is the same as selectAddrModeXRO, except we need an extend.
7446 // In this case, we try to find a shift and extend, and fold them into the
7447 // addressing mode.
7448 //
7449 // E.g.
7450 //
7451 // off_reg = G_Z/S/ANYEXT ext_reg
7452 // val = G_CONSTANT LegalShiftVal
7453 // shift = G_SHL off_reg val
7454 // ptr = G_PTR_ADD base_reg shift
7455 // x = G_LOAD ptr
7456 //
7457 // In this case we can get a load like this:
7458 //
7459 // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
7460 auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
7461 SizeInBytes, /*WantsExt=*/true);
7462 if (ExtendedShl)
7463 return ExtendedShl;
7464
7465 // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
7466 //
7467 // e.g.
7468 // ldr something, [base_reg, ext_reg, sxtw]
7469 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))
7470 return std::nullopt;
7471
7472 // Check if this is an extend. We'll get an extend type if it is.
7473 AArch64_AM::ShiftExtendType Ext =
7474 getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
7475 if (Ext == AArch64_AM::InvalidShiftExtend)
7476 return std::nullopt;
7477
7478 // Need a 32-bit wide register.
7479 MachineIRBuilder MIB(*PtrAdd);
7480 Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
7481 AArch64::GPR32RegClass, MIB);
7482 unsigned SignExtend = Ext == AArch64_AM::SXTW;
7483
7484 // Base is LHS, offset is ExtReg.
7485 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
7486 [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
7487 [=](MachineInstrBuilder &MIB) {
7488 MIB.addImm(SignExtend);
7489 MIB.addImm(0);
7490 }}};
7491}
7492
7493/// Select a "register plus unscaled signed 9-bit immediate" address. This
7494/// should only match when there is an offset that is not valid for a scaled
7495/// immediate addressing mode. The "Size" argument is the size in bytes of the
7496/// memory reference, which is needed here to know what is valid for a scaled
7497/// immediate.
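///
/// For example, with Size == 4 a byte offset of 3 (not a multiple of the
/// access size) or -8 (negative) cannot use the scaled form, but can still be
/// selected here as long as it lies in the signed 9-bit range [-256, 255].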
7498InstructionSelector::ComplexRendererFns
7499AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
7500 unsigned Size) const {
7501 MachineRegisterInfo &MRI =
7502 Root.getParent()->getParent()->getParent()->getRegInfo();
7503
7504 if (!Root.isReg())
7505 return std::nullopt;
7506
7507 if (!isBaseWithConstantOffset(Root, MRI))
7508 return std::nullopt;
7509
7510 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
7511
7512 MachineOperand &OffImm = RootDef->getOperand(2);
7513 if (!OffImm.isReg())
7514 return std::nullopt;
7515 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
7516 if (RHS->getOpcode() != TargetOpcode::G_CONSTANT)
7517 return std::nullopt;
7518 int64_t RHSC;
7519 MachineOperand &RHSOp1 = RHS->getOperand(1);
7520 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
7521 return std::nullopt;
7522 RHSC = RHSOp1.getCImm()->getSExtValue();
7523
7524 if (RHSC >= -256 && RHSC < 256) {
7525 MachineOperand &Base = RootDef->getOperand(1);
7526 return {{
7527 [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
7528 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
7529 }};
7530 }
7531 return std::nullopt;
7532}
7533
7534InstructionSelector::ComplexRendererFns
7535AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
7536 unsigned Size,
7537 MachineRegisterInfo &MRI) const {
7538 if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
7539 return std::nullopt;
7540 MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
7541 if (Adrp.getOpcode() != AArch64::ADRP)
7542 return std::nullopt;
7543
7544 // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
7545 auto Offset = Adrp.getOperand(1).getOffset();
7546 if (Offset % Size != 0)
7547 return std::nullopt;
7548
7549 auto GV = Adrp.getOperand(1).getGlobal();
7550 if (GV->isThreadLocal())
7551 return std::nullopt;
7552
7553 auto &MF = *RootDef.getParent()->getParent();
7554 if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
7555 return std::nullopt;
7556
7557 unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
7558 MachineIRBuilder MIRBuilder(RootDef);
7559 Register AdrpReg = Adrp.getOperand(0).getReg();
7560 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
7561 [=](MachineInstrBuilder &MIB) {
7562 MIB.addGlobalAddress(GV, Offset,
7563 OpFlags | AArch64II::MO_PAGEOFF |
7564 AArch64II::MO_NC);
7565 }}};
7566}
7567
7568/// Select a "register plus scaled unsigned 12-bit immediate" address. The
7569/// "Size" argument is the size in bytes of the memory reference, which
7570/// determines the scale.
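///
/// For example, with Size == 4 the byte offset must be a non-negative
/// multiple of 4 below 16384 (4095 * 4 = 16380 is the largest encodable
/// offset), and the rendered immediate is the byte offset divided by 4.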
7571InstructionSelector::ComplexRendererFns
7572AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
7573 unsigned Size) const {
7574 MachineFunction &MF = *Root.getParent()->getParent()->getParent();
7575 MachineRegisterInfo &MRI = MF.getRegInfo();
7576
7577 if (!Root.isReg())
7578 return std::nullopt;
7579
7580 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
7581 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
7582 return {{
7583 [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
7584 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
7585 }};
7586 }
7587
7588 CodeModel::Model CM = MF.getTarget().getCodeModel();
7589 // Check if we can fold in the ADD of small code model ADRP + ADD address.
7590 // HACK: ld64 on Darwin doesn't support relocations on PRFM, so we can't fold
7591 // globals into the offset.
7592 MachineInstr *RootParent = Root.getParent();
7593 if (CM == CodeModel::Small &&
7594 !(RootParent->getOpcode() == AArch64::G_AARCH64_PREFETCH &&
7595 STI.isTargetDarwin())) {
7596 auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
7597 if (OpFns)
7598 return OpFns;
7599 }
7600
7601 if (isBaseWithConstantOffset(Root, MRI)) {
7602 MachineOperand &LHS = RootDef->getOperand(1);
7603 MachineOperand &RHS = RootDef->getOperand(2);
7604 MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
7605 MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
7606
7607 int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
7608 unsigned Scale = Log2_32(Size);
7609 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
7610 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
7611 return {{
7612 [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
7613 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
7614 }};
7615
7616 return {{
7617 [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
7618 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
7619 }};
7620 }
7621 }
7622
7623 // Before falling back to our general case, check if the unscaled
7624 // instructions can handle this. If so, that's preferable.
7625 if (selectAddrModeUnscaled(Root, Size))
7626 return std::nullopt;
7627
7628 return {{
7629 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
7630 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
7631 }};
7632}
7633
7634/// Given a shift instruction, return the correct shift type for that
7635/// instruction.
7636 static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
7637 switch (MI.getOpcode()) {
7638 default:
7639 return AArch64_AM::InvalidShiftExtend;
7640 case TargetOpcode::G_SHL:
7641 return AArch64_AM::LSL;
7642 case TargetOpcode::G_LSHR:
7643 return AArch64_AM::LSR;
7644 case TargetOpcode::G_ASHR:
7645 return AArch64_AM::ASR;
7646 case TargetOpcode::G_ROTR:
7647 return AArch64_AM::ROR;
7648 }
7649}
7650
7651/// Select a "shifted register" operand. If the value is not shifted, set the
7652/// shift operand to a default value of "lsl 0".
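///
/// For example, folding a G_SHL by a constant 3 into a 64-bit add renders the
/// operand pair used by "add x0, x1, x2, lsl #3".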
7653InstructionSelector::ComplexRendererFns
7654AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
7655 bool AllowROR) const {
7656 if (!Root.isReg())
7657 return std::nullopt;
7658 MachineRegisterInfo &MRI =
7659 Root.getParent()->getParent()->getParent()->getRegInfo();
7660
7661 // Check if the operand is defined by an instruction which corresponds to
7662 // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
7663 MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
7664 AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
7665 if (ShType == AArch64_AM::InvalidShiftExtend)
7666 return std::nullopt;
7667 if (ShType == AArch64_AM::ROR && !AllowROR)
7668 return std::nullopt;
7669 if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI, false))
7670 return std::nullopt;
7671
7672 // Need an immediate on the RHS.
7673 MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
7674 auto Immed = getImmedFromMO(ShiftRHS);
7675 if (!Immed)
7676 return std::nullopt;
7677
7678 // We have something that we can fold. Fold in the shift's LHS and RHS into
7679 // the instruction.
7680 MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
7681 Register ShiftReg = ShiftLHS.getReg();
7682
7683 unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
7684 unsigned Val = *Immed & (NumBits - 1);
7685 unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);
7686
7687 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
7688 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
7689}
7690
7691AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
7692 MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
7693 unsigned Opc = MI.getOpcode();
7694
7695 // Handle explicit extend instructions first.
7696 if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
7697 unsigned Size;
7698 if (Opc == TargetOpcode::G_SEXT)
7699 Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7700 else
7701 Size = MI.getOperand(2).getImm();
7702 assert(Size != 64 && "Extend from 64 bits?");
7703 switch (Size) {
7704 case 8:
7705 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB;
7706 case 16:
7707 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH;
7708 case 32:
7709 return AArch64_AM::SXTW;
7710 default:
7711 return AArch64_AM::InvalidShiftExtend;
7712 }
7713 }
7714
7715 if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
7716 unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7717 assert(Size != 64 && "Extend from 64 bits?");
7718 switch (Size) {
7719 case 8:
7720 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB;
7721 case 16:
7722 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH;
7723 case 32:
7724 return AArch64_AM::UXTW;
7725 default:
7726 return AArch64_AM::InvalidShiftExtend;
7727 }
7728 }
7729
7730 // Don't have an explicit extend. Try to handle a G_AND with a constant mask
7731 // on the RHS.
7732 if (Opc != TargetOpcode::G_AND)
7733 return AArch64_AM::InvalidShiftExtend;
7734
7735 std::optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
7736 if (!MaybeAndMask)
7737 return AArch64_AM::InvalidShiftExtend;
7738 uint64_t AndMask = *MaybeAndMask;
7739 switch (AndMask) {
7740 default:
7741 return AArch64_AM::InvalidShiftExtend;
7742 case 0xFF:
7743 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
7744 case 0xFFFF:
7745 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
7746 case 0xFFFFFFFF:
7747 return AArch64_AM::UXTW;
7748 }
7749}
7750
7751Register AArch64InstructionSelector::moveScalarRegClass(
7752 Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
7753 MachineRegisterInfo &MRI = *MIB.getMRI();
7754 auto Ty = MRI.getType(Reg);
7755 assert(!Ty.isVector() && "Expected scalars only!");
7756 if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
7757 return Reg;
7758
7759 // Create a copy and immediately select it.
7760 // FIXME: We should have an emitCopy function?
7761 auto Copy = MIB.buildCopy({&RC}, {Reg});
7762 selectCopy(*Copy, TII, MRI, TRI, RBI);
7763 return Copy.getReg(0);
7764}
7765
7766/// Select an "extended register" operand. This operand folds in an extend
7767/// followed by an optional left shift.
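///
/// For example, an add of a pointer and a sign-extended 32-bit index shifted
/// left by 2 can be selected as "add x0, x1, w2, sxtw #2"; shift amounts
/// greater than 4 are rejected below.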
7768InstructionSelector::ComplexRendererFns
7769AArch64InstructionSelector::selectArithExtendedRegister(
7770 MachineOperand &Root) const {
7771 if (!Root.isReg())
7772 return std::nullopt;
7773 MachineRegisterInfo &MRI =
7774 Root.getParent()->getParent()->getParent()->getRegInfo();
7775
7776 uint64_t ShiftVal = 0;
7777 Register ExtReg;
7778 AArch64_AM::ShiftExtendType Ext;
7779 MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
7780 if (!RootDef)
7781 return std::nullopt;
7782
7783 if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI, false))
7784 return std::nullopt;
7785
7786 // Check if we can fold a shift and an extend.
7787 if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
7788 // Look for a constant on the RHS of the shift.
7789 MachineOperand &RHS = RootDef->getOperand(2);
7790 std::optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
7791 if (!MaybeShiftVal)
7792 return std::nullopt;
7793 ShiftVal = *MaybeShiftVal;
7794 if (ShiftVal > 4)
7795 return std::nullopt;
7796 // Look for a valid extend instruction on the LHS of the shift.
7797 MachineOperand &LHS = RootDef->getOperand(1);
7798 MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
7799 if (!ExtDef)
7800 return std::nullopt;
7801 Ext = getExtendTypeForInst(*ExtDef, MRI);
7802 if (Ext == AArch64_AM::InvalidShiftExtend)
7803 return std::nullopt;
7804 ExtReg = ExtDef->getOperand(1).getReg();
7805 } else {
7806 // Didn't get a shift. Try just folding an extend.
7807 Ext = getExtendTypeForInst(*RootDef, MRI);
7808 if (Ext == AArch64_AM::InvalidShiftExtend)
7809 return std::nullopt;
7810 ExtReg = RootDef->getOperand(1).getReg();
7811
7812 // If we have a 32 bit instruction which zeroes out the high half of a
7813 // register, we get an implicit zero extend for free. Check if we have one.
7814 // FIXME: We actually emit the extend right now even though we don't have
7815 // to.
7816 if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
7817 MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
7818 if (isDef32(*ExtInst))
7819 return std::nullopt;
7820 }
7821 }
7822
7823 // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
7824 // copy.
7825 MachineIRBuilder MIB(*RootDef);
7826 ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);
7827
7828 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
7829 [=](MachineInstrBuilder &MIB) {
7830 MIB.addImm(getArithExtendImm(Ext, ShiftVal));
7831 }}};
7832}
7833
7834InstructionSelector::ComplexRendererFns
7835AArch64InstructionSelector::selectExtractHigh(MachineOperand &Root) const {
7836 if (!Root.isReg())
7837 return std::nullopt;
7838 MachineRegisterInfo &MRI =
7839 Root.getParent()->getParent()->getParent()->getRegInfo();
7840
7841 auto Extract = getDefSrcRegIgnoringCopies(Root.getReg(), MRI);
7842 while (Extract && Extract->MI->getOpcode() == TargetOpcode::G_BITCAST &&
7843 STI.isLittleEndian())
7844 Extract =
7845 getDefSrcRegIgnoringCopies(Extract->MI->getOperand(1).getReg(), MRI);
7846 if (!Extract)
7847 return std::nullopt;
7848
7849 if (auto *Unmerge = dyn_cast<GUnmerge>(Extract->MI)) {
7850 if (Unmerge->getNumDefs() == 2 &&
7851 Extract->Reg == Unmerge->getOperand(1).getReg()) {
7852 Register ExtReg = Unmerge->getSourceReg();
7853 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
7854 }
7855 }
7856 if (auto *ExtElt = dyn_cast<GExtractVectorElement>(Extract->MI)) {
7857 LLT SrcTy = MRI.getType(ExtElt->getVectorReg());
7858 auto LaneIdx =
7859 getIConstantVRegValWithLookThrough(ExtElt->getIndexReg(), MRI);
7860 if (LaneIdx && SrcTy == LLT::fixed_vector(2, 64) &&
7861 LaneIdx->Value.getSExtValue() == 1) {
7862 Register ExtReg = ExtElt->getVectorReg();
7863 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
7864 }
7865 }
7866 if (auto *Subvec = dyn_cast<GExtractSubvector>(Extract->MI)) {
7867 LLT SrcTy = MRI.getType(Subvec->getSrcVec());
7868 auto LaneIdx = Subvec->getIndexImm();
7869 if (LaneIdx == SrcTy.getNumElements() / 2) {
7870 Register ExtReg = Subvec->getSrcVec();
7871 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
7872 }
7873 }
7874
7875 return std::nullopt;
7876}
7877
7878InstructionSelector::ComplexRendererFns
7879AArch64InstructionSelector::selectCVTFixedPointVecBase(
7880 const MachineOperand &Root, bool isReciprocal) const {
7881 if (!Root.isReg())
7882 return std::nullopt;
7883 const MachineRegisterInfo &MRI =
7884 Root.getParent()->getParent()->getParent()->getRegInfo();
7885
7886 MachineInstr *Dup = getDefIgnoringCopies(Root.getReg(), MRI);
7887 if (Dup->getOpcode() != AArch64::G_DUP)
7888 return std::nullopt;
7889 std::optional<ValueAndVReg> CstVal =
7890 getIConstantVRegValWithLookThrough(Dup->getOperand(1).getReg(), MRI);
7891 if (!CstVal)
7892 return std::nullopt;
7893
7894 unsigned RegWidth = MRI.getType(Root.getReg()).getScalarSizeInBits();
7895 APFloat FVal(0.0);
7896 switch (RegWidth) {
7897 case 16:
7898 FVal = APFloat(APFloat::IEEEhalf(), CstVal->Value);
7899 break;
7900 case 32:
7901 FVal = APFloat(APFloat::IEEEsingle(), CstVal->Value);
7902 break;
7903 case 64:
7904 FVal = APFloat(APFloat::IEEEdouble(), CstVal->Value);
7905 break;
7906 default:
7907 return std::nullopt;
7908 };
7909 if (unsigned FBits =
7910 CheckFixedPointOperandConstant(FVal, RegWidth, isReciprocal))
7911 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(FBits); }}};
7912
7913 return std::nullopt;
7914}
7915
7916InstructionSelector::ComplexRendererFns
7917AArch64InstructionSelector::selectCVTFixedPointVec(MachineOperand &Root) const {
7918 return selectCVTFixedPointVecBase(Root, /*isReciprocal*/ false);
7919}
7920
7921InstructionSelector::ComplexRendererFns
7922AArch64InstructionSelector::selectCVTFixedPosRecipOperandVec(
7923 MachineOperand &Root) const {
7924 return selectCVTFixedPointVecBase(Root, /*isReciprocal*/ true);
7925}
7926
7927void AArch64InstructionSelector::renderFixedPointXForm(MachineInstrBuilder &MIB,
7928 const MachineInstr &MI,
7929 int OpIdx) const {
7930 // FIXME: This is only needed to satisfy the type checking in tablegen, and
7931 // should be able to reuse the Renderers already calculated by
7932 // selectCVTFixedPointVecBase.
7933 InstructionSelector::ComplexRendererFns Renderer =
7934 selectCVTFixedPointVecBase(MI.getOperand(2), /*isReciprocal*/ false);
7935 assert((Renderer && Renderer->size() == 1) &&
7936 "Expected selectCVTFixedPointVec to provide a function\n");
7937 (Renderer->front())(MIB);
7938}
7939
7940void AArch64InstructionSelector::renderFixedPointRecipXForm(
7941 MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
7942 InstructionSelector::ComplexRendererFns Renderer =
7943 selectCVTFixedPointVecBase(MI.getOperand(2), /*isReciprocal*/ true);
7944 assert((Renderer && Renderer->size() == 1) &&
7945 "Expected selectCVTFixedPosRecipOperandVec to provide a function\n");
7946 (Renderer->front())(MIB);
7947}
7948
7949void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
7950 const MachineInstr &MI,
7951 int OpIdx) const {
7952 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
7953 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7954 "Expected G_CONSTANT");
7955 std::optional<int64_t> CstVal =
7956 getIConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
7957 assert(CstVal && "Expected constant value");
7958 MIB.addImm(*CstVal);
7959}
7960
7961void AArch64InstructionSelector::renderLogicalImm32(
7962 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7963 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7964 "Expected G_CONSTANT");
7965 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
7966 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
7967 MIB.addImm(Enc);
7968}
7969
7970void AArch64InstructionSelector::renderLogicalImm64(
7971 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7972 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7973 "Expected G_CONSTANT");
7974 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
7975 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
7976 MIB.addImm(Enc);
7977}
7978
7979void AArch64InstructionSelector::renderUbsanTrap(MachineInstrBuilder &MIB,
7980 const MachineInstr &MI,
7981 int OpIdx) const {
7982 assert(MI.getOpcode() == TargetOpcode::G_UBSANTRAP && OpIdx == 0 &&
7983 "Expected G_UBSANTRAP");
7984 MIB.addImm(MI.getOperand(0).getImm() | ('U' << 8));
7985}
7986
7987void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB,
7988 const MachineInstr &MI,
7989 int OpIdx) const {
7990 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7991 "Expected G_FCONSTANT");
7992 MIB.addImm(
7993 AArch64_AM::getFP16Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7994}
7995
7996void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB,
7997 const MachineInstr &MI,
7998 int OpIdx) const {
7999 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
8000 "Expected G_FCONSTANT");
8001 MIB.addImm(
8002 AArch64_AM::getFP32Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
8003}
8004
8005void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
8006 const MachineInstr &MI,
8007 int OpIdx) const {
8008 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
8009 "Expected G_FCONSTANT");
8010 MIB.addImm(
8011 AArch64_AM::getFP64Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
8012}
8013
8014void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
8015 MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
8016 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
8017 "Expected G_FCONSTANT");
8018 MIB.addImm(AArch64_AM::encodeAdvSIMDModImmType4(MI.getOperand(1)
8019 .getFPImm()
8020 ->getValueAPF()
8021 .bitcastToAPInt()
8022 .getZExtValue()));
8023}
8024
8025bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
8026 const MachineInstr &MI, unsigned NumBytes) const {
8027 if (!MI.mayLoadOrStore())
8028 return false;
8029 assert(MI.hasOneMemOperand() &&
8030 "Expected load/store to have only one mem op!");
8031 return (*MI.memoperands_begin())->getSize() == NumBytes;
8032}
8033
8034bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
8035 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
8036 if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
8037 return false;
8038
8039 // Only return true if we know the operation will zero-out the high half of
8040 // the 64-bit register. Truncates can be subregister copies, which don't
8041 // zero out the high bits. Copies and other copy-like instructions can be
8042 // fed by truncates, or could be lowered as subregister copies.
8043 switch (MI.getOpcode()) {
8044 default:
8045 return true;
8046 case TargetOpcode::COPY:
8047 case TargetOpcode::G_BITCAST:
8048 case TargetOpcode::G_TRUNC:
8049 case TargetOpcode::G_PHI:
8050 return false;
8051 }
8052}
8053
8054
8055// Perform fixups on the given PHI instruction's operands to force them all
8056// to be the same as the destination regbank.
8057static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
8058 const AArch64RegisterBankInfo &RBI) {
8059 assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
8060 Register DstReg = MI.getOperand(0).getReg();
8061 const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
8062 assert(DstRB && "Expected PHI dst to have regbank assigned");
8063 MachineIRBuilder MIB(MI);
8064
8065 // Go through each operand and ensure it has the same regbank.
8066 for (MachineOperand &MO : llvm::drop_begin(MI.operands())) {
8067 if (!MO.isReg())
8068 continue;
8069 Register OpReg = MO.getReg();
8070 const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
8071 if (RB != DstRB) {
8072 // Insert a cross-bank copy.
8073 auto *OpDef = MRI.getVRegDef(OpReg);
8074 const LLT &Ty = MRI.getType(OpReg);
8075 MachineBasicBlock &OpDefBB = *OpDef->getParent();
8076
8077 // Any instruction we insert must appear after all PHIs in the block
8078 // for the block to be valid MIR.
8079 MachineBasicBlock::iterator InsertPt = std::next(OpDef->getIterator());
8080 if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
8081 InsertPt = OpDefBB.getFirstNonPHI();
8082 MIB.setInsertPt(*OpDef->getParent(), InsertPt);
8083 auto Copy = MIB.buildCopy(Ty, OpReg);
8084 MRI.setRegBank(Copy.getReg(0), *DstRB);
8085 MO.setReg(Copy.getReg(0));
8086 }
8087 }
8088}
8089
8090void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
8091 // We're looking for PHIs, build a list so we don't invalidate iterators.
8092 MachineRegisterInfo &MRI = MF.getRegInfo();
8093 SmallVector<MachineInstr *, 32> Phis;
8094 for (auto &BB : MF) {
8095 for (auto &MI : BB) {
8096 if (MI.getOpcode() == TargetOpcode::G_PHI)
8097 Phis.emplace_back(&MI);
8098 }
8099 }
8100
8101 for (auto *MI : Phis) {
8102 // We need to do some work here if the operand types are < 16 bit and they
8103 // are split across fpr/gpr banks. Since all types <32b on gpr
8104 // end up being assigned gpr32 regclasses, we can end up with PHIs here
8105 // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
8106 // be selecting heterogeneous regbanks for operands if possible, but we
8107 // still need to be able to deal with it here.
8108 //
8109 // To fix this, if we have a gpr-bank operand < 32b in size and at least
8110 // one other operand is on the fpr bank, then we add cross-bank copies
8111 // to homogenize the operand banks. For simplicity the bank that we choose
8112 // to settle on is whatever bank the def operand has. For example:
8113 //
8114 // %endbb:
8115 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
8116 // =>
8117 // %bb2:
8118 // ...
8119 // %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
8120 // ...
8121 // %endbb:
8122 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
8123 bool HasGPROp = false, HasFPROp = false;
8124 for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
8125 if (!MO.isReg())
8126 continue;
8127 const LLT &Ty = MRI.getType(MO.getReg());
8128 if (!Ty.isValid() || !Ty.isScalar())
8129 break;
8130 if (Ty.getSizeInBits() >= 32)
8131 break;
8132 const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
8133 // If for some reason we don't have a regbank yet. Don't try anything.
8134 if (!RB)
8135 break;
8136
8137 if (RB->getID() == AArch64::GPRRegBankID)
8138 HasGPROp = true;
8139 else
8140 HasFPROp = true;
8141 }
8142 // We have heterogeneous regbanks, need to fixup.
8143 if (HasGPROp && HasFPROp)
8144 fixupPHIOpBanks(*MI, MRI, RBI);
8145 }
8146}
8147
8148namespace llvm {
8149InstructionSelector *
8150createAArch64InstructionSelector(const AArch64TargetMachine &TM,
8151 const AArch64Subtarget &Subtarget,
8152 const AArch64RegisterBankInfo &RBI) {
8153 return new AArch64InstructionSelector(TM, Subtarget, RBI);
8154}
8155}