1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
15#include "AArch64InstrInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
41#include "llvm/IR/Constants.h"
44#include "llvm/IR/IntrinsicsAArch64.h"
46#include "llvm/IR/Type.h"
47#include "llvm/Pass.h"
48#include "llvm/Support/Debug.h"
50#include <optional>
51
52#define DEBUG_TYPE "aarch64-isel"
53
54using namespace llvm;
55using namespace MIPatternMatch;
56using namespace AArch64GISelUtils;
57
58namespace llvm {
59class BlockFrequencyInfo;
60class ProfileSummaryInfo;
61}
62
63namespace {
64
65#define GET_GLOBALISEL_PREDICATE_BITSET
66#include "AArch64GenGlobalISel.inc"
67#undef GET_GLOBALISEL_PREDICATE_BITSET
68
69
70class AArch64InstructionSelector : public InstructionSelector {
71public:
72 AArch64InstructionSelector(const AArch64TargetMachine &TM,
73 const AArch64Subtarget &STI,
74 const AArch64RegisterBankInfo &RBI);
75
76 bool select(MachineInstr &I) override;
77 static const char *getName() { return DEBUG_TYPE; }
78
79 void setupMF(MachineFunction &MF, GISelKnownBits *KB,
80 CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI,
81 BlockFrequencyInfo *BFI) override {
82 InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
83 MIB.setMF(MF);
84
85 // hasFnAttribute() is expensive to call on every BRCOND selection, so
86 // cache it here for each run of the selector.
87 ProduceNonFlagSettingCondBr =
88 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
89 MFReturnAddr = Register();
90
91 processPHIs(MF);
92 }
93
94private:
95 /// tblgen-erated 'select' implementation, used as the initial selector for
96 /// the patterns that don't require complex C++.
97 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
98
99 // A lowering phase that runs before any selection attempts.
100 // Returns true if the instruction was modified.
101 bool preISelLower(MachineInstr &I);
102
103 // An early selection function that runs before the selectImpl() call.
104 bool earlySelect(MachineInstr &I);
105
106 /// Save state that is shared between select calls, call select on \p I and
107 /// then restore the saved state. This can be used to recursively call select
108 /// within a select call.
109 bool selectAndRestoreState(MachineInstr &I);
110
111 // Do some preprocessing of G_PHIs before we begin selection.
112 void processPHIs(MachineFunction &MF);
113
114 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
115
116 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
117 bool contractCrossBankCopyIntoStore(MachineInstr &I,
118 MachineRegisterInfo &MRI);
119
120 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
121
122 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
123 MachineRegisterInfo &MRI) const;
124 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
125 MachineRegisterInfo &MRI) const;
126
127 ///@{
128 /// Helper functions for selectCompareBranch.
129 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
130 MachineIRBuilder &MIB) const;
131 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
132 MachineIRBuilder &MIB) const;
133 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
134 MachineIRBuilder &MIB) const;
135 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
136 MachineBasicBlock *DstMBB,
137 MachineIRBuilder &MIB) const;
138 ///@}
139
140 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
141 MachineRegisterInfo &MRI);
142
143 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
144 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
145
146 // Helper to generate an equivalent of scalar_to_vector into a new register,
147 // returned via 'Dst'.
148 MachineInstr *emitScalarToVector(unsigned EltSize,
149 const TargetRegisterClass *DstRC,
150 Register Scalar,
151 MachineIRBuilder &MIRBuilder) const;
152 /// Helper to narrow a vector that was widened by emitScalarToVector.
153 /// Copy the lowest part of a 128-bit or 64-bit vector to a 64-bit or 32-bit
154 /// vector, respectively.
155 MachineInstr *emitNarrowVector(Register DstReg, Register SrcReg,
156 MachineIRBuilder &MIRBuilder,
157 MachineRegisterInfo &MRI) const;
158
159 /// Emit a lane insert into \p DstReg, or a new vector register if
160 /// std::nullopt is provided.
161 ///
162 /// The lane inserted into is defined by \p LaneIdx. The vector source
163 /// register is given by \p SrcReg. The register containing the element is
164 /// given by \p EltReg.
165 MachineInstr *emitLaneInsert(std::optional<Register> DstReg, Register SrcReg,
166 Register EltReg, unsigned LaneIdx,
167 const RegisterBank &RB,
168 MachineIRBuilder &MIRBuilder) const;
169
170 /// Emit a sequence of instructions representing a constant \p CV for a
171 /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
172 ///
173 /// \returns the last instruction in the sequence on success, and nullptr
174 /// otherwise.
175 MachineInstr *emitConstantVector(Register Dst, Constant *CV,
176 MachineIRBuilder &MIRBuilder,
177 MachineRegisterInfo &MRI) const;
178
179 MachineInstr *tryAdvSIMDModImm8(Register Dst, unsigned DstSize, APInt Bits,
180 MachineIRBuilder &MIRBuilder);
181
182 MachineInstr *tryAdvSIMDModImm16(Register Dst, unsigned DstSize, APInt Bits,
183 MachineIRBuilder &MIRBuilder, bool Inv);
184
185 MachineInstr *tryAdvSIMDModImm32(Register Dst, unsigned DstSize, APInt Bits,
186 MachineIRBuilder &MIRBuilder, bool Inv);
187 MachineInstr *tryAdvSIMDModImm64(Register Dst, unsigned DstSize, APInt Bits,
188 MachineIRBuilder &MIRBuilder);
189 MachineInstr *tryAdvSIMDModImm321s(Register Dst, unsigned DstSize, APInt Bits,
190 MachineIRBuilder &MIRBuilder, bool Inv);
191 MachineInstr *tryAdvSIMDModImmFP(Register Dst, unsigned DstSize, APInt Bits,
192 MachineIRBuilder &MIRBuilder);
193
194 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
195 MachineRegisterInfo &MRI);
196 /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
197 /// SUBREG_TO_REG.
198 bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
199 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
202
203 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
204 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
205 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
206 bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
207
208 /// Helper function to select vector load intrinsics like
209 /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
210 /// \p Opc is the opcode that the selected instruction should use.
211 /// \p NumVecs is the number of vector destinations for the instruction.
212 /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
213 bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
214 MachineInstr &I);
215 bool selectVectorLoadLaneIntrinsic(unsigned Opc, unsigned NumVecs,
216 MachineInstr &I);
217 void selectVectorStoreIntrinsic(MachineInstr &I, unsigned NumVecs,
218 unsigned Opc);
219 bool selectVectorStoreLaneIntrinsic(MachineInstr &I, unsigned NumVecs,
220 unsigned Opc);
221 bool selectIntrinsicWithSideEffects(MachineInstr &I,
222 MachineRegisterInfo &MRI);
223 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
224 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
225 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
226 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
227 bool selectPtrAuthGlobalValue(MachineInstr &I,
228 MachineRegisterInfo &MRI) const;
229 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
230 bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
231 bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
232 void SelectTable(MachineInstr &I, MachineRegisterInfo &MRI, unsigned NumVecs,
233 unsigned Opc1, unsigned Opc2, bool isExt);
234
235 bool selectIndexedExtLoad(MachineInstr &I, MachineRegisterInfo &MRI);
236 bool selectIndexedLoad(MachineInstr &I, MachineRegisterInfo &MRI);
237 bool selectIndexedStore(GIndexedStore &I, MachineRegisterInfo &MRI);
238
239 unsigned emitConstantPoolEntry(const Constant *CPVal,
240 MachineFunction &MF) const;
241 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
242 MachineIRBuilder &MIRBuilder) const;
243
244 // Emit a vector concat operation.
245 MachineInstr *emitVectorConcat(std::optional<Register> Dst, Register Op1,
246 Register Op2,
247 MachineIRBuilder &MIRBuilder) const;
248
249 // Emit an integer compare between LHS and RHS, which checks for Predicate.
250 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
251 MachineOperand &Predicate,
252 MachineIRBuilder &MIRBuilder) const;
253
254 /// Emit a floating point comparison between \p LHS and \p RHS.
255 /// \p Pred if given is the intended predicate to use.
256 MachineInstr *
257 emitFPCompare(Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
258 std::optional<CmpInst::Predicate> = std::nullopt) const;
259
260 MachineInstr *
261 emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
262 std::initializer_list<llvm::SrcOp> SrcOps,
263 MachineIRBuilder &MIRBuilder,
264 const ComplexRendererFns &RenderFns = std::nullopt) const;
265 /// Helper function to emit an add or sub instruction.
266 ///
267 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
268 /// in a specific order.
269 ///
270 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
271 ///
272 /// \code
273 /// const std::array<std::array<unsigned, 2>, 5> Table {
274 /// {{AArch64::ADDXri, AArch64::ADDWri},
275 /// {AArch64::ADDXrs, AArch64::ADDWrs},
276 /// {AArch64::ADDXrr, AArch64::ADDWrr},
277 /// {AArch64::SUBXri, AArch64::SUBWri},
278 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
279 /// \endcode
280 ///
281 /// Each row in the table corresponds to a different addressing mode. Each
282 /// column corresponds to a different register size.
283 ///
284 /// \attention Rows must be structured as follows:
285 /// - Row 0: The ri opcode variants
286 /// - Row 1: The rs opcode variants
287 /// - Row 2: The rr opcode variants
288 /// - Row 3: The ri opcode variants for negative immediates
289 /// - Row 4: The rx opcode variants
290 ///
291 /// \attention Columns must be structured as follows:
292 /// - Column 0: The 64-bit opcode variants
293 /// - Column 1: The 32-bit opcode variants
294 ///
295 /// \p Dst is the destination register of the binop to emit.
296 /// \p LHS is the left-hand operand of the binop to emit.
297 /// \p RHS is the right-hand operand of the binop to emit.
298 MachineInstr *emitAddSub(
299 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
300 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
301 MachineIRBuilder &MIRBuilder) const;
302 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
303 MachineOperand &RHS,
304 MachineIRBuilder &MIRBuilder) const;
306 MachineIRBuilder &MIRBuilder) const;
308 MachineIRBuilder &MIRBuilder) const;
310 MachineIRBuilder &MIRBuilder) const;
312 MachineIRBuilder &MIRBuilder) const;
314 MachineIRBuilder &MIRBuilder) const;
316 MachineIRBuilder &MIRBuilder) const;
317 MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
318 AArch64CC::CondCode CC,
319 MachineIRBuilder &MIRBuilder) const;
320 MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,
321 const RegisterBank &DstRB, LLT ScalarTy,
322 Register VecReg, unsigned LaneIdx,
323 MachineIRBuilder &MIRBuilder) const;
324 MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
325 AArch64CC::CondCode Pred,
326 MachineIRBuilder &MIRBuilder) const;
327 /// Emit a CSet for a FP compare.
328 ///
329 /// \p Dst is expected to be a 32-bit scalar register.
330 MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
331 MachineIRBuilder &MIRBuilder) const;
332
333 /// Emit an instruction that sets NZCV to the carry-in expected by \p I.
334 /// Might elide the instruction if the previous instruction already sets NZCV
335 /// correctly.
336 MachineInstr *emitCarryIn(MachineInstr &I, Register CarryReg);
337
338 /// Emit the overflow op for \p Opcode.
339 ///
340 /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
341 /// G_USUBO, etc.
342 std::pair<MachineInstr *, AArch64CC::CondCode>
343 emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
344 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
345
346 bool selectOverflowOp(MachineInstr &I, MachineRegisterInfo &MRI);
347
348 /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
349 /// In some cases this is even possible with OR operations in the expression.
351 MachineIRBuilder &MIB) const;
354 AArch64CC::CondCode Predicate,
356 MachineIRBuilder &MIB) const;
358 bool Negate, Register CCOp,
359 AArch64CC::CondCode Predicate,
360 MachineIRBuilder &MIB) const;
361
362 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
363 /// \p IsNegative is true if the test should be "not zero".
364 /// This will also optimize the test bit instruction when possible.
365 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
366 MachineBasicBlock *DstMBB,
367 MachineIRBuilder &MIB) const;
368
369 /// Emit a CB(N)Z instruction which branches to \p DestMBB.
370 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
371 MachineBasicBlock *DestMBB,
372 MachineIRBuilder &MIB) const;
373
374 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
375 // We use these manually instead of using the importer since it doesn't
376 // support SDNodeXForm.
377 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
378 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
379 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
380 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
381
382 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
383 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
384 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
385
386 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
387 unsigned Size) const;
388
389 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
390 return selectAddrModeUnscaled(Root, 1);
391 }
392 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
393 return selectAddrModeUnscaled(Root, 2);
394 }
395 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
396 return selectAddrModeUnscaled(Root, 4);
397 }
398 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
399 return selectAddrModeUnscaled(Root, 8);
400 }
401 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
402 return selectAddrModeUnscaled(Root, 16);
403 }
404
405 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
406 /// from complex pattern matchers like selectAddrModeIndexed().
407 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
408 MachineRegisterInfo &MRI) const;
409
410 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
411 unsigned Size) const;
412 template <int Width>
413 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
414 return selectAddrModeIndexed(Root, Width / 8);
415 }
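// Illustrative note (assumed usage): the tblgen-erated selector instantiates
// these wrappers with the access width in bits, e.g. selectAddrModeIndexed<64>
// for an 8-byte scaled addressing mode, so Size ends up as Width / 8 bytes.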
416
417 std::optional<bool>
418 isWorthFoldingIntoAddrMode(MachineInstr &MI,
419 const MachineRegisterInfo &MRI) const;
420
421 bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
422 const MachineRegisterInfo &MRI,
423 bool IsAddrOperand) const;
424 ComplexRendererFns
425 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
426 unsigned SizeInBytes) const;
427
428 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
429 /// or not a shift + extend should be folded into an addressing mode. Returns
430 /// None when this is not profitable or possible.
431 ComplexRendererFns
432 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
433 MachineOperand &Offset, unsigned SizeInBytes,
434 bool WantsExt) const;
435 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
436 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
437 unsigned SizeInBytes) const;
438 template <int Width>
439 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
440 return selectAddrModeXRO(Root, Width / 8);
441 }
442
443 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
444 unsigned SizeInBytes) const;
445 template <int Width>
446 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
447 return selectAddrModeWRO(Root, Width / 8);
448 }
449
450 ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
451 bool AllowROR = false) const;
452
453 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
454 return selectShiftedRegister(Root);
455 }
456
457 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
458 return selectShiftedRegister(Root, true);
459 }
460
461 /// Given an extend instruction, determine the correct shift-extend type for
462 /// that instruction.
463 ///
464 /// If the instruction is going to be used in a load or store, pass
465 /// \p IsLoadStore = true.
466 AArch64_AM::ShiftExtendType
467 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
468 bool IsLoadStore = false) const;
469
470 /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
471 ///
472 /// \returns Either \p Reg if no change was necessary, or the new register
473 /// created by moving \p Reg.
474 ///
475 /// Note: This uses emitCopy right now.
476 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
477 MachineIRBuilder &MIB) const;
478
479 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
480
481 ComplexRendererFns selectExtractHigh(MachineOperand &Root) const;
482
483 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
484 int OpIdx = -1) const;
485 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
486 int OpIdx = -1) const;
487 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
488 int OpIdx = -1) const;
489 void renderUbsanTrap(MachineInstrBuilder &MIB, const MachineInstr &MI,
490 int OpIdx) const;
491 void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
492 int OpIdx = -1) const;
493 void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
494 int OpIdx = -1) const;
495 void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
496 int OpIdx = -1) const;
497 void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
498 const MachineInstr &MI,
499 int OpIdx = -1) const;
500
501 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
502 void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
503
504 // Optimization methods.
505 bool tryOptSelect(GSelect &Sel);
506 bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
507 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
508 MachineOperand &Predicate,
509 MachineIRBuilder &MIRBuilder) const;
510
511 /// Return true if \p MI is a load or store of \p NumBytes bytes.
512 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
513
514 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
515 /// register zeroed out. In other words, the result of MI has been explicitly
516 /// zero extended.
517 bool isDef32(const MachineInstr &MI) const;
518
519 const AArch64TargetMachine &TM;
520 const AArch64Subtarget &STI;
521 const AArch64InstrInfo &TII;
522 const AArch64RegisterInfo &TRI;
523 const AArch64RegisterBankInfo &RBI;
524
525 bool ProduceNonFlagSettingCondBr = false;
526
527 // Some cached values used during selection.
528 // We use LR as a live-in register, and we keep track of it here as it can be
529 // clobbered by calls.
530 Register MFReturnAddr;
531
532 MachineIRBuilder MIB;
533
534#define GET_GLOBALISEL_PREDICATES_DECL
535#include "AArch64GenGlobalISel.inc"
536#undef GET_GLOBALISEL_PREDICATES_DECL
537
538// We declare the temporaries used by selectImpl() in the class to minimize the
539// cost of constructing placeholder values.
540#define GET_GLOBALISEL_TEMPORARIES_DECL
541#include "AArch64GenGlobalISel.inc"
542#undef GET_GLOBALISEL_TEMPORARIES_DECL
543};
544
545} // end anonymous namespace
546
547#define GET_GLOBALISEL_IMPL
548#include "AArch64GenGlobalISel.inc"
549#undef GET_GLOBALISEL_IMPL
550
551AArch64InstructionSelector::AArch64InstructionSelector(
552 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
553 const AArch64RegisterBankInfo &RBI)
554 : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
555 RBI(RBI),
556#define GET_GLOBALISEL_PREDICATES_INIT
557#include "AArch64GenGlobalISel.inc"
558#undef GET_GLOBALISEL_PREDICATES_INIT
559#define GET_GLOBALISEL_TEMPORARIES_INIT
560#include "AArch64GenGlobalISel.inc"
561#undef GET_GLOBALISEL_TEMPORARIES_INIT
562{
563}
564
565// FIXME: This should be target-independent, inferred from the types declared
566// for each class in the bank.
567//
568/// Given a register bank, and a type, return the smallest register class that
569/// can represent that combination.
570static const TargetRegisterClass *
571getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
572 bool GetAllRegSet = false) {
573 if (RB.getID() == AArch64::GPRRegBankID) {
574 if (Ty.getSizeInBits() <= 32)
575 return GetAllRegSet ? &AArch64::GPR32allRegClass
576 : &AArch64::GPR32RegClass;
577 if (Ty.getSizeInBits() == 64)
578 return GetAllRegSet ? &AArch64::GPR64allRegClass
579 : &AArch64::GPR64RegClass;
580 if (Ty.getSizeInBits() == 128)
581 return &AArch64::XSeqPairsClassRegClass;
582 return nullptr;
583 }
584
585 if (RB.getID() == AArch64::FPRRegBankID) {
586 switch (Ty.getSizeInBits()) {
587 case 8:
588 return &AArch64::FPR8RegClass;
589 case 16:
590 return &AArch64::FPR16RegClass;
591 case 32:
592 return &AArch64::FPR32RegClass;
593 case 64:
594 return &AArch64::FPR64RegClass;
595 case 128:
596 return &AArch64::FPR128RegClass;
597 }
598 return nullptr;
599 }
600
601 return nullptr;
602}
603
604/// Given a register bank, and size in bits, return the smallest register class
605/// that can represent that combination.
606static const TargetRegisterClass *
607 getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits,
608 bool GetAllRegSet = false) {
609 if (SizeInBits.isScalable()) {
610 assert(RB.getID() == AArch64::FPRRegBankID &&
611 "Expected FPR regbank for scalable type size");
612 return &AArch64::ZPRRegClass;
613 }
614
615 unsigned RegBankID = RB.getID();
616
617 if (RegBankID == AArch64::GPRRegBankID) {
618 if (SizeInBits <= 32)
619 return GetAllRegSet ? &AArch64::GPR32allRegClass
620 : &AArch64::GPR32RegClass;
621 if (SizeInBits == 64)
622 return GetAllRegSet ? &AArch64::GPR64allRegClass
623 : &AArch64::GPR64RegClass;
624 if (SizeInBits == 128)
625 return &AArch64::XSeqPairsClassRegClass;
626 }
627
628 if (RegBankID == AArch64::FPRRegBankID) {
629 switch (SizeInBits) {
630 default:
631 return nullptr;
632 case 8:
633 return &AArch64::FPR8RegClass;
634 case 16:
635 return &AArch64::FPR16RegClass;
636 case 32:
637 return &AArch64::FPR32RegClass;
638 case 64:
639 return &AArch64::FPR64RegClass;
640 case 128:
641 return &AArch64::FPR128RegClass;
642 }
643 }
644
645 return nullptr;
646}
647
648/// Returns the correct subregister to use for a given register class.
649 static bool getSubRegForClass(const TargetRegisterClass *RC,
650 const TargetRegisterInfo &TRI, unsigned &SubReg) {
651 switch (TRI.getRegSizeInBits(*RC)) {
652 case 8:
653 SubReg = AArch64::bsub;
654 break;
655 case 16:
656 SubReg = AArch64::hsub;
657 break;
658 case 32:
659 if (RC != &AArch64::FPR32RegClass)
660 SubReg = AArch64::sub_32;
661 else
662 SubReg = AArch64::ssub;
663 break;
664 case 64:
665 SubReg = AArch64::dsub;
666 break;
667 default:
668 LLVM_DEBUG(
669 dbgs() << "Couldn't find appropriate subregister for register class.");
670 return false;
671 }
672
673 return true;
674}
675
676/// Returns the minimum size the given register bank can hold.
677static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
678 switch (RB.getID()) {
679 case AArch64::GPRRegBankID:
680 return 32;
681 case AArch64::FPRRegBankID:
682 return 8;
683 default:
684 llvm_unreachable("Tried to get minimum size for unknown register bank.");
685 }
686}
687
688/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
689/// Helper function for functions like createDTuple and createQTuple.
690///
691/// \p RegClassIDs - The list of register class IDs available for some tuple of
692/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
693/// expected to contain between 2 and 4 tuple classes.
694///
695/// \p SubRegs - The list of subregister classes associated with each register
696/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
697/// subregister class. The index of each subregister class is expected to
698/// correspond with the index of each register class.
699///
700/// \returns Either the destination register of REG_SEQUENCE instruction that
701/// was created, or the 0th element of \p Regs if \p Regs contains a single
702/// element.
703 static Register createTuple(ArrayRef<Register> Regs,
704 const unsigned RegClassIDs[],
705 const unsigned SubRegs[], MachineIRBuilder &MIB) {
706 unsigned NumRegs = Regs.size();
707 if (NumRegs == 1)
708 return Regs[0];
709 assert(NumRegs >= 2 && NumRegs <= 4 &&
710 "Only support between two and 4 registers in a tuple!");
711 const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
712 auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
713 auto RegSequence =
714 MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
715 for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
716 RegSequence.addUse(Regs[I]);
717 RegSequence.addImm(SubRegs[I]);
718 }
719 return RegSequence.getReg(0);
720}
721
722/// Create a tuple of D-registers using the registers in \p Regs.
723 static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
724 static const unsigned RegClassIDs[] = {
725 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
726 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
727 AArch64::dsub2, AArch64::dsub3};
728 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
729}
730
731/// Create a tuple of Q-registers using the registers in \p Regs.
732 static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
733 static const unsigned RegClassIDs[] = {
734 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
735 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
736 AArch64::qsub2, AArch64::qsub3};
737 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
738}
739
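/// Look for a constant immediate on \p Root: either an immediate operand, a
/// ConstantInt operand, or a register defined by a constant. \returns
/// std::nullopt if no constant value could be found.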
740static std::optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
741 auto &MI = *Root.getParent();
742 auto &MBB = *MI.getParent();
743 auto &MF = *MBB.getParent();
744 auto &MRI = MF.getRegInfo();
745 uint64_t Immed;
746 if (Root.isImm())
747 Immed = Root.getImm();
748 else if (Root.isCImm())
749 Immed = Root.getCImm()->getZExtValue();
750 else if (Root.isReg()) {
751 auto ValAndVReg =
752 getIConstantVRegValWithLookThrough(Root.getReg(), MRI);
753 if (!ValAndVReg)
754 return std::nullopt;
755 Immed = ValAndVReg->Value.getSExtValue();
756 } else
757 return std::nullopt;
758 return Immed;
759}
760
761/// Check whether \p I is a currently unsupported binary operation:
762/// - it has an unsized type
763/// - an operand is not a vreg
764/// - its operands are not all in the same register bank
765/// These are checks that should someday live in the verifier, but right now
766/// they are mostly limitations of the AArch64 selector.
767static bool unsupportedBinOp(const MachineInstr &I,
768 const AArch64RegisterBankInfo &RBI,
769 const MachineRegisterInfo &MRI,
770 const AArch64RegisterInfo &TRI) {
771 LLT Ty = MRI.getType(I.getOperand(0).getReg());
772 if (!Ty.isValid()) {
773 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
774 return true;
775 }
776
777 const RegisterBank *PrevOpBank = nullptr;
778 for (auto &MO : I.operands()) {
779 // FIXME: Support non-register operands.
780 if (!MO.isReg()) {
781 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
782 return true;
783 }
784
785 // FIXME: Can generic operations have physical registers operands? If
786 // so, this will need to be taught about that, and we'll need to get the
787 // bank out of the minimal class for the register.
788 // Either way, this needs to be documented (and possibly verified).
789 if (!MO.getReg().isVirtual()) {
790 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
791 return true;
792 }
793
794 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
795 if (!OpBank) {
796 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
797 return true;
798 }
799
800 if (PrevOpBank && OpBank != PrevOpBank) {
801 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
802 return true;
803 }
804 PrevOpBank = OpBank;
805 }
806 return false;
807}
808
809/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
810/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
811/// and of size \p OpSize.
812/// \returns \p GenericOpc if the combination is unsupported.
813static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
814 unsigned OpSize) {
815 switch (RegBankID) {
816 case AArch64::GPRRegBankID:
817 if (OpSize == 32) {
818 switch (GenericOpc) {
819 case TargetOpcode::G_SHL:
820 return AArch64::LSLVWr;
821 case TargetOpcode::G_LSHR:
822 return AArch64::LSRVWr;
823 case TargetOpcode::G_ASHR:
824 return AArch64::ASRVWr;
825 default:
826 return GenericOpc;
827 }
828 } else if (OpSize == 64) {
829 switch (GenericOpc) {
830 case TargetOpcode::G_PTR_ADD:
831 return AArch64::ADDXrr;
832 case TargetOpcode::G_SHL:
833 return AArch64::LSLVXr;
834 case TargetOpcode::G_LSHR:
835 return AArch64::LSRVXr;
836 case TargetOpcode::G_ASHR:
837 return AArch64::ASRVXr;
838 default:
839 return GenericOpc;
840 }
841 }
842 break;
843 case AArch64::FPRRegBankID:
844 switch (OpSize) {
845 case 32:
846 switch (GenericOpc) {
847 case TargetOpcode::G_FADD:
848 return AArch64::FADDSrr;
849 case TargetOpcode::G_FSUB:
850 return AArch64::FSUBSrr;
851 case TargetOpcode::G_FMUL:
852 return AArch64::FMULSrr;
853 case TargetOpcode::G_FDIV:
854 return AArch64::FDIVSrr;
855 default:
856 return GenericOpc;
857 }
858 case 64:
859 switch (GenericOpc) {
860 case TargetOpcode::G_FADD:
861 return AArch64::FADDDrr;
862 case TargetOpcode::G_FSUB:
863 return AArch64::FSUBDrr;
864 case TargetOpcode::G_FMUL:
865 return AArch64::FMULDrr;
866 case TargetOpcode::G_FDIV:
867 return AArch64::FDIVDrr;
868 case TargetOpcode::G_OR:
869 return AArch64::ORRv8i8;
870 default:
871 return GenericOpc;
872 }
873 }
874 break;
875 }
876 return GenericOpc;
877}
878
879/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
880/// appropriate for the (value) register bank \p RegBankID and of memory access
881/// size \p OpSize. This returns the variant with the base+unsigned-immediate
882/// addressing mode (e.g., LDRXui).
883/// \returns \p GenericOpc if the combination is unsupported.
884static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
885 unsigned OpSize) {
886 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
887 switch (RegBankID) {
888 case AArch64::GPRRegBankID:
889 switch (OpSize) {
890 case 8:
891 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
892 case 16:
893 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
894 case 32:
895 return isStore ? AArch64::STRWui : AArch64::LDRWui;
896 case 64:
897 return isStore ? AArch64::STRXui : AArch64::LDRXui;
898 }
899 break;
900 case AArch64::FPRRegBankID:
901 switch (OpSize) {
902 case 8:
903 return isStore ? AArch64::STRBui : AArch64::LDRBui;
904 case 16:
905 return isStore ? AArch64::STRHui : AArch64::LDRHui;
906 case 32:
907 return isStore ? AArch64::STRSui : AArch64::LDRSui;
908 case 64:
909 return isStore ? AArch64::STRDui : AArch64::LDRDui;
910 case 128:
911 return isStore ? AArch64::STRQui : AArch64::LDRQui;
912 }
913 break;
914 }
915 return GenericOpc;
916}
917
918/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
919/// to \p *To.
920///
921/// E.g "To = COPY SrcReg:SubReg"
922 static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
923 const RegisterBankInfo &RBI, Register SrcReg,
924 const TargetRegisterClass *To, unsigned SubReg) {
925 assert(SrcReg.isValid() && "Expected a valid source register?");
926 assert(To && "Destination register class cannot be null");
927 assert(SubReg && "Expected a valid subregister");
928
929 MachineIRBuilder MIB(I);
930 auto SubRegCopy =
931 MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
932 MachineOperand &RegOp = I.getOperand(1);
933 RegOp.setReg(SubRegCopy.getReg(0));
934
935 // It's possible that the destination register won't be constrained. Make
936 // sure that happens.
937 if (!I.getOperand(0).getReg().isPhysical())
938 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
939
940 return true;
941}
942
943/// Helper function to get the source and destination register classes for a
944/// copy. Returns a std::pair containing the source register class for the
945/// copy, and the destination register class for the copy. If a register class
946/// cannot be determined, then it will be nullptr.
947static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
948getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
949 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
950 const RegisterBankInfo &RBI) {
951 Register DstReg = I.getOperand(0).getReg();
952 Register SrcReg = I.getOperand(1).getReg();
953 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
954 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
955
956 TypeSize DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
957 TypeSize SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
958
959 // Special casing for cross-bank copies of s1s. We can technically represent
960 // a 1-bit value with any size of register. The minimum size for a GPR is 32
961 // bits. So, we need to put the FPR on 32 bits as well.
962 //
963 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
964 // then we can pull it into the helpers that get the appropriate class for a
965 // register bank. Or make a new helper that carries along some constraint
966 // information.
967 if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
968 SrcSize = DstSize = TypeSize::getFixed(32);
969
970 return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
971 getMinClassForRegBank(DstRegBank, DstSize, true)};
972}
973
974// FIXME: We need some sort of API in RBI/TRI to allow generic code to
975// constrain operands of simple instructions given a TargetRegisterClass
976// and LLT
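/// Constrain the register operands of a debug instruction to register classes
/// derived from their register banks and LLTs, so that later passes see fully
/// constrained vregs.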
977static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI,
978 const RegisterBankInfo &RBI) {
979 for (MachineOperand &MO : I.operands()) {
980 if (!MO.isReg())
981 continue;
982 Register Reg = MO.getReg();
983 if (!Reg)
984 continue;
985 if (Reg.isPhysical())
986 continue;
987 LLT Ty = MRI.getType(Reg);
988 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
989 const TargetRegisterClass *RC =
990 RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
991 if (!RC) {
992 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
993 RC = getRegClassForTypeOnBank(Ty, RB);
994 if (!RC) {
996 dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
997 break;
998 }
999 }
1000 RBI.constrainGenericRegister(Reg, *RC, MRI);
1001 }
1002
1003 return true;
1004}
1005
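/// Select a COPY (or a copy-like G_ZEXT) by picking register classes for the
/// source and destination, inserting a subregister copy or a SUBREG_TO_REG
/// promotion when the sizes differ, and constraining the destination register.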
1006static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
1007 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
1008 const RegisterBankInfo &RBI) {
1009 Register DstReg = I.getOperand(0).getReg();
1010 Register SrcReg = I.getOperand(1).getReg();
1011 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
1012 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
1013
1014 // Find the correct register classes for the source and destination registers.
1015 const TargetRegisterClass *SrcRC;
1016 const TargetRegisterClass *DstRC;
1017 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
1018
1019 if (!DstRC) {
1020 LLVM_DEBUG(dbgs() << "Unexpected dest size "
1021 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
1022 return false;
1023 }
1024
1025 // Is this a copy? If so, then we may need to insert a subregister copy.
1026 if (I.isCopy()) {
1027 // Yes. Check if there's anything to fix up.
1028 if (!SrcRC) {
1029 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
1030 return false;
1031 }
1032
1033 const TypeSize SrcSize = TRI.getRegSizeInBits(*SrcRC);
1034 const TypeSize DstSize = TRI.getRegSizeInBits(*DstRC);
1035 unsigned SubReg;
1036
1037 // If the source bank doesn't support a subregister copy small enough,
1038 // then we first need to copy to the destination bank.
1039 if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
1040 const TargetRegisterClass *DstTempRC =
1041 getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
1042 getSubRegForClass(DstRC, TRI, SubReg);
1043
1044 MachineIRBuilder MIB(I);
1045 auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
1046 copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
1047 } else if (SrcSize > DstSize) {
1048 // If the source register is bigger than the destination we need to
1049 // perform a subregister copy.
1050 const TargetRegisterClass *SubRegRC =
1051 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1052 getSubRegForClass(SubRegRC, TRI, SubReg);
1053 copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
1054 } else if (DstSize > SrcSize) {
1055 // If the destination register is bigger than the source we need to do
1056 // a promotion using SUBREG_TO_REG.
1057 const TargetRegisterClass *PromotionRC =
1058 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1059 getSubRegForClass(SrcRC, TRI, SubReg);
1060
1061 Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
1062 BuildMI(*I.getParent(), I, I.getDebugLoc(),
1063 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
1064 .addImm(0)
1065 .addUse(SrcReg)
1066 .addImm(SubReg);
1067 MachineOperand &RegOp = I.getOperand(1);
1068 RegOp.setReg(PromoteReg);
1069 }
1070
1071 // If the destination is a physical register, then there's nothing to
1072 // change, so we're done.
1073 if (DstReg.isPhysical())
1074 return true;
1075 }
1076
1077 // No need to constrain SrcReg. It will get constrained when we hit another
1078 // of its uses or defs. Copies do not have constraints.
1079 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1080 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
1081 << " operand\n");
1082 return false;
1083 }
1084
1085 // If this is a GPR ZEXT, we just want to reduce it down into a copy.
1086 // The sizes will be mismatched with the source < 32b, but that's OK.
1087 if (I.getOpcode() == TargetOpcode::G_ZEXT) {
1088 I.setDesc(TII.get(AArch64::COPY));
1089 assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
1090 return selectCopy(I, TII, MRI, TRI, RBI);
1091 }
1092
1093 I.setDesc(TII.get(AArch64::COPY));
1094 return true;
1095}
1096
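/// Select the AArch64 opcode for a scalar integer <-> floating point
/// conversion \p GenericOpc (G_SITOFP, G_UITOFP, G_FPTOSI or G_FPTOUI), given
/// the destination and source types.
/// \returns \p GenericOpc if the combination is unsupported.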
1097static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
1098 if (!DstTy.isScalar() || !SrcTy.isScalar())
1099 return GenericOpc;
1100
1101 const unsigned DstSize = DstTy.getSizeInBits();
1102 const unsigned SrcSize = SrcTy.getSizeInBits();
1103
1104 switch (DstSize) {
1105 case 32:
1106 switch (SrcSize) {
1107 case 32:
1108 switch (GenericOpc) {
1109 case TargetOpcode::G_SITOFP:
1110 return AArch64::SCVTFUWSri;
1111 case TargetOpcode::G_UITOFP:
1112 return AArch64::UCVTFUWSri;
1113 case TargetOpcode::G_FPTOSI:
1114 return AArch64::FCVTZSUWSr;
1115 case TargetOpcode::G_FPTOUI:
1116 return AArch64::FCVTZUUWSr;
1117 default:
1118 return GenericOpc;
1119 }
1120 case 64:
1121 switch (GenericOpc) {
1122 case TargetOpcode::G_SITOFP:
1123 return AArch64::SCVTFUXSri;
1124 case TargetOpcode::G_UITOFP:
1125 return AArch64::UCVTFUXSri;
1126 case TargetOpcode::G_FPTOSI:
1127 return AArch64::FCVTZSUWDr;
1128 case TargetOpcode::G_FPTOUI:
1129 return AArch64::FCVTZUUWDr;
1130 default:
1131 return GenericOpc;
1132 }
1133 default:
1134 return GenericOpc;
1135 }
1136 case 64:
1137 switch (SrcSize) {
1138 case 32:
1139 switch (GenericOpc) {
1140 case TargetOpcode::G_SITOFP:
1141 return AArch64::SCVTFUWDri;
1142 case TargetOpcode::G_UITOFP:
1143 return AArch64::UCVTFUWDri;
1144 case TargetOpcode::G_FPTOSI:
1145 return AArch64::FCVTZSUXSr;
1146 case TargetOpcode::G_FPTOUI:
1147 return AArch64::FCVTZUUXSr;
1148 default:
1149 return GenericOpc;
1150 }
1151 case 64:
1152 switch (GenericOpc) {
1153 case TargetOpcode::G_SITOFP:
1154 return AArch64::SCVTFUXDri;
1155 case TargetOpcode::G_UITOFP:
1156 return AArch64::UCVTFUXDri;
1157 case TargetOpcode::G_FPTOSI:
1158 return AArch64::FCVTZSUXDr;
1159 case TargetOpcode::G_FPTOUI:
1160 return AArch64::FCVTZUUXDr;
1161 default:
1162 return GenericOpc;
1163 }
1164 default:
1165 return GenericOpc;
1166 }
1167 default:
1168 return GenericOpc;
1169 };
1170 return GenericOpc;
1171}
1172
1173MachineInstr *
1174AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1175 Register False, AArch64CC::CondCode CC,
1176 MachineIRBuilder &MIB) const {
1177 MachineRegisterInfo &MRI = *MIB.getMRI();
1178 assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
1179 RBI.getRegBank(True, MRI, TRI)->getID() &&
1180 "Expected both select operands to have the same regbank?");
1181 LLT Ty = MRI.getType(True);
1182 if (Ty.isVector())
1183 return nullptr;
1184 const unsigned Size = Ty.getSizeInBits();
1185 assert((Size == 32 || Size == 64) &&
1186 "Expected 32 bit or 64 bit select only?");
1187 const bool Is32Bit = Size == 32;
1188 if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1189 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1190 auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1191 constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1192 return &*FCSel;
1193 }
1194
1195 // By default, we'll try and emit a CSEL.
1196 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1197 bool Optimized = false;
1198 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1199 &Optimized](Register &Reg, Register &OtherReg,
1200 bool Invert) {
1201 if (Optimized)
1202 return false;
1203
1204 // Attempt to fold:
1205 //
1206 // %sub = G_SUB 0, %x
1207 // %select = G_SELECT cc, %reg, %sub
1208 //
1209 // Into:
1210 // %select = CSNEG %reg, %x, cc
1211 Register MatchReg;
1212 if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
1213 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1214 Reg = MatchReg;
1215 if (Invert) {
1216 CC = AArch64CC::getInvertedCondCode(CC);
1217 std::swap(Reg, OtherReg);
1218 }
1219 return true;
1220 }
1221
1222 // Attempt to fold:
1223 //
1224 // %xor = G_XOR %x, -1
1225 // %select = G_SELECT cc, %reg, %xor
1226 //
1227 // Into:
1228 // %select = CSINV %reg, %x, cc
1229 if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
1230 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1231 Reg = MatchReg;
1232 if (Invert) {
1233 CC = AArch64CC::getInvertedCondCode(CC);
1234 std::swap(Reg, OtherReg);
1235 }
1236 return true;
1237 }
1238
1239 // Attempt to fold:
1240 //
1241 // %add = G_ADD %x, 1
1242 // %select = G_SELECT cc, %reg, %add
1243 //
1244 // Into:
1245 // %select = CSINC %reg, %x, cc
1246 if (mi_match(Reg, MRI,
1247 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
1248 m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
1249 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1250 Reg = MatchReg;
1251 if (Invert) {
1252 CC = AArch64CC::getInvertedCondCode(CC);
1253 std::swap(Reg, OtherReg);
1254 }
1255 return true;
1256 }
1257
1258 return false;
1259 };
1260
1261 // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1262 // true/false values are constants.
1263 // FIXME: All of these patterns already exist in tablegen. We should be
1264 // able to import these.
1265 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1266 &Optimized]() {
1267 if (Optimized)
1268 return false;
1269 auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
1270 auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
1271 if (!TrueCst && !FalseCst)
1272 return false;
1273
1274 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1275 if (TrueCst && FalseCst) {
1276 int64_t T = TrueCst->Value.getSExtValue();
1277 int64_t F = FalseCst->Value.getSExtValue();
1278
1279 if (T == 0 && F == 1) {
1280 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1281 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1282 True = ZReg;
1283 False = ZReg;
1284 return true;
1285 }
1286
1287 if (T == 0 && F == -1) {
1288 // G_SELECT cc, 0, -1 -> CSINV zreg, zreg, cc
1289 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1290 True = ZReg;
1291 False = ZReg;
1292 return true;
1293 }
1294 }
1295
1296 if (TrueCst) {
1297 int64_t T = TrueCst->Value.getSExtValue();
1298 if (T == 1) {
1299 // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1300 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1301 True = False;
1302 False = ZReg;
1303 CC = AArch64CC::getInvertedCondCode(CC);
1304 return true;
1305 }
1306
1307 if (T == -1) {
1308 // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1309 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1310 True = False;
1311 False = ZReg;
1312 CC = AArch64CC::getInvertedCondCode(CC);
1313 return true;
1314 }
1315 }
1316
1317 if (FalseCst) {
1318 int64_t F = FalseCst->Value.getSExtValue();
1319 if (F == 1) {
1320 // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1321 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1322 False = ZReg;
1323 return true;
1324 }
1325
1326 if (F == -1) {
1327 // G_SELECT cc, t, -1 -> CSINV t, zreg, cc
1328 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1329 False = ZReg;
1330 return true;
1331 }
1332 }
1333 return false;
1334 };
1335
1336 Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1337 Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1338 Optimized |= TryOptSelectCst();
1339 auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1340 constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1341 return &*SelectInst;
1342}
1343
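/// Convert an IR integer comparison predicate to the equivalent AArch64
/// condition code.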
1344static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
1345 switch (P) {
1346 default:
1347 llvm_unreachable("Unknown condition code!");
1348 case CmpInst::ICMP_NE:
1349 return AArch64CC::NE;
1350 case CmpInst::ICMP_EQ:
1351 return AArch64CC::EQ;
1352 case CmpInst::ICMP_SGT:
1353 return AArch64CC::GT;
1354 case CmpInst::ICMP_SGE:
1355 return AArch64CC::GE;
1356 case CmpInst::ICMP_SLT:
1357 return AArch64CC::LT;
1358 case CmpInst::ICMP_SLE:
1359 return AArch64CC::LE;
1360 case CmpInst::ICMP_UGT:
1361 return AArch64CC::HI;
1362 case CmpInst::ICMP_UGE:
1363 return AArch64CC::HS;
1364 case CmpInst::ICMP_ULT:
1365 return AArch64CC::LO;
1366 case CmpInst::ICMP_ULE:
1367 return AArch64CC::LS;
1368 }
1369}
1370
1371/// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
1372static void changeFPCCToORAArch64CC(CmpInst::Predicate CC,
1373 AArch64CC::CondCode &CondCode,
1374 AArch64CC::CondCode &CondCode2) {
1375 CondCode2 = AArch64CC::AL;
1376 switch (CC) {
1377 default:
1378 llvm_unreachable("Unknown FP condition!");
1379 case CmpInst::FCMP_OEQ:
1380 CondCode = AArch64CC::EQ;
1381 break;
1382 case CmpInst::FCMP_OGT:
1383 CondCode = AArch64CC::GT;
1384 break;
1385 case CmpInst::FCMP_OGE:
1386 CondCode = AArch64CC::GE;
1387 break;
1388 case CmpInst::FCMP_OLT:
1389 CondCode = AArch64CC::MI;
1390 break;
1391 case CmpInst::FCMP_OLE:
1392 CondCode = AArch64CC::LS;
1393 break;
1394 case CmpInst::FCMP_ONE:
1395 CondCode = AArch64CC::MI;
1396 CondCode2 = AArch64CC::GT;
1397 break;
1398 case CmpInst::FCMP_ORD:
1399 CondCode = AArch64CC::VC;
1400 break;
1401 case CmpInst::FCMP_UNO:
1402 CondCode = AArch64CC::VS;
1403 break;
1404 case CmpInst::FCMP_UEQ:
1405 CondCode = AArch64CC::EQ;
1406 CondCode2 = AArch64CC::VS;
1407 break;
1408 case CmpInst::FCMP_UGT:
1409 CondCode = AArch64CC::HI;
1410 break;
1411 case CmpInst::FCMP_UGE:
1412 CondCode = AArch64CC::PL;
1413 break;
1414 case CmpInst::FCMP_ULT:
1415 CondCode = AArch64CC::LT;
1416 break;
1417 case CmpInst::FCMP_ULE:
1418 CondCode = AArch64CC::LE;
1419 break;
1420 case CmpInst::FCMP_UNE:
1421 CondCode = AArch64CC::NE;
1422 break;
1423 }
1424}
1425
1426/// Convert an IR fp condition code to an AArch64 CC.
1427/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1428/// should be AND'ed instead of OR'ed.
1429static void changeFCMPPredToAArch64CC(CmpInst::Predicate CC,
1430 AArch64CC::CondCode &CondCode,
1431 AArch64CC::CondCode &CondCode2) {
1432 CondCode2 = AArch64CC::AL;
1433 switch (CC) {
1434 default:
1435 changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
1436 assert(CondCode2 == AArch64CC::AL);
1437 break;
1438 case CmpInst::FCMP_ONE:
1439 // (a one b)
1440 // == ((a olt b) || (a ogt b))
1441 // == ((a ord b) && (a une b))
1442 CondCode = AArch64CC::VC;
1443 CondCode2 = AArch64CC::NE;
1444 break;
1445 case CmpInst::FCMP_UEQ:
1446 // (a ueq b)
1447 // == ((a uno b) || (a oeq b))
1448 // == ((a ule b) && (a uge b))
1449 CondCode = AArch64CC::PL;
1450 CondCode2 = AArch64CC::LE;
1451 break;
1452 }
1453}
1454
1455/// Return a register which can be used as a bit to test in a TB(N)Z.
1456static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1457 MachineRegisterInfo &MRI) {
1458 assert(Reg.isValid() && "Expected valid register!");
1459 bool HasZext = false;
1460 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1461 unsigned Opc = MI->getOpcode();
1462
1463 if (!MI->getOperand(0).isReg() ||
1464 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1465 break;
1466
1467 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1468 //
1469 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1470 // on the truncated x is the same as the bit number on x.
1471 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1472 Opc == TargetOpcode::G_TRUNC) {
1473 if (Opc == TargetOpcode::G_ZEXT)
1474 HasZext = true;
1475
1476 Register NextReg = MI->getOperand(1).getReg();
1477 // Did we find something worth folding?
1478 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1479 break;
1480
1481 // NextReg is worth folding. Keep looking.
1482 Reg = NextReg;
1483 continue;
1484 }
1485
1486 // Attempt to find a suitable operation with a constant on one side.
1487 std::optional<uint64_t> C;
1488 Register TestReg;
1489 switch (Opc) {
1490 default:
1491 break;
1492 case TargetOpcode::G_AND:
1493 case TargetOpcode::G_XOR: {
1494 TestReg = MI->getOperand(1).getReg();
1495 Register ConstantReg = MI->getOperand(2).getReg();
1496 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1497 if (!VRegAndVal) {
1498 // AND commutes, check the other side for a constant.
1499 // FIXME: Can we canonicalize the constant so that it's always on the
1500 // same side at some point earlier?
1501 std::swap(ConstantReg, TestReg);
1502 VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1503 }
1504 if (VRegAndVal) {
1505 if (HasZext)
1506 C = VRegAndVal->Value.getZExtValue();
1507 else
1508 C = VRegAndVal->Value.getSExtValue();
1509 }
1510 break;
1511 }
1512 case TargetOpcode::G_ASHR:
1513 case TargetOpcode::G_LSHR:
1514 case TargetOpcode::G_SHL: {
1515 TestReg = MI->getOperand(1).getReg();
1516 auto VRegAndVal =
1517 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1518 if (VRegAndVal)
1519 C = VRegAndVal->Value.getSExtValue();
1520 break;
1521 }
1522 }
1523
1524 // Didn't find a constant or viable register. Bail out of the loop.
1525 if (!C || !TestReg.isValid())
1526 break;
1527
1528 // We found a suitable instruction with a constant. Check to see if we can
1529 // walk through the instruction.
1530 Register NextReg;
1531 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1532 switch (Opc) {
1533 default:
1534 break;
1535 case TargetOpcode::G_AND:
1536 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1537 if ((*C >> Bit) & 1)
1538 NextReg = TestReg;
1539 break;
1540 case TargetOpcode::G_SHL:
1541 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1542 // the type of the register.
1543 if (*C <= Bit && (Bit - *C) < TestRegSize) {
1544 NextReg = TestReg;
1545 Bit = Bit - *C;
1546 }
1547 break;
1548 case TargetOpcode::G_ASHR:
1549 // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1550 // in x
1551 NextReg = TestReg;
1552 Bit = Bit + *C;
1553 if (Bit >= TestRegSize)
1554 Bit = TestRegSize - 1;
1555 break;
1556 case TargetOpcode::G_LSHR:
1557 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1558 if ((Bit + *C) < TestRegSize) {
1559 NextReg = TestReg;
1560 Bit = Bit + *C;
1561 }
1562 break;
1563 case TargetOpcode::G_XOR:
1564 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1565 // appropriate.
1566 //
1567 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1568 //
1569 // tbz x', b -> tbnz x, b
1570 //
1571 // Because x' only has the b-th bit set if x does not.
1572 if ((*C >> Bit) & 1)
1573 Invert = !Invert;
1574 NextReg = TestReg;
1575 break;
1576 }
1577
1578 // Check if we found anything worth folding.
1579 if (!NextReg.isValid())
1580 return Reg;
1581 Reg = NextReg;
1582 }
1583
1584 return Reg;
1585}
1586
1587MachineInstr *AArch64InstructionSelector::emitTestBit(
1588 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1589 MachineIRBuilder &MIB) const {
1590 assert(TestReg.isValid());
1591 assert(ProduceNonFlagSettingCondBr &&
1592 "Cannot emit TB(N)Z with speculation tracking!");
1593 MachineRegisterInfo &MRI = *MIB.getMRI();
1594
1595 // Attempt to optimize the test bit by walking over instructions.
1596 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1597 LLT Ty = MRI.getType(TestReg);
1598 unsigned Size = Ty.getSizeInBits();
1599 assert(!Ty.isVector() && "Expected a scalar!");
1600 assert(Bit < 64 && "Bit is too large!");
1601
1602 // When the test register is a 64-bit register, we have to narrow to make
1603 // TBNZW work.
1604 bool UseWReg = Bit < 32;
1605 unsigned NecessarySize = UseWReg ? 32 : 64;
1606 if (Size != NecessarySize)
1607 TestReg = moveScalarRegClass(
1608 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1609 MIB);
1610
1611 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1612 {AArch64::TBZW, AArch64::TBNZW}};
1613 unsigned Opc = OpcTable[UseWReg][IsNegative];
1614 auto TestBitMI =
1615 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1616 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1617 return &*TestBitMI;
1618}
1619
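/// Try to fold a G_AND feeding a G_BRCOND into a single TB(N)Z: if the AND's
/// mask is a power of two, testing that bit of the other operand is
/// equivalent.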
1620bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1621 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1622 MachineIRBuilder &MIB) const {
1623 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1624 // Given something like this:
1625 //
1626 // %x = ...Something...
1627 // %one = G_CONSTANT i64 1
1628 // %zero = G_CONSTANT i64 0
1629 // %and = G_AND %x, %one
1630 // %cmp = G_ICMP intpred(ne), %and, %zero
1631 // %cmp_trunc = G_TRUNC %cmp
1632 // G_BRCOND %cmp_trunc, %bb.3
1633 //
1634 // We want to try and fold the AND into the G_BRCOND and produce either a
1635 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1636 //
1637 // In this case, we'd get
1638 //
1639 // TBNZ %x %bb.3
1640 //
1641
1642 // Check if the AND has a constant on its RHS which we can use as a mask.
1643 // If it's a power of 2, then it's the same as checking a specific bit.
1644 // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1645 auto MaybeBit = getIConstantVRegValWithLookThrough(
1646 AndInst.getOperand(2).getReg(), *MIB.getMRI());
1647 if (!MaybeBit)
1648 return false;
1649
1650 int32_t Bit = MaybeBit->Value.exactLogBase2();
1651 if (Bit < 0)
1652 return false;
1653
1654 Register TestReg = AndInst.getOperand(1).getReg();
1655
1656 // Emit a TB(N)Z.
1657 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1658 return true;
1659}
1660
1661MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1662 bool IsNegative,
1663 MachineBasicBlock *DestMBB,
1664 MachineIRBuilder &MIB) const {
1665 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1666 MachineRegisterInfo &MRI = *MIB.getMRI();
1667 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1668 AArch64::GPRRegBankID &&
1669 "Expected GPRs only?");
1670 auto Ty = MRI.getType(CompareReg);
1671 unsigned Width = Ty.getSizeInBits();
1672 assert(!Ty.isVector() && "Expected scalar only?");
1673 assert(Width <= 64 && "Expected width to be at most 64?");
1674 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1675 {AArch64::CBNZW, AArch64::CBNZX}};
1676 unsigned Opc = OpcTable[IsNegative][Width == 64];
1677 auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1678 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1679 return &*BranchMI;
1680}
1681
1682bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1683 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1684 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1685 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1686 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1687 // totally clean. Some of them require two branches to implement.
1688 auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1689 emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1690 Pred);
1691 AArch64CC::CondCode CC1, CC2;
1692 changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
1693 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1694 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1695 if (CC2 != AArch64CC::AL)
1696 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1697 I.eraseFromParent();
1698 return true;
1699}
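// --- Illustrative sketch (editor's example, not from the LLVM sources;
// standalone C++ for exposition): why some FP predicates need the second Bcc
// emitted above. With IEEE NaNs, "one" (ordered and not equal) means
// "less than OR greater than", i.e. two distinct conditions, and a single Bcc
// can only test one of them.
#include <cassert>
#include <cmath>

static bool fcmpONE(double A, double B) {
  return (A < B) || (A > B); // false whenever either operand is NaN
}

int main() {
  assert(fcmpONE(1.0, 2.0));
  assert(!fcmpONE(3.0, 3.0));
  assert(!fcmpONE(NAN, 2.0));
}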
1700
1701bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1702 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1703 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1704 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1705 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1706 //
1707 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1708 // instructions will not be produced, as they are conditional branch
1709 // instructions that do not set flags.
1710 if (!ProduceNonFlagSettingCondBr)
1711 return false;
1712
1713 MachineRegisterInfo &MRI = *MIB.getMRI();
1714 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1715 auto Pred =
1716 static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1717 Register LHS = ICmp.getOperand(2).getReg();
1718 Register RHS = ICmp.getOperand(3).getReg();
1719
1720 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1721 auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1722 MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1723
1724 // When we can emit a TB(N)Z, prefer that.
1725 //
1726 // Handle non-commutative condition codes first.
1727 // Note that we don't want to do this when we have a G_AND because it can
1728 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1729 if (VRegAndVal && !AndInst) {
1730 int64_t C = VRegAndVal->Value.getSExtValue();
1731
1732 // When we have a greater-than comparison, we can just test if the msb is
1733 // zero.
1734 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1735 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1736 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1737 I.eraseFromParent();
1738 return true;
1739 }
1740
1741 // When we have a less than comparison, we can just test if the msb is not
1742 // zero.
1743 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1744 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1745 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1746 I.eraseFromParent();
1747 return true;
1748 }
1749
1750 // Conversely, if we have a signed greater-than-or-equal comparison to zero,
1751 // we can test if the msb is zero.
1752 if (C == 0 && Pred == CmpInst::ICMP_SGE) {
1753 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1754 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1755 I.eraseFromParent();
1756 return true;
1757 }
1758 }
1759
1760 // Attempt to handle commutative condition codes. Right now, that's only
1761 // eq/ne.
1762 if (ICmpInst::isEquality(Pred)) {
1763 if (!VRegAndVal) {
1764 std::swap(RHS, LHS);
1765 VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1766 AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1767 }
1768
1769 if (VRegAndVal && VRegAndVal->Value == 0) {
1770 // If there's a G_AND feeding into this branch, try to fold it away by
1771 // emitting a TB(N)Z instead.
1772 //
1773 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1774 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1775 // would be redundant.
1776 if (AndInst &&
1777 tryOptAndIntoCompareBranch(
1778 *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1779 I.eraseFromParent();
1780 return true;
1781 }
1782
1783 // Otherwise, try to emit a CB(N)Z instead.
1784 auto LHSTy = MRI.getType(LHS);
1785 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1786 emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1787 I.eraseFromParent();
1788 return true;
1789 }
1790 }
1791 }
1792
1793 return false;
1794}
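// --- Illustrative sketch (editor's example, not from the LLVM sources;
// standalone C++ for exposition): the sign-bit identities exploited above.
// "x > -1" and "x >= 0" hold exactly when the MSB is clear (TBZ on the top
// bit), and "x < 0" exactly when it is set (TBNZ on the top bit). 32-bit case:
#include <cassert>
#include <cstdint>

int main() {
  for (int32_t X : {-5, -1, 0, 1, 42}) {
    bool MsbSet = (static_cast<uint32_t>(X) >> 31) != 0;
    assert((X < 0) == MsbSet);
    assert((X > -1) == !MsbSet);
    assert((X >= 0) == !MsbSet);
  }
}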
1795
1796bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1797 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1798 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1799 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1800 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1801 return true;
1802
1803 // Couldn't optimize. Emit a compare + a Bcc.
1804 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1805 auto PredOp = ICmp.getOperand(1);
1806 emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1807 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1808 static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
1809 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1810 I.eraseFromParent();
1811 return true;
1812}
1813
1814bool AArch64InstructionSelector::selectCompareBranch(
1815 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
1816 Register CondReg = I.getOperand(0).getReg();
1817 MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1818 // Try to select the G_BRCOND using whatever is feeding the condition if
1819 // possible.
1820 unsigned CCMIOpc = CCMI->getOpcode();
1821 if (CCMIOpc == TargetOpcode::G_FCMP)
1822 return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1823 if (CCMIOpc == TargetOpcode::G_ICMP)
1824 return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1825
1826 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1827 // instructions will not be produced, as they are conditional branch
1828 // instructions that do not set flags.
1829 if (ProduceNonFlagSettingCondBr) {
1830 emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1831 I.getOperand(1).getMBB(), MIB);
1832 I.eraseFromParent();
1833 return true;
1834 }
1835
1836 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1837 auto TstMI =
1838 MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1839 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
1840 auto Bcc = MIB.buildInstr(AArch64::Bcc)
1841 .addImm(AArch64CC::NE)
1842 .addMBB(I.getOperand(1).getMBB());
1843 I.eraseFromParent();
1844 return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1845}
1846
1847/// Returns the element immediate value of a vector shift operand if found.
1848/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1849static std::optional<int64_t> getVectorShiftImm(Register Reg,
1850 MachineRegisterInfo &MRI) {
1851 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1852 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1853 return getAArch64VectorSplatScalar(*OpMI, MRI);
1854}
1855
1856/// Matches and returns the shift immediate value for a SHL instruction given
1857/// a shift operand.
1858static std::optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg,
1859 MachineRegisterInfo &MRI) {
1860 std::optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1861 if (!ShiftImm)
1862 return std::nullopt;
1863 // Check the immediate is in range for a SHL.
1864 int64_t Imm = *ShiftImm;
1865 if (Imm < 0)
1866 return std::nullopt;
1867 switch (SrcTy.getElementType().getSizeInBits()) {
1868 default:
1869 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1870 return std::nullopt;
1871 case 8:
1872 if (Imm > 7)
1873 return std::nullopt;
1874 break;
1875 case 16:
1876 if (Imm > 15)
1877 return std::nullopt;
1878 break;
1879 case 32:
1880 if (Imm > 31)
1881 return std::nullopt;
1882 break;
1883 case 64:
1884 if (Imm > 63)
1885 return std::nullopt;
1886 break;
1887 }
1888 return Imm;
1889}
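// --- Illustrative sketch (editor's example, not from the LLVM sources;
// standalone C++ for exposition): the per-element range check performed by the
// switch above, written generically. A vector SHL immediate is only legal in
// [0, ElemBits - 1].
#include <cassert>
#include <cstdint>
#include <optional>

static std::optional<int64_t> checkVectorShlImm(unsigned ElemBits, int64_t Imm) {
  if (Imm < 0 || Imm >= static_cast<int64_t>(ElemBits))
    return std::nullopt;
  return Imm;
}

int main() {
  assert(checkVectorShlImm(16, 15).has_value());
  assert(!checkVectorShlImm(16, 16).has_value()); // out of range for 16-bit lanes
}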
1890
1891bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
1892 MachineRegisterInfo &MRI) {
1893 assert(I.getOpcode() == TargetOpcode::G_SHL);
1894 Register DstReg = I.getOperand(0).getReg();
1895 const LLT Ty = MRI.getType(DstReg);
1896 Register Src1Reg = I.getOperand(1).getReg();
1897 Register Src2Reg = I.getOperand(2).getReg();
1898
1899 if (!Ty.isVector())
1900 return false;
1901
1902 // Check if we have a vector of constants on RHS that we can select as the
1903 // immediate form.
1904 std::optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1905
1906 unsigned Opc = 0;
1907 if (Ty == LLT::fixed_vector(2, 64)) {
1908 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1909 } else if (Ty == LLT::fixed_vector(4, 32)) {
1910 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1911 } else if (Ty == LLT::fixed_vector(2, 32)) {
1912 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1913 } else if (Ty == LLT::fixed_vector(4, 16)) {
1914 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1915 } else if (Ty == LLT::fixed_vector(8, 16)) {
1916 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1917 } else if (Ty == LLT::fixed_vector(16, 8)) {
1918 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1919 } else if (Ty == LLT::fixed_vector(8, 8)) {
1920 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1921 } else {
1922 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1923 return false;
1924 }
1925
1926 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1927 if (ImmVal)
1928 Shl.addImm(*ImmVal);
1929 else
1930 Shl.addUse(Src2Reg);
1931 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1932 I.eraseFromParent();
1933 return true;
1934}
1935
1936bool AArch64InstructionSelector::selectVectorAshrLshr(
1937 MachineInstr &I, MachineRegisterInfo &MRI) {
1938 assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1939 I.getOpcode() == TargetOpcode::G_LSHR);
1940 Register DstReg = I.getOperand(0).getReg();
1941 const LLT Ty = MRI.getType(DstReg);
1942 Register Src1Reg = I.getOperand(1).getReg();
1943 Register Src2Reg = I.getOperand(2).getReg();
1944
1945 if (!Ty.isVector())
1946 return false;
1947
1948 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1949
1950 // We expect the immediate case to be lowered in the PostLegalizerCombiner to
1951 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1952
1953 // There is no shift-right-by-register instruction, but the shift-left-by-
1954 // register instruction takes a signed value, where negative amounts specify
1955 // a right shift.
1956
1957 unsigned Opc = 0;
1958 unsigned NegOpc = 0;
1959 const TargetRegisterClass *RC =
1960 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
1961 if (Ty == LLT::fixed_vector(2, 64)) {
1962 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1963 NegOpc = AArch64::NEGv2i64;
1964 } else if (Ty == LLT::fixed_vector(4, 32)) {
1965 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1966 NegOpc = AArch64::NEGv4i32;
1967 } else if (Ty == LLT::fixed_vector(2, 32)) {
1968 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1969 NegOpc = AArch64::NEGv2i32;
1970 } else if (Ty == LLT::fixed_vector(4, 16)) {
1971 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1972 NegOpc = AArch64::NEGv4i16;
1973 } else if (Ty == LLT::fixed_vector(8, 16)) {
1974 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1975 NegOpc = AArch64::NEGv8i16;
1976 } else if (Ty == LLT::fixed_vector(16, 8)) {
1977 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1978 NegOpc = AArch64::NEGv16i8;
1979 } else if (Ty == LLT::fixed_vector(8, 8)) {
1980 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1981 NegOpc = AArch64::NEGv8i8;
1982 } else {
1983 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1984 return false;
1985 }
1986
1987 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1988 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1989 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1990 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1991 I.eraseFromParent();
1992 return true;
1993}
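// --- Illustrative sketch (editor's example, not from the LLVM sources;
// standalone C++ for exposition): the NEG + SSHL/USHL pattern built above
// relies on the signed shift amount: shifting right by C is the same as
// shifting left by -C. One lane, arithmetic case:
#include <cassert>
#include <cstdint>

static int32_t signedShiftLeft(int32_t V, int8_t Amount) {
  return Amount >= 0 ? (V << Amount) : (V >> -Amount); // negative = right shift
}

int main() {
  assert(signedShiftLeft(5, 2) == 20);    // ordinary left shift
  assert(signedShiftLeft(-64, -3) == -8); // right shift via negated amount
}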
1994
1995bool AArch64InstructionSelector::selectVaStartAAPCS(
1996 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1997 return false;
1998}
1999
2000bool AArch64InstructionSelector::selectVaStartDarwin(
2001 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
2002 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
2003 Register ListReg = I.getOperand(0).getReg();
2004
2005 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2006
2007 int FrameIdx = FuncInfo->getVarArgsStackIndex();
2008 if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64(
2009 MF.getFunction().getCallingConv(), MF.getFunction().isVarArg())) {
2010 FrameIdx = FuncInfo->getVarArgsGPRSize() > 0
2011 ? FuncInfo->getVarArgsGPRIndex()
2012 : FuncInfo->getVarArgsStackIndex();
2013 }
2014
2015 auto MIB =
2016 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
2017 .addDef(ArgsAddrReg)
2018 .addFrameIndex(FrameIdx)
2019 .addImm(0)
2020 .addImm(0);
2021
2022 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2023
2024 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
2025 .addUse(ArgsAddrReg)
2026 .addUse(ListReg)
2027 .addImm(0)
2028 .addMemOperand(*I.memoperands_begin());
2029
2030 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2031 I.eraseFromParent();
2032 return true;
2033}
2034
2035void AArch64InstructionSelector::materializeLargeCMVal(
2036 MachineInstr &I, const Value *V, unsigned OpFlags) {
2037 MachineBasicBlock &MBB = *I.getParent();
2038 MachineFunction &MF = *MBB.getParent();
2039 MachineRegisterInfo &MRI = MF.getRegInfo();
2040
2041 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
2042 MovZ->addOperand(MF, I.getOperand(1));
2043 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
2044 AArch64II::MO_NC);
2045 MovZ->addOperand(MF, MachineOperand::CreateImm(0));
2046 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
2047
2048 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
2049 Register ForceDstReg) {
2050 Register DstReg = ForceDstReg
2051 ? ForceDstReg
2052 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2053 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
2054 if (auto *GV = dyn_cast<GlobalValue>(V)) {
2055 MovI->addOperand(MF, MachineOperand::CreateGA(
2056 GV, MovZ->getOperand(1).getOffset(), Flags));
2057 } else {
2058 MovI->addOperand(
2059 MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
2060 MovZ->getOperand(1).getOffset(), Flags));
2061 }
2062 MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
2063 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
2064 return DstReg;
2065 };
2066 Register DstReg = BuildMovK(MovZ.getReg(0),
2067 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
2068 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
2069 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
2070}
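// --- Illustrative sketch (editor's example, not from the LLVM sources;
// standalone C++ for exposition): the MOVZ + MOVK(#16/#32/#48) chain built
// above assembles a 64-bit value from four 16-bit pieces. Same composition in
// plain arithmetic:
#include <cassert>
#include <cstdint>

static uint64_t movzMovkChain(uint64_t V) {
  uint64_t R = V & 0xFFFFull;   // MOVZ  (bits [15:0],  MO_G0)
  R |= V & (0xFFFFull << 16);   // MOVK  (bits [31:16], MO_G1)
  R |= V & (0xFFFFull << 32);   // MOVK  (bits [47:32], MO_G2)
  R |= V & (0xFFFFull << 48);   // MOVK  (bits [63:48], MO_G3)
  return R;
}

int main() {
  uint64_t Addr = 0x0123456789ABCDEFull;
  assert(movzMovkChain(Addr) == Addr);
}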
2071
2072bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
2073 MachineBasicBlock &MBB = *I.getParent();
2074 MachineFunction &MF = *MBB.getParent();
2075 MachineRegisterInfo &MRI = MF.getRegInfo();
2076
2077 switch (I.getOpcode()) {
2078 case TargetOpcode::G_STORE: {
2079 bool Changed = contractCrossBankCopyIntoStore(I, MRI);
2080 MachineOperand &SrcOp = I.getOperand(0);
2081 if (MRI.getType(SrcOp.getReg()).isPointer()) {
2082 // Allow matching with imported patterns for stores of pointers. Unlike
2083 // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
2084 // and constrain.
2085 auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
2086 Register NewSrc = Copy.getReg(0);
2087 SrcOp.setReg(NewSrc);
2088 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
2089 Changed = true;
2090 }
2091 return Changed;
2092 }
2093 case TargetOpcode::G_PTR_ADD:
2094 return convertPtrAddToAdd(I, MRI);
2095 case TargetOpcode::G_LOAD: {
2096 // For scalar loads of pointers, we try to convert the dest type from p0
2097 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
2098 // conversion, this should be ok because all users should have been
2099 // selected already, so the type doesn't matter for them.
2100 Register DstReg = I.getOperand(0).getReg();
2101 const LLT DstTy = MRI.getType(DstReg);
2102 if (!DstTy.isPointer())
2103 return false;
2104 MRI.setType(DstReg, LLT::scalar(64));
2105 return true;
2106 }
2107 case AArch64::G_DUP: {
2108 // Convert the type from p0 to s64 to help selection.
2109 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2110 if (!DstTy.isPointerVector())
2111 return false;
2112 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
2113 MRI.setType(I.getOperand(0).getReg(),
2114 DstTy.changeElementType(LLT::scalar(64)));
2115 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2116 I.getOperand(1).setReg(NewSrc.getReg(0));
2117 return true;
2118 }
2119 case TargetOpcode::G_UITOFP:
2120 case TargetOpcode::G_SITOFP: {
2121 // If both source and destination regbanks are FPR, then convert the opcode
2122 // to G_SITOF/G_UITOF so that the importer can select it to an fpr variant.
2123 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
2124 // copy.
2125 Register SrcReg = I.getOperand(1).getReg();
2126 LLT SrcTy = MRI.getType(SrcReg);
2127 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2128 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
2129 return false;
2130
2131 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
2132 if (I.getOpcode() == TargetOpcode::G_SITOFP)
2133 I.setDesc(TII.get(AArch64::G_SITOF));
2134 else
2135 I.setDesc(TII.get(AArch64::G_UITOF));
2136 return true;
2137 }
2138 return false;
2139 }
2140 default:
2141 return false;
2142 }
2143}
2144
2145/// This lowering tries to look for G_PTR_ADD instructions and then converts
2146/// them to a standard G_ADD with a COPY on the source.
2147///
2148/// The motivation behind this is to expose the add semantics to the imported
2149/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
2150/// because the selector works bottom up, uses before defs. By the time we
2151/// end up trying to select a G_PTR_ADD, we should already have attempted to
2152/// fold it into an addressing mode and failed.
2153bool AArch64InstructionSelector::convertPtrAddToAdd(
2154 MachineInstr &I, MachineRegisterInfo &MRI) {
2155 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
2156 Register DstReg = I.getOperand(0).getReg();
2157 Register AddOp1Reg = I.getOperand(1).getReg();
2158 const LLT PtrTy = MRI.getType(DstReg);
2159 if (PtrTy.getAddressSpace() != 0)
2160 return false;
2161
2162 const LLT CastPtrTy =
2163 PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
2164 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
2165 // Set regbanks on the registers.
2166 if (PtrTy.isVector())
2167 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2168 else
2169 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2170
2171 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2172 // %dst(intty) = G_ADD %intbase, off
2173 I.setDesc(TII.get(TargetOpcode::G_ADD));
2174 MRI.setType(DstReg, CastPtrTy);
2175 I.getOperand(1).setReg(PtrToInt.getReg(0));
2176 if (!select(*PtrToInt)) {
2177 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
2178 return false;
2179 }
2180
2181 // Also take the opportunity here to try to do some optimization.
2182 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
2183 Register NegatedReg;
2184 if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
2185 return true;
2186 I.getOperand(2).setReg(NegatedReg);
2187 I.setDesc(TII.get(TargetOpcode::G_SUB));
2188 return true;
2189}
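// --- Illustrative sketch (editor's example, not from the LLVM sources;
// standalone C++ for exposition): the final fold above rewrites
// base + (0 - x) as base - x once the pointer add has become an integer add.
// The identity on plain unsigned integers:
#include <cassert>
#include <cstdint>

int main() {
  uint64_t Base = 0x1000, X = 24;
  assert(Base + (0 - X) == Base - X); // two's-complement wraparound makes these equal
}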
2190
2191bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
2192 MachineRegisterInfo &MRI) {
2193 // We try to match the immediate variant of LSL, which is actually an alias
2194 // for a special case of UBFM. Otherwise, we fall back to the imported
2195 // selector which will match the register variant.
2196 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2197 const auto &MO = I.getOperand(2);
2198 auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
2199 if (!VRegAndVal)
2200 return false;
2201
2202 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2203 if (DstTy.isVector())
2204 return false;
2205 bool Is64Bit = DstTy.getSizeInBits() == 64;
2206 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2207 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2208
2209 if (!Imm1Fn || !Imm2Fn)
2210 return false;
2211
2212 auto NewI =
2213 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2214 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2215
2216 for (auto &RenderFn : *Imm1Fn)
2217 RenderFn(NewI);
2218 for (auto &RenderFn : *Imm2Fn)
2219 RenderFn(NewI);
2220
2221 I.eraseFromParent();
2222 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
2223}
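// --- Illustrative sketch (editor's example, not from the LLVM sources;
// standalone C++ for exposition): the immediate LSL alias selected above.
// "LSL Rd, Rn, #s" is encoded as UBFM with immr = (RegSize - s) % RegSize and
// imms = RegSize - 1 - s, which is the form the two shift-immediate renderers
// are expected to supply here.
#include <cassert>

struct UBFMImms {
  unsigned Immr, Imms;
};

static UBFMImms lslToUBFM(unsigned Shift, unsigned RegSize /*32 or 64*/) {
  return {(RegSize - Shift) % RegSize, RegSize - 1 - Shift};
}

int main() {
  UBFMImms W = lslToUBFM(4, 32);  // LSL #4 on a W register == UBFM #28, #27
  assert(W.Immr == 28 && W.Imms == 27);
  UBFMImms X = lslToUBFM(12, 64); // LSL #12 on an X register == UBFM #52, #51
  assert(X.Immr == 52 && X.Imms == 51);
}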
2224
2225bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2226 MachineInstr &I, MachineRegisterInfo &MRI) {
2227 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2228 // If we're storing a scalar, it doesn't matter what register bank that
2229 // scalar is on. All that matters is the size.
2230 //
2231 // So, if we see something like this (with a 32-bit scalar as an example):
2232 //
2233 // %x:gpr(s32) = ... something ...
2234 // %y:fpr(s32) = COPY %x:gpr(s32)
2235 // G_STORE %y:fpr(s32)
2236 //
2237 // We can fix this up into something like this:
2238 //
2239 // G_STORE %x:gpr(s32)
2240 //
2241 // And then continue the selection process normally.
2242 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2243 if (!DefDstReg.isValid())
2244 return false;
2245 LLT DefDstTy = MRI.getType(DefDstReg);
2246 Register StoreSrcReg = I.getOperand(0).getReg();
2247 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2248
2249 // If we get something strange like a physical register, then we shouldn't
2250 // go any further.
2251 if (!DefDstTy.isValid())
2252 return false;
2253
2254 // Are the source and dst types the same size?
2255 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2256 return false;
2257
2258 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2259 RBI.getRegBank(DefDstReg, MRI, TRI))
2260 return false;
2261
2262 // We have a cross-bank copy, which is entering a store. Let's fold it.
2263 I.getOperand(0).setReg(DefDstReg);
2264 return true;
2265}
2266
2267bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2268 assert(I.getParent() && "Instruction should be in a basic block!");
2269 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2270
2271 MachineBasicBlock &MBB = *I.getParent();
2272 MachineFunction &MF = *MBB.getParent();
2273 MachineRegisterInfo &MRI = MF.getRegInfo();
2274
2275 switch (I.getOpcode()) {
2276 case AArch64::G_DUP: {
2277 // Before selecting a DUP instruction, check if it is better selected as a
2278 // MOV or load from a constant pool.
2279 Register Src = I.getOperand(1).getReg();
2280 auto ValAndVReg = getAnyConstantVRegValWithLookThrough(Src, MRI);
2281 if (!ValAndVReg)
2282 return false;
2283 LLVMContext &Ctx = MF.getFunction().getContext();
2284 Register Dst = I.getOperand(0).getReg();
2285 auto *CV = ConstantDataVector::getSplat(
2286 MRI.getType(Dst).getNumElements(),
2287 ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()),
2288 ValAndVReg->Value));
2289 if (!emitConstantVector(Dst, CV, MIB, MRI))
2290 return false;
2291 I.eraseFromParent();
2292 return true;
2293 }
2294 case TargetOpcode::G_SEXT:
2295 // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
2296 // over a normal extend.
2297 if (selectUSMovFromExtend(I, MRI))
2298 return true;
2299 return false;
2300 case TargetOpcode::G_BR:
2301 return false;
2302 case TargetOpcode::G_SHL:
2303 return earlySelectSHL(I, MRI);
2304 case TargetOpcode::G_CONSTANT: {
2305 bool IsZero = false;
2306 if (I.getOperand(1).isCImm())
2307 IsZero = I.getOperand(1).getCImm()->isZero();
2308 else if (I.getOperand(1).isImm())
2309 IsZero = I.getOperand(1).getImm() == 0;
2310
2311 if (!IsZero)
2312 return false;
2313
2314 Register DefReg = I.getOperand(0).getReg();
2315 LLT Ty = MRI.getType(DefReg);
2316 if (Ty.getSizeInBits() == 64) {
2317 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2318 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2319 } else if (Ty.getSizeInBits() == 32) {
2320 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2321 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2322 } else
2323 return false;
2324
2325 I.setDesc(TII.get(TargetOpcode::COPY));
2326 return true;
2327 }
2328
2329 case TargetOpcode::G_ADD: {
2330 // Check if this is being fed by a G_ICMP on either side.
2331 //
2332 // (cmp pred, x, y) + z
2333 //
2334 // In the above case, when the cmp is true, we increment z by 1. So, we can
2335 // fold the add into the cset for the cmp by using cinc.
2336 //
2337 // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
2338 Register AddDst = I.getOperand(0).getReg();
2339 Register AddLHS = I.getOperand(1).getReg();
2340 Register AddRHS = I.getOperand(2).getReg();
2341 // Only handle scalars.
2342 LLT Ty = MRI.getType(AddLHS);
2343 if (Ty.isVector())
2344 return false;
2345 // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
2346 // bits.
2347 unsigned Size = Ty.getSizeInBits();
2348 if (Size != 32 && Size != 64)
2349 return false;
2350 auto MatchCmp = [&](Register Reg) -> MachineInstr * {
2351 if (!MRI.hasOneNonDBGUse(Reg))
2352 return nullptr;
2353 // If the LHS of the add is 32 bits, then we want to fold a 32-bit
2354 // compare.
2355 if (Size == 32)
2356 return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
2357 // We model scalar compares using 32-bit destinations right now.
2358 // If it's a 64-bit compare, it'll have 64-bit sources.
2359 Register ZExt;
2360 if (!mi_match(Reg, MRI,
2361 m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt))))))
2362 return nullptr;
2363 auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
2364 if (!Cmp ||
2365 MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
2366 return nullptr;
2367 return Cmp;
2368 };
2369 // Try to match
2370 // z + (cmp pred, x, y)
2371 MachineInstr *Cmp = MatchCmp(AddRHS);
2372 if (!Cmp) {
2373 // (cmp pred, x, y) + z
2374 std::swap(AddLHS, AddRHS);
2375 Cmp = MatchCmp(AddRHS);
2376 if (!Cmp)
2377 return false;
2378 }
2379 auto &PredOp = Cmp->getOperand(1);
2380 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
2381 const AArch64CC::CondCode InvCC =
2382 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
2383 MIB.setInstrAndDebugLoc(I);
2384 emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
2385 /*RHS=*/Cmp->getOperand(3), PredOp, MIB);
2386 emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
2387 I.eraseFromParent();
2388 return true;
2389 }
2390 case TargetOpcode::G_OR: {
2391 // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2392 // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2393 // shifting and masking that we can replace with a BFI (encoded as a BFM).
2394 Register Dst = I.getOperand(0).getReg();
2395 LLT Ty = MRI.getType(Dst);
2396
2397 if (!Ty.isScalar())
2398 return false;
2399
2400 unsigned Size = Ty.getSizeInBits();
2401 if (Size != 32 && Size != 64)
2402 return false;
2403
2404 Register ShiftSrc;
2405 int64_t ShiftImm;
2406 Register MaskSrc;
2407 int64_t MaskImm;
2408 if (!mi_match(
2409 Dst, MRI,
2410 m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
2411 m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
2412 return false;
2413
2414 if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2415 return false;
2416
2417 int64_t Immr = Size - ShiftImm;
2418 int64_t Imms = Size - ShiftImm - 1;
2419 unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2420 emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2421 I.eraseFromParent();
2422 return true;
2423 }
2424 case TargetOpcode::G_FENCE: {
2425 if (I.getOperand(1).getImm() == 0)
2426 BuildMI(MBB, I, MIMetadata(I), TII.get(TargetOpcode::MEMBARRIER));
2427 else
2428 BuildMI(MBB, I, MIMetadata(I), TII.get(AArch64::DMB))
2429 .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
2430 I.eraseFromParent();
2431 return true;
2432 }
2433 default:
2434 return false;
2435 }
2436}
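// --- Illustrative sketch (editor's example, not from the LLVM sources;
// standalone C++ for exposition): the shape matched by the G_OR case above.
// (ShiftSrc << s) | (MaskSrc & ((1 << s) - 1)) keeps the low s bits of MaskSrc
// and inserts the low bits of ShiftSrc above them, i.e. a bitfield insert.
// 32-bit model:
#include <cassert>
#include <cstdint>

static uint32_t bfiModel(uint32_t MaskSrc, uint32_t ShiftSrc, unsigned S) {
  uint32_t LowMask = (1u << S) - 1;             // the G_AND mask the matcher checks
  return (ShiftSrc << S) | (MaskSrc & LowMask); // the G_SHL | G_AND pattern
}

int main() {
  assert(bfiModel(0xABCD1234u, 0x5u, 16) == 0x00051234u);
}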
2437
2438bool AArch64InstructionSelector::select(MachineInstr &I) {
2439 assert(I.getParent() && "Instruction should be in a basic block!");
2440 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2441
2442 MachineBasicBlock &MBB = *I.getParent();
2443 MachineFunction &MF = *MBB.getParent();
2444 MachineRegisterInfo &MRI = MF.getRegInfo();
2445
2446 const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
2447 if (Subtarget->requiresStrictAlign()) {
2448 // We don't support this feature yet.
2449 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2450 return false;
2451 }
2452
2453 MIB.setInstrAndDebugLoc(I);
2454
2455 unsigned Opcode = I.getOpcode();
2456 // G_PHI requires same handling as PHI
2457 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2458 // Certain non-generic instructions also need some special handling.
2459
2460 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2461 return selectImpl(I, *CoverageInfo);
2462
2463 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2464 const Register DefReg = I.getOperand(0).getReg();
2465 const LLT DefTy = MRI.getType(DefReg);
2466
2467 const RegClassOrRegBank &RegClassOrBank =
2468 MRI.getRegClassOrRegBank(DefReg);
2469
2470 const TargetRegisterClass *DefRC
2471 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
2472 if (!DefRC) {
2473 if (!DefTy.isValid()) {
2474 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2475 return false;
2476 }
2477 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
2478 DefRC = getRegClassForTypeOnBank(DefTy, RB);
2479 if (!DefRC) {
2480 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2481 return false;
2482 }
2483 }
2484
2485 I.setDesc(TII.get(TargetOpcode::PHI));
2486
2487 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2488 }
2489
2490 if (I.isCopy())
2491 return selectCopy(I, TII, MRI, TRI, RBI);
2492
2493 if (I.isDebugInstr())
2494 return selectDebugInstr(I, MRI, RBI);
2495
2496 return true;
2497 }
2498
2499
2500 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2501 LLVM_DEBUG(
2502 dbgs() << "Generic instruction has unexpected implicit operands\n");
2503 return false;
2504 }
2505
2506 // Try to do some lowering before we start instruction selecting. These
2507 // lowerings are purely transformations on the input G_MIR and so selection
2508 // must continue after any modification of the instruction.
2509 if (preISelLower(I)) {
2510 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2511 }
2512
2513 // There may be patterns where the importer can't deal with them optimally,
2514 // but does select it to a suboptimal sequence so our custom C++ selection
2515 // code later never has a chance to work on it. Therefore, we have an early
2516 // selection attempt here to give priority to certain selection routines
2517 // over the imported ones.
2518 if (earlySelect(I))
2519 return true;
2520
2521 if (selectImpl(I, *CoverageInfo))
2522 return true;
2523
2524 LLT Ty =
2525 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2526
2527 switch (Opcode) {
2528 case TargetOpcode::G_SBFX:
2529 case TargetOpcode::G_UBFX: {
2530 static const unsigned OpcTable[2][2] = {
2531 {AArch64::UBFMWri, AArch64::UBFMXri},
2532 {AArch64::SBFMWri, AArch64::SBFMXri}};
2533 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2534 unsigned Size = Ty.getSizeInBits();
2535 unsigned Opc = OpcTable[IsSigned][Size == 64];
2536 auto Cst1 =
2537 getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
2538 assert(Cst1 && "Should have gotten a constant for src 1?");
2539 auto Cst2 =
2540 getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
2541 assert(Cst2 && "Should have gotten a constant for src 2?");
2542 auto LSB = Cst1->Value.getZExtValue();
2543 auto Width = Cst2->Value.getZExtValue();
2544 auto BitfieldInst =
2545 MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
2546 .addImm(LSB)
2547 .addImm(LSB + Width - 1);
2548 I.eraseFromParent();
2549 return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
2550 }
2551 case TargetOpcode::G_BRCOND:
2552 return selectCompareBranch(I, MF, MRI);
2553
2554 case TargetOpcode::G_BRINDIRECT: {
2555 const Function &Fn = MF.getFunction();
2556 if (std::optional<uint16_t> BADisc =
2557 STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(Fn)) {
2558 auto MI = MIB.buildInstr(AArch64::BRA, {}, {I.getOperand(0).getReg()});
2559 MI.addImm(AArch64PACKey::IA);
2560 MI.addImm(*BADisc);
2561 MI.addReg(/*AddrDisc=*/AArch64::XZR);
2562 I.eraseFromParent();
2563 return constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
2564 }
2565 I.setDesc(TII.get(AArch64::BR));
2566 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2567 }
2568
2569 case TargetOpcode::G_BRJT:
2570 return selectBrJT(I, MRI);
2571
2572 case AArch64::G_ADD_LOW: {
2573 // This op may have been separated from its ADRP companion by the localizer
2574 // or some other code motion pass. Given that many CPUs will try to
2575 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2576 // which will later be expanded into an ADRP+ADD pair after scheduling.
2577 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2578 if (BaseMI->getOpcode() != AArch64::ADRP) {
2579 I.setDesc(TII.get(AArch64::ADDXri));
2580 I.addOperand(MachineOperand::CreateImm(0));
2581 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2582 }
2583 assert(TM.getCodeModel() == CodeModel::Small &&
2584 "Expected small code model");
2585 auto Op1 = BaseMI->getOperand(1);
2586 auto Op2 = I.getOperand(2);
2587 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2588 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2589 Op1.getTargetFlags())
2590 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2591 Op2.getTargetFlags());
2592 I.eraseFromParent();
2593 return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2594 }
2595
2596 case TargetOpcode::G_FCONSTANT:
2597 case TargetOpcode::G_CONSTANT: {
2598 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2599
2600 const LLT s8 = LLT::scalar(8);
2601 const LLT s16 = LLT::scalar(16);
2602 const LLT s32 = LLT::scalar(32);
2603 const LLT s64 = LLT::scalar(64);
2604 const LLT s128 = LLT::scalar(128);
2605 const LLT p0 = LLT::pointer(0, 64);
2606
2607 const Register DefReg = I.getOperand(0).getReg();
2608 const LLT DefTy = MRI.getType(DefReg);
2609 const unsigned DefSize = DefTy.getSizeInBits();
2610 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2611
2612 // FIXME: Redundant check, but even less readable when factored out.
2613 if (isFP) {
2614 if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2615 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2616 << " constant, expected: " << s16 << " or " << s32
2617 << " or " << s64 << " or " << s128 << '\n');
2618 return false;
2619 }
2620
2621 if (RB.getID() != AArch64::FPRRegBankID) {
2622 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2623 << " constant on bank: " << RB
2624 << ", expected: FPR\n");
2625 return false;
2626 }
2627
2628 // The case when we have 0.0 is covered by tablegen. Reject it here so we
2629 // can be sure tablegen works correctly and isn't rescued by this code.
2630 // 0.0 is not covered by tablegen for FP128, so we handle that case in the
2631 // code here.
2632 if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2633 return false;
2634 } else {
2635 // s32 and s64 are covered by tablegen.
2636 if (Ty != p0 && Ty != s8 && Ty != s16) {
2637 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2638 << " constant, expected: " << s32 << ", " << s64
2639 << ", or " << p0 << '\n');
2640 return false;
2641 }
2642
2643 if (RB.getID() != AArch64::GPRRegBankID) {
2644 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2645 << " constant on bank: " << RB
2646 << ", expected: GPR\n");
2647 return false;
2648 }
2649 }
2650
2651 if (isFP) {
2652 const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB);
2653 // For 16, 64, and 128b values, emit a constant pool load.
2654 switch (DefSize) {
2655 default:
2656 llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
2657 case 32:
2658 case 64: {
2659 bool OptForSize = shouldOptForSize(&MF);
2660 const auto &TLI = MF.getSubtarget().getTargetLowering();
2661 // If TLI says that this fpimm is illegal, then we'll expand to a
2662 // constant pool load.
2663 if (TLI->isFPImmLegal(I.getOperand(1).getFPImm()->getValueAPF(),
2664 EVT::getFloatingPointVT(DefSize), OptForSize))
2665 break;
2666 [[fallthrough]];
2667 }
2668 case 16:
2669 case 128: {
2670 auto *FPImm = I.getOperand(1).getFPImm();
2671 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2672 if (!LoadMI) {
2673 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2674 return false;
2675 }
2676 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2677 I.eraseFromParent();
2678 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2679 }
2680 }
2681
2682 assert((DefSize == 32 || DefSize == 64) && "Unexpected const def size");
2683 // Either emit a FMOV, or emit a copy to emit a normal mov.
2684 const Register DefGPRReg = MRI.createVirtualRegister(
2685 DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2686 MachineOperand &RegOp = I.getOperand(0);
2687 RegOp.setReg(DefGPRReg);
2688 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2689 MIB.buildCopy({DefReg}, {DefGPRReg});
2690
2691 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2692 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2693 return false;
2694 }
2695
2696 MachineOperand &ImmOp = I.getOperand(1);
2697 // FIXME: Is going through int64_t always correct?
2698 ImmOp.ChangeToImmediate(
2699 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2700 } else if (I.getOperand(1).isCImm()) {
2701 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2702 I.getOperand(1).ChangeToImmediate(Val);
2703 } else if (I.getOperand(1).isImm()) {
2704 uint64_t Val = I.getOperand(1).getImm();
2705 I.getOperand(1).ChangeToImmediate(Val);
2706 }
2707
2708 const unsigned MovOpc =
2709 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2710 I.setDesc(TII.get(MovOpc));
2711 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2712 return true;
2713 }
2714 case TargetOpcode::G_EXTRACT: {
2715 Register DstReg = I.getOperand(0).getReg();
2716 Register SrcReg = I.getOperand(1).getReg();
2717 LLT SrcTy = MRI.getType(SrcReg);
2718 LLT DstTy = MRI.getType(DstReg);
2719 (void)DstTy;
2720 unsigned SrcSize = SrcTy.getSizeInBits();
2721
2722 if (SrcTy.getSizeInBits() > 64) {
2723 // This should be an extract of an s128, which is like a vector extract.
2724 if (SrcTy.getSizeInBits() != 128)
2725 return false;
2726 // Only support extracting 64 bits from an s128 at the moment.
2727 if (DstTy.getSizeInBits() != 64)
2728 return false;
2729
2730 unsigned Offset = I.getOperand(2).getImm();
2731 if (Offset % 64 != 0)
2732 return false;
2733
2734 // Check we have the right regbank always.
2735 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2736 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2737 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2738
2739 if (SrcRB.getID() == AArch64::GPRRegBankID) {
2740 auto NewI =
2741 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2742 .addUse(SrcReg, 0,
2743 Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2744 constrainOperandRegClass(MF, TRI, MRI, TII, RBI, *NewI,
2745 AArch64::GPR64RegClass, NewI->getOperand(0));
2746 I.eraseFromParent();
2747 return true;
2748 }
2749
2750 // Emit the same code as a vector extract.
2751 // Offset must be a multiple of 64.
2752 unsigned LaneIdx = Offset / 64;
2753 MachineInstr *Extract = emitExtractVectorElt(
2754 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2755 if (!Extract)
2756 return false;
2757 I.eraseFromParent();
2758 return true;
2759 }
2760
2761 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2762 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2763 Ty.getSizeInBits() - 1);
2764
2765 if (SrcSize < 64) {
2766 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2767 "unexpected G_EXTRACT types");
2769 }
2770
2771 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2772 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2773 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2774 .addReg(DstReg, 0, AArch64::sub_32);
2775 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2776 AArch64::GPR32RegClass, MRI);
2777 I.getOperand(0).setReg(DstReg);
2778
2779 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2780 }
2781
2782 case TargetOpcode::G_INSERT: {
2783 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2784 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2785 unsigned DstSize = DstTy.getSizeInBits();
2786 // Larger inserts are vectors, same-size ones should be something else by
2787 // now (split up or turned into COPYs).
2788 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2789 return false;
2790
2791 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2792 unsigned LSB = I.getOperand(3).getImm();
2793 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2794 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2795 MachineInstrBuilder(MF, I).addImm(Width - 1);
2796
2797 if (DstSize < 64) {
2798 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2799 "unexpected G_INSERT types");
2801 }
2802
2803 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2804 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2805 TII.get(AArch64::SUBREG_TO_REG))
2806 .addDef(SrcReg)
2807 .addImm(0)
2808 .addUse(I.getOperand(2).getReg())
2809 .addImm(AArch64::sub_32);
2810 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2811 AArch64::GPR32RegClass, MRI);
2812 I.getOperand(2).setReg(SrcReg);
2813
2814 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2815 }
2816 case TargetOpcode::G_FRAME_INDEX: {
2817 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2818 if (Ty != LLT::pointer(0, 64)) {
2819 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2820 << ", expected: " << LLT::pointer(0, 64) << '\n');
2821 return false;
2822 }
2823 I.setDesc(TII.get(AArch64::ADDXri));
2824
2825 // MOs for a #0 shifted immediate.
2826 I.addOperand(MachineOperand::CreateImm(0));
2827 I.addOperand(MachineOperand::CreateImm(0));
2828
2829 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2830 }
2831
2832 case TargetOpcode::G_GLOBAL_VALUE: {
2833 const GlobalValue *GV = nullptr;
2834 unsigned OpFlags;
2835 if (I.getOperand(1).isSymbol()) {
2836 OpFlags = I.getOperand(1).getTargetFlags();
2837 // Currently only used by "RtLibUseGOT".
2838 assert(OpFlags == AArch64II::MO_GOT);
2839 } else {
2840 GV = I.getOperand(1).getGlobal();
2841 if (GV->isThreadLocal())
2842 return selectTLSGlobalValue(I, MRI);
2843 OpFlags = STI.ClassifyGlobalReference(GV, TM);
2844 }
2845
2846 if (OpFlags & AArch64II::MO_GOT) {
2847 I.setDesc(TII.get(AArch64::LOADgot));
2848 I.getOperand(1).setTargetFlags(OpFlags);
2849 } else if (TM.getCodeModel() == CodeModel::Large &&
2850 !TM.isPositionIndependent()) {
2851 // Materialize the global using movz/movk instructions.
2852 materializeLargeCMVal(I, GV, OpFlags);
2853 I.eraseFromParent();
2854 return true;
2855 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2856 I.setDesc(TII.get(AArch64::ADR));
2857 I.getOperand(1).setTargetFlags(OpFlags);
2858 } else {
2859 I.setDesc(TII.get(AArch64::MOVaddr));
2860 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2861 MachineInstrBuilder MIB(MF, I);
2862 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2863 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2864 }
2865 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2866 }
2867
2868 case TargetOpcode::G_PTRAUTH_GLOBAL_VALUE:
2869 return selectPtrAuthGlobalValue(I, MRI);
2870
2871 case TargetOpcode::G_ZEXTLOAD:
2872 case TargetOpcode::G_LOAD:
2873 case TargetOpcode::G_STORE: {
2874 GLoadStore &LdSt = cast<GLoadStore>(I);
2875 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2876 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
2877
2878 if (PtrTy != LLT::pointer(0, 64)) {
2879 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
2880 << ", expected: " << LLT::pointer(0, 64) << '\n');
2881 return false;
2882 }
2883
2884 uint64_t MemSizeInBytes = LdSt.getMemSize().getValue();
2885 unsigned MemSizeInBits = LdSt.getMemSizeInBits().getValue();
2886 AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
2887
2888 // Need special instructions for atomics that affect ordering.
2889 if (Order != AtomicOrdering::NotAtomic &&
2890 Order != AtomicOrdering::Unordered &&
2891 Order != AtomicOrdering::Monotonic) {
2892 assert(!isa<GZExtLoad>(LdSt));
2893 assert(MemSizeInBytes <= 8 &&
2894 "128-bit atomics should already be custom-legalized");
2895
2896 if (isa<GLoad>(LdSt)) {
2897 static constexpr unsigned LDAPROpcodes[] = {
2898 AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
2899 static constexpr unsigned LDAROpcodes[] = {
2900 AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
2901 ArrayRef<unsigned> Opcodes =
2902 STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
2903 ? LDAPROpcodes
2904 : LDAROpcodes;
2905 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2906 } else {
2907 static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2908 AArch64::STLRW, AArch64::STLRX};
2909 Register ValReg = LdSt.getReg(0);
2910 if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
2911 // Emit a subreg copy of 32 bits.
2912 Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2913 MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
2914 .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
2915 I.getOperand(0).setReg(NewVal);
2916 }
2917 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2918 }
2919 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2920 return true;
2921 }
2922
2923#ifndef NDEBUG
2924 const Register PtrReg = LdSt.getPointerReg();
2925 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2926 // Check that the pointer register is valid.
2927 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2928 "Load/Store pointer operand isn't a GPR");
2929 assert(MRI.getType(PtrReg).isPointer() &&
2930 "Load/Store pointer operand isn't a pointer");
2931#endif
2932
2933 const Register ValReg = LdSt.getReg(0);
2934 const LLT ValTy = MRI.getType(ValReg);
2935 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2936
2937 // The code below doesn't support truncating stores, so we need to split it
2938 // again.
2939 if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2940 unsigned SubReg;
2941 LLT MemTy = LdSt.getMMO().getMemoryType();
2942 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2943 if (!getSubRegForClass(RC, TRI, SubReg))
2944 return false;
2945
2946 // Generate a subreg copy.
2947 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
2948 .addReg(ValReg, 0, SubReg)
2949 .getReg(0);
2950 RBI.constrainGenericRegister(Copy, *RC, MRI);
2951 LdSt.getOperand(0).setReg(Copy);
2952 } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2953 // If this is an any-extending load from the FPR bank, split it into a regular
2954 // load + extend.
2955 if (RB.getID() == AArch64::FPRRegBankID) {
2956 unsigned SubReg;
2957 LLT MemTy = LdSt.getMMO().getMemoryType();
2958 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2959 if (!getSubRegForClass(RC, TRI, SubReg))
2960 return false;
2961 Register OldDst = LdSt.getReg(0);
2962 Register NewDst =
2963 MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
2964 LdSt.getOperand(0).setReg(NewDst);
2965 MRI.setRegBank(NewDst, RB);
2966 // Generate a SUBREG_TO_REG to extend it.
2967 MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
2968 MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
2969 .addImm(0)
2970 .addUse(NewDst)
2971 .addImm(SubReg);
2972 auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB);
2973 RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
2974 MIB.setInstr(LdSt);
2975 }
2976 }
2977
2978 // Helper lambda for partially selecting I. Either returns the original
2979 // instruction with an updated opcode, or a new instruction.
2980 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
2981 bool IsStore = isa<GStore>(I);
2982 const unsigned NewOpc =
2983 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
2984 if (NewOpc == I.getOpcode())
2985 return nullptr;
2986 // Check if we can fold anything into the addressing mode.
2987 auto AddrModeFns =
2988 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
2989 if (!AddrModeFns) {
2990 // Can't fold anything. Use the original instruction.
2991 I.setDesc(TII.get(NewOpc));
2992 I.addOperand(MachineOperand::CreateImm(0));
2993 return &I;
2994 }
2995
2996 // Folded something. Create a new instruction and return it.
2997 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
2998 Register CurValReg = I.getOperand(0).getReg();
2999 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
3000 NewInst.cloneMemRefs(I);
3001 for (auto &Fn : *AddrModeFns)
3002 Fn(NewInst);
3003 I.eraseFromParent();
3004 return &*NewInst;
3005 };
3006
3007 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
3008 if (!LoadStore)
3009 return false;
3010
3011 // If we're storing a 0, use WZR/XZR.
3012 if (Opcode == TargetOpcode::G_STORE) {
3013 auto CVal = getIConstantVRegValWithLookThrough(
3014 LoadStore->getOperand(0).getReg(), MRI);
3015 if (CVal && CVal->Value == 0) {
3016 switch (LoadStore->getOpcode()) {
3017 case AArch64::STRWui:
3018 case AArch64::STRHHui:
3019 case AArch64::STRBBui:
3020 LoadStore->getOperand(0).setReg(AArch64::WZR);
3021 break;
3022 case AArch64::STRXui:
3023 LoadStore->getOperand(0).setReg(AArch64::XZR);
3024 break;
3025 }
3026 }
3027 }
3028
3029 if (IsZExtLoad || (Opcode == TargetOpcode::G_LOAD &&
3030 ValTy == LLT::scalar(64) && MemSizeInBits == 32)) {
3031 // The any/zextload from a smaller type to i32 should be handled by the
3032 // importer.
3033 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
3034 return false;
3035 // If we have an extending load then change the load's type to be a
3036 // narrower reg and zero_extend with SUBREG_TO_REG.
3037 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3038 Register DstReg = LoadStore->getOperand(0).getReg();
3039 LoadStore->getOperand(0).setReg(LdReg);
3040
3041 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
3042 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
3043 .addImm(0)
3044 .addUse(LdReg)
3045 .addImm(AArch64::sub_32);
3046 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3047 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
3048 MRI);
3049 }
3050 return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3051 }
3052
3053 case TargetOpcode::G_INDEXED_ZEXTLOAD:
3054 case TargetOpcode::G_INDEXED_SEXTLOAD:
3055 return selectIndexedExtLoad(I, MRI);
3056 case TargetOpcode::G_INDEXED_LOAD:
3057 return selectIndexedLoad(I, MRI);
3058 case TargetOpcode::G_INDEXED_STORE:
3059 return selectIndexedStore(cast<GIndexedStore>(I), MRI);
3060
3061 case TargetOpcode::G_LSHR:
3062 case TargetOpcode::G_ASHR:
3063 if (MRI.getType(I.getOperand(0).getReg()).isVector())
3064 return selectVectorAshrLshr(I, MRI);
3065 [[fallthrough]];
3066 case TargetOpcode::G_SHL:
3067 if (Opcode == TargetOpcode::G_SHL &&
3068 MRI.getType(I.getOperand(0).getReg()).isVector())
3069 return selectVectorSHL(I, MRI);
3070
3071 // These shifts were legalized to have 64 bit shift amounts because we
3072 // want to take advantage of the selection patterns that assume the
3073 // immediates are s64s. However, selectBinaryOp will assume both operands
3074 // have the same bit size.
3075 {
3076 Register SrcReg = I.getOperand(1).getReg();
3077 Register ShiftReg = I.getOperand(2).getReg();
3078 const LLT ShiftTy = MRI.getType(ShiftReg);
3079 const LLT SrcTy = MRI.getType(SrcReg);
3080 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
3081 ShiftTy.getSizeInBits() == 64) {
3082 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
3083 // Insert a subregister copy to implement a 64->32 trunc
3084 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
3085 .addReg(ShiftReg, 0, AArch64::sub_32);
3086 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
3087 I.getOperand(2).setReg(Trunc.getReg(0));
3088 }
3089 }
3090 [[fallthrough]];
3091 case TargetOpcode::G_OR: {
3092 // Reject the various things we don't support yet.
3093 if (unsupportedBinOp(I, RBI, MRI, TRI))
3094 return false;
3095
3096 const unsigned OpSize = Ty.getSizeInBits();
3097
3098 const Register DefReg = I.getOperand(0).getReg();
3099 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
3100
3101 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
3102 if (NewOpc == I.getOpcode())
3103 return false;
3104
3105 I.setDesc(TII.get(NewOpc));
3106 // FIXME: Should the type be always reset in setDesc?
3107
3108 // Now that we selected an opcode, we need to constrain the register
3109 // operands to use appropriate classes.
3110 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3111 }
3112
3113 case TargetOpcode::G_PTR_ADD: {
3114 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
3115 I.eraseFromParent();
3116 return true;
3117 }
3118
3119 case TargetOpcode::G_SADDE:
3120 case TargetOpcode::G_UADDE:
3121 case TargetOpcode::G_SSUBE:
3122 case TargetOpcode::G_USUBE:
3123 case TargetOpcode::G_SADDO:
3124 case TargetOpcode::G_UADDO:
3125 case TargetOpcode::G_SSUBO:
3126 case TargetOpcode::G_USUBO:
3127 return selectOverflowOp(I, MRI);
3128
3129 case TargetOpcode::G_PTRMASK: {
3130 Register MaskReg = I.getOperand(2).getReg();
3131 std::optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
3132 // TODO: Implement arbitrary cases
3133 if (!MaskVal || !isShiftedMask_64(*MaskVal))
3134 return false;
3135
3136 uint64_t Mask = *MaskVal;
3137 I.setDesc(TII.get(AArch64::ANDXri));
3138 I.getOperand(2).ChangeToImmediate(
3139 AArch64_AM::encodeLogicalImmediate(Mask, 64));
3140
3141 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3142 }
3143 case TargetOpcode::G_PTRTOINT:
3144 case TargetOpcode::G_TRUNC: {
3145 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3146 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3147
3148 const Register DstReg = I.getOperand(0).getReg();
3149 const Register SrcReg = I.getOperand(1).getReg();
3150
3151 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3152 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3153
3154 if (DstRB.getID() != SrcRB.getID()) {
3155 LLVM_DEBUG(
3156 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
3157 return false;
3158 }
3159
3160 if (DstRB.getID() == AArch64::GPRRegBankID) {
3161 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3162 if (!DstRC)
3163 return false;
3164
3165 const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB);
3166 if (!SrcRC)
3167 return false;
3168
3169 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
3170 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
3171 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
3172 return false;
3173 }
3174
3175 if (DstRC == SrcRC) {
3176 // Nothing to be done
3177 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
3178 SrcTy == LLT::scalar(64)) {
3179 llvm_unreachable("TableGen can import this case");
3180 return false;
3181 } else if (DstRC == &AArch64::GPR32RegClass &&
3182 SrcRC == &AArch64::GPR64RegClass) {
3183 I.getOperand(1).setSubReg(AArch64::sub_32);
3184 } else {
3185 LLVM_DEBUG(
3186 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3187 return false;
3188 }
3189
3190 I.setDesc(TII.get(TargetOpcode::COPY));
3191 return true;
3192 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3193 if (DstTy == LLT::fixed_vector(4, 16) &&
3194 SrcTy == LLT::fixed_vector(4, 32)) {
3195 I.setDesc(TII.get(AArch64::XTNv4i16));
3196 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3197 return true;
3198 }
3199
3200 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3201 MachineInstr *Extract = emitExtractVectorElt(
3202 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
3203 if (!Extract)
3204 return false;
3205 I.eraseFromParent();
3206 return true;
3207 }
3208
3209 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3210 if (Opcode == TargetOpcode::G_PTRTOINT) {
3211 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
3212 I.setDesc(TII.get(TargetOpcode::COPY));
3213 return selectCopy(I, TII, MRI, TRI, RBI);
3214 }
3215 }
3216
3217 return false;
3218 }
3219
3220 case TargetOpcode::G_ANYEXT: {
3221 if (selectUSMovFromExtend(I, MRI))
3222 return true;
3223
3224 const Register DstReg = I.getOperand(0).getReg();
3225 const Register SrcReg = I.getOperand(1).getReg();
3226
3227 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
3228 if (RBDst.getID() != AArch64::GPRRegBankID) {
3229 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
3230 << ", expected: GPR\n");
3231 return false;
3232 }
3233
3234 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
3235 if (RBSrc.getID() != AArch64::GPRRegBankID) {
3236 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
3237 << ", expected: GPR\n");
3238 return false;
3239 }
3240
3241 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
3242
3243 if (DstSize == 0) {
3244 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
3245 return false;
3246 }
3247
3248 if (DstSize != 64 && DstSize > 32) {
3249 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
3250 << ", expected: 32 or 64\n");
3251 return false;
3252 }
3253 // At this point G_ANYEXT is just like a plain COPY, but we need
3254 // to explicitly form the 64-bit value if any.
3255 if (DstSize > 32) {
3256 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3257 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
3258 .addDef(ExtSrc)
3259 .addImm(0)
3260 .addUse(SrcReg)
3261 .addImm(AArch64::sub_32);
3262 I.getOperand(1).setReg(ExtSrc);
3263 }
3264 return selectCopy(I, TII, MRI, TRI, RBI);
3265 }
3266
3267 case TargetOpcode::G_ZEXT:
3268 case TargetOpcode::G_SEXT_INREG:
3269 case TargetOpcode::G_SEXT: {
3270 if (selectUSMovFromExtend(I, MRI))
3271 return true;
3272
3273 unsigned Opcode = I.getOpcode();
3274 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3275 const Register DefReg = I.getOperand(0).getReg();
3276 Register SrcReg = I.getOperand(1).getReg();
3277 const LLT DstTy = MRI.getType(DefReg);
3278 const LLT SrcTy = MRI.getType(SrcReg);
3279 unsigned DstSize = DstTy.getSizeInBits();
3280 unsigned SrcSize = SrcTy.getSizeInBits();
3281
3282 // SEXT_INREG has the same src reg size as dst, the size of the value to be
3283 // extended is encoded in the imm.
3284 if (Opcode == TargetOpcode::G_SEXT_INREG)
3285 SrcSize = I.getOperand(2).getImm();
3286
3287 if (DstTy.isVector())
3288 return false; // Should be handled by imported patterns.
3289
3290 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3291 AArch64::GPRRegBankID &&
3292 "Unexpected ext regbank");
3293
3294 MachineInstr *ExtI;
3295
3296 // First check if we're extending the result of a load whose destination type is
3297 // smaller than 32 bits; in that case this zext is redundant. GPR32 is the smallest
3298 // GPR register on AArch64, and all narrower loads automatically
3299 // zero-extend the upper bits. E.g.
3300 // %v(s8) = G_LOAD %p, :: (load 1)
3301 // %v2(s32) = G_ZEXT %v(s8)
3302 if (!IsSigned) {
3303 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
3304 bool IsGPR =
3305 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3306 if (LoadMI && IsGPR) {
3307 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3308 unsigned BytesLoaded = MemOp->getSize().getValue();
3309 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3310 return selectCopy(I, TII, MRI, TRI, RBI);
3311 }
3312
3313 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3314 // + SUBREG_TO_REG.
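// Roughly, the MIR emitted on this path is (virtual register names are
// illustrative only):
//   %lo:gpr32 = ORRWrs $wzr, %src, 0        ; 32-bit mov; writing a W register
//                                           ; zeroes the upper 32 bits
//   %dst:gpr64 = SUBREG_TO_REG 0, %lo, sub_32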
3315 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3316 Register SubregToRegSrc =
3317 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3318 const Register ZReg = AArch64::WZR;
3319 MIB.buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg})
3320 .addImm(0);
3321
3322 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3323 .addImm(0)
3324 .addUse(SubregToRegSrc)
3325 .addImm(AArch64::sub_32);
3326
3327 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3328 MRI)) {
3329 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3330 return false;
3331 }
3332
3333 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3334 MRI)) {
3335 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3336 return false;
3337 }
3338
3339 I.eraseFromParent();
3340 return true;
3341 }
3342 }
3343
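// The remaining scalar cases lower to a single bitfield move: SBFM for the
// signed extends (and G_SEXT_INREG), UBFM for zero extends, copying bits
// [SrcSize-1:0]. E.g. a G_ZEXT from s8 to s64 becomes roughly
// UBFMXri %dst, %src, 0, 7, i.e. a UXTB.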
3344 if (DstSize == 64) {
3345 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3346 // FIXME: Can we avoid manually doing this?
3347 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3348 MRI)) {
3349 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3350 << " operand\n");
3351 return false;
3352 }
3353 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
3354 {&AArch64::GPR64RegClass}, {})
3355 .addImm(0)
3356 .addUse(SrcReg)
3357 .addImm(AArch64::sub_32)
3358 .getReg(0);
3359 }
3360
3361 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3362 {DefReg}, {SrcReg})
3363 .addImm(0)
3364 .addImm(SrcSize - 1);
3365 } else if (DstSize <= 32) {
3366 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3367 {DefReg}, {SrcReg})
3368 .addImm(0)
3369 .addImm(SrcSize - 1);
3370 } else {
3371 return false;
3372 }
3373
3374 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
3375 I.eraseFromParent();
3376 return true;
3377 }
3378
3379 case TargetOpcode::G_SITOFP:
3380 case TargetOpcode::G_UITOFP:
3381 case TargetOpcode::G_FPTOSI:
3382 case TargetOpcode::G_FPTOUI: {
3383 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
3384 SrcTy = MRI.getType(I.getOperand(1).getReg());
3385 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
3386 if (NewOpc == Opcode)
3387 return false;
3388
3389 I.setDesc(TII.get(NewOpc));
3390 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3391 I.setFlags(MachineInstr::NoFPExcept);
3392
3393 return true;
3394 }
3395
3396 case TargetOpcode::G_FREEZE:
3397 return selectCopy(I, TII, MRI, TRI, RBI);
3398
3399 case TargetOpcode::G_INTTOPTR:
3400 // The importer is currently unable to import pointer types since they
3401 // didn't exist in SelectionDAG.
3402 return selectCopy(I, TII, MRI, TRI, RBI);
3403
3404 case TargetOpcode::G_BITCAST:
3405 // Imported SelectionDAG rules can handle every bitcast except those that
3406 // bitcast from a type to the same type. Ideally, these shouldn't occur
3407 // but we might not run an optimizer that deletes them. The other exception
3408 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3409 // of them.
3410 return selectCopy(I, TII, MRI, TRI, RBI);
3411
3412 case TargetOpcode::G_SELECT: {
3413 auto &Sel = cast<GSelect>(I);
3414 const Register CondReg = Sel.getCondReg();
3415 const Register TReg = Sel.getTrueReg();
3416 const Register FReg = Sel.getFalseReg();
3417
3418 if (tryOptSelect(Sel))
3419 return true;
3420
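// Selection sketch: test bit 0 of the condition with an ANDS (which sets
// NZCV), then let emitSelect produce a CSEL/FCSEL-style instruction
// predicated on NE.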
3421 // Make sure to use an unused vreg instead of wzr, so that the peephole
3422 // optimizations will be able to optimize these.
3423 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3424 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3425 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3426 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3427 if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
3428 return false;
3429 Sel.eraseFromParent();
3430 return true;
3431 }
3432 case TargetOpcode::G_ICMP: {
3433 if (Ty.isVector())
3434 return false;
3435
3436 if (Ty != LLT::scalar(32)) {
3437 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3438 << ", expected: " << LLT::scalar(32) << '\n');
3439 return false;
3440 }
3441
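// Strategy: emit the integer compare to set NZCV, then materialize the
// boolean result with CSINC wzr, wzr, <inverted cc>, which is the CSET
// alias: 1 when the original predicate holds, 0 otherwise.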
3442 auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3443 const AArch64CC::CondCode InvCC =
3444 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
3445 emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB);
3446 emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
3447 /*Src2=*/AArch64::WZR, InvCC, MIB);
3448 I.eraseFromParent();
3449 return true;
3450 }
3451
3452 case TargetOpcode::G_FCMP: {
3453 CmpInst::Predicate Pred =
3454 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3455 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
3456 Pred) ||
3457 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
3458 return false;
3459 I.eraseFromParent();
3460 return true;
3461 }
3462 case TargetOpcode::G_VASTART:
3463 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3464 : selectVaStartAAPCS(I, MF, MRI);
3465 case TargetOpcode::G_INTRINSIC:
3466 return selectIntrinsic(I, MRI);
3467 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3468 return selectIntrinsicWithSideEffects(I, MRI);
3469 case TargetOpcode::G_IMPLICIT_DEF: {
3470 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3471 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3472 const Register DstReg = I.getOperand(0).getReg();
3473 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3474 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3475 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3476 return true;
3477 }
3478 case TargetOpcode::G_BLOCK_ADDR: {
3479 Function *BAFn = I.getOperand(1).getBlockAddress()->getFunction();
3480 if (std::optional<uint16_t> BADisc =
3481 STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(*BAFn)) {
3482 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X16}, {});
3483 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
3484 MIB.buildInstr(AArch64::MOVaddrPAC)
3485 .addBlockAddress(I.getOperand(1).getBlockAddress())
3486 .addImm(AArch64PACKey::IA)
3487 .addReg(/*AddrDisc=*/AArch64::XZR)
3488 .addImm(*BADisc)
3489 .constrainAllUses(TII, TRI, RBI);
3490 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X16));
3491 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
3492 AArch64::GPR64RegClass, MRI);
3493 I.eraseFromParent();
3494 return true;
3495 }
3496 if (TM.getCodeModel() == CodeModel::Large && !TM.isPositionIndependent()) {
3497 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3498 I.eraseFromParent();
3499 return true;
3500 } else {
3501 I.setDesc(TII.get(AArch64::MOVaddrBA));
3502 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3503 I.getOperand(0).getReg())
3504 .addBlockAddress(I.getOperand(1).getBlockAddress(),
3505 /* Offset */ 0, AArch64II::MO_PAGE)
3506 .addBlockAddress(
3507 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3508 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3509 I.eraseFromParent();
3510 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3511 }
3512 }
3513 case AArch64::G_DUP: {
3514 // When the scalar operand of G_DUP is an s8/s16 GPR, it can't be selected by the
3515 // imported patterns, so do it manually here. Avoiding s16 GPR scalars altogether is
3516 // difficult because at register-bank selection we may end up pessimizing the FPR
3517 // case if we add an any-extend to fix this. Manual selection is the most
3518 // robust solution for now.
3519 if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3520 AArch64::GPRRegBankID)
3521 return false; // We expect the fpr regbank case to be imported.
3522 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3523 if (VecTy == LLT::fixed_vector(8, 8))
3524 I.setDesc(TII.get(AArch64::DUPv8i8gpr));
3525 else if (VecTy == LLT::fixed_vector(16, 8))
3526 I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3527 else if (VecTy == LLT::fixed_vector(4, 16))
3528 I.setDesc(TII.get(AArch64::DUPv4i16gpr));
3529 else if (VecTy == LLT::fixed_vector(8, 16))
3530 I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3531 else
3532 return false;
3533 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3534 }
3535 case TargetOpcode::G_BUILD_VECTOR:
3536 return selectBuildVector(I, MRI);
3537 case TargetOpcode::G_MERGE_VALUES:
3538 return selectMergeValues(I, MRI);
3539 case TargetOpcode::G_UNMERGE_VALUES:
3540 return selectUnmergeValues(I, MRI);
3541 case TargetOpcode::G_SHUFFLE_VECTOR:
3542 return selectShuffleVector(I, MRI);
3543 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3544 return selectExtractElt(I, MRI);
3545 case TargetOpcode::G_CONCAT_VECTORS:
3546 return selectConcatVectors(I, MRI);
3547 case TargetOpcode::G_JUMP_TABLE:
3548 return selectJumpTable(I, MRI);
3549 case TargetOpcode::G_MEMCPY:
3550 case TargetOpcode::G_MEMCPY_INLINE:
3551 case TargetOpcode::G_MEMMOVE:
3552 case TargetOpcode::G_MEMSET:
3553 assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
3554 return selectMOPS(I, MRI);
3555 }
3556
3557 return false;
3558}
3559
3560bool AArch64InstructionSelector::selectAndRestoreState(MachineInstr &I) {
3561 MachineIRBuilderState OldMIBState = MIB.getState();
3562 bool Success = select(I);
3563 MIB.setState(OldMIBState);
3564 return Success;
3565}
3566
3567bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
3568 MachineRegisterInfo &MRI) {
3569 unsigned Mopcode;
3570 switch (GI.getOpcode()) {
3571 case TargetOpcode::G_MEMCPY:
3572 case TargetOpcode::G_MEMCPY_INLINE:
3573 Mopcode = AArch64::MOPSMemoryCopyPseudo;
3574 break;
3575 case TargetOpcode::G_MEMMOVE:
3576 Mopcode = AArch64::MOPSMemoryMovePseudo;
3577 break;
3578 case TargetOpcode::G_MEMSET:
3579 // For tagged memset see llvm.aarch64.mops.memset.tag
3580 Mopcode = AArch64::MOPSMemorySetPseudo;
3581 break;
3582 }
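// Each MOPS pseudo is expanded after selection into the architectural
// prologue/main/epilogue triple (e.g. roughly CPYFP/CPYFM/CPYFE for a
// memcpy, SETP/SETM/SETE for a memset).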
3583
3584 auto &DstPtr = GI.getOperand(0);
3585 auto &SrcOrVal = GI.getOperand(1);
3586 auto &Size = GI.getOperand(2);
3587
3588 // Create copies of the registers that can be clobbered.
3589 const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
3590 const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
3591 const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg());
3592
3593 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3594 const auto &SrcValRegClass =
3595 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3596
3597 // Constrain to specific registers
3598 RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
3599 RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
3600 RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);
3601
3602 MIB.buildCopy(DstPtrCopy, DstPtr);
3603 MIB.buildCopy(SrcValCopy, SrcOrVal);
3604 MIB.buildCopy(SizeCopy, Size);
3605
3606 // New instruction uses the copied registers because it must update them.
3607 // The defs are not used since they don't exist in G_MEM*. They are still
3608 // tied.
3609 // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE
3610 Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
3611 Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3612 if (IsSet) {
3613 MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
3614 {DstPtrCopy, SizeCopy, SrcValCopy});
3615 } else {
3616 Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
3617 MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
3618 {DstPtrCopy, SrcValCopy, SizeCopy});
3619 }
3620
3621 GI.eraseFromParent();
3622 return true;
3623}
3624
3625bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3626 MachineRegisterInfo &MRI) {
3627 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3628 Register JTAddr = I.getOperand(0).getReg();
3629 unsigned JTI = I.getOperand(1).getIndex();
3630 Register Index = I.getOperand(2).getReg();
3631
3632 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3633
3634 // With aarch64-jump-table-hardening, we only expand the jump table dispatch
3635 // sequence later, to guarantee the integrity of the intermediate values.
3636 if (MF->getFunction().hasFnAttribute("aarch64-jump-table-hardening")) {
3637 CodeModel::Model CM = TM.getCodeModel();
3638 if (STI.isTargetMachO()) {
3639 if (CM != CodeModel::Small && CM != CodeModel::Large)
3640 report_fatal_error("Unsupported code-model for hardened jump-table");
3641 } else {
3642 // Note that COFF support would likely also need JUMP_TABLE_DEBUG_INFO.
3643 assert(STI.isTargetELF() &&
3644 "jump table hardening only supported on MachO/ELF");
3645 if (CM != CodeModel::Small)
3646 report_fatal_error("Unsupported code-model for hardened jump-table");
3647 }
3648
3649 MIB.buildCopy({AArch64::X16}, I.getOperand(2).getReg());
3650 MIB.buildInstr(AArch64::BR_JumpTable)
3651 .addJumpTableIndex(I.getOperand(1).getIndex());
3652 I.eraseFromParent();
3653 return true;
3654 }
3655
3656 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3657 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3658
3659 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3660 {TargetReg, ScratchReg}, {JTAddr, Index})
3661 .addJumpTableIndex(JTI);
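// JumpTableDest32 is a pseudo that computes the branch target from the table
// address and the entry index; ScratchReg is clobbered by its later
// expansion.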
3662 // Save the jump table info.
3663 MIB.buildInstr(TargetOpcode::JUMP_TABLE_DEBUG_INFO, {},
3664 {static_cast<int64_t>(JTI)});
3665 // Build the indirect branch.
3666 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3667 I.eraseFromParent();
3668 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3669}
3670
3671bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3672 MachineRegisterInfo &MRI) {
3673 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3674 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3675
3676 Register DstReg = I.getOperand(0).getReg();
3677 unsigned JTI = I.getOperand(1).getIndex();
3678 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
3679 auto MovMI =
3680 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3681 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3682 .addJumpTableIndex(JTI, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3683 I.eraseFromParent();
3684 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3685}
3686
3687bool AArch64InstructionSelector::selectTLSGlobalValue(
3688 MachineInstr &I, MachineRegisterInfo &MRI) {
3689 if (!STI.isTargetMachO())
3690 return false;
3691 MachineFunction &MF = *I.getParent()->getParent();
3692 MF.getFrameInfo().setAdjustsStack(true);
3693
3694 const auto &GlobalOp = I.getOperand(1);
3695 assert(GlobalOp.getOffset() == 0 &&
3696 "Shouldn't have an offset on TLS globals!");
3697 const GlobalValue &GV = *GlobalOp.getGlobal();
3698
3699 auto LoadGOT =
3700 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3701 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3702
3703 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3704 {LoadGOT.getReg(0)})
3705 .addImm(0);
3706
3707 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3708 // TLS calls preserve all registers except those that absolutely must be
3709 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3710 // silly).
3711 unsigned Opcode = getBLRCallOpcode(MF);
3712
3713 // With ptrauth-calls, the tlv access thunk pointer is authenticated (IA, 0).
3714 if (MF.getFunction().hasFnAttribute("ptrauth-calls")) {
3715 assert(Opcode == AArch64::BLR);
3716 Opcode = AArch64::BLRAAZ;
3717 }
3718
3719 MIB.buildInstr(Opcode, {}, {Load})
3720 .addUse(AArch64::X0, RegState::Implicit)
3721 .addDef(AArch64::X0, RegState::Implicit)
3722 .addRegMask(TRI.getTLSCallPreservedMask());
3723
3724 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3725 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3726 MRI);
3727 I.eraseFromParent();
3728 return true;
3729}
3730
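// emitScalarToVector places a scalar value into lane 0 of a wider vector
// register: an IMPLICIT_DEF of the destination class followed by an
// INSERT_SUBREG at the bsub/hsub/ssub/dsub subregister matching the element
// size.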
3731MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3732 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3733 MachineIRBuilder &MIRBuilder) const {
3734 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3735
3736 auto BuildFn = [&](unsigned SubregIndex) {
3737 auto Ins =
3738 MIRBuilder
3739 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3740 .addImm(SubregIndex);
3741 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3742 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3743 return &*Ins;
3744 };
3745
3746 switch (EltSize) {
3747 case 8:
3748 return BuildFn(AArch64::bsub);
3749 case 16:
3750 return BuildFn(AArch64::hsub);
3751 case 32:
3752 return BuildFn(AArch64::ssub);
3753 case 64:
3754 return BuildFn(AArch64::dsub);
3755 default:
3756 return nullptr;
3757 }
3758}
3759
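// emitNarrowVector copies the low 32 (ssub) or 64 (dsub) bits of SrcReg into
// DstReg via a subregister COPY, yielding the narrower vector value.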
3760 MachineInstr *
3761 AArch64InstructionSelector::emitNarrowVector(Register DstReg, Register SrcReg,
3762 MachineIRBuilder &MIB,
3763 MachineRegisterInfo &MRI) const {
3764 LLT DstTy = MRI.getType(DstReg);
3765 const TargetRegisterClass *RC =
3766 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(SrcReg, MRI, TRI));
3767 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3768 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3769 return nullptr;
3770 }
3771 unsigned SubReg = 0;
3772 if (!getSubRegForClass(RC, TRI, SubReg))
3773 return nullptr;
3774 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3775 LLVM_DEBUG(dbgs() << "Unsupported destination size! ("
3776 << DstTy.getSizeInBits() << "\n");
3777 return nullptr;
3778 }
3779 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3780 .addReg(SrcReg, 0, SubReg);
3781 RBI.constrainGenericRegister(DstReg, *RC, MRI);
3782 return Copy;
3783}
3784
3785bool AArch64InstructionSelector::selectMergeValues(
3786 MachineInstr &I, MachineRegisterInfo &MRI) {
3787 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3788 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3789 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3790 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3791 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3792
3793 if (I.getNumOperands() != 3)
3794 return false;
3795
3796 // Merging 2 s64s into an s128.
3797 if (DstTy == LLT::scalar(128)) {
3798 if (SrcTy.getSizeInBits() != 64)
3799 return false;
3800 Register DstReg = I.getOperand(0).getReg();
3801 Register Src1Reg = I.getOperand(1).getReg();
3802 Register Src2Reg = I.getOperand(2).getReg();
3803 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3804 MachineInstr *InsMI = emitLaneInsert(std::nullopt, Tmp.getReg(0), Src1Reg,
3805 /* LaneIdx */ 0, RB, MIB);
3806 if (!InsMI)
3807 return false;
3808 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
3809 Src2Reg, /* LaneIdx */ 1, RB, MIB);
3810 if (!Ins2MI)
3811 return false;
3812 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3813 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
3814 I.eraseFromParent();
3815 return true;
3816 }
3817
3818 if (RB.getID() != AArch64::GPRRegBankID)
3819 return false;
3820
3821 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3822 return false;
3823
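// GPR path: merge two s32 values into one s64 by widening both inputs with
// SUBREG_TO_REG and then inserting the second value into bits [63:32] of the
// first with BFM (roughly BFI %dst, %hi, #32, #32).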
3824 auto *DstRC = &AArch64::GPR64RegClass;
3825 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
3826 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3827 TII.get(TargetOpcode::SUBREG_TO_REG))
3828 .addDef(SubToRegDef)
3829 .addImm(0)
3830 .addUse(I.getOperand(1).getReg())
3831 .addImm(AArch64::sub_32);
3832 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
3833 // Need to anyext the second scalar before we can use bfm
3834 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3835 TII.get(TargetOpcode::SUBREG_TO_REG))
3836 .addDef(SubToRegDef2)
3837 .addImm(0)
3838 .addUse(I.getOperand(2).getReg())
3839 .addImm(AArch64::sub_32);
3840 MachineInstr &BFM =
3841 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
3842 .addDef(I.getOperand(0).getReg())
3843 .addUse(SubToRegDef)
3844 .addUse(SubToRegDef2)
3845 .addImm(32)
3846 .addImm(31);
3847 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
3848 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
3849 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
3850 I.eraseFromParent();
3851 return true;
3852}
3853
3854static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3855 const unsigned EltSize) {
3856 // Choose a lane copy opcode and subregister based off of the size of the
3857 // vector's elements.
3858 switch (EltSize) {
3859 case 8:
3860 CopyOpc = AArch64::DUPi8;
3861 ExtractSubReg = AArch64::bsub;
3862 break;
3863 case 16:
3864 CopyOpc = AArch64::DUPi16;
3865 ExtractSubReg = AArch64::hsub;
3866 break;
3867 case 32:
3868 CopyOpc = AArch64::DUPi32;
3869 ExtractSubReg = AArch64::ssub;
3870 break;
3871 case 64:
3872 CopyOpc = AArch64::DUPi64;
3873 ExtractSubReg = AArch64::dsub;
3874 break;
3875 default:
3876 // Unknown size, bail out.
3877 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
3878 return false;
3879 }
3880 return true;
3881}
3882
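// emitExtractVectorElt copies lane LaneIdx of VecReg into a scalar FPR.
// Lane 0 is just a subregister COPY; other lanes use a DUPi* lane copy,
// which needs a 128-bit source, so unpacked (64-bit) vectors are first
// widened via emitScalarToVector.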
3883MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3884 std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
3885 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
3886 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3887 unsigned CopyOpc = 0;
3888 unsigned ExtractSubReg = 0;
3889 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
3890 LLVM_DEBUG(
3891 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
3892 return nullptr;
3893 }
3894
3895 const TargetRegisterClass *DstRC =
3896 getRegClassForTypeOnBank(ScalarTy, DstRB, true);
3897 if (!DstRC) {
3898 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
3899 return nullptr;
3900 }
3901
3902 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
3903 const LLT &VecTy = MRI.getType(VecReg);
3904 const TargetRegisterClass *VecRC =
3905 getRegClassForTypeOnBank(VecTy, VecRB, true);
3906 if (!VecRC) {
3907 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3908 return nullptr;
3909 }
3910
3911 // The register that we're going to copy into.
3912 Register InsertReg = VecReg;
3913 if (!DstReg)
3914 DstReg = MRI.createVirtualRegister(DstRC);
3915 // If the lane index is 0, we just use a subregister COPY.
3916 if (LaneIdx == 0) {
3917 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
3918 .addReg(VecReg, 0, ExtractSubReg);
3919 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3920 return &*Copy;
3921 }
3922
3923 // Lane copies require 128-bit wide registers. If we're dealing with an
3924 // unpacked vector, then we need to move up to that width. Insert an implicit
3925 // def and a subregister insert to get us there.
3926 if (VecTy.getSizeInBits() != 128) {
3927 MachineInstr *ScalarToVector = emitScalarToVector(
3928 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
3929 if (!ScalarToVector)
3930 return nullptr;
3931 InsertReg = ScalarToVector->getOperand(0).getReg();
3932 }
3933
3934 MachineInstr *LaneCopyMI =
3935 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
3936 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
3937
3938 // Make sure that we actually constrain the initial copy.
3939 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3940 return LaneCopyMI;
3941}
3942
3943bool AArch64InstructionSelector::selectExtractElt(
3944 MachineInstr &I, MachineRegisterInfo &MRI) {
3945 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
3946 "unexpected opcode!");
3947 Register DstReg = I.getOperand(0).getReg();
3948 const LLT NarrowTy = MRI.getType(DstReg);
3949 const Register SrcReg = I.getOperand(1).getReg();
3950 const LLT WideTy = MRI.getType(SrcReg);
3951 (void)WideTy;
3952 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
3953 "source register size too small!");
3954 assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
3955
3956 // Need the lane index to determine the correct copy opcode.
3957 MachineOperand &LaneIdxOp = I.getOperand(2);
3958 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
3959
3960 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
3961 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
3962 return false;
3963 }
3964
3965 // Find the index to extract from.
3966 auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
3967 if (!VRegAndVal)
3968 return false;
3969 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
3970
3971
3972 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3973 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
3974 LaneIdx, MIB);
3975 if (!Extract)
3976 return false;
3977
3978 I.eraseFromParent();
3979 return true;
3980}
3981
3982bool AArch64InstructionSelector::selectSplitVectorUnmerge(
3983 MachineInstr &I, MachineRegisterInfo &MRI) {
3984 unsigned NumElts = I.getNumOperands() - 1;
3985 Register SrcReg = I.getOperand(NumElts).getReg();
3986 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
3987 const LLT SrcTy = MRI.getType(SrcReg);
3988
3989 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
3990 if (SrcTy.getSizeInBits() > 128) {
3991 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
3992 return false;
3993 }
3994
3995 // We implement a split vector operation by treating the sub-vectors as
3996 // scalars and extracting them.
3997 const RegisterBank &DstRB =
3998 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
3999 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4000 Register Dst = I.getOperand(OpIdx).getReg();
4001 MachineInstr *Extract =
4002 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
4003 if (!Extract)
4004 return false;
4005 }
4006 I.eraseFromParent();
4007 return true;
4008}
4009
4010bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
4011 MachineRegisterInfo &MRI) {
4012 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4013 "unexpected opcode");
4014
4015 // TODO: Handle unmerging into GPRs and from scalars to scalars.
4016 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
4017 AArch64::FPRRegBankID ||
4018 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
4019 AArch64::FPRRegBankID) {
4020 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
4021 "currently unsupported.\n");
4022 return false;
4023 }
4024
4025 // The last operand is the vector source register, and every other operand is
4026 // a register to unpack into.
4027 unsigned NumElts = I.getNumOperands() - 1;
4028 Register SrcReg = I.getOperand(NumElts).getReg();
4029 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4030 const LLT WideTy = MRI.getType(SrcReg);
4031 (void)WideTy;
4032 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
4033 "can only unmerge from vector or s128 types!");
4034 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
4035 "source register size too small!");
4036
4037 if (!NarrowTy.isScalar())
4038 return selectSplitVectorUnmerge(I, MRI);
4039
4040 // Choose a lane copy opcode and subregister based off of the size of the
4041 // vector's elements.
4042 unsigned CopyOpc = 0;
4043 unsigned ExtractSubReg = 0;
4044 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
4045 return false;
4046
4047 // Set up for the lane copies.
4048 MachineBasicBlock &MBB = *I.getParent();
4049
4050 // Stores the registers we'll be copying from.
4051 SmallVector<Register, 4> InsertRegs;
4052
4053 // We'll use the first register twice, so we only need NumElts-1 registers.
4054 unsigned NumInsertRegs = NumElts - 1;
4055
4056 // If our elements fit into exactly 128 bits, then we can copy from the source
4057 // directly. Otherwise, we need to do a bit of setup with some subregister
4058 // inserts.
4059 if (NarrowTy.getSizeInBits() * NumElts == 128) {
4060 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
4061 } else {
4062 // No. We have to perform subregister inserts. For each insert, create an
4063 // implicit def and a subregister insert, and save the register we create.
4064 const TargetRegisterClass *RC = getRegClassForTypeOnBank(
4065 LLT::fixed_vector(NumElts, WideTy.getScalarSizeInBits()),
4066 *RBI.getRegBank(SrcReg, MRI, TRI));
4067 unsigned SubReg = 0;
4068 bool Found = getSubRegForClass(RC, TRI, SubReg);
4069 (void)Found;
4070 assert(Found && "expected to find last operand's subreg idx");
4071 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4072 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4073 MachineInstr &ImpDefMI =
4074 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
4075 ImpDefReg);
4076
4077 // Now, create the subregister insert from SrcReg.
4078 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4079 MachineInstr &InsMI =
4080 *BuildMI(MBB, I, I.getDebugLoc(),
4081 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4082 .addUse(ImpDefReg)
4083 .addUse(SrcReg)
4084 .addImm(SubReg);
4085
4086 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
4087 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
4088
4089 // Save the register so that we can copy from it after.
4090 InsertRegs.push_back(InsertReg);
4091 }
4092 }
4093
4094 // Now that we've created any necessary subregister inserts, we can
4095 // create the copies.
4096 //
4097 // Perform the first copy separately as a subregister copy.
4098 Register CopyTo = I.getOperand(0).getReg();
4099 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4100 .addReg(InsertRegs[0], 0, ExtractSubReg);
4101 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
4102
4103 // Now, perform the remaining copies as vector lane copies.
4104 unsigned LaneIdx = 1;
4105 for (Register InsReg : InsertRegs) {
4106 Register CopyTo = I.getOperand(LaneIdx).getReg();
4107 MachineInstr &CopyInst =
4108 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
4109 .addUse(InsReg)
4110 .addImm(LaneIdx);
4111 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
4112 ++LaneIdx;
4113 }
4114
4115 // Separately constrain the first copy's destination. Because of the
4116 // limitation in constrainOperandRegClass, we can't guarantee that this will
4117 // actually be constrained. So, do it ourselves using the second operand.
4118 const TargetRegisterClass *RC =
4119 MRI.getRegClassOrNull(I.getOperand(1).getReg());
4120 if (!RC) {
4121 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
4122 return false;
4123 }
4124
4125 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
4126 I.eraseFromParent();
4127 return true;
4128}
4129
4130bool AArch64InstructionSelector::selectConcatVectors(
4131 MachineInstr &I, MachineRegisterInfo &MRI) {
4132 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4133 "Unexpected opcode");
4134 Register Dst = I.getOperand(0).getReg();
4135 Register Op1 = I.getOperand(1).getReg();
4136 Register Op2 = I.getOperand(2).getReg();
4137 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4138 if (!ConcatMI)
4139 return false;
4140 I.eraseFromParent();
4141 return true;
4142}
4143
4144unsigned
4145AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4146 MachineFunction &MF) const {
4147 Type *CPTy = CPVal->getType();
4148 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
4149
4150 MachineConstantPool *MCP = MF.getConstantPool();
4151 return MCP->getConstantPoolIndex(CPVal, Alignment);
4152}
4153
4154MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4155 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4156 const TargetRegisterClass *RC;
4157 unsigned Opc;
4158 bool IsTiny = TM.getCodeModel() == CodeModel::Tiny;
4159 unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
4160 switch (Size) {
4161 case 16:
4162 RC = &AArch64::FPR128RegClass;
4163 Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
4164 break;
4165 case 8:
4166 RC = &AArch64::FPR64RegClass;
4167 Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
4168 break;
4169 case 4:
4170 RC = &AArch64::FPR32RegClass;
4171 Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
4172 break;
4173 case 2:
4174 RC = &AArch64::FPR16RegClass;
4175 Opc = AArch64::LDRHui;
4176 break;
4177 default:
4178 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4179 << *CPVal->getType());
4180 return nullptr;
4181 }
4182
4183 MachineInstr *LoadMI = nullptr;
4184 auto &MF = MIRBuilder.getMF();
4185 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4186 if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) {
4187 // Use load(literal) for tiny code model.
4188 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {}).addConstantPoolIndex(CPIdx);
4189 } else {
4190 auto Adrp =
4191 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4192 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
4193
4194 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {Adrp})
4195 .addConstantPoolIndex(
4196 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4197
4198 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
4199 }
4200
4201 MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4202 LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
4203 MachineMemOperand::MOLoad,
4204 Size, Align(Size)));
4205 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
4206 return LoadMI;
4207}
4208
4209/// Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given
4210/// size and RB.
4211static std::pair<unsigned, unsigned>
4212getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4213 unsigned Opc, SubregIdx;
4214 if (RB.getID() == AArch64::GPRRegBankID) {
4215 if (EltSize == 8) {
4216 Opc = AArch64::INSvi8gpr;
4217 SubregIdx = AArch64::bsub;
4218 } else if (EltSize == 16) {
4219 Opc = AArch64::INSvi16gpr;
4220 SubregIdx = AArch64::ssub;
4221 } else if (EltSize == 32) {
4222 Opc = AArch64::INSvi32gpr;
4223 SubregIdx = AArch64::ssub;
4224 } else if (EltSize == 64) {
4225 Opc = AArch64::INSvi64gpr;
4226 SubregIdx = AArch64::dsub;
4227 } else {
4228 llvm_unreachable("invalid elt size!");
4229 }
4230 } else {
4231 if (EltSize == 8) {
4232 Opc = AArch64::INSvi8lane;
4233 SubregIdx = AArch64::bsub;
4234 } else if (EltSize == 16) {
4235 Opc = AArch64::INSvi16lane;
4236 SubregIdx = AArch64::hsub;
4237 } else if (EltSize == 32) {
4238 Opc = AArch64::INSvi32lane;
4239 SubregIdx = AArch64::ssub;
4240 } else if (EltSize == 64) {
4241 Opc = AArch64::INSvi64lane;
4242 SubregIdx = AArch64::dsub;
4243 } else {
4244 llvm_unreachable("invalid elt size!");
4245 }
4246 }
4247 return std::make_pair(Opc, SubregIdx);
4248}
4249
4250MachineInstr *AArch64InstructionSelector::emitInstr(
4251 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4252 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4253 const ComplexRendererFns &RenderFns) const {
4254 assert(Opcode && "Expected an opcode?");
4255 assert(!isPreISelGenericOpcode(Opcode) &&
4256 "Function should only be used to produce selected instructions!");
4257 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4258 if (RenderFns)
4259 for (auto &Fn : *RenderFns)
4260 Fn(MI);
4261 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4262 return &*MI;
4263}
4264
4265MachineInstr *AArch64InstructionSelector::emitAddSub(
4266 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4267 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4268 MachineIRBuilder &MIRBuilder) const {
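// AddrModeAndSizeToOpcode rows: [0] immediate, [1] shifted register,
// [2] register-register, [3] immediate with negated RHS, [4] extended
// register; within each row, entry [0] is the 64-bit opcode and [1] the
// 32-bit one.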
4269 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4270 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4271 auto Ty = MRI.getType(LHS.getReg());
4272 assert(!Ty.isVector() && "Expected a scalar or pointer?");
4273 unsigned Size = Ty.getSizeInBits();
4274 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4275 bool Is32Bit = Size == 32;
4276
4277 // INSTRri form with positive arithmetic immediate.
4278 if (auto Fns = selectArithImmed(RHS))
4279 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4280 MIRBuilder, Fns);
4281
4282 // INSTRri form with negative arithmetic immediate.
4283 if (auto Fns = selectNegArithImmed(RHS))
4284 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4285 MIRBuilder, Fns);
4286
4287 // INSTRrx form.
4288 if (auto Fns = selectArithExtendedRegister(RHS))
4289 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4290 MIRBuilder, Fns);
4291
4292 // INSTRrs form.
4293 if (auto Fns = selectShiftedRegister(RHS))
4294 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4295 MIRBuilder, Fns);
4296 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4297 MIRBuilder);
4298}
4299
4300 MachineInstr *
4301AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4302 MachineOperand &RHS,
4303 MachineIRBuilder &MIRBuilder) const {
4304 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4305 {{AArch64::ADDXri, AArch64::ADDWri},
4306 {AArch64::ADDXrs, AArch64::ADDWrs},
4307 {AArch64::ADDXrr, AArch64::ADDWrr},
4308 {AArch64::SUBXri, AArch64::SUBWri},
4309 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4310 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4311}
4312
4313 MachineInstr *
4314AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4315 MachineOperand &RHS,
4316 MachineIRBuilder &MIRBuilder) const {
4317 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4318 {{AArch64::ADDSXri, AArch64::ADDSWri},
4319 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4320 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4321 {AArch64::SUBSXri, AArch64::SUBSWri},
4322 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4323 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4324}
4325
4326 MachineInstr *
4327AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4328 MachineOperand &RHS,
4329 MachineIRBuilder &MIRBuilder) const {
4330 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4331 {{AArch64::SUBSXri, AArch64::SUBSWri},
4332 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4333 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4334 {AArch64::ADDSXri, AArch64::ADDSWri},
4335 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4336 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4337}
4338
4339 MachineInstr *
4340AArch64InstructionSelector::emitADCS(Register Dst, MachineOperand &LHS,
4341 MachineOperand &RHS,
4342 MachineIRBuilder &MIRBuilder) const {
4343 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4344 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4345 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4346 static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr};
4347 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4348}
4349
4350 MachineInstr *
4351AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS,
4352 MachineOperand &RHS,
4353 MachineIRBuilder &MIRBuilder) const {
4354 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4355 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4356 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4357 static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr};
4358 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4359}
4360
4361 MachineInstr *
4362AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4363 MachineIRBuilder &MIRBuilder) const {
4364 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4365 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4366 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4367 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4368}
4369
4370 MachineInstr *
4371AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4372 MachineIRBuilder &MIRBuilder) const {
4373 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4374 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4375 LLT Ty = MRI.getType(LHS.getReg());
4376 unsigned RegSize = Ty.getSizeInBits();
4377 bool Is32Bit = (RegSize == 32);
4378 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4379 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4380 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4381 // ANDS needs a logical immediate for its immediate form. Check if we can
4382 // fold one in.
4383 if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4384 int64_t Imm = ValAndVReg->Value.getSExtValue();
4385
4386 if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
4387 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4388 TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
4389 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4390 return &*TstMI;
4391 }
4392 }
4393
4394 if (auto Fns = selectLogicalShiftedRegister(RHS))
4395 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4396 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4397}
4398
4399MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4400 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4401 MachineIRBuilder &MIRBuilder) const {
4402 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4403 assert(Predicate.isPredicate() && "Expected predicate?");
4404 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4405 LLT CmpTy = MRI.getType(LHS.getReg());
4406 assert(!CmpTy.isVector() && "Expected scalar or pointer");
4407 unsigned Size = CmpTy.getSizeInBits();
4408 (void)Size;
4409 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4410 // Fold the compare into a cmn or tst if possible.
4411 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4412 return FoldCmp;
4413 auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
4414 return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4415}
4416
4417MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4418 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4419 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4420#ifndef NDEBUG
4421 LLT Ty = MRI.getType(Dst);
4422 assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4423 "Expected a 32-bit scalar register?");
4424#endif
4425 const Register ZReg = AArch64::WZR;
4426 AArch64CC::CondCode CC1, CC2;
4427 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
4428 auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
4429 if (CC2 == AArch64CC::AL)
4430 return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
4431 MIRBuilder);
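// Predicates that need two AArch64 condition codes (e.g. FCMP_UEQ -> EQ or
// VS) are materialized as two CSINC/CSET results that are then ORed
// together below.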
4432 const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4433 Register Def1Reg = MRI.createVirtualRegister(RC);
4434 Register Def2Reg = MRI.createVirtualRegister(RC);
4435 auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
4436 emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
4437 emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
4438 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4439 constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4440 return &*OrMI;
4441}
4442
4443MachineInstr *AArch64InstructionSelector::emitFPCompare(
4444 Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
4445 std::optional<CmpInst::Predicate> Pred) const {
4446 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4447 LLT Ty = MRI.getType(LHS);
4448 if (Ty.isVector())
4449 return nullptr;
4450 unsigned OpSize = Ty.getSizeInBits();
4451 assert(OpSize == 16 || OpSize == 32 || OpSize == 64);
4452
4453 // If this is a compare against +0.0, then we don't have
4454 // to explicitly materialize a constant.
4455 const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4456 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4457
4458 auto IsEqualityPred = [](CmpInst::Predicate P) {
4459 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4460 P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4461 };
4462 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4463 // Try commutating the operands.
4464 const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4465 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4466 ShouldUseImm = true;
4467 std::swap(LHS, RHS);
4468 }
4469 }
4470 unsigned CmpOpcTbl[2][3] = {
4471 {AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr},
4472 {AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}};
4473 unsigned CmpOpc =
4474 CmpOpcTbl[ShouldUseImm][OpSize == 16 ? 0 : (OpSize == 32 ? 1 : 2)];
4475
4476 // Partially build the compare. Decide if we need to add a use for the
4477 // third operand based off whether or not we're comparing against 0.0.
4478 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4479 CmpMI.setMIFlags(MachineInstr::NoFPExcept);
4480 if (!ShouldUseImm)
4481 CmpMI.addUse(RHS);
4482 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4483 return &*CmpMI;
4484}
4485
4486MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4487 std::optional<Register> Dst, Register Op1, Register Op2,
4488 MachineIRBuilder &MIRBuilder) const {
4489 // We implement a vector concat by:
4490 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4491 // 2. Insert the upper vector into the destination's upper element
4492 // TODO: some of this code is common with G_BUILD_VECTOR handling.
4493 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4494
4495 const LLT Op1Ty = MRI.getType(Op1);
4496 const LLT Op2Ty = MRI.getType(Op2);
4497
4498 if (Op1Ty != Op2Ty) {
4499 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4500 return nullptr;
4501 }
4502 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4503
4504 if (Op1Ty.getSizeInBits() >= 128) {
4505 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4506 return nullptr;
4507 }
4508
4509 // At the moment we just support 64 bit vector concats.
4510 if (Op1Ty.getSizeInBits() != 64) {
4511 LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
4512 return nullptr;
4513 }
4514
4515 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4516 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4517 const TargetRegisterClass *DstRC =
4518 getRegClassForTypeOnBank(Op1Ty.multiplyElements(2), FPRBank);
4519
4520 MachineInstr *WidenedOp1 =
4521 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4522 MachineInstr *WidenedOp2 =
4523 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4524 if (!WidenedOp1 || !WidenedOp2) {
4525 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4526 return nullptr;
4527 }
4528
4529 // Now do the insert of the upper element.
4530 unsigned InsertOpc, InsSubRegIdx;
4531 std::tie(InsertOpc, InsSubRegIdx) =
4532 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4533
4534 if (!Dst)
4535 Dst = MRI.createVirtualRegister(DstRC);
4536 auto InsElt =
4537 MIRBuilder
4538 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4539 .addImm(1) /* Lane index */
4540 .addUse(WidenedOp2->getOperand(0).getReg())
4541 .addImm(0);
4542 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4543 return &*InsElt;
4544}
4545
4546 MachineInstr *
4547AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
4548 Register Src2, AArch64CC::CondCode Pred,
4549 MachineIRBuilder &MIRBuilder) const {
4550 auto &MRI = *MIRBuilder.getMRI();
4551 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
4552 // If we used a register class, then this won't necessarily have an LLT.
4553 // Compute the size based off whether or not we have a class or bank.
4554 unsigned Size;
4555 if (const auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
4556 Size = TRI.getRegSizeInBits(*RC);
4557 else
4558 Size = MRI.getType(Dst).getSizeInBits();
4559 // Some opcodes use s1.
4560 assert(Size <= 64 && "Expected 64 bits or less only!");
4561 static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4562 unsigned Opc = OpcTable[Size == 64];
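// Note: with WZR/XZR as both sources, CSINC Dst, zr, zr, Pred yields 0 when
// Pred holds and 1 otherwise (the inverse of CSET), so callers pass the
// inverted condition to get a "condition is true" flag.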
4563 auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
4564 constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
4565 return &*CSINC;
4566}
4567
4568MachineInstr *AArch64InstructionSelector::emitCarryIn(MachineInstr &I,
4569 Register CarryReg) {
4570 MachineRegisterInfo *MRI = MIB.getMRI();
4571 unsigned Opcode = I.getOpcode();
4572
4573 // If the instruction is a SUB, we need to negate the carry,
4574 // because borrowing is indicated by carry-flag == 0.
4575 bool NeedsNegatedCarry =
4576 (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE);
4577
4578 // If the previous instruction will already produce the correct carry, do not
4579 // emit a carry generating instruction. E.g. for G_UADDE/G_USUBE sequences
4580 // generated during legalization of wide add/sub. This optimization depends on
4581 // these sequences not being interrupted by other instructions.
4582 // We have to select the previous instruction before the carry-using
4583 // instruction is deleted by the calling function, otherwise the previous
4584 // instruction might become dead and would get deleted.
4585 MachineInstr *SrcMI = MRI->getVRegDef(CarryReg);
4586 if (SrcMI == I.getPrevNode()) {
4587 if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(SrcMI)) {
4588 bool ProducesNegatedCarry = CarrySrcMI->isSub();
4589 if (NeedsNegatedCarry == ProducesNegatedCarry &&
4590 CarrySrcMI->isUnsigned() &&
4591 CarrySrcMI->getCarryOutReg() == CarryReg &&
4592 selectAndRestoreState(*SrcMI))
4593 return nullptr;
4594 }
4595 }
4596
4597 Register DeadReg = MRI->createVirtualRegister(&AArch64::GPR32RegClass);
4598
4599 if (NeedsNegatedCarry) {
4600 // (0 - Carry) sets !C in NZCV when Carry == 1
4601 Register ZReg = AArch64::WZR;
4602 return emitInstr(AArch64::SUBSWrr, {DeadReg}, {ZReg, CarryReg}, MIB);
4603 }
4604
4605 // (Carry - 1) sets !C in NZCV when Carry == 0
4606 auto Fns = select12BitValueWithLeftShift(1);
4607 return emitInstr(AArch64::SUBSWri, {DeadReg}, {CarryReg}, MIB, Fns);
4608}
4609
4610bool AArch64InstructionSelector::selectOverflowOp(MachineInstr &I,
4611 MachineRegisterInfo &MRI) {
4612 auto &CarryMI = cast<GAddSubCarryOut>(I);
4613
4614 if (auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(&I)) {
4615 // Set NZCV carry according to carry-in VReg
4616 emitCarryIn(I, CarryInMI->getCarryInReg());
4617 }
4618
4619 // Emit the operation and get the correct condition code.
4620 auto OpAndCC = emitOverflowOp(I.getOpcode(), CarryMI.getDstReg(),
4621 CarryMI.getLHS(), CarryMI.getRHS(), MIB);
4622
4623 Register CarryOutReg = CarryMI.getCarryOutReg();
4624
4625 // Don't convert carry-out to VReg if it is never used
4626 if (!MRI.use_nodbg_empty(CarryOutReg)) {
4627 // Now, put the overflow result in the register given by the first operand
4628 // to the overflow op. CSINC increments the result when the predicate is
4629 // false, so to get the increment when it's true, we need to use the
4630 // inverse. In this case, we want to increment when carry is set.
4631 Register ZReg = AArch64::WZR;
4632 emitCSINC(/*Dst=*/CarryOutReg, /*Src1=*/ZReg, /*Src2=*/ZReg,
4633 getInvertedCondCode(OpAndCC.second), MIB);
4634 }
4635
4636 I.eraseFromParent();
4637 return true;
4638}
4639
4640std::pair<MachineInstr *, AArch64CC::CondCode>
4641AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4642 MachineOperand &LHS,
4643 MachineOperand &RHS,
4644 MachineIRBuilder &MIRBuilder) const {
4645 switch (Opcode) {
4646 default:
4647 llvm_unreachable("Unexpected opcode!");
4648 case TargetOpcode::G_SADDO:
4649 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4650 case TargetOpcode::G_UADDO:
4651 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4652 case TargetOpcode::G_SSUBO:
4653 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4654 case TargetOpcode::G_USUBO:
4655 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4656 case TargetOpcode::G_SADDE:
4657 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4658 case TargetOpcode::G_UADDE:
4659 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4660 case TargetOpcode::G_SSUBE:
4661 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4662 case TargetOpcode::G_USUBE:
4663 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4664 }
4665}
4666
4667/// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
4668/// expressed as a conjunction.
4669/// \param CanNegate Set to true if we can negate the whole sub-tree just by
4670/// changing the conditions on the CMP tests.
4671/// (this means we can call emitConjunctionRec() with
4672/// Negate==true on this sub-tree)
4673/// \param MustBeFirst Set to true if this subtree needs to be negated and we
4674/// cannot do the negation naturally. We are required to
4675/// emit the subtree first in this case.
4676 /// \param WillNegate Is true if we are called when the result of this
4677/// subexpression must be negated. This happens when the
4678/// outer expression is an OR. We can use this fact to know
4679/// that we have a double negation (or (or ...) ...) that
4680/// can be implemented for free.
4681static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
4682 bool WillNegate, MachineRegisterInfo &MRI,
4683 unsigned Depth = 0) {
4684 if (!MRI.hasOneNonDBGUse(Val))
4685 return false;
4686 MachineInstr *ValDef = MRI.getVRegDef(Val);
4687 unsigned Opcode = ValDef->getOpcode();
4688 if (isa<GAnyCmp>(ValDef)) {
4689 CanNegate = true;
4690 MustBeFirst = false;
4691 return true;
4692 }
4693 // Protect against exponential runtime and stack overflow.
4694 if (Depth > 6)
4695 return false;
4696 if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
4697 bool IsOR = Opcode == TargetOpcode::G_OR;
4698 Register O0 = ValDef->getOperand(1).getReg();
4699 Register O1 = ValDef->getOperand(2).getReg();
4700 bool CanNegateL;
4701 bool MustBeFirstL;
4702 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, MRI, Depth + 1))
4703 return false;
4704 bool CanNegateR;
4705 bool MustBeFirstR;
4706 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, MRI, Depth + 1))
4707 return false;
4708
4709 if (MustBeFirstL && MustBeFirstR)
4710 return false;
4711
4712 if (IsOR) {
4713 // For an OR expression we need to be able to naturally negate at least
4714 // one side or we cannot do the transformation at all.
4715 if (!CanNegateL && !CanNegateR)
4716 return false;
4717 // If the result of the OR will be negated and we can naturally negate
4718 // the leaves, then this sub-tree as a whole negates naturally.
4719 CanNegate = WillNegate && CanNegateL && CanNegateR;
4720 // If we cannot naturally negate the whole sub-tree, then this must be
4721 // emitted first.
4722 MustBeFirst = !CanNegate;
4723 } else {
4724 assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
4725 // We cannot naturally negate an AND operation.
4726 CanNegate = false;
4727 MustBeFirst = MustBeFirstL || MustBeFirstR;
4728 }
4729 return true;
4730 }
4731 return false;
4732}
4733
4734MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
4735 Register LHS, Register RHS, CmpInst::Predicate CC,
4736 AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
4737 MachineIRBuilder &MIB) const {
4738 auto &MRI = *MIB.getMRI();
4739 LLT OpTy = MRI.getType(LHS);
4740 unsigned CCmpOpc;
4741 std::optional<ValueAndVReg> C;
4743 assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
4745 if (!C || C->Value.sgt(31) || C->Value.slt(-31))
4746 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
4747 else if (C->Value.ule(31))
4748 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
4749 else
4750 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMNWi : AArch64::CCMNXi;
4751 } else {
4752 assert(OpTy.getSizeInBits() == 16 || OpTy.getSizeInBits() == 32 ||
4753 OpTy.getSizeInBits() == 64);
4754 switch (OpTy.getSizeInBits()) {
4755 case 16:
4756 assert(STI.hasFullFP16() && "Expected Full FP16 for fp16 comparisons");
4757 CCmpOpc = AArch64::FCCMPHrr;
4758 break;
4759 case 32:
4760 CCmpOpc = AArch64::FCCMPSrr;
4761 break;
4762 case 64:
4763 CCmpOpc = AArch64::FCCMPDrr;
4764 break;
4765 default:
4766 return nullptr;
4767 }
4768 }
4769 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
4770 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
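// CCMP/FCCMP semantics: if Predicate holds, perform the comparison and set
// NZCV from it; otherwise set NZCV directly to the immediate, which is
// chosen here so that OutCC fails (the conjunction evaluates to false).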
4771 auto CCmp =
4772 MIB.buildInstr(CCmpOpc, {}, {LHS});
4773 if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
4774 CCmp.addImm(C->Value.getZExtValue());
4775 else if (CCmpOpc == AArch64::CCMNWi || CCmpOpc == AArch64::CCMNXi)
4776 CCmp.addImm(C->Value.abs().getZExtValue());
4777 else
4778 CCmp.addReg(RHS);
4779 CCmp.addImm(NZCV).addImm(Predicate);
4780 constrainSelectedInstRegOperands(*CCmp, TII, TRI, RBI);
4781 return &*CCmp;
4782}
4783
4784MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
4785 Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
4786 AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
4787 // We're at a tree leaf, produce a conditional comparison operation.
4788 auto &MRI = *MIB.getMRI();
4789 MachineInstr *ValDef = MRI.getVRegDef(Val);
4790 unsigned Opcode = ValDef->getOpcode();
4791 if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
4792 Register LHS = Cmp->getLHSReg();
4793 Register RHS = Cmp->getRHSReg();
4794 CmpInst::Predicate CC = Cmp->getCond();
4795 if (Negate)
4796 CC = CmpInst::getInversePredicate(CC);
4797 if (isa<GICmp>(Cmp)) {
4798 OutCC = changeICMPPredToAArch64CC(CC);
4799 } else {
4800 // Handle special FP cases.
4801 AArch64CC::CondCode ExtraCC;
4802 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
4803 // Some floating point conditions can't be tested with a single condition
4804 // code. Construct an additional comparison in this case.
4805 if (ExtraCC != AArch64CC::AL) {
4806 MachineInstr *ExtraCmp;
4807 if (!CCOp)
4808 ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC);
4809 else
4810 ExtraCmp =
4811 emitConditionalComparison(LHS, RHS, CC, Predicate, ExtraCC, MIB);
4812 CCOp = ExtraCmp->getOperand(0).getReg();
4813 Predicate = ExtraCC;
4814 }
4815 }
4816
4817 // Produce a normal comparison if we are first in the chain
4818 if (!CCOp) {
4819 auto Dst = MRI.cloneVirtualRegister(LHS);
4820 if (isa<GICmp>(Cmp))
4821 return emitSUBS(Dst, Cmp->getOperand(2), Cmp->getOperand(3), MIB);
4822 return emitFPCompare(Cmp->getOperand(2).getReg(),
4823 Cmp->getOperand(3).getReg(), MIB);
4824 }
4825 // Otherwise produce a ccmp.
4826 return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB);
4827 }
4828 assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");
4829
4830 bool IsOR = Opcode == TargetOpcode::G_OR;
4831
4832 Register LHS = ValDef->getOperand(1).getReg();
4833 bool CanNegateL;
4834 bool MustBeFirstL;
4835 bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR, MRI);
4836 assert(ValidL && "Valid conjunction/disjunction tree");
4837 (void)ValidL;
4838
4839 Register RHS = ValDef->getOperand(2).getReg();
4840 bool CanNegateR;
4841 bool MustBeFirstR;
4842 bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR, MRI);
4843 assert(ValidR && "Valid conjunction/disjunction tree");
4844 (void)ValidR;
4845
4846 // Swap sub-tree that must come first to the right side.
4847 if (MustBeFirstL) {
4848 assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
4849 std::swap(LHS, RHS);
4850 std::swap(CanNegateL, CanNegateR);
4851 std::swap(MustBeFirstL, MustBeFirstR);
4852 }
4853
4854 bool NegateR;
4855 bool NegateAfterR;
4856 bool NegateL;
4857 bool NegateAfterAll;
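  // A G_OR is emitted as a negated AND chain (De Morgan):
  //   (or a, b) == !(and (not a), (not b))
  // so both leaves are negated and, unless the caller already wants the
  // negated result, the final condition code is inverted again afterwards.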
4858 if (Opcode == TargetOpcode::G_OR) {
4859 // Swap the sub-tree that we can negate naturally to the left.
4860 if (!CanNegateL) {
4861 assert(CanNegateR && "at least one side must be negatable");
4862 assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
4863 assert(!Negate);
4864 std::swap(LHS, RHS);
4865 NegateR = false;
4866 NegateAfterR = true;
4867 } else {
4868 // Negate the left sub-tree if possible, otherwise negate the result.
4869 NegateR = CanNegateR;
4870 NegateAfterR = !CanNegateR;
4871 }
4872 NegateL = true;
4873 NegateAfterAll = !Negate;
4874 } else {
4875 assert(Opcode == TargetOpcode::G_AND &&
4876 "Valid conjunction/disjunction tree");
4877 assert(!Negate && "Valid conjunction/disjunction tree");
4878
4879 NegateL = false;
4880 NegateR = false;
4881 NegateAfterR = false;
4882 NegateAfterAll = false;
4883 }
4884
4885 // Emit sub-trees.
4886 AArch64CC::CondCode RHSCC;
4887 MachineInstr *CmpR =
4888 emitConjunctionRec(RHS, RHSCC, NegateR, CCOp, Predicate, MIB);
4889 if (NegateAfterR)
4890 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
4891 MachineInstr *CmpL = emitConjunctionRec(
4892 LHS, OutCC, NegateL, CmpR->getOperand(0).getReg(), RHSCC, MIB);
4893 if (NegateAfterAll)
4894 OutCC = AArch64CC::getInvertedCondCode(OutCC);
4895 return CmpL;
4896}
4897
4898MachineInstr *AArch64InstructionSelector::emitConjunction(
4899 Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {
4900 bool DummyCanNegate;
4901 bool DummyMustBeFirst;
4902 if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false,
4903 *MIB.getMRI()))
4904 return nullptr;
4905 return emitConjunctionRec(Val, OutCC, false, Register(), AArch64CC::AL, MIB);
4906}
4907
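// Try to select a G_SELECT whose condition is a conjunction/disjunction of
// compares: the condition is lowered to a conditional-compare (CCMP/FCCMP)
// chain by emitConjunction and the result feeds a single conditional select.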
4908bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
4909 MachineInstr &CondMI) {
4910 AArch64CC::CondCode AArch64CC;
4911 MachineInstr *ConjMI = emitConjunction(SelI.getCondReg(), AArch64CC, MIB);
4912 if (!ConjMI)
4913 return false;
4914
4915 emitSelect(SelI.getReg(0), SelI.getTrueReg(), SelI.getFalseReg(), AArch64CC, MIB);
4916 SelI.eraseFromParent();
4917 return true;
4918}
4919
4920bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
4921 MachineRegisterInfo &MRI = *MIB.getMRI();
4922 // We want to recognize this pattern:
4923 //
4924 // $z = G_FCMP pred, $x, $y
4925 // ...
4926 // $w = G_SELECT $z, $a, $b
4927 //
4928 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
4929 // some copies/truncs in between.)
4930 //
4931 // If we see this, then we can emit something like this:
4932 //
4933 // fcmp $x, $y
4934 // fcsel $w, $a, $b, pred
4935 //
4936 // Rather than emitting both of the rather long sequences in the standard
4937 // G_FCMP/G_SELECT select methods.
4938
4939 // First, check if the condition is defined by a compare.
4940 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
4941
4942 // We can only fold if all of the defs have one use.
4943 Register CondDefReg = CondDef->getOperand(0).getReg();
4944 if (!MRI.hasOneNonDBGUse(CondDefReg)) {
4945 // Unless it's another select.
4946 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
4947 if (CondDef == &UI)
4948 continue;
4949 if (UI.getOpcode() != TargetOpcode::G_SELECT)
4950 return false;
4951 }
4952 }
4953
4954 // Is the condition defined by a compare?
4955 unsigned CondOpc = CondDef->getOpcode();
4956 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
4957 if (tryOptSelectConjunction(I, *CondDef))
4958 return true;
4959 return false;
4960 }
4961
4963 if (CondOpc == TargetOpcode::G_ICMP) {
4964 auto Pred =
4965 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
4966 CondCode = changeICMPPredToAArch64CC(Pred);
4967 emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
4968 CondDef->getOperand(1), MIB);
4969 } else {
4970 // Get the condition code for the select.
4971 auto Pred =
4972 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
4973 AArch64CC::CondCode CondCode2;
4974 changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
4975
4976 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
4977 // instructions to emit the comparison.
4978 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
4979 // unnecessary.
4980 if (CondCode2 != AArch64CC::AL)
4981 return false;
4982
4983 if (!emitFPCompare(CondDef->getOperand(2).getReg(),
4984 CondDef->getOperand(3).getReg(), MIB)) {
4985 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
4986 return false;
4987 }
4988 }
4989
4990 // Emit the select.
4991 emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
4992 I.getOperand(3).getReg(), CondCode, MIB);
4993 I.eraseFromParent();
4994 return true;
4995}
4996
4997MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
4998 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4999 MachineIRBuilder &MIRBuilder) const {
5000 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
5001 "Unexpected MachineOperand");
5002 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5003 // We want to find this sort of thing:
5004 // x = G_SUB 0, y
5005 // G_ICMP z, x
5006 //
5007 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
5008 // e.g:
5009 //
5010 // cmn z, y
5011
5012 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
5013 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
5014 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
5015 auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
5016 // Given this:
5017 //
5018 // x = G_SUB 0, y
5019 // G_ICMP x, z
5020 //
5021 // Produce this:
5022 //
5023 // cmn y, z
5024 if (isCMN(LHSDef, P, MRI))
5025 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
5026
5027 // Same idea here, but with the RHS of the compare instead:
5028 //
5029 // Given this:
5030 //
5031 // x = G_SUB 0, y
5032 // G_ICMP z, x
5033 //
5034 // Produce this:
5035 //
5036 // cmn z, y
5037 if (isCMN(RHSDef, P, MRI))
5038 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
5039
5040 // Given this:
5041 //
5042 // z = G_AND x, y
5043 // G_ICMP z, 0
5044 //
5045 // Produce this if the compare is signed:
5046 //
5047 // tst x, y
5048 if (!CmpInst::isUnsigned(P) && LHSDef &&
5049 LHSDef->getOpcode() == TargetOpcode::G_AND) {
5050 // Make sure that the RHS is 0.
5051 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
5052 if (!ValAndVReg || ValAndVReg->Value != 0)
5053 return nullptr;
5054
5055 return emitTST(LHSDef->getOperand(1),
5056 LHSDef->getOperand(2), MIRBuilder);
5057 }
5058
5059 return nullptr;
5060}
5061
5062bool AArch64InstructionSelector::selectShuffleVector(
5063 MachineInstr &I, MachineRegisterInfo &MRI) {
5064 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5065 Register Src1Reg = I.getOperand(1).getReg();
5066 const LLT Src1Ty = MRI.getType(Src1Reg);
5067 Register Src2Reg = I.getOperand(2).getReg();
5068 const LLT Src2Ty = MRI.getType(Src2Reg);
5069 ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
5070
5071 MachineBasicBlock &MBB = *I.getParent();
5072 MachineFunction &MF = *MBB.getParent();
5073 LLVMContext &Ctx = MF.getFunction().getContext();
5074
5075 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
5076 // it originated from a <1 x T> type. Those should have been lowered into
5077 // G_BUILD_VECTOR earlier.
5078 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
5079 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
5080 return false;
5081 }
5082
5083 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
5084
5086 for (int Val : Mask) {
5087 // For now, we'll just assume any undef indexes are 0. This should be
5088 // optimized in the future, e.g. to select DUP etc.
5089 Val = Val < 0 ? 0 : Val;
5090 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
5091 unsigned Offset = Byte + Val * BytesPerElt;
5092 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
5093 }
5094 }
5095
5096 // Use a constant pool to load the index vector for TBL.
5097 Constant *CPVal = ConstantVector::get(CstIdxs);
5098 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB);
5099 if (!IndexLoad) {
5100 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
5101 return false;
5102 }
5103
5104 if (DstTy.getSizeInBits() != 128) {
5105 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
5106 // This case can be done with TBL1.
5107 MachineInstr *Concat =
5108 emitVectorConcat(std::nullopt, Src1Reg, Src2Reg, MIB);
5109 if (!Concat) {
5110 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
5111 return false;
5112 }
5113
5114 // The constant pool load will be 64 bits, so we need to convert it to an FPR128 reg.
5115 IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
5116 IndexLoad->getOperand(0).getReg(), MIB);
5117
5118 auto TBL1 = MIB.buildInstr(
5119 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
5120 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
5121 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
5122
5123 auto Copy =
5124 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
5125 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
5126 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
5127 I.eraseFromParent();
5128 return true;
5129 }
5130
5131 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
5132 // Q registers for regalloc.
5133 SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
5134 auto RegSeq = createQTuple(Regs, MIB);
5135 auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
5136 {RegSeq, IndexLoad->getOperand(0)});
5137 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
5138 I.eraseFromParent();
5139 return true;
5140}
5141
5142MachineInstr *AArch64InstructionSelector::emitLaneInsert(
5143 std::optional<Register> DstReg, Register SrcReg, Register EltReg,
5144 unsigned LaneIdx, const RegisterBank &RB,
5145 MachineIRBuilder &MIRBuilder) const {
5146 MachineInstr *InsElt = nullptr;
5147 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5148 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5149
5150 // Create a register to define with the insert if one wasn't passed in.
5151 if (!DstReg)
5152 DstReg = MRI.createVirtualRegister(DstRC);
5153
5154 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
5155 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
5156
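  // For FPR elements the INS (vector, lane) form is used, so the scalar is
  // first placed into lane 0 of an FPR128 via emitScalarToVector; GPR
  // elements can feed the INS (gpr) form directly.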
5157 if (RB.getID() == AArch64::FPRRegBankID) {
5158 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
5159 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5160 .addImm(LaneIdx)
5161 .addUse(InsSub->getOperand(0).getReg())
5162 .addImm(0);
5163 } else {
5164 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5165 .addImm(LaneIdx)
5166 .addUse(EltReg);
5167 }
5168
5169 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
5170 return InsElt;
5171}
5172
5173bool AArch64InstructionSelector::selectUSMovFromExtend(
5174 MachineInstr &MI, MachineRegisterInfo &MRI) {
5175 if (MI.getOpcode() != TargetOpcode::G_SEXT &&
5176 MI.getOpcode() != TargetOpcode::G_ZEXT &&
5177 MI.getOpcode() != TargetOpcode::G_ANYEXT)
5178 return false;
5179 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
5180 const Register DefReg = MI.getOperand(0).getReg();
5181 const LLT DstTy = MRI.getType(DefReg);
5182 unsigned DstSize = DstTy.getSizeInBits();
5183
5184 if (DstSize != 32 && DstSize != 64)
5185 return false;
5186
5187 MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,
5188 MI.getOperand(1).getReg(), MRI);
5189 int64_t Lane;
5190 if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane)))
5191 return false;
5192 Register Src0 = Extract->getOperand(1).getReg();
5193
5194 const LLT &VecTy = MRI.getType(Src0);
5195
5196 if (VecTy.getSizeInBits() != 128) {
5197 const MachineInstr *ScalarToVector = emitScalarToVector(
5198 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
5199 assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
5200 Src0 = ScalarToVector->getOperand(0).getReg();
5201 }
5202
5203 unsigned Opcode;
5204 if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
5205 Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
5206 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
5207 Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
5208 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
5209 Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
5210 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
5211 Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
5212 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
5213 Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
5214 else
5215 llvm_unreachable("Unexpected type combo for S/UMov!");
5216
5217 // We may need to generate one of these, depending on the type and sign of the
5218 // input:
5219 // DstReg = SMOV Src0, Lane;
5220 // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
5221 MachineInstr *ExtI = nullptr;
5222 if (DstSize == 64 && !IsSigned) {
5223 Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
5224 MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
5225 ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
5226 .addImm(0)
5227 .addUse(NewReg)
5228 .addImm(AArch64::sub_32);
5229 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
5230 } else
5231 ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
5232
5233 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
5234 MI.eraseFromParent();
5235 return true;
5236}
5237
5238MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(
5239 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5240 unsigned int Op;
5241 if (DstSize == 128) {
5242 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5243 return nullptr;
5244 Op = AArch64::MOVIv16b_ns;
5245 } else {
5246 Op = AArch64::MOVIv8b_ns;
5247 }
5248
5249 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5250
5251 if (AArch64_AM::isAdvSIMDModImmType9(Val)) {
5252 Val = AArch64_AM::encodeAdvSIMDModImmType9(Val);
5253 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5254 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5255 return &*Mov;
5256 }
5257 return nullptr;
5258}
5259
5260MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16(
5261 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5262 bool Inv) {
5263
5264 unsigned int Op;
5265 if (DstSize == 128) {
5266 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5267 return nullptr;
5268 Op = Inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16;
5269 } else {
5270 Op = Inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16;
5271 }
5272
5273 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5274 uint64_t Shift;
5275
5276 if (AArch64_AM::isAdvSIMDModImmType5(Val)) {
5277 Val = AArch64_AM::encodeAdvSIMDModImmType5(Val);
5278 Shift = 0;
5279 } else if (AArch64_AM::isAdvSIMDModImmType6(Val)) {
5280 Val = AArch64_AM::encodeAdvSIMDModImmType6(Val);
5281 Shift = 8;
5282 } else
5283 return nullptr;
5284
5285 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5286 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5287 return &*Mov;
5288}
5289
5290MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32(
5291 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5292 bool Inv) {
5293
5294 unsigned int Op;
5295 if (DstSize == 128) {
5296 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5297 return nullptr;
5298 Op = Inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32;
5299 } else {
5300 Op = Inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32;
5301 }
5302
5303 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5304 uint64_t Shift;
5305
5306 if (AArch64_AM::isAdvSIMDModImmType1(Val)) {
5307 Val = AArch64_AM::encodeAdvSIMDModImmType1(Val);
5308 Shift = 0;
5309 } else if ((AArch64_AM::isAdvSIMDModImmType2(Val))) {
5310 Val = AArch64_AM::encodeAdvSIMDModImmType2(Val);
5311 Shift = 8;
5312 } else if ((AArch64_AM::isAdvSIMDModImmType3(Val))) {
5313 Val = AArch64_AM::encodeAdvSIMDModImmType3(Val);
5314 Shift = 16;
5315 } else if ((AArch64_AM::isAdvSIMDModImmType4(Val))) {
5316 Val = AArch64_AM::encodeAdvSIMDModImmType4(Val);
5317 Shift = 24;
5318 } else
5319 return nullptr;
5320
5321 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5322 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5323 return &*Mov;
5324}
5325
5326MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64(
5327 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5328
5329 unsigned int Op;
5330 if (DstSize == 128) {
5331 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5332 return nullptr;
5333 Op = AArch64::MOVIv2d_ns;
5334 } else {
5335 Op = AArch64::MOVID;
5336 }
5337
5338 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5339 if (AArch64_AM::isAdvSIMDModImmType10(Val)) {
5340 Val = AArch64_AM::encodeAdvSIMDModImmType10(Val);
5341 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5342 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5343 return &*Mov;
5344 }
5345 return nullptr;
5346}
5347
5348MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s(
5349 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5350 bool Inv) {
5351
5352 unsigned int Op;
5353 if (DstSize == 128) {
5354 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5355 return nullptr;
5356 Op = Inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl;
5357 } else {
5358 Op = Inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl;
5359 }
5360
5361 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5362 uint64_t Shift;
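  // The MSL ("shift ones") shifter is passed in its target-encoded form: 264
  // and 272 correspond to MSL #8 and MSL #16 respectively (i.e. the values
  // produced by AArch64_AM::getShifterImm(AArch64_AM::MSL, 8/16)).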
5363
5364 if (AArch64_AM::isAdvSIMDModImmType7(Val)) {
5365 Val = AArch64_AM::encodeAdvSIMDModImmType7(Val);
5366 Shift = 264;
5367 } else if (AArch64_AM::isAdvSIMDModImmType8(Val)) {
5368 Val = AArch64_AM::encodeAdvSIMDModImmType8(Val);
5369 Shift = 272;
5370 } else
5371 return nullptr;
5372
5373 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5374 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5375 return &*Mov;
5376}
5377
5378MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
5379 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5380
5381 unsigned int Op;
5382 bool IsWide = false;
5383 if (DstSize == 128) {
5384 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5385 return nullptr;
5386 Op = AArch64::FMOVv4f32_ns;
5387 IsWide = true;
5388 } else {
5389 Op = AArch64::FMOVv2f32_ns;
5390 }
5391
5392 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5393
5394 if (AArch64_AM::isAdvSIMDModImmType11(Val)) {
5395 Val = AArch64_AM::encodeAdvSIMDModImmType11(Val);
5396 } else if (IsWide && AArch64_AM::isAdvSIMDModImmType12(Val)) {
5397 Val = AArch64_AM::encodeAdvSIMDModImmType12(Val);
5398 Op = AArch64::FMOVv2f64_ns;
5399 } else
5400 return nullptr;
5401
5402 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5403 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5404 return &*Mov;
5405}
5406
5407bool AArch64InstructionSelector::selectIndexedExtLoad(
5408 MachineInstr &MI, MachineRegisterInfo &MRI) {
5409 auto &ExtLd = cast<GIndexedAnyExtLoad>(MI);
5410 Register Dst = ExtLd.getDstReg();
5411 Register WriteBack = ExtLd.getWritebackReg();
5412 Register Base = ExtLd.getBaseReg();
5413 Register Offset = ExtLd.getOffsetReg();
5414 LLT Ty = MRI.getType(Dst);
5415 assert(Ty.getSizeInBits() <= 64); // Only for scalar GPRs.
5416 unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();
5417 bool IsPre = ExtLd.isPre();
5418 bool IsSExt = isa<GIndexedSExtLoad>(ExtLd);
5419 bool InsertIntoXReg = false;
5420 bool IsDst64 = Ty.getSizeInBits() == 64;
5421
5422 unsigned Opc = 0;
5423 LLT NewLdDstTy;
5424 LLT s32 = LLT::scalar(32);
5425 LLT s64 = LLT::scalar(64);
5426
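  // Pick the pre/post-indexed load matching the memory size. Sign-extending
  // loads have variants that write W or X registers directly; zero/any-extended
  // results are always loaded into a W register and, for a 64-bit destination,
  // completed with a SUBREG_TO_REG below (a W write zeroes bits [63:32]).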
5427 if (MemSizeBits == 8) {
5428 if (IsSExt) {
5429 if (IsDst64)
5430 Opc = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
5431 else
5432 Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
5433 NewLdDstTy = IsDst64 ? s64 : s32;
5434 } else {
5435 Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
5436 InsertIntoXReg = IsDst64;
5437 NewLdDstTy = s32;
5438 }
5439 } else if (MemSizeBits == 16) {
5440 if (IsSExt) {
5441 if (IsDst64)
5442 Opc = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
5443 else
5444 Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
5445 NewLdDstTy = IsDst64 ? s64 : s32;
5446 } else {
5447 Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
5448 InsertIntoXReg = IsDst64;
5449 NewLdDstTy = s32;
5450 }
5451 } else if (MemSizeBits == 32) {
5452 if (IsSExt) {
5453 Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
5454 NewLdDstTy = s64;
5455 } else {
5456 Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
5457 InsertIntoXReg = IsDst64;
5458 NewLdDstTy = s32;
5459 }
5460 } else {
5461 llvm_unreachable("Unexpected size for indexed load");
5462 }
5463
5464 if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5465 return false; // We should be on gpr.
5466
5467 auto Cst = getIConstantVRegVal(Offset, MRI);
5468 if (!Cst)
5469 return false; // Shouldn't happen, but just in case.
5470
5471 auto LdMI = MIB.buildInstr(Opc, {WriteBack, NewLdDstTy}, {Base})
5472 .addImm(Cst->getSExtValue());
5473 LdMI.cloneMemRefs(ExtLd);
5474 constrainSelectedInstRegOperands(*LdMI, TII, TRI, RBI);
5475 // Make sure to select the load with the MemTy as the dest type, and then
5476 // insert into X reg if needed.
5477 if (InsertIntoXReg) {
5478 // Generate a SUBREG_TO_REG.
5479 auto SubToReg = MIB.buildInstr(TargetOpcode::SUBREG_TO_REG, {Dst}, {})
5480 .addImm(0)
5481 .addUse(LdMI.getReg(1))
5482 .addImm(AArch64::sub_32);
5483 RBI.constrainGenericRegister(SubToReg.getReg(0), AArch64::GPR64RegClass,
5484 MRI);
5485 } else {
5486 auto Copy = MIB.buildCopy(Dst, LdMI.getReg(1));
5487 selectCopy(*Copy, TII, MRI, TRI, RBI);
5488 }
5489 MI.eraseFromParent();
5490
5491 return true;
5492}
5493
5494bool AArch64InstructionSelector::selectIndexedLoad(MachineInstr &MI,
5495 MachineRegisterInfo &MRI) {
5496 auto &Ld = cast<GIndexedLoad>(MI);
5497 Register Dst = Ld.getDstReg();
5498 Register WriteBack = Ld.getWritebackReg();
5499 Register Base = Ld.getBaseReg();
5500 Register Offset = Ld.getOffsetReg();
5501 assert(MRI.getType(Dst).getSizeInBits() <= 128 &&
5502 "Unexpected type for indexed load");
5503 unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();
5504
5505 if (MemSize < MRI.getType(Dst).getSizeInBytes())
5506 return selectIndexedExtLoad(MI, MRI);
5507
5508 unsigned Opc = 0;
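  // The opcode tables below are indexed by log2 of the access size in bytes:
  // B (1), H (2), W/S (4), X/D (8), and Q (16, FPR only).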
5509 if (Ld.isPre()) {
5510 static constexpr unsigned GPROpcodes[] = {
5511 AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre,
5512 AArch64::LDRXpre};
5513 static constexpr unsigned FPROpcodes[] = {
5514 AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre,
5515 AArch64::LDRQpre};
5516 if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5517 Opc = FPROpcodes[Log2_32(MemSize)];
5518 else
5519 Opc = GPROpcodes[Log2_32(MemSize)];
5520 } else {
5521 static constexpr unsigned GPROpcodes[] = {
5522 AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
5523 AArch64::LDRXpost};
5524 static constexpr unsigned FPROpcodes[] = {
5525 AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
5526 AArch64::LDRDpost, AArch64::LDRQpost};
5527 if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5528 Opc = FPROpcodes[Log2_32(MemSize)];
5529 else
5530 Opc = GPROpcodes[Log2_32(MemSize)];
5531 }
5532 auto Cst = getIConstantVRegVal(Offset, MRI);
5533 if (!Cst)
5534 return false; // Shouldn't happen, but just in case.
5535 auto LdMI =
5536 MIB.buildInstr(Opc, {WriteBack, Dst}, {Base}).addImm(Cst->getSExtValue());
5537 LdMI.cloneMemRefs(Ld);
5538 constrainSelectedInstRegOperands(*LdMI, TII, TRI, RBI);
5539 MI.eraseFromParent();
5540 return true;
5541}
5542
5543bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I,
5544 MachineRegisterInfo &MRI) {
5545 Register Dst = I.getWritebackReg();
5546 Register Val = I.getValueReg();
5547 Register Base = I.getBaseReg();
5548 Register Offset = I.getOffsetReg();
5549 LLT ValTy = MRI.getType(Val);
5550 assert(ValTy.getSizeInBits() <= 128 && "Unexpected type for indexed store");
5551
5552 unsigned Opc = 0;
5553 if (I.isPre()) {
5554 static constexpr unsigned GPROpcodes[] = {
5555 AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre,
5556 AArch64::STRXpre};
5557 static constexpr unsigned FPROpcodes[] = {
5558 AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre,
5559 AArch64::STRQpre};
5560
5561 if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5562 Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5563 else
5564 Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5565 } else {
5566 static constexpr unsigned GPROpcodes[] = {
5567 AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
5568 AArch64::STRXpost};
5569 static constexpr unsigned FPROpcodes[] = {
5570 AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,
5571 AArch64::STRDpost, AArch64::STRQpost};
5572
5573 if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5574 Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5575 else
5576 Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5577 }
5578
5579 auto Cst = getIConstantVRegVal(Offset, MRI);
5580 if (!Cst)
5581 return false; // Shouldn't happen, but just in case.
5582 auto Str =
5583 MIB.buildInstr(Opc, {Dst}, {Val, Base}).addImm(Cst->getSExtValue());
5584 Str.cloneMemRefs(I);
5585 constrainSelectedInstRegOperands(*Str, TII, TRI, RBI);
5586 I.eraseFromParent();
5587 return true;
5588}
5589
5590MachineInstr *
5591AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
5592 MachineIRBuilder &MIRBuilder,
5593 MachineRegisterInfo &MRI) {
5594 LLT DstTy = MRI.getType(Dst);
5595 unsigned DstSize = DstTy.getSizeInBits();
5596 if (CV->isNullValue()) {
5597 if (DstSize == 128) {
5598 auto Mov =
5599 MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
5600 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5601 return &*Mov;
5602 }
5603
5604 if (DstSize == 64) {
5605 auto Mov =
5606 MIRBuilder
5607 .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
5608 .addImm(0);
5609 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
5610 .addReg(Mov.getReg(0), 0, AArch64::dsub);
5611 RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
5612 return &*Copy;
5613 }
5614 }
5615
5616 if (CV->getSplatValue()) {
5617 APInt DefBits = APInt::getSplat(DstSize, CV->getUniqueInteger());
5618 auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * {
5619 MachineInstr *NewOp;
5620 bool Inv = false;
5621 if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, DefBits, MIRBuilder)) ||
5622 (NewOp =
5623 tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5624 (NewOp =
5625 tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5626 (NewOp =
5627 tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5628 (NewOp = tryAdvSIMDModImm8(Dst, DstSize, DefBits, MIRBuilder)) ||
5629 (NewOp = tryAdvSIMDModImmFP(Dst, DstSize, DefBits, MIRBuilder)))
5630 return NewOp;
5631
5632 DefBits = ~DefBits;
5633 Inv = true;
5634 if ((NewOp =
5635 tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5636 (NewOp =
5637 tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5638 (NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)))
5639 return NewOp;
5640 return nullptr;
5641 };
5642
5643 if (auto *NewOp = TryMOVIWithBits(DefBits))
5644 return NewOp;
5645
5646 // See if a fneg of the constant can be materialized with a MOVI, etc
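    // For example (illustrative): a v2f64 splat of -0.0 has no direct
    // MOVI/MVNI/FMOV encoding, but flipping the sign bit of each element
    // yields all-zero bits, so it can be emitted as MOVI #0 followed by
    // FNEGv2f64.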
5647 auto TryWithFNeg = [&](APInt DefBits, int NumBits,
5648 unsigned NegOpc) -> MachineInstr * {
5649 // FNegate each sub-element of the constant
5650 APInt Neg = APInt::getHighBitsSet(NumBits, 1).zext(DstSize);
5651 APInt NegBits(DstSize, 0);
5652 unsigned NumElts = DstSize / NumBits;
5653 for (unsigned i = 0; i < NumElts; i++)
5654 NegBits |= Neg << (NumBits * i);
5655 NegBits = DefBits ^ NegBits;
5656
5657 // Try to create the new constants with MOVI, and if so generate a fneg
5658 // for it.
5659 if (auto *NewOp = TryMOVIWithBits(NegBits)) {
5660 Register NewDst = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
5661 NewOp->getOperand(0).setReg(NewDst);
5662 return MIRBuilder.buildInstr(NegOpc, {Dst}, {NewDst});
5663 }
5664 return nullptr;
5665 };
5666 MachineInstr *R;
5667 if ((R = TryWithFNeg(DefBits, 32, AArch64::FNEGv4f32)) ||
5668 (R = TryWithFNeg(DefBits, 64, AArch64::FNEGv2f64)) ||
5669 (STI.hasFullFP16() &&
5670 (R = TryWithFNeg(DefBits, 16, AArch64::FNEGv8f16))))
5671 return R;
5672 }
5673
5674 auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
5675 if (!CPLoad) {
5676 LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
5677 return nullptr;
5678 }
5679
5680 auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
5681 RBI.constrainGenericRegister(
5682 Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
5683 return &*Copy;
5684}
5685
5686bool AArch64InstructionSelector::tryOptConstantBuildVec(
5687 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
5688 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5689 unsigned DstSize = DstTy.getSizeInBits();
5690 assert(DstSize <= 128 && "Unexpected build_vec type!");
5691 if (DstSize < 32)
5692 return false;
5693 // Check if we're building a constant vector, in which case we want to
5694 // generate a constant pool load instead of a vector insert sequence.
5695 SmallVector<Constant *, 16> Csts;
5696 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
5697 // Try to find G_CONSTANT or G_FCONSTANT
5698 auto *OpMI =
5699 getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
5700 if (OpMI)
5701 Csts.emplace_back(
5702 const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
5703 else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
5704 I.getOperand(Idx).getReg(), MRI)))
5705 Csts.emplace_back(
5706 const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
5707 else
5708 return false;
5709 }
5710 Constant *CV = ConstantVector::get(Csts);
5711 if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
5712 return false;
5713 I.eraseFromParent();
5714 return true;
5715}
5716
5717bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
5718 MachineInstr &I, MachineRegisterInfo &MRI) {
5719 // Given:
5720 // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
5721 //
5722 // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
5723 Register Dst = I.getOperand(0).getReg();
5724 Register EltReg = I.getOperand(1).getReg();
5725 LLT EltTy = MRI.getType(EltReg);
5726 // If the destination vector isn't on the same register bank as its elements,
5727 // then this can't be a SUBREG_TO_REG.
5728 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5729 const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
5730 if (EltRB != DstRB)
5731 return false;
5732 if (any_of(drop_begin(I.operands(), 2), [&MRI](const MachineOperand &Op) {
5733 return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(), MRI);
5734 }))
5735 return false;
5736 unsigned SubReg;
5737 const TargetRegisterClass *EltRC = getRegClassForTypeOnBank(EltTy, EltRB);
5738 if (!EltRC)
5739 return false;
5740 const TargetRegisterClass *DstRC =
5741 getRegClassForTypeOnBank(MRI.getType(Dst), DstRB);
5742 if (!DstRC)
5743 return false;
5744 if (!getSubRegForClass(EltRC, TRI, SubReg))
5745 return false;
5746 auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
5747 .addImm(0)
5748 .addUse(EltReg)
5749 .addImm(SubReg);
5750 I.eraseFromParent();
5751 constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);
5752 return RBI.constrainGenericRegister(Dst, *DstRC, MRI);
5753}
5754
5755bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
5756 MachineRegisterInfo &MRI) {
5757 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5758 // Until we port more of the optimized selections, for now just use a vector
5759 // insert sequence.
5760 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5761 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
5762 unsigned EltSize = EltTy.getSizeInBits();
5763
5764 if (tryOptConstantBuildVec(I, DstTy, MRI))
5765 return true;
5766 if (tryOptBuildVecToSubregToReg(I, MRI))
5767 return true;
5768
5769 if (EltSize != 8 && EltSize != 16 && EltSize != 32 && EltSize != 64)
5770 return false; // Don't support all element types yet.
5771 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
5772
5773 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5774 MachineInstr *ScalarToVec =
5775 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
5776 I.getOperand(1).getReg(), MIB);
5777 if (!ScalarToVec)
5778 return false;
5779
5780 Register DstVec = ScalarToVec->getOperand(0).getReg();
5781 unsigned DstSize = DstTy.getSizeInBits();
5782
5783 // Keep track of the last MI we inserted. Later on, we might be able to save
5784 // a copy using it.
5785 MachineInstr *PrevMI = ScalarToVec;
5786 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
5787 // Note that if we don't do a subregister copy, we can end up making an
5788 // extra register.
5789 Register OpReg = I.getOperand(i).getReg();
5790 // Do not emit inserts for undefs
5791 if (!getOpcodeDef<GImplicitDef>(OpReg, MRI)) {
5792 PrevMI = &*emitLaneInsert(std::nullopt, DstVec, OpReg, i - 1, RB, MIB);
5793 DstVec = PrevMI->getOperand(0).getReg();
5794 }
5795 }
5796
5797 // If DstTy's size in bits is less than 128, then emit a subregister copy
5798 // from DstVec to the last register we've defined.
5799 if (DstSize < 128) {
5800 // Force this to be FPR using the destination vector.
5801 const TargetRegisterClass *RC =
5802 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5803 if (!RC)
5804 return false;
5805 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5806 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5807 return false;
5808 }
5809
5810 unsigned SubReg = 0;
5811 if (!getSubRegForClass(RC, TRI, SubReg))
5812 return false;
5813 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5814 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
5815 << ")\n");
5816 return false;
5817 }
5818
5819 Register Reg = MRI.createVirtualRegister(RC);
5820 Register DstReg = I.getOperand(0).getReg();
5821
5822 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0, SubReg);
5823 MachineOperand &RegOp = I.getOperand(1);
5824 RegOp.setReg(Reg);
5825 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5826 } else {
5827 // We either have a vector with all elements (except the first one) undef or
5828 // at least one non-undef non-first element. In the first case, we need to
5829 // constrain the output register ourselves as we may have generated an
5830 // INSERT_SUBREG operation which is a generic operation for which the
5831 // output regclass cannot be automatically chosen.
5832 //
5833 // In the second case, there is no need to do this as it may generate an
5834 // instruction like INSvi32gpr where the regclass can be automatically
5835 // chosen.
5836 //
5837 // Also, we save a copy by re-using the destination register on the final
5838 // insert.
5839 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
5840 constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
5841
5842 Register DstReg = PrevMI->getOperand(0).getReg();
5843 if (PrevMI == ScalarToVec && DstReg.isVirtual()) {
5844 const TargetRegisterClass *RC =
5845 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5846 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5847 }
5848 }
5849
5850 I.eraseFromParent();
5851 return true;
5852}
5853
5854bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
5855 unsigned NumVecs,
5856 MachineInstr &I) {
5857 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5858 assert(Opc && "Expected an opcode?");
5859 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5860 auto &MRI = *MIB.getMRI();
5861 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5862 unsigned Size = Ty.getSizeInBits();
5863 assert((Size == 64 || Size == 128) &&
5864 "Destination must be 64 bits or 128 bits?");
5865 unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
5866 auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg();
5867 assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
5868 auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr});
5869 Load.cloneMemRefs(I);
5870 constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
5871 Register SelectedLoadDst = Load->getOperand(0).getReg();
5872 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
5873 auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {})
5874 .addReg(SelectedLoadDst, 0, SubReg + Idx);
5875 // Emit the subreg copies and immediately select them.
5876 // FIXME: We should refactor our copy code into an emitCopy helper and
5877 // clean up uses of this pattern elsewhere in the selector.
5878 selectCopy(*Vec, TII, MRI, TRI, RBI);
5879 }
5880 return true;
5881}
5882
5883bool AArch64InstructionSelector::selectVectorLoadLaneIntrinsic(
5884 unsigned Opc, unsigned NumVecs, MachineInstr &I) {
5885 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5886 assert(Opc && "Expected an opcode?");
5887 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5888 auto &MRI = *MIB.getMRI();
5889 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5890 bool Narrow = Ty.getSizeInBits() == 64;
5891
5892 auto FirstSrcRegIt = I.operands_begin() + NumVecs + 1;
5893 SmallVector<Register, 4> Regs(NumVecs);
5894 std::transform(FirstSrcRegIt, FirstSrcRegIt + NumVecs, Regs.begin(),
5895 [](auto MO) { return MO.getReg(); });
5896
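  // The LD*lane instructions operate on tuples of Q registers, so 64-bit
  // source vectors are widened to 128 bits here (upper half undef) and the
  // results are narrowed back to 64 bits afterwards via emitNarrowVector.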
5897 if (Narrow) {
5898 transform(Regs, Regs.begin(), [this](Register Reg) {
5899 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
5900 ->getOperand(0)
5901 .getReg();
5902 });
5903 Ty = Ty.multiplyElements(2);
5904 }
5905
5906 Register Tuple = createQTuple(Regs, MIB);
5907 auto LaneNo = getIConstantVRegVal((FirstSrcRegIt + NumVecs)->getReg(), MRI);
5908 if (!LaneNo)
5909 return false;
5910
5911 Register Ptr = (FirstSrcRegIt + NumVecs + 1)->getReg();
5912 auto Load = MIB.buildInstr(Opc, {Ty}, {})
5913 .addReg(Tuple)
5914 .addImm(LaneNo->getZExtValue())
5915 .addReg(Ptr);
5916 Load.cloneMemRefs(I);
5917 constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
5918 Register SelectedLoadDst = Load->getOperand(0).getReg();
5919 unsigned SubReg = AArch64::qsub0;
5920 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
5921 auto Vec = MIB.buildInstr(TargetOpcode::COPY,
5922 {Narrow ? DstOp(&AArch64::FPR128RegClass)
5923 : DstOp(I.getOperand(Idx).getReg())},
5924 {})
5925 .addReg(SelectedLoadDst, 0, SubReg + Idx);
5926 Register WideReg = Vec.getReg(0);
5927 // Emit the subreg copies and immediately select them.
5928 selectCopy(*Vec, TII, MRI, TRI, RBI);
5929 if (Narrow &&
5930 !emitNarrowVector(I.getOperand(Idx).getReg(), WideReg, MIB, MRI))
5931 return false;
5932 }
5933 return true;
5934}
5935
5936void AArch64InstructionSelector::selectVectorStoreIntrinsic(MachineInstr &I,
5937 unsigned NumVecs,
5938 unsigned Opc) {
5939 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
5940 LLT Ty = MRI.getType(I.getOperand(1).getReg());
5941 Register Ptr = I.getOperand(1 + NumVecs).getReg();
5942
5943 SmallVector<Register, 2> Regs(NumVecs);
5944 std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
5945 Regs.begin(), [](auto MO) { return MO.getReg(); });
5946
5947 Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
5948 : createDTuple(Regs, MIB);
5949 auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});
5950 Store.cloneMemRefs(I);
5951 constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
5952}
5953
5954bool AArch64InstructionSelector::selectVectorStoreLaneIntrinsic(
5955 MachineInstr &I, unsigned NumVecs, unsigned Opc) {
5956 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
5957 LLT Ty = MRI.getType(I.getOperand(1).getReg());
5958 bool Narrow = Ty.getSizeInBits() == 64;
5959
5960 SmallVector<Register, 2> Regs(NumVecs);
5961 std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
5962 Regs.begin(), [](auto MO) { return MO.getReg(); });
5963
5964 if (Narrow)
5965 transform(Regs, Regs.begin(), [this](Register Reg) {
5966 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
5967 ->getOperand(0)
5968 .getReg();
5969 });
5970
5971 Register Tuple = createQTuple(Regs, MIB);
5972
5973 auto LaneNo = getIConstantVRegVal(I.getOperand(1 + NumVecs).getReg(), MRI);
5974 if (!LaneNo)
5975 return false;
5976 Register Ptr = I.getOperand(1 + NumVecs + 1).getReg();
5977 auto Store = MIB.buildInstr(Opc, {}, {})
5978 .addReg(Tuple)
5979 .addImm(LaneNo->getZExtValue())
5980 .addReg(Ptr);
5981 Store.cloneMemRefs(I);
5982 constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
5983 return true;
5984}
5985
5986bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
5987 MachineInstr &I, MachineRegisterInfo &MRI) {
5988 // Find the intrinsic ID.
5989 unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
5990
5991 const LLT S8 = LLT::scalar(8);
5992 const LLT S16 = LLT::scalar(16);
5993 const LLT S32 = LLT::scalar(32);
5994 const LLT S64 = LLT::scalar(64);
5995 const LLT P0 = LLT::pointer(0, 64);
5996 // Select the instruction.
5997 switch (IntrinID) {
5998 default:
5999 return false;
6000 case Intrinsic::aarch64_ldxp:
6001 case Intrinsic::aarch64_ldaxp: {
6002 auto NewI = MIB.buildInstr(
6003 IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
6004 {I.getOperand(0).getReg(), I.getOperand(1).getReg()},
6005 {I.getOperand(3)});
6006 NewI.cloneMemRefs(I);
6007 constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
6008 break;
6009 }
6010 case Intrinsic::aarch64_neon_ld1x2: {
6011 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6012 unsigned Opc = 0;
6013 if (Ty == LLT::fixed_vector(8, S8))
6014 Opc = AArch64::LD1Twov8b;
6015 else if (Ty == LLT::fixed_vector(16, S8))
6016 Opc = AArch64::LD1Twov16b;
6017 else if (Ty == LLT::fixed_vector(4, S16))
6018 Opc = AArch64::LD1Twov4h;
6019 else if (Ty == LLT::fixed_vector(8, S16))
6020 Opc = AArch64::LD1Twov8h;
6021 else if (Ty == LLT::fixed_vector(2, S32))
6022 Opc = AArch64::LD1Twov2s;
6023 else if (Ty == LLT::fixed_vector(4, S32))
6024 Opc = AArch64::LD1Twov4s;
6025 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6026 Opc = AArch64::LD1Twov2d;
6027 else if (Ty == S64 || Ty == P0)
6028 Opc = AArch64::LD1Twov1d;
6029 else
6030 llvm_unreachable("Unexpected type for ld1x2!");
6031 selectVectorLoadIntrinsic(Opc, 2, I);
6032 break;
6033 }
6034 case Intrinsic::aarch64_neon_ld1x3: {
6035 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6036 unsigned Opc = 0;
6037 if (Ty == LLT::fixed_vector(8, S8))
6038 Opc = AArch64::LD1Threev8b;
6039 else if (Ty == LLT::fixed_vector(16, S8))
6040 Opc = AArch64::LD1Threev16b;
6041 else if (Ty == LLT::fixed_vector(4, S16))
6042 Opc = AArch64::LD1Threev4h;
6043 else if (Ty == LLT::fixed_vector(8, S16))
6044 Opc = AArch64::LD1Threev8h;
6045 else if (Ty == LLT::fixed_vector(2, S32))
6046 Opc = AArch64::LD1Threev2s;
6047 else if (Ty == LLT::fixed_vector(4, S32))
6048 Opc = AArch64::LD1Threev4s;
6049 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6050 Opc = AArch64::LD1Threev2d;
6051 else if (Ty == S64 || Ty == P0)
6052 Opc = AArch64::LD1Threev1d;
6053 else
6054 llvm_unreachable("Unexpected type for ld1x3!");
6055 selectVectorLoadIntrinsic(Opc, 3, I);
6056 break;
6057 }
6058 case Intrinsic::aarch64_neon_ld1x4: {
6059 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6060 unsigned Opc = 0;
6061 if (Ty == LLT::fixed_vector(8, S8))
6062 Opc = AArch64::LD1Fourv8b;
6063 else if (Ty == LLT::fixed_vector(16, S8))
6064 Opc = AArch64::LD1Fourv16b;
6065 else if (Ty == LLT::fixed_vector(4, S16))
6066 Opc = AArch64::LD1Fourv4h;
6067 else if (Ty == LLT::fixed_vector(8, S16))
6068 Opc = AArch64::LD1Fourv8h;
6069 else if (Ty == LLT::fixed_vector(2, S32))
6070 Opc = AArch64::LD1Fourv2s;
6071 else if (Ty == LLT::fixed_vector(4, S32))
6072 Opc = AArch64::LD1Fourv4s;
6073 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6074 Opc = AArch64::LD1Fourv2d;
6075 else if (Ty == S64 || Ty == P0)
6076 Opc = AArch64::LD1Fourv1d;
6077 else
6078 llvm_unreachable("Unexpected type for ld1x4!");
6079 selectVectorLoadIntrinsic(Opc, 4, I);
6080 break;
6081 }
6082 case Intrinsic::aarch64_neon_ld2: {
6083 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6084 unsigned Opc = 0;
6085 if (Ty == LLT::fixed_vector(8, S8))
6086 Opc = AArch64::LD2Twov8b;
6087 else if (Ty == LLT::fixed_vector(16, S8))
6088 Opc = AArch64::LD2Twov16b;
6089 else if (Ty == LLT::fixed_vector(4, S16))
6090 Opc = AArch64::LD2Twov4h;
6091 else if (Ty == LLT::fixed_vector(8, S16))
6092 Opc = AArch64::LD2Twov8h;
6093 else if (Ty == LLT::fixed_vector(2, S32))
6094 Opc = AArch64::LD2Twov2s;
6095 else if (Ty == LLT::fixed_vector(4, S32))
6096 Opc = AArch64::LD2Twov4s;
6097 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6098 Opc = AArch64::LD2Twov2d;
6099 else if (Ty == S64 || Ty == P0)
6100 Opc = AArch64::LD1Twov1d;
6101 else
6102 llvm_unreachable("Unexpected type for ld2!");
6103 selectVectorLoadIntrinsic(Opc, 2, I);
6104 break;
6105 }
6106 case Intrinsic::aarch64_neon_ld2lane: {
6107 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6108 unsigned Opc;
6109 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6110 Opc = AArch64::LD2i8;
6111 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6112 Opc = AArch64::LD2i16;
6113 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6114 Opc = AArch64::LD2i32;
6115 else if (Ty == LLT::fixed_vector(2, S64) ||
6116 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6117 Opc = AArch64::LD2i64;
6118 else
6119 llvm_unreachable("Unexpected type for ld2lane!");
6120 if (!selectVectorLoadLaneIntrinsic(Opc, 2, I))
6121 return false;
6122 break;
6123 }
6124 case Intrinsic::aarch64_neon_ld2r: {
6125 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6126 unsigned Opc = 0;
6127 if (Ty == LLT::fixed_vector(8, S8))
6128 Opc = AArch64::LD2Rv8b;
6129 else if (Ty == LLT::fixed_vector(16, S8))
6130 Opc = AArch64::LD2Rv16b;
6131 else if (Ty == LLT::fixed_vector(4, S16))
6132 Opc = AArch64::LD2Rv4h;
6133 else if (Ty == LLT::fixed_vector(8, S16))
6134 Opc = AArch64::LD2Rv8h;
6135 else if (Ty == LLT::fixed_vector(2, S32))
6136 Opc = AArch64::LD2Rv2s;
6137 else if (Ty == LLT::fixed_vector(4, S32))
6138 Opc = AArch64::LD2Rv4s;
6139 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6140 Opc = AArch64::LD2Rv2d;
6141 else if (Ty == S64 || Ty == P0)
6142 Opc = AArch64::LD2Rv1d;
6143 else
6144 llvm_unreachable("Unexpected type for ld2r!");
6145 selectVectorLoadIntrinsic(Opc, 2, I);
6146 break;
6147 }
6148 case Intrinsic::aarch64_neon_ld3: {
6149 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6150 unsigned Opc = 0;
6151 if (Ty == LLT::fixed_vector(8, S8))
6152 Opc = AArch64::LD3Threev8b;
6153 else if (Ty == LLT::fixed_vector(16, S8))
6154 Opc = AArch64::LD3Threev16b;
6155 else if (Ty == LLT::fixed_vector(4, S16))
6156 Opc = AArch64::LD3Threev4h;
6157 else if (Ty == LLT::fixed_vector(8, S16))
6158 Opc = AArch64::LD3Threev8h;
6159 else if (Ty == LLT::fixed_vector(2, S32))
6160 Opc = AArch64::LD3Threev2s;
6161 else if (Ty == LLT::fixed_vector(4, S32))
6162 Opc = AArch64::LD3Threev4s;
6163 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6164 Opc = AArch64::LD3Threev2d;
6165 else if (Ty == S64 || Ty == P0)
6166 Opc = AArch64::LD1Threev1d;
6167 else
6168 llvm_unreachable("Unexpected type for ld3!");
6169 selectVectorLoadIntrinsic(Opc, 3, I);
6170 break;
6171 }
6172 case Intrinsic::aarch64_neon_ld3lane: {
6173 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6174 unsigned Opc;
6175 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6176 Opc = AArch64::LD3i8;
6177 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6178 Opc = AArch64::LD3i16;
6179 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6180 Opc = AArch64::LD3i32;
6181 else if (Ty == LLT::fixed_vector(2, S64) ||
6182 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6183 Opc = AArch64::LD3i64;
6184 else
6185 llvm_unreachable("Unexpected type for ld3lane!");
6186 if (!selectVectorLoadLaneIntrinsic(Opc, 3, I))
6187 return false;
6188 break;
6189 }
6190 case Intrinsic::aarch64_neon_ld3r: {
6191 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6192 unsigned Opc = 0;
6193 if (Ty == LLT::fixed_vector(8, S8))
6194 Opc = AArch64::LD3Rv8b;
6195 else if (Ty == LLT::fixed_vector(16, S8))
6196 Opc = AArch64::LD3Rv16b;
6197 else if (Ty == LLT::fixed_vector(4, S16))
6198 Opc = AArch64::LD3Rv4h;
6199 else if (Ty == LLT::fixed_vector(8, S16))
6200 Opc = AArch64::LD3Rv8h;
6201 else if (Ty == LLT::fixed_vector(2, S32))
6202 Opc = AArch64::LD3Rv2s;
6203 else if (Ty == LLT::fixed_vector(4, S32))
6204 Opc = AArch64::LD3Rv4s;
6205 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6206 Opc = AArch64::LD3Rv2d;
6207 else if (Ty == S64 || Ty == P0)
6208 Opc = AArch64::LD3Rv1d;
6209 else
6210 llvm_unreachable("Unexpected type for ld3r!");
6211 selectVectorLoadIntrinsic(Opc, 3, I);
6212 break;
6213 }
6214 case Intrinsic::aarch64_neon_ld4: {
6215 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6216 unsigned Opc = 0;
6217 if (Ty == LLT::fixed_vector(8, S8))
6218 Opc = AArch64::LD4Fourv8b;
6219 else if (Ty == LLT::fixed_vector(16, S8))
6220 Opc = AArch64::LD4Fourv16b;
6221 else if (Ty == LLT::fixed_vector(4, S16))
6222 Opc = AArch64::LD4Fourv4h;
6223 else if (Ty == LLT::fixed_vector(8, S16))
6224 Opc = AArch64::LD4Fourv8h;
6225 else if (Ty == LLT::fixed_vector(2, S32))
6226 Opc = AArch64::LD4Fourv2s;
6227 else if (Ty == LLT::fixed_vector(4, S32))
6228 Opc = AArch64::LD4Fourv4s;
6229 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6230 Opc = AArch64::LD4Fourv2d;
6231 else if (Ty == S64 || Ty == P0)
6232 Opc = AArch64::LD1Fourv1d;
6233 else
6234 llvm_unreachable("Unexpected type for ld4!");
6235 selectVectorLoadIntrinsic(Opc, 4, I);
6236 break;
6237 }
6238 case Intrinsic::aarch64_neon_ld4lane: {
6239 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6240 unsigned Opc;
6241 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6242 Opc = AArch64::LD4i8;
6243 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6244 Opc = AArch64::LD4i16;
6245 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6246 Opc = AArch64::LD4i32;
6247 else if (Ty == LLT::fixed_vector(2, S64) ||
6248 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6249 Opc = AArch64::LD4i64;
6250 else
6251 llvm_unreachable("Unexpected type for ld4lane!");
6252 if (!selectVectorLoadLaneIntrinsic(Opc, 4, I))
6253 return false;
6254 break;
6255 }
6256 case Intrinsic::aarch64_neon_ld4r: {
6257 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6258 unsigned Opc = 0;
6259 if (Ty == LLT::fixed_vector(8, S8))
6260 Opc = AArch64::LD4Rv8b;
6261 else if (Ty == LLT::fixed_vector(16, S8))
6262 Opc = AArch64::LD4Rv16b;
6263 else if (Ty == LLT::fixed_vector(4, S16))
6264 Opc = AArch64::LD4Rv4h;
6265 else if (Ty == LLT::fixed_vector(8, S16))
6266 Opc = AArch64::LD4Rv8h;
6267 else if (Ty == LLT::fixed_vector(2, S32))
6268 Opc = AArch64::LD4Rv2s;
6269 else if (Ty == LLT::fixed_vector(4, S32))
6270 Opc = AArch64::LD4Rv4s;
6271 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6272 Opc = AArch64::LD4Rv2d;
6273 else if (Ty == S64 || Ty == P0)
6274 Opc = AArch64::LD4Rv1d;
6275 else
6276 llvm_unreachable("Unexpected type for ld4r!");
6277 selectVectorLoadIntrinsic(Opc, 4, I);
6278 break;
6279 }
6280 case Intrinsic::aarch64_neon_st1x2: {
6281 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6282 unsigned Opc;
6283 if (Ty == LLT::fixed_vector(8, S8))
6284 Opc = AArch64::ST1Twov8b;
6285 else if (Ty == LLT::fixed_vector(16, S8))
6286 Opc = AArch64::ST1Twov16b;
6287 else if (Ty == LLT::fixed_vector(4, S16))
6288 Opc = AArch64::ST1Twov4h;
6289 else if (Ty == LLT::fixed_vector(8, S16))
6290 Opc = AArch64::ST1Twov8h;
6291 else if (Ty == LLT::fixed_vector(2, S32))
6292 Opc = AArch64::ST1Twov2s;
6293 else if (Ty == LLT::fixed_vector(4, S32))
6294 Opc = AArch64::ST1Twov4s;
6295 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6296 Opc = AArch64::ST1Twov2d;
6297 else if (Ty == S64 || Ty == P0)
6298 Opc = AArch64::ST1Twov1d;
6299 else
6300 llvm_unreachable("Unexpected type for st1x2!");
6301 selectVectorStoreIntrinsic(I, 2, Opc);
6302 break;
6303 }
6304 case Intrinsic::aarch64_neon_st1x3: {
6305 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6306 unsigned Opc;
6307 if (Ty == LLT::fixed_vector(8, S8))
6308 Opc = AArch64::ST1Threev8b;
6309 else if (Ty == LLT::fixed_vector(16, S8))
6310 Opc = AArch64::ST1Threev16b;
6311 else if (Ty == LLT::fixed_vector(4, S16))
6312 Opc = AArch64::ST1Threev4h;
6313 else if (Ty == LLT::fixed_vector(8, S16))
6314 Opc = AArch64::ST1Threev8h;
6315 else if (Ty == LLT::fixed_vector(2, S32))
6316 Opc = AArch64::ST1Threev2s;
6317 else if (Ty == LLT::fixed_vector(4, S32))
6318 Opc = AArch64::ST1Threev4s;
6319 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6320 Opc = AArch64::ST1Threev2d;
6321 else if (Ty == S64 || Ty == P0)
6322 Opc = AArch64::ST1Threev1d;
6323 else
6324 llvm_unreachable("Unexpected type for st1x3!");
6325 selectVectorStoreIntrinsic(I, 3, Opc);
6326 break;
6327 }
6328 case Intrinsic::aarch64_neon_st1x4: {
6329 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6330 unsigned Opc;
6331 if (Ty == LLT::fixed_vector(8, S8))
6332 Opc = AArch64::ST1Fourv8b;
6333 else if (Ty == LLT::fixed_vector(16, S8))
6334 Opc = AArch64::ST1Fourv16b;
6335 else if (Ty == LLT::fixed_vector(4, S16))
6336 Opc = AArch64::ST1Fourv4h;
6337 else if (Ty == LLT::fixed_vector(8, S16))
6338 Opc = AArch64::ST1Fourv8h;
6339 else if (Ty == LLT::fixed_vector(2, S32))
6340 Opc = AArch64::ST1Fourv2s;
6341 else if (Ty == LLT::fixed_vector(4, S32))
6342 Opc = AArch64::ST1Fourv4s;
6343 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6344 Opc = AArch64::ST1Fourv2d;
6345 else if (Ty == S64 || Ty == P0)
6346 Opc = AArch64::ST1Fourv1d;
6347 else
6348 llvm_unreachable("Unexpected type for st1x4!");
6349 selectVectorStoreIntrinsic(I, 4, Opc);
6350 break;
6351 }
6352 case Intrinsic::aarch64_neon_st2: {
6353 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6354 unsigned Opc;
6355 if (Ty == LLT::fixed_vector(8, S8))
6356 Opc = AArch64::ST2Twov8b;
6357 else if (Ty == LLT::fixed_vector(16, S8))
6358 Opc = AArch64::ST2Twov16b;
6359 else if (Ty == LLT::fixed_vector(4, S16))
6360 Opc = AArch64::ST2Twov4h;
6361 else if (Ty == LLT::fixed_vector(8, S16))
6362 Opc = AArch64::ST2Twov8h;
6363 else if (Ty == LLT::fixed_vector(2, S32))
6364 Opc = AArch64::ST2Twov2s;
6365 else if (Ty == LLT::fixed_vector(4, S32))
6366 Opc = AArch64::ST2Twov4s;
6367 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6368 Opc = AArch64::ST2Twov2d;
6369 else if (Ty == S64 || Ty == P0)
6370 Opc = AArch64::ST1Twov1d;
6371 else
6372 llvm_unreachable("Unexpected type for st2!");
6373 selectVectorStoreIntrinsic(I, 2, Opc);
6374 break;
6375 }
6376 case Intrinsic::aarch64_neon_st3: {
6377 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6378 unsigned Opc;
6379 if (Ty == LLT::fixed_vector(8, S8))
6380 Opc = AArch64::ST3Threev8b;
6381 else if (Ty == LLT::fixed_vector(16, S8))
6382 Opc = AArch64::ST3Threev16b;
6383 else if (Ty == LLT::fixed_vector(4, S16))
6384 Opc = AArch64::ST3Threev4h;
6385 else if (Ty == LLT::fixed_vector(8, S16))
6386 Opc = AArch64::ST3Threev8h;
6387 else if (Ty == LLT::fixed_vector(2, S32))
6388 Opc = AArch64::ST3Threev2s;
6389 else if (Ty == LLT::fixed_vector(4, S32))
6390 Opc = AArch64::ST3Threev4s;
6391 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6392 Opc = AArch64::ST3Threev2d;
6393 else if (Ty == S64 || Ty == P0)
6394 Opc = AArch64::ST1Threev1d;
6395 else
6396 llvm_unreachable("Unexpected type for st3!");
6397 selectVectorStoreIntrinsic(I, 3, Opc);
6398 break;
6399 }
6400 case Intrinsic::aarch64_neon_st4: {
6401 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6402 unsigned Opc;
6403 if (Ty == LLT::fixed_vector(8, S8))
6404 Opc = AArch64::ST4Fourv8b;
6405 else if (Ty == LLT::fixed_vector(16, S8))
6406 Opc = AArch64::ST4Fourv16b;
6407 else if (Ty == LLT::fixed_vector(4, S16))
6408 Opc = AArch64::ST4Fourv4h;
6409 else if (Ty == LLT::fixed_vector(8, S16))
6410 Opc = AArch64::ST4Fourv8h;
6411 else if (Ty == LLT::fixed_vector(2, S32))
6412 Opc = AArch64::ST4Fourv2s;
6413 else if (Ty == LLT::fixed_vector(4, S32))
6414 Opc = AArch64::ST4Fourv4s;
6415 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6416 Opc = AArch64::ST4Fourv2d;
6417 else if (Ty == S64 || Ty == P0)
6418 Opc = AArch64::ST1Fourv1d;
6419 else
6420 llvm_unreachable("Unexpected type for st4!");
6421 selectVectorStoreIntrinsic(I, 4, Opc);
6422 break;
6423 }
6424 case Intrinsic::aarch64_neon_st2lane: {
6425 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6426 unsigned Opc;
6427 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6428 Opc = AArch64::ST2i8;
6429 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6430 Opc = AArch64::ST2i16;
6431 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6432 Opc = AArch64::ST2i32;
6433 else if (Ty == LLT::fixed_vector(2, S64) ||
6434 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6435 Opc = AArch64::ST2i64;
6436 else
6437 llvm_unreachable("Unexpected type for st2lane!");
6438 if (!selectVectorStoreLaneIntrinsic(I, 2, Opc))
6439 return false;
6440 break;
6441 }
6442 case Intrinsic::aarch64_neon_st3lane: {
6443 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6444 unsigned Opc;
6445 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6446 Opc = AArch64::ST3i8;
6447 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6448 Opc = AArch64::ST3i16;
6449 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6450 Opc = AArch64::ST3i32;
6451 else if (Ty == LLT::fixed_vector(2, S64) ||
6452 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6453 Opc = AArch64::ST3i64;
6454 else
6455 llvm_unreachable("Unexpected type for st3lane!");
6456 if (!selectVectorStoreLaneIntrinsic(I, 3, Opc))
6457 return false;
6458 break;
6459 }
6460 case Intrinsic::aarch64_neon_st4lane: {
6461 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6462 unsigned Opc;
6463 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6464 Opc = AArch64::ST4i8;
6465 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6466 Opc = AArch64::ST4i16;
6467 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6468 Opc = AArch64::ST4i32;
6469 else if (Ty == LLT::fixed_vector(2, S64) ||
6470 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6471 Opc = AArch64::ST4i64;
6472 else
6473 llvm_unreachable("Unexpected type for st4lane!");
6474 if (!selectVectorStoreLaneIntrinsic(I, 4, Opc))
6475 return false;
6476 break;
6477 }
6478 case Intrinsic::aarch64_mops_memset_tag: {
6479 // Transform
6480 // %dst:gpr(p0) = \
6481 // G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.mops.memset.tag),
6482 // \ %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64)
6483 // where %dst is updated, into
6484 // (%Rd:GPR64common, %Rn:GPR64) = \
6485 // MOPSMemorySetTaggingPseudo \
6486 // %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64
6487 // where Rd and Rn are tied.
6488 // It is expected that %val has been extended to s64 in legalization.
6489 // Note that the order of the size/value operands is swapped.
6490
6491 Register DstDef = I.getOperand(0).getReg();
6492 // I.getOperand(1) is the intrinsic function
6493 Register DstUse = I.getOperand(2).getReg();
6494 Register ValUse = I.getOperand(3).getReg();
6495 Register SizeUse = I.getOperand(4).getReg();
6496
6497 // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one.
6498 // Therefore an additional virtual register is required for the updated size
6499 // operand. This value is not accessible via the semantics of the intrinsic.
6500 Register SizeDef = MRI.createGenericVirtualRegister(LLT::scalar(64));
6501
6502 auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo,
6503 {DstDef, SizeDef}, {DstUse, SizeUse, ValUse});
6504 Memset.cloneMemRefs(I);
6506 break;
6507 }
6508 }
6509
6510 I.eraseFromParent();
6511 return true;
6512}
6513
6514bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
6515 MachineRegisterInfo &MRI) {
6516 unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
6517
6518 switch (IntrinID) {
6519 default:
6520 break;
6521 case Intrinsic::aarch64_crypto_sha1h: {
6522 Register DstReg = I.getOperand(0).getReg();
6523 Register SrcReg = I.getOperand(2).getReg();
6524
6525 // FIXME: Should this be an assert?
6526 if (MRI.getType(DstReg).getSizeInBits() != 32 ||
6527 MRI.getType(SrcReg).getSizeInBits() != 32)
6528 return false;
6529
6530 // The operation has to happen on FPRs. Set up some new FPR registers for
6531 // the source and destination if they are on GPRs.
6532 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
6533 SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
6534 MIB.buildCopy({SrcReg}, {I.getOperand(2)});
6535
6536 // Make sure the copy ends up getting constrained properly.
6537 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
6538 AArch64::GPR32RegClass, MRI);
6539 }
6540
6541 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
6542 DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
6543
6544 // Actually insert the instruction.
6545 auto SHA1Inst = MIB.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
6546 constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
6547
6548 // Did we create a new register for the destination?
6549 if (DstReg != I.getOperand(0).getReg()) {
6550 // Yep. Copy the result of the instruction back into the original
6551 // destination.
6552 MIB.buildCopy({I.getOperand(0)}, {DstReg});
6553 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
6554 AArch64::GPR32RegClass, MRI);
6555 }
6556
6557 I.eraseFromParent();
6558 return true;
6559 }
6560 case Intrinsic::ptrauth_resign: {
6561 Register DstReg = I.getOperand(0).getReg();
6562 Register ValReg = I.getOperand(2).getReg();
6563 uint64_t AUTKey = I.getOperand(3).getImm();
6564 Register AUTDisc = I.getOperand(4).getReg();
6565 uint64_t PACKey = I.getOperand(5).getImm();
6566 Register PACDisc = I.getOperand(6).getReg();
6567
6568 Register AUTAddrDisc = AUTDisc;
6569 uint16_t AUTConstDiscC = 0;
6570 std::tie(AUTConstDiscC, AUTAddrDisc) =
6572
6573 Register PACAddrDisc = PACDisc;
6574 uint16_t PACConstDiscC = 0;
6575 std::tie(PACConstDiscC, PACAddrDisc) =
6577
6578 MIB.buildCopy({AArch64::X16}, {ValReg});
6579 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6580 MIB.buildInstr(AArch64::AUTPAC)
6581 .addImm(AUTKey)
6582 .addImm(AUTConstDiscC)
6583 .addUse(AUTAddrDisc)
6584 .addImm(PACKey)
6585 .addImm(PACConstDiscC)
6586 .addUse(PACAddrDisc)
6587 .constrainAllUses(TII, TRI, RBI);
6588 MIB.buildCopy({DstReg}, Register(AArch64::X16));
6589
6590 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6591 I.eraseFromParent();
6592 return true;
6593 }
6594 case Intrinsic::ptrauth_auth: {
6595 Register DstReg = I.getOperand(0).getReg();
6596 Register ValReg = I.getOperand(2).getReg();
6597 uint64_t AUTKey = I.getOperand(3).getImm();
6598 Register AUTDisc = I.getOperand(4).getReg();
6599
6600 Register AUTAddrDisc = AUTDisc;
6601 uint16_t AUTConstDiscC = 0;
6602 std::tie(AUTConstDiscC, AUTAddrDisc) =
6604
6605 MIB.buildCopy({AArch64::X16}, {ValReg});
6606 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6607 MIB.buildInstr(AArch64::AUT)
6608 .addImm(AUTKey)
6609 .addImm(AUTConstDiscC)
6610 .addUse(AUTAddrDisc)
6611 .constrainAllUses(TII, TRI, RBI);
6612 MIB.buildCopy({DstReg}, Register(AArch64::X16));
6613
6614 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6615 I.eraseFromParent();
6616 return true;
6617 }
6618 case Intrinsic::frameaddress:
6619 case Intrinsic::returnaddress: {
6620 MachineFunction &MF = *I.getParent()->getParent();
6621 MachineFrameInfo &MFI = MF.getFrameInfo();
6622
6623 unsigned Depth = I.getOperand(2).getImm();
6624 Register DstReg = I.getOperand(0).getReg();
6625 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6626
6627 if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
6628 if (!MFReturnAddr) {
6629 // Insert the copy from LR/X30 into the entry block, before it can be
6630 // clobbered by anything.
6631 MFI.setReturnAddressIsTaken(true);
6632 MFReturnAddr = getFunctionLiveInPhysReg(
6633 MF, TII, AArch64::LR, AArch64::GPR64RegClass, I.getDebugLoc());
6634 }
6635
6636 if (STI.hasPAuth()) {
6637 MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
6638 } else {
6639 MIB.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
6640 MIB.buildInstr(AArch64::XPACLRI);
6641 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
6642 }
6643
6644 I.eraseFromParent();
6645 return true;
6646 }
6647
6648 MFI.setFrameAddressIsTaken(true);
6649 Register FrameAddr(AArch64::FP);
6650 while (Depth--) {
6651 Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
6652 auto Ldr =
6653 MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
6655 FrameAddr = NextFrame;
6656 }
6657
6658 if (IntrinID == Intrinsic::frameaddress)
6659 MIB.buildCopy({DstReg}, {FrameAddr});
6660 else {
6661 MFI.setReturnAddressIsTaken(true);
6662
6663 if (STI.hasPAuth()) {
6664 Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
6665 MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
6666 MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
6667 } else {
6668 MIB.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr})
6669 .addImm(1);
6670 MIB.buildInstr(AArch64::XPACLRI);
6671 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
6672 }
6673 }
6674
6675 I.eraseFromParent();
6676 return true;
6677 }
6678 case Intrinsic::aarch64_neon_tbl2:
6679 SelectTable(I, MRI, 2, AArch64::TBLv8i8Two, AArch64::TBLv16i8Two, false);
6680 return true;
6681 case Intrinsic::aarch64_neon_tbl3:
6682 SelectTable(I, MRI, 3, AArch64::TBLv8i8Three, AArch64::TBLv16i8Three,
6683 false);
6684 return true;
6685 case Intrinsic::aarch64_neon_tbl4:
6686 SelectTable(I, MRI, 4, AArch64::TBLv8i8Four, AArch64::TBLv16i8Four, false);
6687 return true;
6688 case Intrinsic::aarch64_neon_tbx2:
6689 SelectTable(I, MRI, 2, AArch64::TBXv8i8Two, AArch64::TBXv16i8Two, true);
6690 return true;
6691 case Intrinsic::aarch64_neon_tbx3:
6692 SelectTable(I, MRI, 3, AArch64::TBXv8i8Three, AArch64::TBXv16i8Three, true);
6693 return true;
6694 case Intrinsic::aarch64_neon_tbx4:
6695 SelectTable(I, MRI, 4, AArch64::TBXv8i8Four, AArch64::TBXv16i8Four, true);
6696 return true;
6697 case Intrinsic::swift_async_context_addr:
6698 auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()},
6699 {Register(AArch64::FP)})
6700 .addImm(8)
6701 .addImm(0);
6703
6705 MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
6706 I.eraseFromParent();
6707 return true;
6708 }
6709 return false;
6710}
6711
6712// G_PTRAUTH_GLOBAL_VALUE lowering
6713//
6714// We have 3 lowering alternatives to choose from:
6715// - MOVaddrPAC: similar to MOVaddr, with added PAC.
6716// If the GV doesn't need a GOT load (i.e., is locally defined)
6717// materialize the pointer using adrp+add+pac. See LowerMOVaddrPAC.
6718//
6719// - LOADgotPAC: similar to LOADgot, with added PAC.
6720// If the GV needs a GOT load, materialize the pointer using the usual
6721 // GOT adrp+ldr, +pac. Pointers in the GOT are assumed to be not signed; the
6722 // GOT section is assumed to be read-only (e.g., via the relro mechanism). See
6723// LowerMOVaddrPAC.
6724//
6725 // - LOADauthptrstatic: similar to LOADgot, but uses a
6726// special stub slot instead of a GOT slot.
6727// Load a signed pointer for symbol 'sym' from a stub slot named
6728// 'sym$auth_ptr$key$disc' filled by dynamic linker during relocation
6729// resolving. This usually lowers to adrp+ldr, but also emits an entry into
6730// .data with an
6731// @AUTH relocation. See LowerLOADauthptrstatic.
6732//
6733 // All 3 are pseudos that are expanded late into longer sequences: this lets us
6734// provide integrity guarantees on the to-be-signed intermediate values.
6735//
6736// LOADauthptrstatic is undesirable because it requires a large section filled
6737// with often similarly-signed pointers, making it a good harvesting target.
6738// Thus, it's only used for ptrauth references to extern_weak to avoid null
6739// checks.
6740
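// Illustrative example (editorial addition, not from the original source): a
// signed global reference such as the IR constant
//
//   @fptr = global ptr ptrauth (ptr @callee, i32 2, i64 1234)
//
// reaches this selector as a G_PTRAUTH_GLOBAL_VALUE. For a locally defined
// @callee this path picks MOVaddrPAC, which the pseudo expansion (see
// LowerMOVaddrPAC) turns into roughly an adrp+add materializing the raw
// address into X16, followed by the signing instruction for key 2 with
// discriminator 1234.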
6741bool AArch64InstructionSelector::selectPtrAuthGlobalValue(
6743 Register DefReg = I.getOperand(0).getReg();
6744 Register Addr = I.getOperand(1).getReg();
6745 uint64_t Key = I.getOperand(2).getImm();
6746 Register AddrDisc = I.getOperand(3).getReg();
6747 uint64_t Disc = I.getOperand(4).getImm();
6748 int64_t Offset = 0;
6749
6750 if (Key > AArch64PACKey::LAST)
6751 report_fatal_error("key in ptrauth global out of range [0, " +
6752 Twine((int)AArch64PACKey::LAST) + "]");
6753
6754 // Blend only works if the integer discriminator is 16-bit wide.
6755 if (!isUInt<16>(Disc))
6757 "constant discriminator in ptrauth global out of range [0, 0xffff]");
6758
6759 // Choosing between 3 lowering alternatives is target-specific.
6760 if (!STI.isTargetELF() && !STI.isTargetMachO())
6761 report_fatal_error("ptrauth global lowering only supported on MachO/ELF");
6762
6763 if (!MRI.hasOneDef(Addr))
6764 return false;
6765
6766 // First match any offset we take from the real global.
6767 const MachineInstr *DefMI = &*MRI.def_instr_begin(Addr);
6768 if (DefMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
6769 Register OffsetReg = DefMI->getOperand(2).getReg();
6770 if (!MRI.hasOneDef(OffsetReg))
6771 return false;
6772 const MachineInstr &OffsetMI = *MRI.def_instr_begin(OffsetReg);
6773 if (OffsetMI.getOpcode() != TargetOpcode::G_CONSTANT)
6774 return false;
6775
6776 Addr = DefMI->getOperand(1).getReg();
6777 if (!MRI.hasOneDef(Addr))
6778 return false;
6779
6780 DefMI = &*MRI.def_instr_begin(Addr);
6781 Offset = OffsetMI.getOperand(1).getCImm()->getSExtValue();
6782 }
6783
6784 // We should be left with a genuine unauthenticated GlobalValue.
6785 const GlobalValue *GV;
6786 if (DefMI->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) {
6787 GV = DefMI->getOperand(1).getGlobal();
6789 } else if (DefMI->getOpcode() == AArch64::G_ADD_LOW) {
6790 GV = DefMI->getOperand(2).getGlobal();
6792 } else {
6793 return false;
6794 }
6795
6796 MachineIRBuilder MIB(I);
6797
6798 // Classify the reference to determine whether it needs a GOT load.
6799 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
6800 const bool NeedsGOTLoad = ((OpFlags & AArch64II::MO_GOT) != 0);
6801 assert(((OpFlags & (~AArch64II::MO_GOT)) == 0) &&
6802 "unsupported non-GOT op flags on ptrauth global reference");
6803 assert((!GV->hasExternalWeakLinkage() || NeedsGOTLoad) &&
6804 "unsupported non-GOT reference to weak ptrauth global");
6805
6806 std::optional<APInt> AddrDiscVal = getIConstantVRegVal(AddrDisc, MRI);
6807 bool HasAddrDisc = !AddrDiscVal || *AddrDiscVal != 0;
6808
6809 // Non-extern_weak:
6810 // - No GOT load needed -> MOVaddrPAC
6811 // - GOT load for non-extern_weak -> LOADgotPAC
6812 // Note that we disallow extern_weak refs to avoid null checks later.
6813 if (!GV->hasExternalWeakLinkage()) {
6814 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X16}, {});
6815 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6816 MIB.buildInstr(NeedsGOTLoad ? AArch64::LOADgotPAC : AArch64::MOVaddrPAC)
6818 .addImm(Key)
6819 .addReg(HasAddrDisc ? AddrDisc : AArch64::XZR)
6820 .addImm(Disc)
6821 .constrainAllUses(TII, TRI, RBI);
6822 MIB.buildCopy(DefReg, Register(AArch64::X16));
6823 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
6824 I.eraseFromParent();
6825 return true;
6826 }
6827
6828 // extern_weak -> LOADauthptrstatic
6829
6830 // Offsets and extern_weak don't mix well: ptrauth aside, you'd get the
6831 // offset alone as a pointer if the symbol wasn't available, which would
6832 // probably break null checks in users. Ptrauth complicates things further:
6833 // error out.
6834 if (Offset != 0)
6836 "unsupported non-zero offset in weak ptrauth global reference");
6837
6838 if (HasAddrDisc)
6839 report_fatal_error("unsupported weak addr-div ptrauth global");
6840
6841 MIB.buildInstr(AArch64::LOADauthptrstatic, {DefReg}, {})
6842 .addGlobalAddress(GV, Offset)
6843 .addImm(Key)
6844 .addImm(Disc);
6845 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
6846
6847 I.eraseFromParent();
6848 return true;
6849}
6850
6851void AArch64InstructionSelector::SelectTable(MachineInstr &I,
6852 MachineRegisterInfo &MRI,
6853 unsigned NumVec, unsigned Opc1,
6854 unsigned Opc2, bool isExt) {
6855 Register DstReg = I.getOperand(0).getReg();
6856 unsigned Opc = MRI.getType(DstReg) == LLT::fixed_vector(8, 8) ? Opc1 : Opc2;
6857
6858 // Create the REG_SEQUENCE
6860 for (unsigned i = 0; i < NumVec; i++)
6861 Regs.push_back(I.getOperand(i + 2 + isExt).getReg());
6862 Register RegSeq = createQTuple(Regs, MIB);
6863
6864 Register IdxReg = I.getOperand(2 + NumVec + isExt).getReg();
6866 if (isExt) {
6867 Register Reg = I.getOperand(2).getReg();
6868 Instr = MIB.buildInstr(Opc, {DstReg}, {Reg, RegSeq, IdxReg});
6869 } else
6870 Instr = MIB.buildInstr(Opc, {DstReg}, {RegSeq, IdxReg});
6872 I.eraseFromParent();
6873}
6874
6876AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
6877 auto MaybeImmed = getImmedFromMO(Root);
6878 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6879 return std::nullopt;
6880 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
6881 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6882}
6883
6885AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
6886 auto MaybeImmed = getImmedFromMO(Root);
6887 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6888 return std::nullopt;
6889 uint64_t Enc = 31 - *MaybeImmed;
6890 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6891}
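// Illustrative example (editorial addition): for a 32-bit shift amount of 3,
// selectShiftA_32 renders (32 - 3) & 0x1f = 29 and selectShiftB_32 renders
// 31 - 3 = 28, i.e. the immr/imms pair used when the left shift is selected
// as a bitfield move ("lsl w0, w1, #3" is an alias of "ubfm w0, w1, #29, #28").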
6892
6894AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
6895 auto MaybeImmed = getImmedFromMO(Root);
6896 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
6897 return std::nullopt;
6898 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
6899 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6900}
6901
6903AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
6904 auto MaybeImmed = getImmedFromMO(Root);
6905 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
6906 return std::nullopt;
6907 uint64_t Enc = 63 - *MaybeImmed;
6908 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6909}
6910
6911/// Helper to select an immediate value that can be represented as a 12-bit
6912/// value shifted left by either 0 or 12. If it is possible to do so, return
6913/// the immediate and shift value. If not, return std::nullopt.
6914///
6915/// Used by selectArithImmed and selectNegArithImmed.
6917AArch64InstructionSelector::select12BitValueWithLeftShift(
6918 uint64_t Immed) const {
6919 unsigned ShiftAmt;
6920 if (Immed >> 12 == 0) {
6921 ShiftAmt = 0;
6922 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
6923 ShiftAmt = 12;
6924 Immed = Immed >> 12;
6925 } else
6926 return std::nullopt;
6927
6928 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
6929 return {{
6930 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
6931 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
6932 }};
6933}
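// Illustrative example (editorial addition): an immediate of 0x45 renders as
// (0x45, lsl #0), 0x45000 renders as (0x45, lsl #12), and 0x45001 fits
// neither form, so std::nullopt is returned.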
6934
6935/// SelectArithImmed - Select an immediate value that can be represented as
6936/// a 12-bit value shifted left by either 0 or 12. If so, return true with
6937/// Val set to the 12-bit value and Shift set to the shifter operand.
6939AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
6940 // This function is called from the addsub_shifted_imm ComplexPattern,
6941 // which lists [imm] as the list of opcode it's interested in, however
6942 // we still need to check whether the operand is actually an immediate
6943 // here because the ComplexPattern opcode list is only used in
6944 // root-level opcode matching.
6945 auto MaybeImmed = getImmedFromMO(Root);
6946 if (MaybeImmed == std::nullopt)
6947 return std::nullopt;
6948 return select12BitValueWithLeftShift(*MaybeImmed);
6949}
6950
6951/// SelectNegArithImmed - As above, but negates the value before trying to
6952/// select it.
6954AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
6955 // We need a register here, because we need to know if we have a 64 or 32
6956 // bit immediate.
6957 if (!Root.isReg())
6958 return std::nullopt;
6959 auto MaybeImmed = getImmedFromMO(Root);
6960 if (MaybeImmed == std::nullopt)
6961 return std::nullopt;
6962 uint64_t Immed = *MaybeImmed;
6963
6964 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
6965 // have the opposite effect on the C flag, so this pattern mustn't match under
6966 // those circumstances.
6967 if (Immed == 0)
6968 return std::nullopt;
6969
6970 // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
6971 // the root.
6973 if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
6974 Immed = ~((uint32_t)Immed) + 1;
6975 else
6976 Immed = ~Immed + 1ULL;
6977
6978 if (Immed & 0xFFFFFFFFFF000000ULL)
6979 return std::nullopt;
6980
6981 Immed &= 0xFFFFFFULL;
6982 return select12BitValueWithLeftShift(Immed);
6983}
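// Illustrative example (editorial addition): for a 32-bit comparison against
// -16, the value is negated to 16, which fits the 12-bit form; this is what
// lets "cmp w0, #-16" be selected as the equivalent "cmn w0, #16". A
// comparison against 0 is rejected above because negating it would change the
// meaning of the carry flag.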
6984
6985/// Checks if we are sure that folding MI into load/store addressing mode is
6986/// beneficial or not.
6987///
6988/// Returns:
6989/// - true if folding MI would be beneficial.
6990/// - false if folding MI would be bad.
6991/// - std::nullopt if it is not sure whether folding MI is beneficial.
6992///
6993/// \p MI can be the offset operand of G_PTR_ADD, e.g. G_SHL in the example:
6994///
6995/// %13:gpr(s64) = G_CONSTANT i64 1
6996/// %8:gpr(s64) = G_SHL %6, %13(s64)
6997/// %9:gpr(p0) = G_PTR_ADD %0, %8(s64)
6998/// %12:gpr(s32) = G_LOAD %9(p0) :: (load (s16))
6999std::optional<bool> AArch64InstructionSelector::isWorthFoldingIntoAddrMode(
7000 MachineInstr &MI, const MachineRegisterInfo &MRI) const {
7001 if (MI.getOpcode() == AArch64::G_SHL) {
7002 // Address operands with shifts are free, except for running on subtargets
7003 // with AddrLSLSlow14.
7004 if (const auto ValAndVeg = getIConstantVRegValWithLookThrough(
7005 MI.getOperand(2).getReg(), MRI)) {
7006 const APInt ShiftVal = ValAndVeg->Value;
7007
7008 // Don't fold if we know this will be slow.
7009 return !(STI.hasAddrLSLSlow14() && (ShiftVal == 1 || ShiftVal == 4));
7010 }
7011 }
7012 return std::nullopt;
7013}
7014
7015/// Return true if it is worth folding MI into an extended register. That is,
7016/// if it's safe to pull it into the addressing mode of a load or store as a
7017/// shift.
7018/// \p IsAddrOperand whether the def of MI is used as an address operand
7019/// (e.g. feeding into an LDR/STR).
7020bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
7022 bool IsAddrOperand) const {
7023
7024 // Always fold if there is one use, or if we're optimizing for size.
7025 Register DefReg = MI.getOperand(0).getReg();
7026 if (MRI.hasOneNonDBGUse(DefReg) ||
7027 MI.getParent()->getParent()->getFunction().hasOptSize())
7028 return true;
7029
7030 if (IsAddrOperand) {
7031 // If we are already sure that folding MI is good or bad, return the result.
7032 if (const auto Worth = isWorthFoldingIntoAddrMode(MI, MRI))
7033 return *Worth;
7034
7035 // Fold G_PTR_ADD if its offset operand can be folded
7036 if (MI.getOpcode() == AArch64::G_PTR_ADD) {
7037 MachineInstr *OffsetInst =
7038 getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
7039
7040 // Note, we already know G_PTR_ADD is used by at least two instructions.
7041 // If we are also sure about whether folding is beneficial or not,
7042 // return the result.
7043 if (const auto Worth = isWorthFoldingIntoAddrMode(*OffsetInst, MRI))
7044 return *Worth;
7045 }
7046 }
7047
7048 // FIXME: Consider checking HasALULSLFast as appropriate.
7049
7050 // We have a fastpath, so folding a shift in and potentially computing it
7051 // many times may be beneficial. Check if this is only used in memory ops.
7052 // If it is, then we should fold.
7053 return all_of(MRI.use_nodbg_instructions(DefReg),
7054 [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
7055}
7056
7058 switch (Type) {
7059 case AArch64_AM::SXTB:
7060 case AArch64_AM::SXTH:
7061 case AArch64_AM::SXTW:
7062 return true;
7063 default:
7064 return false;
7065 }
7066}
7067
7069AArch64InstructionSelector::selectExtendedSHL(
7071 unsigned SizeInBytes, bool WantsExt) const {
7072 assert(Base.isReg() && "Expected base to be a register operand");
7073 assert(Offset.isReg() && "Expected offset to be a register operand");
7074
7076 MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
7077
7078 unsigned OffsetOpc = OffsetInst->getOpcode();
7079 bool LookedThroughZExt = false;
7080 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
7081 // Try to look through a ZEXT.
7082 if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
7083 return std::nullopt;
7084
7085 OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg());
7086 OffsetOpc = OffsetInst->getOpcode();
7087 LookedThroughZExt = true;
7088
7089 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
7090 return std::nullopt;
7091 }
7092 // Make sure that the memory op is a valid size.
7093 int64_t LegalShiftVal = Log2_32(SizeInBytes);
7094 if (LegalShiftVal == 0)
7095 return std::nullopt;
7096 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))
7097 return std::nullopt;
7098
7099 // Now, try to find the specific G_CONSTANT. Start by assuming that the
7100 // register we will offset is the LHS, and the register containing the
7101 // constant is the RHS.
7102 Register OffsetReg = OffsetInst->getOperand(1).getReg();
7103 Register ConstantReg = OffsetInst->getOperand(2).getReg();
7104 auto ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
7105 if (!ValAndVReg) {
7106 // We didn't get a constant on the RHS. If the opcode is a shift, then
7107 // we're done.
7108 if (OffsetOpc == TargetOpcode::G_SHL)
7109 return std::nullopt;
7110
7111 // If we have a G_MUL, we can use either register. Try looking at the RHS.
7112 std::swap(OffsetReg, ConstantReg);
7113 ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
7114 if (!ValAndVReg)
7115 return std::nullopt;
7116 }
7117
7118 // The value must fit into 3 bits, and must be positive. Make sure that is
7119 // true.
7120 int64_t ImmVal = ValAndVReg->Value.getSExtValue();
7121
7122 // Since we're going to pull this into a shift, the constant value must be
7123 // a power of 2. If we got a multiply, then we need to check this.
7124 if (OffsetOpc == TargetOpcode::G_MUL) {
7125 if (!llvm::has_single_bit<uint32_t>(ImmVal))
7126 return std::nullopt;
7127
7128 // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
7129 ImmVal = Log2_32(ImmVal);
7130 }
7131
7132 if ((ImmVal & 0x7) != ImmVal)
7133 return std::nullopt;
7134
7135 // We are only allowed to shift by LegalShiftVal. This shift value is built
7136 // into the instruction, so we can't just use whatever we want.
7137 if (ImmVal != LegalShiftVal)
7138 return std::nullopt;
7139
7140 unsigned SignExtend = 0;
7141 if (WantsExt) {
7142 // Check if the offset is defined by an extend, unless we looked through a
7143 // G_ZEXT earlier.
7144 if (!LookedThroughZExt) {
7145 MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
7146 auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
7148 return std::nullopt;
7149
7150 SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
7151 // We only support SXTW for signed extension here.
7152 if (SignExtend && Ext != AArch64_AM::SXTW)
7153 return std::nullopt;
7154 OffsetReg = ExtInst->getOperand(1).getReg();
7155 }
7156
7157 // Need a 32-bit wide register here.
7158 MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
7159 OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
7160 }
7161
7162 // We can use the LHS of the GEP as the base, and the LHS of the shift as an
7163 // offset. Signify that we are shifting by setting the shift flag to 1.
7164 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
7165 [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
7166 [=](MachineInstrBuilder &MIB) {
7167 // Need to add both immediates here to make sure that they are both
7168 // added to the instruction.
7169 MIB.addImm(SignExtend);
7170 MIB.addImm(1);
7171 }}};
7172}
7173
7174/// This is used for computing addresses like this:
7175///
7176/// ldr x1, [x2, x3, lsl #3]
7177///
7178/// Where x2 is the base register, and x3 is an offset register. The shift-left
7179/// is a constant value specific to this load instruction. That is, we'll never
7180/// see anything other than a 3 here (which corresponds to the size of the
7181/// element being loaded.)
7183AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
7184 MachineOperand &Root, unsigned SizeInBytes) const {
7185 if (!Root.isReg())
7186 return std::nullopt;
7188
7189 // We want to find something like this:
7190 //
7191 // val = G_CONSTANT LegalShiftVal
7192 // shift = G_SHL off_reg val
7193 // ptr = G_PTR_ADD base_reg shift
7194 // x = G_LOAD ptr
7195 //
7196 // And fold it into this addressing mode:
7197 //
7198 // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
7199
7200 // Check if we can find the G_PTR_ADD.
7201 MachineInstr *PtrAdd =
7202 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7203 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))
7204 return std::nullopt;
7205
7206 // Now, try to match an opcode which will match our specific offset.
7207 // We want a G_SHL or a G_MUL.
7208 MachineInstr *OffsetInst =
7210 return selectExtendedSHL(Root, PtrAdd->getOperand(1),
7211 OffsetInst->getOperand(0), SizeInBytes,
7212 /*WantsExt=*/false);
7213}
7214
7215/// This is used for computing addresses like this:
7216///
7217/// ldr x1, [x2, x3]
7218///
7219/// Where x2 is the base register, and x3 is an offset register.
7220///
7221/// When possible (or profitable) to fold a G_PTR_ADD into the address
7222/// calculation, this will do so. Otherwise, it will return std::nullopt.
7224AArch64InstructionSelector::selectAddrModeRegisterOffset(
7225 MachineOperand &Root) const {
7227
7228 // We need a GEP.
7229 MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
7230 if (Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
7231 return std::nullopt;
7232
7233 // If this is used more than once, let's not bother folding.
7234 // TODO: Check if they are memory ops. If they are, then we can still fold
7235 // without having to recompute anything.
7236 if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
7237 return std::nullopt;
7238
7239 // Base is the GEP's LHS, offset is its RHS.
7240 return {{[=](MachineInstrBuilder &MIB) {
7241 MIB.addUse(Gep->getOperand(1).getReg());
7242 },
7243 [=](MachineInstrBuilder &MIB) {
7244 MIB.addUse(Gep->getOperand(2).getReg());
7245 },
7246 [=](MachineInstrBuilder &MIB) {
7247 // Need to add both immediates here to make sure that they are both
7248 // added to the instruction.
7249 MIB.addImm(0);
7250 MIB.addImm(0);
7251 }}};
7252}
7253
7254/// This is intended to be equivalent to selectAddrModeXRO in
7255/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
7257AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
7258 unsigned SizeInBytes) const {
7260 if (!Root.isReg())
7261 return std::nullopt;
7262 MachineInstr *PtrAdd =
7263 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7264 if (!PtrAdd)
7265 return std::nullopt;
7266
7267 // Check for immediates which cannot be encoded in the [base + imm]
7268 // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
7269 // end up with code like:
7270 //
7271 // mov x0, wide
7272 // add x1 base, x0
7273 // ldr x2, [x1, x0]
7274 //
7275 // In this situation, we can use the [base, xreg] addressing mode to save an
7276 // add/sub:
7277 //
7278 // mov x0, wide
7279 // ldr x2, [base, x0]
7280 auto ValAndVReg =
7282 if (ValAndVReg) {
7283 unsigned Scale = Log2_32(SizeInBytes);
7284 int64_t ImmOff = ValAndVReg->Value.getSExtValue();
7285
7286 // Skip immediates that can be selected in the load/store addressing
7287 // mode.
7288 if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
7289 ImmOff < (0x1000 << Scale))
7290 return std::nullopt;
7291
7292 // Helper lambda to decide whether or not it is preferable to emit an add.
7293 auto isPreferredADD = [](int64_t ImmOff) {
7294 // Constants in [0x0, 0xfff] can be encoded in an add.
7295 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
7296 return true;
7297
7298 // Can it be encoded in an add lsl #12?
7299 if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
7300 return false;
7301
7302 // It can be encoded in an add lsl #12, but we may not want to. If it is
7303 // possible to select this as a single movz, then prefer that. A single
7304 // movz is faster than an add with a shift.
7305 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
7306 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
7307 };
7308
7309 // If the immediate can be encoded in a single add/sub, then bail out.
7310 if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
7311 return std::nullopt;
7312 }
7313
7314 // Try to fold shifts into the addressing mode.
7315 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
7316 if (AddrModeFns)
7317 return AddrModeFns;
7318
7319 // If that doesn't work, see if it's possible to fold in registers from
7320 // a GEP.
7321 return selectAddrModeRegisterOffset(Root);
7322}
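// Illustrative example (editorial addition, assuming an 8-byte access): an
// offset of 0x40000 can be built with a single movz, so it is kept in a
// register and folded as "ldr x2, [base, x0]"; an offset of 0x45000 is better
// materialized as "add x1, base, #0x45, lsl #12" plus a scaled load, so
// isPreferredADD makes this selector bail out and leave the add in place.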
7323
7324/// This is used for computing addresses like this:
7325///
7326/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
7327///
7328/// Where we have a 64-bit base register, a 32-bit offset register, and an
7329/// extend (which may or may not be signed).
7331AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
7332 unsigned SizeInBytes) const {
7334
7335 MachineInstr *PtrAdd =
7336 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7337 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))
7338 return std::nullopt;
7339
7340 MachineOperand &LHS = PtrAdd->getOperand(1);
7341 MachineOperand &RHS = PtrAdd->getOperand(2);
7342 MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
7343
7344 // The first case is the same as selectAddrModeXRO, except we need an extend.
7345 // In this case, we try to find a shift and extend, and fold them into the
7346 // addressing mode.
7347 //
7348 // E.g.
7349 //
7350 // off_reg = G_Z/S/ANYEXT ext_reg
7351 // val = G_CONSTANT LegalShiftVal
7352 // shift = G_SHL off_reg val
7353 // ptr = G_PTR_ADD base_reg shift
7354 // x = G_LOAD ptr
7355 //
7356 // In this case we can get a load like this:
7357 //
7358 // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
7359 auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
7360 SizeInBytes, /*WantsExt=*/true);
7361 if (ExtendedShl)
7362 return ExtendedShl;
7363
7364 // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
7365 //
7366 // e.g.
7367 // ldr something, [base_reg, ext_reg, sxtw]
7368 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))
7369 return std::nullopt;
7370
7371 // Check if this is an extend. We'll get an extend type if it is.
7373 getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
7375 return std::nullopt;
7376
7377 // Need a 32-bit wide register.
7378 MachineIRBuilder MIB(*PtrAdd);
7379 Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
7380 AArch64::GPR32RegClass, MIB);
7381 unsigned SignExtend = Ext == AArch64_AM::SXTW;
7382
7383 // Base is LHS, offset is ExtReg.
7384 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
7385 [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
7386 [=](MachineInstrBuilder &MIB) {
7387 MIB.addImm(SignExtend);
7388 MIB.addImm(0);
7389 }}};
7390}
7391
7392/// Select a "register plus unscaled signed 9-bit immediate" address. This
7393/// should only match when there is an offset that is not valid for a scaled
7394/// immediate addressing mode. The "Size" argument is the size in bytes of the
7395/// memory reference, which is needed here to know what is valid for a scaled
7396/// immediate.
7398AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
7399 unsigned Size) const {
7401 Root.getParent()->getParent()->getParent()->getRegInfo();
7402
7403 if (!Root.isReg())
7404 return std::nullopt;
7405
7406 if (!isBaseWithConstantOffset(Root, MRI))
7407 return std::nullopt;
7408
7409 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
7410
7411 MachineOperand &OffImm = RootDef->getOperand(2);
7412 if (!OffImm.isReg())
7413 return std::nullopt;
7414 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
7415 if (RHS->getOpcode() != TargetOpcode::G_CONSTANT)
7416 return std::nullopt;
7417 int64_t RHSC;
7418 MachineOperand &RHSOp1 = RHS->getOperand(1);
7419 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
7420 return std::nullopt;
7421 RHSC = RHSOp1.getCImm()->getSExtValue();
7422
7423 if (RHSC >= -256 && RHSC < 256) {
7424 MachineOperand &Base = RootDef->getOperand(1);
7425 return {{
7426 [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
7427 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
7428 }};
7429 }
7430 return std::nullopt;
7431}
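// Illustrative example (editorial addition): a byte offset of -8 on an 8-byte
// load cannot use the scaled [base, #imm] form (negative offsets are not
// encodable there), but it lies within [-256, 255] and matches here,
// typically ending up as "ldur x0, [x1, #-8]".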
7432
7434AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
7435 unsigned Size,
7436 MachineRegisterInfo &MRI) const {
7437 if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
7438 return std::nullopt;
7439 MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
7440 if (Adrp.getOpcode() != AArch64::ADRP)
7441 return std::nullopt;
7442
7443 // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
7444 auto Offset = Adrp.getOperand(1).getOffset();
7445 if (Offset % Size != 0)
7446 return std::nullopt;
7447
7448 auto GV = Adrp.getOperand(1).getGlobal();
7449 if (GV->isThreadLocal())
7450 return std::nullopt;
7451
7452 auto &MF = *RootDef.getParent()->getParent();
7453 if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
7454 return std::nullopt;
7455
7456 unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
7457 MachineIRBuilder MIRBuilder(RootDef);
7458 Register AdrpReg = Adrp.getOperand(0).getReg();
7459 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
7460 [=](MachineInstrBuilder &MIB) {
7461 MIB.addGlobalAddress(GV, Offset,
7462 OpFlags | AArch64II::MO_PAGEOFF |
7464 }}};
7465}
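// Illustrative example (editorial addition): for a sufficiently aligned,
// non-thread-local global @g in the small code model, an ADRP + G_ADD_LOW
// address feeding a load can fold the low part into the load's immediate,
// giving the familiar pair
//
//   adrp x8, g
//   ldr  x0, [x8, :lo12:g]
//
// instead of materializing the full address with a separate add.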
7466
7467/// Select a "register plus scaled unsigned 12-bit immediate" address. The
7468/// "Size" argument is the size in bytes of the memory reference, which
7469/// determines the scale.
7471AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
7472 unsigned Size) const {
7473 MachineFunction &MF = *Root.getParent()->getParent()->getParent();
7475
7476 if (!Root.isReg())
7477 return std::nullopt;
7478
7479 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
7480 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
7481 return {{
7482 [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
7483 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
7484 }};
7485 }
7486
7488 // Check if we can fold in the ADD of small code model ADRP + ADD address.
7489 if (CM == CodeModel::Small) {
7490 auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
7491 if (OpFns)
7492 return OpFns;
7493 }
7494
7495 if (isBaseWithConstantOffset(Root, MRI)) {
7496 MachineOperand &LHS = RootDef->getOperand(1);
7497 MachineOperand &RHS = RootDef->getOperand(2);
7498 MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
7499 MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
7500
7501 int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
7502 unsigned Scale = Log2_32(Size);
7503 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
7504 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
7505 return {{
7506 [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
7507 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
7508 }};
7509
7510 return {{
7511 [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
7512 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
7513 }};
7514 }
7515 }
7516
7517 // Before falling back to our general case, check if the unscaled
7518 // instructions can handle this. If so, that's preferable.
7519 if (selectAddrModeUnscaled(Root, Size))
7520 return std::nullopt;
7521
7522 return {{
7523 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
7524 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
7525 }};
7526}
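// Illustrative example (editorial addition): a G_PTR_ADD of constant 16
// feeding an 8-byte load renders the base plus an immediate operand of
// 16 >> 3 = 2, i.e. the scaled form behind "ldr x0, [x1, #16]". An offset of
// 12 fails the scaling check (12 & 7 != 0), so this selector defers to the
// unscaled form above.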
7527
7528/// Given a shift instruction, return the correct shift type for that
7529/// instruction.
7531 switch (MI.getOpcode()) {
7532 default:
7534 case TargetOpcode::G_SHL:
7535 return AArch64_AM::LSL;
7536 case TargetOpcode::G_LSHR:
7537 return AArch64_AM::LSR;
7538 case TargetOpcode::G_ASHR:
7539 return AArch64_AM::ASR;
7540 case TargetOpcode::G_ROTR:
7541 return AArch64_AM::ROR;
7542 }
7543}
7544
7545/// Select a "shifted register" operand. If the value is not shifted, set the
7546/// shift operand to a default value of "lsl 0".
7548AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
7549 bool AllowROR) const {
7550 if (!Root.isReg())
7551 return std::nullopt;
7553 Root.getParent()->getParent()->getParent()->getRegInfo();
7554
7555 // Check if the operand is defined by an instruction which corresponds to
7556 // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
7557 MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
7559 if (ShType == AArch64_AM::InvalidShiftExtend)
7560 return std::nullopt;
7561 if (ShType == AArch64_AM::ROR && !AllowROR)
7562 return std::nullopt;
7563 if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI, false))
7564 return std::nullopt;
7565
7566 // Need an immediate on the RHS.
7567 MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
7568 auto Immed = getImmedFromMO(ShiftRHS);
7569 if (!Immed)
7570 return std::nullopt;
7571
7572 // We have something that we can fold. Fold in the shift's LHS and RHS into
7573 // the instruction.
7574 MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
7575 Register ShiftReg = ShiftLHS.getReg();
7576
7577 unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
7578 unsigned Val = *Immed & (NumBits - 1);
7579 unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);
7580
7581 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
7582 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
7583}
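// Illustrative example (editorial addition): given
//
//   %c:gpr(s64) = G_CONSTANT i64 3
//   %s:gpr(s64) = G_SHL %x, %c
//   %d:gpr(s64) = G_AND %y, %s
//
// and %s having no other users, the shift folds into the logical
// instruction's shifted-register operand, giving roughly
// "and x0, x1, x2, lsl #3" instead of a separate lsl.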
7584
7585AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
7586 MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
7587 unsigned Opc = MI.getOpcode();
7588
7589 // Handle explicit extend instructions first.
7590 if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
7591 unsigned Size;
7592 if (Opc == TargetOpcode::G_SEXT)
7593 Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7594 else
7595 Size = MI.getOperand(2).getImm();
7596 assert(Size != 64 && "Extend from 64 bits?");
7597 switch (Size) {
7598 case 8:
7599 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB;
7600 case 16:
7601 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH;
7602 case 32:
7603 return AArch64_AM::SXTW;
7604 default:
7606 }
7607 }
7608
7609 if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
7610 unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7611 assert(Size != 64 && "Extend from 64 bits?");
7612 switch (Size) {
7613 case 8:
7614 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB;
7615 case 16:
7616 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH;
7617 case 32:
7618 return AArch64_AM::UXTW;
7619 default:
7621 }
7622 }
7623
7624 // Don't have an explicit extend. Try to handle a G_AND with a constant mask
7625 // on the RHS.
7626 if (Opc != TargetOpcode::G_AND)
7628
7629 std::optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
7630 if (!MaybeAndMask)
7632 uint64_t AndMask = *MaybeAndMask;
7633 switch (AndMask) {
7634 default:
7636 case 0xFF:
7637 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
7638 case 0xFFFF:
7639 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
7640 case 0xFFFFFFFF:
7641 return AArch64_AM::UXTW;
7642 }
7643}
7644
7645Register AArch64InstructionSelector::moveScalarRegClass(
7646 Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
7647 MachineRegisterInfo &MRI = *MIB.getMRI();
7648 auto Ty = MRI.getType(Reg);
7649 assert(!Ty.isVector() && "Expected scalars only!");
7650 if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
7651 return Reg;
7652
7653 // Create a copy and immediately select it.
7654 // FIXME: We should have an emitCopy function?
7655 auto Copy = MIB.buildCopy({&RC}, {Reg});
7656 selectCopy(*Copy, TII, MRI, TRI, RBI);
7657 return Copy.getReg(0);
7658}
7659
7660/// Select an "extended register" operand. This operand folds in an extend
7661/// followed by an optional left shift.
7663AArch64InstructionSelector::selectArithExtendedRegister(
7664 MachineOperand &Root) const {
7665 if (!Root.isReg())
7666 return std::nullopt;
7668 Root.getParent()->getParent()->getParent()->getRegInfo();
7669
7670 uint64_t ShiftVal = 0;
7671 Register ExtReg;
7673 MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
7674 if (!RootDef)
7675 return std::nullopt;
7676
7677 if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI, false))
7678 return std::nullopt;
7679
7680 // Check if we can fold a shift and an extend.
7681 if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
7682 // Look for a constant on the RHS of the shift.
7683 MachineOperand &RHS = RootDef->getOperand(2);
7684 std::optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
7685 if (!MaybeShiftVal)
7686 return std::nullopt;
7687 ShiftVal = *MaybeShiftVal;
7688 if (ShiftVal > 4)
7689 return std::nullopt;
7690 // Look for a valid extend instruction on the LHS of the shift.
7691 MachineOperand &LHS = RootDef->getOperand(1);
7692 MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
7693 if (!ExtDef)
7694 return std::nullopt;
7695 Ext = getExtendTypeForInst(*ExtDef, MRI);
7697 return std::nullopt;
7698 ExtReg = ExtDef->getOperand(1).getReg();
7699 } else {
7700 // Didn't get a shift. Try just folding an extend.
7701 Ext = getExtendTypeForInst(*RootDef, MRI);
7703 return std::nullopt;
7704 ExtReg = RootDef->getOperand(1).getReg();
7705
7706 // If we have a 32 bit instruction which zeroes out the high half of a
7707 // register, we get an implicit zero extend for free. Check if we have one.
7708 // FIXME: We actually emit the extend right now even though we don't have
7709 // to.
7710 if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
7711 MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
7712 if (isDef32(*ExtInst))
7713 return std::nullopt;
7714 }
7715 }
7716
7717 // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
7718 // copy.
7719 MachineIRBuilder MIB(*RootDef);
7720 ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);
7721
7722 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
7723 [=](MachineInstrBuilder &MIB) {
7724 MIB.addImm(getArithExtendImm(Ext, ShiftVal));
7725 }}};
7726}
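// Illustrative example (editorial addition): an operand computed as
// (zext i8 %b to i64) << 2 and fed into a 64-bit add can fold both the extend
// and the shift into the extended-register form, giving roughly
// "add x0, x1, w2, uxtb #2" instead of a separate uxtb/lsl/add sequence.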
7727
7729AArch64InstructionSelector::selectExtractHigh(MachineOperand &Root) const {
7730 if (!Root.isReg())
7731 return std::nullopt;
7733 Root.getParent()->getParent()->getParent()->getRegInfo();
7734
7735 auto Extract = getDefSrcRegIgnoringCopies(Root.getReg(), MRI);
7736 while (Extract && Extract->MI->getOpcode() == TargetOpcode::G_BITCAST &&
7737 STI.isLittleEndian())
7738 Extract =
7739 getDefSrcRegIgnoringCopies(Extract->MI->getOperand(1).getReg(), MRI);
7740 if (!Extract)
7741 return std::nullopt;
7742
7743 if (Extract->MI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
7744 if (Extract->Reg == Extract->MI->getOperand(1).getReg()) {
7745 Register ExtReg = Extract->MI->getOperand(2).getReg();
7746 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
7747 }
7748 }
7749 if (Extract->MI->getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) {
7750 LLT SrcTy = MRI.getType(Extract->MI->getOperand(1).getReg());
7752 Extract->MI->getOperand(2).getReg(), MRI);
7753 if (LaneIdx && SrcTy == LLT::fixed_vector(2, 64) &&
7754 LaneIdx->Value.getSExtValue() == 1) {
7755 Register ExtReg = Extract->MI->getOperand(1).getReg();
7756 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
7757 }
7758 }
7759
7760 return std::nullopt;
7761}
7762
7763void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
7764 const MachineInstr &MI,
7765 int OpIdx) const {
7766 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
7767 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7768 "Expected G_CONSTANT");
7769 std::optional<int64_t> CstVal =
7770 getIConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
7771 assert(CstVal && "Expected constant value");
7772 MIB.addImm(*CstVal);
7773}
7774
7775void AArch64InstructionSelector::renderLogicalImm32(
7776 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7777 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7778 "Expected G_CONSTANT");
7779 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
7781 MIB.addImm(Enc);
7782}
7783
7784void AArch64InstructionSelector::renderLogicalImm64(
7785 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7786 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7787 "Expected G_CONSTANT");
7788 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
7790 MIB.addImm(Enc);
7791}
7792
7793void AArch64InstructionSelector::renderUbsanTrap(MachineInstrBuilder &MIB,
7794 const MachineInstr &MI,
7795 int OpIdx) const {
7796 assert(MI.getOpcode() == TargetOpcode::G_UBSANTRAP && OpIdx == 0 &&
7797 "Expected G_UBSANTRAP");
7798 MIB.addImm(MI.getOperand(0).getImm() | ('U' << 8));
7799}
7800
7801void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB,
7802 const MachineInstr &MI,
7803 int OpIdx) const {
7804 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7805 "Expected G_FCONSTANT");
7806 MIB.addImm(
7807 AArch64_AM::getFP16Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7808}
7809
7810void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB,
7811 const MachineInstr &MI,
7812 int OpIdx) const {
7813 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7814 "Expected G_FCONSTANT");
7815 MIB.addImm(
7816 AArch64_AM::getFP32Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7817}
7818
7819void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
7820 const MachineInstr &MI,
7821 int OpIdx) const {
7822 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7823 "Expected G_FCONSTANT");
7824 MIB.addImm(
7825 AArch64_AM::getFP64Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7826}
7827
7828void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
7829 MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
7830 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7831 "Expected G_FCONSTANT");
7833 .getFPImm()
7834 ->getValueAPF()
7835 .bitcastToAPInt()
7836 .getZExtValue()));
7837}
7838
7839bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
7840 const MachineInstr &MI, unsigned NumBytes) const {
7841 if (!MI.mayLoadOrStore())
7842 return false;
7843 assert(MI.hasOneMemOperand() &&
7844 "Expected load/store to have only one mem op!");
7845 return (*MI.memoperands_begin())->getSize() == NumBytes;
7846}
7847
7848bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
7849 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
7850 if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
7851 return false;
7852
7853 // Only return true if we know the operation will zero-out the high half of
7854 // the 64-bit register. Truncates can be subregister copies, which don't
7855 // zero out the high bits. Copies and other copy-like instructions can be
7856 // fed by truncates, or could be lowered as subregister copies.
7857 switch (MI.getOpcode()) {
7858 default:
7859 return true;
7860 case TargetOpcode::COPY:
7861 case TargetOpcode::G_BITCAST:
7862 case TargetOpcode::G_TRUNC:
7863 case TargetOpcode::G_PHI:
7864 return false;
7865 }
7866}
7867
7868
7869// Perform fixups on the given PHI instruction's operands to force them all
7870// to be the same as the destination regbank.
7872 const AArch64RegisterBankInfo &RBI) {
7873 assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
7874 Register DstReg = MI.getOperand(0).getReg();
7875 const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
7876 assert(DstRB && "Expected PHI dst to have regbank assigned");
7877 MachineIRBuilder MIB(MI);
7878
7879 // Go through each operand and ensure it has the same regbank.
7880 for (MachineOperand &MO : llvm::drop_begin(MI.operands())) {
7881 if (!MO.isReg())
7882 continue;
7883 Register OpReg = MO.getReg();
7884 const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
7885 if (RB != DstRB) {
7886 // Insert a cross-bank copy.
7887 auto *OpDef = MRI.getVRegDef(OpReg);
7888 const LLT &Ty = MRI.getType(OpReg);
7889 MachineBasicBlock &OpDefBB = *OpDef->getParent();
7890
7891 // Any instruction we insert must appear after all PHIs in the block
7892 // for the block to be valid MIR.
7893 MachineBasicBlock::iterator InsertPt = std::next(OpDef->getIterator());
7894 if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
7895 InsertPt = OpDefBB.getFirstNonPHI();
7896 MIB.setInsertPt(*OpDef->getParent(), InsertPt);
7897 auto Copy = MIB.buildCopy(Ty, OpReg);
7898 MRI.setRegBank(Copy.getReg(0), *DstRB);
7899 MO.setReg(Copy.getReg(0));
7900 }
7901 }
7902}
7903
7904void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
7905 // We're looking for PHIs; build a list so we don't invalidate iterators.
7908 for (auto &BB : MF) {
7909 for (auto &MI : BB) {
7910 if (MI.getOpcode() == TargetOpcode::G_PHI)
7911 Phis.emplace_back(&MI);
7912 }
7913 }
7914
7915 for (auto *MI : Phis) {
7916 // We need to do some work here if the operand types are < 16 bit and they
7917 // are split across fpr/gpr banks. Since all types <32b on gpr
7918 // end up being assigned gpr32 regclasses, we can end up with PHIs here
7919 // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
7920 // be selecting heterogeneous regbanks for operands if possible, but we
7921 // still need to be able to deal with it here.
7922 //
7923 // To fix this, if we have a gpr-bank operand < 32b in size and at least
7924 // one other operand is on the fpr bank, then we add cross-bank copies
7925 // to homogenize the operand banks. For simplicity the bank that we choose
7926 // to settle on is whatever bank the def operand has. For example:
7927 //
7928 // %endbb:
7929 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
7930 // =>
7931 // %bb2:
7932 // ...
7933 // %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
7934 // ...
7935 // %endbb:
7936 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
7937 bool HasGPROp = false, HasFPROp = false;
7938 for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
7939 if (!MO.isReg())
7940 continue;
7941 const LLT &Ty = MRI.getType(MO.getReg());
7942 if (!Ty.isValid() || !Ty.isScalar())
7943 break;
7944 if (Ty.getSizeInBits() >= 32)
7945 break;
7946 const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
7947 // If for some reason we don't have a regbank yet, don't try anything.
7948 if (!RB)
7949 break;
7950
7951 if (RB->getID() == AArch64::GPRRegBankID)
7952 HasGPROp = true;
7953 else
7954 HasFPROp = true;
7955 }
7956 // We have heterogeneous regbanks, so we need to fix them up.
7957 if (HasGPROp && HasFPROp)
7958 fixupPHIOpBanks(*MI, MRI, RBI);
7959 }
7960}
7961
7962namespace llvm {
7963InstructionSelector *
7964createAArch64InstructionSelector(const AArch64TargetMachine &TM,
7965 const AArch64Subtarget &Subtarget,
7966 const AArch64RegisterBankInfo &RBI) {
7967 return new AArch64InstructionSelector(TM, Subtarget, RBI);
7968}
7969}
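A minimal usage sketch (not taken from this file): TM, Subtarget and RBI are
assumed to be the AArch64 target machine, subtarget and register bank info
already built during target initialization.

  // Hypothetical call site for the factory defined above; the returned
  // selector is the object the GlobalISel InstructionSelect pass runs over
  // each MachineFunction.
  InstructionSelector *ISel =
      createAArch64InstructionSelector(TM, Subtarget, RBI);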