AArch64InstructionSelector.cpp
1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
15#include "AArch64InstrInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
41#include "llvm/IR/Constants.h"
44#include "llvm/IR/IntrinsicsAArch64.h"
45#include "llvm/IR/Type.h"
46#include "llvm/Pass.h"
47#include "llvm/Support/Debug.h"
49#include <optional>
50
51#define DEBUG_TYPE "aarch64-isel"
52
53using namespace llvm;
54using namespace MIPatternMatch;
55using namespace AArch64GISelUtils;
56
57namespace llvm {
58class BlockFrequencyInfo;
59class ProfileSummaryInfo;
60}
61
62namespace {
63
64#define GET_GLOBALISEL_PREDICATE_BITSET
65#include "AArch64GenGlobalISel.inc"
66#undef GET_GLOBALISEL_PREDICATE_BITSET
67
68
69class AArch64InstructionSelector : public InstructionSelector {
70public:
71 AArch64InstructionSelector(const AArch64TargetMachine &TM,
72 const AArch64Subtarget &STI,
73 const AArch64RegisterBankInfo &RBI);
74
75 bool select(MachineInstr &I) override;
76 static const char *getName() { return DEBUG_TYPE; }
77
78 void setupMF(MachineFunction &MF, GISelKnownBits *KB,
79 CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI,
80 BlockFrequencyInfo *BFI) override {
81 InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
82 MIB.setMF(MF);
83
84 // hasFnAttribute() is expensive to call on every BRCOND selection, so
85 // cache it here for each run of the selector.
86 ProduceNonFlagSettingCondBr =
87 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
88 MFReturnAddr = Register();
89
90 processPHIs(MF);
91 }
92
93private:
94 /// tblgen-erated 'select' implementation, used as the initial selector for
95 /// the patterns that don't require complex C++.
96 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
97
98 // A lowering phase that runs before any selection attempts.
99 // Returns true if the instruction was modified.
100 bool preISelLower(MachineInstr &I);
101
102 // An early selection function that runs before the selectImpl() call.
103 bool earlySelect(MachineInstr &I);
104
105 /// Save state that is shared between select calls, call select on \p I and
106 /// then restore the saved state. This can be used to recursively call select
107 /// within a select call.
108 bool selectAndRestoreState(MachineInstr &I);
109
110 // Do some preprocessing of G_PHIs before we begin selection.
111 void processPHIs(MachineFunction &MF);
112
113 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
114
115 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
116 bool contractCrossBankCopyIntoStore(MachineInstr &I,
117 MachineRegisterInfo &MRI);
118
119 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
120
121 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
122 MachineRegisterInfo &MRI) const;
123 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
124 MachineRegisterInfo &MRI) const;
125
126 ///@{
127 /// Helper functions for selectCompareBranch.
128 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
129 MachineIRBuilder &MIB) const;
130 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
131 MachineIRBuilder &MIB) const;
132 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
133 MachineIRBuilder &MIB) const;
134 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
135 MachineBasicBlock *DstMBB,
136 MachineIRBuilder &MIB) const;
137 ///@}
138
139 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
140 MachineRegisterInfo &MRI);
141
142 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
143 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
144
145 // Helper to generate an equivalent of scalar_to_vector into a new register,
146 // returned via 'Dst'.
147 MachineInstr *emitScalarToVector(unsigned EltSize,
148 const TargetRegisterClass *DstRC,
149 Register Scalar,
150 MachineIRBuilder &MIRBuilder) const;
151 /// Helper to narrow vector that was widened by emitScalarToVector.
152 /// Copy lowest part of 128-bit or 64-bit vector to 64-bit or 32-bit
153 /// vector, correspondingly.
154 MachineInstr *emitNarrowVector(Register DstReg, Register SrcReg,
155 MachineIRBuilder &MIRBuilder,
156 MachineRegisterInfo &MRI) const;
157
158 /// Emit a lane insert into \p DstReg, or a new vector register if
159 /// std::nullopt is provided.
160 ///
161 /// The lane inserted into is defined by \p LaneIdx. The vector source
162 /// register is given by \p SrcReg. The register containing the element is
163 /// given by \p EltReg.
164 MachineInstr *emitLaneInsert(std::optional<Register> DstReg, Register SrcReg,
165 Register EltReg, unsigned LaneIdx,
166 const RegisterBank &RB,
167 MachineIRBuilder &MIRBuilder) const;
168
169 /// Emit a sequence of instructions representing a constant \p CV for a
170 /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
171 ///
172 /// \returns the last instruction in the sequence on success, and nullptr
173 /// otherwise.
174 MachineInstr *emitConstantVector(Register Dst, Constant *CV,
175 MachineIRBuilder &MIRBuilder,
176 MachineRegisterInfo &MRI);
177
178 MachineInstr *tryAdvSIMDModImm8(Register Dst, unsigned DstSize, APInt Bits,
179 MachineIRBuilder &MIRBuilder);
180
181 MachineInstr *tryAdvSIMDModImm16(Register Dst, unsigned DstSize, APInt Bits,
182 MachineIRBuilder &MIRBuilder, bool Inv);
183
184 MachineInstr *tryAdvSIMDModImm32(Register Dst, unsigned DstSize, APInt Bits,
185 MachineIRBuilder &MIRBuilder, bool Inv);
186 MachineInstr *tryAdvSIMDModImm64(Register Dst, unsigned DstSize, APInt Bits,
187 MachineIRBuilder &MIRBuilder);
188 MachineInstr *tryAdvSIMDModImm321s(Register Dst, unsigned DstSize, APInt Bits,
189 MachineIRBuilder &MIRBuilder, bool Inv);
190 MachineInstr *tryAdvSIMDModImmFP(Register Dst, unsigned DstSize, APInt Bits,
191 MachineIRBuilder &MIRBuilder);
192
193 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
194 MachineRegisterInfo &MRI);
195 /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
196 /// SUBREG_TO_REG.
197 bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
198 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
199 bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
200 bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
201
202 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
203 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
204 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
205 bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
206
207 /// Helper function to select vector load intrinsics like
208 /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
209 /// \p Opc is the opcode that the selected instruction should use.
210 /// \p NumVecs is the number of vector destinations for the instruction.
211 /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
212 bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
213 MachineInstr &I);
214 bool selectVectorLoadLaneIntrinsic(unsigned Opc, unsigned NumVecs,
215 MachineInstr &I);
216 void selectVectorStoreIntrinsic(MachineInstr &I, unsigned NumVecs,
217 unsigned Opc);
218 bool selectVectorStoreLaneIntrinsic(MachineInstr &I, unsigned NumVecs,
219 unsigned Opc);
220 bool selectIntrinsicWithSideEffects(MachineInstr &I,
221 MachineRegisterInfo &MRI);
222 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
223 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
224 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
225 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
226 bool selectPtrAuthGlobalValue(MachineInstr &I,
227 MachineRegisterInfo &MRI) const;
228 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
229 bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
230 bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
231 void SelectTable(MachineInstr &I, MachineRegisterInfo &MRI, unsigned NumVecs,
232 unsigned Opc1, unsigned Opc2, bool isExt);
233
234 bool selectIndexedExtLoad(MachineInstr &I, MachineRegisterInfo &MRI);
235 bool selectIndexedLoad(MachineInstr &I, MachineRegisterInfo &MRI);
236 bool selectIndexedStore(GIndexedStore &I, MachineRegisterInfo &MRI);
237
238 unsigned emitConstantPoolEntry(const Constant *CPVal,
239 MachineFunction &MF) const;
240 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
241 MachineIRBuilder &MIRBuilder) const;
242
243 // Emit a vector concat operation.
244 MachineInstr *emitVectorConcat(std::optional<Register> Dst, Register Op1,
245 Register Op2,
246 MachineIRBuilder &MIRBuilder) const;
247
248 // Emit an integer compare between LHS and RHS, which checks for Predicate.
249 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
250 MachineOperand &Predicate,
251 MachineIRBuilder &MIRBuilder) const;
252
253 /// Emit a floating point comparison between \p LHS and \p RHS.
254 /// \p Pred if given is the intended predicate to use.
255 MachineInstr *
256 emitFPCompare(Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
257 std::optional<CmpInst::Predicate> = std::nullopt) const;
258
259 MachineInstr *
260 emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
261 std::initializer_list<llvm::SrcOp> SrcOps,
262 MachineIRBuilder &MIRBuilder,
263 const ComplexRendererFns &RenderFns = std::nullopt) const;
264 /// Helper function to emit an add or sub instruction.
265 ///
266 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
267 /// in a specific order.
268 ///
269 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
270 ///
271 /// \code
272 /// const std::array<std::array<unsigned, 2>, 4> Table {
273 /// {{AArch64::ADDXri, AArch64::ADDWri},
274 /// {AArch64::ADDXrs, AArch64::ADDWrs},
275 /// {AArch64::ADDXrr, AArch64::ADDWrr},
276 /// {AArch64::SUBXri, AArch64::SUBWri},
277 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
278 /// \endcode
279 ///
280 /// Each row in the table corresponds to a different addressing mode. Each
281 /// column corresponds to a different register size.
282 ///
283 /// \attention Rows must be structured as follows:
284 /// - Row 0: The ri opcode variants
285 /// - Row 1: The rs opcode variants
286 /// - Row 2: The rr opcode variants
287 /// - Row 3: The ri opcode variants for negative immediates
288 /// - Row 4: The rx opcode variants
289 ///
290 /// \attention Columns must be structured as follows:
291 /// - Column 0: The 64-bit opcode variants
292 /// - Column 1: The 32-bit opcode variants
293 ///
294 /// \p Dst is the destination register of the binop to emit.
295 /// \p LHS is the left-hand operand of the binop to emit.
296 /// \p RHS is the right-hand operand of the binop to emit.
297 MachineInstr *emitAddSub(
298 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
299 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
300 MachineIRBuilder &MIRBuilder) const;
301 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
302 MachineOperand &RHS,
303 MachineIRBuilder &MIRBuilder) const;
304 MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
305 MachineIRBuilder &MIRBuilder) const;
306 MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
307 MachineIRBuilder &MIRBuilder) const;
308 MachineInstr *emitADCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
309 MachineIRBuilder &MIRBuilder) const;
310 MachineInstr *emitSBCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
311 MachineIRBuilder &MIRBuilder) const;
312 MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
313 MachineIRBuilder &MIRBuilder) const;
314 MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
315 MachineIRBuilder &MIRBuilder) const;
316 MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
317 AArch64CC::CondCode CC,
318 MachineIRBuilder &MIRBuilder) const;
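// Editor's note: the sketch below is illustrative only and is not part of the
// original source. It shows how a table passed to emitAddSub() is laid out and
// indexed, following the row/column convention documented above:
// row = addressing mode (ri, rs, rr, negative-ri, rx), column = 64-/32-bit.
// \code
// const std::array<std::array<unsigned, 2>, 5> AddSubOpcTable{
//     {{AArch64::ADDXri, AArch64::ADDWri},
//      {AArch64::ADDXrs, AArch64::ADDWrs},
//      {AArch64::ADDXrr, AArch64::ADDWrr},
//      {AArch64::SUBXri, AArch64::SUBWri},
//      {AArch64::ADDXrx, AArch64::ADDWrx}}};
// unsigned RegRegOpc32 = AddSubOpcTable[2][1]; // row 2 (rr), column 1 (32-bit) -> AArch64::ADDWrr
// \endcode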
319 MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,
320 const RegisterBank &DstRB, LLT ScalarTy,
321 Register VecReg, unsigned LaneIdx,
322 MachineIRBuilder &MIRBuilder) const;
323 MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
324 AArch64CC::CondCode Pred,
325 MachineIRBuilder &MIRBuilder) const;
326 /// Emit a CSet for a FP compare.
327 ///
328 /// \p Dst is expected to be a 32-bit scalar register.
329 MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
330 MachineIRBuilder &MIRBuilder) const;
331
332 /// Emit an instruction that sets NZCV to the carry-in expected by \p I.
333 /// Might elide the instruction if the previous instruction already sets NZCV
334 /// correctly.
335 MachineInstr *emitCarryIn(MachineInstr &I, Register CarryReg);
336
337 /// Emit the overflow op for \p Opcode.
338 ///
339 /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
340 /// G_USUBO, etc.
341 std::pair<MachineInstr *, AArch64CC::CondCode>
342 emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
343 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
344
345 bool selectOverflowOp(MachineInstr &I, MachineRegisterInfo &MRI);
346
347 /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
348 /// In some cases this is even possible with OR operations in the expression.
349 MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC,
350 MachineIRBuilder &MIB) const;
351 MachineInstr *emitConditionalComparison(Register LHS, Register RHS,
352 CmpInst::Predicate CC,
353 AArch64CC::CondCode Predicate,
354 AArch64CC::CondCode OutCC,
355 MachineIRBuilder &MIB) const;
356 MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC,
357 bool Negate, Register CCOp,
358 AArch64CC::CondCode Predicate,
359 MachineIRBuilder &MIB) const;
360
361 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
362 /// \p IsNegative is true if the test should be "not zero".
363 /// This will also optimize the test bit instruction when possible.
364 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
365 MachineBasicBlock *DstMBB,
366 MachineIRBuilder &MIB) const;
367
368 /// Emit a CB(N)Z instruction which branches to \p DestMBB.
369 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
370 MachineBasicBlock *DestMBB,
371 MachineIRBuilder &MIB) const;
372
373 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
374 // We use these manually instead of using the importer since it doesn't
375 // support SDNodeXForm.
376 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
377 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
378 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
379 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
380
381 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
382 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
383 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
384
385 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
386 unsigned Size) const;
387
388 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
389 return selectAddrModeUnscaled(Root, 1);
390 }
391 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
392 return selectAddrModeUnscaled(Root, 2);
393 }
394 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
395 return selectAddrModeUnscaled(Root, 4);
396 }
397 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
398 return selectAddrModeUnscaled(Root, 8);
399 }
400 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
401 return selectAddrModeUnscaled(Root, 16);
402 }
403
404 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
405 /// from complex pattern matchers like selectAddrModeIndexed().
406 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
407 MachineRegisterInfo &MRI) const;
408
409 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
410 unsigned Size) const;
411 template <int Width>
412 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
413 return selectAddrModeIndexed(Root, Width / 8);
414 }
415
416 std::optional<bool>
417 isWorthFoldingIntoAddrMode(MachineInstr &MI,
418 const MachineRegisterInfo &MRI) const;
419
420 bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
421 const MachineRegisterInfo &MRI,
422 bool IsAddrOperand) const;
423 ComplexRendererFns
424 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
425 unsigned SizeInBytes) const;
426
427 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
428 /// or not a shift + extend should be folded into an addressing mode. Returns
429 /// None when this is not profitable or possible.
430 ComplexRendererFns
431 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
432 MachineOperand &Offset, unsigned SizeInBytes,
433 bool WantsExt) const;
434 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
435 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
436 unsigned SizeInBytes) const;
437 template <int Width>
438 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
439 return selectAddrModeXRO(Root, Width / 8);
440 }
441
442 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
443 unsigned SizeInBytes) const;
444 template <int Width>
445 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
446 return selectAddrModeWRO(Root, Width / 8);
447 }
448
449 ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
450 bool AllowROR = false) const;
451
452 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
453 return selectShiftedRegister(Root);
454 }
455
456 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
457 return selectShiftedRegister(Root, true);
458 }
459
460 /// Given an extend instruction, determine the correct shift-extend type for
461 /// that instruction.
462 ///
463 /// If the instruction is going to be used in a load or store, pass
464 /// \p IsLoadStore = true.
465 AArch64_AM::ShiftExtendType
466 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
467 bool IsLoadStore = false) const;
468
469 /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
470 ///
471 /// \returns Either \p Reg if no change was necessary, or the new register
472 /// created by moving \p Reg.
473 ///
474 /// Note: This uses emitCopy right now.
475 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
476 MachineIRBuilder &MIB) const;
477
478 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
479
480 ComplexRendererFns selectExtractHigh(MachineOperand &Root) const;
481
482 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
483 int OpIdx = -1) const;
484 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
485 int OpIdx = -1) const;
486 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
487 int OpIdx = -1) const;
488 void renderUbsanTrap(MachineInstrBuilder &MIB, const MachineInstr &MI,
489 int OpIdx) const;
490 void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
491 int OpIdx = -1) const;
492 void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
493 int OpIdx = -1) const;
494 void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
495 int OpIdx = -1) const;
496 void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
497 const MachineInstr &MI,
498 int OpIdx = -1) const;
499
500 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
501 void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
502
503 // Optimization methods.
504 bool tryOptSelect(GSelect &Sel);
505 bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
506 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
507 MachineOperand &Predicate,
508 MachineIRBuilder &MIRBuilder) const;
509
510 /// Return true if \p MI is a load or store of \p NumBytes bytes.
511 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
512
513 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
514 /// register zeroed out. In other words, the result of MI has been explicitly
515 /// zero extended.
516 bool isDef32(const MachineInstr &MI) const;
517
518 const AArch64TargetMachine &TM;
519 const AArch64Subtarget &STI;
520 const AArch64InstrInfo &TII;
521 const AArch64RegisterInfo &TRI;
522 const AArch64RegisterBankInfo &RBI;
523
524 bool ProduceNonFlagSettingCondBr = false;
525
526 // Some cached values used during selection.
527 // We use LR as a live-in register, and we keep track of it here as it can be
528 // clobbered by calls.
529 Register MFReturnAddr;
530
531 MachineIRBuilder MIB;
532
533#define GET_GLOBALISEL_PREDICATES_DECL
534#include "AArch64GenGlobalISel.inc"
535#undef GET_GLOBALISEL_PREDICATES_DECL
536
537// We declare the temporaries used by selectImpl() in the class to minimize the
538// cost of constructing placeholder values.
539#define GET_GLOBALISEL_TEMPORARIES_DECL
540#include "AArch64GenGlobalISel.inc"
541#undef GET_GLOBALISEL_TEMPORARIES_DECL
542};
543
544} // end anonymous namespace
545
546#define GET_GLOBALISEL_IMPL
547#include "AArch64GenGlobalISel.inc"
548#undef GET_GLOBALISEL_IMPL
549
550AArch64InstructionSelector::AArch64InstructionSelector(
551 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
552 const AArch64RegisterBankInfo &RBI)
553 : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
554 RBI(RBI),
555#define GET_GLOBALISEL_PREDICATES_INIT
556#include "AArch64GenGlobalISel.inc"
557#undef GET_GLOBALISEL_PREDICATES_INIT
558#define GET_GLOBALISEL_TEMPORARIES_INIT
559#include "AArch64GenGlobalISel.inc"
560#undef GET_GLOBALISEL_TEMPORARIES_INIT
561{
562}
563
564// FIXME: This should be target-independent, inferred from the types declared
565// for each class in the bank.
566//
567/// Given a register bank, and a type, return the smallest register class that
568/// can represent that combination.
569static const TargetRegisterClass *
570getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
571 bool GetAllRegSet = false) {
572 if (RB.getID() == AArch64::GPRRegBankID) {
573 if (Ty.getSizeInBits() <= 32)
574 return GetAllRegSet ? &AArch64::GPR32allRegClass
575 : &AArch64::GPR32RegClass;
576 if (Ty.getSizeInBits() == 64)
577 return GetAllRegSet ? &AArch64::GPR64allRegClass
578 : &AArch64::GPR64RegClass;
579 if (Ty.getSizeInBits() == 128)
580 return &AArch64::XSeqPairsClassRegClass;
581 return nullptr;
582 }
583
584 if (RB.getID() == AArch64::FPRRegBankID) {
585 switch (Ty.getSizeInBits()) {
586 case 8:
587 return &AArch64::FPR8RegClass;
588 case 16:
589 return &AArch64::FPR16RegClass;
590 case 32:
591 return &AArch64::FPR32RegClass;
592 case 64:
593 return &AArch64::FPR64RegClass;
594 case 128:
595 return &AArch64::FPR128RegClass;
596 }
597 return nullptr;
598 }
599
600 return nullptr;
601}
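// Illustrative usage (editor's addition, not in the original source), where
// GPRBank and FPRBank stand for the corresponding RegisterBank references:
// \code
// // A 64-bit scalar on the GPR bank selects GPR64 (GPR64all with GetAllRegSet).
// const TargetRegisterClass *RC = getRegClassForTypeOnBank(LLT::scalar(64), GPRBank);
// // A 128-bit vector such as v4s32 on the FPR bank selects FPR128.
// const TargetRegisterClass *VC =
//     getRegClassForTypeOnBank(LLT::fixed_vector(4, 32), FPRBank);
// \endcode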
602
603/// Given a register bank, and size in bits, return the smallest register class
604/// that can represent that combination.
605 static const TargetRegisterClass *
606getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits,
607 bool GetAllRegSet = false) {
608 if (SizeInBits.isScalable()) {
609 assert(RB.getID() == AArch64::FPRRegBankID &&
610 "Expected FPR regbank for scalable type size");
611 return &AArch64::ZPRRegClass;
612 }
613
614 unsigned RegBankID = RB.getID();
615
616 if (RegBankID == AArch64::GPRRegBankID) {
617 assert(!SizeInBits.isScalable() && "Unexpected scalable register size");
618 if (SizeInBits <= 32)
619 return GetAllRegSet ? &AArch64::GPR32allRegClass
620 : &AArch64::GPR32RegClass;
621 if (SizeInBits == 64)
622 return GetAllRegSet ? &AArch64::GPR64allRegClass
623 : &AArch64::GPR64RegClass;
624 if (SizeInBits == 128)
625 return &AArch64::XSeqPairsClassRegClass;
626 }
627
628 if (RegBankID == AArch64::FPRRegBankID) {
629 if (SizeInBits.isScalable()) {
630 assert(SizeInBits == TypeSize::getScalable(128) &&
631 "Unexpected scalable register size");
632 return &AArch64::ZPRRegClass;
633 }
634
635 switch (SizeInBits) {
636 default:
637 return nullptr;
638 case 8:
639 return &AArch64::FPR8RegClass;
640 case 16:
641 return &AArch64::FPR16RegClass;
642 case 32:
643 return &AArch64::FPR32RegClass;
644 case 64:
645 return &AArch64::FPR64RegClass;
646 case 128:
647 return &AArch64::FPR128RegClass;
648 }
649 }
650
651 return nullptr;
652}
653
654 /// Returns the correct subregister to use for a given register class.
655static bool getSubRegForClass(const TargetRegisterClass *RC,
656 const TargetRegisterInfo &TRI, unsigned &SubReg) {
657 switch (TRI.getRegSizeInBits(*RC)) {
658 case 8:
659 SubReg = AArch64::bsub;
660 break;
661 case 16:
662 SubReg = AArch64::hsub;
663 break;
664 case 32:
665 if (RC != &AArch64::FPR32RegClass)
666 SubReg = AArch64::sub_32;
667 else
668 SubReg = AArch64::ssub;
669 break;
670 case 64:
671 SubReg = AArch64::dsub;
672 break;
673 default:
674 LLVM_DEBUG(
675 dbgs() << "Couldn't find appropriate subregister for register class.");
676 return false;
677 }
678
679 return true;
680}
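// Example (editor's illustration, not in the original source): the low half
// of a 64-bit GPR is addressed with the sub_32 index chosen above, while a
// 32-bit FPR class uses ssub.
// \code
// unsigned SubReg;
// getSubRegForClass(&AArch64::GPR32RegClass, TRI, SubReg); // SubReg == AArch64::sub_32
// getSubRegForClass(&AArch64::FPR32RegClass, TRI, SubReg); // SubReg == AArch64::ssub
// \endcode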
681
682/// Returns the minimum size the given register bank can hold.
683static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
684 switch (RB.getID()) {
685 case AArch64::GPRRegBankID:
686 return 32;
687 case AArch64::FPRRegBankID:
688 return 8;
689 default:
690 llvm_unreachable("Tried to get minimum size for unknown register bank.");
691 }
692}
693
694/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
695/// Helper function for functions like createDTuple and createQTuple.
696///
697/// \p RegClassIDs - The list of register class IDs available for some tuple of
698/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
699/// expected to contain between 2 and 4 tuple classes.
700///
701/// \p SubRegs - The list of subregister classes associated with each register
702/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
703/// subregister class. The index of each subregister class is expected to
704/// correspond with the index of each register class.
705///
706/// \returns Either the destination register of REG_SEQUENCE instruction that
707/// was created, or the 0th element of \p Regs if \p Regs contains a single
708/// element.
709static Register createTuple(ArrayRef<Register> Regs,
710 const unsigned RegClassIDs[],
711 const unsigned SubRegs[], MachineIRBuilder &MIB) {
712 unsigned NumRegs = Regs.size();
713 if (NumRegs == 1)
714 return Regs[0];
715 assert(NumRegs >= 2 && NumRegs <= 4 &&
716 "Only support between two and 4 registers in a tuple!");
717 const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
718 auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
719 auto RegSequence =
720 MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
721 for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
722 RegSequence.addUse(Regs[I]);
723 RegSequence.addImm(SubRegs[I]);
724 }
725 return RegSequence.getReg(0);
726}
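// Editor's sketch (not in the original source) of the MIR this produces for a
// two-register Q tuple built via createQTuple(); with a single input register,
// no REG_SEQUENCE is created and that register is returned directly:
// \code
// %tuple:qq = REG_SEQUENCE %v0:fpr128, %subreg.qsub0, %v1:fpr128, %subreg.qsub1
// \endcode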
727
728/// Create a tuple of D-registers using the registers in \p Regs.
729static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
730 static const unsigned RegClassIDs[] = {
731 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
732 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
733 AArch64::dsub2, AArch64::dsub3};
734 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
735}
736
737/// Create a tuple of Q-registers using the registers in \p Regs.
738static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
739 static const unsigned RegClassIDs[] = {
740 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
741 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
742 AArch64::qsub2, AArch64::qsub3};
743 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
744}
745
746static std::optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
747 auto &MI = *Root.getParent();
748 auto &MBB = *MI.getParent();
749 auto &MF = *MBB.getParent();
750 auto &MRI = MF.getRegInfo();
751 uint64_t Immed;
752 if (Root.isImm())
753 Immed = Root.getImm();
754 else if (Root.isCImm())
755 Immed = Root.getCImm()->getZExtValue();
756 else if (Root.isReg()) {
757 auto ValAndVReg =
758 getIConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
759 if (!ValAndVReg)
760 return std::nullopt;
761 Immed = ValAndVReg->Value.getSExtValue();
762 } else
763 return std::nullopt;
764 return Immed;
765}
766
767/// Check whether \p I is a currently unsupported binary operation:
768/// - it has an unsized type
769/// - an operand is not a vreg
770/// - all operands are not in the same bank
771/// These are checks that should someday live in the verifier, but right now,
772/// these are mostly limitations of the aarch64 selector.
773static bool unsupportedBinOp(const MachineInstr &I,
774 const AArch64RegisterBankInfo &RBI,
775 const MachineRegisterInfo &MRI,
776 const AArch64RegisterInfo &TRI) {
777 LLT Ty = MRI.getType(I.getOperand(0).getReg());
778 if (!Ty.isValid()) {
779 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
780 return true;
781 }
782
783 const RegisterBank *PrevOpBank = nullptr;
784 for (auto &MO : I.operands()) {
785 // FIXME: Support non-register operands.
786 if (!MO.isReg()) {
787 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
788 return true;
789 }
790
791 // FIXME: Can generic operations have physical registers operands? If
792 // so, this will need to be taught about that, and we'll need to get the
793 // bank out of the minimal class for the register.
794 // Either way, this needs to be documented (and possibly verified).
795 if (!MO.getReg().isVirtual()) {
796 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
797 return true;
798 }
799
800 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
801 if (!OpBank) {
802 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
803 return true;
804 }
805
806 if (PrevOpBank && OpBank != PrevOpBank) {
807 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
808 return true;
809 }
810 PrevOpBank = OpBank;
811 }
812 return false;
813}
814
815/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
816/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
817/// and of size \p OpSize.
818/// \returns \p GenericOpc if the combination is unsupported.
819static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
820 unsigned OpSize) {
821 switch (RegBankID) {
822 case AArch64::GPRRegBankID:
823 if (OpSize == 32) {
824 switch (GenericOpc) {
825 case TargetOpcode::G_SHL:
826 return AArch64::LSLVWr;
827 case TargetOpcode::G_LSHR:
828 return AArch64::LSRVWr;
829 case TargetOpcode::G_ASHR:
830 return AArch64::ASRVWr;
831 default:
832 return GenericOpc;
833 }
834 } else if (OpSize == 64) {
835 switch (GenericOpc) {
836 case TargetOpcode::G_PTR_ADD:
837 return AArch64::ADDXrr;
838 case TargetOpcode::G_SHL:
839 return AArch64::LSLVXr;
840 case TargetOpcode::G_LSHR:
841 return AArch64::LSRVXr;
842 case TargetOpcode::G_ASHR:
843 return AArch64::ASRVXr;
844 default:
845 return GenericOpc;
846 }
847 }
848 break;
849 case AArch64::FPRRegBankID:
850 switch (OpSize) {
851 case 32:
852 switch (GenericOpc) {
853 case TargetOpcode::G_FADD:
854 return AArch64::FADDSrr;
855 case TargetOpcode::G_FSUB:
856 return AArch64::FSUBSrr;
857 case TargetOpcode::G_FMUL:
858 return AArch64::FMULSrr;
859 case TargetOpcode::G_FDIV:
860 return AArch64::FDIVSrr;
861 default:
862 return GenericOpc;
863 }
864 case 64:
865 switch (GenericOpc) {
866 case TargetOpcode::G_FADD:
867 return AArch64::FADDDrr;
868 case TargetOpcode::G_FSUB:
869 return AArch64::FSUBDrr;
870 case TargetOpcode::G_FMUL:
871 return AArch64::FMULDrr;
872 case TargetOpcode::G_FDIV:
873 return AArch64::FDIVDrr;
874 case TargetOpcode::G_OR:
875 return AArch64::ORRv8i8;
876 default:
877 return GenericOpc;
878 }
879 }
880 break;
881 }
882 return GenericOpc;
883}
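// Usage example (editor's addition): a 64-bit shift-left on the GPR bank maps
// to the variable-shift instruction, per the switch above; unsupported
// combinations simply return the generic opcode unchanged.
// \code
// unsigned Opc = selectBinaryOp(TargetOpcode::G_SHL, AArch64::GPRRegBankID, 64);
// // Opc == AArch64::LSLVXr
// \endcode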
884
885/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
886/// appropriate for the (value) register bank \p RegBankID and of memory access
887/// size \p OpSize. This returns the variant with the base+unsigned-immediate
888/// addressing mode (e.g., LDRXui).
889/// \returns \p GenericOpc if the combination is unsupported.
890static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
891 unsigned OpSize) {
892 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
893 switch (RegBankID) {
894 case AArch64::GPRRegBankID:
895 switch (OpSize) {
896 case 8:
897 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
898 case 16:
899 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
900 case 32:
901 return isStore ? AArch64::STRWui : AArch64::LDRWui;
902 case 64:
903 return isStore ? AArch64::STRXui : AArch64::LDRXui;
904 }
905 break;
906 case AArch64::FPRRegBankID:
907 switch (OpSize) {
908 case 8:
909 return isStore ? AArch64::STRBui : AArch64::LDRBui;
910 case 16:
911 return isStore ? AArch64::STRHui : AArch64::LDRHui;
912 case 32:
913 return isStore ? AArch64::STRSui : AArch64::LDRSui;
914 case 64:
915 return isStore ? AArch64::STRDui : AArch64::LDRDui;
916 case 128:
917 return isStore ? AArch64::STRQui : AArch64::LDRQui;
918 }
919 break;
920 }
921 return GenericOpc;
922}
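// Usage example (editor's addition): a 128-bit load whose value lives on the
// FPR bank selects the unsigned-immediate Q-register form described above.
// \code
// unsigned Opc = selectLoadStoreUIOp(TargetOpcode::G_LOAD, AArch64::FPRRegBankID, 128);
// // Opc == AArch64::LDRQui
// \endcode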
923
924/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
925/// to \p *To.
926///
927/// E.g "To = COPY SrcReg:SubReg"
928static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
929 const RegisterBankInfo &RBI, Register SrcReg,
930 const TargetRegisterClass *To, unsigned SubReg) {
931 assert(SrcReg.isValid() && "Expected a valid source register?");
932 assert(To && "Destination register class cannot be null");
933 assert(SubReg && "Expected a valid subregister");
934
935 MachineIRBuilder MIB(I);
936 auto SubRegCopy =
937 MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
938 MachineOperand &RegOp = I.getOperand(1);
939 RegOp.setReg(SubRegCopy.getReg(0));
940
941 // It's possible that the destination register won't be constrained. Make
942 // sure that happens.
943 if (!I.getOperand(0).getReg().isPhysical())
944 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
945
946 return true;
947}
948
949/// Helper function to get the source and destination register classes for a
950/// copy. Returns a std::pair containing the source register class for the
951/// copy, and the destination register class for the copy. If a register class
952/// cannot be determined, then it will be nullptr.
953static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
954getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
955 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
956 const RegisterBankInfo &RBI) {
957 Register DstReg = I.getOperand(0).getReg();
958 Register SrcReg = I.getOperand(1).getReg();
959 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
960 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
961
962 TypeSize DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
963 TypeSize SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
964
965 // Special casing for cross-bank copies of s1s. We can technically represent
966 // a 1-bit value with any size of register. The minimum size for a GPR is 32
967 // bits. So, we need to put the FPR on 32 bits as well.
968 //
969 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
970 // then we can pull it into the helpers that get the appropriate class for a
971 // register bank. Or make a new helper that carries along some constraint
972 // information.
973 if (SrcRegBank != DstRegBank &&
974 (DstSize == TypeSize::getFixed(1) && SrcSize == TypeSize::getFixed(1)))
975 SrcSize = DstSize = TypeSize::getFixed(32);
976
977 return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
978 getMinClassForRegBank(DstRegBank, DstSize, true)};
979}
980
981// FIXME: We need some sort of API in RBI/TRI to allow generic code to
982// constrain operands of simple instructions given a TargetRegisterClass
983// and LLT
984static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI,
985 const RegisterBankInfo &RBI) {
986 for (MachineOperand &MO : I.operands()) {
987 if (!MO.isReg())
988 continue;
989 Register Reg = MO.getReg();
990 if (!Reg)
991 continue;
992 if (Reg.isPhysical())
993 continue;
994 LLT Ty = MRI.getType(Reg);
995 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
996 const TargetRegisterClass *RC =
997 dyn_cast<const TargetRegisterClass *>(RegClassOrBank);
998 if (!RC) {
999 const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
1000 RC = getRegClassForTypeOnBank(Ty, RB);
1001 if (!RC) {
1002 LLVM_DEBUG(
1003 dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
1004 break;
1005 }
1006 }
1007 RBI.constrainGenericRegister(Reg, *RC, MRI);
1008 }
1009
1010 return true;
1011}
1012
1013static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
1014 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
1015 const RegisterBankInfo &RBI) {
1016 Register DstReg = I.getOperand(0).getReg();
1017 Register SrcReg = I.getOperand(1).getReg();
1018 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
1019 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
1020
1021 // Find the correct register classes for the source and destination registers.
1022 const TargetRegisterClass *SrcRC;
1023 const TargetRegisterClass *DstRC;
1024 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
1025
1026 if (!DstRC) {
1027 LLVM_DEBUG(dbgs() << "Unexpected dest size "
1028 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
1029 return false;
1030 }
1031
1032 // Is this a copy? If so, then we may need to insert a subregister copy.
1033 if (I.isCopy()) {
1034 // Yes. Check if there's anything to fix up.
1035 if (!SrcRC) {
1036 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
1037 return false;
1038 }
1039
1040 const TypeSize SrcSize = TRI.getRegSizeInBits(*SrcRC);
1041 const TypeSize DstSize = TRI.getRegSizeInBits(*DstRC);
1042 unsigned SubReg;
1043
1044 // If the source bank doesn't support a subregister copy small enough,
1045 // then we first need to copy to the destination bank.
1046 if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
1047 const TargetRegisterClass *DstTempRC =
1048 getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
1049 getSubRegForClass(DstRC, TRI, SubReg);
1050
1051 MachineIRBuilder MIB(I);
1052 auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
1053 copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
1054 } else if (SrcSize > DstSize) {
1055 // If the source register is bigger than the destination we need to
1056 // perform a subregister copy.
1057 const TargetRegisterClass *SubRegRC =
1058 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1059 getSubRegForClass(SubRegRC, TRI, SubReg);
1060 copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
1061 } else if (DstSize > SrcSize) {
1062 // If the destination register is bigger than the source we need to do
1063 // a promotion using SUBREG_TO_REG.
1064 const TargetRegisterClass *PromotionRC =
1065 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1066 getSubRegForClass(SrcRC, TRI, SubReg);
1067
1068 Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
1069 BuildMI(*I.getParent(), I, I.getDebugLoc(),
1070 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
1071 .addImm(0)
1072 .addUse(SrcReg)
1073 .addImm(SubReg);
1074 MachineOperand &RegOp = I.getOperand(1);
1075 RegOp.setReg(PromoteReg);
1076 }
1077
1078 // If the destination is a physical register, then there's nothing to
1079 // change, so we're done.
1080 if (DstReg.isPhysical())
1081 return true;
1082 }
1083
1084 // No need to constrain SrcReg. It will get constrained when we hit another
1085 // of its use or its defs. Copies do not have constraints.
1086 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1087 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
1088 << " operand\n");
1089 return false;
1090 }
1091
1092 // If this is a GPR ZEXT that we want to just reduce down into a copy.
1093 // The sizes will be mismatched with the source < 32b but that's ok.
1094 if (I.getOpcode() == TargetOpcode::G_ZEXT) {
1095 I.setDesc(TII.get(AArch64::COPY));
1096 assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
1097 return selectCopy(I, TII, MRI, TRI, RBI);
1098 }
1099
1100 I.setDesc(TII.get(AArch64::COPY));
1101 return true;
1102}
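// Editor's sketch (not in the original source) of the narrowing path above:
// when a 64-bit GPR source is copied into a 32-bit GPR destination, the COPY
// is rewritten to read only the sub_32 subregister, roughly:
// \code
// %narrow:gpr32 = COPY %src.sub_32:gpr64
// %dst:gpr32 = COPY %narrow
// \endcode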
1103
1104static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
1105 if (!DstTy.isScalar() || !SrcTy.isScalar())
1106 return GenericOpc;
1107
1108 const unsigned DstSize = DstTy.getSizeInBits();
1109 const unsigned SrcSize = SrcTy.getSizeInBits();
1110
1111 switch (DstSize) {
1112 case 32:
1113 switch (SrcSize) {
1114 case 32:
1115 switch (GenericOpc) {
1116 case TargetOpcode::G_SITOFP:
1117 return AArch64::SCVTFUWSri;
1118 case TargetOpcode::G_UITOFP:
1119 return AArch64::UCVTFUWSri;
1120 case TargetOpcode::G_FPTOSI:
1121 return AArch64::FCVTZSUWSr;
1122 case TargetOpcode::G_FPTOUI:
1123 return AArch64::FCVTZUUWSr;
1124 default:
1125 return GenericOpc;
1126 }
1127 case 64:
1128 switch (GenericOpc) {
1129 case TargetOpcode::G_SITOFP:
1130 return AArch64::SCVTFUXSri;
1131 case TargetOpcode::G_UITOFP:
1132 return AArch64::UCVTFUXSri;
1133 case TargetOpcode::G_FPTOSI:
1134 return AArch64::FCVTZSUWDr;
1135 case TargetOpcode::G_FPTOUI:
1136 return AArch64::FCVTZUUWDr;
1137 default:
1138 return GenericOpc;
1139 }
1140 default:
1141 return GenericOpc;
1142 }
1143 case 64:
1144 switch (SrcSize) {
1145 case 32:
1146 switch (GenericOpc) {
1147 case TargetOpcode::G_SITOFP:
1148 return AArch64::SCVTFUWDri;
1149 case TargetOpcode::G_UITOFP:
1150 return AArch64::UCVTFUWDri;
1151 case TargetOpcode::G_FPTOSI:
1152 return AArch64::FCVTZSUXSr;
1153 case TargetOpcode::G_FPTOUI:
1154 return AArch64::FCVTZUUXSr;
1155 default:
1156 return GenericOpc;
1157 }
1158 case 64:
1159 switch (GenericOpc) {
1160 case TargetOpcode::G_SITOFP:
1161 return AArch64::SCVTFUXDri;
1162 case TargetOpcode::G_UITOFP:
1163 return AArch64::UCVTFUXDri;
1164 case TargetOpcode::G_FPTOSI:
1165 return AArch64::FCVTZSUXDr;
1166 case TargetOpcode::G_FPTOUI:
1167 return AArch64::FCVTZUUXDr;
1168 default:
1169 return GenericOpc;
1170 }
1171 default:
1172 return GenericOpc;
1173 }
1174 default:
1175 return GenericOpc;
1176 };
1177 return GenericOpc;
1178}
1179
1180MachineInstr *
1181AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1182 Register False, AArch64CC::CondCode CC,
1183 MachineIRBuilder &MIB) const {
1184 MachineRegisterInfo &MRI = *MIB.getMRI();
1185 assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
1186 RBI.getRegBank(True, MRI, TRI)->getID() &&
1187 "Expected both select operands to have the same regbank?");
1188 LLT Ty = MRI.getType(True);
1189 if (Ty.isVector())
1190 return nullptr;
1191 const unsigned Size = Ty.getSizeInBits();
1192 assert((Size == 32 || Size == 64) &&
1193 "Expected 32 bit or 64 bit select only?");
1194 const bool Is32Bit = Size == 32;
1195 if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1196 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1197 auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1198 constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1199 return &*FCSel;
1200 }
1201
1202 // By default, we'll try and emit a CSEL.
1203 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1204 bool Optimized = false;
1205 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1206 &Optimized](Register &Reg, Register &OtherReg,
1207 bool Invert) {
1208 if (Optimized)
1209 return false;
1210
1211 // Attempt to fold:
1212 //
1213 // %sub = G_SUB 0, %x
1214 // %select = G_SELECT cc, %reg, %sub
1215 //
1216 // Into:
1217 // %select = CSNEG %reg, %x, cc
1218 Register MatchReg;
1219 if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
1220 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1221 Reg = MatchReg;
1222 if (Invert) {
1223 CC = AArch64CC::getInvertedCondCode(CC);
1224 std::swap(Reg, OtherReg);
1225 }
1226 return true;
1227 }
1228
1229 // Attempt to fold:
1230 //
1231 // %xor = G_XOR %x, -1
1232 // %select = G_SELECT cc, %reg, %xor
1233 //
1234 // Into:
1235 // %select = CSINV %reg, %x, cc
1236 if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
1237 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1238 Reg = MatchReg;
1239 if (Invert) {
1240 CC = AArch64CC::getInvertedCondCode(CC);
1241 std::swap(Reg, OtherReg);
1242 }
1243 return true;
1244 }
1245
1246 // Attempt to fold:
1247 //
1248 // %add = G_ADD %x, 1
1249 // %select = G_SELECT cc, %reg, %add
1250 //
1251 // Into:
1252 // %select = CSINC %reg, %x, cc
1253 if (mi_match(Reg, MRI,
1254 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
1255 m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
1256 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1257 Reg = MatchReg;
1258 if (Invert) {
1259 CC = AArch64CC::getInvertedCondCode(CC);
1260 std::swap(Reg, OtherReg);
1261 }
1262 return true;
1263 }
1264
1265 return false;
1266 };
1267
1268 // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1269 // true/false values are constants.
1270 // FIXME: All of these patterns already exist in tablegen. We should be
1271 // able to import these.
1272 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1273 &Optimized]() {
1274 if (Optimized)
1275 return false;
1276 auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
1277 auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
1278 if (!TrueCst && !FalseCst)
1279 return false;
1280
1281 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1282 if (TrueCst && FalseCst) {
1283 int64_t T = TrueCst->Value.getSExtValue();
1284 int64_t F = FalseCst->Value.getSExtValue();
1285
1286 if (T == 0 && F == 1) {
1287 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1288 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1289 True = ZReg;
1290 False = ZReg;
1291 return true;
1292 }
1293
1294 if (T == 0 && F == -1) {
1295 // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
1296 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1297 True = ZReg;
1298 False = ZReg;
1299 return true;
1300 }
1301 }
1302
1303 if (TrueCst) {
1304 int64_t T = TrueCst->Value.getSExtValue();
1305 if (T == 1) {
1306 // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1307 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1308 True = False;
1309 False = ZReg;
1310 CC = AArch64CC::getInvertedCondCode(CC);
1311 return true;
1312 }
1313
1314 if (T == -1) {
1315 // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1316 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1317 True = False;
1318 False = ZReg;
1319 CC = AArch64CC::getInvertedCondCode(CC);
1320 return true;
1321 }
1322 }
1323
1324 if (FalseCst) {
1325 int64_t F = FalseCst->Value.getSExtValue();
1326 if (F == 1) {
1327 // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1328 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1329 False = ZReg;
1330 return true;
1331 }
1332
1333 if (F == -1) {
1334 // G_SELECT cc, t, -1 -> CSINC t, zreg, cc
1335 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1336 False = ZReg;
1337 return true;
1338 }
1339 }
1340 return false;
1341 };
1342
1343 Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1344 Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1345 Optimized |= TryOptSelectCst();
1346 auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1347 constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1348 return &*SelectInst;
1349}
1350
1351static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
1352 switch (P) {
1353 default:
1354 llvm_unreachable("Unknown condition code!");
1355 case CmpInst::ICMP_NE:
1356 return AArch64CC::NE;
1357 case CmpInst::ICMP_EQ:
1358 return AArch64CC::EQ;
1359 case CmpInst::ICMP_SGT:
1360 return AArch64CC::GT;
1361 case CmpInst::ICMP_SGE:
1362 return AArch64CC::GE;
1363 case CmpInst::ICMP_SLT:
1364 return AArch64CC::LT;
1365 case CmpInst::ICMP_SLE:
1366 return AArch64CC::LE;
1367 case CmpInst::ICMP_UGT:
1368 return AArch64CC::HI;
1369 case CmpInst::ICMP_UGE:
1370 return AArch64CC::HS;
1371 case CmpInst::ICMP_ULT:
1372 return AArch64CC::LO;
1373 case CmpInst::ICMP_ULE:
1374 return AArch64CC::LS;
1375 }
1376}
1377
1378/// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
1379static void changeFPCCToORAArch64CC(CmpInst::Predicate CC,
1380 AArch64CC::CondCode &CondCode,
1381 AArch64CC::CondCode &CondCode2) {
1382 CondCode2 = AArch64CC::AL;
1383 switch (CC) {
1384 default:
1385 llvm_unreachable("Unknown FP condition!");
1386 case CmpInst::FCMP_OEQ:
1387 CondCode = AArch64CC::EQ;
1388 break;
1389 case CmpInst::FCMP_OGT:
1390 CondCode = AArch64CC::GT;
1391 break;
1392 case CmpInst::FCMP_OGE:
1393 CondCode = AArch64CC::GE;
1394 break;
1395 case CmpInst::FCMP_OLT:
1396 CondCode = AArch64CC::MI;
1397 break;
1398 case CmpInst::FCMP_OLE:
1399 CondCode = AArch64CC::LS;
1400 break;
1401 case CmpInst::FCMP_ONE:
1402 CondCode = AArch64CC::MI;
1403 CondCode2 = AArch64CC::GT;
1404 break;
1405 case CmpInst::FCMP_ORD:
1406 CondCode = AArch64CC::VC;
1407 break;
1408 case CmpInst::FCMP_UNO:
1409 CondCode = AArch64CC::VS;
1410 break;
1411 case CmpInst::FCMP_UEQ:
1412 CondCode = AArch64CC::EQ;
1413 CondCode2 = AArch64CC::VS;
1414 break;
1415 case CmpInst::FCMP_UGT:
1416 CondCode = AArch64CC::HI;
1417 break;
1418 case CmpInst::FCMP_UGE:
1419 CondCode = AArch64CC::PL;
1420 break;
1421 case CmpInst::FCMP_ULT:
1422 CondCode = AArch64CC::LT;
1423 break;
1424 case CmpInst::FCMP_ULE:
1425 CondCode = AArch64CC::LE;
1426 break;
1427 case CmpInst::FCMP_UNE:
1428 CondCode = AArch64CC::NE;
1429 break;
1430 }
1431}
1432
1433/// Convert an IR fp condition code to an AArch64 CC.
1434/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1435/// should be AND'ed instead of OR'ed.
1436static void changeFCMPPredToAArch64CC(const CmpInst::Predicate CC,
1437 AArch64CC::CondCode &CondCode,
1438 AArch64CC::CondCode &CondCode2) {
1439 CondCode2 = AArch64CC::AL;
1440 switch (CC) {
1441 default:
1442 changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
1443 assert(CondCode2 == AArch64CC::AL);
1444 break;
1445 case CmpInst::FCMP_ONE:
1446 // (a one b)
1447 // == ((a olt b) || (a ogt b))
1448 // == ((a ord b) && (a une b))
1449 CondCode = AArch64CC::VC;
1450 CondCode2 = AArch64CC::NE;
1451 break;
1452 case CmpInst::FCMP_UEQ:
1453 // (a ueq b)
1454 // == ((a uno b) || (a oeq b))
1455 // == ((a ule b) && (a uge b))
1456 CondCode = AArch64CC::PL;
1457 CondCode2 = AArch64CC::LE;
1458 break;
1459 }
1460}
1461
1462/// Return a register which can be used as a bit to test in a TB(N)Z.
1463static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1464 MachineRegisterInfo &MRI) {
1465 assert(Reg.isValid() && "Expected valid register!");
1466 bool HasZext = false;
1467 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1468 unsigned Opc = MI->getOpcode();
1469
1470 if (!MI->getOperand(0).isReg() ||
1471 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1472 break;
1473
1474 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1475 //
1476 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1477 // on the truncated x is the same as the bit number on x.
1478 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1479 Opc == TargetOpcode::G_TRUNC) {
1480 if (Opc == TargetOpcode::G_ZEXT)
1481 HasZext = true;
1482
1483 Register NextReg = MI->getOperand(1).getReg();
1484 // Did we find something worth folding?
1485 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1486 break;
1487
1488 // NextReg is worth folding. Keep looking.
1489 Reg = NextReg;
1490 continue;
1491 }
1492
1493 // Attempt to find a suitable operation with a constant on one side.
1494 std::optional<uint64_t> C;
1495 Register TestReg;
1496 switch (Opc) {
1497 default:
1498 break;
1499 case TargetOpcode::G_AND:
1500 case TargetOpcode::G_XOR: {
1501 TestReg = MI->getOperand(1).getReg();
1502 Register ConstantReg = MI->getOperand(2).getReg();
1503 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1504 if (!VRegAndVal) {
1505 // AND commutes, check the other side for a constant.
1506 // FIXME: Can we canonicalize the constant so that it's always on the
1507 // same side at some point earlier?
1508 std::swap(ConstantReg, TestReg);
1509 VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1510 }
1511 if (VRegAndVal) {
1512 if (HasZext)
1513 C = VRegAndVal->Value.getZExtValue();
1514 else
1515 C = VRegAndVal->Value.getSExtValue();
1516 }
1517 break;
1518 }
1519 case TargetOpcode::G_ASHR:
1520 case TargetOpcode::G_LSHR:
1521 case TargetOpcode::G_SHL: {
1522 TestReg = MI->getOperand(1).getReg();
1523 auto VRegAndVal =
1524 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1525 if (VRegAndVal)
1526 C = VRegAndVal->Value.getSExtValue();
1527 break;
1528 }
1529 }
1530
1531 // Didn't find a constant or viable register. Bail out of the loop.
1532 if (!C || !TestReg.isValid())
1533 break;
1534
1535 // We found a suitable instruction with a constant. Check to see if we can
1536 // walk through the instruction.
1537 Register NextReg;
1538 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1539 switch (Opc) {
1540 default:
1541 break;
1542 case TargetOpcode::G_AND:
1543 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1544 if ((*C >> Bit) & 1)
1545 NextReg = TestReg;
1546 break;
1547 case TargetOpcode::G_SHL:
1548 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1549 // the type of the register.
1550 if (*C <= Bit && (Bit - *C) < TestRegSize) {
1551 NextReg = TestReg;
1552 Bit = Bit - *C;
1553 }
1554 break;
1555 case TargetOpcode::G_ASHR:
1556 // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1557 // in x
1558 NextReg = TestReg;
1559 Bit = Bit + *C;
1560 if (Bit >= TestRegSize)
1561 Bit = TestRegSize - 1;
1562 break;
1563 case TargetOpcode::G_LSHR:
1564 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1565 if ((Bit + *C) < TestRegSize) {
1566 NextReg = TestReg;
1567 Bit = Bit + *C;
1568 }
1569 break;
1570 case TargetOpcode::G_XOR:
1571 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1572 // appropriate.
1573 //
1574 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1575 //
1576 // tbz x', b -> tbnz x, b
1577 //
1578 // Because x' only has the b-th bit set if x does not.
1579 if ((*C >> Bit) & 1)
1580 Invert = !Invert;
1581 NextReg = TestReg;
1582 break;
1583 }
1584
1585 // Check if we found anything worth folding.
1586 if (!NextReg.isValid())
1587 return Reg;
1588 Reg = NextReg;
1589 }
1590
1591 return Reg;
1592}
1593
1594MachineInstr *AArch64InstructionSelector::emitTestBit(
1595 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1596 MachineIRBuilder &MIB) const {
1597 assert(TestReg.isValid());
1598 assert(ProduceNonFlagSettingCondBr &&
1599 "Cannot emit TB(N)Z with speculation tracking!");
1600 MachineRegisterInfo &MRI = *MIB.getMRI();
1601
1602 // Attempt to optimize the test bit by walking over instructions.
1603 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1604 LLT Ty = MRI.getType(TestReg);
1605 unsigned Size = Ty.getSizeInBits();
1606 assert(!Ty.isVector() && "Expected a scalar!");
1607 assert(Bit < 64 && "Bit is too large!");
1608
1609 // When the test register is a 64-bit register, we have to narrow to make
1610 // TBNZW work.
1611 bool UseWReg = Bit < 32;
1612 unsigned NecessarySize = UseWReg ? 32 : 64;
1613 if (Size != NecessarySize)
1614 TestReg = moveScalarRegClass(
1615 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1616 MIB);
1617
1618 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1619 {AArch64::TBZW, AArch64::TBNZW}};
1620 unsigned Opc = OpcTable[UseWReg][IsNegative];
1621 auto TestBitMI =
1622 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1623 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1624 return &*TestBitMI;
1625}
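// Example (editor's illustration): testing bit 3 for the "not zero" sense
// emits the W-register form, while bits >= 32 force the X-register forms
// (TBZX/TBNZX) after widening via moveScalarRegClass().
// \code
// emitTestBit(TestReg, /*Bit=*/3, /*IsNegative=*/true, DestMBB, MIB);
// // -> TBNZW %TestReg, 3, %bb(DestMBB)
// \endcode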
1626
1627bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1628 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1629 MachineIRBuilder &MIB) const {
1630 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1631 // Given something like this:
1632 //
1633 // %x = ...Something...
1634 // %one = G_CONSTANT i64 1
1635 // %zero = G_CONSTANT i64 0
1636 // %and = G_AND %x, %one
1637 // %cmp = G_ICMP intpred(ne), %and, %zero
1638 // %cmp_trunc = G_TRUNC %cmp
1639 // G_BRCOND %cmp_trunc, %bb.3
1640 //
1641 // We want to try and fold the AND into the G_BRCOND and produce either a
1642 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1643 //
1644 // In this case, we'd get
1645 //
1646 // TBNZ %x %bb.3
1647 //
1648
1649 // Check if the AND has a constant on its RHS which we can use as a mask.
1650 // If it's a power of 2, then it's the same as checking a specific bit.
1651 // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1652 auto MaybeBit = getIConstantVRegValWithLookThrough(
1653 AndInst.getOperand(2).getReg(), *MIB.getMRI());
1654 if (!MaybeBit)
1655 return false;
1656
1657 int32_t Bit = MaybeBit->Value.exactLogBase2();
1658 if (Bit < 0)
1659 return false;
1660
1661 Register TestReg = AndInst.getOperand(1).getReg();
1662
1663 // Emit a TB(N)Z.
1664 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1665 return true;
1666}
1667
1668MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1669 bool IsNegative,
1670 MachineBasicBlock *DestMBB,
1671 MachineIRBuilder &MIB) const {
1672 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1673 MachineRegisterInfo &MRI = *MIB.getMRI();
1674 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1675 AArch64::GPRRegBankID &&
1676 "Expected GPRs only?");
1677 auto Ty = MRI.getType(CompareReg);
1678 unsigned Width = Ty.getSizeInBits();
1679 assert(!Ty.isVector() && "Expected scalar only?");
1680 assert(Width <= 64 && "Expected width to be at most 64?");
1681 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1682 {AArch64::CBNZW, AArch64::CBNZX}};
1683 unsigned Opc = OpcTable[IsNegative][Width == 64];
1684 auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1685 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1686 return &*BranchMI;
1687}
1688
1689bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1690 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1691 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1692 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1693 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1694 // totally clean. Some of them require two branches to implement.
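// For example, predicates such as unordered-or-equal typically lower to a
// branch on EQ plus a branch on VS; changeFCMPPredToAArch64CC reports those
// cases by returning a second condition code in CC2.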
1695 auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1696 emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1697 Pred);
1698 AArch64CC::CondCode CC1, CC2;
1699 changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
1700 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1701 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1702 if (CC2 != AArch64CC::AL)
1703 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1704 I.eraseFromParent();
1705 return true;
1706}
1707
1708bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1709 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1710 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1711 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1712 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1713 //
1714 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1715 // instructions will not be produced, as they are conditional branch
1716 // instructions that do not set flags.
1717 if (!ProduceNonFlagSettingCondBr)
1718 return false;
1719
1720 MachineRegisterInfo &MRI = *MIB.getMRI();
1721 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1722 auto Pred =
1723 static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1724 Register LHS = ICmp.getOperand(2).getReg();
1725 Register RHS = ICmp.getOperand(3).getReg();
1726
1727 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1728 auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1729 MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1730
1731 // When we can emit a TB(N)Z, prefer that.
1732 //
1733 // Handle non-commutative condition codes first.
1734 // Note that we don't want to do this when we have a G_AND because it can
1735 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1736 if (VRegAndVal && !AndInst) {
1737 int64_t C = VRegAndVal->Value.getSExtValue();
1738
1739 // When we have a greater-than comparison, we can just test if the msb is
1740 // zero.
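// (For signed x, x > -1 is the same as x >= 0, which holds exactly when the
// sign bit is clear.)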
1741 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1742 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1743 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1744 I.eraseFromParent();
1745 return true;
1746 }
1747
1748 // When we have a less than comparison, we can just test if the msb is not
1749 // zero.
1750 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1751 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1752 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1753 I.eraseFromParent();
1754 return true;
1755 }
1756
1757 // Inversely, if we have a signed greater-than-or-equal comparison to zero,
1758 // we can test if the msb is zero.
1759 if (C == 0 && Pred == CmpInst::ICMP_SGE) {
1760 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1761 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1762 I.eraseFromParent();
1763 return true;
1764 }
1765 }
1766
1767 // Attempt to handle commutative condition codes. Right now, that's only
1768 // eq/ne.
1769 if (ICmpInst::isEquality(Pred)) {
1770 if (!VRegAndVal) {
1771 std::swap(RHS, LHS);
1772 VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1773 AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1774 }
1775
1776 if (VRegAndVal && VRegAndVal->Value == 0) {
1777 // If there's a G_AND feeding into this branch, try to fold it away by
1778 // emitting a TB(N)Z instead.
1779 //
1780 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1781 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1782 // would be redundant.
1783 if (AndInst &&
1784 tryOptAndIntoCompareBranch(
1785 *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1786 I.eraseFromParent();
1787 return true;
1788 }
1789
1790 // Otherwise, try to emit a CB(N)Z instead.
1791 auto LHSTy = MRI.getType(LHS);
1792 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1793 emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1794 I.eraseFromParent();
1795 return true;
1796 }
1797 }
1798 }
1799
1800 return false;
1801}
1802
1803bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1804 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1805 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1806 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1807 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1808 return true;
1809
1810 // Couldn't optimize. Emit a compare + a Bcc.
1811 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1812 auto PredOp = ICmp.getOperand(1);
1813 emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1814 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1815 static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
1816 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1817 I.eraseFromParent();
1818 return true;
1819}
1820
1821 bool AArch64InstructionSelector::selectCompareBranch(
1822 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
1823 Register CondReg = I.getOperand(0).getReg();
1824 MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1825 // Try to select the G_BRCOND using whatever is feeding the condition if
1826 // possible.
1827 unsigned CCMIOpc = CCMI->getOpcode();
1828 if (CCMIOpc == TargetOpcode::G_FCMP)
1829 return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1830 if (CCMIOpc == TargetOpcode::G_ICMP)
1831 return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1832
1833 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1834 // instructions will not be produced, as they are conditional branch
1835 // instructions that do not set flags.
1836 if (ProduceNonFlagSettingCondBr) {
1837 emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1838 I.getOperand(1).getMBB(), MIB);
1839 I.eraseFromParent();
1840 return true;
1841 }
1842
1843 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1844 auto TstMI =
1845 MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1846 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
1847 auto Bcc = MIB.buildInstr(AArch64::Bcc)
1848 .addImm(AArch64CC::NE)
1849 .addMBB(I.getOperand(1).getMBB());
1850 I.eraseFromParent();
1851 return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1852}
1853
1854/// Returns the element immediate value of a vector shift operand if found.
1855/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1856 static std::optional<int64_t> getVectorShiftImm(Register Reg,
1857 MachineRegisterInfo &MRI) {
1858 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1859 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1860 return getAArch64VectorSplatScalar(*OpMI, MRI);
1861}
1862
1863/// Matches and returns the shift immediate value for a SHL instruction given
1864/// a shift operand.
1865 static std::optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg,
1866 MachineRegisterInfo &MRI) {
1867 std::optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1868 if (!ShiftImm)
1869 return std::nullopt;
1870 // Check the immediate is in range for a SHL.
1871 int64_t Imm = *ShiftImm;
1872 if (Imm < 0)
1873 return std::nullopt;
1874 switch (SrcTy.getElementType().getSizeInBits()) {
1875 default:
1876 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1877 return std::nullopt;
1878 case 8:
1879 if (Imm > 7)
1880 return std::nullopt;
1881 break;
1882 case 16:
1883 if (Imm > 15)
1884 return std::nullopt;
1885 break;
1886 case 32:
1887 if (Imm > 31)
1888 return std::nullopt;
1889 break;
1890 case 64:
1891 if (Imm > 63)
1892 return std::nullopt;
1893 break;
1894 }
1895 return Imm;
1896}
1897
1898 bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
1899 MachineRegisterInfo &MRI) {
1900 assert(I.getOpcode() == TargetOpcode::G_SHL);
1901 Register DstReg = I.getOperand(0).getReg();
1902 const LLT Ty = MRI.getType(DstReg);
1903 Register Src1Reg = I.getOperand(1).getReg();
1904 Register Src2Reg = I.getOperand(2).getReg();
1905
1906 if (!Ty.isVector())
1907 return false;
1908
1909 // Check if we have a vector of constants on RHS that we can select as the
1910 // immediate form.
1911 std::optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1912
1913 unsigned Opc = 0;
1914 if (Ty == LLT::fixed_vector(2, 64)) {
1915 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1916 } else if (Ty == LLT::fixed_vector(4, 32)) {
1917 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1918 } else if (Ty == LLT::fixed_vector(2, 32)) {
1919 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1920 } else if (Ty == LLT::fixed_vector(4, 16)) {
1921 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1922 } else if (Ty == LLT::fixed_vector(8, 16)) {
1923 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1924 } else if (Ty == LLT::fixed_vector(16, 8)) {
1925 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1926 } else if (Ty == LLT::fixed_vector(8, 8)) {
1927 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1928 } else {
1929 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1930 return false;
1931 }
1932
1933 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1934 if (ImmVal)
1935 Shl.addImm(*ImmVal);
1936 else
1937 Shl.addUse(Src2Reg);
1938 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1939 I.eraseFromParent();
1940 return true;
1941}
1942
1943 bool AArch64InstructionSelector::selectVectorAshrLshr(
1944 MachineInstr &I, MachineRegisterInfo &MRI) {
1945 assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1946 I.getOpcode() == TargetOpcode::G_LSHR);
1947 Register DstReg = I.getOperand(0).getReg();
1948 const LLT Ty = MRI.getType(DstReg);
1949 Register Src1Reg = I.getOperand(1).getReg();
1950 Register Src2Reg = I.getOperand(2).getReg();
1951
1952 if (!Ty.isVector())
1953 return false;
1954
1955 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1956
1957 // We expect the immediate case to be lowered in the PostLegalCombiner to
1958 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1959
1960 // There is not a shift right register instruction, but the shift left
1961 // register instruction takes a signed value, where negative numbers specify a
1962 // right shift.
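// So a right shift by a register is emitted below as a NEG of the shift
// amounts followed by SSHL (arithmetic) or USHL (logical).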
1963
1964 unsigned Opc = 0;
1965 unsigned NegOpc = 0;
1966 const TargetRegisterClass *RC =
1967 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
1968 if (Ty == LLT::fixed_vector(2, 64)) {
1969 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1970 NegOpc = AArch64::NEGv2i64;
1971 } else if (Ty == LLT::fixed_vector(4, 32)) {
1972 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1973 NegOpc = AArch64::NEGv4i32;
1974 } else if (Ty == LLT::fixed_vector(2, 32)) {
1975 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1976 NegOpc = AArch64::NEGv2i32;
1977 } else if (Ty == LLT::fixed_vector(4, 16)) {
1978 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1979 NegOpc = AArch64::NEGv4i16;
1980 } else if (Ty == LLT::fixed_vector(8, 16)) {
1981 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1982 NegOpc = AArch64::NEGv8i16;
1983 } else if (Ty == LLT::fixed_vector(16, 8)) {
1984 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1985 NegOpc = AArch64::NEGv16i8;
1986 } else if (Ty == LLT::fixed_vector(8, 8)) {
1987 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1988 NegOpc = AArch64::NEGv8i8;
1989 } else {
1990 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1991 return false;
1992 }
1993
1994 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1995 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1996 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1997 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1998 I.eraseFromParent();
1999 return true;
2000}
2001
2002 bool AArch64InstructionSelector::selectVaStartAAPCS(
2003 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
2004
2005 if (STI.isCallingConvWin64(MF.getFunction().getCallingConv(),
2006 MF.getFunction().isVarArg()))
2007 return false;
2008
2009 // The layout of the va_list struct is specified in the AArch64 Procedure Call
2010 // Standard, section 10.1.5.
2011
2012 const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
2013 const unsigned PtrSize = STI.isTargetILP32() ? 4 : 8;
2014 const auto *PtrRegClass =
2015 STI.isTargetILP32() ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
2016
2017 const MCInstrDesc &MCIDAddAddr =
2018 TII.get(STI.isTargetILP32() ? AArch64::ADDWri : AArch64::ADDXri);
2019 const MCInstrDesc &MCIDStoreAddr =
2020 TII.get(STI.isTargetILP32() ? AArch64::STRWui : AArch64::STRXui);
2021
2022 /*
2023 * typedef struct va_list {
2024 * void * stack; // next stack param
2025 * void * gr_top; // end of GP arg reg save area
2026 * void * vr_top; // end of FP/SIMD arg reg save area
2027 * int gr_offs; // offset from gr_top to next GP register arg
2028 * int vr_offs; // offset from vr_top to next FP/SIMD register arg
2029 * } va_list;
2030 */
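// gr_offs and vr_offs are stored below as the negated sizes of the register
// save areas, so (per the AAPCS64 va_arg algorithm) gr_top + gr_offs points at
// the first saved GP register argument, and likewise for vr_top + vr_offs.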
2031 const auto VAList = I.getOperand(0).getReg();
2032
2033 // Our current offset in bytes from the va_list struct (VAList).
2034 unsigned OffsetBytes = 0;
2035
2036 // Helper function to store (FrameIndex + Imm) to VAList at offset OffsetBytes
2037 // and increment OffsetBytes by PtrSize.
2038 const auto PushAddress = [&](const int FrameIndex, const int64_t Imm) {
2039 const Register Top = MRI.createVirtualRegister(PtrRegClass);
2040 auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), MCIDAddAddr)
2041 .addDef(Top)
2042 .addFrameIndex(FrameIndex)
2043 .addImm(Imm)
2044 .addImm(0);
2045 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2046
2047 const auto *MMO = *I.memoperands_begin();
2048 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), MCIDStoreAddr)
2049 .addUse(Top)
2050 .addUse(VAList)
2051 .addImm(OffsetBytes / PtrSize)
2052 .addMemOperand(MF.getMachineMemOperand(
2053 MMO->getPointerInfo().getWithOffset(OffsetBytes),
2054 MachineMemOperand::MOStore, PtrSize, MMO->getBaseAlign()));
2055 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2056
2057 OffsetBytes += PtrSize;
2058 };
2059
2060 // void* stack at offset 0
2061 PushAddress(FuncInfo->getVarArgsStackIndex(), 0);
2062
2063 // void* gr_top at offset 8 (4 on ILP32)
2064 const unsigned GPRSize = FuncInfo->getVarArgsGPRSize();
2065 PushAddress(FuncInfo->getVarArgsGPRIndex(), GPRSize);
2066
2067 // void* vr_top at offset 16 (8 on ILP32)
2068 const unsigned FPRSize = FuncInfo->getVarArgsFPRSize();
2069 PushAddress(FuncInfo->getVarArgsFPRIndex(), FPRSize);
2070
2071 // Helper function to store a 4-byte integer constant to VAList at offset
2072 // OffsetBytes, and increment OffsetBytes by 4.
2073 const auto PushIntConstant = [&](const int32_t Value) {
2074 constexpr int IntSize = 4;
2075 const Register Temp = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2076 auto MIB =
2077 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::MOVi32imm))
2078 .addDef(Temp)
2079 .addImm(Value);
2080 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2081
2082 const auto *MMO = *I.memoperands_begin();
2083 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRWui))
2084 .addUse(Temp)
2085 .addUse(VAList)
2086 .addImm(OffsetBytes / IntSize)
2087 .addMemOperand(MF.getMachineMemOperand(
2088 MMO->getPointerInfo().getWithOffset(OffsetBytes),
2089 MachineMemOperand::MOStore, IntSize, MMO->getBaseAlign()));
2090 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2091 OffsetBytes += IntSize;
2092 };
2093
2094 // int gr_offs at offset 24 (12 on ILP32)
2095 PushIntConstant(-static_cast<int32_t>(GPRSize));
2096
2097 // int vr_offs at offset 28 (16 on ILP32)
2098 PushIntConstant(-static_cast<int32_t>(FPRSize));
2099
2100 assert(OffsetBytes == (STI.isTargetILP32() ? 20 : 32) && "Unexpected offset");
2101
2102 I.eraseFromParent();
2103 return true;
2104}
2105
2106 bool AArch64InstructionSelector::selectVaStartDarwin(
2107 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
2108 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
2109 Register ListReg = I.getOperand(0).getReg();
2110
2111 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2112
2113 int FrameIdx = FuncInfo->getVarArgsStackIndex();
2114 if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64(
2115 MF.getFunction().getCallingConv(), MF.getFunction().isVarArg())) {
2116 FrameIdx = FuncInfo->getVarArgsGPRSize() > 0
2117 ? FuncInfo->getVarArgsGPRIndex()
2118 : FuncInfo->getVarArgsStackIndex();
2119 }
2120
2121 auto MIB =
2122 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
2123 .addDef(ArgsAddrReg)
2124 .addFrameIndex(FrameIdx)
2125 .addImm(0)
2126 .addImm(0);
2127
2128 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2129
2130 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
2131 .addUse(ArgsAddrReg)
2132 .addUse(ListReg)
2133 .addImm(0)
2134 .addMemOperand(*I.memoperands_begin());
2135
2136 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2137 I.eraseFromParent();
2138 return true;
2139}
2140
2141void AArch64InstructionSelector::materializeLargeCMVal(
2142 MachineInstr &I, const Value *V, unsigned OpFlags) {
2143 MachineBasicBlock &MBB = *I.getParent();
2144 MachineFunction &MF = *MBB.getParent();
2145 MachineRegisterInfo &MRI = MF.getRegInfo();
2146
2147 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
2148 MovZ->addOperand(MF, I.getOperand(1));
2149 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
2150 AArch64II::MO_NC);
2151 MovZ->addOperand(MF, MachineOperand::CreateImm(0));
2152 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
2153
2154 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
2155 Register ForceDstReg) {
2156 Register DstReg = ForceDstReg
2157 ? ForceDstReg
2158 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2159 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
2160 if (auto *GV = dyn_cast<GlobalValue>(V)) {
2161 MovI->addOperand(MF, MachineOperand::CreateGA(
2162 GV, MovZ->getOperand(1).getOffset(), Flags));
2163 } else {
2164 MovI->addOperand(
2165 MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
2166 MovZ->getOperand(1).getOffset(), Flags));
2167 }
2168 MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
2169 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
2170 return DstReg;
2171 };
2172 Register DstReg = BuildMovK(MovZ.getReg(0),
2173 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
2174 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
2175 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
2176}
2177
2178bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
2179 MachineBasicBlock &MBB = *I.getParent();
2180 MachineFunction &MF = *MBB.getParent();
2181 MachineRegisterInfo &MRI = MF.getRegInfo();
2182
2183 switch (I.getOpcode()) {
2184 case TargetOpcode::G_STORE: {
2185 bool Changed = contractCrossBankCopyIntoStore(I, MRI);
2186 MachineOperand &SrcOp = I.getOperand(0);
2187 if (MRI.getType(SrcOp.getReg()).isPointer()) {
2188 // Allow matching with imported patterns for stores of pointers. Unlike
2189 // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
2190 // and constrain.
2191 auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
2192 Register NewSrc = Copy.getReg(0);
2193 SrcOp.setReg(NewSrc);
2194 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
2195 Changed = true;
2196 }
2197 return Changed;
2198 }
2199 case TargetOpcode::G_PTR_ADD:
2200 return convertPtrAddToAdd(I, MRI);
2201 case TargetOpcode::G_LOAD: {
2202 // For scalar loads of pointers, we try to convert the dest type from p0
2203 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
2204 // conversion, this should be ok because all users should have been
2205 // selected already, so the type doesn't matter for them.
2206 Register DstReg = I.getOperand(0).getReg();
2207 const LLT DstTy = MRI.getType(DstReg);
2208 if (!DstTy.isPointer())
2209 return false;
2210 MRI.setType(DstReg, LLT::scalar(64));
2211 return true;
2212 }
2213 case AArch64::G_DUP: {
2214 // Convert the type from p0 to s64 to help selection.
2215 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2216 if (!DstTy.isPointerVector())
2217 return false;
2218 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
2219 MRI.setType(I.getOperand(0).getReg(),
2220 DstTy.changeElementType(LLT::scalar(64)));
2221 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2222 I.getOperand(1).setReg(NewSrc.getReg(0));
2223 return true;
2224 }
2225 case AArch64::G_INSERT_VECTOR_ELT: {
2226 // Convert the type from p0 to s64 to help selection.
2227 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2228 LLT SrcVecTy = MRI.getType(I.getOperand(1).getReg());
2229 if (!SrcVecTy.isPointerVector())
2230 return false;
2231 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(2).getReg());
2232 MRI.setType(I.getOperand(1).getReg(),
2233 DstTy.changeElementType(LLT::scalar(64)));
2234 MRI.setType(I.getOperand(0).getReg(),
2235 DstTy.changeElementType(LLT::scalar(64)));
2236 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2237 I.getOperand(2).setReg(NewSrc.getReg(0));
2238 return true;
2239 }
2240 case TargetOpcode::G_UITOFP:
2241 case TargetOpcode::G_SITOFP: {
2242 // If both source and destination regbanks are FPR, then convert the opcode
2243 // to G_SITOF so that the importer can select it to an fpr variant.
2244 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
2245 // copy.
2246 Register SrcReg = I.getOperand(1).getReg();
2247 LLT SrcTy = MRI.getType(SrcReg);
2248 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2249 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
2250 return false;
2251
2252 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
2253 if (I.getOpcode() == TargetOpcode::G_SITOFP)
2254 I.setDesc(TII.get(AArch64::G_SITOF));
2255 else
2256 I.setDesc(TII.get(AArch64::G_UITOF));
2257 return true;
2258 }
2259 return false;
2260 }
2261 default:
2262 return false;
2263 }
2264}
2265
2266/// This lowering tries to look for G_PTR_ADD instructions and then converts
2267/// them to a standard G_ADD with a COPY on the source.
2268///
2269/// The motivation behind this is to expose the add semantics to the imported
2270/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
2271/// because the selector works bottom up, uses before defs. By the time we
2272/// end up trying to select a G_PTR_ADD, we should have already attempted to
2273/// fold this into addressing modes and were therefore unsuccessful.
2274 bool AArch64InstructionSelector::convertPtrAddToAdd(
2275 MachineInstr &I, MachineRegisterInfo &MRI) {
2276 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
2277 Register DstReg = I.getOperand(0).getReg();
2278 Register AddOp1Reg = I.getOperand(1).getReg();
2279 const LLT PtrTy = MRI.getType(DstReg);
2280 if (PtrTy.getAddressSpace() != 0)
2281 return false;
2282
2283 const LLT CastPtrTy =
2284 PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
2285 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
2286 // Set regbanks on the registers.
2287 if (PtrTy.isVector())
2288 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2289 else
2290 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2291
2292 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2293 // %dst(intty) = G_ADD %intbase, off
2294 I.setDesc(TII.get(TargetOpcode::G_ADD));
2295 MRI.setType(DstReg, CastPtrTy);
2296 I.getOperand(1).setReg(PtrToInt.getReg(0));
2297 if (!select(*PtrToInt)) {
2298 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
2299 return false;
2300 }
2301
2302 // Also take the opportunity here to try to do some optimization.
2303 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
2304 Register NegatedReg;
2305 if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
2306 return true;
2307 I.getOperand(2).setReg(NegatedReg);
2308 I.setDesc(TII.get(TargetOpcode::G_SUB));
2309 return true;
2310}
2311
2312 bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
2313 MachineRegisterInfo &MRI) {
2314 // We try to match the immediate variant of LSL, which is actually an alias
2315 // for a special case of UBFM. Otherwise, we fall back to the imported
2316 // selector which will match the register variant.
2317 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2318 const auto &MO = I.getOperand(2);
2319 auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
2320 if (!VRegAndVal)
2321 return false;
2322
2323 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2324 if (DstTy.isVector())
2325 return false;
2326 bool Is64Bit = DstTy.getSizeInBits() == 64;
2327 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2328 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2329
2330 if (!Imm1Fn || !Imm2Fn)
2331 return false;
2332
2333 auto NewI =
2334 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2335 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2336
2337 for (auto &RenderFn : *Imm1Fn)
2338 RenderFn(NewI);
2339 for (auto &RenderFn : *Imm2Fn)
2340 RenderFn(NewI);
2341
2342 I.eraseFromParent();
2343 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
2344}
2345
2346 bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2347 MachineInstr &I, MachineRegisterInfo &MRI) {
2348 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2349 // If we're storing a scalar, it doesn't matter what register bank that
2350 // scalar is on. All that matters is the size.
2351 //
2352 // So, if we see something like this (with a 32-bit scalar as an example):
2353 //
2354 // %x:gpr(s32) = ... something ...
2355 // %y:fpr(s32) = COPY %x:gpr(s32)
2356 // G_STORE %y:fpr(s32)
2357 //
2358 // We can fix this up into something like this:
2359 //
2360 // G_STORE %x:gpr(s32)
2361 //
2362 // And then continue the selection process normally.
2363 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2364 if (!DefDstReg.isValid())
2365 return false;
2366 LLT DefDstTy = MRI.getType(DefDstReg);
2367 Register StoreSrcReg = I.getOperand(0).getReg();
2368 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2369
2370 // If we get something strange like a physical register, then we shouldn't
2371 // go any further.
2372 if (!DefDstTy.isValid())
2373 return false;
2374
2375 // Are the source and dst types the same size?
2376 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2377 return false;
2378
2379 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2380 RBI.getRegBank(DefDstReg, MRI, TRI))
2381 return false;
2382
2383 // We have a cross-bank copy, which is entering a store. Let's fold it.
2384 I.getOperand(0).setReg(DefDstReg);
2385 return true;
2386}
2387
2388bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2389 assert(I.getParent() && "Instruction should be in a basic block!");
2390 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2391
2392 MachineBasicBlock &MBB = *I.getParent();
2393 MachineFunction &MF = *MBB.getParent();
2394 MachineRegisterInfo &MRI = MF.getRegInfo();
2395
2396 switch (I.getOpcode()) {
2397 case AArch64::G_DUP: {
2398 // Before selecting a DUP instruction, check if it is better selected as a
2399 // MOV or load from a constant pool.
2400 Register Src = I.getOperand(1).getReg();
2401 auto ValAndVReg = getAnyConstantVRegValWithLookThrough(Src, MRI);
2402 if (!ValAndVReg)
2403 return false;
2404 LLVMContext &Ctx = MF.getFunction().getContext();
2405 Register Dst = I.getOperand(0).getReg();
2406 auto *CV = ConstantDataVector::getSplat(
2407 MRI.getType(Dst).getNumElements(),
2408 ConstantInt::get(
2409 Type::getIntNTy(Ctx, MRI.getType(Dst).getScalarSizeInBits()),
2410 ValAndVReg->Value.trunc(MRI.getType(Dst).getScalarSizeInBits())));
2411 if (!emitConstantVector(Dst, CV, MIB, MRI))
2412 return false;
2413 I.eraseFromParent();
2414 return true;
2415 }
2416 case TargetOpcode::G_SEXT:
2417 // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
2418 // over a normal extend.
2419 if (selectUSMovFromExtend(I, MRI))
2420 return true;
2421 return false;
2422 case TargetOpcode::G_BR:
2423 return false;
2424 case TargetOpcode::G_SHL:
2425 return earlySelectSHL(I, MRI);
2426 case TargetOpcode::G_CONSTANT: {
2427 bool IsZero = false;
2428 if (I.getOperand(1).isCImm())
2429 IsZero = I.getOperand(1).getCImm()->isZero();
2430 else if (I.getOperand(1).isImm())
2431 IsZero = I.getOperand(1).getImm() == 0;
2432
2433 if (!IsZero)
2434 return false;
2435
2436 Register DefReg = I.getOperand(0).getReg();
2437 LLT Ty = MRI.getType(DefReg);
2438 if (Ty.getSizeInBits() == 64) {
2439 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2440 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2441 } else if (Ty.getSizeInBits() == 32) {
2442 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2443 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2444 } else
2445 return false;
2446
2447 I.setDesc(TII.get(TargetOpcode::COPY));
2448 return true;
2449 }
2450
2451 case TargetOpcode::G_ADD: {
2452 // Check if this is being fed by a G_ICMP on either side.
2453 //
2454 // (cmp pred, x, y) + z
2455 //
2456 // In the above case, when the cmp is true, we increment z by 1. So, we can
2457 // fold the add into the cset for the cmp by using cinc.
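// E.g. z + (icmp eq x, y) selects to a flag-setting compare of x and y
// followed by CSINC dst, z, z, ne (i.e. CINC dst, z, eq): dst gets z + 1 when
// the compare is true and z otherwise.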
2458 //
2459 // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
2460 Register AddDst = I.getOperand(0).getReg();
2461 Register AddLHS = I.getOperand(1).getReg();
2462 Register AddRHS = I.getOperand(2).getReg();
2463 // Only handle scalars.
2464 LLT Ty = MRI.getType(AddLHS);
2465 if (Ty.isVector())
2466 return false;
2467 // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
2468 // bits.
2469 unsigned Size = Ty.getSizeInBits();
2470 if (Size != 32 && Size != 64)
2471 return false;
2472 auto MatchCmp = [&](Register Reg) -> MachineInstr * {
2473 if (!MRI.hasOneNonDBGUse(Reg))
2474 return nullptr;
2475 // If the LHS of the add is 32 bits, then we want to fold a 32-bit
2476 // compare.
2477 if (Size == 32)
2478 return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
2479 // We model scalar compares using 32-bit destinations right now.
2480 // If it's a 64-bit compare, it'll have 64-bit sources.
2481 Register ZExt;
2482 if (!mi_match(Reg, MRI,
2483 m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt))))))
2484 return nullptr;
2485 auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
2486 if (!Cmp ||
2487 MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
2488 return nullptr;
2489 return Cmp;
2490 };
2491 // Try to match
2492 // z + (cmp pred, x, y)
2493 MachineInstr *Cmp = MatchCmp(AddRHS);
2494 if (!Cmp) {
2495 // (cmp pred, x, y) + z
2496 std::swap(AddLHS, AddRHS);
2497 Cmp = MatchCmp(AddRHS);
2498 if (!Cmp)
2499 return false;
2500 }
2501 auto &PredOp = Cmp->getOperand(1);
2502 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
2503 const AArch64CC::CondCode InvCC =
2504 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
2505 MIB.setInstrAndDebugLoc(I);
2506 emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
2507 /*RHS=*/Cmp->getOperand(3), PredOp, MIB);
2508 emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
2509 I.eraseFromParent();
2510 return true;
2511 }
2512 case TargetOpcode::G_OR: {
2513 // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2514 // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2515 // shifting and masking that we can replace with a BFI (encoded as a BFM).
2516 Register Dst = I.getOperand(0).getReg();
2517 LLT Ty = MRI.getType(Dst);
2518
2519 if (!Ty.isScalar())
2520 return false;
2521
2522 unsigned Size = Ty.getSizeInBits();
2523 if (Size != 32 && Size != 64)
2524 return false;
2525
2526 Register ShiftSrc;
2527 int64_t ShiftImm;
2528 Register MaskSrc;
2529 int64_t MaskImm;
2530 if (!mi_match(
2531 Dst, MRI,
2532 m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
2533 m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
2534 return false;
2535
2536 if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2537 return false;
2538
2539 int64_t Immr = Size - ShiftImm;
2540 int64_t Imms = Size - ShiftImm - 1;
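// For example, with Size = 32 and ShiftImm = 8 this emits BFMWri with
// immr = 24, imms = 23, i.e. BFI dst, ShiftSrc, #8, #24: the low 24 bits of
// ShiftSrc land at bit 8 and the low 8 bits are taken from MaskSrc.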
2541 unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2542 emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2543 I.eraseFromParent();
2544 return true;
2545 }
2546 case TargetOpcode::G_FENCE: {
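// Operand 1 is the fence's synchronization scope; 0 (singlethread) only needs
// a compiler barrier. Otherwise operand 0 holds the atomic ordering, and an
// acquire-only fence (ordering 4) can typically use DMB ISHLD (0x9) rather
// than a full DMB ISH (0xb).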
2547 if (I.getOperand(1).getImm() == 0)
2548 BuildMI(MBB, I, MIMetadata(I), TII.get(TargetOpcode::MEMBARRIER));
2549 else
2550 BuildMI(MBB, I, MIMetadata(I), TII.get(AArch64::DMB))
2551 .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
2552 I.eraseFromParent();
2553 return true;
2554 }
2555 default:
2556 return false;
2557 }
2558}
2559
2560bool AArch64InstructionSelector::select(MachineInstr &I) {
2561 assert(I.getParent() && "Instruction should be in a basic block!");
2562 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2563
2564 MachineBasicBlock &MBB = *I.getParent();
2565 MachineFunction &MF = *MBB.getParent();
2566 MachineRegisterInfo &MRI = MF.getRegInfo();
2567
2568 const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
2569 if (Subtarget->requiresStrictAlign()) {
2570 // We don't support this feature yet.
2571 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2572 return false;
2573 }
2574
2575 MIB.setInstrAndDebugLoc(I);
2576
2577 unsigned Opcode = I.getOpcode();
2578 // G_PHI requires same handling as PHI
2579 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2580 // Certain non-generic instructions also need some special handling.
2581
2582 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2583 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2584
2585 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2586 const Register DefReg = I.getOperand(0).getReg();
2587 const LLT DefTy = MRI.getType(DefReg);
2588
2589 const RegClassOrRegBank &RegClassOrBank =
2590 MRI.getRegClassOrRegBank(DefReg);
2591
2592 const TargetRegisterClass *DefRC =
2593 dyn_cast<const TargetRegisterClass *>(RegClassOrBank);
2594 if (!DefRC) {
2595 if (!DefTy.isValid()) {
2596 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2597 return false;
2598 }
2599 const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
2600 DefRC = getRegClassForTypeOnBank(DefTy, RB);
2601 if (!DefRC) {
2602 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2603 return false;
2604 }
2605 }
2606
2607 I.setDesc(TII.get(TargetOpcode::PHI));
2608
2609 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2610 }
2611
2612 if (I.isCopy())
2613 return selectCopy(I, TII, MRI, TRI, RBI);
2614
2615 if (I.isDebugInstr())
2616 return selectDebugInstr(I, MRI, RBI);
2617
2618 return true;
2619 }
2620
2621
2622 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2623 LLVM_DEBUG(
2624 dbgs() << "Generic instruction has unexpected implicit operands\n");
2625 return false;
2626 }
2627
2628 // Try to do some lowering before we start instruction selecting. These
2629 // lowerings are purely transformations on the input G_MIR and so selection
2630 // must continue after any modification of the instruction.
2631 if (preISelLower(I)) {
2632 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2633 }
2634
2635 // There may be patterns where the importer can't deal with them optimally,
2636 // but does select it to a suboptimal sequence so our custom C++ selection
2637 // code later never has a chance to work on it. Therefore, we have an early
2638 // selection attempt here to give priority to certain selection routines
2639 // over the imported ones.
2640 if (earlySelect(I))
2641 return true;
2642
2643 if (selectImpl(I, *CoverageInfo))
2644 return true;
2645
2646 LLT Ty =
2647 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2648
2649 switch (Opcode) {
2650 case TargetOpcode::G_SBFX:
2651 case TargetOpcode::G_UBFX: {
2652 static const unsigned OpcTable[2][2] = {
2653 {AArch64::UBFMWri, AArch64::UBFMXri},
2654 {AArch64::SBFMWri, AArch64::SBFMXri}};
2655 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2656 unsigned Size = Ty.getSizeInBits();
2657 unsigned Opc = OpcTable[IsSigned][Size == 64];
2658 auto Cst1 =
2659 getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
2660 assert(Cst1 && "Should have gotten a constant for src 1?");
2661 auto Cst2 =
2662 getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
2663 assert(Cst2 && "Should have gotten a constant for src 2?");
2664 auto LSB = Cst1->Value.getZExtValue();
2665 auto Width = Cst2->Value.getZExtValue();
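// G_SBFX/G_UBFX take (lsb, width); the SBFM/UBFM forms built below take
// immr = lsb and imms = lsb + width - 1, the index of the last extracted bit.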
2666 auto BitfieldInst =
2667 MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
2668 .addImm(LSB)
2669 .addImm(LSB + Width - 1);
2670 I.eraseFromParent();
2671 return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
2672 }
2673 case TargetOpcode::G_BRCOND:
2674 return selectCompareBranch(I, MF, MRI);
2675
2676 case TargetOpcode::G_BRINDIRECT: {
2677 const Function &Fn = MF.getFunction();
2678 if (std::optional<uint16_t> BADisc =
2679 STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(Fn)) {
2680 auto MI = MIB.buildInstr(AArch64::BRA, {}, {I.getOperand(0).getReg()});
2681 MI.addImm(AArch64PACKey::IA);
2682 MI.addImm(*BADisc);
2683 MI.addReg(/*AddrDisc=*/AArch64::XZR);
2684 I.eraseFromParent();
2685 return constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
2686 }
2687 I.setDesc(TII.get(AArch64::BR));
2688 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2689 }
2690
2691 case TargetOpcode::G_BRJT:
2692 return selectBrJT(I, MRI);
2693
2694 case AArch64::G_ADD_LOW: {
2695 // This op may have been separated from its ADRP companion by the localizer
2696 // or some other code motion pass. Given that many CPUs will try to
2697 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2698 // which will later be expanded into an ADRP+ADD pair after scheduling.
2699 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2700 if (BaseMI->getOpcode() != AArch64::ADRP) {
2701 I.setDesc(TII.get(AArch64::ADDXri));
2702 I.addOperand(MachineOperand::CreateImm(0));
2703 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2704 }
2705 assert(TM.getCodeModel() == CodeModel::Small &&
2706 "Expected small code model");
2707 auto Op1 = BaseMI->getOperand(1);
2708 auto Op2 = I.getOperand(2);
2709 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2710 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2711 Op1.getTargetFlags())
2712 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2713 Op2.getTargetFlags());
2714 I.eraseFromParent();
2715 return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2716 }
2717
2718 case TargetOpcode::G_FCONSTANT:
2719 case TargetOpcode::G_CONSTANT: {
2720 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2721
2722 const LLT s8 = LLT::scalar(8);
2723 const LLT s16 = LLT::scalar(16);
2724 const LLT s32 = LLT::scalar(32);
2725 const LLT s64 = LLT::scalar(64);
2726 const LLT s128 = LLT::scalar(128);
2727 const LLT p0 = LLT::pointer(0, 64);
2728
2729 const Register DefReg = I.getOperand(0).getReg();
2730 const LLT DefTy = MRI.getType(DefReg);
2731 const unsigned DefSize = DefTy.getSizeInBits();
2732 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2733
2734 // FIXME: Redundant check, but even less readable when factored out.
2735 if (isFP) {
2736 if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2737 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2738 << " constant, expected: " << s16 << " or " << s32
2739 << " or " << s64 << " or " << s128 << '\n');
2740 return false;
2741 }
2742
2743 if (RB.getID() != AArch64::FPRRegBankID) {
2744 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2745 << " constant on bank: " << RB
2746 << ", expected: FPR\n");
2747 return false;
2748 }
2749
2750 // The case when we have 0.0 is covered by tablegen. Reject it here so we
2751 // can be sure tablegen works correctly and isn't rescued by this code.
2752 // 0.0 is not covered by tablegen for FP128. So we will handle this
2753 // scenario in the code here.
2754 if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2755 return false;
2756 } else {
2757 // s32 and s64 are covered by tablegen.
2758 if (Ty != p0 && Ty != s8 && Ty != s16) {
2759 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2760 << " constant, expected: " << s32 << ", " << s64
2761 << ", or " << p0 << '\n');
2762 return false;
2763 }
2764
2765 if (RB.getID() != AArch64::GPRRegBankID) {
2766 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2767 << " constant on bank: " << RB
2768 << ", expected: GPR\n");
2769 return false;
2770 }
2771 }
2772
2773 if (isFP) {
2774 const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB);
2775 // For 16, 64, and 128b values, emit a constant pool load.
2776 switch (DefSize) {
2777 default:
2778 llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
2779 case 32:
2780 case 64: {
2781 bool OptForSize = shouldOptForSize(&MF);
2782 const auto &TLI = MF.getSubtarget().getTargetLowering();
2783 // If TLI says that this fpimm is illegal, then we'll expand to a
2784 // constant pool load.
2785 if (TLI->isFPImmLegal(I.getOperand(1).getFPImm()->getValueAPF(),
2786 EVT::getFloatingPointVT(DefSize), OptForSize))
2787 break;
2788 [[fallthrough]];
2789 }
2790 case 16:
2791 case 128: {
2792 auto *FPImm = I.getOperand(1).getFPImm();
2793 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2794 if (!LoadMI) {
2795 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2796 return false;
2797 }
2798 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2799 I.eraseFromParent();
2800 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2801 }
2802 }
2803
2804 assert((DefSize == 32 || DefSize == 64) && "Unexpected const def size");
2805 // Either emit a FMOV, or emit a copy to emit a normal mov.
2806 const Register DefGPRReg = MRI.createVirtualRegister(
2807 DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2808 MachineOperand &RegOp = I.getOperand(0);
2809 RegOp.setReg(DefGPRReg);
2810 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2811 MIB.buildCopy({DefReg}, {DefGPRReg});
2812
2813 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2814 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2815 return false;
2816 }
2817
2818 MachineOperand &ImmOp = I.getOperand(1);
2819 // FIXME: Is going through int64_t always correct?
2820 ImmOp.ChangeToImmediate(
2821 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2822 } else if (I.getOperand(1).isCImm()) {
2823 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2824 I.getOperand(1).ChangeToImmediate(Val);
2825 } else if (I.getOperand(1).isImm()) {
2826 uint64_t Val = I.getOperand(1).getImm();
2827 I.getOperand(1).ChangeToImmediate(Val);
2828 }
2829
2830 const unsigned MovOpc =
2831 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2832 I.setDesc(TII.get(MovOpc));
2833 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2834 return true;
2835 }
2836 case TargetOpcode::G_EXTRACT: {
2837 Register DstReg = I.getOperand(0).getReg();
2838 Register SrcReg = I.getOperand(1).getReg();
2839 LLT SrcTy = MRI.getType(SrcReg);
2840 LLT DstTy = MRI.getType(DstReg);
2841 (void)DstTy;
2842 unsigned SrcSize = SrcTy.getSizeInBits();
2843
2844 if (SrcTy.getSizeInBits() > 64) {
2845 // This should be an extract of an s128, which is like a vector extract.
2846 if (SrcTy.getSizeInBits() != 128)
2847 return false;
2848 // Only support extracting 64 bits from an s128 at the moment.
2849 if (DstTy.getSizeInBits() != 64)
2850 return false;
2851
2852 unsigned Offset = I.getOperand(2).getImm();
2853 if (Offset % 64 != 0)
2854 return false;
2855
2856 // Check we have the right regbank always.
2857 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2858 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2859 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2860
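// On the GPR bank an s128 is presumably held in a 64-bit register pair, so
// offset 0 extracts the low half via sube64 and offset 64 the high half via
// subo64.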
2861 if (SrcRB.getID() == AArch64::GPRRegBankID) {
2862 auto NewI =
2863 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2864 .addUse(SrcReg, 0,
2865 Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2866 constrainOperandRegClass(MF, TRI, MRI, TII, RBI, *NewI,
2867 AArch64::GPR64RegClass, NewI->getOperand(0));
2868 I.eraseFromParent();
2869 return true;
2870 }
2871
2872 // Emit the same code as a vector extract.
2873 // Offset must be a multiple of 64.
2874 unsigned LaneIdx = Offset / 64;
2875 MachineInstr *Extract = emitExtractVectorElt(
2876 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2877 if (!Extract)
2878 return false;
2879 I.eraseFromParent();
2880 return true;
2881 }
2882
2883 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2884 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2885 Ty.getSizeInBits() - 1);
2886
2887 if (SrcSize < 64) {
2888 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2889 "unexpected G_EXTRACT types");
2890 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2891 }
2892
2893 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2894 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2895 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2896 .addReg(DstReg, 0, AArch64::sub_32);
2897 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2898 AArch64::GPR32RegClass, MRI);
2899 I.getOperand(0).setReg(DstReg);
2900
2901 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2902 }
2903
2904 case TargetOpcode::G_INSERT: {
2905 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2906 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2907 unsigned DstSize = DstTy.getSizeInBits();
2908 // Larger inserts are vectors, same-size ones should be something else by
2909 // now (split up or turned into COPYs).
2910 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2911 return false;
2912
2913 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2914 unsigned LSB = I.getOperand(3).getImm();
2915 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2916 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2917 MachineInstrBuilder(MF, I).addImm(Width - 1);
2918
2919 if (DstSize < 64) {
2920 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2921 "unexpected G_INSERT types");
2922 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2923 }
2924
2925 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2926 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2927 TII.get(AArch64::SUBREG_TO_REG))
2928 .addDef(SrcReg)
2929 .addImm(0)
2930 .addUse(I.getOperand(2).getReg())
2931 .addImm(AArch64::sub_32);
2932 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2933 AArch64::GPR32RegClass, MRI);
2934 I.getOperand(2).setReg(SrcReg);
2935
2936 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2937 }
2938 case TargetOpcode::G_FRAME_INDEX: {
2939 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2940 if (Ty != LLT::pointer(0, 64)) {
2941 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2942 << ", expected: " << LLT::pointer(0, 64) << '\n');
2943 return false;
2944 }
2945 I.setDesc(TII.get(AArch64::ADDXri));
2946
2947 // MOs for a #0 shifted immediate.
2948 I.addOperand(MachineOperand::CreateImm(0));
2949 I.addOperand(MachineOperand::CreateImm(0));
2950
2951 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2952 }
2953
2954 case TargetOpcode::G_GLOBAL_VALUE: {
2955 const GlobalValue *GV = nullptr;
2956 unsigned OpFlags;
2957 if (I.getOperand(1).isSymbol()) {
2958 OpFlags = I.getOperand(1).getTargetFlags();
2959 // Currently only used by "RtLibUseGOT".
2960 assert(OpFlags == AArch64II::MO_GOT);
2961 } else {
2962 GV = I.getOperand(1).getGlobal();
2963 if (GV->isThreadLocal())
2964 return selectTLSGlobalValue(I, MRI);
2965 OpFlags = STI.ClassifyGlobalReference(GV, TM);
2966 }
2967
2968 if (OpFlags & AArch64II::MO_GOT) {
2969 I.setDesc(TII.get(MF.getInfo<AArch64FunctionInfo>()->hasELFSignedGOT()
2970 ? AArch64::LOADgotAUTH
2971 : AArch64::LOADgot));
2972 I.getOperand(1).setTargetFlags(OpFlags);
2973 } else if (TM.getCodeModel() == CodeModel::Large &&
2974 !TM.isPositionIndependent()) {
2975 // Materialize the global using movz/movk instructions.
2976 materializeLargeCMVal(I, GV, OpFlags);
2977 I.eraseFromParent();
2978 return true;
2979 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2980 I.setDesc(TII.get(AArch64::ADR));
2981 I.getOperand(1).setTargetFlags(OpFlags);
2982 } else {
2983 I.setDesc(TII.get(AArch64::MOVaddr));
2984 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2985 MachineInstrBuilder MIB(MF, I);
2986 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2987 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2988 }
2989 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2990 }
2991
2992 case TargetOpcode::G_PTRAUTH_GLOBAL_VALUE:
2993 return selectPtrAuthGlobalValue(I, MRI);
2994
2995 case TargetOpcode::G_ZEXTLOAD:
2996 case TargetOpcode::G_LOAD:
2997 case TargetOpcode::G_STORE: {
2998 GLoadStore &LdSt = cast<GLoadStore>(I);
2999 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
3000 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
3001
3002 if (PtrTy != LLT::pointer(0, 64)) {
3003 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
3004 << ", expected: " << LLT::pointer(0, 64) << '\n');
3005 return false;
3006 }
3007
3008 uint64_t MemSizeInBytes = LdSt.getMemSize().getValue();
3009 unsigned MemSizeInBits = LdSt.getMemSizeInBits().getValue();
3010 AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
3011
3012 // Need special instructions for atomics that affect ordering.
3013 if (Order != AtomicOrdering::NotAtomic &&
3014 Order != AtomicOrdering::Unordered &&
3015 Order != AtomicOrdering::Monotonic) {
3016 assert(!isa<GZExtLoad>(LdSt));
3017 assert(MemSizeInBytes <= 8 &&
3018 "128-bit atomics should already be custom-legalized");
3019
3020 if (isa<GLoad>(LdSt)) {
3021 static constexpr unsigned LDAPROpcodes[] = {
3022 AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
3023 static constexpr unsigned LDAROpcodes[] = {
3024 AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
3025 ArrayRef<unsigned> Opcodes =
3026 STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
3027 ? LDAPROpcodes
3028 : LDAROpcodes;
3029 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
3030 } else {
3031 static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
3032 AArch64::STLRW, AArch64::STLRX};
3033 Register ValReg = LdSt.getReg(0);
3034 if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
3035 // Emit a subreg copy of 32 bits.
3036 Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3037 MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
3038 .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
3039 I.getOperand(0).setReg(NewVal);
3040 }
3041 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
3042 }
3043 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3044 return true;
3045 }
3046
3047#ifndef NDEBUG
3048 const Register PtrReg = LdSt.getPointerReg();
3049 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
3050 // Check that the pointer register is valid.
3051 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
3052 "Load/Store pointer operand isn't a GPR");
3053 assert(MRI.getType(PtrReg).isPointer() &&
3054 "Load/Store pointer operand isn't a pointer");
3055#endif
3056
3057 const Register ValReg = LdSt.getReg(0);
3058 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
3059 LLT ValTy = MRI.getType(ValReg);
3060
3061 // The code below doesn't support truncating stores, so we need to split it
3062 // again.
3063 if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
3064 unsigned SubReg;
3065 LLT MemTy = LdSt.getMMO().getMemoryType();
3066 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
3067 if (!getSubRegForClass(RC, TRI, SubReg))
3068 return false;
3069
3070 // Generate a subreg copy.
3071 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
3072 .addReg(ValReg, 0, SubReg)
3073 .getReg(0);
3074 RBI.constrainGenericRegister(Copy, *RC, MRI);
3075 LdSt.getOperand(0).setReg(Copy);
3076 } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
3077 // If this is an any-extending load from the FPR bank, split it into a regular
3078 // load + extend.
3079 if (RB.getID() == AArch64::FPRRegBankID) {
3080 unsigned SubReg;
3081 LLT MemTy = LdSt.getMMO().getMemoryType();
3082 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
3083 if (!getSubRegForClass(RC, TRI, SubReg))
3084 return false;
3085 Register OldDst = LdSt.getReg(0);
3086 Register NewDst =
3087 MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
3088 LdSt.getOperand(0).setReg(NewDst);
3089 MRI.setRegBank(NewDst, RB);
3090 // Generate a SUBREG_TO_REG to extend it.
3091 MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
3092 MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
3093 .addImm(0)
3094 .addUse(NewDst)
3095 .addImm(SubReg);
3096 auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB);
3097 RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
3098 MIB.setInstr(LdSt);
3099 ValTy = MemTy; // This is no longer an extending load.
3100 }
3101 }
3102
3103 // Helper lambda for partially selecting I. Either returns the original
3104 // instruction with an updated opcode, or a new instruction.
3105 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
3106 bool IsStore = isa<GStore>(I);
3107 const unsigned NewOpc =
3108 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
3109 if (NewOpc == I.getOpcode())
3110 return nullptr;
3111 // Check if we can fold anything into the addressing mode.
3112 auto AddrModeFns =
3113 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
3114 if (!AddrModeFns) {
3115 // Can't fold anything. Use the original instruction.
3116 I.setDesc(TII.get(NewOpc));
3117 I.addOperand(MachineOperand::CreateImm(0));
3118 return &I;
3119 }
3120
3121 // Folded something. Create a new instruction and return it.
3122 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
3123 Register CurValReg = I.getOperand(0).getReg();
3124 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
3125 NewInst.cloneMemRefs(I);
3126 for (auto &Fn : *AddrModeFns)
3127 Fn(NewInst);
3128 I.eraseFromParent();
3129 return &*NewInst;
3130 };
3131
3132 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
3133 if (!LoadStore)
3134 return false;
3135
3136 // If we're storing a 0, use WZR/XZR.
3137 if (Opcode == TargetOpcode::G_STORE) {
3138 auto CVal = getIConstantVRegValWithLookThrough(
3139 LoadStore->getOperand(0).getReg(), MRI);
3140 if (CVal && CVal->Value == 0) {
3141 switch (LoadStore->getOpcode()) {
3142 case AArch64::STRWui:
3143 case AArch64::STRHHui:
3144 case AArch64::STRBBui:
3145 LoadStore->getOperand(0).setReg(AArch64::WZR);
3146 break;
3147 case AArch64::STRXui:
3148 LoadStore->getOperand(0).setReg(AArch64::XZR);
3149 break;
3150 }
3151 }
3152 }
3153
3154 if (IsZExtLoad || (Opcode == TargetOpcode::G_LOAD &&
3155 ValTy == LLT::scalar(64) && MemSizeInBits == 32)) {
3156 // The any/zextload from a smaller type to i32 should be handled by the
3157 // importer.
3158 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
3159 return false;
3160 // If we have an extending load then change the load's type to be a
3161 // narrower reg and zero_extend with SUBREG_TO_REG.
3162 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3163 Register DstReg = LoadStore->getOperand(0).getReg();
3164 LoadStore->getOperand(0).setReg(LdReg);
3165
3166 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
3167 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
3168 .addImm(0)
3169 .addUse(LdReg)
3170 .addImm(AArch64::sub_32);
3171 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3172 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
3173 MRI);
3174 }
3175 return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3176 }
3177
3178 case TargetOpcode::G_INDEXED_ZEXTLOAD:
3179 case TargetOpcode::G_INDEXED_SEXTLOAD:
3180 return selectIndexedExtLoad(I, MRI);
3181 case TargetOpcode::G_INDEXED_LOAD:
3182 return selectIndexedLoad(I, MRI);
3183 case TargetOpcode::G_INDEXED_STORE:
3184 return selectIndexedStore(cast<GIndexedStore>(I), MRI);
3185
3186 case TargetOpcode::G_LSHR:
3187 case TargetOpcode::G_ASHR:
3188 if (MRI.getType(I.getOperand(0).getReg()).isVector())
3189 return selectVectorAshrLshr(I, MRI);
3190 [[fallthrough]];
3191 case TargetOpcode::G_SHL:
3192 if (Opcode == TargetOpcode::G_SHL &&
3193 MRI.getType(I.getOperand(0).getReg()).isVector())
3194 return selectVectorSHL(I, MRI);
3195
3196 // These shifts were legalized to have 64 bit shift amounts because we
3197 // want to take advantage of the selection patterns that assume the
3198 // immediates are s64s, however, selectBinaryOp will assume both operands
3199 // will have the same bit size.
3200 {
3201 Register SrcReg = I.getOperand(1).getReg();
3202 Register ShiftReg = I.getOperand(2).getReg();
3203 const LLT ShiftTy = MRI.getType(ShiftReg);
3204 const LLT SrcTy = MRI.getType(SrcReg);
3205 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
3206 ShiftTy.getSizeInBits() == 64) {
3207 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
3208 // Insert a subregister copy to implement a 64->32 trunc
3209 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
3210 .addReg(ShiftReg, 0, AArch64::sub_32);
3211 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
3212 I.getOperand(2).setReg(Trunc.getReg(0));
3213 }
3214 }
3215 [[fallthrough]];
3216 case TargetOpcode::G_OR: {
3217 // Reject the various things we don't support yet.
3218 if (unsupportedBinOp(I, RBI, MRI, TRI))
3219 return false;
3220
3221 const unsigned OpSize = Ty.getSizeInBits();
3222
3223 const Register DefReg = I.getOperand(0).getReg();
3224 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
3225
3226 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
3227 if (NewOpc == I.getOpcode())
3228 return false;
3229
3230 I.setDesc(TII.get(NewOpc));
3231 // FIXME: Should the type be always reset in setDesc?
3232
3233 // Now that we selected an opcode, we need to constrain the register
3234 // operands to use appropriate classes.
3235 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3236 }
3237
3238 case TargetOpcode::G_PTR_ADD: {
3239 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
3240 I.eraseFromParent();
3241 return true;
3242 }
3243
3244 case TargetOpcode::G_SADDE:
3245 case TargetOpcode::G_UADDE:
3246 case TargetOpcode::G_SSUBE:
3247 case TargetOpcode::G_USUBE:
3248 case TargetOpcode::G_SADDO:
3249 case TargetOpcode::G_UADDO:
3250 case TargetOpcode::G_SSUBO:
3251 case TargetOpcode::G_USUBO:
3252 return selectOverflowOp(I, MRI);
3253
3254 case TargetOpcode::G_PTRMASK: {
3255 Register MaskReg = I.getOperand(2).getReg();
3256 std::optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
3257 // TODO: Implement arbitrary cases
3258 if (!MaskVal || !isShiftedMask_64(*MaskVal))
3259 return false;
3260
3261 uint64_t Mask = *MaskVal;
3262 I.setDesc(TII.get(AArch64::ANDXri));
3263 I.getOperand(2).ChangeToImmediate(
3264 AArch64_AM::encodeLogicalImmediate(Mask, 64));
3265
3266 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3267 }
3268 case TargetOpcode::G_PTRTOINT:
3269 case TargetOpcode::G_TRUNC: {
3270 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3271 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3272
3273 const Register DstReg = I.getOperand(0).getReg();
3274 const Register SrcReg = I.getOperand(1).getReg();
3275
3276 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3277 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3278
3279 if (DstRB.getID() != SrcRB.getID()) {
3280 LLVM_DEBUG(
3281 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
3282 return false;
3283 }
3284
3285 if (DstRB.getID() == AArch64::GPRRegBankID) {
3286 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3287 if (!DstRC)
3288 return false;
3289
3290 const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB);
3291 if (!SrcRC)
3292 return false;
3293
3294 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
3295 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
3296 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
3297 return false;
3298 }
3299
3300 if (DstRC == SrcRC) {
3301 // Nothing to be done
3302 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
3303 SrcTy == LLT::scalar(64)) {
3304 llvm_unreachable("TableGen can import this case");
3305 return false;
3306 } else if (DstRC == &AArch64::GPR32RegClass &&
3307 SrcRC == &AArch64::GPR64RegClass) {
3308 I.getOperand(1).setSubReg(AArch64::sub_32);
3309 } else {
3310 LLVM_DEBUG(
3311 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3312 return false;
3313 }
3314
3315 I.setDesc(TII.get(TargetOpcode::COPY));
3316 return true;
3317 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3318 if (DstTy == LLT::fixed_vector(4, 16) &&
3319 SrcTy == LLT::fixed_vector(4, 32)) {
3320 I.setDesc(TII.get(AArch64::XTNv4i16));
3321 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3322 return true;
3323 }
3324
3325 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3326 MachineInstr *Extract = emitExtractVectorElt(
3327 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
3328 if (!Extract)
3329 return false;
3330 I.eraseFromParent();
3331 return true;
3332 }
3333
3334 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3335 if (Opcode == TargetOpcode::G_PTRTOINT) {
3336 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
3337 I.setDesc(TII.get(TargetOpcode::COPY));
3338 return selectCopy(I, TII, MRI, TRI, RBI);
3339 }
3340 }
3341
3342 return false;
3343 }
3344
3345 case TargetOpcode::G_ANYEXT: {
3346 if (selectUSMovFromExtend(I, MRI))
3347 return true;
3348
3349 const Register DstReg = I.getOperand(0).getReg();
3350 const Register SrcReg = I.getOperand(1).getReg();
3351
3352 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
3353 if (RBDst.getID() != AArch64::GPRRegBankID) {
3354 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
3355 << ", expected: GPR\n");
3356 return false;
3357 }
3358
3359 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
3360 if (RBSrc.getID() != AArch64::GPRRegBankID) {
3361 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
3362 << ", expected: GPR\n");
3363 return false;
3364 }
3365
3366 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
3367
3368 if (DstSize == 0) {
3369 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
3370 return false;
3371 }
3372
3373 if (DstSize != 64 && DstSize > 32) {
3374 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
3375 << ", expected: 32 or 64\n");
3376 return false;
3377 }
3378 // At this point G_ANYEXT is just like a plain COPY, but if the destination
3379 // is 64 bits we need to explicitly form the 64-bit value.
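// (SUBREG_TO_REG emits no code; it simply places the 32-bit value in the
// sub_32 subregister of a fresh 64-bit register, with the leading immediate
// describing the remaining bits.)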
3380 if (DstSize > 32) {
3381 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3382 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
3383 .addDef(ExtSrc)
3384 .addImm(0)
3385 .addUse(SrcReg)
3386 .addImm(AArch64::sub_32);
3387 I.getOperand(1).setReg(ExtSrc);
3388 }
3389 return selectCopy(I, TII, MRI, TRI, RBI);
3390 }
3391
3392 case TargetOpcode::G_ZEXT:
3393 case TargetOpcode::G_SEXT_INREG:
3394 case TargetOpcode::G_SEXT: {
3395 if (selectUSMovFromExtend(I, MRI))
3396 return true;
3397
3398 unsigned Opcode = I.getOpcode();
3399 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3400 const Register DefReg = I.getOperand(0).getReg();
3401 Register SrcReg = I.getOperand(1).getReg();
3402 const LLT DstTy = MRI.getType(DefReg);
3403 const LLT SrcTy = MRI.getType(SrcReg);
3404 unsigned DstSize = DstTy.getSizeInBits();
3405 unsigned SrcSize = SrcTy.getSizeInBits();
3406
3407 // SEXT_INREG has the same src reg size as dst, the size of the value to be
3408 // extended is encoded in the imm.
3409 if (Opcode == TargetOpcode::G_SEXT_INREG)
3410 SrcSize = I.getOperand(2).getImm();
3411
3412 if (DstTy.isVector())
3413 return false; // Should be handled by imported patterns.
3414
3415 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3416 AArch64::GPRRegBankID &&
3417 "Unexpected ext regbank");
3418
3419 MachineInstr *ExtI;
3420
3421 // First, check whether we're extending the result of a load whose dest type
3422 // is smaller than 32 bits; if so, this zext is redundant. GPR32 is the
3423 // smallest GPR register on AArch64, and all narrower loads automatically
3424 // zero-extend into the upper bits. E.g.
3425 // %v(s8) = G_LOAD %p, :: (load 1)
3426 // %v2(s32) = G_ZEXT %v(s8)
3427 if (!IsSigned) {
3428 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
3429 bool IsGPR =
3430 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3431 if (LoadMI && IsGPR) {
3432 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3433 unsigned BytesLoaded = MemOp->getSize().getValue();
3434 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3435 return selectCopy(I, TII, MRI, TRI, RBI);
3436 }
3437
3438 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3439 // + SUBREG_TO_REG.
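// (ORRWrs with WZR is the canonical W-register move, and writing a W register
// zeroes bits 63:32 of its X register, so the SUBREG_TO_REG may legitimately
// claim the upper bits are zero.)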
3440 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3441 Register SubregToRegSrc =
3442 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3443 const Register ZReg = AArch64::WZR;
3444 MIB.buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg})
3445 .addImm(0);
3446
3447 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3448 .addImm(0)
3449 .addUse(SubregToRegSrc)
3450 .addImm(AArch64::sub_32);
3451
3452 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3453 MRI)) {
3454 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3455 return false;
3456 }
3457
3458 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3459 MRI)) {
3460 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3461 return false;
3462 }
3463
3464 I.eraseFromParent();
3465 return true;
3466 }
3467 }
3468
3469 if (DstSize == 64) {
3470 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3471 // FIXME: Can we avoid manually doing this?
3472 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3473 MRI)) {
3474 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3475 << " operand\n");
3476 return false;
3477 }
3478 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
3479 {&AArch64::GPR64RegClass}, {})
3480 .addImm(0)
3481 .addUse(SrcReg)
3482 .addImm(AArch64::sub_32)
3483 .getReg(0);
3484 }
3485
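// A bitfield move with immr = 0 and imms = SrcSize - 1 is the canonical
// extend encoding, e.g. UBFMWri #0, #7 is UXTB and SBFMXri #0, #31 is SXTW.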
3486 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3487 {DefReg}, {SrcReg})
3488 .addImm(0)
3489 .addImm(SrcSize - 1);
3490 } else if (DstSize <= 32) {
3491 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3492 {DefReg}, {SrcReg})
3493 .addImm(0)
3494 .addImm(SrcSize - 1);
3495 } else {
3496 return false;
3497 }
3498
3499 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
3500 I.eraseFromParent();
3501 return true;
3502 }
3503
3504 case TargetOpcode::G_SITOFP:
3505 case TargetOpcode::G_UITOFP:
3506 case TargetOpcode::G_FPTOSI:
3507 case TargetOpcode::G_FPTOUI: {
3508 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
3509 SrcTy = MRI.getType(I.getOperand(1).getReg());
3510 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
3511 if (NewOpc == Opcode)
3512 return false;
3513
3514 I.setDesc(TII.get(NewOpc));
3515 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3516 I.setFlags(MachineInstr::NoFPExcept);
3517
3518 return true;
3519 }
3520
3521 case TargetOpcode::G_FREEZE:
3522 return selectCopy(I, TII, MRI, TRI, RBI);
3523
3524 case TargetOpcode::G_INTTOPTR:
3525 // The importer is currently unable to import pointer types since they
3526 // didn't exist in SelectionDAG.
3527 return selectCopy(I, TII, MRI, TRI, RBI);
3528
3529 case TargetOpcode::G_BITCAST:
3530 // Imported SelectionDAG rules can handle every bitcast except those that
3531 // bitcast from a type to the same type. Ideally, these shouldn't occur
3532 // but we might not run an optimizer that deletes them. The other exception
3533 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3534 // of them.
3535 return selectCopy(I, TII, MRI, TRI, RBI);
3536
3537 case TargetOpcode::G_SELECT: {
3538 auto &Sel = cast<GSelect>(I);
3539 const Register CondReg = Sel.getCondReg();
3540 const Register TReg = Sel.getTrueReg();
3541 const Register FReg = Sel.getFalseReg();
3542
3543 if (tryOptSelect(Sel))
3544 return true;
3545
3546 // Make sure to use an unused vreg instead of wzr, so that the peephole
3547 // optimizations will be able to optimize these.
3548 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3549 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3550 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3551 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3552 if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
3553 return false;
3554 Sel.eraseFromParent();
3555 return true;
3556 }
3557 case TargetOpcode::G_ICMP: {
3558 if (Ty.isVector())
3559 return false;
3560
3561 if (Ty != LLT::scalar(32)) {
3562 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3563 << ", expected: " << LLT::scalar(32) << '\n');
3564 return false;
3565 }
3566
3567 auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3568 const AArch64CC::CondCode InvCC =
3569 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
3570 emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB);
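// CSINC Wd, WZR, WZR, invcc is the CSET Wd, cc alias: it materializes the
// boolean result of the compare as 0 or 1.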
3571 emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
3572 /*Src2=*/AArch64::WZR, InvCC, MIB);
3573 I.eraseFromParent();
3574 return true;
3575 }
3576
3577 case TargetOpcode::G_FCMP: {
3578 CmpInst::Predicate Pred =
3579 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3580 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
3581 Pred) ||
3582 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
3583 return false;
3584 I.eraseFromParent();
3585 return true;
3586 }
3587 case TargetOpcode::G_VASTART:
3588 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3589 : selectVaStartAAPCS(I, MF, MRI);
3590 case TargetOpcode::G_INTRINSIC:
3591 return selectIntrinsic(I, MRI);
3592 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3593 return selectIntrinsicWithSideEffects(I, MRI);
3594 case TargetOpcode::G_IMPLICIT_DEF: {
3595 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3596 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3597 const Register DstReg = I.getOperand(0).getReg();
3598 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3599 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3600 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3601 return true;
3602 }
3603 case TargetOpcode::G_BLOCK_ADDR: {
3604 Function *BAFn = I.getOperand(1).getBlockAddress()->getFunction();
3605 if (std::optional<uint16_t> BADisc =
3606 STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(*BAFn)) {
3607 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X16}, {});
3608 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
3609 MIB.buildInstr(AArch64::MOVaddrPAC)
3610 .addBlockAddress(I.getOperand(1).getBlockAddress())
3611 .addImm(AArch64PACKey::IA)
3612 .addReg(/*AddrDisc=*/AArch64::XZR)
3613 .addImm(*BADisc)
3614 .constrainAllUses(TII, TRI, RBI);
3615 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X16));
3616 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
3617 AArch64::GPR64RegClass, MRI);
3618 I.eraseFromParent();
3619 return true;
3620 }
3621 if (TM.getCodeModel() == CodeModel::Large && !TM.isPositionIndependent()) {
3622 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3623 I.eraseFromParent();
3624 return true;
3625 } else {
3626 I.setDesc(TII.get(AArch64::MOVaddrBA));
3627 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3628 I.getOperand(0).getReg())
3629 .addBlockAddress(I.getOperand(1).getBlockAddress(),
3630 /* Offset */ 0, AArch64II::MO_PAGE)
3631 .addBlockAddress(
3632 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3633 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3634 I.eraseFromParent();
3635 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3636 }
3637 }
3638 case AArch64::G_DUP: {
3639 // When the scalar operand of G_DUP is an s8/s16 GPR, it can't be selected by
3640 // the imported patterns, so do it manually here. Avoiding the s16 GPR is
3641 // difficult because at register-bank selection we may end up pessimizing the
3642 // FPR case if we add an anyext to fix this. Manual selection is the most
3643 // robust solution for now.
3644 if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3645 AArch64::GPRRegBankID)
3646 return false; // We expect the fpr regbank case to be imported.
3647 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3648 if (VecTy == LLT::fixed_vector(8, 8))
3649 I.setDesc(TII.get(AArch64::DUPv8i8gpr));
3650 else if (VecTy == LLT::fixed_vector(16, 8))
3651 I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3652 else if (VecTy == LLT::fixed_vector(4, 16))
3653 I.setDesc(TII.get(AArch64::DUPv4i16gpr));
3654 else if (VecTy == LLT::fixed_vector(8, 16))
3655 I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3656 else
3657 return false;
3658 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3659 }
3660 case TargetOpcode::G_BUILD_VECTOR:
3661 return selectBuildVector(I, MRI);
3662 case TargetOpcode::G_MERGE_VALUES:
3663 return selectMergeValues(I, MRI);
3664 case TargetOpcode::G_UNMERGE_VALUES:
3665 return selectUnmergeValues(I, MRI);
3666 case TargetOpcode::G_SHUFFLE_VECTOR:
3667 return selectShuffleVector(I, MRI);
3668 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3669 return selectExtractElt(I, MRI);
3670 case TargetOpcode::G_CONCAT_VECTORS:
3671 return selectConcatVectors(I, MRI);
3672 case TargetOpcode::G_JUMP_TABLE:
3673 return selectJumpTable(I, MRI);
3674 case TargetOpcode::G_MEMCPY:
3675 case TargetOpcode::G_MEMCPY_INLINE:
3676 case TargetOpcode::G_MEMMOVE:
3677 case TargetOpcode::G_MEMSET:
3678 assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
3679 return selectMOPS(I, MRI);
3680 }
3681
3682 return false;
3683}
3684
3685bool AArch64InstructionSelector::selectAndRestoreState(MachineInstr &I) {
3686 MachineIRBuilderState OldMIBState = MIB.getState();
3687 bool Success = select(I);
3688 MIB.setState(OldMIBState);
3689 return Success;
3690}
3691
3692bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
3693 MachineRegisterInfo &MRI) {
3694 unsigned Mopcode;
3695 switch (GI.getOpcode()) {
3696 case TargetOpcode::G_MEMCPY:
3697 case TargetOpcode::G_MEMCPY_INLINE:
3698 Mopcode = AArch64::MOPSMemoryCopyPseudo;
3699 break;
3700 case TargetOpcode::G_MEMMOVE:
3701 Mopcode = AArch64::MOPSMemoryMovePseudo;
3702 break;
3703 case TargetOpcode::G_MEMSET:
3704 // For tagged memset see llvm.aarch64.mops.memset.tag
3705 Mopcode = AArch64::MOPSMemorySetPseudo;
3706 break;
3707 }
3708
3709 auto &DstPtr = GI.getOperand(0);
3710 auto &SrcOrVal = GI.getOperand(1);
3711 auto &Size = GI.getOperand(2);
3712
3713 // Create copies of the registers that can be clobbered.
3714 const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
3715 const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
3716 const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg());
3717
3718 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3719 const auto &SrcValRegClass =
3720 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3721
3722 // Constrain to specific registers
3723 RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
3724 RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
3725 RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);
3726
3727 MIB.buildCopy(DstPtrCopy, DstPtr);
3728 MIB.buildCopy(SrcValCopy, SrcOrVal);
3729 MIB.buildCopy(SizeCopy, Size);
3730
3731 // New instruction uses the copied registers because it must update them.
3732 // The defs are not used since they don't exist in G_MEM*. They are still
3733 // tied.
3734 // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE
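// (These pseudos are expanded after selection into the MOPS
// prologue/main/epilogue instruction triples, e.g. CPYFP/CPYFM/CPYFE for a
// memcpy; the dead defs model the registers those instructions update.)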
3735 Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
3736 Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3737 if (IsSet) {
3738 MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
3739 {DstPtrCopy, SizeCopy, SrcValCopy});
3740 } else {
3741 Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
3742 MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
3743 {DstPtrCopy, SrcValCopy, SizeCopy});
3744 }
3745
3746 GI.eraseFromParent();
3747 return true;
3748}
3749
3750bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3751 MachineRegisterInfo &MRI) {
3752 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3753 Register JTAddr = I.getOperand(0).getReg();
3754 unsigned JTI = I.getOperand(1).getIndex();
3755 Register Index = I.getOperand(2).getReg();
3756
3757 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3758
3759 // With aarch64-jump-table-hardening, we only expand the jump table dispatch
3760 // sequence later, to guarantee the integrity of the intermediate values.
3761 if (MF->getFunction().hasFnAttribute("aarch64-jump-table-hardening")) {
3762 CodeModel::Model CM = TM.getCodeModel();
3763 if (STI.isTargetMachO()) {
3764 if (CM != CodeModel::Small && CM != CodeModel::Large)
3765 report_fatal_error("Unsupported code-model for hardened jump-table");
3766 } else {
3767 // Note that COFF support would likely also need JUMP_TABLE_DEBUG_INFO.
3768 assert(STI.isTargetELF() &&
3769 "jump table hardening only supported on MachO/ELF");
3770 if (CM != CodeModel::Small)
3771 report_fatal_error("Unsupported code-model for hardened jump-table");
3772 }
3773
3774 MIB.buildCopy({AArch64::X16}, I.getOperand(2).getReg());
3775 MIB.buildInstr(AArch64::BR_JumpTable)
3776 .addJumpTableIndex(I.getOperand(1).getIndex());
3777 I.eraseFromParent();
3778 return true;
3779 }
3780
3781 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3782 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3783
3784 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3785 {TargetReg, ScratchReg}, {JTAddr, Index})
3786 .addJumpTableIndex(JTI);
3787 // Save the jump table info.
3788 MIB.buildInstr(TargetOpcode::JUMP_TABLE_DEBUG_INFO, {},
3789 {static_cast<int64_t>(JTI)});
3790 // Build the indirect branch.
3791 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3792 I.eraseFromParent();
3793 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3794}
3795
3796bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3797 MachineRegisterInfo &MRI) {
3798 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3799 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3800
3801 Register DstReg = I.getOperand(0).getReg();
3802 unsigned JTI = I.getOperand(1).getIndex();
3803 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
3804 auto MovMI =
3805 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3806 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3807 .addJumpTableIndex(JTI, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3808 I.eraseFromParent();
3809 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3810}
3811
3812bool AArch64InstructionSelector::selectTLSGlobalValue(
3813 MachineInstr &I, MachineRegisterInfo &MRI) {
3814 if (!STI.isTargetMachO())
3815 return false;
3816 MachineFunction &MF = *I.getParent()->getParent();
3817 MF.getFrameInfo().setAdjustsStack(true);
3818
3819 const auto &GlobalOp = I.getOperand(1);
3820 assert(GlobalOp.getOffset() == 0 &&
3821 "Shouldn't have an offset on TLS globals!");
3822 const GlobalValue &GV = *GlobalOp.getGlobal();
3823
3824 auto LoadGOT =
3825 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3826 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3827
3828 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3829 {LoadGOT.getReg(0)})
3830 .addImm(0);
3831
3832 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3833 // TLS calls preserve all registers except those that absolutely must be
3834 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3835 // silly).
3836 unsigned Opcode = getBLRCallOpcode(MF);
3837
3838 // With ptrauth-calls, the tlv access thunk pointer is authenticated (IA, 0).
3839 if (MF.getFunction().hasFnAttribute("ptrauth-calls")) {
3840 assert(Opcode == AArch64::BLR);
3841 Opcode = AArch64::BLRAAZ;
3842 }
3843
3844 MIB.buildInstr(Opcode, {}, {Load})
3845 .addUse(AArch64::X0, RegState::Implicit)
3846 .addDef(AArch64::X0, RegState::Implicit)
3847 .addRegMask(TRI.getTLSCallPreservedMask());
3848
3849 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3850 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3851 MRI);
3852 I.eraseFromParent();
3853 return true;
3854}
3855
3856MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3857 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3858 MachineIRBuilder &MIRBuilder) const {
3859 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3860
3861 auto BuildFn = [&](unsigned SubregIndex) {
3862 auto Ins =
3863 MIRBuilder
3864 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3865 .addImm(SubregIndex);
3866 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3867 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3868 return &*Ins;
3869 };
3870
3871 switch (EltSize) {
3872 case 8:
3873 return BuildFn(AArch64::bsub);
3874 case 16:
3875 return BuildFn(AArch64::hsub);
3876 case 32:
3877 return BuildFn(AArch64::ssub);
3878 case 64:
3879 return BuildFn(AArch64::dsub);
3880 default:
3881 return nullptr;
3882 }
3883}
3884
3885 MachineInstr *
3886 AArch64InstructionSelector::emitNarrowVector(Register DstReg, Register SrcReg,
3887 MachineIRBuilder &MIB,
3888 MachineRegisterInfo &MRI) const {
3889 LLT DstTy = MRI.getType(DstReg);
3890 const TargetRegisterClass *RC =
3891 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(SrcReg, MRI, TRI));
3892 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3893 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3894 return nullptr;
3895 }
3896 unsigned SubReg = 0;
3897 if (!getSubRegForClass(RC, TRI, SubReg))
3898 return nullptr;
3899 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3900 LLVM_DEBUG(dbgs() << "Unsupported destination size! ("
3901 << DstTy.getSizeInBits() << ")\n");
3902 return nullptr;
3903 }
3904 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3905 .addReg(SrcReg, 0, SubReg);
3906 RBI.constrainGenericRegister(DstReg, *RC, MRI);
3907 return Copy;
3908}
3909
3910bool AArch64InstructionSelector::selectMergeValues(
3911 MachineInstr &I, MachineRegisterInfo &MRI) {
3912 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3913 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3914 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3915 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3916 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3917
3918 if (I.getNumOperands() != 3)
3919 return false;
3920
3921 // Merging 2 s64s into an s128.
3922 if (DstTy == LLT::scalar(128)) {
3923 if (SrcTy.getSizeInBits() != 64)
3924 return false;
3925 Register DstReg = I.getOperand(0).getReg();
3926 Register Src1Reg = I.getOperand(1).getReg();
3927 Register Src2Reg = I.getOperand(2).getReg();
3928 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3929 MachineInstr *InsMI = emitLaneInsert(std::nullopt, Tmp.getReg(0), Src1Reg,
3930 /* LaneIdx */ 0, RB, MIB);
3931 if (!InsMI)
3932 return false;
3933 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
3934 Src2Reg, /* LaneIdx */ 1, RB, MIB);
3935 if (!Ins2MI)
3936 return false;
3937 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3938 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
3939 I.eraseFromParent();
3940 return true;
3941 }
3942
3943 if (RB.getID() != AArch64::GPRRegBankID)
3944 return false;
3945
3946 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3947 return false;
3948
3949 auto *DstRC = &AArch64::GPR64RegClass;
3950 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
3951 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3952 TII.get(TargetOpcode::SUBREG_TO_REG))
3953 .addDef(SubToRegDef)
3954 .addImm(0)
3955 .addUse(I.getOperand(1).getReg())
3956 .addImm(AArch64::sub_32);
3957 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
3958 // Need to anyext the second scalar before we can use bfm
3959 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3960 TII.get(TargetOpcode::SUBREG_TO_REG))
3961 .addDef(SubToRegDef2)
3962 .addImm(0)
3963 .addUse(I.getOperand(2).getReg())
3964 .addImm(AArch64::sub_32);
3965 MachineInstr &BFM =
3966 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
3967 .addDef(I.getOperand(0).getReg())
3968 .addUse(SubToRegDef)
3969 .addUse(SubToRegDef2)
3970 .addImm(32)
3971 .addImm(31);
3972 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
3973 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
3974 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
3975 I.eraseFromParent();
3976 return true;
3977}
3978
3979static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3980 const unsigned EltSize) {
3981 // Choose a lane copy opcode and subregister based off of the size of the
3982 // vector's elements.
3983 switch (EltSize) {
3984 case 8:
3985 CopyOpc = AArch64::DUPi8;
3986 ExtractSubReg = AArch64::bsub;
3987 break;
3988 case 16:
3989 CopyOpc = AArch64::DUPi16;
3990 ExtractSubReg = AArch64::hsub;
3991 break;
3992 case 32:
3993 CopyOpc = AArch64::DUPi32;
3994 ExtractSubReg = AArch64::ssub;
3995 break;
3996 case 64:
3997 CopyOpc = AArch64::DUPi64;
3998 ExtractSubReg = AArch64::dsub;
3999 break;
4000 default:
4001 // Unknown size, bail out.
4002 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
4003 return false;
4004 }
4005 return true;
4006}
4007
4008MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
4009 std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
4010 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
4011 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4012 unsigned CopyOpc = 0;
4013 unsigned ExtractSubReg = 0;
4014 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
4015 LLVM_DEBUG(
4016 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
4017 return nullptr;
4018 }
4019
4020 const TargetRegisterClass *DstRC =
4021 getRegClassForTypeOnBank(ScalarTy, DstRB, true);
4022 if (!DstRC) {
4023 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
4024 return nullptr;
4025 }
4026
4027 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
4028 const LLT &VecTy = MRI.getType(VecReg);
4029 const TargetRegisterClass *VecRC =
4030 getRegClassForTypeOnBank(VecTy, VecRB, true);
4031 if (!VecRC) {
4032 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
4033 return nullptr;
4034 }
4035
4036 // The register that we're going to copy into.
4037 Register InsertReg = VecReg;
4038 if (!DstReg)
4039 DstReg = MRI.createVirtualRegister(DstRC);
4040 // If the lane index is 0, we just use a subregister COPY.
4041 if (LaneIdx == 0) {
4042 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
4043 .addReg(VecReg, 0, ExtractSubReg);
4044 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
4045 return &*Copy;
4046 }
4047
4048 // Lane copies require 128-bit wide registers. If we're dealing with an
4049 // unpacked vector, then we need to move up to that width. Insert an implicit
4050 // def and a subregister insert to get us there.
4051 if (VecTy.getSizeInBits() != 128) {
4052 MachineInstr *ScalarToVector = emitScalarToVector(
4053 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
4054 if (!ScalarToVector)
4055 return nullptr;
4056 InsertReg = ScalarToVector->getOperand(0).getReg();
4057 }
4058
4059 MachineInstr *LaneCopyMI =
4060 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
4061 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
4062
4063 // Make sure that we actually constrain the initial copy.
4064 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
4065 return LaneCopyMI;
4066}
4067
4068bool AArch64InstructionSelector::selectExtractElt(
4069 MachineInstr &I, MachineRegisterInfo &MRI) {
4070 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
4071 "unexpected opcode!");
4072 Register DstReg = I.getOperand(0).getReg();
4073 const LLT NarrowTy = MRI.getType(DstReg);
4074 const Register SrcReg = I.getOperand(1).getReg();
4075 const LLT WideTy = MRI.getType(SrcReg);
4076 (void)WideTy;
4077 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
4078 "source register size too small!");
4079 assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
4080
4081 // Need the lane index to determine the correct copy opcode.
4082 MachineOperand &LaneIdxOp = I.getOperand(2);
4083 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
4084
4085 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4086 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
4087 return false;
4088 }
4089
4090 // Find the index to extract from.
4091 auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
4092 if (!VRegAndVal)
4093 return false;
4094 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4095
4096
4097 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
4098 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
4099 LaneIdx, MIB);
4100 if (!Extract)
4101 return false;
4102
4103 I.eraseFromParent();
4104 return true;
4105}
4106
4107bool AArch64InstructionSelector::selectSplitVectorUnmerge(
4108 MachineInstr &I, MachineRegisterInfo &MRI) {
4109 unsigned NumElts = I.getNumOperands() - 1;
4110 Register SrcReg = I.getOperand(NumElts).getReg();
4111 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4112 const LLT SrcTy = MRI.getType(SrcReg);
4113
4114 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
4115 if (SrcTy.getSizeInBits() > 128) {
4116 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
4117 return false;
4118 }
4119
4120 // We implement a split vector operation by treating the sub-vectors as
4121 // scalars and extracting them.
4122 const RegisterBank &DstRB =
4123 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
4124 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4125 Register Dst = I.getOperand(OpIdx).getReg();
4126 MachineInstr *Extract =
4127 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
4128 if (!Extract)
4129 return false;
4130 }
4131 I.eraseFromParent();
4132 return true;
4133}
4134
4135bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
4136 MachineRegisterInfo &MRI) {
4137 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4138 "unexpected opcode");
4139
4140 // TODO: Handle unmerging into GPRs and from scalars to scalars.
4141 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
4142 AArch64::FPRRegBankID ||
4143 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
4144 AArch64::FPRRegBankID) {
4145 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
4146 "currently unsupported.\n");
4147 return false;
4148 }
4149
4150 // The last operand is the vector source register, and every other operand is
4151 // a register to unpack into.
4152 unsigned NumElts = I.getNumOperands() - 1;
4153 Register SrcReg = I.getOperand(NumElts).getReg();
4154 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4155 const LLT WideTy = MRI.getType(SrcReg);
4156 (void)WideTy;
4157 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
4158 "can only unmerge from vector or s128 types!");
4159 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
4160 "source register size too small!");
4161
4162 if (!NarrowTy.isScalar())
4163 return selectSplitVectorUnmerge(I, MRI);
4164
4165 // Choose a lane copy opcode and subregister based off of the size of the
4166 // vector's elements.
4167 unsigned CopyOpc = 0;
4168 unsigned ExtractSubReg = 0;
4169 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
4170 return false;
4171
4172 // Set up for the lane copies.
4173 MachineBasicBlock &MBB = *I.getParent();
4174
4175 // Stores the registers we'll be copying from.
4176 SmallVector<Register, 4> InsertRegs;
4177
4178 // We'll use the first register twice, so we only need NumElts-1 registers.
4179 unsigned NumInsertRegs = NumElts - 1;
4180
4181 // If our elements fit into exactly 128 bits, then we can copy from the source
4182 // directly. Otherwise, we need to do a bit of setup with some subregister
4183 // inserts.
4184 if (NarrowTy.getSizeInBits() * NumElts == 128) {
4185 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
4186 } else {
4187 // No. We have to perform subregister inserts. For each insert, create an
4188 // implicit def and a subregister insert, and save the register we create.
4189 const TargetRegisterClass *RC = getRegClassForTypeOnBank(
4190 LLT::fixed_vector(NumElts, WideTy.getScalarSizeInBits()),
4191 *RBI.getRegBank(SrcReg, MRI, TRI));
4192 unsigned SubReg = 0;
4193 bool Found = getSubRegForClass(RC, TRI, SubReg);
4194 (void)Found;
4195 assert(Found && "expected to find last operand's subreg idx");
4196 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4197 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4198 MachineInstr &ImpDefMI =
4199 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
4200 ImpDefReg);
4201
4202 // Now, create the subregister insert from SrcReg.
4203 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4204 MachineInstr &InsMI =
4205 *BuildMI(MBB, I, I.getDebugLoc(),
4206 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4207 .addUse(ImpDefReg)
4208 .addUse(SrcReg)
4209 .addImm(SubReg);
4210
4211 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
4212 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
4213
4214 // Save the register so that we can copy from it after.
4215 InsertRegs.push_back(InsertReg);
4216 }
4217 }
4218
4219 // Now that we've created any necessary subregister inserts, we can
4220 // create the copies.
4221 //
4222 // Perform the first copy separately as a subregister copy.
4223 Register CopyTo = I.getOperand(0).getReg();
4224 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4225 .addReg(InsertRegs[0], 0, ExtractSubReg);
4226 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
4227
4228 // Now, perform the remaining copies as vector lane copies.
4229 unsigned LaneIdx = 1;
4230 for (Register InsReg : InsertRegs) {
4231 Register CopyTo = I.getOperand(LaneIdx).getReg();
4232 MachineInstr &CopyInst =
4233 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
4234 .addUse(InsReg)
4235 .addImm(LaneIdx);
4236 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
4237 ++LaneIdx;
4238 }
4239
4240 // Separately constrain the first copy's destination. Because of the
4241 // limitation in constrainOperandRegClass, we can't guarantee that this will
4242 // actually be constrained. So, do it ourselves using the second operand.
4243 const TargetRegisterClass *RC =
4244 MRI.getRegClassOrNull(I.getOperand(1).getReg());
4245 if (!RC) {
4246 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
4247 return false;
4248 }
4249
4250 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
4251 I.eraseFromParent();
4252 return true;
4253}
4254
4255bool AArch64InstructionSelector::selectConcatVectors(
4256 MachineInstr &I, MachineRegisterInfo &MRI) {
4257 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4258 "Unexpected opcode");
4259 Register Dst = I.getOperand(0).getReg();
4260 Register Op1 = I.getOperand(1).getReg();
4261 Register Op2 = I.getOperand(2).getReg();
4262 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4263 if (!ConcatMI)
4264 return false;
4265 I.eraseFromParent();
4266 return true;
4267}
4268
4269unsigned
4270AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4271 MachineFunction &MF) const {
4272 Type *CPTy = CPVal->getType();
4273 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
4274
4275 MachineConstantPool *MCP = MF.getConstantPool();
4276 return MCP->getConstantPoolIndex(CPVal, Alignment);
4277}
4278
4279MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4280 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4281 const TargetRegisterClass *RC;
4282 unsigned Opc;
4283 bool IsTiny = TM.getCodeModel() == CodeModel::Tiny;
4284 unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
4285 switch (Size) {
4286 case 16:
4287 RC = &AArch64::FPR128RegClass;
4288 Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
4289 break;
4290 case 8:
4291 RC = &AArch64::FPR64RegClass;
4292 Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
4293 break;
4294 case 4:
4295 RC = &AArch64::FPR32RegClass;
4296 Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
4297 break;
4298 case 2:
4299 RC = &AArch64::FPR16RegClass;
4300 Opc = AArch64::LDRHui;
4301 break;
4302 default:
4303 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4304 << *CPVal->getType());
4305 return nullptr;
4306 }
4307
4308 MachineInstr *LoadMI = nullptr;
4309 auto &MF = MIRBuilder.getMF();
4310 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4311 if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) {
4312 // Use load(literal) for tiny code model.
4313 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {}).addConstantPoolIndex(CPIdx);
4314 } else {
4315 auto Adrp =
4316 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4317 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
4318
4319 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {Adrp})
4320 .addConstantPoolIndex(
4321 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4322
4323 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
4324 }
4325
4326 MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4327 LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
4328 MachineMemOperand::MOLoad,
4329 Size, Align(Size)));
4330 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
4331 return LoadMI;
4332}
4333
4334 /// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
4335/// size and RB.
4336static std::pair<unsigned, unsigned>
4337getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4338 unsigned Opc, SubregIdx;
4339 if (RB.getID() == AArch64::GPRRegBankID) {
4340 if (EltSize == 8) {
4341 Opc = AArch64::INSvi8gpr;
4342 SubregIdx = AArch64::bsub;
4343 } else if (EltSize == 16) {
4344 Opc = AArch64::INSvi16gpr;
4345 SubregIdx = AArch64::ssub;
4346 } else if (EltSize == 32) {
4347 Opc = AArch64::INSvi32gpr;
4348 SubregIdx = AArch64::ssub;
4349 } else if (EltSize == 64) {
4350 Opc = AArch64::INSvi64gpr;
4351 SubregIdx = AArch64::dsub;
4352 } else {
4353 llvm_unreachable("invalid elt size!");
4354 }
4355 } else {
4356 if (EltSize == 8) {
4357 Opc = AArch64::INSvi8lane;
4358 SubregIdx = AArch64::bsub;
4359 } else if (EltSize == 16) {
4360 Opc = AArch64::INSvi16lane;
4361 SubregIdx = AArch64::hsub;
4362 } else if (EltSize == 32) {
4363 Opc = AArch64::INSvi32lane;
4364 SubregIdx = AArch64::ssub;
4365 } else if (EltSize == 64) {
4366 Opc = AArch64::INSvi64lane;
4367 SubregIdx = AArch64::dsub;
4368 } else {
4369 llvm_unreachable("invalid elt size!");
4370 }
4371 }
4372 return std::make_pair(Opc, SubregIdx);
4373}
4374
4375MachineInstr *AArch64InstructionSelector::emitInstr(
4376 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4377 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4378 const ComplexRendererFns &RenderFns) const {
4379 assert(Opcode && "Expected an opcode?");
4380 assert(!isPreISelGenericOpcode(Opcode) &&
4381 "Function should only be used to produce selected instructions!");
4382 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4383 if (RenderFns)
4384 for (auto &Fn : *RenderFns)
4385 Fn(MI);
4386 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4387 return &*MI;
4388}
4389
4390MachineInstr *AArch64InstructionSelector::emitAddSub(
4391 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4392 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4393 MachineIRBuilder &MIRBuilder) const {
4394 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4395 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4396 auto Ty = MRI.getType(LHS.getReg());
4397 assert(!Ty.isVector() && "Expected a scalar or pointer?");
4398 unsigned Size = Ty.getSizeInBits();
4399 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4400 bool Is32Bit = Size == 32;
4401
4402 // INSTRri form with positive arithmetic immediate.
4403 if (auto Fns = selectArithImmed(RHS))
4404 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4405 MIRBuilder, Fns);
4406
4407 // INSTRri form with negative arithmetic immediate.
4408 if (auto Fns = selectNegArithImmed(RHS))
4409 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4410 MIRBuilder, Fns);
4411
4412 // INSTRrx form.
4413 if (auto Fns = selectArithExtendedRegister(RHS))
4414 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4415 MIRBuilder, Fns);
4416
4417 // INSTRrs form.
4418 if (auto Fns = selectShiftedRegister(RHS))
4419 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4420 MIRBuilder, Fns);
4421 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4422 MIRBuilder);
4423}
4424
4425 MachineInstr *
4426 AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4427 MachineOperand &RHS,
4428 MachineIRBuilder &MIRBuilder) const {
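// Table layout shared by the emit* helpers below: rows are the [0] immediate,
// [1] shifted register, [2] register-register, [3] negated immediate and
// [4] extended register forms; within a row, [0] is the 64-bit opcode and
// [1] the 32-bit one.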
4429 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4430 {{AArch64::ADDXri, AArch64::ADDWri},
4431 {AArch64::ADDXrs, AArch64::ADDWrs},
4432 {AArch64::ADDXrr, AArch64::ADDWrr},
4433 {AArch64::SUBXri, AArch64::SUBWri},
4434 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4435 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4436}
4437
4438 MachineInstr *
4439 AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4440 MachineOperand &RHS,
4441 MachineIRBuilder &MIRBuilder) const {
4442 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4443 {{AArch64::ADDSXri, AArch64::ADDSWri},
4444 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4445 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4446 {AArch64::SUBSXri, AArch64::SUBSWri},
4447 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4448 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4449}
4450
4451 MachineInstr *
4452 AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4453 MachineOperand &RHS,
4454 MachineIRBuilder &MIRBuilder) const {
4455 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4456 {{AArch64::SUBSXri, AArch64::SUBSWri},
4457 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4458 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4459 {AArch64::ADDSXri, AArch64::ADDSWri},
4460 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4461 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4462}
4463
4464 MachineInstr *
4465 AArch64InstructionSelector::emitADCS(Register Dst, MachineOperand &LHS,
4466 MachineOperand &RHS,
4467 MachineIRBuilder &MIRBuilder) const {
4468 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4469 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4470 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4471 static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr};
4472 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4473}
4474
4475 MachineInstr *
4476 AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS,
4477 MachineOperand &RHS,
4478 MachineIRBuilder &MIRBuilder) const {
4479 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4480 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4481 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4482 static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr};
4483 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4484}
4485
4486 MachineInstr *
4487 AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4488 MachineIRBuilder &MIRBuilder) const {
4489 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4490 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4491 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4492 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4493}
4494
4495 MachineInstr *
4496 AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4497 MachineIRBuilder &MIRBuilder) const {
4498 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4499 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4500 LLT Ty = MRI.getType(LHS.getReg());
4501 unsigned RegSize = Ty.getSizeInBits();
4502 bool Is32Bit = (RegSize == 32);
4503 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4504 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4505 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4506 // ANDS needs a logical immediate for its immediate form. Check if we can
4507 // fold one in.
4508 if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4509 int64_t Imm = ValAndVReg->Value.getSExtValue();
4510
4511 if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
4512 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4513 TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
4514 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4515 return &*TstMI;
4516 }
4517 }
4518
4519 if (auto Fns = selectLogicalShiftedRegister(RHS))
4520 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4521 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4522}
4523
4524MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4525 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4526 MachineIRBuilder &MIRBuilder) const {
4527 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4528 assert(Predicate.isPredicate() && "Expected predicate?");
4529 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4530 LLT CmpTy = MRI.getType(LHS.getReg());
4531 assert(!CmpTy.isVector() && "Expected scalar or pointer");
4532 unsigned Size = CmpTy.getSizeInBits();
4533 (void)Size;
4534 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4535 // Fold the compare into a cmn or tst if possible.
4536 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4537 return FoldCmp;
4538 auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
4539 return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4540}
4541
4542MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4543 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4544 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4545#ifndef NDEBUG
4546 LLT Ty = MRI.getType(Dst);
4547 assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4548 "Expected a 32-bit scalar register?");
4549#endif
4550 const Register ZReg = AArch64::WZR;
4551 AArch64CC::CondCode CC1, CC2;
4552 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
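// Predicates such as ONE or UEQ map to two AArch64 condition codes
// (CC2 != AL); in that case the result is the OR of the two CSET values
// emitted below.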
4553 auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
4554 if (CC2 == AArch64CC::AL)
4555 return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
4556 MIRBuilder);
4557 const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4558 Register Def1Reg = MRI.createVirtualRegister(RC);
4559 Register Def2Reg = MRI.createVirtualRegister(RC);
4560 auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
4561 emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
4562 emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
4563 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4564 constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4565 return &*OrMI;
4566}
4567
4568MachineInstr *AArch64InstructionSelector::emitFPCompare(
4569 Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
4570 std::optional<CmpInst::Predicate> Pred) const {
4571 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4572 LLT Ty = MRI.getType(LHS);
4573 if (Ty.isVector())
4574 return nullptr;
4575 unsigned OpSize = Ty.getSizeInBits();
4576 assert(OpSize == 16 || OpSize == 32 || OpSize == 64);
4577
4578 // If this is a compare against +0.0, then we don't have
4579 // to explicitly materialize a constant.
4580 const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4581 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4582
4583 auto IsEqualityPred = [](CmpInst::Predicate P) {
4584 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4585 P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4586 };
4587 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4588 // Try commutating the operands.
4589 const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4590 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4591 ShouldUseImm = true;
4592 std::swap(LHS, RHS);
4593 }
4594 }
4595 unsigned CmpOpcTbl[2][3] = {
4596 {AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr},
4597 {AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}};
4598 unsigned CmpOpc =
4599 CmpOpcTbl[ShouldUseImm][OpSize == 16 ? 0 : (OpSize == 32 ? 1 : 2)];
4600
4601 // Partially build the compare. Decide if we need to add a use for the
4602 // third operand based off whether or not we're comparing against 0.0.
4603 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4604 CmpMI.setMIFlags(MachineInstr::NoFPExcept);
4605 if (!ShouldUseImm)
4606 CmpMI.addUse(RHS);
4607 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4608 return &*CmpMI;
4609}
4610
4611MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4612 std::optional<Register> Dst, Register Op1, Register Op2,
4613 MachineIRBuilder &MIRBuilder) const {
4614 // We implement a vector concat by:
4615 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4616 // 2. Insert the upper vector into the destination's upper element
4617 // TODO: some of this code is common with G_BUILD_VECTOR handling.
4618 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4619
4620 const LLT Op1Ty = MRI.getType(Op1);
4621 const LLT Op2Ty = MRI.getType(Op2);
4622
4623 if (Op1Ty != Op2Ty) {
4624 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4625 return nullptr;
4626 }
4627 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4628
4629 if (Op1Ty.getSizeInBits() >= 128) {
4630 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4631 return nullptr;
4632 }
4633
4634 // At the moment we just support 64 bit vector concats.
4635 if (Op1Ty.getSizeInBits() != 64) {
4636 LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
4637 return nullptr;
4638 }
4639
4640 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4641 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4642 const TargetRegisterClass *DstRC =
4643 getRegClassForTypeOnBank(Op1Ty.multiplyElements(2), FPRBank);
4644
4645 MachineInstr *WidenedOp1 =
4646 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4647 MachineInstr *WidenedOp2 =
4648 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4649 if (!WidenedOp1 || !WidenedOp2) {
4650 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4651 return nullptr;
4652 }
4653
4654 // Now do the insert of the upper element.
4655 unsigned InsertOpc, InsSubRegIdx;
4656 std::tie(InsertOpc, InsSubRegIdx) =
4657 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4658
4659 if (!Dst)
4660 Dst = MRI.createVirtualRegister(DstRC);
4661 auto InsElt =
4662 MIRBuilder
4663 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4664 .addImm(1) /* Lane index */
4665 .addUse(WidenedOp2->getOperand(0).getReg())
4666 .addImm(0);
4667 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4668 return &*InsElt;
4669}
4670
4671 MachineInstr *
4672 AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
4673 Register Src2, AArch64CC::CondCode Pred,
4674 MachineIRBuilder &MIRBuilder) const {
4675 auto &MRI = *MIRBuilder.getMRI();
4676 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
4677 // If we used a register class, then this won't necessarily have an LLT.