AArch64InstructionSelector.cpp
1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
15#include "AArch64InstrInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
41#include "llvm/IR/Constants.h"
44#include "llvm/IR/IntrinsicsAArch64.h"
46#include "llvm/IR/Type.h"
47#include "llvm/Pass.h"
48#include "llvm/Support/Debug.h"
50#include <optional>
51
52#define DEBUG_TYPE "aarch64-isel"
53
54using namespace llvm;
55using namespace MIPatternMatch;
56using namespace AArch64GISelUtils;
57
58namespace llvm {
59class BlockFrequencyInfo;
60class ProfileSummaryInfo;
61}
62
63namespace {
64
65#define GET_GLOBALISEL_PREDICATE_BITSET
66#include "AArch64GenGlobalISel.inc"
67#undef GET_GLOBALISEL_PREDICATE_BITSET
68
69
70class AArch64InstructionSelector : public InstructionSelector {
71public:
72 AArch64InstructionSelector(const AArch64TargetMachine &TM,
73 const AArch64Subtarget &STI,
74 const AArch64RegisterBankInfo &RBI);
75
76 bool select(MachineInstr &I) override;
77 static const char *getName() { return DEBUG_TYPE; }
78
79 void setupMF(MachineFunction &MF, GISelKnownBits *KB,
80 CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI,
81 BlockFrequencyInfo *BFI) override {
82 InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
83 MIB.setMF(MF);
84
85 // hasFnAttribute() is expensive to call on every BRCOND selection, so
86 // cache it here for each run of the selector.
87 ProduceNonFlagSettingCondBr =
88 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
89 MFReturnAddr = Register();
90
91 processPHIs(MF);
92 }
93
94private:
95 /// tblgen-erated 'select' implementation, used as the initial selector for
96 /// the patterns that don't require complex C++.
97 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
98
99 // A lowering phase that runs before any selection attempts.
100 // Returns true if the instruction was modified.
101 bool preISelLower(MachineInstr &I);
102
103 // An early selection function that runs before the selectImpl() call.
104 bool earlySelect(MachineInstr &I);
105
106 /// Save state that is shared between select calls, call select on \p I and
107 /// then restore the saved state. This can be used to recursively call select
108 /// within a select call.
109 bool selectAndRestoreState(MachineInstr &I);
110
111 // Do some preprocessing of G_PHIs before we begin selection.
112 void processPHIs(MachineFunction &MF);
113
114 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
115
116 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
117 bool contractCrossBankCopyIntoStore(MachineInstr &I,
118 MachineRegisterInfo &MRI);
119
120 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
121
122 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
123 MachineRegisterInfo &MRI) const;
124 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
125 MachineRegisterInfo &MRI) const;
126
127 ///@{
128 /// Helper functions for selectCompareBranch.
129 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
130 MachineIRBuilder &MIB) const;
131 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
132 MachineIRBuilder &MIB) const;
133 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
134 MachineIRBuilder &MIB) const;
135 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
136 MachineBasicBlock *DstMBB,
137 MachineIRBuilder &MIB) const;
138 ///@}
139
140 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
141 MachineRegisterInfo &MRI);
142
143 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
144 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
145
146 // Helper to generate an equivalent of scalar_to_vector into a new register,
147 // returned via 'Dst'.
148 MachineInstr *emitScalarToVector(unsigned EltSize,
149 const TargetRegisterClass *DstRC,
150 Register Scalar,
151 MachineIRBuilder &MIRBuilder) const;
152 /// Helper to narrow vector that was widened by emitScalarToVector.
153 /// Copy lowest part of 128-bit or 64-bit vector to 64-bit or 32-bit
154 /// vector, respectively.
155 MachineInstr *emitNarrowVector(Register DstReg, Register SrcReg,
156 MachineIRBuilder &MIRBuilder,
157 MachineRegisterInfo &MRI) const;
158
159 /// Emit a lane insert into \p DstReg, or a new vector register if
160 /// std::nullopt is provided.
161 ///
162 /// The lane inserted into is defined by \p LaneIdx. The vector source
163 /// register is given by \p SrcReg. The register containing the element is
164 /// given by \p EltReg.
165 MachineInstr *emitLaneInsert(std::optional<Register> DstReg, Register SrcReg,
166 Register EltReg, unsigned LaneIdx,
167 const RegisterBank &RB,
168 MachineIRBuilder &MIRBuilder) const;
169
170 /// Emit a sequence of instructions representing a constant \p CV for a
171 /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
172 ///
173 /// \returns the last instruction in the sequence on success, and nullptr
174 /// otherwise.
175 MachineInstr *emitConstantVector(Register Dst, Constant *CV,
176 MachineIRBuilder &MIRBuilder,
177 MachineRegisterInfo &MRI);
178
179 MachineInstr *tryAdvSIMDModImm8(Register Dst, unsigned DstSize, APInt Bits,
180 MachineIRBuilder &MIRBuilder);
181
182 MachineInstr *tryAdvSIMDModImm16(Register Dst, unsigned DstSize, APInt Bits,
183 MachineIRBuilder &MIRBuilder, bool Inv);
184
185 MachineInstr *tryAdvSIMDModImm32(Register Dst, unsigned DstSize, APInt Bits,
186 MachineIRBuilder &MIRBuilder, bool Inv);
187 MachineInstr *tryAdvSIMDModImm64(Register Dst, unsigned DstSize, APInt Bits,
188 MachineIRBuilder &MIRBuilder);
189 MachineInstr *tryAdvSIMDModImm321s(Register Dst, unsigned DstSize, APInt Bits,
190 MachineIRBuilder &MIRBuilder, bool Inv);
191 MachineInstr *tryAdvSIMDModImmFP(Register Dst, unsigned DstSize, APInt Bits,
192 MachineIRBuilder &MIRBuilder);
193
194 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
195 MachineRegisterInfo &MRI);
196 /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
197 /// SUBREG_TO_REG.
198 bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
199 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
202
203 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
204 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
205 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
206 bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
207
208 /// Helper function to select vector load intrinsics like
209 /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
210 /// \p Opc is the opcode that the selected instruction should use.
211 /// \p NumVecs is the number of vector destinations for the instruction.
212 /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
213 bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
214 MachineInstr &I);
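 // Illustrative example (an assumption, not taken from the original file):
 // for a NEON ld2 intrinsic producing two 4 x s32 vectors, the caller would
 // pass something like Opc = AArch64::LD2Twov4s and NumVecs = 2, and the
 // helper then emits the tuple load and copies the NumVecs results out of it.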
215 bool selectVectorLoadLaneIntrinsic(unsigned Opc, unsigned NumVecs,
216 MachineInstr &I);
217 void selectVectorStoreIntrinsic(MachineInstr &I, unsigned NumVecs,
218 unsigned Opc);
219 bool selectVectorStoreLaneIntrinsic(MachineInstr &I, unsigned NumVecs,
220 unsigned Opc);
221 bool selectIntrinsicWithSideEffects(MachineInstr &I,
222 MachineRegisterInfo &MRI);
223 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
224 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
225 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
226 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
227 bool selectPtrAuthGlobalValue(MachineInstr &I,
228 MachineRegisterInfo &MRI) const;
229 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
230 bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
231 bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
232 void SelectTable(MachineInstr &I, MachineRegisterInfo &MRI, unsigned NumVecs,
233 unsigned Opc1, unsigned Opc2, bool isExt);
234
235 bool selectIndexedExtLoad(MachineInstr &I, MachineRegisterInfo &MRI);
236 bool selectIndexedLoad(MachineInstr &I, MachineRegisterInfo &MRI);
237 bool selectIndexedStore(GIndexedStore &I, MachineRegisterInfo &MRI);
238
239 unsigned emitConstantPoolEntry(const Constant *CPVal,
240 MachineFunction &MF) const;
241 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
242 MachineIRBuilder &MIRBuilder) const;
243
244 // Emit a vector concat operation.
245 MachineInstr *emitVectorConcat(std::optional<Register> Dst, Register Op1,
246 Register Op2,
247 MachineIRBuilder &MIRBuilder) const;
248
249 // Emit an integer compare between LHS and RHS, which checks for Predicate.
250 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
251 MachineOperand &Predicate,
252 MachineIRBuilder &MIRBuilder) const;
253
254 /// Emit a floating point comparison between \p LHS and \p RHS.
255 /// \p Pred if given is the intended predicate to use.
256 MachineInstr *
257 emitFPCompare(Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
258 std::optional<CmpInst::Predicate> = std::nullopt) const;
259
260 MachineInstr *
261 emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
262 std::initializer_list<llvm::SrcOp> SrcOps,
263 MachineIRBuilder &MIRBuilder,
264 const ComplexRendererFns &RenderFns = std::nullopt) const;
265 /// Helper function to emit an add or sub instruction.
266 ///
267 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
268 /// in a specific order.
269 ///
270 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
271 ///
272 /// \code
273 /// const std::array<std::array<unsigned, 2>, 4> Table {
274 /// {{AArch64::ADDXri, AArch64::ADDWri},
275 /// {AArch64::ADDXrs, AArch64::ADDWrs},
276 /// {AArch64::ADDXrr, AArch64::ADDWrr},
277 /// {AArch64::SUBXri, AArch64::SUBWri},
278 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
279 /// \endcode
280 ///
281 /// Each row in the table corresponds to a different addressing mode. Each
282 /// column corresponds to a different register size.
283 ///
284 /// \attention Rows must be structured as follows:
285 /// - Row 0: The ri opcode variants
286 /// - Row 1: The rs opcode variants
287 /// - Row 2: The rr opcode variants
288 /// - Row 3: The ri opcode variants for negative immediates
289 /// - Row 4: The rx opcode variants
290 ///
291 /// \attention Columns must be structured as follows:
292 /// - Column 0: The 64-bit opcode variants
293 /// - Column 1: The 32-bit opcode variants
294 ///
295 /// \p Dst is the destination register of the binop to emit.
296 /// \p LHS is the left-hand operand of the binop to emit.
297 /// \p RHS is the right-hand operand of the binop to emit.
298 MachineInstr *emitAddSub(
299 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
300 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
301 MachineIRBuilder &MIRBuilder) const;
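 // A minimal reading of the table above (illustrative, not part of the
 // original file): a 64-bit register-register add resolves to row 2,
 // column 0, i.e. AArch64::ADDXrr, while a 32-bit add of a small immediate
 // resolves to row 0, column 1, i.e. AArch64::ADDWri.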
302 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
303 MachineOperand &RHS,
304 MachineIRBuilder &MIRBuilder) const;
306 MachineIRBuilder &MIRBuilder) const;
308 MachineIRBuilder &MIRBuilder) const;
310 MachineIRBuilder &MIRBuilder) const;
312 MachineIRBuilder &MIRBuilder) const;
314 MachineIRBuilder &MIRBuilder) const;
316 MachineIRBuilder &MIRBuilder) const;
317 MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
318 AArch64CC::CondCode CC,
319 MachineIRBuilder &MIRBuilder) const;
320 MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,
321 const RegisterBank &DstRB, LLT ScalarTy,
322 Register VecReg, unsigned LaneIdx,
323 MachineIRBuilder &MIRBuilder) const;
324 MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
325 AArch64CC::CondCode Pred,
326 MachineIRBuilder &MIRBuilder) const;
327 /// Emit a CSet for a FP compare.
328 ///
329 /// \p Dst is expected to be a 32-bit scalar register.
330 MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
331 MachineIRBuilder &MIRBuilder) const;
332
333 /// Emit an instruction that sets NZCV to the carry-in expected by \p I.
334 /// Might elide the instruction if the previous instruction already sets NZCV
335 /// correctly.
336 MachineInstr *emitCarryIn(MachineInstr &I, Register CarryReg);
337
338 /// Emit the overflow op for \p Opcode.
339 ///
340 /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
341 /// G_USUBO, etc.
342 std::pair<MachineInstr *, AArch64CC::CondCode>
343 emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
344 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
345
346 bool selectOverflowOp(MachineInstr &I, MachineRegisterInfo &MRI);
347
348 /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
349 /// In some cases this is even possible with OR operations in the expression.
351 MachineIRBuilder &MIB) const;
354 AArch64CC::CondCode Predicate,
356 MachineIRBuilder &MIB) const;
358 bool Negate, Register CCOp,
359 AArch64CC::CondCode Predicate,
360 MachineIRBuilder &MIB) const;
361
362 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
363 /// \p IsNegative is true if the test should be "not zero".
364 /// This will also optimize the test bit instruction when possible.
365 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
366 MachineBasicBlock *DstMBB,
367 MachineIRBuilder &MIB) const;
368
369 /// Emit a CB(N)Z instruction which branches to \p DestMBB.
370 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
371 MachineBasicBlock *DestMBB,
372 MachineIRBuilder &MIB) const;
373
374 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
375 // We use these manually instead of using the importer since it doesn't
376 // support SDNodeXForm.
377 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
378 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
379 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
380 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
381
382 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
383 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
384 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
385
386 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
387 unsigned Size) const;
388
389 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
390 return selectAddrModeUnscaled(Root, 1);
391 }
392 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
393 return selectAddrModeUnscaled(Root, 2);
394 }
395 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
396 return selectAddrModeUnscaled(Root, 4);
397 }
398 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
399 return selectAddrModeUnscaled(Root, 8);
400 }
401 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
402 return selectAddrModeUnscaled(Root, 16);
403 }
404
405 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
406 /// from complex pattern matchers like selectAddrModeIndexed().
407 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
408 MachineRegisterInfo &MRI) const;
409
410 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
411 unsigned Size) const;
412 template <int Width>
413 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
414 return selectAddrModeIndexed(Root, Width / 8);
415 }
416
417 std::optional<bool>
418 isWorthFoldingIntoAddrMode(MachineInstr &MI,
419 const MachineRegisterInfo &MRI) const;
420
421 bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
422 const MachineRegisterInfo &MRI,
423 bool IsAddrOperand) const;
424 ComplexRendererFns
425 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
426 unsigned SizeInBytes) const;
427
428 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
429 /// or not a shift + extend should be folded into an addressing mode. Returns
430 /// None when this is not profitable or possible.
431 ComplexRendererFns
432 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
433 MachineOperand &Offset, unsigned SizeInBytes,
434 bool WantsExt) const;
435 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
436 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
437 unsigned SizeInBytes) const;
438 template <int Width>
439 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
440 return selectAddrModeXRO(Root, Width / 8);
441 }
442
443 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
444 unsigned SizeInBytes) const;
445 template <int Width>
446 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
447 return selectAddrModeWRO(Root, Width / 8);
448 }
449
450 ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
451 bool AllowROR = false) const;
452
453 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
454 return selectShiftedRegister(Root);
455 }
456
457 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
458 return selectShiftedRegister(Root, true);
459 }
460
461 /// Given an extend instruction, determine the correct shift-extend type for
462 /// that instruction.
463 ///
464 /// If the instruction is going to be used in a load or store, pass
465 /// \p IsLoadStore = true.
466 AArch64_AM::ShiftExtendType
467 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
468 bool IsLoadStore = false) const;
469
470 /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
471 ///
472 /// \returns Either \p Reg if no change was necessary, or the new register
473 /// created by moving \p Reg.
474 ///
475 /// Note: This uses emitCopy right now.
476 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
477 MachineIRBuilder &MIB) const;
478
479 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
480
481 ComplexRendererFns selectExtractHigh(MachineOperand &Root) const;
482
483 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
484 int OpIdx = -1) const;
485 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
486 int OpIdx = -1) const;
487 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
488 int OpIdx = -1) const;
489 void renderUbsanTrap(MachineInstrBuilder &MIB, const MachineInstr &MI,
490 int OpIdx) const;
491 void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
492 int OpIdx = -1) const;
493 void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
494 int OpIdx = -1) const;
495 void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
496 int OpIdx = -1) const;
497 void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
498 const MachineInstr &MI,
499 int OpIdx = -1) const;
500
501 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
502 void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
503
504 // Optimization methods.
505 bool tryOptSelect(GSelect &Sel);
506 bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
507 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
508 MachineOperand &Predicate,
509 MachineIRBuilder &MIRBuilder) const;
510
511 /// Return true if \p MI is a load or store of \p NumBytes bytes.
512 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
513
514 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
515 /// register zeroed out. In other words, the result of MI has been explicitly
516 /// zero extended.
517 bool isDef32(const MachineInstr &MI) const;
518
519 const AArch64TargetMachine &TM;
520 const AArch64Subtarget &STI;
521 const AArch64InstrInfo &TII;
522 const AArch64RegisterInfo &TRI;
523 const AArch64RegisterBankInfo &RBI;
524
525 bool ProduceNonFlagSettingCondBr = false;
526
527 // Some cached values used during selection.
528 // We use LR as a live-in register, and we keep track of it here as it can be
529 // clobbered by calls.
530 Register MFReturnAddr;
531
532 MachineIRBuilder MIB;
533
534#define GET_GLOBALISEL_PREDICATES_DECL
535#include "AArch64GenGlobalISel.inc"
536#undef GET_GLOBALISEL_PREDICATES_DECL
537
538// We declare the temporaries used by selectImpl() in the class to minimize the
539// cost of constructing placeholder values.
540#define GET_GLOBALISEL_TEMPORARIES_DECL
541#include "AArch64GenGlobalISel.inc"
542#undef GET_GLOBALISEL_TEMPORARIES_DECL
543};
544
545} // end anonymous namespace
546
547#define GET_GLOBALISEL_IMPL
548#include "AArch64GenGlobalISel.inc"
549#undef GET_GLOBALISEL_IMPL
550
551AArch64InstructionSelector::AArch64InstructionSelector(
552 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
553 const AArch64RegisterBankInfo &RBI)
554 : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
555 RBI(RBI),
556#define GET_GLOBALISEL_PREDICATES_INIT
557#include "AArch64GenGlobalISel.inc"
558#undef GET_GLOBALISEL_PREDICATES_INIT
559#define GET_GLOBALISEL_TEMPORARIES_INIT
560#include "AArch64GenGlobalISel.inc"
561#undef GET_GLOBALISEL_TEMPORARIES_INIT
562{
563}
564
565// FIXME: This should be target-independent, inferred from the types declared
566// for each class in the bank.
567//
568/// Given a register bank, and a type, return the smallest register class that
569/// can represent that combination.
570static const TargetRegisterClass *
571getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
572 bool GetAllRegSet = false) {
573 if (RB.getID() == AArch64::GPRRegBankID) {
574 if (Ty.getSizeInBits() <= 32)
575 return GetAllRegSet ? &AArch64::GPR32allRegClass
576 : &AArch64::GPR32RegClass;
577 if (Ty.getSizeInBits() == 64)
578 return GetAllRegSet ? &AArch64::GPR64allRegClass
579 : &AArch64::GPR64RegClass;
580 if (Ty.getSizeInBits() == 128)
581 return &AArch64::XSeqPairsClassRegClass;
582 return nullptr;
583 }
584
585 if (RB.getID() == AArch64::FPRRegBankID) {
586 switch (Ty.getSizeInBits()) {
587 case 8:
588 return &AArch64::FPR8RegClass;
589 case 16:
590 return &AArch64::FPR16RegClass;
591 case 32:
592 return &AArch64::FPR32RegClass;
593 case 64:
594 return &AArch64::FPR64RegClass;
595 case 128:
596 return &AArch64::FPR128RegClass;
597 }
598 return nullptr;
599 }
600
601 return nullptr;
602}
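// For example (illustrative): an s32 value on the GPR bank maps to
// GPR32RegClass (or GPR32allRegClass when GetAllRegSet is true), while a
// 128-bit vector such as v2s64 on the FPR bank maps to FPR128RegClass.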
603
604/// Given a register bank, and size in bits, return the smallest register class
605/// that can represent that combination.
606static const TargetRegisterClass *
607getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits,
608 bool GetAllRegSet = false) {
609 if (SizeInBits.isScalable()) {
610 assert(RB.getID() == AArch64::FPRRegBankID &&
611 "Expected FPR regbank for scalable type size");
612 return &AArch64::ZPRRegClass;
613 }
614
615 unsigned RegBankID = RB.getID();
616
617 if (RegBankID == AArch64::GPRRegBankID) {
618 if (SizeInBits <= 32)
619 return GetAllRegSet ? &AArch64::GPR32allRegClass
620 : &AArch64::GPR32RegClass;
621 if (SizeInBits == 64)
622 return GetAllRegSet ? &AArch64::GPR64allRegClass
623 : &AArch64::GPR64RegClass;
624 if (SizeInBits == 128)
625 return &AArch64::XSeqPairsClassRegClass;
626 }
627
628 if (RegBankID == AArch64::FPRRegBankID) {
629 switch (SizeInBits) {
630 default:
631 return nullptr;
632 case 8:
633 return &AArch64::FPR8RegClass;
634 case 16:
635 return &AArch64::FPR16RegClass;
636 case 32:
637 return &AArch64::FPR32RegClass;
638 case 64:
639 return &AArch64::FPR64RegClass;
640 case 128:
641 return &AArch64::FPR128RegClass;
642 }
643 }
644
645 return nullptr;
646}
647
648/// Returns the correct subregister to use for a given register class.
649static bool getSubRegForClass(const TargetRegisterClass *RC,
650 const TargetRegisterInfo &TRI, unsigned &SubReg) {
651 switch (TRI.getRegSizeInBits(*RC)) {
652 case 8:
653 SubReg = AArch64::bsub;
654 break;
655 case 16:
656 SubReg = AArch64::hsub;
657 break;
658 case 32:
659 if (RC != &AArch64::FPR32RegClass)
660 SubReg = AArch64::sub_32;
661 else
662 SubReg = AArch64::ssub;
663 break;
664 case 64:
665 SubReg = AArch64::dsub;
666 break;
667 default:
668 LLVM_DEBUG(
669 dbgs() << "Couldn't find appropriate subregister for register class.");
670 return false;
671 }
672
673 return true;
674}
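// For example (illustrative): a 64-bit FPR class yields AArch64::dsub, a
// 32-bit GPR class yields AArch64::sub_32, and FPR32RegClass itself yields
// AArch64::ssub.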
675
676/// Returns the minimum size the given register bank can hold.
677static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
678 switch (RB.getID()) {
679 case AArch64::GPRRegBankID:
680 return 32;
681 case AArch64::FPRRegBankID:
682 return 8;
683 default:
684 llvm_unreachable("Tried to get minimum size for unknown register bank.");
685 }
686}
687
688/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
689/// Helper function for functions like createDTuple and createQTuple.
690///
691/// \p RegClassIDs - The list of register class IDs available for some tuple of
692/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
693/// expected to contain between 2 and 4 tuple classes.
694///
695/// \p SubRegs - The list of subregister classes associated with each register
696/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
697/// subregister class. The index of each subregister class is expected to
698/// correspond with the index of each register class.
699///
700/// \returns Either the destination register of REG_SEQUENCE instruction that
701/// was created, or the 0th element of \p Regs if \p Regs contains a single
702/// element.
703static Register createTuple(ArrayRef<Register> Regs,
704 const unsigned RegClassIDs[],
705 const unsigned SubRegs[], MachineIRBuilder &MIB) {
706 unsigned NumRegs = Regs.size();
707 if (NumRegs == 1)
708 return Regs[0];
709 assert(NumRegs >= 2 && NumRegs <= 4 &&
710 "Only support between two and 4 registers in a tuple!");
711 const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
712 auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
713 auto RegSequence =
714 MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
715 for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
716 RegSequence.addUse(Regs[I]);
717 RegSequence.addImm(SubRegs[I]);
718 }
719 return RegSequence.getReg(0);
720}
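// Illustrative result (assumed MIR rendering, not from the original file):
// for two Q registers with the QQ class IDs and qsub indices this builds
// roughly
//   %tuple:qq = REG_SEQUENCE %q0, %subreg.qsub0, %q1, %subreg.qsub1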
721
722/// Create a tuple of D-registers using the registers in \p Regs.
723static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
724 static const unsigned RegClassIDs[] = {
725 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
726 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
727 AArch64::dsub2, AArch64::dsub3};
728 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
729}
730
731/// Create a tuple of Q-registers using the registers in \p Regs.
732static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
733 static const unsigned RegClassIDs[] = {
734 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
735 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
736 AArch64::qsub2, AArch64::qsub3};
737 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
738}
739
740static std::optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
741 auto &MI = *Root.getParent();
742 auto &MBB = *MI.getParent();
743 auto &MF = *MBB.getParent();
744 auto &MRI = MF.getRegInfo();
745 uint64_t Immed;
746 if (Root.isImm())
747 Immed = Root.getImm();
748 else if (Root.isCImm())
749 Immed = Root.getCImm()->getZExtValue();
750 else if (Root.isReg()) {
751 auto ValAndVReg =
752 getIConstantVRegValWithLookThrough(Root.getReg(), MRI);
753 if (!ValAndVReg)
754 return std::nullopt;
755 Immed = ValAndVReg->Value.getSExtValue();
756 } else
757 return std::nullopt;
758 return Immed;
759}
760
761/// Check whether \p I is a currently unsupported binary operation:
762/// - it has an unsized type
763/// - an operand is not a vreg
764/// - not all operands are in the same bank
765/// These are checks that should someday live in the verifier, but right now,
766/// these are mostly limitations of the aarch64 selector.
767static bool unsupportedBinOp(const MachineInstr &I,
768 const AArch64RegisterBankInfo &RBI,
769 const MachineRegisterInfo &MRI,
770 const AArch64RegisterInfo &TRI) {
771 LLT Ty = MRI.getType(I.getOperand(0).getReg());
772 if (!Ty.isValid()) {
773 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
774 return true;
775 }
776
777 const RegisterBank *PrevOpBank = nullptr;
778 for (auto &MO : I.operands()) {
779 // FIXME: Support non-register operands.
780 if (!MO.isReg()) {
781 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
782 return true;
783 }
784
785 // FIXME: Can generic operations have physical registers operands? If
786 // so, this will need to be taught about that, and we'll need to get the
787 // bank out of the minimal class for the register.
788 // Either way, this needs to be documented (and possibly verified).
789 if (!MO.getReg().isVirtual()) {
790 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
791 return true;
792 }
793
794 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
795 if (!OpBank) {
796 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
797 return true;
798 }
799
800 if (PrevOpBank && OpBank != PrevOpBank) {
801 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
802 return true;
803 }
804 PrevOpBank = OpBank;
805 }
806 return false;
807}
808
809/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
810/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
811/// and of size \p OpSize.
812/// \returns \p GenericOpc if the combination is unsupported.
813static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
814 unsigned OpSize) {
815 switch (RegBankID) {
816 case AArch64::GPRRegBankID:
817 if (OpSize == 32) {
818 switch (GenericOpc) {
819 case TargetOpcode::G_SHL:
820 return AArch64::LSLVWr;
821 case TargetOpcode::G_LSHR:
822 return AArch64::LSRVWr;
823 case TargetOpcode::G_ASHR:
824 return AArch64::ASRVWr;
825 default:
826 return GenericOpc;
827 }
828 } else if (OpSize == 64) {
829 switch (GenericOpc) {
830 case TargetOpcode::G_PTR_ADD:
831 return AArch64::ADDXrr;
832 case TargetOpcode::G_SHL:
833 return AArch64::LSLVXr;
834 case TargetOpcode::G_LSHR:
835 return AArch64::LSRVXr;
836 case TargetOpcode::G_ASHR:
837 return AArch64::ASRVXr;
838 default:
839 return GenericOpc;
840 }
841 }
842 break;
843 case AArch64::FPRRegBankID:
844 switch (OpSize) {
845 case 32:
846 switch (GenericOpc) {
847 case TargetOpcode::G_FADD:
848 return AArch64::FADDSrr;
849 case TargetOpcode::G_FSUB:
850 return AArch64::FSUBSrr;
851 case TargetOpcode::G_FMUL:
852 return AArch64::FMULSrr;
853 case TargetOpcode::G_FDIV:
854 return AArch64::FDIVSrr;
855 default:
856 return GenericOpc;
857 }
858 case 64:
859 switch (GenericOpc) {
860 case TargetOpcode::G_FADD:
861 return AArch64::FADDDrr;
862 case TargetOpcode::G_FSUB:
863 return AArch64::FSUBDrr;
864 case TargetOpcode::G_FMUL:
865 return AArch64::FMULDrr;
866 case TargetOpcode::G_FDIV:
867 return AArch64::FDIVDrr;
868 case TargetOpcode::G_OR:
869 return AArch64::ORRv8i8;
870 default:
871 return GenericOpc;
872 }
873 }
874 break;
875 }
876 return GenericOpc;
877}
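// For example (illustrative): a 32-bit G_SHL on the GPR bank selects
// AArch64::LSLVWr and a 64-bit G_FADD on the FPR bank selects
// AArch64::FADDDrr; unsupported combinations simply return the generic
// opcode unchanged.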
878
879/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
880/// appropriate for the (value) register bank \p RegBankID and of memory access
881/// size \p OpSize. This returns the variant with the base+unsigned-immediate
882/// addressing mode (e.g., LDRXui).
883/// \returns \p GenericOpc if the combination is unsupported.
884static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
885 unsigned OpSize) {
886 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
887 switch (RegBankID) {
888 case AArch64::GPRRegBankID:
889 switch (OpSize) {
890 case 8:
891 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
892 case 16:
893 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
894 case 32:
895 return isStore ? AArch64::STRWui : AArch64::LDRWui;
896 case 64:
897 return isStore ? AArch64::STRXui : AArch64::LDRXui;
898 }
899 break;
900 case AArch64::FPRRegBankID:
901 switch (OpSize) {
902 case 8:
903 return isStore ? AArch64::STRBui : AArch64::LDRBui;
904 case 16:
905 return isStore ? AArch64::STRHui : AArch64::LDRHui;
906 case 32:
907 return isStore ? AArch64::STRSui : AArch64::LDRSui;
908 case 64:
909 return isStore ? AArch64::STRDui : AArch64::LDRDui;
910 case 128:
911 return isStore ? AArch64::STRQui : AArch64::LDRQui;
912 }
913 break;
914 }
915 return GenericOpc;
916}
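// For example (illustrative): a 32-bit G_LOAD whose value sits on the GPR
// bank becomes AArch64::LDRWui, and a 128-bit G_STORE on the FPR bank
// becomes AArch64::STRQui, both in the base + unsigned-immediate form.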
917
918/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
919/// to \p *To.
920///
921/// E.g "To = COPY SrcReg:SubReg"
922static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
923 const RegisterBankInfo &RBI, Register SrcReg,
924 const TargetRegisterClass *To, unsigned SubReg) {
925 assert(SrcReg.isValid() && "Expected a valid source register?");
926 assert(To && "Destination register class cannot be null");
927 assert(SubReg && "Expected a valid subregister");
928
929 MachineIRBuilder MIB(I);
930 auto SubRegCopy =
931 MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
932 MachineOperand &RegOp = I.getOperand(1);
933 RegOp.setReg(SubRegCopy.getReg(0));
934
935 // It's possible that the destination register won't be constrained. Make
936 // sure that happens.
937 if (!I.getOperand(0).getReg().isPhysical())
938 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
939
940 return true;
941}
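// Illustrative sketch (assumed MIR, not from the original file): when only
// the low 32 bits of a 64-bit source are needed, this effectively rewrites
//   %dst:gpr32 = COPY %src:gpr64
// into
//   %tmp:gpr32 = COPY %src.sub_32
//   %dst:gpr32 = COPY %tmp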
942
943/// Helper function to get the source and destination register classes for a
944/// copy. Returns a std::pair containing the source register class for the
945/// copy, and the destination register class for the copy. If a register class
946/// cannot be determined, then it will be nullptr.
947static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
948getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
949 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
950 const RegisterBankInfo &RBI) {
951 Register DstReg = I.getOperand(0).getReg();
952 Register SrcReg = I.getOperand(1).getReg();
953 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
954 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
955
956 TypeSize DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
957 TypeSize SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
958
959 // Special casing for cross-bank copies of s1s. We can technically represent
960 // a 1-bit value with any size of register. The minimum size for a GPR is 32
961 // bits. So, we need to put the FPR on 32 bits as well.
962 //
963 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
964 // then we can pull it into the helpers that get the appropriate class for a
965 // register bank. Or make a new helper that carries along some constraint
966 // information.
967 if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
968 SrcSize = DstSize = TypeSize::getFixed(32);
969
970 return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
971 getMinClassForRegBank(DstRegBank, DstSize, true)};
972}
973
974// FIXME: We need some sort of API in RBI/TRI to allow generic code to
975// constrain operands of simple instructions given a TargetRegisterClass
976// and LLT
978 const RegisterBankInfo &RBI) {
979 for (MachineOperand &MO : I.operands()) {
980 if (!MO.isReg())
981 continue;
982 Register Reg = MO.getReg();
983 if (!Reg)
984 continue;
985 if (Reg.isPhysical())
986 continue;
987 LLT Ty = MRI.getType(Reg);
988 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
989 const TargetRegisterClass *RC =
990 RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
991 if (!RC) {
992 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
993 RC = getRegClassForTypeOnBank(Ty, RB);
994 if (!RC) {
995 LLVM_DEBUG(
996 dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
997 break;
998 }
999 }
1000 RBI.constrainGenericRegister(Reg, *RC, MRI);
1001 }
1002
1003 return true;
1004}
1005
1006static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
1007 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
1008 const RegisterBankInfo &RBI) {
1009 Register DstReg = I.getOperand(0).getReg();
1010 Register SrcReg = I.getOperand(1).getReg();
1011 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
1012 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
1013
1014 // Find the correct register classes for the source and destination registers.
1015 const TargetRegisterClass *SrcRC;
1016 const TargetRegisterClass *DstRC;
1017 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
1018
1019 if (!DstRC) {
1020 LLVM_DEBUG(dbgs() << "Unexpected dest size "
1021 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
1022 return false;
1023 }
1024
1025 // Is this a copy? If so, then we may need to insert a subregister copy.
1026 if (I.isCopy()) {
1027 // Yes. Check if there's anything to fix up.
1028 if (!SrcRC) {
1029 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
1030 return false;
1031 }
1032
1033 const TypeSize SrcSize = TRI.getRegSizeInBits(*SrcRC);
1034 const TypeSize DstSize = TRI.getRegSizeInBits(*DstRC);
1035 unsigned SubReg;
1036
1037 // If the source bank doesn't support a subregister copy small enough,
1038 // then we first need to copy to the destination bank.
1039 if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
1040 const TargetRegisterClass *DstTempRC =
1041 getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
1042 getSubRegForClass(DstRC, TRI, SubReg);
1043
1044 MachineIRBuilder MIB(I);
1045 auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
1046 copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
1047 } else if (SrcSize > DstSize) {
1048 // If the source register is bigger than the destination we need to
1049 // perform a subregister copy.
1050 const TargetRegisterClass *SubRegRC =
1051 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1052 getSubRegForClass(SubRegRC, TRI, SubReg);
1053 copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
1054 } else if (DstSize > SrcSize) {
1055 // If the destination register is bigger than the source we need to do
1056 // a promotion using SUBREG_TO_REG.
1057 const TargetRegisterClass *PromotionRC =
1058 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1059 getSubRegForClass(SrcRC, TRI, SubReg);
1060
1061 Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
1062 BuildMI(*I.getParent(), I, I.getDebugLoc(),
1063 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
1064 .addImm(0)
1065 .addUse(SrcReg)
1066 .addImm(SubReg);
1067 MachineOperand &RegOp = I.getOperand(1);
1068 RegOp.setReg(PromoteReg);
1069 }
1070
1071 // If the destination is a physical register, then there's nothing to
1072 // change, so we're done.
1073 if (DstReg.isPhysical())
1074 return true;
1075 }
1076
1077 // No need to constrain SrcReg. It will get constrained when we hit another
1078 // of its uses or defs. Copies do not have constraints.
1079 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1080 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
1081 << " operand\n");
1082 return false;
1083 }
1084
1085 // If this is a GPR ZEXT, just reduce it down into a copy.
1086 // The sizes will be mismatched with the source < 32b but that's ok.
1087 if (I.getOpcode() == TargetOpcode::G_ZEXT) {
1088 I.setDesc(TII.get(AArch64::COPY));
1089 assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
1090 return selectCopy(I, TII, MRI, TRI, RBI);
1091 }
1092
1093 I.setDesc(TII.get(AArch64::COPY));
1094 return true;
1095}
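// Illustrative summary of the cases above (not from the original file): a
// copy from a 64-bit GPR into a 32-bit GPR is narrowed via a sub_32
// subregister copy, a copy from a 32-bit source into a 64-bit destination is
// widened with SUBREG_TO_REG, and a G_ZEXT on the GPR bank is simply turned
// into a COPY once the destination class is constrained.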
1096
1097static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
1098 if (!DstTy.isScalar() || !SrcTy.isScalar())
1099 return GenericOpc;
1100
1101 const unsigned DstSize = DstTy.getSizeInBits();
1102 const unsigned SrcSize = SrcTy.getSizeInBits();
1103
1104 switch (DstSize) {
1105 case 32:
1106 switch (SrcSize) {
1107 case 32:
1108 switch (GenericOpc) {
1109 case TargetOpcode::G_SITOFP:
1110 return AArch64::SCVTFUWSri;
1111 case TargetOpcode::G_UITOFP:
1112 return AArch64::UCVTFUWSri;
1113 case TargetOpcode::G_FPTOSI:
1114 return AArch64::FCVTZSUWSr;
1115 case TargetOpcode::G_FPTOUI:
1116 return AArch64::FCVTZUUWSr;
1117 default:
1118 return GenericOpc;
1119 }
1120 case 64:
1121 switch (GenericOpc) {
1122 case TargetOpcode::G_SITOFP:
1123 return AArch64::SCVTFUXSri;
1124 case TargetOpcode::G_UITOFP:
1125 return AArch64::UCVTFUXSri;
1126 case TargetOpcode::G_FPTOSI:
1127 return AArch64::FCVTZSUWDr;
1128 case TargetOpcode::G_FPTOUI:
1129 return AArch64::FCVTZUUWDr;
1130 default:
1131 return GenericOpc;
1132 }
1133 default:
1134 return GenericOpc;
1135 }
1136 case 64:
1137 switch (SrcSize) {
1138 case 32:
1139 switch (GenericOpc) {
1140 case TargetOpcode::G_SITOFP:
1141 return AArch64::SCVTFUWDri;
1142 case TargetOpcode::G_UITOFP:
1143 return AArch64::UCVTFUWDri;
1144 case TargetOpcode::G_FPTOSI:
1145 return AArch64::FCVTZSUXSr;
1146 case TargetOpcode::G_FPTOUI:
1147 return AArch64::FCVTZUUXSr;
1148 default:
1149 return GenericOpc;
1150 }
1151 case 64:
1152 switch (GenericOpc) {
1153 case TargetOpcode::G_SITOFP:
1154 return AArch64::SCVTFUXDri;
1155 case TargetOpcode::G_UITOFP:
1156 return AArch64::UCVTFUXDri;
1157 case TargetOpcode::G_FPTOSI:
1158 return AArch64::FCVTZSUXDr;
1159 case TargetOpcode::G_FPTOUI:
1160 return AArch64::FCVTZUUXDr;
1161 default:
1162 return GenericOpc;
1163 }
1164 default:
1165 return GenericOpc;
1166 }
1167 default:
1168 return GenericOpc;
1169 };
1170 return GenericOpc;
1171}
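// For example (illustrative): G_FPTOSI from a 64-bit double to a 32-bit
// integer selects AArch64::FCVTZSUWDr, while G_SITOFP from a 32-bit integer
// to a 64-bit double selects AArch64::SCVTFUWDri.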
1172
1173MachineInstr *
1174AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1175 Register False, AArch64CC::CondCode CC,
1176 MachineIRBuilder &MIB) const {
1177 MachineRegisterInfo &MRI = *MIB.getMRI();
1178 assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
1179 RBI.getRegBank(True, MRI, TRI)->getID() &&
1180 "Expected both select operands to have the same regbank?");
1181 LLT Ty = MRI.getType(True);
1182 if (Ty.isVector())
1183 return nullptr;
1184 const unsigned Size = Ty.getSizeInBits();
1185 assert((Size == 32 || Size == 64) &&
1186 "Expected 32 bit or 64 bit select only?");
1187 const bool Is32Bit = Size == 32;
1188 if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1189 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1190 auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1191 constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1192 return &*FCSel;
1193 }
1194
1195 // By default, we'll try and emit a CSEL.
1196 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1197 bool Optimized = false;
1198 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1199 &Optimized](Register &Reg, Register &OtherReg,
1200 bool Invert) {
1201 if (Optimized)
1202 return false;
1203
1204 // Attempt to fold:
1205 //
1206 // %sub = G_SUB 0, %x
1207 // %select = G_SELECT cc, %reg, %sub
1208 //
1209 // Into:
1210 // %select = CSNEG %reg, %x, cc
1211 Register MatchReg;
1212 if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
1213 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1214 Reg = MatchReg;
1215 if (Invert) {
1216 CC = AArch64CC::getInvertedCondCode(CC);
1217 std::swap(Reg, OtherReg);
1218 }
1219 return true;
1220 }
1221
1222 // Attempt to fold:
1223 //
1224 // %xor = G_XOR %x, -1
1225 // %select = G_SELECT cc, %reg, %xor
1226 //
1227 // Into:
1228 // %select = CSINV %reg, %x, cc
1229 if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
1230 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1231 Reg = MatchReg;
1232 if (Invert) {
1233 CC = AArch64CC::getInvertedCondCode(CC);
1234 std::swap(Reg, OtherReg);
1235 }
1236 return true;
1237 }
1238
1239 // Attempt to fold:
1240 //
1241 // %add = G_ADD %x, 1
1242 // %select = G_SELECT cc, %reg, %add
1243 //
1244 // Into:
1245 // %select = CSINC %reg, %x, cc
1246 if (mi_match(Reg, MRI,
1247 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
1248 m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
1249 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1250 Reg = MatchReg;
1251 if (Invert) {
1252 CC = AArch64CC::getInvertedCondCode(CC);
1253 std::swap(Reg, OtherReg);
1254 }
1255 return true;
1256 }
1257
1258 return false;
1259 };
1260
1261 // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1262 // true/false values are constants.
1263 // FIXME: All of these patterns already exist in tablegen. We should be
1264 // able to import these.
1265 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1266 &Optimized]() {
1267 if (Optimized)
1268 return false;
1269 auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
1270 auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
1271 if (!TrueCst && !FalseCst)
1272 return false;
1273
1274 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1275 if (TrueCst && FalseCst) {
1276 int64_t T = TrueCst->Value.getSExtValue();
1277 int64_t F = FalseCst->Value.getSExtValue();
1278
1279 if (T == 0 && F == 1) {
1280 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1281 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1282 True = ZReg;
1283 False = ZReg;
1284 return true;
1285 }
1286
1287 if (T == 0 && F == -1) {
1288 // G_SELECT cc, 0, -1 -> CSINV zreg, zreg, cc
1289 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1290 True = ZReg;
1291 False = ZReg;
1292 return true;
1293 }
1294 }
1295
1296 if (TrueCst) {
1297 int64_t T = TrueCst->Value.getSExtValue();
1298 if (T == 1) {
1299 // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1300 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1301 True = False;
1302 False = ZReg;
1303 CC = AArch64CC::getInvertedCondCode(CC);
1304 return true;
1305 }
1306
1307 if (T == -1) {
1308 // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1309 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1310 True = False;
1311 False = ZReg;
1312 CC = AArch64CC::getInvertedCondCode(CC);
1313 return true;
1314 }
1315 }
1316
1317 if (FalseCst) {
1318 int64_t F = FalseCst->Value.getSExtValue();
1319 if (F == 1) {
1320 // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1321 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1322 False = ZReg;
1323 return true;
1324 }
1325
1326 if (F == -1) {
1327 // G_SELECT cc, t, -1 -> CSINV t, zreg, cc
1328 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1329 False = ZReg;
1330 return true;
1331 }
1332 }
1333 return false;
1334 };
1335
1336 Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1337 Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1338 Optimized |= TryOptSelectCst();
1339 auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1340 constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1341 return &*SelectInst;
1342}
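// Putting the folds above together (illustrative, not from the original
// file): a G_SELECT between the constants 0 and 1 is emitted as
//   %dst = CSINCWr wzr, wzr, cc
// and a G_SELECT whose false operand is the negation of some %x becomes
//   %dst = CSNEGWr %true, %x, cc
// with the X-register variants used for 64-bit selects.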
1343
1344static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
1345 switch (P) {
1346 default:
1347 llvm_unreachable("Unknown condition code!");
1348 case CmpInst::ICMP_NE:
1349 return AArch64CC::NE;
1350 case CmpInst::ICMP_EQ:
1351 return AArch64CC::EQ;
1352 case CmpInst::ICMP_SGT:
1353 return AArch64CC::GT;
1354 case CmpInst::ICMP_SGE:
1355 return AArch64CC::GE;
1356 case CmpInst::ICMP_SLT:
1357 return AArch64CC::LT;
1358 case CmpInst::ICMP_SLE:
1359 return AArch64CC::LE;
1360 case CmpInst::ICMP_UGT:
1361 return AArch64CC::HI;
1362 case CmpInst::ICMP_UGE:
1363 return AArch64CC::HS;
1364 case CmpInst::ICMP_ULT:
1365 return AArch64CC::LO;
1366 case CmpInst::ICMP_ULE:
1367 return AArch64CC::LS;
1368 }
1369}
1370
1371/// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
1372static void changeFPCCToORAArch64CC(CmpInst::Predicate CC,
1373 AArch64CC::CondCode &CondCode,
1374 AArch64CC::CondCode &CondCode2) {
1375 CondCode2 = AArch64CC::AL;
1376 switch (CC) {
1377 default:
1378 llvm_unreachable("Unknown FP condition!");
1379 case CmpInst::FCMP_OEQ:
1380 CondCode = AArch64CC::EQ;
1381 break;
1382 case CmpInst::FCMP_OGT:
1383 CondCode = AArch64CC::GT;
1384 break;
1385 case CmpInst::FCMP_OGE:
1386 CondCode = AArch64CC::GE;
1387 break;
1388 case CmpInst::FCMP_OLT:
1389 CondCode = AArch64CC::MI;
1390 break;
1391 case CmpInst::FCMP_OLE:
1392 CondCode = AArch64CC::LS;
1393 break;
1394 case CmpInst::FCMP_ONE:
1395 CondCode = AArch64CC::MI;
1396 CondCode2 = AArch64CC::GT;
1397 break;
1398 case CmpInst::FCMP_ORD:
1399 CondCode = AArch64CC::VC;
1400 break;
1401 case CmpInst::FCMP_UNO:
1402 CondCode = AArch64CC::VS;
1403 break;
1404 case CmpInst::FCMP_UEQ:
1405 CondCode = AArch64CC::EQ;
1406 CondCode2 = AArch64CC::VS;
1407 break;
1408 case CmpInst::FCMP_UGT:
1409 CondCode = AArch64CC::HI;
1410 break;
1411 case CmpInst::FCMP_UGE:
1412 CondCode = AArch64CC::PL;
1413 break;
1414 case CmpInst::FCMP_ULT:
1415 CondCode = AArch64CC::LT;
1416 break;
1417 case CmpInst::FCMP_ULE:
1418 CondCode = AArch64CC::LE;
1419 break;
1420 case CmpInst::FCMP_UNE:
1421 CondCode = AArch64CC::NE;
1422 break;
1423 }
1424}
1425
1426/// Convert an IR fp condition code to an AArch64 CC.
1427/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1428/// should be AND'ed instead of OR'ed.
1429static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC,
1430 AArch64CC::CondCode &CondCode,
1431 AArch64CC::CondCode &CondCode2) {
1432 CondCode2 = AArch64CC::AL;
1433 switch (CC) {
1434 default:
1435 changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
1436 assert(CondCode2 == AArch64CC::AL);
1437 break;
1438 case CmpInst::FCMP_ONE:
1439 // (a one b)
1440 // == ((a olt b) || (a ogt b))
1441 // == ((a ord b) && (a une b))
1442 CondCode = AArch64CC::VC;
1443 CondCode2 = AArch64CC::NE;
1444 break;
1445 case CmpInst::FCMP_UEQ:
1446 // (a ueq b)
1447 // == ((a uno b) || (a oeq b))
1448 // == ((a ule b) && (a uge b))
1449 CondCode = AArch64CC::PL;
1450 CondCode2 = AArch64CC::LE;
1451 break;
1452 }
1453}
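// For example (illustrative): FCMP_ONE maps here to the pair (VC, NE), i.e.
// "ordered AND not-equal", whereas the OR-based helper above maps it to
// (MI, GT), which callers implement with two separate checks.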
1454
1455/// Return a register which can be used as a bit to test in a TB(N)Z.
1456static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1457 MachineRegisterInfo &MRI) {
1458 assert(Reg.isValid() && "Expected valid register!");
1459 bool HasZext = false;
1460 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1461 unsigned Opc = MI->getOpcode();
1462
1463 if (!MI->getOperand(0).isReg() ||
1464 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1465 break;
1466
1467 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1468 //
1469 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1470 // on the truncated x is the same as the bit number on x.
1471 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1472 Opc == TargetOpcode::G_TRUNC) {
1473 if (Opc == TargetOpcode::G_ZEXT)
1474 HasZext = true;
1475
1476 Register NextReg = MI->getOperand(1).getReg();
1477 // Did we find something worth folding?
1478 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1479 break;
1480
1481 // NextReg is worth folding. Keep looking.
1482 Reg = NextReg;
1483 continue;
1484 }
1485
1486 // Attempt to find a suitable operation with a constant on one side.
1487 std::optional<uint64_t> C;
1488 Register TestReg;
1489 switch (Opc) {
1490 default:
1491 break;
1492 case TargetOpcode::G_AND:
1493 case TargetOpcode::G_XOR: {
1494 TestReg = MI->getOperand(1).getReg();
1495 Register ConstantReg = MI->getOperand(2).getReg();
1496 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1497 if (!VRegAndVal) {
1498 // AND commutes, check the other side for a constant.
1499 // FIXME: Can we canonicalize the constant so that it's always on the
1500 // same side at some point earlier?
1501 std::swap(ConstantReg, TestReg);
1502 VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1503 }
1504 if (VRegAndVal) {
1505 if (HasZext)
1506 C = VRegAndVal->Value.getZExtValue();
1507 else
1508 C = VRegAndVal->Value.getSExtValue();
1509 }
1510 break;
1511 }
1512 case TargetOpcode::G_ASHR:
1513 case TargetOpcode::G_LSHR:
1514 case TargetOpcode::G_SHL: {
1515 TestReg = MI->getOperand(1).getReg();
1516 auto VRegAndVal =
1517 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1518 if (VRegAndVal)
1519 C = VRegAndVal->Value.getSExtValue();
1520 break;
1521 }
1522 }
1523
1524 // Didn't find a constant or viable register. Bail out of the loop.
1525 if (!C || !TestReg.isValid())
1526 break;
1527
1528 // We found a suitable instruction with a constant. Check to see if we can
1529 // walk through the instruction.
1530 Register NextReg;
1531 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1532 switch (Opc) {
1533 default:
1534 break;
1535 case TargetOpcode::G_AND:
1536 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1537 if ((*C >> Bit) & 1)
1538 NextReg = TestReg;
1539 break;
1540 case TargetOpcode::G_SHL:
1541 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1542 // the type of the register.
1543 if (*C <= Bit && (Bit - *C) < TestRegSize) {
1544 NextReg = TestReg;
1545 Bit = Bit - *C;
1546 }
1547 break;
1548 case TargetOpcode::G_ASHR:
1549 // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1550 // in x
1551 NextReg = TestReg;
1552 Bit = Bit + *C;
1553 if (Bit >= TestRegSize)
1554 Bit = TestRegSize - 1;
1555 break;
1556 case TargetOpcode::G_LSHR:
1557 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1558 if ((Bit + *C) < TestRegSize) {
1559 NextReg = TestReg;
1560 Bit = Bit + *C;
1561 }
1562 break;
1563 case TargetOpcode::G_XOR:
1564 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1565 // appropriate.
1566 //
1567 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1568 //
1569 // tbz x', b -> tbnz x, b
1570 //
1571 // Because x' only has the b-th bit set if x does not.
1572 if ((*C >> Bit) & 1)
1573 Invert = !Invert;
1574 NextReg = TestReg;
1575 break;
1576 }
1577
1578 // Check if we found anything worth folding.
1579 if (!NextReg.isValid())
1580 return Reg;
1581 Reg = NextReg;
1582 }
1583
1584 return Reg;
1585}
1586
1587MachineInstr *AArch64InstructionSelector::emitTestBit(
1588 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1589 MachineIRBuilder &MIB) const {
1590 assert(TestReg.isValid());
1591 assert(ProduceNonFlagSettingCondBr &&
1592 "Cannot emit TB(N)Z with speculation tracking!");
1593 MachineRegisterInfo &MRI = *MIB.getMRI();
1594
1595 // Attempt to optimize the test bit by walking over instructions.
1596 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1597 LLT Ty = MRI.getType(TestReg);
1598 unsigned Size = Ty.getSizeInBits();
1599 assert(!Ty.isVector() && "Expected a scalar!");
1600 assert(Bit < 64 && "Bit is too large!");
1601
1602 // When the test register is a 64-bit register, we have to narrow to make
1603 // TBNZW work.
1604 bool UseWReg = Bit < 32;
1605 unsigned NecessarySize = UseWReg ? 32 : 64;
1606 if (Size != NecessarySize)
1607 TestReg = moveScalarRegClass(
1608 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1609 MIB);
1610
1611 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1612 {AArch64::TBZW, AArch64::TBNZW}};
1613 unsigned Opc = OpcTable[UseWReg][IsNegative];
1614 auto TestBitMI =
1615 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1616 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1617 return &*TestBitMI;
1618}
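// Illustrative result (assumed MIR, not from the original file): testing bit
// 3 of a 32-bit value with IsNegative == true produces roughly
//   TBNZW %testreg, 3, %bb.dest
// while a bit index of 32 or more with IsNegative == false produces a TBZX
// on the 64-bit register instead.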
1619
1620bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1621 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1622 MachineIRBuilder &MIB) const {
1623 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1624 // Given something like this:
1625 //
1626 // %x = ...Something...
1627 // %one = G_CONSTANT i64 1
1628 // %zero = G_CONSTANT i64 0
1629 // %and = G_AND %x, %one
1630 // %cmp = G_ICMP intpred(ne), %and, %zero
1631 // %cmp_trunc = G_TRUNC %cmp
1632 // G_BRCOND %cmp_trunc, %bb.3
1633 //
1634 // We want to try and fold the AND into the G_BRCOND and produce either a
1635 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1636 //
1637 // In this case, we'd get
1638 //
1639 // TBNZ %x %bb.3
1640 //
1641
1642 // Check if the AND has a constant on its RHS which we can use as a mask.
1643 // If it's a power of 2, then it's the same as checking a specific bit.
1644 // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1645 auto MaybeBit = getIConstantVRegValWithLookThrough(
1646 AndInst.getOperand(2).getReg(), *MIB.getMRI());
1647 if (!MaybeBit)
1648 return false;
1649
1650 int32_t Bit = MaybeBit->Value.exactLogBase2();
1651 if (Bit < 0)
1652 return false;
1653
1654 Register TestReg = AndInst.getOperand(1).getReg();
1655
1656 // Emit a TB(N)Z.
1657 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1658 return true;
1659}
1660
1661MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1662 bool IsNegative,
1663 MachineBasicBlock *DestMBB,
1664 MachineIRBuilder &MIB) const {
1665 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1666 MachineRegisterInfo &MRI = *MIB.getMRI();
1667 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1668 AArch64::GPRRegBankID &&
1669 "Expected GPRs only?");
1670 auto Ty = MRI.getType(CompareReg);
1671 unsigned Width = Ty.getSizeInBits();
1672 assert(!Ty.isVector() && "Expected scalar only?");
1673 assert(Width <= 64 && "Expected width to be at most 64?");
1674 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1675 {AArch64::CBNZW, AArch64::CBNZX}};
1676 unsigned Opc = OpcTable[IsNegative][Width == 64];
1677 auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1678 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1679 return &*BranchMI;
1680}
1681
1682bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1683 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1684 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1685 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1686 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1687 // totally clean. Some of them require two branches to implement.
1688 auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1689 emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1690 Pred);
1691 AArch64CC::CondCode CC1, CC2;
1692 changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
1693 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1694 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1695 if (CC2 != AArch64CC::AL)
1696 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1697 I.eraseFromParent();
1698 return true;
1699}
1700
1701bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1702 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1703 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1704 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1705 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1706 //
1707 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1708 // instructions will not be produced, as they are conditional branch
1709 // instructions that do not set flags.
1710 if (!ProduceNonFlagSettingCondBr)
1711 return false;
1712
1713 MachineRegisterInfo &MRI = *MIB.getMRI();
1714 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1715 auto Pred =
1716 static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1717 Register LHS = ICmp.getOperand(2).getReg();
1718 Register RHS = ICmp.getOperand(3).getReg();
1719
1720 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1721 auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1722 MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1723
1724 // When we can emit a TB(N)Z, prefer that.
1725 //
1726 // Handle non-commutative condition codes first.
1727 // Note that we don't want to do this when we have a G_AND because it can
1728 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1729 if (VRegAndVal && !AndInst) {
1730 int64_t C = VRegAndVal->Value.getSExtValue();
1731
1732 // When we have a greater-than comparison, we can just test if the msb is
1733 // zero.
1734 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1735 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1736 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1737 I.eraseFromParent();
1738 return true;
1739 }
1740
1741 // When we have a less than comparison, we can just test if the msb is not
1742 // zero.
1743 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1744 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1745 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1746 I.eraseFromParent();
1747 return true;
1748 }
1749
1750 // Inversely, if we have a signed greater-than-or-equal comparison to zero,
1751 // we can test if the msb is zero.
1752 if (C == 0 && Pred == CmpInst::ICMP_SGE) {
1753 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1754 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1755 I.eraseFromParent();
1756 return true;
1757 }
1758 }
1759
1760 // Attempt to handle commutative condition codes. Right now, that's only
1761 // eq/ne.
1762 if (ICmpInst::isEquality(Pred)) {
1763 if (!VRegAndVal) {
1764 std::swap(RHS, LHS);
1765 VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1766 AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1767 }
1768
1769 if (VRegAndVal && VRegAndVal->Value == 0) {
1770 // If there's a G_AND feeding into this branch, try to fold it away by
1771 // emitting a TB(N)Z instead.
1772 //
1773 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1774 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1775 // would be redundant.
1776 if (AndInst &&
1777 tryOptAndIntoCompareBranch(
1778 *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1779 I.eraseFromParent();
1780 return true;
1781 }
1782
1783 // Otherwise, try to emit a CB(N)Z instead.
1784 auto LHSTy = MRI.getType(LHS);
1785 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1786 emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1787 I.eraseFromParent();
1788 return true;
1789 }
1790 }
1791 }
1792
1793 return false;
1794}
1795
1796bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1797 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1798 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1799 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1800 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1801 return true;
1802
1803 // Couldn't optimize. Emit a compare + a Bcc.
1804 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1805 auto PredOp = ICmp.getOperand(1);
1806 emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1807 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1808 static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
1809 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1810 I.eraseFromParent();
1811 return true;
1812}
1813
1814bool AArch64InstructionSelector::selectCompareBranch(
1815 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
1816 Register CondReg = I.getOperand(0).getReg();
1817 MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1818 // Try to select the G_BRCOND using whatever is feeding the condition if
1819 // possible.
1820 unsigned CCMIOpc = CCMI->getOpcode();
1821 if (CCMIOpc == TargetOpcode::G_FCMP)
1822 return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1823 if (CCMIOpc == TargetOpcode::G_ICMP)
1824 return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1825
1826 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1827 // instructions will not be produced, as they are conditional branch
1828 // instructions that do not set flags.
1829 if (ProduceNonFlagSettingCondBr) {
1830 emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1831 I.getOperand(1).getMBB(), MIB);
1832 I.eraseFromParent();
1833 return true;
1834 }
1835
1836 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1837 auto TstMI =
1838 MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1839 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
1840 auto Bcc = MIB.buildInstr(AArch64::Bcc)
1841 .addImm(AArch64CC::NE)
1842 .addMBB(I.getOperand(1).getMBB());
1843 I.eraseFromParent();
1844 return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1845}
1846
1847/// Returns the element immediate value of a vector shift operand if found.
1848/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1849static std::optional<int64_t> getVectorShiftImm(Register Reg,
1850 MachineRegisterInfo &MRI) {
1851 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1852 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1853 return getAArch64VectorSplatScalar(*OpMI, MRI);
1854}
1855
1856/// Matches and returns the shift immediate value for a SHL instruction given
1857/// a shift operand.
1858static std::optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg,
1859 MachineRegisterInfo &MRI) {
1860 std::optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1861 if (!ShiftImm)
1862 return std::nullopt;
1863 // Check the immediate is in range for a SHL.
1864 int64_t Imm = *ShiftImm;
1865 if (Imm < 0)
1866 return std::nullopt;
1867 switch (SrcTy.getElementType().getSizeInBits()) {
1868 default:
1869 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1870 return std::nullopt;
1871 case 8:
1872 if (Imm > 7)
1873 return std::nullopt;
1874 break;
1875 case 16:
1876 if (Imm > 15)
1877 return std::nullopt;
1878 break;
1879 case 32:
1880 if (Imm > 31)
1881 return std::nullopt;
1882 break;
1883 case 64:
1884 if (Imm > 63)
1885 return std::nullopt;
1886 break;
1887 }
1888 return Imm;
1889}
1890
1891bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
1892 MachineRegisterInfo &MRI) {
1893 assert(I.getOpcode() == TargetOpcode::G_SHL);
1894 Register DstReg = I.getOperand(0).getReg();
1895 const LLT Ty = MRI.getType(DstReg);
1896 Register Src1Reg = I.getOperand(1).getReg();
1897 Register Src2Reg = I.getOperand(2).getReg();
1898
1899 if (!Ty.isVector())
1900 return false;
1901
1902 // Check if we have a vector of constants on RHS that we can select as the
1903 // immediate form.
1904 std::optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1905
1906 unsigned Opc = 0;
1907 if (Ty == LLT::fixed_vector(2, 64)) {
1908 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1909 } else if (Ty == LLT::fixed_vector(4, 32)) {
1910 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1911 } else if (Ty == LLT::fixed_vector(2, 32)) {
1912 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1913 } else if (Ty == LLT::fixed_vector(4, 16)) {
1914 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1915 } else if (Ty == LLT::fixed_vector(8, 16)) {
1916 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1917 } else if (Ty == LLT::fixed_vector(16, 8)) {
1918 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1919 } else if (Ty == LLT::fixed_vector(8, 8)) {
1920 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1921 } else {
1922 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1923 return false;
1924 }
1925
1926 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1927 if (ImmVal)
1928 Shl.addImm(*ImmVal);
1929 else
1930 Shl.addUse(Src2Reg);
1931 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1932 I.eraseFromParent();
1933 return true;
1934}
1935
1936bool AArch64InstructionSelector::selectVectorAshrLshr(
1937 MachineInstr &I, MachineRegisterInfo &MRI) {
1938 assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1939 I.getOpcode() == TargetOpcode::G_LSHR);
1940 Register DstReg = I.getOperand(0).getReg();
1941 const LLT Ty = MRI.getType(DstReg);
1942 Register Src1Reg = I.getOperand(1).getReg();
1943 Register Src2Reg = I.getOperand(2).getReg();
1944
1945 if (!Ty.isVector())
1946 return false;
1947
1948 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1949
1950 // We expect the immediate case to be lowered in the PostLegalCombiner to
1951 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1952
1953 // There is not a shift right register instruction, but the shift left
1954 // register instruction takes a signed value, where negative numbers specify a
1955 // right shift.
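 // For example, a variable-amount v4i32 arithmetic shift right is selected
 // below (roughly) as:
 //   %neg:fpr128 = NEGv4i32 %shift_amount
 //   %dst:fpr128 = SSHLv4i32 %src, %neg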
1956
1957 unsigned Opc = 0;
1958 unsigned NegOpc = 0;
1959 const TargetRegisterClass *RC =
1960 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
1961 if (Ty == LLT::fixed_vector(2, 64)) {
1962 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1963 NegOpc = AArch64::NEGv2i64;
1964 } else if (Ty == LLT::fixed_vector(4, 32)) {
1965 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1966 NegOpc = AArch64::NEGv4i32;
1967 } else if (Ty == LLT::fixed_vector(2, 32)) {
1968 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1969 NegOpc = AArch64::NEGv2i32;
1970 } else if (Ty == LLT::fixed_vector(4, 16)) {
1971 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1972 NegOpc = AArch64::NEGv4i16;
1973 } else if (Ty == LLT::fixed_vector(8, 16)) {
1974 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1975 NegOpc = AArch64::NEGv8i16;
1976 } else if (Ty == LLT::fixed_vector(16, 8)) {
1977 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1978 NegOpc = AArch64::NEGv16i8;
1979 } else if (Ty == LLT::fixed_vector(8, 8)) {
1980 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1981 NegOpc = AArch64::NEGv8i8;
1982 } else {
1983 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1984 return false;
1985 }
1986
1987 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1988 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1989 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1990 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1991 I.eraseFromParent();
1992 return true;
1993}
1994
1995bool AArch64InstructionSelector::selectVaStartAAPCS(
1996 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1997 return false;
1998}
1999
2000bool AArch64InstructionSelector::selectVaStartDarwin(
2001 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
2002 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
2003 Register ListReg = I.getOperand(0).getReg();
2004
2005 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2006
2007 int FrameIdx = FuncInfo->getVarArgsStackIndex();
2008 if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64(
2009 MF.getFunction().getCallingConv(), MF.getFunction().isVarArg())) {
2010 FrameIdx = FuncInfo->getVarArgsGPRSize() > 0
2011 ? FuncInfo->getVarArgsGPRIndex()
2012 : FuncInfo->getVarArgsStackIndex();
2013 }
2014
2015 auto MIB =
2016 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
2017 .addDef(ArgsAddrReg)
2018 .addFrameIndex(FrameIdx)
2019 .addImm(0)
2020 .addImm(0);
2021
2022 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2023
2024 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
2025 .addUse(ArgsAddrReg)
2026 .addUse(ListReg)
2027 .addImm(0)
2028 .addMemOperand(*I.memoperands_begin());
2029
2030 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2031 I.eraseFromParent();
2032 return true;
2033}
2034
2035void AArch64InstructionSelector::materializeLargeCMVal(
2036 MachineInstr &I, const Value *V, unsigned OpFlags) {
2037 MachineBasicBlock &MBB = *I.getParent();
2038 MachineFunction &MF = *MBB.getParent();
2039 MachineRegisterInfo &MRI = MF.getRegInfo();
2040
2041 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
2042 MovZ->addOperand(MF, I.getOperand(1));
2043 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
2044 AArch64II::MO_NC);
2045 MovZ->addOperand(MF, MachineOperand::CreateImm(0));
2046 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
2047
2048 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
2049 Register ForceDstReg) {
2050 Register DstReg = ForceDstReg
2051 ? ForceDstReg
2052 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2053 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
2054 if (auto *GV = dyn_cast<GlobalValue>(V)) {
2055 MovI->addOperand(MF, MachineOperand::CreateGA(
2056 GV, MovZ->getOperand(1).getOffset(), Flags));
2057 } else {
2058 MovI->addOperand(
2059 MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
2060 MovZ->getOperand(1).getOffset(), Flags));
2061 }
2062 MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
2063 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
2064 return DstReg;
2065 };
2066 Register DstReg = BuildMovK(MovZ.getReg(0),
2067 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
2068 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
2069 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
2070}
2071
2072bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
2073 MachineBasicBlock &MBB = *I.getParent();
2074 MachineFunction &MF = *MBB.getParent();
2075 MachineRegisterInfo &MRI = MF.getRegInfo();
2076
2077 switch (I.getOpcode()) {
2078 case TargetOpcode::G_STORE: {
2079 bool Changed = contractCrossBankCopyIntoStore(I, MRI);
2080 MachineOperand &SrcOp = I.getOperand(0);
2081 if (MRI.getType(SrcOp.getReg()).isPointer()) {
2082 // Allow matching with imported patterns for stores of pointers. Unlike
2083 // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
2084 // and constrain.
2085 auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
2086 Register NewSrc = Copy.getReg(0);
2087 SrcOp.setReg(NewSrc);
2088 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
2089 Changed = true;
2090 }
2091 return Changed;
2092 }
2093 case TargetOpcode::G_PTR_ADD:
2094 return convertPtrAddToAdd(I, MRI);
2095 case TargetOpcode::G_LOAD: {
2096 // For scalar loads of pointers, we try to convert the dest type from p0
2097 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
2098 // conversion, this should be ok because all users should have been
2099 // selected already, so the type doesn't matter for them.
2100 Register DstReg = I.getOperand(0).getReg();
2101 const LLT DstTy = MRI.getType(DstReg);
2102 if (!DstTy.isPointer())
2103 return false;
2104 MRI.setType(DstReg, LLT::scalar(64));
2105 return true;
2106 }
2107 case AArch64::G_DUP: {
2108 // Convert the type from p0 to s64 to help selection.
2109 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2110 if (!DstTy.isPointerVector())
2111 return false;
2112 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
2113 MRI.setType(I.getOperand(0).getReg(),
2114 DstTy.changeElementType(LLT::scalar(64)));
2115 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2116 I.getOperand(1).setReg(NewSrc.getReg(0));
2117 return true;
2118 }
2119 case TargetOpcode::G_UITOFP:
2120 case TargetOpcode::G_SITOFP: {
2121 // If both source and destination regbanks are FPR, then convert the opcode
2122 // to G_SITOF so that the importer can select it to an fpr variant.
2123 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
2124 // copy.
2125 Register SrcReg = I.getOperand(1).getReg();
2126 LLT SrcTy = MRI.getType(SrcReg);
2127 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2128 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
2129 return false;
2130
2131 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
2132 if (I.getOpcode() == TargetOpcode::G_SITOFP)
2133 I.setDesc(TII.get(AArch64::G_SITOF));
2134 else
2135 I.setDesc(TII.get(AArch64::G_UITOF));
2136 return true;
2137 }
2138 return false;
2139 }
2140 default:
2141 return false;
2142 }
2143}
2144
2145/// This lowering tries to look for G_PTR_ADD instructions and then converts
2146/// them to a standard G_ADD with a COPY on the source.
2147///
2148/// The motivation behind this is to expose the add semantics to the imported
2149/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
2150/// because the selector works bottom up, uses before defs. By the time we
2151/// end up trying to select a G_PTR_ADD, we should have already attempted to
2152/// fold this into addressing modes and were therefore unsuccessful.
2153bool AArch64InstructionSelector::convertPtrAddToAdd(
2154 MachineInstr &I, MachineRegisterInfo &MRI) {
2155 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
2156 Register DstReg = I.getOperand(0).getReg();
2157 Register AddOp1Reg = I.getOperand(1).getReg();
2158 const LLT PtrTy = MRI.getType(DstReg);
2159 if (PtrTy.getAddressSpace() != 0)
2160 return false;
2161
2162 const LLT CastPtrTy =
2163 PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
2164 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
2165 // Set regbanks on the registers.
2166 if (PtrTy.isVector())
2167 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2168 else
2169 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2170
2171 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2172 // %dst(intty) = G_ADD %intbase, off
2173 I.setDesc(TII.get(TargetOpcode::G_ADD));
2174 MRI.setType(DstReg, CastPtrTy);
2175 I.getOperand(1).setReg(PtrToInt.getReg(0));
2176 if (!select(*PtrToInt)) {
2177 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
2178 return false;
2179 }
2180
2181 // Also take the opportunity here to try to do some optimization.
2182 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
2183 Register NegatedReg;
2184 if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
2185 return true;
2186 I.getOperand(2).setReg(NegatedReg);
2187 I.setDesc(TII.get(TargetOpcode::G_SUB));
2188 return true;
2189}
2190
2191bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
2192 MachineRegisterInfo &MRI) {
2193 // We try to match the immediate variant of LSL, which is actually an alias
2194 // for a special case of UBFM. Otherwise, we fall back to the imported
2195 // selector which will match the register variant.
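 // For example (64-bit), "lsl x0, x1, #s" is the alias of
 // "ubfm x0, x1, #((64 - s) % 64), #(63 - s)"; the selectShiftA/selectShiftB
 // renderers below produce those two immediates.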
2196 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2197 const auto &MO = I.getOperand(2);
2198 auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
2199 if (!VRegAndVal)
2200 return false;
2201
2202 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2203 if (DstTy.isVector())
2204 return false;
2205 bool Is64Bit = DstTy.getSizeInBits() == 64;
2206 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2207 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2208
2209 if (!Imm1Fn || !Imm2Fn)
2210 return false;
2211
2212 auto NewI =
2213 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2214 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2215
2216 for (auto &RenderFn : *Imm1Fn)
2217 RenderFn(NewI);
2218 for (auto &RenderFn : *Imm2Fn)
2219 RenderFn(NewI);
2220
2221 I.eraseFromParent();
2222 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
2223}
2224
2225bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2226 MachineInstr &I, MachineRegisterInfo &MRI) {
2227 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2228 // If we're storing a scalar, it doesn't matter what register bank that
2229 // scalar is on. All that matters is the size.
2230 //
2231 // So, if we see something like this (with a 32-bit scalar as an example):
2232 //
2233 // %x:gpr(s32) = ... something ...
2234 // %y:fpr(s32) = COPY %x:gpr(s32)
2235 // G_STORE %y:fpr(s32)
2236 //
2237 // We can fix this up into something like this:
2238 //
2239 // G_STORE %x:gpr(s32)
2240 //
2241 // And then continue the selection process normally.
2242 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2243 if (!DefDstReg.isValid())
2244 return false;
2245 LLT DefDstTy = MRI.getType(DefDstReg);
2246 Register StoreSrcReg = I.getOperand(0).getReg();
2247 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2248
2249 // If we get something strange like a physical register, then we shouldn't
2250 // go any further.
2251 if (!DefDstTy.isValid())
2252 return false;
2253
2254 // Are the source and dst types the same size?
2255 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2256 return false;
2257
2258 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2259 RBI.getRegBank(DefDstReg, MRI, TRI))
2260 return false;
2261
2262 // We have a cross-bank copy, which is entering a store. Let's fold it.
2263 I.getOperand(0).setReg(DefDstReg);
2264 return true;
2265}
2266
2267bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2268 assert(I.getParent() && "Instruction should be in a basic block!");
2269 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2270
2271 MachineBasicBlock &MBB = *I.getParent();
2272 MachineFunction &MF = *MBB.getParent();
2273 MachineRegisterInfo &MRI = MF.getRegInfo();
2274
2275 switch (I.getOpcode()) {
2276 case AArch64::G_DUP: {
2277 // Before selecting a DUP instruction, check if it is better selected as a
2278 // MOV or load from a constant pool.
2279 Register Src = I.getOperand(1).getReg();
2280 auto ValAndVReg = getAnyConstantVRegValWithLookThrough(Src, MRI);
2281 if (!ValAndVReg)
2282 return false;
2283 LLVMContext &Ctx = MF.getFunction().getContext();
2284 Register Dst = I.getOperand(0).getReg();
2285 auto *CV = ConstantDataVector::getSplat(
2286 MRI.getType(Dst).getNumElements(),
2287 ConstantInt::get(
2288 Type::getIntNTy(Ctx, MRI.getType(Dst).getScalarSizeInBits()),
2289 ValAndVReg->Value.trunc(MRI.getType(Dst).getScalarSizeInBits())));
2290 if (!emitConstantVector(Dst, CV, MIB, MRI))
2291 return false;
2292 I.eraseFromParent();
2293 return true;
2294 }
2295 case TargetOpcode::G_SEXT:
2296 // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
2297 // over a normal extend.
2298 if (selectUSMovFromExtend(I, MRI))
2299 return true;
2300 return false;
2301 case TargetOpcode::G_BR:
2302 return false;
2303 case TargetOpcode::G_SHL:
2304 return earlySelectSHL(I, MRI);
2305 case TargetOpcode::G_CONSTANT: {
2306 bool IsZero = false;
2307 if (I.getOperand(1).isCImm())
2308 IsZero = I.getOperand(1).getCImm()->isZero();
2309 else if (I.getOperand(1).isImm())
2310 IsZero = I.getOperand(1).getImm() == 0;
2311
2312 if (!IsZero)
2313 return false;
2314
2315 Register DefReg = I.getOperand(0).getReg();
2316 LLT Ty = MRI.getType(DefReg);
2317 if (Ty.getSizeInBits() == 64) {
2318 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2319 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2320 } else if (Ty.getSizeInBits() == 32) {
2321 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2322 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2323 } else
2324 return false;
2325
2326 I.setDesc(TII.get(TargetOpcode::COPY));
2327 return true;
2328 }
2329
2330 case TargetOpcode::G_ADD: {
2331 // Check if this is being fed by a G_ICMP on either side.
2332 //
2333 // (cmp pred, x, y) + z
2334 //
2335 // In the above case, when the cmp is true, we increment z by 1. So, we can
2336 // fold the add into the cset for the cmp by using cinc.
2337 //
2338 // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
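 // For example:
 //   %cmp:gpr(s32) = G_ICMP intpred(eq), %x, %y
 //   %add:gpr(s32) = G_ADD %z, %cmp
 // becomes a flag-setting compare (from emitIntegerCompare) followed by
 //   %add = CSINCWr %z, %z, <inverse of eq>
 // which is the "cinc %add, %z, eq" alias.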
2339 Register AddDst = I.getOperand(0).getReg();
2340 Register AddLHS = I.getOperand(1).getReg();
2341 Register AddRHS = I.getOperand(2).getReg();
2342 // Only handle scalars.
2343 LLT Ty = MRI.getType(AddLHS);
2344 if (Ty.isVector())
2345 return false;
2346 // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
2347 // bits.
2348 unsigned Size = Ty.getSizeInBits();
2349 if (Size != 32 && Size != 64)
2350 return false;
2351 auto MatchCmp = [&](Register Reg) -> MachineInstr * {
2352 if (!MRI.hasOneNonDBGUse(Reg))
2353 return nullptr;
2354 // If the LHS of the add is 32 bits, then we want to fold a 32-bit
2355 // compare.
2356 if (Size == 32)
2357 return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
2358 // We model scalar compares using 32-bit destinations right now.
2359 // If it's a 64-bit compare, it'll have 64-bit sources.
2360 Register ZExt;
2361 if (!mi_match(Reg, MRI,
2362 m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt))))))
2363 return nullptr;
2364 auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
2365 if (!Cmp ||
2366 MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
2367 return nullptr;
2368 return Cmp;
2369 };
2370 // Try to match
2371 // z + (cmp pred, x, y)
2372 MachineInstr *Cmp = MatchCmp(AddRHS);
2373 if (!Cmp) {
2374 // (cmp pred, x, y) + z
2375 std::swap(AddLHS, AddRHS);
2376 Cmp = MatchCmp(AddRHS);
2377 if (!Cmp)
2378 return false;
2379 }
2380 auto &PredOp = Cmp->getOperand(1);
2381 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
2382 const AArch64CC::CondCode InvCC =
2383 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
2384 MIB.setInstrAndDebugLoc(I);
2385 emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
2386 /*RHS=*/Cmp->getOperand(3), PredOp, MIB);
2387 emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
2388 I.eraseFromParent();
2389 return true;
2390 }
2391 case TargetOpcode::G_OR: {
2392 // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2393 // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2394 // shifting and masking that we can replace with a BFI (encoded as a BFM).
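 // For example, with a 32-bit type and ShiftImm == 8:
 //   %or = G_OR (G_SHL %a, 8), (G_AND %b, 0xff)
 // is selected below as
 //   %or = BFMWri %b(tied), %a, 24, 23
 // i.e. the low 24 bits of %a are inserted at bit 8 over %b.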
2395 Register Dst = I.getOperand(0).getReg();
2396 LLT Ty = MRI.getType(Dst);
2397
2398 if (!Ty.isScalar())
2399 return false;
2400
2401 unsigned Size = Ty.getSizeInBits();
2402 if (Size != 32 && Size != 64)
2403 return false;
2404
2405 Register ShiftSrc;
2406 int64_t ShiftImm;
2407 Register MaskSrc;
2408 int64_t MaskImm;
2409 if (!mi_match(
2410 Dst, MRI,
2411 m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
2412 m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
2413 return false;
2414
2415 if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2416 return false;
2417
2418 int64_t Immr = Size - ShiftImm;
2419 int64_t Imms = Size - ShiftImm - 1;
2420 unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2421 emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2422 I.eraseFromParent();
2423 return true;
2424 }
2425 case TargetOpcode::G_FENCE: {
2426 if (I.getOperand(1).getImm() == 0)
2427 BuildMI(MBB, I, MIMetadata(I), TII.get(TargetOpcode::MEMBARRIER));
2428 else
2429 BuildMI(MBB, I, MIMetadata(I), TII.get(AArch64::DMB))
2430 .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
2431 I.eraseFromParent();
2432 return true;
2433 }
2434 default:
2435 return false;
2436 }
2437}
2438
2439bool AArch64InstructionSelector::select(MachineInstr &I) {
2440 assert(I.getParent() && "Instruction should be in a basic block!");
2441 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2442
2443 MachineBasicBlock &MBB = *I.getParent();
2444 MachineFunction &MF = *MBB.getParent();
2445 MachineRegisterInfo &MRI = MF.getRegInfo();
2446
2447 const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
2448 if (Subtarget->requiresStrictAlign()) {
2449 // We don't support this feature yet.
2450 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2451 return false;
2452 }
2453
2454 MIB.setInstrAndDebugLoc(I);
2455
2456 unsigned Opcode = I.getOpcode();
2457 // G_PHI requires same handling as PHI
2458 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2459 // Certain non-generic instructions also need some special handling.
2460
2461 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2462 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2463
2464 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2465 const Register DefReg = I.getOperand(0).getReg();
2466 const LLT DefTy = MRI.getType(DefReg);
2467
2468 const RegClassOrRegBank &RegClassOrBank =
2469 MRI.getRegClassOrRegBank(DefReg);
2470
2471 const TargetRegisterClass *DefRC
2472 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
2473 if (!DefRC) {
2474 if (!DefTy.isValid()) {
2475 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2476 return false;
2477 }
2478 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
2479 DefRC = getRegClassForTypeOnBank(DefTy, RB);
2480 if (!DefRC) {
2481 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2482 return false;
2483 }
2484 }
2485
2486 I.setDesc(TII.get(TargetOpcode::PHI));
2487
2488 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2489 }
2490
2491 if (I.isCopy())
2492 return selectCopy(I, TII, MRI, TRI, RBI);
2493
2494 if (I.isDebugInstr())
2495 return selectDebugInstr(I, MRI, RBI);
2496
2497 return true;
2498 }
2499
2500
2501 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2502 LLVM_DEBUG(
2503 dbgs() << "Generic instruction has unexpected implicit operands\n");
2504 return false;
2505 }
2506
2507 // Try to do some lowering before we start instruction selecting. These
2508 // lowerings are purely transformations on the input G_MIR and so selection
2509 // must continue after any modification of the instruction.
2510 if (preISelLower(I)) {
2511 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2512 }
2513
2514 // There may be patterns where the importer can't deal with them optimally,
2515 // but does select it to a suboptimal sequence so our custom C++ selection
2516 // code later never has a chance to work on it. Therefore, we have an early
2517 // selection attempt here to give priority to certain selection routines
2518 // over the imported ones.
2519 if (earlySelect(I))
2520 return true;
2521
2522 if (selectImpl(I, *CoverageInfo))
2523 return true;
2524
2525 LLT Ty =
2526 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2527
2528 switch (Opcode) {
2529 case TargetOpcode::G_SBFX:
2530 case TargetOpcode::G_UBFX: {
2531 static const unsigned OpcTable[2][2] = {
2532 {AArch64::UBFMWri, AArch64::UBFMXri},
2533 {AArch64::SBFMWri, AArch64::SBFMXri}};
2534 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2535 unsigned Size = Ty.getSizeInBits();
2536 unsigned Opc = OpcTable[IsSigned][Size == 64];
2537 auto Cst1 =
2538 getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
2539 assert(Cst1 && "Should have gotten a constant for src 1?");
2540 auto Cst2 =
2541 getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
2542 assert(Cst2 && "Should have gotten a constant for src 2?");
2543 auto LSB = Cst1->Value.getZExtValue();
2544 auto Width = Cst2->Value.getZExtValue();
2545 auto BitfieldInst =
2546 MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
2547 .addImm(LSB)
2548 .addImm(LSB + Width - 1);
2549 I.eraseFromParent();
2550 return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
2551 }
2552 case TargetOpcode::G_BRCOND:
2553 return selectCompareBranch(I, MF, MRI);
2554
2555 case TargetOpcode::G_BRINDIRECT: {
2556 const Function &Fn = MF.getFunction();
2557 if (std::optional<uint16_t> BADisc =
2558 STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(Fn)) {
2559 auto MI = MIB.buildInstr(AArch64::BRA, {}, {I.getOperand(0).getReg()});
2560 MI.addImm(AArch64PACKey::IA);
2561 MI.addImm(*BADisc);
2562 MI.addReg(/*AddrDisc=*/AArch64::XZR);
2563 I.eraseFromParent();
2564 return constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
2565 }
2566 I.setDesc(TII.get(AArch64::BR));
2567 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2568 }
2569
2570 case TargetOpcode::G_BRJT:
2571 return selectBrJT(I, MRI);
2572
2573 case AArch64::G_ADD_LOW: {
2574 // This op may have been separated from its ADRP companion by the localizer
2575 // or some other code motion pass. Given that many CPUs will try to
2576 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2577 // which will later be expanded into an ADRP+ADD pair after scheduling.
2578 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2579 if (BaseMI->getOpcode() != AArch64::ADRP) {
2580 I.setDesc(TII.get(AArch64::ADDXri));
2581 I.addOperand(MachineOperand::CreateImm(0));
2582 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2583 }
2584 assert(TM.getCodeModel() == CodeModel::Small &&
2585 "Expected small code model");
2586 auto Op1 = BaseMI->getOperand(1);
2587 auto Op2 = I.getOperand(2);
2588 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2589 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2590 Op1.getTargetFlags())
2591 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2592 Op2.getTargetFlags());
2593 I.eraseFromParent();
2594 return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2595 }
2596
2597 case TargetOpcode::G_FCONSTANT:
2598 case TargetOpcode::G_CONSTANT: {
2599 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2600
2601 const LLT s8 = LLT::scalar(8);
2602 const LLT s16 = LLT::scalar(16);
2603 const LLT s32 = LLT::scalar(32);
2604 const LLT s64 = LLT::scalar(64);
2605 const LLT s128 = LLT::scalar(128);
2606 const LLT p0 = LLT::pointer(0, 64);
2607
2608 const Register DefReg = I.getOperand(0).getReg();
2609 const LLT DefTy = MRI.getType(DefReg);
2610 const unsigned DefSize = DefTy.getSizeInBits();
2611 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2612
2613 // FIXME: Redundant check, but even less readable when factored out.
2614 if (isFP) {
2615 if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2616 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2617 << " constant, expected: " << s16 << " or " << s32
2618 << " or " << s64 << " or " << s128 << '\n');
2619 return false;
2620 }
2621
2622 if (RB.getID() != AArch64::FPRRegBankID) {
2623 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2624 << " constant on bank: " << RB
2625 << ", expected: FPR\n");
2626 return false;
2627 }
2628
2629 // The case when we have 0.0 is covered by tablegen. Reject it here so we
2630 // can be sure tablegen works correctly and isn't rescued by this code.
2631 // 0.0 is not covered by tablegen for FP128. So we will handle this
2632 // scenario in the code here.
2633 if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2634 return false;
2635 } else {
2636 // s32 and s64 are covered by tablegen.
2637 if (Ty != p0 && Ty != s8 && Ty != s16) {
2638 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2639 << " constant, expected: " << s32 << ", " << s64
2640 << ", or " << p0 << '\n');
2641 return false;
2642 }
2643
2644 if (RB.getID() != AArch64::GPRRegBankID) {
2645 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2646 << " constant on bank: " << RB
2647 << ", expected: GPR\n");
2648 return false;
2649 }
2650 }
2651
2652 if (isFP) {
2653 const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB);
2654 // For 16, 64, and 128b values, emit a constant pool load.
2655 switch (DefSize) {
2656 default:
2657 llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
2658 case 32:
2659 case 64: {
2660 bool OptForSize = shouldOptForSize(&MF);
2661 const auto &TLI = MF.getSubtarget().getTargetLowering();
2662 // If TLI says that this fpimm is illegal, then we'll expand to a
2663 // constant pool load.
2664 if (TLI->isFPImmLegal(I.getOperand(1).getFPImm()->getValueAPF(),
2665 EVT::getFloatingPointVT(DefSize), OptForSize))
2666 break;
2667 [[fallthrough]];
2668 }
2669 case 16:
2670 case 128: {
2671 auto *FPImm = I.getOperand(1).getFPImm();
2672 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2673 if (!LoadMI) {
2674 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2675 return false;
2676 }
2677 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2678 I.eraseFromParent();
2679 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2680 }
2681 }
2682
2683 assert((DefSize == 32 || DefSize == 64) && "Unexpected const def size");
2684 // Either emit a FMOV, or emit a copy to emit a normal mov.
2685 const Register DefGPRReg = MRI.createVirtualRegister(
2686 DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2687 MachineOperand &RegOp = I.getOperand(0);
2688 RegOp.setReg(DefGPRReg);
2689 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2690 MIB.buildCopy({DefReg}, {DefGPRReg});
2691
2692 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2693 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2694 return false;
2695 }
2696
2697 MachineOperand &ImmOp = I.getOperand(1);
2698 // FIXME: Is going through int64_t always correct?
2700 ImmOp.ChangeToImmediate(ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2701 } else if (I.getOperand(1).isCImm()) {
2702 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2703 I.getOperand(1).ChangeToImmediate(Val);
2704 } else if (I.getOperand(1).isImm()) {
2705 uint64_t Val = I.getOperand(1).getImm();
2706 I.getOperand(1).ChangeToImmediate(Val);
2707 }
2708
2709 const unsigned MovOpc =
2710 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2711 I.setDesc(TII.get(MovOpc));
2712 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2713 return true;
2714 }
2715 case TargetOpcode::G_EXTRACT: {
2716 Register DstReg = I.getOperand(0).getReg();
2717 Register SrcReg = I.getOperand(1).getReg();
2718 LLT SrcTy = MRI.getType(SrcReg);
2719 LLT DstTy = MRI.getType(DstReg);
2720 (void)DstTy;
2721 unsigned SrcSize = SrcTy.getSizeInBits();
2722
2723 if (SrcTy.getSizeInBits() > 64) {
2724 // This should be an extract of an s128, which is like a vector extract.
2725 if (SrcTy.getSizeInBits() != 128)
2726 return false;
2727 // Only support extracting 64 bits from an s128 at the moment.
2728 if (DstTy.getSizeInBits() != 64)
2729 return false;
2730
2731 unsigned Offset = I.getOperand(2).getImm();
2732 if (Offset % 64 != 0)
2733 return false;
2734
2735 // Check we have the right regbank always.
2736 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2737 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2738 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2739
2740 if (SrcRB.getID() == AArch64::GPRRegBankID) {
2741 auto NewI =
2742 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2743 .addUse(SrcReg, 0,
2744 Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2745 constrainOperandRegClass(MF, TRI, MRI, TII, RBI, *NewI,
2746 AArch64::GPR64RegClass, NewI->getOperand(0));
2747 I.eraseFromParent();
2748 return true;
2749 }
2750
2751 // Emit the same code as a vector extract.
2752 // Offset must be a multiple of 64.
2753 unsigned LaneIdx = Offset / 64;
2754 MachineInstr *Extract = emitExtractVectorElt(
2755 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2756 if (!Extract)
2757 return false;
2758 I.eraseFromParent();
2759 return true;
2760 }
2761
2762 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2763 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2764 Ty.getSizeInBits() - 1);
2765
2766 if (SrcSize < 64) {
2767 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2768 "unexpected G_EXTRACT types");
2769 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2770 }
2771
2772 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2773 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2774 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2775 .addReg(DstReg, 0, AArch64::sub_32);
2776 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2777 AArch64::GPR32RegClass, MRI);
2778 I.getOperand(0).setReg(DstReg);
2779
2780 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2781 }
2782
2783 case TargetOpcode::G_INSERT: {
2784 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2785 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2786 unsigned DstSize = DstTy.getSizeInBits();
2787 // Larger inserts are vectors, same-size ones should be something else by
2788 // now (split up or turned into COPYs).
2789 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2790 return false;
2791
2792 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2793 unsigned LSB = I.getOperand(3).getImm();
2794 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2795 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2796 MachineInstrBuilder(MF, I).addImm(Width - 1);
2797
2798 if (DstSize < 64) {
2799 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2800 "unexpected G_INSERT types");
2801 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2802 }
2803
2804 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2805 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2806 TII.get(AArch64::SUBREG_TO_REG))
2807 .addDef(SrcReg)
2808 .addImm(0)
2809 .addUse(I.getOperand(2).getReg())
2810 .addImm(AArch64::sub_32);
2811 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2812 AArch64::GPR32RegClass, MRI);
2813 I.getOperand(2).setReg(SrcReg);
2814
2815 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2816 }
2817 case TargetOpcode::G_FRAME_INDEX: {
2818 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2819 if (Ty != LLT::pointer(0, 64)) {
2820 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2821 << ", expected: " << LLT::pointer(0, 64) << '\n');
2822 return false;
2823 }
2824 I.setDesc(TII.get(AArch64::ADDXri));
2825
2826 // MOs for a #0 shifted immediate.
2827 I.addOperand(MachineOperand::CreateImm(0));
2828 I.addOperand(MachineOperand::CreateImm(0));
2829
2830 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2831 }
2832
2833 case TargetOpcode::G_GLOBAL_VALUE: {
2834 const GlobalValue *GV = nullptr;
2835 unsigned OpFlags;
2836 if (I.getOperand(1).isSymbol()) {
2837 OpFlags = I.getOperand(1).getTargetFlags();
2838 // Currently only used by "RtLibUseGOT".
2839 assert(OpFlags == AArch64II::MO_GOT);
2840 } else {
2841 GV = I.getOperand(1).getGlobal();
2842 if (GV->isThreadLocal())
2843 return selectTLSGlobalValue(I, MRI);
2844 OpFlags = STI.ClassifyGlobalReference(GV, TM);
2845 }
2846
2847 if (OpFlags & AArch64II::MO_GOT) {
2848 I.setDesc(TII.get(AArch64::LOADgot));
2849 I.getOperand(1).setTargetFlags(OpFlags);
2850 } else if (TM.getCodeModel() == CodeModel::Large &&
2851 !TM.isPositionIndependent()) {
2852 // Materialize the global using movz/movk instructions.
2853 materializeLargeCMVal(I, GV, OpFlags);
2854 I.eraseFromParent();
2855 return true;
2856 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2857 I.setDesc(TII.get(AArch64::ADR));
2858 I.getOperand(1).setTargetFlags(OpFlags);
2859 } else {
2860 I.setDesc(TII.get(AArch64::MOVaddr));
2861 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2862 MachineInstrBuilder MIB(MF, I);
2863 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2864 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2865 }
2866 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2867 }
2868
2869 case TargetOpcode::G_PTRAUTH_GLOBAL_VALUE:
2870 return selectPtrAuthGlobalValue(I, MRI);
2871
2872 case TargetOpcode::G_ZEXTLOAD:
2873 case TargetOpcode::G_LOAD:
2874 case TargetOpcode::G_STORE: {
2875 GLoadStore &LdSt = cast<GLoadStore>(I);
2876 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2877 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
2878
2879 if (PtrTy != LLT::pointer(0, 64)) {
2880 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
2881 << ", expected: " << LLT::pointer(0, 64) << '\n');
2882 return false;
2883 }
2884
2885 uint64_t MemSizeInBytes = LdSt.getMemSize().getValue();
2886 unsigned MemSizeInBits = LdSt.getMemSizeInBits().getValue();
2887 AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
2888
2889 // Need special instructions for atomics that affect ordering.
2890 if (Order != AtomicOrdering::NotAtomic &&
2891 Order != AtomicOrdering::Unordered &&
2892 Order != AtomicOrdering::Monotonic) {
2893 assert(!isa<GZExtLoad>(LdSt));
2894 assert(MemSizeInBytes <= 8 &&
2895 "128-bit atomics should already be custom-legalized");
2896
2897 if (isa<GLoad>(LdSt)) {
2898 static constexpr unsigned LDAPROpcodes[] = {
2899 AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
2900 static constexpr unsigned LDAROpcodes[] = {
2901 AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
2902 ArrayRef<unsigned> Opcodes =
2903 STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
2904 ? LDAPROpcodes
2905 : LDAROpcodes;
2906 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2907 } else {
2908 static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2909 AArch64::STLRW, AArch64::STLRX};
2910 Register ValReg = LdSt.getReg(0);
2911 if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
2912 // Emit a subreg copy of 32 bits.
2913 Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2914 MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
2915 .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
2916 I.getOperand(0).setReg(NewVal);
2917 }
2918 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2919 }
2920 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2921 return true;
2922 }
2923
2924#ifndef NDEBUG
2925 const Register PtrReg = LdSt.getPointerReg();
2926 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2927 // Check that the pointer register is valid.
2928 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2929 "Load/Store pointer operand isn't a GPR");
2930 assert(MRI.getType(PtrReg).isPointer() &&
2931 "Load/Store pointer operand isn't a pointer");
2932#endif
2933
2934 const Register ValReg = LdSt.getReg(0);
2935 const LLT ValTy = MRI.getType(ValReg);
2936 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2937
2938 // The code below doesn't support truncating stores, so we need to split it
2939 // again.
2940 if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2941 unsigned SubReg;
2942 LLT MemTy = LdSt.getMMO().getMemoryType();
2943 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2944 if (!getSubRegForClass(RC, TRI, SubReg))
2945 return false;
2946
2947 // Generate a subreg copy.
2948 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
2949 .addReg(ValReg, 0, SubReg)
2950 .getReg(0);
2951 RBI.constrainGenericRegister(Copy, *RC, MRI);
2952 LdSt.getOperand(0).setReg(Copy);
2953 } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2954 // If this is an any-extending load from the FPR bank, split it into a regular
2955 // load + extend.
2956 if (RB.getID() == AArch64::FPRRegBankID) {
2957 unsigned SubReg;
2958 LLT MemTy = LdSt.getMMO().getMemoryType();
2959 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2960 if (!getSubRegForClass(RC, TRI, SubReg))
2961 return false;
2962 Register OldDst = LdSt.getReg(0);
2963 Register NewDst =
2964 MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
2965 LdSt.getOperand(0).setReg(NewDst);
2966 MRI.setRegBank(NewDst, RB);
2967 // Generate a SUBREG_TO_REG to extend it.
2968 MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
2969 MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
2970 .addImm(0)
2971 .addUse(NewDst)
2972 .addImm(SubReg);
2973 auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB);
2974 RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
2975 MIB.setInstr(LdSt);
2976 }
2977 }
2978
2979 // Helper lambda for partially selecting I. Either returns the original
2980 // instruction with an updated opcode, or a new instruction.
2981 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
2982 bool IsStore = isa<GStore>(I);
2983 const unsigned NewOpc =
2984 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
2985 if (NewOpc == I.getOpcode())
2986 return nullptr;
2987 // Check if we can fold anything into the addressing mode.
2988 auto AddrModeFns =
2989 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
2990 if (!AddrModeFns) {
2991 // Can't fold anything. Use the original instruction.
2992 I.setDesc(TII.get(NewOpc));
2993 I.addOperand(MachineOperand::CreateImm(0));
2994 return &I;
2995 }
2996
2997 // Folded something. Create a new instruction and return it.
2998 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
2999 Register CurValReg = I.getOperand(0).getReg();
3000 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
3001 NewInst.cloneMemRefs(I);
3002 for (auto &Fn : *AddrModeFns)
3003 Fn(NewInst);
3004 I.eraseFromParent();
3005 return &*NewInst;
3006 };
3007
3008 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
3009 if (!LoadStore)
3010 return false;
3011
3012 // If we're storing a 0, use WZR/XZR.
3013 if (Opcode == TargetOpcode::G_STORE) {
3014 auto CVal = getIConstantVRegValWithLookThrough(
3015 LoadStore->getOperand(0).getReg(), MRI);
3016 if (CVal && CVal->Value == 0) {
3017 switch (LoadStore->getOpcode()) {
3018 case AArch64::STRWui:
3019 case AArch64::STRHHui:
3020 case AArch64::STRBBui:
3021 LoadStore->getOperand(0).setReg(AArch64::WZR);
3022 break;
3023 case AArch64::STRXui:
3024 LoadStore->getOperand(0).setReg(AArch64::XZR);
3025 break;
3026 }
3027 }
3028 }
3029
3030 if (IsZExtLoad || (Opcode == TargetOpcode::G_LOAD &&
3031 ValTy == LLT::scalar(64) && MemSizeInBits == 32)) {
3032 // The any/zextload from a smaller type to i32 should be handled by the
3033 // importer.
3034 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
3035 return false;
3036 // If we have an extending load then change the load's type to be a
3037 // narrower reg and zero_extend with SUBREG_TO_REG.
3038 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3039 Register DstReg = LoadStore->getOperand(0).getReg();
3040 LoadStore->getOperand(0).setReg(LdReg);
3041
3042 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
3043 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
3044 .addImm(0)
3045 .addUse(LdReg)
3046 .addImm(AArch64::sub_32);
3047 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3048 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
3049 MRI);
3050 }
3051 return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3052 }
3053
3054 case TargetOpcode::G_INDEXED_ZEXTLOAD:
3055 case TargetOpcode::G_INDEXED_SEXTLOAD:
3056 return selectIndexedExtLoad(I, MRI);
3057 case TargetOpcode::G_INDEXED_LOAD:
3058 return selectIndexedLoad(I, MRI);
3059 case TargetOpcode::G_INDEXED_STORE:
3060 return selectIndexedStore(cast<GIndexedStore>(I), MRI);
3061
3062 case TargetOpcode::G_LSHR:
3063 case TargetOpcode::G_ASHR:
3064 if (MRI.getType(I.getOperand(0).getReg()).isVector())
3065 return selectVectorAshrLshr(I, MRI);
3066 [[fallthrough]];
3067 case TargetOpcode::G_SHL:
3068 if (Opcode == TargetOpcode::G_SHL &&
3069 MRI.getType(I.getOperand(0).getReg()).isVector())
3070 return selectVectorSHL(I, MRI);
3071
3072 // These shifts were legalized to have 64 bit shift amounts because we
3073 // want to take advantage of the selection patterns that assume the
3074 // immediates are s64s. However, selectBinaryOp will assume both operands
3075 // have the same bit size.
3076 {
3077 Register SrcReg = I.getOperand(1).getReg();
3078 Register ShiftReg = I.getOperand(2).getReg();
3079 const LLT ShiftTy = MRI.getType(ShiftReg);
3080 const LLT SrcTy = MRI.getType(SrcReg);
3081 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
3082 ShiftTy.getSizeInBits() == 64) {
3083 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
3084 // Insert a subregister copy to implement a 64->32 trunc
3085 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
3086 .addReg(ShiftReg, 0, AArch64::sub_32);
3087 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
3088 I.getOperand(2).setReg(Trunc.getReg(0));
3089 }
3090 }
3091 [[fallthrough]];
3092 case TargetOpcode::G_OR: {
3093 // Reject the various things we don't support yet.
3094 if (unsupportedBinOp(I, RBI, MRI, TRI))
3095 return false;
3096
3097 const unsigned OpSize = Ty.getSizeInBits();
3098
3099 const Register DefReg = I.getOperand(0).getReg();
3100 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
3101
3102 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
3103 if (NewOpc == I.getOpcode())
3104 return false;
3105
3106 I.setDesc(TII.get(NewOpc));
3107 // FIXME: Should the type be always reset in setDesc?
3108
3109 // Now that we selected an opcode, we need to constrain the register
3110 // operands to use appropriate classes.
3111 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3112 }
3113
3114 case TargetOpcode::G_PTR_ADD: {
3115 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
3116 I.eraseFromParent();
3117 return true;
3118 }
3119
3120 case TargetOpcode::G_SADDE:
3121 case TargetOpcode::G_UADDE:
3122 case TargetOpcode::G_SSUBE:
3123 case TargetOpcode::G_USUBE:
3124 case TargetOpcode::G_SADDO:
3125 case TargetOpcode::G_UADDO:
3126 case TargetOpcode::G_SSUBO:
3127 case TargetOpcode::G_USUBO:
3128 return selectOverflowOp(I, MRI);
3129
3130 case TargetOpcode::G_PTRMASK: {
3131 Register MaskReg = I.getOperand(2).getReg();
3132 std::optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
3133 // TODO: Implement arbitrary cases
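 // Only shifted masks (a single contiguous run of set bits) are handled
 // here. For example, aligning a pointer down to 16 bytes:
 //   %dst:gpr(p0) = G_PTRMASK %ptr, -16
 // is selected as an ANDXri of %ptr with the logical-immediate encoding of
 // 0xFFFFFFFFFFFFFFF0.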
3134 if (!MaskVal || !isShiftedMask_64(*MaskVal))
3135 return false;
3136
3137 uint64_t Mask = *MaskVal;
3138 I.setDesc(TII.get(AArch64::ANDXri));
3139 I.getOperand(2).ChangeToImmediate(
3140 AArch64_AM::encodeLogicalImmediate(Mask, 64));
3141
3142 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3143 }
3144 case TargetOpcode::G_PTRTOINT:
3145 case TargetOpcode::G_TRUNC: {
3146 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3147 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3148
3149 const Register DstReg = I.getOperand(0).getReg();
3150 const Register SrcReg = I.getOperand(1).getReg();
3151
3152 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3153 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3154
3155 if (DstRB.getID() != SrcRB.getID()) {
3156 LLVM_DEBUG(
3157 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
3158 return false;
3159 }
3160
3161 if (DstRB.getID() == AArch64::GPRRegBankID) {
3162 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3163 if (!DstRC)
3164 return false;
3165
3166 const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB);
3167 if (!SrcRC)
3168 return false;
3169
3170 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
3171 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
3172 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
3173 return false;
3174 }
3175
3176 if (DstRC == SrcRC) {
3177 // Nothing to be done
3178 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
3179 SrcTy == LLT::scalar(64)) {
3180 llvm_unreachable("TableGen can import this case");
3181 return false;
3182 } else if (DstRC == &AArch64::GPR32RegClass &&
3183 SrcRC == &AArch64::GPR64RegClass) {
3184 I.getOperand(1).setSubReg(AArch64::sub_32);
3185 } else {
3186 LLVM_DEBUG(
3187 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3188 return false;
3189 }
3190
3191 I.setDesc(TII.get(TargetOpcode::COPY));
3192 return true;
3193 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3194 if (DstTy == LLT::fixed_vector(4, 16) &&
3195 SrcTy == LLT::fixed_vector(4, 32)) {
3196 I.setDesc(TII.get(AArch64::XTNv4i16));
3197 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3198 return true;
3199 }
3200
3201 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3202 MachineInstr *Extract = emitExtractVectorElt(
3203 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
3204 if (!Extract)
3205 return false;
3206 I.eraseFromParent();
3207 return true;
3208 }
3209
3210 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3211 if (Opcode == TargetOpcode::G_PTRTOINT) {
3212 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
3213 I.setDesc(TII.get(TargetOpcode::COPY));
3214 return selectCopy(I, TII, MRI, TRI, RBI);
3215 }
3216 }
3217
3218 return false;
3219 }
3220
3221 case TargetOpcode::G_ANYEXT: {
3222 if (selectUSMovFromExtend(I, MRI))
3223 return true;
3224
3225 const Register DstReg = I.getOperand(0).getReg();
3226 const Register SrcReg = I.getOperand(1).getReg();
3227
3228 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
3229 if (RBDst.getID() != AArch64::GPRRegBankID) {
3230 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
3231 << ", expected: GPR\n");
3232 return false;
3233 }
3234
3235 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
3236 if (RBSrc.getID() != AArch64::GPRRegBankID) {
3237 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
3238 << ", expected: GPR\n");
3239 return false;
3240 }
3241
3242 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
3243
3244 if (DstSize == 0) {
3245 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
3246 return false;
3247 }
3248
3249 if (DstSize != 64 && DstSize > 32) {
3250 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
3251 << ", expected: 32 or 64\n");
3252 return false;
3253 }
3254 // At this point G_ANYEXT is just like a plain COPY, but we need
3255 // to explicitly form the 64-bit value if any.
3256 if (DstSize > 32) {
3257 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3258 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
3259 .addDef(ExtSrc)
3260 .addImm(0)
3261 .addUse(SrcReg)
3262 .addImm(AArch64::sub_32);
3263 I.getOperand(1).setReg(ExtSrc);
3264 }
3265 return selectCopy(I, TII, MRI, TRI, RBI);
3266 }
3267
3268 case TargetOpcode::G_ZEXT:
3269 case TargetOpcode::G_SEXT_INREG:
3270 case TargetOpcode::G_SEXT: {
3271 if (selectUSMovFromExtend(I, MRI))
3272 return true;
3273
3274 unsigned Opcode = I.getOpcode();
3275 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3276 const Register DefReg = I.getOperand(0).getReg();
3277 Register SrcReg = I.getOperand(1).getReg();
3278 const LLT DstTy = MRI.getType(DefReg);
3279 const LLT SrcTy = MRI.getType(SrcReg);
3280 unsigned DstSize = DstTy.getSizeInBits();
3281 unsigned SrcSize = SrcTy.getSizeInBits();
3282
3283 // SEXT_INREG has the same src reg size as dst, the size of the value to be
3284 // extended is encoded in the imm.
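// (Illustrative: %dst:_(s64) = G_SEXT_INREG %src:_(s64), 8 sign-extends the
// low 8 bits of %src into all 64 bits of %dst, so SrcSize becomes 8 here.)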
3285 if (Opcode == TargetOpcode::G_SEXT_INREG)
3286 SrcSize = I.getOperand(2).getImm();
3287
3288 if (DstTy.isVector())
3289 return false; // Should be handled by imported patterns.
3290
3291 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3292 AArch64::GPRRegBankID &&
3293 "Unexpected ext regbank");
3294
3295 MachineInstr *ExtI;
3296
3297 // First, check whether we're extending the result of a load with a dest type
3298 // smaller than 32 bits; if so, this zext is redundant. GPR32 is the smallest
3299 // GPR register on AArch64 and all loads which are smaller automatically
3300 // zero-extend the upper bits. E.g.
3301 // %v(s8) = G_LOAD %p, :: (load 1)
3302 // %v2(s32) = G_ZEXT %v(s8)
3303 if (!IsSigned) {
3304 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
3305 bool IsGPR =
3306 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3307 if (LoadMI && IsGPR) {
3308 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3309 unsigned BytesLoaded = MemOp->getSize().getValue();
3310 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3311 return selectCopy(I, TII, MRI, TRI, RBI);
3312 }
3313
3314 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3315 // + SUBREG_TO_REG.
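// Roughly (illustrative MIR of what is built below):
//   %tmp:gpr32 = ORRWrs $wzr, %src(s32), 0
//   %dst:gpr64 = SUBREG_TO_REG 0, %tmp, %subreg.sub_32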
3316 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3317 Register SubregToRegSrc =
3318 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3319 const Register ZReg = AArch64::WZR;
3320 MIB.buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg})
3321 .addImm(0);
3322
3323 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3324 .addImm(0)
3325 .addUse(SubregToRegSrc)
3326 .addImm(AArch64::sub_32);
3327
3328 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3329 MRI)) {
3330 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3331 return false;
3332 }
3333
3334 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3335 MRI)) {
3336 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3337 return false;
3338 }
3339
3340 I.eraseFromParent();
3341 return true;
3342 }
3343 }
3344
3345 if (DstSize == 64) {
3346 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3347 // FIXME: Can we avoid manually doing this?
3348 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3349 MRI)) {
3350 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3351 << " operand\n");
3352 return false;
3353 }
3354 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
3355 {&AArch64::GPR64RegClass}, {})
3356 .addImm(0)
3357 .addUse(SrcReg)
3358 .addImm(AArch64::sub_32)
3359 .getReg(0);
3360 }
3361
3362 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3363 {DefReg}, {SrcReg})
3364 .addImm(0)
3365 .addImm(SrcSize - 1);
3366 } else if (DstSize <= 32) {
3367 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3368 {DefReg}, {SrcReg})
3369 .addImm(0)
3370 .addImm(SrcSize - 1);
3371 } else {
3372 return false;
3373 }
3374
3375 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
3376 I.eraseFromParent();
3377 return true;
3378 }
3379
3380 case TargetOpcode::G_SITOFP:
3381 case TargetOpcode::G_UITOFP:
3382 case TargetOpcode::G_FPTOSI:
3383 case TargetOpcode::G_FPTOUI: {
3384 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
3385 SrcTy = MRI.getType(I.getOperand(1).getReg());
3386 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
3387 if (NewOpc == Opcode)
3388 return false;
3389
3390 I.setDesc(TII.get(NewOpc));
3391 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3392 I.setFlags(MachineInstr::NoFPExcept);
3393
3394 return true;
3395 }
3396
3397 case TargetOpcode::G_FREEZE:
3398 return selectCopy(I, TII, MRI, TRI, RBI);
3399
3400 case TargetOpcode::G_INTTOPTR:
3401 // The importer is currently unable to import pointer types since they
3402 // didn't exist in SelectionDAG.
3403 return selectCopy(I, TII, MRI, TRI, RBI);
3404
3405 case TargetOpcode::G_BITCAST:
3406 // Imported SelectionDAG rules can handle every bitcast except those that
3407 // bitcast from a type to the same type. Ideally, these shouldn't occur
3408 // but we might not run an optimizer that deletes them. The other exception
3409 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3410 // of them.
3411 return selectCopy(I, TII, MRI, TRI, RBI);
3412
3413 case TargetOpcode::G_SELECT: {
3414 auto &Sel = cast<GSelect>(I);
3415 const Register CondReg = Sel.getCondReg();
3416 const Register TReg = Sel.getTrueReg();
3417 const Register FReg = Sel.getFalseReg();
3418
3419 if (tryOptSelect(Sel))
3420 return true;
3421
3422 // Make sure to use an unused vreg instead of wzr, so that the peephole
3423 // optimizations will be able to optimize these.
3424 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3425 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3426 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3427 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3428 if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
3429 return false;
3430 Sel.eraseFromParent();
3431 return true;
3432 }
3433 case TargetOpcode::G_ICMP: {
3434 if (Ty.isVector())
3435 return false;
3436
3437 if (Ty != LLT::scalar(32)) {
3438 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3439 << ", expected: " << LLT::scalar(32) << '\n');
3440 return false;
3441 }
3442
3443 auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3444 const AArch64CC::CondCode InvCC =
3445 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
3446 emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB);
3447 emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
3448 /*Src2=*/AArch64::WZR, InvCC, MIB);
3449 I.eraseFromParent();
3450 return true;
3451 }
3452
3453 case TargetOpcode::G_FCMP: {
3454 CmpInst::Predicate Pred =
3455 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3456 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
3457 Pred) ||
3458 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
3459 return false;
3460 I.eraseFromParent();
3461 return true;
3462 }
3463 case TargetOpcode::G_VASTART:
3464 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3465 : selectVaStartAAPCS(I, MF, MRI);
3466 case TargetOpcode::G_INTRINSIC:
3467 return selectIntrinsic(I, MRI);
3468 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3469 return selectIntrinsicWithSideEffects(I, MRI);
3470 case TargetOpcode::G_IMPLICIT_DEF: {
3471 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3472 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3473 const Register DstReg = I.getOperand(0).getReg();
3474 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3475 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3476 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3477 return true;
3478 }
3479 case TargetOpcode::G_BLOCK_ADDR: {
3480 Function *BAFn = I.getOperand(1).getBlockAddress()->getFunction();
3481 if (std::optional<uint16_t> BADisc =
3482 STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(*BAFn)) {
3483 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X16}, {});
3484 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
3485 MIB.buildInstr(AArch64::MOVaddrPAC)
3486 .addBlockAddress(I.getOperand(1).getBlockAddress())
3487 .addImm(AArch64PACKey::IA)
3488 .addReg(/*AddrDisc=*/AArch64::XZR)
3489 .addImm(*BADisc)
3490 .constrainAllUses(TII, TRI, RBI);
3491 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X16));
3492 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
3493 AArch64::GPR64RegClass, MRI);
3494 I.eraseFromParent();
3495 return true;
3496 }
3497 if (TM.getCodeModel() == CodeModel::Large && !TM.isPositionIndependent()) {
3498 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3499 I.eraseFromParent();
3500 return true;
3501 } else {
3502 I.setDesc(TII.get(AArch64::MOVaddrBA));
3503 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3504 I.getOperand(0).getReg())
3505 .addBlockAddress(I.getOperand(1).getBlockAddress(),
3506 /* Offset */ 0, AArch64II::MO_PAGE)
3507 .addBlockAddress(
3508 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3509 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3510 I.eraseFromParent();
3511 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3512 }
3513 }
3514 case AArch64::G_DUP: {
3515 // When the scalar of G_DUP is an s8/s16 gpr, it can't be selected by
3516 // imported patterns. Do it manually here. Avoiding generating s16 gpr is
3517 // difficult because at RBS we may end up pessimizing the fpr case if we
3518 // decided to add an anyextend to fix this. Manual selection is the most
3519 // robust solution for now.
3520 if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3521 AArch64::GPRRegBankID)
3522 return false; // We expect the fpr regbank case to be imported.
3523 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3524 if (VecTy == LLT::fixed_vector(8, 8))
3525 I.setDesc(TII.get(AArch64::DUPv8i8gpr));
3526 else if (VecTy == LLT::fixed_vector(16, 8))
3527 I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3528 else if (VecTy == LLT::fixed_vector(4, 16))
3529 I.setDesc(TII.get(AArch64::DUPv4i16gpr));
3530 else if (VecTy == LLT::fixed_vector(8, 16))
3531 I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3532 else
3533 return false;
3534 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3535 }
3536 case TargetOpcode::G_BUILD_VECTOR:
3537 return selectBuildVector(I, MRI);
3538 case TargetOpcode::G_MERGE_VALUES:
3539 return selectMergeValues(I, MRI);
3540 case TargetOpcode::G_UNMERGE_VALUES:
3541 return selectUnmergeValues(I, MRI);
3542 case TargetOpcode::G_SHUFFLE_VECTOR:
3543 return selectShuffleVector(I, MRI);
3544 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3545 return selectExtractElt(I, MRI);
3546 case TargetOpcode::G_CONCAT_VECTORS:
3547 return selectConcatVectors(I, MRI);
3548 case TargetOpcode::G_JUMP_TABLE:
3549 return selectJumpTable(I, MRI);
3550 case TargetOpcode::G_MEMCPY:
3551 case TargetOpcode::G_MEMCPY_INLINE:
3552 case TargetOpcode::G_MEMMOVE:
3553 case TargetOpcode::G_MEMSET:
3554 assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
3555 return selectMOPS(I, MRI);
3556 }
3557
3558 return false;
3559}
3560
3561bool AArch64InstructionSelector::selectAndRestoreState(MachineInstr &I) {
3562 MachineIRBuilderState OldMIBState = MIB.getState();
3563 bool Success = select(I);
3564 MIB.setState(OldMIBState);
3565 return Success;
3566}
3567
3568 bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
3569 MachineRegisterInfo &MRI) {
3570 unsigned Mopcode;
3571 switch (GI.getOpcode()) {
3572 case TargetOpcode::G_MEMCPY:
3573 case TargetOpcode::G_MEMCPY_INLINE:
3574 Mopcode = AArch64::MOPSMemoryCopyPseudo;
3575 break;
3576 case TargetOpcode::G_MEMMOVE:
3577 Mopcode = AArch64::MOPSMemoryMovePseudo;
3578 break;
3579 case TargetOpcode::G_MEMSET:
3580 // For tagged memset see llvm.aarch64.mops.memset.tag
3581 Mopcode = AArch64::MOPSMemorySetPseudo;
3582 break;
3583 }
3584
3585 auto &DstPtr = GI.getOperand(0);
3586 auto &SrcOrVal = GI.getOperand(1);
3587 auto &Size = GI.getOperand(2);
3588
3589 // Create copies of the registers that can be clobbered.
3590 const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
3591 const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
3592 const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg());
3593
3594 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3595 const auto &SrcValRegClass =
3596 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3597
3598 // Constrain to specific registers
3599 RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
3600 RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
3601 RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);
3602
3603 MIB.buildCopy(DstPtrCopy, DstPtr);
3604 MIB.buildCopy(SrcValCopy, SrcOrVal);
3605 MIB.buildCopy(SizeCopy, Size);
3606
3607 // New instruction uses the copied registers because it must update them.
3608 // The defs are not used since they don't exist in G_MEM*. They are still
3609 // tied.
3610 // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE
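// (E.g. MOPSMemorySetPseudo consumes (dst, size, value), whereas G_MEMSET's
// operands are (dst, value, size); the buildInstr calls below account for that.)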
3611 Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
3612 Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3613 if (IsSet) {
3614 MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
3615 {DstPtrCopy, SizeCopy, SrcValCopy});
3616 } else {
3617 Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
3618 MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
3619 {DstPtrCopy, SrcValCopy, SizeCopy});
3620 }
3621
3622 GI.eraseFromParent();
3623 return true;
3624}
3625
3626 bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3627 MachineRegisterInfo &MRI) {
3628 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3629 Register JTAddr = I.getOperand(0).getReg();
3630 unsigned JTI = I.getOperand(1).getIndex();
3631 Register Index = I.getOperand(2).getReg();
3632
3633 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3634
3635 // With aarch64-jump-table-hardening, we only expand the jump table dispatch
3636 // sequence later, to guarantee the integrity of the intermediate values.
3637 if (MF->getFunction().hasFnAttribute("aarch64-jump-table-hardening")) {
3638 CodeModel::Model CM = TM.getCodeModel();
3639 if (STI.isTargetMachO()) {
3640 if (CM != CodeModel::Small && CM != CodeModel::Large)
3641 report_fatal_error("Unsupported code-model for hardened jump-table");
3642 } else {
3643 // Note that COFF support would likely also need JUMP_TABLE_DEBUG_INFO.
3644 assert(STI.isTargetELF() &&
3645 "jump table hardening only supported on MachO/ELF");
3646 if (CM != CodeModel::Small)
3647 report_fatal_error("Unsupported code-model for hardened jump-table");
3648 }
3649
3650 MIB.buildCopy({AArch64::X16}, I.getOperand(2).getReg());
3651 MIB.buildInstr(AArch64::BR_JumpTable)
3652 .addJumpTableIndex(I.getOperand(1).getIndex());
3653 I.eraseFromParent();
3654 return true;
3655 }
3656
3657 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3658 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3659
3660 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3661 {TargetReg, ScratchReg}, {JTAddr, Index})
3662 .addJumpTableIndex(JTI);
3663 // Save the jump table info.
3664 MIB.buildInstr(TargetOpcode::JUMP_TABLE_DEBUG_INFO, {},
3665 {static_cast<int64_t>(JTI)});
3666 // Build the indirect branch.
3667 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3668 I.eraseFromParent();
3669 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3670}
3671
3672 bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3673 MachineRegisterInfo &MRI) {
3674 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3675 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3676
3677 Register DstReg = I.getOperand(0).getReg();
3678 unsigned JTI = I.getOperand(1).getIndex();
3679 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
3680 auto MovMI =
3681 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3682 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3683 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3684 I.eraseFromParent();
3685 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3686}
3687
3688 bool AArch64InstructionSelector::selectTLSGlobalValue(
3689 MachineInstr &I, MachineRegisterInfo &MRI) {
3690 if (!STI.isTargetMachO())
3691 return false;
3692 MachineFunction &MF = *I.getParent()->getParent();
3693 MF.getFrameInfo().setAdjustsStack(true);
3694
3695 const auto &GlobalOp = I.getOperand(1);
3696 assert(GlobalOp.getOffset() == 0 &&
3697 "Shouldn't have an offset on TLS globals!");
3698 const GlobalValue &GV = *GlobalOp.getGlobal();
3699
3700 auto LoadGOT =
3701 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3702 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3703
3704 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3705 {LoadGOT.getReg(0)})
3706 .addImm(0);
3707
3708 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3709 // TLS calls preserve all registers except those that absolutely must be
3710 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3711 // silly).
3712 unsigned Opcode = getBLRCallOpcode(MF);
3713
3714 // With ptrauth-calls, the tlv access thunk pointer is authenticated (IA, 0).
3715 if (MF.getFunction().hasFnAttribute("ptrauth-calls")) {
3716 assert(Opcode == AArch64::BLR);
3717 Opcode = AArch64::BLRAAZ;
3718 }
3719
3720 MIB.buildInstr(Opcode, {}, {Load})
3721 .addUse(AArch64::X0, RegState::Implicit)
3722 .addDef(AArch64::X0, RegState::Implicit)
3723 .addRegMask(TRI.getTLSCallPreservedMask());
3724
3725 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3726 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3727 MRI);
3728 I.eraseFromParent();
3729 return true;
3730}
3731
3732MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3733 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3734 MachineIRBuilder &MIRBuilder) const {
3735 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3736
3737 auto BuildFn = [&](unsigned SubregIndex) {
3738 auto Ins =
3739 MIRBuilder
3740 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3741 .addImm(SubregIndex);
3742 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3743 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3744 return &*Ins;
3745 };
3746
3747 switch (EltSize) {
3748 case 8:
3749 return BuildFn(AArch64::bsub);
3750 case 16:
3751 return BuildFn(AArch64::hsub);
3752 case 32:
3753 return BuildFn(AArch64::ssub);
3754 case 64:
3755 return BuildFn(AArch64::dsub);
3756 default:
3757 return nullptr;
3758 }
3759}
3760
3761 MachineInstr *
3762AArch64InstructionSelector::emitNarrowVector(Register DstReg, Register SrcReg,
3763 MachineIRBuilder &MIB,
3764 MachineRegisterInfo &MRI) const {
3765 LLT DstTy = MRI.getType(DstReg);
3766 const TargetRegisterClass *RC =
3767 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(SrcReg, MRI, TRI));
3768 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3769 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3770 return nullptr;
3771 }
3772 unsigned SubReg = 0;
3773 if (!getSubRegForClass(RC, TRI, SubReg))
3774 return nullptr;
3775 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3776 LLVM_DEBUG(dbgs() << "Unsupported destination size! ("
3777 << DstTy.getSizeInBits() << ")\n");
3778 return nullptr;
3779 }
3780 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3781 .addReg(SrcReg, 0, SubReg);
3782 RBI.constrainGenericRegister(DstReg, *RC, MRI);
3783 return Copy;
3784}
3785
3786bool AArch64InstructionSelector::selectMergeValues(
3788 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3789 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3790 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3791 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3792 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3793
3794 if (I.getNumOperands() != 3)
3795 return false;
3796
3797 // Merging 2 s64s into an s128.
3798 if (DstTy == LLT::scalar(128)) {
3799 if (SrcTy.getSizeInBits() != 64)
3800 return false;
3801 Register DstReg = I.getOperand(0).getReg();
3802 Register Src1Reg = I.getOperand(1).getReg();
3803 Register Src2Reg = I.getOperand(2).getReg();
3804 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3805 MachineInstr *InsMI = emitLaneInsert(std::nullopt, Tmp.getReg(0), Src1Reg,
3806 /* LaneIdx */ 0, RB, MIB);
3807 if (!InsMI)
3808 return false;
3809 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
3810 Src2Reg, /* LaneIdx */ 1, RB, MIB);
3811 if (!Ins2MI)
3812 return false;
3813 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3814 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
3815 I.eraseFromParent();
3816 return true;
3817 }
3818
3819 if (RB.getID() != AArch64::GPRRegBankID)
3820 return false;
3821
3822 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3823 return false;
3824
3825 auto *DstRC = &AArch64::GPR64RegClass;
3826 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
3827 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3828 TII.get(TargetOpcode::SUBREG_TO_REG))
3829 .addDef(SubToRegDef)
3830 .addImm(0)
3831 .addUse(I.getOperand(1).getReg())
3832 .addImm(AArch64::sub_32);
3833 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
3834 // Need to anyext the second scalar before we can use bfm
3835 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3836 TII.get(TargetOpcode::SUBREG_TO_REG))
3837 .addDef(SubToRegDef2)
3838 .addImm(0)
3839 .addUse(I.getOperand(2).getReg())
3840 .addImm(AArch64::sub_32);
3841 MachineInstr &BFM =
3842 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
3843 .addDef(I.getOperand(0).getReg())
3844 .addUse(SubToRegDef)
3845 .addUse(SubToRegDef2)
3846 .addImm(32)
3847 .addImm(31);
3848 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
3849 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
3850 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
3851 I.eraseFromParent();
3852 return true;
3853}
3854
3855static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3856 const unsigned EltSize) {
3857 // Choose a lane copy opcode and subregister based off of the size of the
3858 // vector's elements.
3859 switch (EltSize) {
3860 case 8:
3861 CopyOpc = AArch64::DUPi8;
3862 ExtractSubReg = AArch64::bsub;
3863 break;
3864 case 16:
3865 CopyOpc = AArch64::DUPi16;
3866 ExtractSubReg = AArch64::hsub;
3867 break;
3868 case 32:
3869 CopyOpc = AArch64::DUPi32;
3870 ExtractSubReg = AArch64::ssub;
3871 break;
3872 case 64:
3873 CopyOpc = AArch64::DUPi64;
3874 ExtractSubReg = AArch64::dsub;
3875 break;
3876 default:
3877 // Unknown size, bail out.
3878 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
3879 return false;
3880 }
3881 return true;
3882}
3883
3884MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3885 std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
3886 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
3887 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3888 unsigned CopyOpc = 0;
3889 unsigned ExtractSubReg = 0;
3890 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
3891 LLVM_DEBUG(
3892 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
3893 return nullptr;
3894 }
3895
3896 const TargetRegisterClass *DstRC =
3897 getRegClassForTypeOnBank(ScalarTy, DstRB, true);
3898 if (!DstRC) {
3899 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
3900 return nullptr;
3901 }
3902
3903 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
3904 const LLT &VecTy = MRI.getType(VecReg);
3905 const TargetRegisterClass *VecRC =
3906 getRegClassForTypeOnBank(VecTy, VecRB, true);
3907 if (!VecRC) {
3908 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3909 return nullptr;
3910 }
3911
3912 // The register that we're going to copy into.
3913 Register InsertReg = VecReg;
3914 if (!DstReg)
3915 DstReg = MRI.createVirtualRegister(DstRC);
3916 // If the lane index is 0, we just use a subregister COPY.
3917 if (LaneIdx == 0) {
3918 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
3919 .addReg(VecReg, 0, ExtractSubReg);
3920 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3921 return &*Copy;
3922 }
3923
3924 // Lane copies require 128-bit wide registers. If we're dealing with an
3925 // unpacked vector, then we need to move up to that width. Insert an implicit
3926 // def and a subregister insert to get us there.
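// E.g. extracting lane 1 from a 64-bit <2 x s32> vector looks roughly like:
//   %wide:fpr128 = INSERT_SUBREG (IMPLICIT_DEF), %vec, %subreg.dsub
//   %dst:fpr32 = DUPi32 %wide, 1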
3927 if (VecTy.getSizeInBits() != 128) {
3928 MachineInstr *ScalarToVector = emitScalarToVector(
3929 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
3930 if (!ScalarToVector)
3931 return nullptr;
3932 InsertReg = ScalarToVector->getOperand(0).getReg();
3933 }
3934
3935 MachineInstr *LaneCopyMI =
3936 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
3937 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
3938
3939 // Make sure that we actually constrain the initial copy.
3940 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3941 return LaneCopyMI;
3942}
3943
3944 bool AArch64InstructionSelector::selectExtractElt(
3945 MachineInstr &I, MachineRegisterInfo &MRI) {
3946 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
3947 "unexpected opcode!");
3948 Register DstReg = I.getOperand(0).getReg();
3949 const LLT NarrowTy = MRI.getType(DstReg);
3950 const Register SrcReg = I.getOperand(1).getReg();
3951 const LLT WideTy = MRI.getType(SrcReg);
3952 (void)WideTy;
3953 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
3954 "source register size too small!");
3955 assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
3956
3957 // Need the lane index to determine the correct copy opcode.
3958 MachineOperand &LaneIdxOp = I.getOperand(2);
3959 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
3960
3961 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
3962 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
3963 return false;
3964 }
3965
3966 // Find the index to extract from.
3967 auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
3968 if (!VRegAndVal)
3969 return false;
3970 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
3971
3972
3973 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3974 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
3975 LaneIdx, MIB);
3976 if (!Extract)
3977 return false;
3978
3979 I.eraseFromParent();
3980 return true;
3981}
3982
3983 bool AArch64InstructionSelector::selectSplitVectorUnmerge(
3984 MachineInstr &I, MachineRegisterInfo &MRI) {
3985 unsigned NumElts = I.getNumOperands() - 1;
3986 Register SrcReg = I.getOperand(NumElts).getReg();
3987 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
3988 const LLT SrcTy = MRI.getType(SrcReg);
3989
3990 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
3991 if (SrcTy.getSizeInBits() > 128) {
3992 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
3993 return false;
3994 }
3995
3996 // We implement a split vector operation by treating the sub-vectors as
3997 // scalars and extracting them.
3998 const RegisterBank &DstRB =
3999 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
4000 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4001 Register Dst = I.getOperand(OpIdx).getReg();
4002 MachineInstr *Extract =
4003 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
4004 if (!Extract)
4005 return false;
4006 }
4007 I.eraseFromParent();
4008 return true;
4009}
4010
4011 bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
4012 MachineRegisterInfo &MRI) {
4013 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4014 "unexpected opcode");
4015
4016 // TODO: Handle unmerging into GPRs and from scalars to scalars.
4017 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
4018 AArch64::FPRRegBankID ||
4019 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
4020 AArch64::FPRRegBankID) {
4021 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
4022 "currently unsupported.\n");
4023 return false;
4024 }
4025
4026 // The last operand is the vector source register, and every other operand is
4027 // a register to unpack into.
4028 unsigned NumElts = I.getNumOperands() - 1;
4029 Register SrcReg = I.getOperand(NumElts).getReg();
4030 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4031 const LLT WideTy = MRI.getType(SrcReg);
4032 (void)WideTy;
4033 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
4034 "can only unmerge from vector or s128 types!");
4035 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
4036 "source register size too small!");
4037
4038 if (!NarrowTy.isScalar())
4039 return selectSplitVectorUnmerge(I, MRI);
4040
4041 // Choose a lane copy opcode and subregister based off of the size of the
4042 // vector's elements.
4043 unsigned CopyOpc = 0;
4044 unsigned ExtractSubReg = 0;
4045 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
4046 return false;
4047
4048 // Set up for the lane copies.
4049 MachineBasicBlock &MBB = *I.getParent();
4050
4051 // Stores the registers we'll be copying from.
4052 SmallVector<Register, 4> InsertRegs;
4053
4054 // We'll use the first register twice, so we only need NumElts-1 registers.
4055 unsigned NumInsertRegs = NumElts - 1;
4056
4057 // If our elements fit into exactly 128 bits, then we can copy from the source
4058 // directly. Otherwise, we need to do a bit of setup with some subregister
4059 // inserts.
4060 if (NarrowTy.getSizeInBits() * NumElts == 128) {
4061 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
4062 } else {
4063 // No. We have to perform subregister inserts. For each insert, create an
4064 // implicit def and a subregister insert, and save the register we create.
4065 const TargetRegisterClass *RC = getRegClassForTypeOnBank(
4066 LLT::fixed_vector(NumElts, WideTy.getScalarSizeInBits()),
4067 *RBI.getRegBank(SrcReg, MRI, TRI));
4068 unsigned SubReg = 0;
4069 bool Found = getSubRegForClass(RC, TRI, SubReg);
4070 (void)Found;
4071 assert(Found && "expected to find last operand's subreg idx");
4072 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4073 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4074 MachineInstr &ImpDefMI =
4075 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
4076 ImpDefReg);
4077
4078 // Now, create the subregister insert from SrcReg.
4079 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4080 MachineInstr &InsMI =
4081 *BuildMI(MBB, I, I.getDebugLoc(),
4082 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4083 .addUse(ImpDefReg)
4084 .addUse(SrcReg)
4085 .addImm(SubReg);
4086
4087 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
4088 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
4089
4090 // Save the register so that we can copy from it after.
4091 InsertRegs.push_back(InsertReg);
4092 }
4093 }
4094
4095 // Now that we've created any necessary subregister inserts, we can
4096 // create the copies.
4097 //
4098 // Perform the first copy separately as a subregister copy.
4099 Register CopyTo = I.getOperand(0).getReg();
4100 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4101 .addReg(InsertRegs[0], 0, ExtractSubReg);
4102 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
4103
4104 // Now, perform the remaining copies as vector lane copies.
4105 unsigned LaneIdx = 1;
4106 for (Register InsReg : InsertRegs) {
4107 Register CopyTo = I.getOperand(LaneIdx).getReg();
4108 MachineInstr &CopyInst =
4109 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
4110 .addUse(InsReg)
4111 .addImm(LaneIdx);
4112 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
4113 ++LaneIdx;
4114 }
4115
4116 // Separately constrain the first copy's destination. Because of the
4117 // limitation in constrainOperandRegClass, we can't guarantee that this will
4118 // actually be constrained. So, do it ourselves using the second operand.
4119 const TargetRegisterClass *RC =
4120 MRI.getRegClassOrNull(I.getOperand(1).getReg());
4121 if (!RC) {
4122 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
4123 return false;
4124 }
4125
4126 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
4127 I.eraseFromParent();
4128 return true;
4129}
4130
4131 bool AArch64InstructionSelector::selectConcatVectors(
4132 MachineInstr &I, MachineRegisterInfo &MRI) {
4133 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4134 "Unexpected opcode");
4135 Register Dst = I.getOperand(0).getReg();
4136 Register Op1 = I.getOperand(1).getReg();
4137 Register Op2 = I.getOperand(2).getReg();
4138 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4139 if (!ConcatMI)
4140 return false;
4141 I.eraseFromParent();
4142 return true;
4143}
4144
4145unsigned
4146AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4147 MachineFunction &MF) const {
4148 Type *CPTy = CPVal->getType();
4149 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
4150
4152 return MCP->getConstantPoolIndex(CPVal, Alignment);
4153}
4154
4155MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4156 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4157 const TargetRegisterClass *RC;
4158 unsigned Opc;
4159 bool IsTiny = TM.getCodeModel() == CodeModel::Tiny;
4160 unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
4161 switch (Size) {
4162 case 16:
4163 RC = &AArch64::FPR128RegClass;
4164 Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
4165 break;
4166 case 8:
4167 RC = &AArch64::FPR64RegClass;
4168 Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
4169 break;
4170 case 4:
4171 RC = &AArch64::FPR32RegClass;
4172 Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
4173 break;
4174 case 2:
4175 RC = &AArch64::FPR16RegClass;
4176 Opc = AArch64::LDRHui;
4177 break;
4178 default:
4179 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4180 << *CPVal->getType());
4181 return nullptr;
4182 }
4183
4184 MachineInstr *LoadMI = nullptr;
4185 auto &MF = MIRBuilder.getMF();
4186 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4187 if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) {
4188 // Use load(literal) for tiny code model.
4189 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {}).addConstantPoolIndex(CPIdx);
4190 } else {
4191 auto Adrp =
4192 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4193 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
4194
4195 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {Adrp})
4196 .addConstantPoolIndex(
4197 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4198
4199 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
4200 }
4201
4202 MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4203 LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
4204 MachineMemOperand::MOLoad,
4205 Size, Align(Size)));
4206 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
4207 return LoadMI;
4208}
4209
4210 /// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
4211/// size and RB.
4212static std::pair<unsigned, unsigned>
4213getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4214 unsigned Opc, SubregIdx;
4215 if (RB.getID() == AArch64::GPRRegBankID) {
4216 if (EltSize == 8) {
4217 Opc = AArch64::INSvi8gpr;
4218 SubregIdx = AArch64::bsub;
4219 } else if (EltSize == 16) {
4220 Opc = AArch64::INSvi16gpr;
4221 SubregIdx = AArch64::ssub;
4222 } else if (EltSize == 32) {
4223 Opc = AArch64::INSvi32gpr;
4224 SubregIdx = AArch64::ssub;
4225 } else if (EltSize == 64) {
4226 Opc = AArch64::INSvi64gpr;
4227 SubregIdx = AArch64::dsub;
4228 } else {
4229 llvm_unreachable("invalid elt size!");
4230 }
4231 } else {
4232 if (EltSize == 8) {
4233 Opc = AArch64::INSvi8lane;
4234 SubregIdx = AArch64::bsub;
4235 } else if (EltSize == 16) {
4236 Opc = AArch64::INSvi16lane;
4237 SubregIdx = AArch64::hsub;
4238 } else if (EltSize == 32) {
4239 Opc = AArch64::INSvi32lane;
4240 SubregIdx = AArch64::ssub;
4241 } else if (EltSize == 64) {
4242 Opc = AArch64::INSvi64lane;
4243 SubregIdx = AArch64::dsub;
4244 } else {
4245 llvm_unreachable("invalid elt size!");
4246 }
4247 }
4248 return std::make_pair(Opc, SubregIdx);
4249}
4250
4251MachineInstr *AArch64InstructionSelector::emitInstr(
4252 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4253 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4254 const ComplexRendererFns &RenderFns) const {
4255 assert(Opcode && "Expected an opcode?");
4256 assert(!isPreISelGenericOpcode(Opcode) &&
4257 "Function should only be used to produce selected instructions!");
4258 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4259 if (RenderFns)
4260 for (auto &Fn : *RenderFns)
4261 Fn(MI);
4262 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4263 return &*MI;
4264}
4265
4266MachineInstr *AArch64InstructionSelector::emitAddSub(
4267 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4268 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4269 MachineIRBuilder &MIRBuilder) const {
4270 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4271 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4272 auto Ty = MRI.getType(LHS.getReg());
4273 assert(!Ty.isVector() && "Expected a scalar or pointer?");
4274 unsigned Size = Ty.getSizeInBits();
4275 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4276 bool Is32Bit = Size == 32;
4277
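// Each row of AddrModeAndSizeToOpcode corresponds to an addressing-mode
// variant (ri, rs, rr, negated-ri, rx); within a row, index 0 holds the
// 64-bit opcode and index 1 the 32-bit one, selected via Is32Bit below.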
4278 // INSTRri form with positive arithmetic immediate.
4279 if (auto Fns = selectArithImmed(RHS))
4280 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4281 MIRBuilder, Fns);
4282
4283 // INSTRri form with negative arithmetic immediate.
4284 if (auto Fns = selectNegArithImmed(RHS))
4285 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4286 MIRBuilder, Fns);
4287
4288 // INSTRrx form.
4289 if (auto Fns = selectArithExtendedRegister(RHS))
4290 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4291 MIRBuilder, Fns);
4292
4293 // INSTRrs form.
4294 if (auto Fns = selectShiftedRegister(RHS))
4295 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4296 MIRBuilder, Fns);
4297 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4298 MIRBuilder);
4299}
4300
4301 MachineInstr *
4302AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4303 MachineOperand &RHS,
4304 MachineIRBuilder &MIRBuilder) const {
4305 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4306 {{AArch64::ADDXri, AArch64::ADDWri},
4307 {AArch64::ADDXrs, AArch64::ADDWrs},
4308 {AArch64::ADDXrr, AArch64::ADDWrr},
4309 {AArch64::SUBXri, AArch64::SUBWri},
4310 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4311 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4312}
4313
4314 MachineInstr *
4315AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4316 MachineOperand &RHS,
4317 MachineIRBuilder &MIRBuilder) const {
4318 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4319 {{AArch64::ADDSXri, AArch64::ADDSWri},
4320 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4321 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4322 {AArch64::SUBSXri, AArch64::SUBSWri},
4323 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4324 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4325}
4326
4327 MachineInstr *
4328AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4329 MachineOperand &RHS,
4330 MachineIRBuilder &MIRBuilder) const {
4331 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4332 {{AArch64::SUBSXri, AArch64::SUBSWri},
4333 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4334 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4335 {AArch64::ADDSXri, AArch64::ADDSWri},
4336 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4337 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4338}
4339
4340 MachineInstr *
4341AArch64InstructionSelector::emitADCS(Register Dst, MachineOperand &LHS,
4342 MachineOperand &RHS,
4343 MachineIRBuilder &MIRBuilder) const {
4344 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4345 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4346 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4347 static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr};
4348 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4349}
4350
4351 MachineInstr *
4352AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS,
4353 MachineOperand &RHS,
4354 MachineIRBuilder &MIRBuilder) const {
4355 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4356 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4357 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4358 static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr};
4359 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4360}
4361
4362 MachineInstr *
4363AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4364 MachineIRBuilder &MIRBuilder) const {
4365 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4366 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4367 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4368 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4369}
4370
4371 MachineInstr *
4372AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4373 MachineIRBuilder &MIRBuilder) const {
4374 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4375 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4376 LLT Ty = MRI.getType(LHS.getReg());
4377 unsigned RegSize = Ty.getSizeInBits();
4378 bool Is32Bit = (RegSize == 32);
4379 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4380 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4381 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4382 // ANDS needs a logical immediate for its immediate form. Check if we can
4383 // fold one in.
4384 if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4385 int64_t Imm = ValAndVReg->Value.getSExtValue();
4386
4387 if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
4388 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4389 TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
4390 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4391 return &*TstMI;
4392 }
4393 }
4394
4395 if (auto Fns = selectLogicalShiftedRegister(RHS))
4396 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4397 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4398}
4399
4400MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4401 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4402 MachineIRBuilder &MIRBuilder) const {
4403 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4404 assert(Predicate.isPredicate() && "Expected predicate?");
4405 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4406 LLT CmpTy = MRI.getType(LHS.getReg());
4407 assert(!CmpTy.isVector() && "Expected scalar or pointer");
4408 unsigned Size = CmpTy.getSizeInBits();
4409 (void)Size;
4410 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4411 // Fold the compare into a cmn or tst if possible.
4412 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4413 return FoldCmp;
4414 auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
4415 return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4416}
4417
4418MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4419 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4420 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4421#ifndef NDEBUG
4422 LLT Ty = MRI.getType(Dst);
4423 assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4424 "Expected a 32-bit scalar register?");
4425#endif
4426 const Register ZReg = AArch64::WZR;
4427 AArch64CC::CondCode CC1, CC2;
4428 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
4429 auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
4430 if (CC2 == AArch64CC::AL)
4431 return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
4432 MIRBuilder);
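// Otherwise the predicate maps to two condition codes (e.g. FCMP_ONE or
// FCMP_UEQ): emit a CSINC of WZR (i.e. a CSET) for each and OR the results.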
4433 const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4434 Register Def1Reg = MRI.createVirtualRegister(RC);
4435 Register Def2Reg = MRI.createVirtualRegister(RC);
4436 auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
4437 emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
4438 emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
4439 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4440 constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4441 return &*OrMI;
4442}
4443
4444MachineInstr *AArch64InstructionSelector::emitFPCompare(
4445 Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
4446 std::optional<CmpInst::Predicate> Pred) const {
4447 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4448 LLT Ty = MRI.getType(LHS);
4449 if (Ty.isVector())
4450 return nullptr;
4451 unsigned OpSize = Ty.getSizeInBits();
4452 assert(OpSize == 16 || OpSize == 32 || OpSize == 64);
4453
4454 // If this is a compare against +0.0, then we don't have
4455 // to explicitly materialize a constant.
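// (E.g. a compare against +0.0 uses the FCMP{H,S,D}ri forms, which compare a
// register against zero without needing a second register operand.)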
4456 const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4457 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4458
4459 auto IsEqualityPred = [](CmpInst::Predicate P) {
4460 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4461 P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4462 };
4463 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4464 // Try commutating the operands.
4465 const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4466 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4467 ShouldUseImm = true;
4468 std::swap(LHS, RHS);
4469 }
4470 }
4471 unsigned CmpOpcTbl[2][3] = {
4472 {AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr},
4473 {AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}};
4474 unsigned CmpOpc =
4475 CmpOpcTbl[ShouldUseImm][OpSize == 16 ? 0 : (OpSize == 32 ? 1 : 2)];
4476
4477 // Partially build the compare. Decide if we need to add a use for the
4478 // third operand based off whether or not we're comparing against 0.0.
4479 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4480 CmpMI.setMIFlags(MachineInstr::NoFPExcept);
4481 if (!ShouldUseImm)
4482 CmpMI.addUse(RHS);
4483 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4484 return &*CmpMI;
4485}
4486
4487MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4488 std::optional<Register> Dst, Register Op1, Register Op2,
4489 MachineIRBuilder &MIRBuilder) const {
4490 // We implement a vector concat by:
4491 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4492 // 2. Insert the upper vector into the destination's upper element
4493 // TODO: some of this code is common with G_BUILD_VECTOR handling.
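// E.g. concatenating two <2 x s32> values proceeds roughly as:
//   %w1:fpr128 = INSERT_SUBREG (IMPLICIT_DEF), %op1, %subreg.dsub
//   %w2:fpr128 = INSERT_SUBREG (IMPLICIT_DEF), %op2, %subreg.dsub
//   %dst = INSvi64lane %w1, 1, %w2, 0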
4494 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4495
4496 const LLT Op1Ty = MRI.getType(Op1);
4497 const LLT Op2Ty = MRI.getType(Op2);
4498
4499 if (Op1Ty != Op2Ty) {
4500 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4501 return nullptr;
4502 }
4503 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4504
4505 if (Op1Ty.getSizeInBits() >= 128) {
4506 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4507 return nullptr;
4508 }
4509
4510 // At the moment we just support 64 bit vector concats.
4511 if (Op1Ty.getSizeInBits() != 64) {
4512 LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
4513 return nullptr;
4514 }
4515
4516 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4517 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4518 const TargetRegisterClass *DstRC =
4519 getRegClassForTypeOnBank(Op1Ty.multiplyElements(2), FPRBank);
4520
4521 MachineInstr *WidenedOp1 =
4522 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4523 MachineInstr *WidenedOp2 =
4524 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4525 if (!WidenedOp1 || !WidenedOp2) {
4526 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4527 return nullptr;
4528 }
4529
4530 // Now do the insert of the upper element.
4531 unsigned InsertOpc, InsSubRegIdx;
4532 std::tie(InsertOpc, InsSubRegIdx) =
4533 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4534
4535 if (!Dst)
4536 Dst = MRI.createVirtualRegister(DstRC);
4537 auto InsElt =
4538 MIRBuilder
4539 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4540 .addImm(1) /* Lane index */
4541 .addUse(WidenedOp2->getOperand(0).getReg())
4542 .addImm(0);
4543 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4544 return &*InsElt;
4545}
4546
4547 MachineInstr *
4548AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
4549 Register Src2, AArch64CC::CondCode Pred,
4550 MachineIRBuilder &MIRBuilder) const {
4551 auto &MRI = *MIRBuilder.getMRI();
4552 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
4553 // If we used a register class, then this won't necessarily have an LLT.
4554 // Compute the size based off whether or not we have a class or bank.
4555 unsigned Size;
4556 if (const auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
4557 Size = TRI.getRegSizeInBits(*RC);
4558 else
4559 Size = MRI.getType(Dst).getSizeInBits();
4560 // Some opcodes use s1.
4561 assert(Size <= 64 && "Expected 64 bits or less only!");
4562 static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4563 unsigned Opc = OpcTable[Size == 64];
4564 auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
4565 constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
4566 return &*CSINC;
4567}
4568
4569MachineInstr *AArch64InstructionSelector::emitCarryIn(MachineInstr &I,
4570 Register CarryReg) {
4571 MachineRegisterInfo *MRI = MIB.getMRI();
4572 unsigned Opcode = I.getOpcode();
4573
4574 // If the instruction is a SUB, we need to negate the carry,
4575 // because borrowing is indicated by carry-flag == 0.
4576 bool NeedsNegatedCarry =
4577 (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE);
4578
4579 // If the previous instruction will already produce the correct carry, do not
4580 // emit a carry generating instruction. E.g. for G_UADDE/G_USUBE sequences
4581 // generated during legalization of wide add/sub. This optimization depends on
4582 // these sequences not being interrupted by other instructions.
4583 // We have to select the previous instruction before the carry-using
4584 // instruction is deleted by the calling function, otherwise the previous
4585 // instruction might become dead and would get deleted.
4586 MachineInstr *SrcMI = MRI->getVRegDef(CarryReg);
4587 if (SrcMI == I.getPrevNode()) {
4588 if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(SrcMI)) {
4589 bool ProducesNegatedCarry = CarrySrcMI->isSub();
4590 if (NeedsNegatedCarry == ProducesNegatedCarry &&
4591 CarrySrcMI->isUnsigned() &&
4592 CarrySrcMI->getCarryOutReg() == CarryReg &&
4593 selectAndRestoreState(*SrcMI))
4594 return nullptr;
4595 }
4596 }
4597
4598 Register DeadReg = MRI->createVirtualRegister(&AArch64::GPR32RegClass);
4599
4600 if (NeedsNegatedCarry) {
4601 // (0 - Carry) sets !C in NZCV when Carry == 1
4602 Register ZReg = AArch64::WZR;
4603 return emitInstr(AArch64::SUBSWrr, {DeadReg}, {ZReg, CarryReg}, MIB);
4604 }
4605
4606 // (Carry - 1) sets !C in NZCV when Carry == 0
4607 auto Fns = select12BitValueWithLeftShift(1);
4608 return emitInstr(AArch64::SUBSWri, {DeadReg}, {CarryReg}, MIB, Fns);
4609}
4610
4611 bool AArch64InstructionSelector::selectOverflowOp(MachineInstr &I,
4612 MachineRegisterInfo &MRI) {
4613 auto &CarryMI = cast<GAddSubCarryOut>(I);
4614
4615 if (auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(&I)) {
4616 // Set NZCV carry according to carry-in VReg
4617 emitCarryIn(I, CarryInMI->getCarryInReg());
4618 }
4619
4620 // Emit the operation and get the correct condition code.
4621 auto OpAndCC = emitOverflowOp(I.getOpcode(), CarryMI.getDstReg(),
4622 CarryMI.getLHS(), CarryMI.getRHS(), MIB);
4623
4624 Register CarryOutReg = CarryMI.getCarryOutReg();
4625
4626 // Don't convert carry-out to VReg if it is never used
4627 if (!MRI.use_nodbg_empty(CarryOutReg)) {
4628 // Now, put the overflow result in the register given by the first operand
4629 // to the overflow op. CSINC increments the result when the predicate is
4630 // false, so to get the increment when it's true, we need to use the
4631 // inverse. In this case, we want to increment when carry is set.
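// E.g. for G_UADDO the ADDS sets HS on overflow, so we emit
// CSINC %carry, wzr, wzr, lo, which yields 1 exactly when HS holds.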
4632 Register ZReg = AArch64::WZR;
4633 emitCSINC(/*Dst=*/CarryOutReg, /*Src1=*/ZReg, /*Src2=*/ZReg,
4634 getInvertedCondCode(OpAndCC.second), MIB);
4635 }
4636
4637 I.eraseFromParent();
4638 return true;
4639}
4640
4641std::pair<MachineInstr *, AArch64CC::CondCode>
4642AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4643 MachineOperand &LHS,
4644 MachineOperand &RHS,
4645 MachineIRBuilder &MIRBuilder) const {
4646 switch (Opcode) {
4647 default:
4648 llvm_unreachable("Unexpected opcode!");
4649 case TargetOpcode::G_SADDO:
4650 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4651 case TargetOpcode::G_UADDO:
4652 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4653 case TargetOpcode::G_SSUBO:
4654 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4655 case TargetOpcode::G_USUBO:
4656 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4657 case TargetOpcode::G_SADDE:
4658 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4659 case TargetOpcode::G_UADDE:
4660 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4661 case TargetOpcode::G_SSUBE:
4662 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4663 case TargetOpcode::G_USUBE:
4664 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4665 }
4666}
4667
4668/// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
4669/// expressed as a conjunction.
4670/// \param CanNegate Set to true if we can negate the whole sub-tree just by
4671/// changing the conditions on the CMP tests.
4672/// (this means we can call emitConjunctionRec() with
4673/// Negate==true on this sub-tree)
4674/// \param MustBeFirst Set to true if this subtree needs to be negated and we
4675/// cannot do the negation naturally. We are required to
4676/// emit the subtree first in this case.
4677 /// \param WillNegate Is true if we are called when the result of this
4678/// subexpression must be negated. This happens when the
4679/// outer expression is an OR. We can use this fact to know
4680/// that we have a double negation (or (or ...) ...) that
4681/// can be implemented for free.
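/// For example, (or (icmp a, b), (and (icmp c, d), (icmp e, f))) forms such a
/// tree and can later be lowered into a conditional-compare (CCMP) sequence.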
4682static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
4683 bool WillNegate, MachineRegisterInfo &MRI,
4684 unsigned Depth = 0) {
4685 if (!MRI.hasOneNonDBGUse(Val))
4686 return false;
4687 MachineInstr *ValDef = MRI.getVRegDef(