1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
15#include "AArch64InstrInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
41#include "llvm/IR/Constants.h"
44#include "llvm/IR/IntrinsicsAArch64.h"
46#include "llvm/IR/Type.h"
47#include "llvm/Pass.h"
48#include "llvm/Support/Debug.h"
50#include <optional>
51
52#define DEBUG_TYPE "aarch64-isel"
53
54using namespace llvm;
55using namespace MIPatternMatch;
56using namespace AArch64GISelUtils;
57
58namespace llvm {
59class BlockFrequencyInfo;
60class ProfileSummaryInfo;
61}
62
63namespace {
64
65#define GET_GLOBALISEL_PREDICATE_BITSET
66#include "AArch64GenGlobalISel.inc"
67#undef GET_GLOBALISEL_PREDICATE_BITSET
68
69
70class AArch64InstructionSelector : public InstructionSelector {
71public:
72 AArch64InstructionSelector(const AArch64TargetMachine &TM,
73 const AArch64Subtarget &STI,
74 const AArch64RegisterBankInfo &RBI);
75
76 bool select(MachineInstr &I) override;
77 static const char *getName() { return DEBUG_TYPE; }
78
79 void setupMF(MachineFunction &MF, GISelKnownBits *KB,
80 CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI,
81 BlockFrequencyInfo *BFI) override {
82 InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
83 MIB.setMF(MF);
84
85 // hasFnAttribute() is expensive to call on every BRCOND selection, so
86 // cache it here for each run of the selector.
87 ProduceNonFlagSettingCondBr =
88 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
89 MFReturnAddr = Register();
90
91 processPHIs(MF);
92 }
93
94private:
95 /// tblgen-erated 'select' implementation, used as the initial selector for
96 /// the patterns that don't require complex C++.
97 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
98
99 // A lowering phase that runs before any selection attempts.
100 // Returns true if the instruction was modified.
101 bool preISelLower(MachineInstr &I);
102
103 // An early selection function that runs before the selectImpl() call.
104 bool earlySelect(MachineInstr &I);
105
106 /// Save state that is shared between select calls, call select on \p I and
107 /// then restore the saved state. This can be used to recursively call select
108 /// within a select call.
109 bool selectAndRestoreState(MachineInstr &I);
110
111 // Do some preprocessing of G_PHIs before we begin selection.
112 void processPHIs(MachineFunction &MF);
113
114 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
115
116 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
117 bool contractCrossBankCopyIntoStore(MachineInstr &I,
118 MachineRegisterInfo &MRI);
119
120 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
121
122 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
123 MachineRegisterInfo &MRI) const;
124 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
125 MachineRegisterInfo &MRI) const;
126
127 ///@{
128 /// Helper functions for selectCompareBranch.
129 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
130 MachineIRBuilder &MIB) const;
131 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
132 MachineIRBuilder &MIB) const;
133 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
134 MachineIRBuilder &MIB) const;
135 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
136 MachineBasicBlock *DstMBB,
137 MachineIRBuilder &MIB) const;
138 ///@}
139
140 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
141 MachineRegisterInfo &MRI);
142
143 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
144 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
145
146 // Helper to generate an equivalent of scalar_to_vector into a new register,
147 // returned via 'Dst'.
148 MachineInstr *emitScalarToVector(unsigned EltSize,
149 const TargetRegisterClass *DstRC,
150 Register Scalar,
151 MachineIRBuilder &MIRBuilder) const;
152 /// Helper to narrow vector that was widened by emitScalarToVector.
153 /// Copy lowest part of 128-bit or 64-bit vector to 64-bit or 32-bit
154 /// vector, respectively.
155 MachineInstr *emitNarrowVector(Register DstReg, Register SrcReg,
156 MachineIRBuilder &MIRBuilder,
157 MachineRegisterInfo &MRI) const;
158
159 /// Emit a lane insert into \p DstReg, or a new vector register if
160 /// std::nullopt is provided.
161 ///
162 /// The lane inserted into is defined by \p LaneIdx. The vector source
163 /// register is given by \p SrcReg. The register containing the element is
164 /// given by \p EltReg.
165 MachineInstr *emitLaneInsert(std::optional<Register> DstReg, Register SrcReg,
166 Register EltReg, unsigned LaneIdx,
167 const RegisterBank &RB,
168 MachineIRBuilder &MIRBuilder) const;
169
170 /// Emit a sequence of instructions representing a constant \p CV for a
171 /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
172 ///
173 /// \returns the last instruction in the sequence on success, and nullptr
174 /// otherwise.
175 MachineInstr *emitConstantVector(Register Dst, Constant *CV,
176 MachineIRBuilder &MIRBuilder,
177 MachineRegisterInfo &MRI);
178
179 MachineInstr *tryAdvSIMDModImm8(Register Dst, unsigned DstSize, APInt Bits,
180 MachineIRBuilder &MIRBuilder);
181
182 MachineInstr *tryAdvSIMDModImm16(Register Dst, unsigned DstSize, APInt Bits,
183 MachineIRBuilder &MIRBuilder, bool Inv);
184
185 MachineInstr *tryAdvSIMDModImm32(Register Dst, unsigned DstSize, APInt Bits,
186 MachineIRBuilder &MIRBuilder, bool Inv);
187 MachineInstr *tryAdvSIMDModImm64(Register Dst, unsigned DstSize, APInt Bits,
188 MachineIRBuilder &MIRBuilder);
189 MachineInstr *tryAdvSIMDModImm321s(Register Dst, unsigned DstSize, APInt Bits,
190 MachineIRBuilder &MIRBuilder, bool Inv);
191 MachineInstr *tryAdvSIMDModImmFP(Register Dst, unsigned DstSize, APInt Bits,
192 MachineIRBuilder &MIRBuilder);
193
194 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
195 MachineRegisterInfo &MRI);
196 /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
197 /// SUBREG_TO_REG.
198 bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
199 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
202
203 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
204 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
205 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
206 bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
207
208 /// Helper function to select vector load intrinsics like
209 /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
210 /// \p Opc is the opcode that the selected instruction should use.
211 /// \p NumVecs is the number of vector destinations for the instruction.
212 /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
213 bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
214 MachineInstr &I);
215 bool selectVectorLoadLaneIntrinsic(unsigned Opc, unsigned NumVecs,
216 MachineInstr &I);
217 void selectVectorStoreIntrinsic(MachineInstr &I, unsigned NumVecs,
218 unsigned Opc);
219 bool selectVectorStoreLaneIntrinsic(MachineInstr &I, unsigned NumVecs,
220 unsigned Opc);
221 bool selectIntrinsicWithSideEffects(MachineInstr &I,
222 MachineRegisterInfo &MRI);
223 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
224 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
225 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
226 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
227 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
228 bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
229 bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
230 void SelectTable(MachineInstr &I, MachineRegisterInfo &MRI, unsigned NumVecs,
231 unsigned Opc1, unsigned Opc2, bool isExt);
232
233 bool selectIndexedExtLoad(MachineInstr &I, MachineRegisterInfo &MRI);
234 bool selectIndexedLoad(MachineInstr &I, MachineRegisterInfo &MRI);
235 bool selectIndexedStore(GIndexedStore &I, MachineRegisterInfo &MRI);
236
237 unsigned emitConstantPoolEntry(const Constant *CPVal,
238 MachineFunction &MF) const;
239 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
240 MachineIRBuilder &MIRBuilder) const;
241
242 // Emit a vector concat operation.
243 MachineInstr *emitVectorConcat(std::optional<Register> Dst, Register Op1,
244 Register Op2,
245 MachineIRBuilder &MIRBuilder) const;
246
247 // Emit an integer compare between LHS and RHS, which checks for Predicate.
248 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
249 MachineOperand &Predicate,
250 MachineIRBuilder &MIRBuilder) const;
251
252 /// Emit a floating point comparison between \p LHS and \p RHS.
253 /// \p Pred if given is the intended predicate to use.
254 MachineInstr *
255 emitFPCompare(Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
256 std::optional<CmpInst::Predicate> = std::nullopt) const;
257
258 MachineInstr *
259 emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
260 std::initializer_list<llvm::SrcOp> SrcOps,
261 MachineIRBuilder &MIRBuilder,
262 const ComplexRendererFns &RenderFns = std::nullopt) const;
263 /// Helper function to emit an add or sub instruction.
264 ///
265 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
266 /// in a specific order.
267 ///
268 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
269 ///
270 /// \code
271 /// const std::array<std::array<unsigned, 2>, 4> Table {
272 /// {{AArch64::ADDXri, AArch64::ADDWri},
273 /// {AArch64::ADDXrs, AArch64::ADDWrs},
274 /// {AArch64::ADDXrr, AArch64::ADDWrr},
275 /// {AArch64::SUBXri, AArch64::SUBWri},
276 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
277 /// \endcode
278 ///
279 /// Each row in the table corresponds to a different addressing mode. Each
280 /// column corresponds to a different register size.
281 ///
282 /// \attention Rows must be structured as follows:
283 /// - Row 0: The ri opcode variants
284 /// - Row 1: The rs opcode variants
285 /// - Row 2: The rr opcode variants
286 /// - Row 3: The ri opcode variants for negative immediates
287 /// - Row 4: The rx opcode variants
288 ///
289 /// \attention Columns must be structured as follows:
290 /// - Column 0: The 64-bit opcode variants
291 /// - Column 1: The 32-bit opcode variants
292 ///
293 /// \p Dst is the destination register of the binop to emit.
294 /// \p LHS is the left-hand operand of the binop to emit.
295 /// \p RHS is the right-hand operand of the binop to emit.
296 MachineInstr *emitAddSub(
297 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
298 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
299 MachineIRBuilder &MIRBuilder) const;
300 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
301 MachineOperand &RHS,
302 MachineIRBuilder &MIRBuilder) const;
303 MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
304 MachineIRBuilder &MIRBuilder) const;
305 MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
306 MachineIRBuilder &MIRBuilder) const;
307 MachineInstr *emitADCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
308 MachineIRBuilder &MIRBuilder) const;
309 MachineInstr *emitSBCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
310 MachineIRBuilder &MIRBuilder) const;
311 MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
312 MachineIRBuilder &MIRBuilder) const;
313 MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
314 MachineIRBuilder &MIRBuilder) const;
315 MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
316 AArch64CC::CondCode CC,
317 MachineIRBuilder &MIRBuilder) const;
318 MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,
319 const RegisterBank &DstRB, LLT ScalarTy,
320 Register VecReg, unsigned LaneIdx,
321 MachineIRBuilder &MIRBuilder) const;
322 MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
323 AArch64CC::CondCode Pred,
324 MachineIRBuilder &MIRBuilder) const;
325 /// Emit a CSet for a FP compare.
326 ///
327 /// \p Dst is expected to be a 32-bit scalar register.
328 MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
329 MachineIRBuilder &MIRBuilder) const;
330
331 /// Emit an instruction that sets NZCV to the carry-in expected by \p I.
332 /// Might elide the instruction if the previous instruction already sets NZCV
333 /// correctly.
334 MachineInstr *emitCarryIn(MachineInstr &I, Register CarryReg);
335
336 /// Emit the overflow op for \p Opcode.
337 ///
338 /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
339 /// G_USUBO, etc.
340 std::pair<MachineInstr *, AArch64CC::CondCode>
341 emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
342 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
343
344 bool selectOverflowOp(MachineInstr &I, MachineRegisterInfo &MRI);
345
346 /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
347 /// In some cases this is even possible with OR operations in the expression.
348 MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC,
349 MachineIRBuilder &MIB) const;
350 MachineInstr *emitConditionalComparison(Register LHS, Register RHS,
351 CmpInst::Predicate CC,
352 AArch64CC::CondCode Predicate,
353 AArch64CC::CondCode OutCC,
354 MachineIRBuilder &MIB) const;
355 MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC,
356 bool Negate, Register CCOp,
357 AArch64CC::CondCode Predicate,
358 MachineIRBuilder &MIB) const;
359
360 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
361 /// \p IsNegative is true if the test should be "not zero".
362 /// This will also optimize the test bit instruction when possible.
363 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
364 MachineBasicBlock *DstMBB,
365 MachineIRBuilder &MIB) const;
366
367 /// Emit a CB(N)Z instruction which branches to \p DestMBB.
368 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
369 MachineBasicBlock *DestMBB,
370 MachineIRBuilder &MIB) const;
371
372 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
373 // We use these manually instead of using the importer since it doesn't
374 // support SDNodeXForm.
375 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
376 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
377 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
378 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
379
380 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
381 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
382 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
383
384 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
385 unsigned Size) const;
386
387 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
388 return selectAddrModeUnscaled(Root, 1);
389 }
390 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
391 return selectAddrModeUnscaled(Root, 2);
392 }
393 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
394 return selectAddrModeUnscaled(Root, 4);
395 }
396 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
397 return selectAddrModeUnscaled(Root, 8);
398 }
399 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
400 return selectAddrModeUnscaled(Root, 16);
401 }
402
403 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
404 /// from complex pattern matchers like selectAddrModeIndexed().
405 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
406 MachineRegisterInfo &MRI) const;
407
408 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
409 unsigned Size) const;
410 template <int Width>
411 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
412 return selectAddrModeIndexed(Root, Width / 8);
413 }
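// Note: the template parameter Width is a size in bits; Width / 8 is the
// access size in bytes that selectAddrModeIndexed() expects (e.g. Width == 64
// requests an 8-byte scaled [base, #imm] addressing mode).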
414
415 bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
416 const MachineRegisterInfo &MRI) const;
417 ComplexRendererFns
418 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
419 unsigned SizeInBytes) const;
420
421 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
422 /// or not a shift + extend should be folded into an addressing mode. Returns
423 /// std::nullopt when this is not profitable or possible.
424 ComplexRendererFns
425 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
426 MachineOperand &Offset, unsigned SizeInBytes,
427 bool WantsExt) const;
428 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
429 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
430 unsigned SizeInBytes) const;
431 template <int Width>
432 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
433 return selectAddrModeXRO(Root, Width / 8);
434 }
435
436 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
437 unsigned SizeInBytes) const;
438 template <int Width>
439 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
440 return selectAddrModeWRO(Root, Width / 8);
441 }
442
443 ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
444 bool AllowROR = false) const;
445
446 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
447 return selectShiftedRegister(Root);
448 }
449
450 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
451 return selectShiftedRegister(Root, true);
452 }
453
454 /// Given an extend instruction, determine the correct shift-extend type for
455 /// that instruction.
456 ///
457 /// If the instruction is going to be used in a load or store, pass
458 /// \p IsLoadStore = true.
459 AArch64_AM::ShiftExtendType
460 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
461 bool IsLoadStore = false) const;
462
463 /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
464 ///
465 /// \returns Either \p Reg if no change was necessary, or the new register
466 /// created by moving \p Reg.
467 ///
468 /// Note: This uses emitCopy right now.
469 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
470 MachineIRBuilder &MIB) const;
471
472 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
473
474 ComplexRendererFns selectExtractHigh(MachineOperand &Root) const;
475
476 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
477 int OpIdx = -1) const;
478 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
479 int OpIdx = -1) const;
480 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
481 int OpIdx = -1) const;
482 void renderUbsanTrap(MachineInstrBuilder &MIB, const MachineInstr &MI,
483 int OpIdx) const;
484 void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
485 int OpIdx = -1) const;
486 void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
487 int OpIdx = -1) const;
488 void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
489 int OpIdx = -1) const;
490 void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
491 const MachineInstr &MI,
492 int OpIdx = -1) const;
493
494 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
495 void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
496
497 // Optimization methods.
498 bool tryOptSelect(GSelect &Sel);
499 bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
500 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
501 MachineOperand &Predicate,
502 MachineIRBuilder &MIRBuilder) const;
503
504 /// Return true if \p MI is a load or store of \p NumBytes bytes.
505 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
506
507 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
508 /// register zeroed out. In other words, the result of MI has been explicitly
509 /// zero extended.
510 bool isDef32(const MachineInstr &MI) const;
511
512 const AArch64TargetMachine &TM;
513 const AArch64Subtarget &STI;
514 const AArch64InstrInfo &TII;
515 const AArch64RegisterInfo &TRI;
516 const AArch64RegisterBankInfo &RBI;
517
518 bool ProduceNonFlagSettingCondBr = false;
519
520 // Some cached values used during selection.
521 // We use LR as a live-in register, and we keep track of it here as it can be
522 // clobbered by calls.
523 Register MFReturnAddr;
524
525 MachineIRBuilder MIB;
526
527#define GET_GLOBALISEL_PREDICATES_DECL
528#include "AArch64GenGlobalISel.inc"
529#undef GET_GLOBALISEL_PREDICATES_DECL
530
531// We declare the temporaries used by selectImpl() in the class to minimize the
532// cost of constructing placeholder values.
533#define GET_GLOBALISEL_TEMPORARIES_DECL
534#include "AArch64GenGlobalISel.inc"
535#undef GET_GLOBALISEL_TEMPORARIES_DECL
536};
537
538} // end anonymous namespace
539
540#define GET_GLOBALISEL_IMPL
541#include "AArch64GenGlobalISel.inc"
542#undef GET_GLOBALISEL_IMPL
543
544AArch64InstructionSelector::AArch64InstructionSelector(
545 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
546 const AArch64RegisterBankInfo &RBI)
547 : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
548 RBI(RBI),
550#include "AArch64GenGlobalISel.inc"
553#include "AArch64GenGlobalISel.inc"
555{
556}
557
558// FIXME: This should be target-independent, inferred from the types declared
559// for each class in the bank.
560//
561/// Given a register bank, and a type, return the smallest register class that
562/// can represent that combination.
563static const TargetRegisterClass *
564getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
565 bool GetAllRegSet = false) {
566 if (RB.getID() == AArch64::GPRRegBankID) {
567 if (Ty.getSizeInBits() <= 32)
568 return GetAllRegSet ? &AArch64::GPR32allRegClass
569 : &AArch64::GPR32RegClass;
570 if (Ty.getSizeInBits() == 64)
571 return GetAllRegSet ? &AArch64::GPR64allRegClass
572 : &AArch64::GPR64RegClass;
573 if (Ty.getSizeInBits() == 128)
574 return &AArch64::XSeqPairsClassRegClass;
575 return nullptr;
576 }
577
578 if (RB.getID() == AArch64::FPRRegBankID) {
579 switch (Ty.getSizeInBits()) {
580 case 8:
581 return &AArch64::FPR8RegClass;
582 case 16:
583 return &AArch64::FPR16RegClass;
584 case 32:
585 return &AArch64::FPR32RegClass;
586 case 64:
587 return &AArch64::FPR64RegClass;
588 case 128:
589 return &AArch64::FPR128RegClass;
590 }
591 return nullptr;
592 }
593
594 return nullptr;
595}
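// For example, a 64-bit scalar on the GPR bank maps to GPR64 (or GPR64all when
// GetAllRegSet is true), while the same size on the FPR bank maps to FPR64.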
596
597/// Given a register bank, and size in bits, return the smallest register class
598/// that can represent that combination.
599static const TargetRegisterClass *
600getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits,
601 bool GetAllRegSet = false) {
602 if (SizeInBits.isScalable()) {
603 assert(RB.getID() == AArch64::FPRRegBankID &&
604 "Expected FPR regbank for scalable type size");
605 return &AArch64::ZPRRegClass;
606 }
607
608 unsigned RegBankID = RB.getID();
609
610 if (RegBankID == AArch64::GPRRegBankID) {
611 if (SizeInBits <= 32)
612 return GetAllRegSet ? &AArch64::GPR32allRegClass
613 : &AArch64::GPR32RegClass;
614 if (SizeInBits == 64)
615 return GetAllRegSet ? &AArch64::GPR64allRegClass
616 : &AArch64::GPR64RegClass;
617 if (SizeInBits == 128)
618 return &AArch64::XSeqPairsClassRegClass;
619 }
620
621 if (RegBankID == AArch64::FPRRegBankID) {
622 switch (SizeInBits) {
623 default:
624 return nullptr;
625 case 8:
626 return &AArch64::FPR8RegClass;
627 case 16:
628 return &AArch64::FPR16RegClass;
629 case 32:
630 return &AArch64::FPR32RegClass;
631 case 64:
632 return &AArch64::FPR64RegClass;
633 case 128:
634 return &AArch64::FPR128RegClass;
635 }
636 }
637
638 return nullptr;
639}
640
641/// Returns the correct subregister to use for a given register class.
642static bool getSubRegForClass(const TargetRegisterClass *RC,
643 const TargetRegisterInfo &TRI, unsigned &SubReg) {
644 switch (TRI.getRegSizeInBits(*RC)) {
645 case 8:
646 SubReg = AArch64::bsub;
647 break;
648 case 16:
649 SubReg = AArch64::hsub;
650 break;
651 case 32:
652 if (RC != &AArch64::FPR32RegClass)
653 SubReg = AArch64::sub_32;
654 else
655 SubReg = AArch64::ssub;
656 break;
657 case 64:
658 SubReg = AArch64::dsub;
659 break;
660 default:
662 dbgs() << "Couldn't find appropriate subregister for register class.");
663 return false;
664 }
665
666 return true;
667}
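// For example, FPR32 maps to ssub while GPR32 maps to sub_32; the 32-bit case
// is the only size where the GPR and FPR banks use different subregister
// indices.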
668
669/// Returns the minimum size the given register bank can hold.
670static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
671 switch (RB.getID()) {
672 case AArch64::GPRRegBankID:
673 return 32;
674 case AArch64::FPRRegBankID:
675 return 8;
676 default:
677 llvm_unreachable("Tried to get minimum size for unknown register bank.");
678 }
679}
680
681/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
682/// Helper function for functions like createDTuple and createQTuple.
683///
684/// \p RegClassIDs - The list of register class IDs available for some tuple of
685/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
686/// expected to contain between 2 and 4 tuple classes.
687///
688/// \p SubRegs - The list of subregister classes associated with each register
689/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
690/// subregister class. The index of each subregister class is expected to
691/// correspond with the index of each register class.
692///
693/// \returns Either the destination register of REG_SEQUENCE instruction that
694/// was created, or the 0th element of \p Regs if \p Regs contains a single
695/// element.
696static Register createTuple(ArrayRef<Register> Regs,
697 const unsigned RegClassIDs[],
698 const unsigned SubRegs[], MachineIRBuilder &MIB) {
699 unsigned NumRegs = Regs.size();
700 if (NumRegs == 1)
701 return Regs[0];
702 assert(NumRegs >= 2 && NumRegs <= 4 &&
703 "Only support between two and 4 registers in a tuple!");
705 auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
706 auto RegSequence =
707 MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
708 for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
709 RegSequence.addUse(Regs[I]);
710 RegSequence.addImm(SubRegs[I]);
711 }
712 return RegSequence.getReg(0);
713}
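// For example, passing three Q registers with the QQ/QQQ/QQQQ class IDs
// produces roughly:
//   %tuple:qqq = REG_SEQUENCE %q0, qsub0, %q1, qsub1, %q2, qsub2
// and the destination register of that REG_SEQUENCE is returned.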
714
715/// Create a tuple of D-registers using the registers in \p Regs.
716static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
717 static const unsigned RegClassIDs[] = {
718 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
719 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
720 AArch64::dsub2, AArch64::dsub3};
721 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
722}
723
724/// Create a tuple of Q-registers using the registers in \p Regs.
725static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
726 static const unsigned RegClassIDs[] = {
727 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
728 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
729 AArch64::qsub2, AArch64::qsub3};
730 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
731}
732
733static std::optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
734 auto &MI = *Root.getParent();
735 auto &MBB = *MI.getParent();
736 auto &MF = *MBB.getParent();
737 auto &MRI = MF.getRegInfo();
738 uint64_t Immed;
739 if (Root.isImm())
740 Immed = Root.getImm();
741 else if (Root.isCImm())
742 Immed = Root.getCImm()->getZExtValue();
743 else if (Root.isReg()) {
744 auto ValAndVReg =
745 getIConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
746 if (!ValAndVReg)
747 return std::nullopt;
748 Immed = ValAndVReg->Value.getSExtValue();
749 } else
750 return std::nullopt;
751 return Immed;
752}
753
754/// Check whether \p I is a currently unsupported binary operation:
755/// - it has an unsized type
756/// - an operand is not a vreg
757/// - not all operands are in the same register bank
758/// These are checks that should someday live in the verifier, but right now,
759/// these are mostly limitations of the aarch64 selector.
760static bool unsupportedBinOp(const MachineInstr &I,
761 const AArch64RegisterBankInfo &RBI,
762 const MachineRegisterInfo &MRI,
763 const AArch64RegisterInfo &TRI) {
764 LLT Ty = MRI.getType(I.getOperand(0).getReg());
765 if (!Ty.isValid()) {
766 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
767 return true;
768 }
769
770 const RegisterBank *PrevOpBank = nullptr;
771 for (auto &MO : I.operands()) {
772 // FIXME: Support non-register operands.
773 if (!MO.isReg()) {
774 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
775 return true;
776 }
777
778 // FIXME: Can generic operations have physical registers operands? If
779 // so, this will need to be taught about that, and we'll need to get the
780 // bank out of the minimal class for the register.
781 // Either way, this needs to be documented (and possibly verified).
782 if (!MO.getReg().isVirtual()) {
783 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
784 return true;
785 }
786
787 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
788 if (!OpBank) {
789 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
790 return true;
791 }
792
793 if (PrevOpBank && OpBank != PrevOpBank) {
794 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
795 return true;
796 }
797 PrevOpBank = OpBank;
798 }
799 return false;
800}
801
802/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
803/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
804/// and of size \p OpSize.
805/// \returns \p GenericOpc if the combination is unsupported.
806static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
807 unsigned OpSize) {
808 switch (RegBankID) {
809 case AArch64::GPRRegBankID:
810 if (OpSize == 32) {
811 switch (GenericOpc) {
812 case TargetOpcode::G_SHL:
813 return AArch64::LSLVWr;
814 case TargetOpcode::G_LSHR:
815 return AArch64::LSRVWr;
816 case TargetOpcode::G_ASHR:
817 return AArch64::ASRVWr;
818 default:
819 return GenericOpc;
820 }
821 } else if (OpSize == 64) {
822 switch (GenericOpc) {
823 case TargetOpcode::G_PTR_ADD:
824 return AArch64::ADDXrr;
825 case TargetOpcode::G_SHL:
826 return AArch64::LSLVXr;
827 case TargetOpcode::G_LSHR:
828 return AArch64::LSRVXr;
829 case TargetOpcode::G_ASHR:
830 return AArch64::ASRVXr;
831 default:
832 return GenericOpc;
833 }
834 }
835 break;
836 case AArch64::FPRRegBankID:
837 switch (OpSize) {
838 case 32:
839 switch (GenericOpc) {
840 case TargetOpcode::G_FADD:
841 return AArch64::FADDSrr;
842 case TargetOpcode::G_FSUB:
843 return AArch64::FSUBSrr;
844 case TargetOpcode::G_FMUL:
845 return AArch64::FMULSrr;
846 case TargetOpcode::G_FDIV:
847 return AArch64::FDIVSrr;
848 default:
849 return GenericOpc;
850 }
851 case 64:
852 switch (GenericOpc) {
853 case TargetOpcode::G_FADD:
854 return AArch64::FADDDrr;
855 case TargetOpcode::G_FSUB:
856 return AArch64::FSUBDrr;
857 case TargetOpcode::G_FMUL:
858 return AArch64::FMULDrr;
859 case TargetOpcode::G_FDIV:
860 return AArch64::FDIVDrr;
861 case TargetOpcode::G_OR:
862 return AArch64::ORRv8i8;
863 default:
864 return GenericOpc;
865 }
866 }
867 break;
868 }
869 return GenericOpc;
870}
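// For example, a 32-bit G_SHL on the GPR bank selects LSLVWr, while a 64-bit
// G_FADD on the FPR bank selects FADDDrr; any combination not listed above
// falls through and returns GenericOpc unchanged.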
871
872/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
873/// appropriate for the (value) register bank \p RegBankID and of memory access
874/// size \p OpSize. This returns the variant with the base+unsigned-immediate
875/// addressing mode (e.g., LDRXui).
876/// \returns \p GenericOpc if the combination is unsupported.
877static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
878 unsigned OpSize) {
879 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
880 switch (RegBankID) {
881 case AArch64::GPRRegBankID:
882 switch (OpSize) {
883 case 8:
884 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
885 case 16:
886 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
887 case 32:
888 return isStore ? AArch64::STRWui : AArch64::LDRWui;
889 case 64:
890 return isStore ? AArch64::STRXui : AArch64::LDRXui;
891 }
892 break;
893 case AArch64::FPRRegBankID:
894 switch (OpSize) {
895 case 8:
896 return isStore ? AArch64::STRBui : AArch64::LDRBui;
897 case 16:
898 return isStore ? AArch64::STRHui : AArch64::LDRHui;
899 case 32:
900 return isStore ? AArch64::STRSui : AArch64::LDRSui;
901 case 64:
902 return isStore ? AArch64::STRDui : AArch64::LDRDui;
903 case 128:
904 return isStore ? AArch64::STRQui : AArch64::LDRQui;
905 }
906 break;
907 }
908 return GenericOpc;
909}
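// For example, a 64-bit G_LOAD whose value lives on the GPR bank selects
// LDRXui, while a 128-bit G_STORE on the FPR bank selects STRQui.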
910
911/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
912/// to \p *To.
913///
914/// E.g "To = COPY SrcReg:SubReg"
915static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
916 const RegisterBankInfo &RBI, Register SrcReg,
917 const TargetRegisterClass *To, unsigned SubReg) {
918 assert(SrcReg.isValid() && "Expected a valid source register?");
919 assert(To && "Destination register class cannot be null");
920 assert(SubReg && "Expected a valid subregister");
921
922 MachineIRBuilder MIB(I);
923 auto SubRegCopy =
924 MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
925 MachineOperand &RegOp = I.getOperand(1);
926 RegOp.setReg(SubRegCopy.getReg(0));
927
928 // It's possible that the destination register won't be constrained. Make
929 // sure that happens.
930 if (!I.getOperand(0).getReg().isPhysical())
931 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
932
933 return true;
934}
935
936/// Helper function to get the source and destination register classes for a
937/// copy. Returns a std::pair containing the source register class for the
938/// copy, and the destination register class for the copy. If a register class
939/// cannot be determined, then it will be nullptr.
940static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
941getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
942 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
943 const RegisterBankInfo &RBI) {
944 Register DstReg = I.getOperand(0).getReg();
945 Register SrcReg = I.getOperand(1).getReg();
946 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
947 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
948
949 TypeSize DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
950 TypeSize SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
951
952 // Special casing for cross-bank copies of s1s. We can technically represent
953 // a 1-bit value with any size of register. The minimum size for a GPR is 32
954 // bits. So, we need to put the FPR on 32 bits as well.
955 //
956 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
957 // then we can pull it into the helpers that get the appropriate class for a
958 // register bank. Or make a new helper that carries along some constraint
959 // information.
960 if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
961 SrcSize = DstSize = TypeSize::getFixed(32);
962
963 return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
964 getMinClassForRegBank(DstRegBank, DstSize, true)};
965}
966
967// FIXME: We need some sort of API in RBI/TRI to allow generic code to
968// constrain operands of simple instructions given a TargetRegisterClass
969// and LLT
970static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI,
971 const RegisterBankInfo &RBI) {
972 for (MachineOperand &MO : I.operands()) {
973 if (!MO.isReg())
974 continue;
975 Register Reg = MO.getReg();
976 if (!Reg)
977 continue;
978 if (Reg.isPhysical())
979 continue;
980 LLT Ty = MRI.getType(Reg);
981 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
982 const TargetRegisterClass *RC =
983 RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
984 if (!RC) {
985 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
986 RC = getRegClassForTypeOnBank(Ty, RB);
987 if (!RC) {
989 dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
990 break;
991 }
992 }
993 RBI.constrainGenericRegister(Reg, *RC, MRI);
994 }
995
996 return true;
997}
998
999static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
1000 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
1001 const RegisterBankInfo &RBI) {
1002 Register DstReg = I.getOperand(0).getReg();
1003 Register SrcReg = I.getOperand(1).getReg();
1004 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
1005 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
1006
1007 // Find the correct register classes for the source and destination registers.
1008 const TargetRegisterClass *SrcRC;
1009 const TargetRegisterClass *DstRC;
1010 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
1011
1012 if (!DstRC) {
1013 LLVM_DEBUG(dbgs() << "Unexpected dest size "
1014 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
1015 return false;
1016 }
1017
1018 // Is this a copy? If so, then we may need to insert a subregister copy.
1019 if (I.isCopy()) {
1020 // Yes. Check if there's anything to fix up.
1021 if (!SrcRC) {
1022 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
1023 return false;
1024 }
1025
1026 const TypeSize SrcSize = TRI.getRegSizeInBits(*SrcRC);
1027 const TypeSize DstSize = TRI.getRegSizeInBits(*DstRC);
1028 unsigned SubReg;
1029
1030 // If the source bank doesn't support a subregister copy small enough,
1031 // then we first need to copy to the destination bank.
1032 if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
1033 const TargetRegisterClass *DstTempRC =
1034 getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
1035 getSubRegForClass(DstRC, TRI, SubReg);
1036
1037 MachineIRBuilder MIB(I);
1038 auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
1039 copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
1040 } else if (SrcSize > DstSize) {
1041 // If the source register is bigger than the destination we need to
1042 // perform a subregister copy.
1043 const TargetRegisterClass *SubRegRC =
1044 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1045 getSubRegForClass(SubRegRC, TRI, SubReg);
1046 copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
1047 } else if (DstSize > SrcSize) {
1048 // If the destination register is bigger than the source we need to do
1049 // a promotion using SUBREG_TO_REG.
1050 const TargetRegisterClass *PromotionRC =
1051 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1052 getSubRegForClass(SrcRC, TRI, SubReg);
1053
1054 Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
1055 BuildMI(*I.getParent(), I, I.getDebugLoc(),
1056 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
1057 .addImm(0)
1058 .addUse(SrcReg)
1059 .addImm(SubReg);
1060 MachineOperand &RegOp = I.getOperand(1);
1061 RegOp.setReg(PromoteReg);
1062 }
1063
1064 // If the destination is a physical register, then there's nothing to
1065 // change, so we're done.
1066 if (DstReg.isPhysical())
1067 return true;
1068 }
1069
1070 // No need to constrain SrcReg. It will get constrained when we hit another
1071 // of its uses or defs. Copies do not have constraints.
1072 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1073 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
1074 << " operand\n");
1075 return false;
1076 }
1077
1078 // If this is a GPR ZEXT that we want to just reduce down into a copy, the
1079 // sizes will be mismatched with the source < 32b, but that's OK.
1080 if (I.getOpcode() == TargetOpcode::G_ZEXT) {
1081 I.setDesc(TII.get(AArch64::COPY));
1082 assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
1083 return selectCopy(I, TII, MRI, TRI, RBI);
1084 }
1085
1086 I.setDesc(TII.get(AArch64::COPY));
1087 return true;
1088}
1089
1090static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
1091 if (!DstTy.isScalar() || !SrcTy.isScalar())
1092 return GenericOpc;
1093
1094 const unsigned DstSize = DstTy.getSizeInBits();
1095 const unsigned SrcSize = SrcTy.getSizeInBits();
1096
1097 switch (DstSize) {
1098 case 32:
1099 switch (SrcSize) {
1100 case 32:
1101 switch (GenericOpc) {
1102 case TargetOpcode::G_SITOFP:
1103 return AArch64::SCVTFUWSri;
1104 case TargetOpcode::G_UITOFP:
1105 return AArch64::UCVTFUWSri;
1106 case TargetOpcode::G_FPTOSI:
1107 return AArch64::FCVTZSUWSr;
1108 case TargetOpcode::G_FPTOUI:
1109 return AArch64::FCVTZUUWSr;
1110 default:
1111 return GenericOpc;
1112 }
1113 case 64:
1114 switch (GenericOpc) {
1115 case TargetOpcode::G_SITOFP:
1116 return AArch64::SCVTFUXSri;
1117 case TargetOpcode::G_UITOFP:
1118 return AArch64::UCVTFUXSri;
1119 case TargetOpcode::G_FPTOSI:
1120 return AArch64::FCVTZSUWDr;
1121 case TargetOpcode::G_FPTOUI:
1122 return AArch64::FCVTZUUWDr;
1123 default:
1124 return GenericOpc;
1125 }
1126 default:
1127 return GenericOpc;
1128 }
1129 case 64:
1130 switch (SrcSize) {
1131 case 32:
1132 switch (GenericOpc) {
1133 case TargetOpcode::G_SITOFP:
1134 return AArch64::SCVTFUWDri;
1135 case TargetOpcode::G_UITOFP:
1136 return AArch64::UCVTFUWDri;
1137 case TargetOpcode::G_FPTOSI:
1138 return AArch64::FCVTZSUXSr;
1139 case TargetOpcode::G_FPTOUI:
1140 return AArch64::FCVTZUUXSr;
1141 default:
1142 return GenericOpc;
1143 }
1144 case 64:
1145 switch (GenericOpc) {
1146 case TargetOpcode::G_SITOFP:
1147 return AArch64::SCVTFUXDri;
1148 case TargetOpcode::G_UITOFP:
1149 return AArch64::UCVTFUXDri;
1150 case TargetOpcode::G_FPTOSI:
1151 return AArch64::FCVTZSUXDr;
1152 case TargetOpcode::G_FPTOUI:
1153 return AArch64::FCVTZUUXDr;
1154 default:
1155 return GenericOpc;
1156 }
1157 default:
1158 return GenericOpc;
1159 }
1160 default:
1161 return GenericOpc;
1162 };
1163 return GenericOpc;
1164}
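// For example, G_SITOFP from a 64-bit integer to a 32-bit float selects
// SCVTFUXSri, and G_FPTOUI from a 64-bit float to a 32-bit unsigned integer
// selects FCVTZUUWDr.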
1165
1166MachineInstr *
1167AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1168 Register False, AArch64CC::CondCode CC,
1169 MachineIRBuilder &MIB) const {
1170 MachineRegisterInfo &MRI = *MIB.getMRI();
1171 assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
1172 RBI.getRegBank(True, MRI, TRI)->getID() &&
1173 "Expected both select operands to have the same regbank?");
1174 LLT Ty = MRI.getType(True);
1175 if (Ty.isVector())
1176 return nullptr;
1177 const unsigned Size = Ty.getSizeInBits();
1178 assert((Size == 32 || Size == 64) &&
1179 "Expected 32 bit or 64 bit select only?");
1180 const bool Is32Bit = Size == 32;
1181 if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1182 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1183 auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1184 constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1185 return &*FCSel;
1186 }
1187
1188 // By default, we'll try and emit a CSEL.
1189 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1190 bool Optimized = false;
1191 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1192 &Optimized](Register &Reg, Register &OtherReg,
1193 bool Invert) {
1194 if (Optimized)
1195 return false;
1196
1197 // Attempt to fold:
1198 //
1199 // %sub = G_SUB 0, %x
1200 // %select = G_SELECT cc, %reg, %sub
1201 //
1202 // Into:
1203 // %select = CSNEG %reg, %x, cc
1204 Register MatchReg;
1205 if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
1206 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1207 Reg = MatchReg;
1208 if (Invert) {
1209 CC = AArch64CC::getInvertedCondCode(CC);
1210 std::swap(Reg, OtherReg);
1211 }
1212 return true;
1213 }
1214
1215 // Attempt to fold:
1216 //
1217 // %xor = G_XOR %x, -1
1218 // %select = G_SELECT cc, %reg, %xor
1219 //
1220 // Into:
1221 // %select = CSINV %reg, %x, cc
1222 if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
1223 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1224 Reg = MatchReg;
1225 if (Invert) {
1226 CC = AArch64CC::getInvertedCondCode(CC);
1227 std::swap(Reg, OtherReg);
1228 }
1229 return true;
1230 }
1231
1232 // Attempt to fold:
1233 //
1234 // %add = G_ADD %x, 1
1235 // %select = G_SELECT cc, %reg, %add
1236 //
1237 // Into:
1238 // %select = CSINC %reg, %x, cc
1239 if (mi_match(Reg, MRI,
1240 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
1241 m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
1242 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1243 Reg = MatchReg;
1244 if (Invert) {
1245 CC = AArch64CC::getInvertedCondCode(CC);
1246 std::swap(Reg, OtherReg);
1247 }
1248 return true;
1249 }
1250
1251 return false;
1252 };
1253
1254 // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1255 // true/false values are constants.
1256 // FIXME: All of these patterns already exist in tablegen. We should be
1257 // able to import these.
1258 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1259 &Optimized]() {
1260 if (Optimized)
1261 return false;
1262 auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
1263 auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
1264 if (!TrueCst && !FalseCst)
1265 return false;
1266
1267 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1268 if (TrueCst && FalseCst) {
1269 int64_t T = TrueCst->Value.getSExtValue();
1270 int64_t F = FalseCst->Value.getSExtValue();
1271
1272 if (T == 0 && F == 1) {
1273 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1274 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1275 True = ZReg;
1276 False = ZReg;
1277 return true;
1278 }
1279
1280 if (T == 0 && F == -1) {
1281 // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
1282 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1283 True = ZReg;
1284 False = ZReg;
1285 return true;
1286 }
1287 }
1288
1289 if (TrueCst) {
1290 int64_t T = TrueCst->Value.getSExtValue();
1291 if (T == 1) {
1292 // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1293 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1294 True = False;
1295 False = ZReg;
1296 CC = AArch64CC::getInvertedCondCode(CC);
1297 return true;
1298 }
1299
1300 if (T == -1) {
1301 // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1302 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1303 True = False;
1304 False = ZReg;
1305 CC = AArch64CC::getInvertedCondCode(CC);
1306 return true;
1307 }
1308 }
1309
1310 if (FalseCst) {
1311 int64_t F = FalseCst->Value.getSExtValue();
1312 if (F == 1) {
1313 // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1314 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1315 False = ZReg;
1316 return true;
1317 }
1318
1319 if (F == -1) {
1320 // G_SELECT cc, t, -1 -> CSINV t, zreg, cc
1321 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1322 False = ZReg;
1323 return true;
1324 }
1325 }
1326 return false;
1327 };
1328
1329 Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1330 Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1331 Optimized |= TryOptSelectCst();
1332 auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1333 constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1334 return &*SelectInst;
1335}
1336
1337static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
1338 switch (P) {
1339 default:
1340 llvm_unreachable("Unknown condition code!");
1341 case CmpInst::ICMP_NE:
1342 return AArch64CC::NE;
1343 case CmpInst::ICMP_EQ:
1344 return AArch64CC::EQ;
1345 case CmpInst::ICMP_SGT:
1346 return AArch64CC::GT;
1347 case CmpInst::ICMP_SGE:
1348 return AArch64CC::GE;
1349 case CmpInst::ICMP_SLT:
1350 return AArch64CC::LT;
1351 case CmpInst::ICMP_SLE:
1352 return AArch64CC::LE;
1353 case CmpInst::ICMP_UGT:
1354 return AArch64CC::HI;
1355 case CmpInst::ICMP_UGE:
1356 return AArch64CC::HS;
1357 case CmpInst::ICMP_ULT:
1358 return AArch64CC::LO;
1359 case CmpInst::ICMP_ULE:
1360 return AArch64CC::LS;
1361 }
1362}
1363
1364/// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
1365static void changeFPCCToORAArch64CC(CmpInst::Predicate CC,
1366 AArch64CC::CondCode &CondCode,
1367 AArch64CC::CondCode &CondCode2) {
1368 CondCode2 = AArch64CC::AL;
1369 switch (CC) {
1370 default:
1371 llvm_unreachable("Unknown FP condition!");
1372 case CmpInst::FCMP_OEQ:
1373 CondCode = AArch64CC::EQ;
1374 break;
1375 case CmpInst::FCMP_OGT:
1376 CondCode = AArch64CC::GT;
1377 break;
1378 case CmpInst::FCMP_OGE:
1379 CondCode = AArch64CC::GE;
1380 break;
1381 case CmpInst::FCMP_OLT:
1382 CondCode = AArch64CC::MI;
1383 break;
1384 case CmpInst::FCMP_OLE:
1385 CondCode = AArch64CC::LS;
1386 break;
1387 case CmpInst::FCMP_ONE:
1388 CondCode = AArch64CC::MI;
1389 CondCode2 = AArch64CC::GT;
1390 break;
1391 case CmpInst::FCMP_ORD:
1392 CondCode = AArch64CC::VC;
1393 break;
1394 case CmpInst::FCMP_UNO:
1395 CondCode = AArch64CC::VS;
1396 break;
1397 case CmpInst::FCMP_UEQ:
1398 CondCode = AArch64CC::EQ;
1399 CondCode2 = AArch64CC::VS;
1400 break;
1401 case CmpInst::FCMP_UGT:
1402 CondCode = AArch64CC::HI;
1403 break;
1404 case CmpInst::FCMP_UGE:
1405 CondCode = AArch64CC::PL;
1406 break;
1407 case CmpInst::FCMP_ULT:
1408 CondCode = AArch64CC::LT;
1409 break;
1410 case CmpInst::FCMP_ULE:
1411 CondCode = AArch64CC::LE;
1412 break;
1413 case CmpInst::FCMP_UNE:
1414 CondCode = AArch64CC::NE;
1415 break;
1416 }
1417}
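// For example, FCMP_ONE (ordered and not equal) has no single AArch64
// condition code, so it expands to MI || GT: callers emit a second conditional
// branch (or OR the conditions) whenever CondCode2 != AL.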
1418
1419/// Convert an IR fp condition code to an AArch64 CC.
1420/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1421/// should be AND'ed instead of OR'ed.
1422static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC,
1423 AArch64CC::CondCode &CondCode,
1424 AArch64CC::CondCode &CondCode2) {
1425 CondCode2 = AArch64CC::AL;
1426 switch (CC) {
1427 default:
1428 changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
1429 assert(CondCode2 == AArch64CC::AL);
1430 break;
1431 case CmpInst::FCMP_ONE:
1432 // (a one b)
1433 // == ((a olt b) || (a ogt b))
1434 // == ((a ord b) && (a une b))
1435 CondCode = AArch64CC::VC;
1436 CondCode2 = AArch64CC::NE;
1437 break;
1438 case CmpInst::FCMP_UEQ:
1439 // (a ueq b)
1440 // == ((a uno b) || (a oeq b))
1441 // == ((a ule b) && (a uge b))
1442 CondCode = AArch64CC::PL;
1443 CondCode2 = AArch64CC::LE;
1444 break;
1445 }
1446}
1447
1448/// Return a register which can be used as a bit to test in a TB(N)Z.
1449static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1451 assert(Reg.isValid() && "Expected valid register!");
1452 bool HasZext = false;
1453 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1454 unsigned Opc = MI->getOpcode();
1455
1456 if (!MI->getOperand(0).isReg() ||
1457 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1458 break;
1459
1460 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1461 //
1462 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1463 // on the truncated x is the same as the bit number on x.
1464 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1465 Opc == TargetOpcode::G_TRUNC) {
1466 if (Opc == TargetOpcode::G_ZEXT)
1467 HasZext = true;
1468
1469 Register NextReg = MI->getOperand(1).getReg();
1470 // Did we find something worth folding?
1471 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1472 break;
1473
1474 // NextReg is worth folding. Keep looking.
1475 Reg = NextReg;
1476 continue;
1477 }
1478
1479 // Attempt to find a suitable operation with a constant on one side.
1480 std::optional<uint64_t> C;
1481 Register TestReg;
1482 switch (Opc) {
1483 default:
1484 break;
1485 case TargetOpcode::G_AND:
1486 case TargetOpcode::G_XOR: {
1487 TestReg = MI->getOperand(1).getReg();
1488 Register ConstantReg = MI->getOperand(2).getReg();
1489 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1490 if (!VRegAndVal) {
1491 // AND commutes, check the other side for a constant.
1492 // FIXME: Can we canonicalize the constant so that it's always on the
1493 // same side at some point earlier?
1494 std::swap(ConstantReg, TestReg);
1495 VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1496 }
1497 if (VRegAndVal) {
1498 if (HasZext)
1499 C = VRegAndVal->Value.getZExtValue();
1500 else
1501 C = VRegAndVal->Value.getSExtValue();
1502 }
1503 break;
1504 }
1505 case TargetOpcode::G_ASHR:
1506 case TargetOpcode::G_LSHR:
1507 case TargetOpcode::G_SHL: {
1508 TestReg = MI->getOperand(1).getReg();
1509 auto VRegAndVal =
1510 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1511 if (VRegAndVal)
1512 C = VRegAndVal->Value.getSExtValue();
1513 break;
1514 }
1515 }
1516
1517 // Didn't find a constant or viable register. Bail out of the loop.
1518 if (!C || !TestReg.isValid())
1519 break;
1520
1521 // We found a suitable instruction with a constant. Check to see if we can
1522 // walk through the instruction.
1523 Register NextReg;
1524 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1525 switch (Opc) {
1526 default:
1527 break;
1528 case TargetOpcode::G_AND:
1529 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1530 if ((*C >> Bit) & 1)
1531 NextReg = TestReg;
1532 break;
1533 case TargetOpcode::G_SHL:
1534 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1535 // the type of the register.
1536 if (*C <= Bit && (Bit - *C) < TestRegSize) {
1537 NextReg = TestReg;
1538 Bit = Bit - *C;
1539 }
1540 break;
1541 case TargetOpcode::G_ASHR:
1542 // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1543 // in x
1544 NextReg = TestReg;
1545 Bit = Bit + *C;
1546 if (Bit >= TestRegSize)
1547 Bit = TestRegSize - 1;
1548 break;
1549 case TargetOpcode::G_LSHR:
1550 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1551 if ((Bit + *C) < TestRegSize) {
1552 NextReg = TestReg;
1553 Bit = Bit + *C;
1554 }
1555 break;
1556 case TargetOpcode::G_XOR:
1557 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1558 // appropriate.
1559 //
1560 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1561 //
1562 // tbz x', b -> tbnz x, b
1563 //
1564 // Because x' only has the b-th bit set if x does not.
1565 if ((*C >> Bit) & 1)
1566 Invert = !Invert;
1567 NextReg = TestReg;
1568 break;
1569 }
1570
1571 // Check if we found anything worth folding.
1572 if (!NextReg.isValid())
1573 return Reg;
1574 Reg = NextReg;
1575 }
1576
1577 return Reg;
1578}
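// For example, given "%y = G_SHL %x, 2" followed by a test of bit 5 of %y, the
// walk above can rewrite the test to bit 3 of %x, since bit 5 of the shifted
// value is bit 3 of the original value.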
1579
1580MachineInstr *AArch64InstructionSelector::emitTestBit(
1581 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1582 MachineIRBuilder &MIB) const {
1583 assert(TestReg.isValid());
1584 assert(ProduceNonFlagSettingCondBr &&
1585 "Cannot emit TB(N)Z with speculation tracking!");
1586 MachineRegisterInfo &MRI = *MIB.getMRI();
1587
1588 // Attempt to optimize the test bit by walking over instructions.
1589 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1590 LLT Ty = MRI.getType(TestReg);
1591 unsigned Size = Ty.getSizeInBits();
1592 assert(!Ty.isVector() && "Expected a scalar!");
1593 assert(Bit < 64 && "Bit is too large!");
1594
1595 // When the test register is a 64-bit register, we have to narrow to make
1596 // TBNZW work.
1597 bool UseWReg = Bit < 32;
1598 unsigned NecessarySize = UseWReg ? 32 : 64;
1599 if (Size != NecessarySize)
1600 TestReg = moveScalarRegClass(
1601 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1602 MIB);
1603
1604 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1605 {AArch64::TBZW, AArch64::TBNZW}};
1606 unsigned Opc = OpcTable[UseWReg][IsNegative];
1607 auto TestBitMI =
1608 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1609 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1610 return &*TestBitMI;
1611}
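// Note: bits 0-31 are tested with the W-register forms (TBZW/TBNZW) and bits
// 32-63 with the X-register forms, copying the tested value to a matching
// register class first when its current size does not match.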
1612
1613bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1614 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1615 MachineIRBuilder &MIB) const {
1616 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1617 // Given something like this:
1618 //
1619 // %x = ...Something...
1620 // %one = G_CONSTANT i64 1
1621 // %zero = G_CONSTANT i64 0
1622 // %and = G_AND %x, %one
1623 // %cmp = G_ICMP intpred(ne), %and, %zero
1624 // %cmp_trunc = G_TRUNC %cmp
1625 // G_BRCOND %cmp_trunc, %bb.3
1626 //
1627 // We want to try and fold the AND into the G_BRCOND and produce either a
1628 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1629 //
1630 // In this case, we'd get
1631 //
1632 // TBNZ %x %bb.3
1633 //
1634
1635 // Check if the AND has a constant on its RHS which we can use as a mask.
1636 // If it's a power of 2, then it's the same as checking a specific bit.
1637 // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1638 auto MaybeBit = getIConstantVRegValWithLookThrough(
1639 AndInst.getOperand(2).getReg(), *MIB.getMRI());
1640 if (!MaybeBit)
1641 return false;
1642
1643 int32_t Bit = MaybeBit->Value.exactLogBase2();
1644 if (Bit < 0)
1645 return false;
1646
1647 Register TestReg = AndInst.getOperand(1).getReg();
1648
1649 // Emit a TB(N)Z.
1650 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1651 return true;
1652}
1653
1654MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1655 bool IsNegative,
1656 MachineBasicBlock *DestMBB,
1657 MachineIRBuilder &MIB) const {
1658 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1659 MachineRegisterInfo &MRI = *MIB.getMRI();
1660 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1661 AArch64::GPRRegBankID &&
1662 "Expected GPRs only?");
1663 auto Ty = MRI.getType(CompareReg);
1664 unsigned Width = Ty.getSizeInBits();
1665 assert(!Ty.isVector() && "Expected scalar only?");
1666 assert(Width <= 64 && "Expected width to be at most 64?");
1667 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1668 {AArch64::CBNZW, AArch64::CBNZX}};
1669 unsigned Opc = OpcTable[IsNegative][Width == 64];
1670 auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1671 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1672 return &*BranchMI;
1673}
1674
1675bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1676 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1677 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1678 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1679 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1680 // totally clean. Some of them require two branches to implement.
1681 auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1682 emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1683 Pred);
1684 AArch64CC::CondCode CC1, CC2;
1685 changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
1686 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1687 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1688 if (CC2 != AArch64CC::AL)
1689 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1690 I.eraseFromParent();
1691 return true;
1692}
1693
1694bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1695 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1696 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1697 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1698 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1699 //
1700 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1701 // instructions will not be produced, as they are conditional branch
1702 // instructions that do not set flags.
1703 if (!ProduceNonFlagSettingCondBr)
1704 return false;
1705
1706 MachineRegisterInfo &MRI = *MIB.getMRI();
1707 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1708 auto Pred =
1709 static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1710 Register LHS = ICmp.getOperand(2).getReg();
1711 Register RHS = ICmp.getOperand(3).getReg();
1712
1713 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1714 auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1715 MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1716
1717 // When we can emit a TB(N)Z, prefer that.
1718 //
1719 // Handle non-commutative condition codes first.
1720 // Note that we don't want to do this when we have a G_AND because it can
1721 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1722 if (VRegAndVal && !AndInst) {
1723 int64_t C = VRegAndVal->Value.getSExtValue();
1724
1725 // When we have a greater-than comparison, we can just test if the msb is
1726 // zero.
1727 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1728 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1729 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1730 I.eraseFromParent();
1731 return true;
1732 }
1733
1734 // When we have a less than comparison, we can just test if the msb is not
1735 // zero.
1736 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1737 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1738 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1739 I.eraseFromParent();
1740 return true;
1741 }
1742
1743 // Inversely, if we have a signed greater-than-or-equal comparison to zero,
1744 // we can test if the msb is zero.
1745 if (C == 0 && Pred == CmpInst::ICMP_SGE) {
1746 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1747 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1748 I.eraseFromParent();
1749 return true;
1750 }
1751 }
1752
1753 // Attempt to handle commutative condition codes. Right now, that's only
1754 // eq/ne.
1755 if (ICmpInst::isEquality(Pred)) {
1756 if (!VRegAndVal) {
1757 std::swap(RHS, LHS);
1758 VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1759 AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1760 }
1761
1762 if (VRegAndVal && VRegAndVal->Value == 0) {
1763 // If there's a G_AND feeding into this branch, try to fold it away by
1764 // emitting a TB(N)Z instead.
1765 //
1766 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1767 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1768 // would be redundant.
1769 if (AndInst &&
1770 tryOptAndIntoCompareBranch(
1771 *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1772 I.eraseFromParent();
1773 return true;
1774 }
1775
1776 // Otherwise, try to emit a CB(N)Z instead.
1777 auto LHSTy = MRI.getType(LHS);
1778 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1779 emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1780 I.eraseFromParent();
1781 return true;
1782 }
1783 }
1784 }
1785
1786 return false;
1787}
1788
1789bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1790 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1791 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1792 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1793 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1794 return true;
1795
1796 // Couldn't optimize. Emit a compare + a Bcc.
1797 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1798 auto PredOp = ICmp.getOperand(1);
1799 emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1800 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1801 static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
1802 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1803 I.eraseFromParent();
1804 return true;
1805}
1806
1807 bool AArch64InstructionSelector::selectCompareBranch(
1808 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
1809 Register CondReg = I.getOperand(0).getReg();
1810 MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1811 // Try to select the G_BRCOND using whatever is feeding the condition if
1812 // possible.
1813 unsigned CCMIOpc = CCMI->getOpcode();
1814 if (CCMIOpc == TargetOpcode::G_FCMP)
1815 return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1816 if (CCMIOpc == TargetOpcode::G_ICMP)
1817 return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1818
1819 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1820 // instructions will not be produced, as they are conditional branch
1821 // instructions that do not set flags.
1822 if (ProduceNonFlagSettingCondBr) {
1823 emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1824 I.getOperand(1).getMBB(), MIB);
1825 I.eraseFromParent();
1826 return true;
1827 }
1828
1829 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1830 auto TstMI =
1831 MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1832 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
1833 auto Bcc = MIB.buildInstr(AArch64::Bcc)
1834 .addImm(AArch64CC::NE)
1835 .addMBB(I.getOperand(1).getMBB());
1836 I.eraseFromParent();
1837 return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1838}
1839
1840/// Returns the element immediate value of a vector shift operand if found.
1841/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1842 static std::optional<int64_t> getVectorShiftImm(Register Reg,
1843 MachineRegisterInfo &MRI) {
1844 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1845 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1846 return getAArch64VectorSplatScalar(*OpMI, MRI);
1847}
1848
1849/// Matches and returns the shift immediate value for a SHL instruction given
1850/// a shift operand.
1851 static std::optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg,
1852 MachineRegisterInfo &MRI) {
1853 std::optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1854 if (!ShiftImm)
1855 return std::nullopt;
1856 // Check the immediate is in range for a SHL.
1857 int64_t Imm = *ShiftImm;
1858 if (Imm < 0)
1859 return std::nullopt;
1860 switch (SrcTy.getElementType().getSizeInBits()) {
1861 default:
1862 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1863 return std::nullopt;
1864 case 8:
1865 if (Imm > 7)
1866 return std::nullopt;
1867 break;
1868 case 16:
1869 if (Imm > 15)
1870 return std::nullopt;
1871 break;
1872 case 32:
1873 if (Imm > 31)
1874 return std::nullopt;
1875 break;
1876 case 64:
1877 if (Imm > 63)
1878 return std::nullopt;
1879 break;
1880 }
1881 return Imm;
1882}
1883
1884 bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
1885 MachineRegisterInfo &MRI) {
1886 assert(I.getOpcode() == TargetOpcode::G_SHL);
1887 Register DstReg = I.getOperand(0).getReg();
1888 const LLT Ty = MRI.getType(DstReg);
1889 Register Src1Reg = I.getOperand(1).getReg();
1890 Register Src2Reg = I.getOperand(2).getReg();
1891
1892 if (!Ty.isVector())
1893 return false;
1894
1895 // Check if we have a vector of constants on RHS that we can select as the
1896 // immediate form.
1897 std::optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1898
1899 unsigned Opc = 0;
1900 if (Ty == LLT::fixed_vector(2, 64)) {
1901 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1902 } else if (Ty == LLT::fixed_vector(4, 32)) {
1903 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1904 } else if (Ty == LLT::fixed_vector(2, 32)) {
1905 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1906 } else if (Ty == LLT::fixed_vector(4, 16)) {
1907 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1908 } else if (Ty == LLT::fixed_vector(8, 16)) {
1909 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1910 } else if (Ty == LLT::fixed_vector(16, 8)) {
1911 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1912 } else if (Ty == LLT::fixed_vector(8, 8)) {
1913 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1914 } else {
1915 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1916 return false;
1917 }
1918
1919 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1920 if (ImmVal)
1921 Shl.addImm(*ImmVal);
1922 else
1923 Shl.addUse(Src2Reg);
1924 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1925 I.eraseFromParent();
1926 return true;
1927}
1928
1929 bool AArch64InstructionSelector::selectVectorAshrLshr(
1930 MachineInstr &I, MachineRegisterInfo &MRI) {
1931 assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1932 I.getOpcode() == TargetOpcode::G_LSHR);
1933 Register DstReg = I.getOperand(0).getReg();
1934 const LLT Ty = MRI.getType(DstReg);
1935 Register Src1Reg = I.getOperand(1).getReg();
1936 Register Src2Reg = I.getOperand(2).getReg();
1937
1938 if (!Ty.isVector())
1939 return false;
1940
1941 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1942
1943 // We expect the immediate case to be lowered in the PostLegalCombiner to
1944 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1945
1946 // There is no shift-right-by-register instruction; instead, the
1947 // shift-left-by-register instruction takes a signed shift amount, where
1948 // negative values encode a right shift.
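// Illustrative lowering (a sketch based on the opcode tables below):
//   %d:_(<4 x s32>) = G_ASHR %v, %amt
// becomes
//   %neg = NEGv4i32 %amt
//   %d   = SSHLv4i32 %v, %neg
// with USHL used in place of SSHL for G_LSHR.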
1949
1950 unsigned Opc = 0;
1951 unsigned NegOpc = 0;
1952 const TargetRegisterClass *RC =
1953 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
1954 if (Ty == LLT::fixed_vector(2, 64)) {
1955 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1956 NegOpc = AArch64::NEGv2i64;
1957 } else if (Ty == LLT::fixed_vector(4, 32)) {
1958 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1959 NegOpc = AArch64::NEGv4i32;
1960 } else if (Ty == LLT::fixed_vector(2, 32)) {
1961 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1962 NegOpc = AArch64::NEGv2i32;
1963 } else if (Ty == LLT::fixed_vector(4, 16)) {
1964 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1965 NegOpc = AArch64::NEGv4i16;
1966 } else if (Ty == LLT::fixed_vector(8, 16)) {
1967 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1968 NegOpc = AArch64::NEGv8i16;
1969 } else if (Ty == LLT::fixed_vector(16, 8)) {
1970 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1971 NegOpc = AArch64::NEGv16i8;
1972 } else if (Ty == LLT::fixed_vector(8, 8)) {
1973 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1974 NegOpc = AArch64::NEGv8i8;
1975 } else {
1976 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1977 return false;
1978 }
1979
1980 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1981 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1982 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1983 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1984 I.eraseFromParent();
1985 return true;
1986}
1987
1988 bool AArch64InstructionSelector::selectVaStartAAPCS(
1989 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1990 return false;
1991}
1992
1993 bool AArch64InstructionSelector::selectVaStartDarwin(
1994 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1995 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1996 Register ListReg = I.getOperand(0).getReg();
1997
1998 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1999
2000 int FrameIdx = FuncInfo->getVarArgsStackIndex();
2001 if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64(
2002 MF.getFunction().getCallingConv())) {
2003 FrameIdx = FuncInfo->getVarArgsGPRSize() > 0
2004 ? FuncInfo->getVarArgsGPRIndex()
2005 : FuncInfo->getVarArgsStackIndex();
2006 }
2007
2008 auto MIB =
2009 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
2010 .addDef(ArgsAddrReg)
2011 .addFrameIndex(FrameIdx)
2012 .addImm(0)
2013 .addImm(0);
2014
2015 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2016
2017 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
2018 .addUse(ArgsAddrReg)
2019 .addUse(ListReg)
2020 .addImm(0)
2021 .addMemOperand(*I.memoperands_begin());
2022
2023 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2024 I.eraseFromParent();
2025 return true;
2026}
2027
2028void AArch64InstructionSelector::materializeLargeCMVal(
2029 MachineInstr &I, const Value *V, unsigned OpFlags) {
2030 MachineBasicBlock &MBB = *I.getParent();
2031 MachineFunction &MF = *MBB.getParent();
2032 MachineRegisterInfo &MRI = MF.getRegInfo();
2033
2034 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
2035 MovZ->addOperand(MF, I.getOperand(1));
2036 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
2037 AArch64II::MO_NC);
2038 MovZ->addOperand(MF, MachineOperand::CreateImm(0));
2039 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
2040
2041 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
2042 Register ForceDstReg) {
2043 Register DstReg = ForceDstReg
2044 ? ForceDstReg
2045 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2046 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
2047 if (auto *GV = dyn_cast<GlobalValue>(V)) {
2048 MovI->addOperand(MF, MachineOperand::CreateGA(
2049 GV, MovZ->getOperand(1).getOffset(), Flags));
2050 } else {
2051 MovI->addOperand(
2052 MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
2053 MovZ->getOperand(1).getOffset(), Flags));
2054 }
2055 MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
2056 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
2057 return DstReg;
2058 };
2059 Register DstReg = BuildMovK(MovZ.getReg(0),
2060 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
2061 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
2062 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
2063}
2064
2065bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
2066 MachineBasicBlock &MBB = *I.getParent();
2067 MachineFunction &MF = *MBB.getParent();
2068 MachineRegisterInfo &MRI = MF.getRegInfo();
2069
2070 switch (I.getOpcode()) {
2071 case TargetOpcode::G_STORE: {
2072 bool Changed = contractCrossBankCopyIntoStore(I, MRI);
2073 MachineOperand &SrcOp = I.getOperand(0);
2074 if (MRI.getType(SrcOp.getReg()).isPointer()) {
2075 // Allow matching with imported patterns for stores of pointers. Unlike
2076 // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
2077 // and constrain.
2078 auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
2079 Register NewSrc = Copy.getReg(0);
2080 SrcOp.setReg(NewSrc);
2081 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
2082 Changed = true;
2083 }
2084 return Changed;
2085 }
2086 case TargetOpcode::G_PTR_ADD:
2087 return convertPtrAddToAdd(I, MRI);
2088 case TargetOpcode::G_LOAD: {
2089 // For scalar loads of pointers, we try to convert the dest type from p0
2090 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
2091 // conversion, this should be ok because all users should have been
2092 // selected already, so the type doesn't matter for them.
2093 Register DstReg = I.getOperand(0).getReg();
2094 const LLT DstTy = MRI.getType(DstReg);
2095 if (!DstTy.isPointer())
2096 return false;
2097 MRI.setType(DstReg, LLT::scalar(64));
2098 return true;
2099 }
2100 case AArch64::G_DUP: {
2101 // Convert the type from p0 to s64 to help selection.
2102 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2103 if (!DstTy.isPointerVector())
2104 return false;
2105 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
2106 MRI.setType(I.getOperand(0).getReg(),
2107 DstTy.changeElementType(LLT::scalar(64)));
2108 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2109 I.getOperand(1).setReg(NewSrc.getReg(0));
2110 return true;
2111 }
2112 case TargetOpcode::G_UITOFP:
2113 case TargetOpcode::G_SITOFP: {
2114 // If both source and destination regbanks are FPR, then convert the opcode
2115 // to G_SITOF so that the importer can select it to an fpr variant.
2116 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
2117 // copy.
2118 Register SrcReg = I.getOperand(1).getReg();
2119 LLT SrcTy = MRI.getType(SrcReg);
2120 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2121 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
2122 return false;
2123
2124 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
2125 if (I.getOpcode() == TargetOpcode::G_SITOFP)
2126 I.setDesc(TII.get(AArch64::G_SITOF));
2127 else
2128 I.setDesc(TII.get(AArch64::G_UITOF));
2129 return true;
2130 }
2131 return false;
2132 }
2133 default:
2134 return false;
2135 }
2136}
2137
2138/// This lowering tries to look for G_PTR_ADD instructions and then converts
2139/// them to a standard G_ADD with a COPY on the source.
2140///
2141/// The motivation behind this is to expose the add semantics to the imported
2142/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
2143/// because the selector works bottom up, uses before defs. By the time we
2144/// end up trying to select a G_PTR_ADD, we should have already attempted to
2145/// fold this into addressing modes and were therefore unsuccessful.
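/// Illustrative rewrite (a sketch of the transformation below):
///   %dst:_(p0) = G_PTR_ADD %base:_(p0), %off:_(s64)
/// becomes
///   %int:_(s64) = G_PTRTOINT %base:_(p0)
///   %dst:_(s64) = G_ADD %int, %off
/// and, when %off matches the 0 - %x negate idiom, the G_ADD is further
/// rewritten into a G_SUB.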
2146 bool AArch64InstructionSelector::convertPtrAddToAdd(
2147 MachineInstr &I, MachineRegisterInfo &MRI) {
2148 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
2149 Register DstReg = I.getOperand(0).getReg();
2150 Register AddOp1Reg = I.getOperand(1).getReg();
2151 const LLT PtrTy = MRI.getType(DstReg);
2152 if (PtrTy.getAddressSpace() != 0)
2153 return false;
2154
2155 const LLT CastPtrTy =
2156 PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
2157 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
2158 // Set regbanks on the registers.
2159 if (PtrTy.isVector())
2160 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2161 else
2162 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2163
2164 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2165 // %dst(intty) = G_ADD %intbase, off
2166 I.setDesc(TII.get(TargetOpcode::G_ADD));
2167 MRI.setType(DstReg, CastPtrTy);
2168 I.getOperand(1).setReg(PtrToInt.getReg(0));
2169 if (!select(*PtrToInt)) {
2170 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
2171 return false;
2172 }
2173
2174 // Also take the opportunity here to try to do some optimization.
2175 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
2176 Register NegatedReg;
2177 if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
2178 return true;
2179 I.getOperand(2).setReg(NegatedReg);
2180 I.setDesc(TII.get(TargetOpcode::G_SUB));
2181 return true;
2182}
2183
2184 bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
2185 MachineRegisterInfo &MRI) {
2186 // We try to match the immediate variant of LSL, which is actually an alias
2187 // for a special case of UBFM. Otherwise, we fall back to the imported
2188 // selector which will match the register variant.
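// For reference (assumed alias encoding, mirroring what selectShiftA_64 and
// selectShiftB_64 compute): "lsl Xd, Xn, #s" is the alias of
// "ubfm Xd, Xn, #((64 - s) % 64), #(63 - s)", so a left shift by 4 would
// render as UBFMXri Xd, Xn, 60, 59.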
2189 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2190 const auto &MO = I.getOperand(2);
2191 auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
2192 if (!VRegAndVal)
2193 return false;
2194
2195 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2196 if (DstTy.isVector())
2197 return false;
2198 bool Is64Bit = DstTy.getSizeInBits() == 64;
2199 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2200 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2201
2202 if (!Imm1Fn || !Imm2Fn)
2203 return false;
2204
2205 auto NewI =
2206 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2207 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2208
2209 for (auto &RenderFn : *Imm1Fn)
2210 RenderFn(NewI);
2211 for (auto &RenderFn : *Imm2Fn)
2212 RenderFn(NewI);
2213
2214 I.eraseFromParent();
2215 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
2216}
2217
2218 bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2219 MachineInstr &I, MachineRegisterInfo &MRI) {
2220 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2221 // If we're storing a scalar, it doesn't matter what register bank that
2222 // scalar is on. All that matters is the size.
2223 //
2224 // So, if we see something like this (with a 32-bit scalar as an example):
2225 //
2226 // %x:gpr(s32) = ... something ...
2227 // %y:fpr(s32) = COPY %x:gpr(s32)
2228 // G_STORE %y:fpr(s32)
2229 //
2230 // We can fix this up into something like this:
2231 //
2232 // G_STORE %x:gpr(s32)
2233 //
2234 // And then continue the selection process normally.
2235 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2236 if (!DefDstReg.isValid())
2237 return false;
2238 LLT DefDstTy = MRI.getType(DefDstReg);
2239 Register StoreSrcReg = I.getOperand(0).getReg();
2240 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2241
2242 // If we get something strange like a physical register, then we shouldn't
2243 // go any further.
2244 if (!DefDstTy.isValid())
2245 return false;
2246
2247 // Are the source and dst types the same size?
2248 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2249 return false;
2250
2251 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2252 RBI.getRegBank(DefDstReg, MRI, TRI))
2253 return false;
2254
2255 // We have a cross-bank copy, which is entering a store. Let's fold it.
2256 I.getOperand(0).setReg(DefDstReg);
2257 return true;
2258}
2259
2260bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2261 assert(I.getParent() && "Instruction should be in a basic block!");
2262 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2263
2264 MachineBasicBlock &MBB = *I.getParent();
2265 MachineFunction &MF = *MBB.getParent();
2266 MachineRegisterInfo &MRI = MF.getRegInfo();
2267
2268 switch (I.getOpcode()) {
2269 case AArch64::G_DUP: {
2270 // Before selecting a DUP instruction, check if it is better selected as a
2271 // MOV or load from a constant pool.
2272 Register Src = I.getOperand(1).getReg();
2273 auto ValAndVReg = getAnyConstantVRegValWithLookThrough(Src, MRI);
2274 if (!ValAndVReg)
2275 return false;
2276 LLVMContext &Ctx = MF.getFunction().getContext();
2277 Register Dst = I.getOperand(0).getReg();
2278 auto *CV = ConstantDataVector::getSplat(
2279 MRI.getType(Dst).getNumElements(),
2280 ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()),
2281 ValAndVReg->Value));
2282 if (!emitConstantVector(Dst, CV, MIB, MRI))
2283 return false;
2284 I.eraseFromParent();
2285 return true;
2286 }
2287 case TargetOpcode::G_SEXT:
2288 // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
2289 // over a normal extend.
2290 if (selectUSMovFromExtend(I, MRI))
2291 return true;
2292 return false;
2293 case TargetOpcode::G_BR:
2294 return false;
2295 case TargetOpcode::G_SHL:
2296 return earlySelectSHL(I, MRI);
2297 case TargetOpcode::G_CONSTANT: {
2298 bool IsZero = false;
2299 if (I.getOperand(1).isCImm())
2300 IsZero = I.getOperand(1).getCImm()->isZero();
2301 else if (I.getOperand(1).isImm())
2302 IsZero = I.getOperand(1).getImm() == 0;
2303
2304 if (!IsZero)
2305 return false;
2306
2307 Register DefReg = I.getOperand(0).getReg();
2308 LLT Ty = MRI.getType(DefReg);
2309 if (Ty.getSizeInBits() == 64) {
2310 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2311 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2312 } else if (Ty.getSizeInBits() == 32) {
2313 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2314 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2315 } else
2316 return false;
2317
2318 I.setDesc(TII.get(TargetOpcode::COPY));
2319 return true;
2320 }
2321
2322 case TargetOpcode::G_ADD: {
2323 // Check if this is being fed by a G_ICMP on either side.
2324 //
2325 // (cmp pred, x, y) + z
2326 //
2327 // In the above case, when the cmp is true, we increment z by 1. So, we can
2328 // fold the add into the cset for the cmp by using cinc.
2329 //
2330 // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
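// Illustrative result (sketch): for %add = G_ADD %z, (G_ICMP eq, %a, %b) we
// emit the compare via emitIntegerCompare and then
// CSINC %add, %z, %z, ne, which behaves like "cinc %add, %z, eq".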
2331 Register AddDst = I.getOperand(0).getReg();
2332 Register AddLHS = I.getOperand(1).getReg();
2333 Register AddRHS = I.getOperand(2).getReg();
2334 // Only handle scalars.
2335 LLT Ty = MRI.getType(AddLHS);
2336 if (Ty.isVector())
2337 return false;
2338 // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
2339 // bits.
2340 unsigned Size = Ty.getSizeInBits();
2341 if (Size != 32 && Size != 64)
2342 return false;
2343 auto MatchCmp = [&](Register Reg) -> MachineInstr * {
2344 if (!MRI.hasOneNonDBGUse(Reg))
2345 return nullptr;
2346 // If the LHS of the add is 32 bits, then we want to fold a 32-bit
2347 // compare.
2348 if (Size == 32)
2349 return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
2350 // We model scalar compares using 32-bit destinations right now.
2351 // If it's a 64-bit compare, it'll have 64-bit sources.
2352 Register ZExt;
2353 if (!mi_match(Reg, MRI,
2354 m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt))))))
2355 return nullptr;
2356 auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
2357 if (!Cmp ||
2358 MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
2359 return nullptr;
2360 return Cmp;
2361 };
2362 // Try to match
2363 // z + (cmp pred, x, y)
2364 MachineInstr *Cmp = MatchCmp(AddRHS);
2365 if (!Cmp) {
2366 // (cmp pred, x, y) + z
2367 std::swap(AddLHS, AddRHS);
2368 Cmp = MatchCmp(AddRHS);
2369 if (!Cmp)
2370 return false;
2371 }
2372 auto &PredOp = Cmp->getOperand(1);
2373 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
2374 const AArch64CC::CondCode InvCC =
2377 emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
2378 /*RHS=*/Cmp->getOperand(3), PredOp, MIB);
2379 emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
2380 I.eraseFromParent();
2381 return true;
2382 }
2383 case TargetOpcode::G_OR: {
2384 // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2385 // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2386 // shifting and masking that we can replace with a BFI (encoded as a BFM).
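// Illustrative pattern (sketch): with Size = 32 and ShiftImm = 8,
//   %dst = G_OR (G_SHL %y, 8), (G_AND %x, 0xff)
// selects to BFMWri with Immr = 24 and Imms = 23, i.e. "bfi %x, %y, #8, #24"
// where %x is tied to %dst.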
2387 Register Dst = I.getOperand(0).getReg();
2388 LLT Ty = MRI.getType(Dst);
2389
2390 if (!Ty.isScalar())
2391 return false;
2392
2393 unsigned Size = Ty.getSizeInBits();
2394 if (Size != 32 && Size != 64)
2395 return false;
2396
2397 Register ShiftSrc;
2398 int64_t ShiftImm;
2399 Register MaskSrc;
2400 int64_t MaskImm;
2401 if (!mi_match(
2402 Dst, MRI,
2403 m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
2404 m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
2405 return false;
2406
2407 if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2408 return false;
2409
2410 int64_t Immr = Size - ShiftImm;
2411 int64_t Imms = Size - ShiftImm - 1;
2412 unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2413 emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2414 I.eraseFromParent();
2415 return true;
2416 }
2417 case TargetOpcode::G_FENCE: {
2418 if (I.getOperand(1).getImm() == 0)
2419 BuildMI(MBB, I, MIMetadata(I), TII.get(TargetOpcode::MEMBARRIER));
2420 else
2421 BuildMI(MBB, I, MIMetadata(I), TII.get(AArch64::DMB))
2422 .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
2423 I.eraseFromParent();
2424 return true;
2425 }
2426 default:
2427 return false;
2428 }
2429}
2430
2431bool AArch64InstructionSelector::select(MachineInstr &I) {
2432 assert(I.getParent() && "Instruction should be in a basic block!");
2433 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2434
2435 MachineBasicBlock &MBB = *I.getParent();
2436 MachineFunction &MF = *MBB.getParent();
2437 MachineRegisterInfo &MRI = MF.getRegInfo();
2438
2439 const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
2440 if (Subtarget->requiresStrictAlign()) {
2441 // We don't support this feature yet.
2442 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2443 return false;
2444 }
2445
2446 MIB.setInstrAndDebugLoc(I);
2447
2448 unsigned Opcode = I.getOpcode();
2449 // G_PHI requires same handling as PHI
2450 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2451 // Certain non-generic instructions also need some special handling.
2452
2453 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2454 return selectImpl(I, *CoverageInfo);
2455
2456 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2457 const Register DefReg = I.getOperand(0).getReg();
2458 const LLT DefTy = MRI.getType(DefReg);
2459
2460 const RegClassOrRegBank &RegClassOrBank =
2461 MRI.getRegClassOrRegBank(DefReg);
2462
2463 const TargetRegisterClass *DefRC
2464 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
2465 if (!DefRC) {
2466 if (!DefTy.isValid()) {
2467 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2468 return false;
2469 }
2470 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
2471 DefRC = getRegClassForTypeOnBank(DefTy, RB);
2472 if (!DefRC) {
2473 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2474 return false;
2475 }
2476 }
2477
2478 I.setDesc(TII.get(TargetOpcode::PHI));
2479
2480 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2481 }
2482
2483 if (I.isCopy())
2484 return selectCopy(I, TII, MRI, TRI, RBI);
2485
2486 if (I.isDebugInstr())
2487 return selectDebugInstr(I, MRI, RBI);
2488
2489 return true;
2490 }
2491
2492
2493 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2494 LLVM_DEBUG(
2495 dbgs() << "Generic instruction has unexpected implicit operands\n");
2496 return false;
2497 }
2498
2499 // Try to do some lowering before we start instruction selecting. These
2500 // lowerings are purely transformations on the input G_MIR and so selection
2501 // must continue after any modification of the instruction.
2502 if (preISelLower(I)) {
2503 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2504 }
2505
2506 // There may be patterns where the importer can't deal with them optimally,
2507 // but does select it to a suboptimal sequence so our custom C++ selection
2508 // code later never has a chance to work on it. Therefore, we have an early
2509 // selection attempt here to give priority to certain selection routines
2510 // over the imported ones.
2511 if (earlySelect(I))
2512 return true;
2513
2514 if (selectImpl(I, *CoverageInfo))
2515 return true;
2516
2517 LLT Ty =
2518 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2519
2520 switch (Opcode) {
2521 case TargetOpcode::G_SBFX:
2522 case TargetOpcode::G_UBFX: {
2523 static const unsigned OpcTable[2][2] = {
2524 {AArch64::UBFMWri, AArch64::UBFMXri},
2525 {AArch64::SBFMWri, AArch64::SBFMXri}};
2526 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2527 unsigned Size = Ty.getSizeInBits();
2528 unsigned Opc = OpcTable[IsSigned][Size == 64];
2529 auto Cst1 =
2530 getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
2531 assert(Cst1 && "Should have gotten a constant for src 1?");
2532 auto Cst2 =
2533 getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
2534 assert(Cst2 && "Should have gotten a constant for src 2?");
2535 auto LSB = Cst1->Value.getZExtValue();
2536 auto Width = Cst2->Value.getZExtValue();
2537 auto BitfieldInst =
2538 MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
2539 .addImm(LSB)
2540 .addImm(LSB + Width - 1);
2541 I.eraseFromParent();
2542 return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
2543 }
2544 case TargetOpcode::G_BRCOND:
2545 return selectCompareBranch(I, MF, MRI);
2546
2547 case TargetOpcode::G_BRINDIRECT: {
2548 I.setDesc(TII.get(AArch64::BR));
2549 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2550 }
2551
2552 case TargetOpcode::G_BRJT:
2553 return selectBrJT(I, MRI);
2554
2555 case AArch64::G_ADD_LOW: {
2556 // This op may have been separated from its ADRP companion by the localizer
2557 // or some other code motion pass. Given that many CPUs will try to
2558 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2559 // which will later be expanded into an ADRP+ADD pair after scheduling.
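// For example (sketch): keeping this as a single MOVaddr pseudo lets the
// post-scheduling expansion emit
//   adrp x0, sym
//   add  x0, x0, :lo12:sym
// back-to-back, so the ADRP+ADD pair can still be macro-fused.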
2560 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2561 if (BaseMI->getOpcode() != AArch64::ADRP) {
2562 I.setDesc(TII.get(AArch64::ADDXri));
2563 I.addOperand(MachineOperand::CreateImm(0));
2564 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2565 }
2566 assert(TM.getCodeModel() == CodeModel::Small &&
2567 "Expected small code model");
2568 auto Op1 = BaseMI->getOperand(1);
2569 auto Op2 = I.getOperand(2);
2570 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2571 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2572 Op1.getTargetFlags())
2573 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2574 Op2.getTargetFlags());
2575 I.eraseFromParent();
2576 return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2577 }
2578
2579 case TargetOpcode::G_FCONSTANT:
2580 case TargetOpcode::G_CONSTANT: {
2581 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2582
2583 const LLT s8 = LLT::scalar(8);
2584 const LLT s16 = LLT::scalar(16);
2585 const LLT s32 = LLT::scalar(32);
2586 const LLT s64 = LLT::scalar(64);
2587 const LLT s128 = LLT::scalar(128);
2588 const LLT p0 = LLT::pointer(0, 64);
2589
2590 const Register DefReg = I.getOperand(0).getReg();
2591 const LLT DefTy = MRI.getType(DefReg);
2592 const unsigned DefSize = DefTy.getSizeInBits();
2593 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2594
2595 // FIXME: Redundant check, but even less readable when factored out.
2596 if (isFP) {
2597 if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2598 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2599 << " constant, expected: " << s16 << " or " << s32
2600 << " or " << s64 << " or " << s128 << '\n');
2601 return false;
2602 }
2603
2604 if (RB.getID() != AArch64::FPRRegBankID) {
2605 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2606 << " constant on bank: " << RB
2607 << ", expected: FPR\n");
2608 return false;
2609 }
2610
2611 // The case when we have 0.0 is covered by tablegen. Reject it here so we
2612 // can be sure tablegen works correctly and isn't rescued by this code.
2613 // 0.0 is not covered by tablegen for FP128. So we will handle this
2614 // scenario in the code here.
2615 if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2616 return false;
2617 } else {
2618 // s32 and s64 are covered by tablegen.
2619 if (Ty != p0 && Ty != s8 && Ty != s16) {
2620 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2621 << " constant, expected: " << s32 << ", " << s64
2622 << ", or " << p0 << '\n');
2623 return false;
2624 }
2625
2626 if (RB.getID() != AArch64::GPRRegBankID) {
2627 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2628 << " constant on bank: " << RB
2629 << ", expected: GPR\n");
2630 return false;
2631 }
2632 }
2633
2634 if (isFP) {
2635 const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB);
2636 // For s16/s128 values, and for s32/s64 values that aren't legal fpimms, emit a constant pool load.
2637 switch (DefSize) {
2638 default:
2639 llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
2640 case 32:
2641 case 64: {
2642 bool OptForSize = shouldOptForSize(&MF);
2643 const auto &TLI = MF.getSubtarget().getTargetLowering();
2644 // If TLI says that this fpimm is illegal, then we'll expand to a
2645 // constant pool load.
2646 if (TLI->isFPImmLegal(I.getOperand(1).getFPImm()->getValueAPF(),
2647 EVT::getFloatingPointVT(DefSize), OptForSize))
2648 break;
2649 [[fallthrough]];
2650 }
2651 case 16:
2652 case 128: {
2653 auto *FPImm = I.getOperand(1).getFPImm();
2654 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2655 if (!LoadMI) {
2656 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2657 return false;
2658 }
2659 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2660 I.eraseFromParent();
2661 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2662 }
2663 }
2664
2665 assert((DefSize == 32 || DefSize == 64) && "Unexpected const def size");
2666 // Either emit a FMOV, or emit a copy to emit a normal mov.
2667 const Register DefGPRReg = MRI.createVirtualRegister(
2668 DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2669 MachineOperand &RegOp = I.getOperand(0);
2670 RegOp.setReg(DefGPRReg);
2671 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2672 MIB.buildCopy({DefReg}, {DefGPRReg});
2673
2674 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2675 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2676 return false;
2677 }
2678
2679 MachineOperand &ImmOp = I.getOperand(1);
2680 // FIXME: Is going through int64_t always correct?
2681 ImmOp.ChangeToImmediate(
2682 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2683 } else if (I.getOperand(1).isCImm()) {
2684 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2685 I.getOperand(1).ChangeToImmediate(Val);
2686 } else if (I.getOperand(1).isImm()) {
2687 uint64_t Val = I.getOperand(1).getImm();
2688 I.getOperand(1).ChangeToImmediate(Val);
2689 }
2690
2691 const unsigned MovOpc =
2692 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2693 I.setDesc(TII.get(MovOpc));
2694 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2695 return true;
2696 }
2697 case TargetOpcode::G_EXTRACT: {
2698 Register DstReg = I.getOperand(0).getReg();
2699 Register SrcReg = I.getOperand(1).getReg();
2700 LLT SrcTy = MRI.getType(SrcReg);
2701 LLT DstTy = MRI.getType(DstReg);
2702 (void)DstTy;
2703 unsigned SrcSize = SrcTy.getSizeInBits();
2704
2705 if (SrcTy.getSizeInBits() > 64) {
2706 // This should be an extract of an s128, which is like a vector extract.
2707 if (SrcTy.getSizeInBits() != 128)
2708 return false;
2709 // Only support extracting 64 bits from an s128 at the moment.
2710 if (DstTy.getSizeInBits() != 64)
2711 return false;
2712
2713 unsigned Offset = I.getOperand(2).getImm();
2714 if (Offset % 64 != 0)
2715 return false;
2716
2717 // Check we have the right regbank always.
2718 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2719 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2720 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2721
2722 if (SrcRB.getID() == AArch64::GPRRegBankID) {
2723 auto NewI =
2724 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2725 .addUse(SrcReg, 0,
2726 Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2727 constrainOperandRegClass(MF, TRI, MRI, TII, RBI, *NewI,
2728 AArch64::GPR64RegClass, NewI->getOperand(0));
2729 I.eraseFromParent();
2730 return true;
2731 }
2732
2733 // Emit the same code as a vector extract.
2734 // Offset must be a multiple of 64.
2735 unsigned LaneIdx = Offset / 64;
2736 MachineInstr *Extract = emitExtractVectorElt(
2737 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2738 if (!Extract)
2739 return false;
2740 I.eraseFromParent();
2741 return true;
2742 }
2743
2744 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2745 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2746 Ty.getSizeInBits() - 1);
2747
2748 if (SrcSize < 64) {
2749 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2750 "unexpected G_EXTRACT types");
2752 }
2753
2754 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2755 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2756 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2757 .addReg(DstReg, 0, AArch64::sub_32);
2758 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2759 AArch64::GPR32RegClass, MRI);
2760 I.getOperand(0).setReg(DstReg);
2761
2762 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2763 }
2764
2765 case TargetOpcode::G_INSERT: {
2766 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2767 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2768 unsigned DstSize = DstTy.getSizeInBits();
2769 // Larger inserts are vectors, same-size ones should be something else by
2770 // now (split up or turned into COPYs).
2771 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2772 return false;
2773
2774 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2775 unsigned LSB = I.getOperand(3).getImm();
2776 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2777 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2778 MachineInstrBuilder(MF, I).addImm(Width - 1);
2779
2780 if (DstSize < 64) {
2781 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2782 "unexpected G_INSERT types");
2784 }
2785
2786 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2787 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2788 TII.get(AArch64::SUBREG_TO_REG))
2789 .addDef(SrcReg)
2790 .addImm(0)
2791 .addUse(I.getOperand(2).getReg())
2792 .addImm(AArch64::sub_32);
2793 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2794 AArch64::GPR32RegClass, MRI);
2795 I.getOperand(2).setReg(SrcReg);
2796
2797 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2798 }
2799 case TargetOpcode::G_FRAME_INDEX: {
2800 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2801 if (Ty != LLT::pointer(0, 64)) {
2802 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2803 << ", expected: " << LLT::pointer(0, 64) << '\n');
2804 return false;
2805 }
2806 I.setDesc(TII.get(AArch64::ADDXri));
2807
2808 // MOs for a #0 shifted immediate.
2809 I.addOperand(MachineOperand::CreateImm(0));
2810 I.addOperand(MachineOperand::CreateImm(0));
2811
2812 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2813 }
2814
2815 case TargetOpcode::G_GLOBAL_VALUE: {
2816 const GlobalValue *GV = nullptr;
2817 unsigned OpFlags;
2818 if (I.getOperand(1).isSymbol()) {
2819 OpFlags = I.getOperand(1).getTargetFlags();
2820 // Currently only used by "RtLibUseGOT".
2821 assert(OpFlags == AArch64II::MO_GOT);
2822 } else {
2823 GV = I.getOperand(1).getGlobal();
2824 if (GV->isThreadLocal())
2825 return selectTLSGlobalValue(I, MRI);
2826 OpFlags = STI.ClassifyGlobalReference(GV, TM);
2827 }
2828
2829 if (OpFlags & AArch64II::MO_GOT) {
2830 I.setDesc(TII.get(AArch64::LOADgot));
2831 I.getOperand(1).setTargetFlags(OpFlags);
2832 } else if (TM.getCodeModel() == CodeModel::Large &&
2833 !TM.isPositionIndependent()) {
2834 // Materialize the global using movz/movk instructions.
2835 materializeLargeCMVal(I, GV, OpFlags);
2836 I.eraseFromParent();
2837 return true;
2838 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2839 I.setDesc(TII.get(AArch64::ADR));
2840 I.getOperand(1).setTargetFlags(OpFlags);
2841 } else {
2842 I.setDesc(TII.get(AArch64::MOVaddr));
2843 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2844 MachineInstrBuilder MIB(MF, I);
2845 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2846 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2847 }
2848 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2849 }
2850
2851 case TargetOpcode::G_ZEXTLOAD:
2852 case TargetOpcode::G_LOAD:
2853 case TargetOpcode::G_STORE: {
2854 GLoadStore &LdSt = cast<GLoadStore>(I);
2855 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2856 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
2857
2858 if (PtrTy != LLT::pointer(0, 64)) {
2859 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
2860 << ", expected: " << LLT::pointer(0, 64) << '\n');
2861 return false;
2862 }
2863
2864 uint64_t MemSizeInBytes = LdSt.getMemSize().getValue();
2865 unsigned MemSizeInBits = LdSt.getMemSizeInBits().getValue();
2866 AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
2867
2868 // Need special instructions for atomics that affect ordering.
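// For example (sketch): an acquire s32 load selects to LDARW below (or to
// LDAPRW when the subtarget has RCpc and the ordering is weaker than
// seq_cst), and a release s32 store selects to STLRW.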
2869 if (Order != AtomicOrdering::NotAtomic &&
2870 Order != AtomicOrdering::Unordered &&
2871 Order != AtomicOrdering::Monotonic) {
2872 assert(!isa<GZExtLoad>(LdSt));
2873 assert(MemSizeInBytes <= 8 &&
2874 "128-bit atomics should already be custom-legalized");
2875
2876 if (isa<GLoad>(LdSt)) {
2877 static constexpr unsigned LDAPROpcodes[] = {
2878 AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
2879 static constexpr unsigned LDAROpcodes[] = {
2880 AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
2881 ArrayRef<unsigned> Opcodes =
2882 STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
2883 ? LDAPROpcodes
2884 : LDAROpcodes;
2885 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2886 } else {
2887 static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2888 AArch64::STLRW, AArch64::STLRX};
2889 Register ValReg = LdSt.getReg(0);
2890 if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
2891 // Emit a subreg copy of 32 bits.
2892 Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2893 MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
2894 .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
2895 I.getOperand(0).setReg(NewVal);
2896 }
2897 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2898 }
2899 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2900 return true;
2901 }
2902
2903#ifndef NDEBUG
2904 const Register PtrReg = LdSt.getPointerReg();
2905 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2906 // Check that the pointer register is valid.
2907 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2908 "Load/Store pointer operand isn't a GPR");
2909 assert(MRI.getType(PtrReg).isPointer() &&
2910 "Load/Store pointer operand isn't a pointer");
2911#endif
2912
2913 const Register ValReg = LdSt.getReg(0);
2914 const LLT ValTy = MRI.getType(ValReg);
2915 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2916
2917 // The code below doesn't support truncating stores, so we need to split it
2918 // again.
2919 if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2920 unsigned SubReg;
2921 LLT MemTy = LdSt.getMMO().getMemoryType();
2922 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2923 if (!getSubRegForClass(RC, TRI, SubReg))
2924 return false;
2925
2926 // Generate a subreg copy.
2927 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
2928 .addReg(ValReg, 0, SubReg)
2929 .getReg(0);
2930 RBI.constrainGenericRegister(Copy, *RC, MRI);
2931 LdSt.getOperand(0).setReg(Copy);
2932 } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2933 // If this is an any-extending load from the FPR bank, split it into a regular
2934 // load + extend.
2935 if (RB.getID() == AArch64::FPRRegBankID) {
2936 unsigned SubReg;
2937 LLT MemTy = LdSt.getMMO().getMemoryType();
2938 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2939 if (!getSubRegForClass(RC, TRI, SubReg))
2940 return false;
2941 Register OldDst = LdSt.getReg(0);
2942 Register NewDst =
2943 MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
2944 LdSt.getOperand(0).setReg(NewDst);
2945 MRI.setRegBank(NewDst, RB);
2946 // Generate a SUBREG_TO_REG to extend it.
2947 MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
2948 MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
2949 .addImm(0)
2950 .addUse(NewDst)
2951 .addImm(SubReg);
2952 auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB);
2953 RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
2954 MIB.setInstr(LdSt);
2955 }
2956 }
2957
2958 // Helper lambda for partially selecting I. Either returns the original
2959 // instruction with an updated opcode, or a new instruction.
2960 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
2961 bool IsStore = isa<GStore>(I);
2962 const unsigned NewOpc =
2963 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
2964 if (NewOpc == I.getOpcode())
2965 return nullptr;
2966 // Check if we can fold anything into the addressing mode.
2967 auto AddrModeFns =
2968 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
2969 if (!AddrModeFns) {
2970 // Can't fold anything. Use the original instruction.
2971 I.setDesc(TII.get(NewOpc));
2972 I.addOperand(MachineOperand::CreateImm(0));
2973 return &I;
2974 }
2975
2976 // Folded something. Create a new instruction and return it.
2977 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
2978 Register CurValReg = I.getOperand(0).getReg();
2979 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
2980 NewInst.cloneMemRefs(I);
2981 for (auto &Fn : *AddrModeFns)
2982 Fn(NewInst);
2983 I.eraseFromParent();
2984 return &*NewInst;
2985 };
2986
2987 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
2988 if (!LoadStore)
2989 return false;
2990
2991 // If we're storing a 0, use WZR/XZR.
2992 if (Opcode == TargetOpcode::G_STORE) {
2993 auto CVal = getIConstantVRegValWithLookThrough(
2994 LoadStore->getOperand(0).getReg(), MRI);
2995 if (CVal && CVal->Value == 0) {
2996 switch (LoadStore->getOpcode()) {
2997 case AArch64::STRWui:
2998 case AArch64::STRHHui:
2999 case AArch64::STRBBui:
3000 LoadStore->getOperand(0).setReg(AArch64::WZR);
3001 break;
3002 case AArch64::STRXui:
3003 LoadStore->getOperand(0).setReg(AArch64::XZR);
3004 break;
3005 }
3006 }
3007 }
3008
3009 if (IsZExtLoad || (Opcode == TargetOpcode::G_LOAD &&
3010 ValTy == LLT::scalar(64) && MemSizeInBits == 32)) {
3011 // The any/zextload from a smaller type to i32 should be handled by the
3012 // importer.
3013 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
3014 return false;
3015 // If we have an extending load then change the load's type to be a
3016 // narrower reg and zero_extend with SUBREG_TO_REG.
3017 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3018 Register DstReg = LoadStore->getOperand(0).getReg();
3019 LoadStore->getOperand(0).setReg(LdReg);
3020
3021 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
3022 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
3023 .addImm(0)
3024 .addUse(LdReg)
3025 .addImm(AArch64::sub_32);
3026 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3027 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
3028 MRI);
3029 }
3030 return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3031 }
3032
3033 case TargetOpcode::G_INDEXED_ZEXTLOAD:
3034 case TargetOpcode::G_INDEXED_SEXTLOAD:
3035 return selectIndexedExtLoad(I, MRI);
3036 case TargetOpcode::G_INDEXED_LOAD:
3037 return selectIndexedLoad(I, MRI);
3038 case TargetOpcode::G_INDEXED_STORE:
3039 return selectIndexedStore(cast<GIndexedStore>(I), MRI);
3040
3041 case TargetOpcode::G_LSHR:
3042 case TargetOpcode::G_ASHR:
3043 if (MRI.getType(I.getOperand(0).getReg()).isVector())
3044 return selectVectorAshrLshr(I, MRI);
3045 [[fallthrough]];
3046 case TargetOpcode::G_SHL:
3047 if (Opcode == TargetOpcode::G_SHL &&
3048 MRI.getType(I.getOperand(0).getReg()).isVector())
3049 return selectVectorSHL(I, MRI);
3050
3051 // These shifts were legalized to have 64 bit shift amounts because we
3052 // want to take advantage of the selection patterns that assume the
3053 // immediates are s64s, however, selectBinaryOp will assume both operands
3054 // will have the same bit size.
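// For example (sketch): a 32-bit G_SHL with an s64 shift amount gets its
// amount operand rewritten to
//   %amt32:gpr(s32) = COPY %amt64.sub_32
// so both operands are 32 bits wide before the imported patterns run.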
3055 {
3056 Register SrcReg = I.getOperand(1).getReg();
3057 Register ShiftReg = I.getOperand(2).getReg();
3058 const LLT ShiftTy = MRI.getType(ShiftReg);
3059 const LLT SrcTy = MRI.getType(SrcReg);
3060 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
3061 ShiftTy.getSizeInBits() == 64) {
3062 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
3063 // Insert a subregister copy to implement a 64->32 trunc
3064 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
3065 .addReg(ShiftReg, 0, AArch64::sub_32);
3066 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
3067 I.getOperand(2).setReg(Trunc.getReg(0));
3068 }
3069 }
3070 [[fallthrough]];
3071 case TargetOpcode::G_OR: {
3072 // Reject the various things we don't support yet.
3073 if (unsupportedBinOp(I, RBI, MRI, TRI))
3074 return false;
3075
3076 const unsigned OpSize = Ty.getSizeInBits();
3077
3078 const Register DefReg = I.getOperand(0).getReg();
3079 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
3080
3081 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
3082 if (NewOpc == I.getOpcode())
3083 return false;
3084
3085 I.setDesc(TII.get(NewOpc));
3086 // FIXME: Should the type be always reset in setDesc?
3087
3088 // Now that we selected an opcode, we need to constrain the register
3089 // operands to use appropriate classes.
3090 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3091 }
3092
3093 case TargetOpcode::G_PTR_ADD: {
3094 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
3095 I.eraseFromParent();
3096 return true;
3097 }
3098
3099 case TargetOpcode::G_SADDE:
3100 case TargetOpcode::G_UADDE:
3101 case TargetOpcode::G_SSUBE:
3102 case TargetOpcode::G_USUBE:
3103 case TargetOpcode::G_SADDO:
3104 case TargetOpcode::G_UADDO:
3105 case TargetOpcode::G_SSUBO:
3106 case TargetOpcode::G_USUBO:
3107 return selectOverflowOp(I, MRI);
3108
3109 case TargetOpcode::G_PTRMASK: {
3110 Register MaskReg = I.getOperand(2).getReg();
3111 std::optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
3112 // TODO: Implement arbitrary cases
3113 if (!MaskVal || !isShiftedMask_64(*MaskVal))
3114 return false;
3115
3116 uint64_t Mask = *MaskVal;
3117 I.setDesc(TII.get(AArch64::ANDXri));
3118 I.getOperand(2).ChangeToImmediate(
3119 AArch64_AM::encodeLogicalImmediate(Mask, 64));
3120
3121 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3122 }
3123 case TargetOpcode::G_PTRTOINT:
3124 case TargetOpcode::G_TRUNC: {
3125 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3126 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3127
3128 const Register DstReg = I.getOperand(0).getReg();
3129 const Register SrcReg = I.getOperand(1).getReg();
3130
3131 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3132 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3133
3134 if (DstRB.getID() != SrcRB.getID()) {
3135 LLVM_DEBUG(
3136 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
3137 return false;
3138 }
3139
3140 if (DstRB.getID() == AArch64::GPRRegBankID) {
3141 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3142 if (!DstRC)
3143 return false;
3144
3145 const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB);
3146 if (!SrcRC)
3147 return false;
3148
3149 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
3150 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
3151 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
3152 return false;
3153 }
3154
3155 if (DstRC == SrcRC) {
3156 // Nothing to be done
3157 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
3158 SrcTy == LLT::scalar(64)) {
3159 llvm_unreachable("TableGen can import this case");
3160 return false;
3161 } else if (DstRC == &AArch64::GPR32RegClass &&
3162 SrcRC == &AArch64::GPR64RegClass) {
3163 I.getOperand(1).setSubReg(AArch64::sub_32);
3164 } else {
3165 LLVM_DEBUG(
3166 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3167 return false;
3168 }
3169
3170 I.setDesc(TII.get(TargetOpcode::COPY));
3171 return true;
3172 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3173 if (DstTy == LLT::fixed_vector(4, 16) &&
3174 SrcTy == LLT::fixed_vector(4, 32)) {
3175 I.setDesc(TII.get(AArch64::XTNv4i16));
3176 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3177 return true;
3178 }
3179
3180 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3181 MachineInstr *Extract = emitExtractVectorElt(
3182 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
3183 if (!Extract)
3184 return false;
3185 I.eraseFromParent();
3186 return true;
3187 }
3188
3189 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3190 if (Opcode == TargetOpcode::G_PTRTOINT) {
3191 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
3192 I.setDesc(TII.get(TargetOpcode::COPY));
3193 return selectCopy(I, TII, MRI, TRI, RBI);
3194 }
3195 }
3196
3197 return false;
3198 }
3199
3200 case TargetOpcode::G_ANYEXT: {
3201 if (selectUSMovFromExtend(I, MRI))
3202 return true;
3203
3204 const Register DstReg = I.getOperand(0).getReg();
3205 const Register SrcReg = I.getOperand(1).getReg();
3206
3207 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
3208 if (RBDst.getID() != AArch64::GPRRegBankID) {
3209 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
3210 << ", expected: GPR\n");
3211 return false;
3212 }
3213
3214 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
3215 if (RBSrc.getID() != AArch64::GPRRegBankID) {
3216 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
3217 << ", expected: GPR\n");
3218 return false;
3219 }
3220
3221 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
3222
3223 if (DstSize == 0) {
3224 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
3225 return false;
3226 }
3227
3228 if (DstSize != 64 && DstSize > 32) {
3229 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
3230 << ", expected: 32 or 64\n");
3231 return false;
3232 }
3233 // At this point G_ANYEXT is just like a plain COPY, but we need
3234 // to explicitly form the 64-bit value if the destination is 64 bits.
3235 if (DstSize > 32) {
3236 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3237 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
3238 .addDef(ExtSrc)
3239 .addImm(0)
3240 .addUse(SrcReg)
3241 .addImm(AArch64::sub_32);
3242 I.getOperand(1).setReg(ExtSrc);
3243 }
3244 return selectCopy(I, TII, MRI, TRI, RBI);
3245 }
3246
3247 case TargetOpcode::G_ZEXT:
3248 case TargetOpcode::G_SEXT_INREG:
3249 case TargetOpcode::G_SEXT: {
3250 if (selectUSMovFromExtend(I, MRI))
3251 return true;
3252
3253 unsigned Opcode = I.getOpcode();
3254 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3255 const Register DefReg = I.getOperand(0).getReg();
3256 Register SrcReg = I.getOperand(1).getReg();
3257 const LLT DstTy = MRI.getType(DefReg);
3258 const LLT SrcTy = MRI.getType(SrcReg);
3259 unsigned DstSize = DstTy.getSizeInBits();
3260 unsigned SrcSize = SrcTy.getSizeInBits();
3261
3262 // SEXT_INREG has the same src reg size as the dst; the size of the value to
3263 // be extended is encoded in the imm.
3264 if (Opcode == TargetOpcode::G_SEXT_INREG)
3265 SrcSize = I.getOperand(2).getImm();
3266
3267 if (DstTy.isVector())
3268 return false; // Should be handled by imported patterns.
3269
3270 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3271 AArch64::GPRRegBankID &&
3272 "Unexpected ext regbank");
3273
3274 MachineInstr *ExtI;
3275
3276 // First check if we're extending the result of a load whose destination type
3277 // is smaller than 32 bits; in that case this zext is redundant. GPR32 is the
3278 // smallest GPR register on AArch64, and all loads which are smaller
3279 // automatically zero-extend the upper bits. E.g.
3280 // %v(s8) = G_LOAD %p, :: (load 1)
3281 // %v2(s32) = G_ZEXT %v(s8)
3282 if (!IsSigned) {
3283 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
3284 bool IsGPR =
3285 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3286 if (LoadMI && IsGPR) {
3287 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3288 unsigned BytesLoaded = MemOp->getSize().getValue();
3289 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3290 return selectCopy(I, TII, MRI, TRI, RBI);
3291 }
3292
3293 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3294 // + SUBREG_TO_REG.
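// For illustration (virtual register names are placeholders), this roughly
// emits:
//   %mov:gpr32 = ORRWrs $wzr, %src, 0       ; 32-bit mov, upper bits are zero
//   %dst:gpr64 = SUBREG_TO_REG 0, %mov, %subreg.sub_32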
3295 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3296 Register SubregToRegSrc =
3297 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3298 const Register ZReg = AArch64::WZR;
3299 MIB.buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg})
3300 .addImm(0);
3301
3302 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3303 .addImm(0)
3304 .addUse(SubregToRegSrc)
3305 .addImm(AArch64::sub_32);
3306
3307 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3308 MRI)) {
3309 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3310 return false;
3311 }
3312
3313 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3314 MRI)) {
3315 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3316 return false;
3317 }
3318
3319 I.eraseFromParent();
3320 return true;
3321 }
3322 }
3323
3324 if (DstSize == 64) {
3325 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3326 // FIXME: Can we avoid manually doing this?
3327 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3328 MRI)) {
3329 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3330 << " operand\n");
3331 return false;
3332 }
3333 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
3334 {&AArch64::GPR64RegClass}, {})
3335 .addImm(0)
3336 .addUse(SrcReg)
3337 .addImm(AArch64::sub_32)
3338 .getReg(0);
3339 }
3340
3341 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3342 {DefReg}, {SrcReg})
3343 .addImm(0)
3344 .addImm(SrcSize - 1);
3345 } else if (DstSize <= 32) {
3346 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3347 {DefReg}, {SrcReg})
3348 .addImm(0)
3349 .addImm(SrcSize - 1);
3350 } else {
3351 return false;
3352 }
3353
3354 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
3355 I.eraseFromParent();
3356 return true;
3357 }
3358
3359 case TargetOpcode::G_SITOFP:
3360 case TargetOpcode::G_UITOFP:
3361 case TargetOpcode::G_FPTOSI:
3362 case TargetOpcode::G_FPTOUI: {
3363 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
3364 SrcTy = MRI.getType(I.getOperand(1).getReg());
3365 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
3366 if (NewOpc == Opcode)
3367 return false;
3368
3369 I.setDesc(TII.get(NewOpc));
3370 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3371 I.setFlags(MachineInstr::NoFPExcept);
3372
3373 return true;
3374 }
3375
3376 case TargetOpcode::G_FREEZE:
3377 return selectCopy(I, TII, MRI, TRI, RBI);
3378
3379 case TargetOpcode::G_INTTOPTR:
3380 // The importer is currently unable to import pointer types since they
3381 // didn't exist in SelectionDAG.
3382 return selectCopy(I, TII, MRI, TRI, RBI);
3383
3384 case TargetOpcode::G_BITCAST:
3385 // Imported SelectionDAG rules can handle every bitcast except those that
3386 // bitcast from a type to the same type. Ideally, these shouldn't occur
3387 // but we might not run an optimizer that deletes them. The other exception
3388 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3389 // of them.
3390 return selectCopy(I, TII, MRI, TRI, RBI);
3391
3392 case TargetOpcode::G_SELECT: {
3393 auto &Sel = cast<GSelect>(I);
3394 const Register CondReg = Sel.getCondReg();
3395 const Register TReg = Sel.getTrueReg();
3396 const Register FReg = Sel.getFalseReg();
3397
3398 if (tryOptSelect(Sel))
3399 return true;
3400
3401 // Make sure to use an unused vreg instead of wzr, so that the peephole
3402 // optimizations will be able to optimize these.
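// For illustration (register names are placeholders), the fallback sequence
// is roughly:
//   %dead:gpr32 = ANDSWri %cond, <encoded 1>   ; test bit 0, set NZCV
//   %dst = CSELWr/CSELXr %true, %false, NE     ; emitSelect may fold further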
3403 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3404 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3405 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3406 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3407 if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
3408 return false;
3409 Sel.eraseFromParent();
3410 return true;
3411 }
3412 case TargetOpcode::G_ICMP: {
3413 if (Ty.isVector())
3414 return false;
3415
3416 if (Ty != LLT::scalar(32)) {
3417 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3418 << ", expected: " << LLT::scalar(32) << '\n');
3419 return false;
3420 }
3421
3422 auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3423 const AArch64CC::CondCode InvCC =
3424 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
3425 emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB);
3426 emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
3427 /*Src2=*/AArch64::WZR, InvCC, MIB);
3428 I.eraseFromParent();
3429 return true;
3430 }
3431
3432 case TargetOpcode::G_FCMP: {
3433 CmpInst::Predicate Pred =
3434 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3435 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
3436 Pred) ||
3437 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
3438 return false;
3439 I.eraseFromParent();
3440 return true;
3441 }
3442 case TargetOpcode::G_VASTART:
3443 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3444 : selectVaStartAAPCS(I, MF, MRI);
3445 case TargetOpcode::G_INTRINSIC:
3446 return selectIntrinsic(I, MRI);
3447 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3448 return selectIntrinsicWithSideEffects(I, MRI);
3449 case TargetOpcode::G_IMPLICIT_DEF: {
3450 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3451 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3452 const Register DstReg = I.getOperand(0).getReg();
3453 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3454 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3455 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3456 return true;
3457 }
3458 case TargetOpcode::G_BLOCK_ADDR: {
3459 if (TM.getCodeModel() == CodeModel::Large && !TM.isPositionIndependent()) {
3460 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3461 I.eraseFromParent();
3462 return true;
3463 } else {
3464 I.setDesc(TII.get(AArch64::MOVaddrBA));
3465 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3466 I.getOperand(0).getReg())
3467 .addBlockAddress(I.getOperand(1).getBlockAddress(),
3468 /* Offset */ 0, AArch64II::MO_PAGE)
3469 .addBlockAddress(
3470 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3471 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3472 I.eraseFromParent();
3473 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3474 }
3475 }
3476 case AArch64::G_DUP: {
3477 // When the scalar of G_DUP is an s8/s16 gpr, it can't be selected by the
3478 // imported patterns, so do it manually here. Avoiding the s16 gpr in the
3479 // first place is difficult because at RBS we may end up pessimizing the fpr
3480 // case if we decide to add an anyextend to fix this. Manual selection is the
3481 // most robust solution for now.
3482 if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3483 AArch64::GPRRegBankID)
3484 return false; // We expect the fpr regbank case to be imported.
3485 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3486 if (VecTy == LLT::fixed_vector(8, 8))
3487 I.setDesc(TII.get(AArch64::DUPv8i8gpr));
3488 else if (VecTy == LLT::fixed_vector(16, 8))
3489 I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3490 else if (VecTy == LLT::fixed_vector(4, 16))
3491 I.setDesc(TII.get(AArch64::DUPv4i16gpr));
3492 else if (VecTy == LLT::fixed_vector(8, 16))
3493 I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3494 else
3495 return false;
3496 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3497 }
3498 case TargetOpcode::G_BUILD_VECTOR:
3499 return selectBuildVector(I, MRI);
3500 case TargetOpcode::G_MERGE_VALUES:
3501 return selectMergeValues(I, MRI);
3502 case TargetOpcode::G_UNMERGE_VALUES:
3503 return selectUnmergeValues(I, MRI);
3504 case TargetOpcode::G_SHUFFLE_VECTOR:
3505 return selectShuffleVector(I, MRI);
3506 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3507 return selectExtractElt(I, MRI);
3508 case TargetOpcode::G_CONCAT_VECTORS:
3509 return selectConcatVectors(I, MRI);
3510 case TargetOpcode::G_JUMP_TABLE:
3511 return selectJumpTable(I, MRI);
3512 case TargetOpcode::G_MEMCPY:
3513 case TargetOpcode::G_MEMCPY_INLINE:
3514 case TargetOpcode::G_MEMMOVE:
3515 case TargetOpcode::G_MEMSET:
3516 assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
3517 return selectMOPS(I, MRI);
3518 }
3519
3520 return false;
3521}
3522
3523bool AArch64InstructionSelector::selectAndRestoreState(MachineInstr &I) {
3524 MachineIRBuilderState OldMIBState = MIB.getState();
3525 bool Success = select(I);
3526 MIB.setState(OldMIBState);
3527 return Success;
3528}
3529
3530 bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
3531 MachineRegisterInfo &MRI) {
3532 unsigned Mopcode;
3533 switch (GI.getOpcode()) {
3534 case TargetOpcode::G_MEMCPY:
3535 case TargetOpcode::G_MEMCPY_INLINE:
3536 Mopcode = AArch64::MOPSMemoryCopyPseudo;
3537 break;
3538 case TargetOpcode::G_MEMMOVE:
3539 Mopcode = AArch64::MOPSMemoryMovePseudo;
3540 break;
3541 case TargetOpcode::G_MEMSET:
3542 // For tagged memset see llvm.aarch64.mops.memset.tag
3543 Mopcode = AArch64::MOPSMemorySetPseudo;
3544 break;
3545 }
3546
3547 auto &DstPtr = GI.getOperand(0);
3548 auto &SrcOrVal = GI.getOperand(1);
3549 auto &Size = GI.getOperand(2);
3550
3551 // Create copies of the registers that can be clobbered.
3552 const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
3553 const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
3554 const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg());
3555
3556 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3557 const auto &SrcValRegClass =
3558 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3559
3560 // Constrain to specific registers
3561 RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
3562 RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
3563 RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);
3564
3565 MIB.buildCopy(DstPtrCopy, DstPtr);
3566 MIB.buildCopy(SrcValCopy, SrcOrVal);
3567 MIB.buildCopy(SizeCopy, Size);
3568
3569 // New instruction uses the copied registers because it must update them.
3570 // The defs are not used since they don't exist in G_MEM*. They are still
3571 // tied.
3572 // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE
3573 Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
3574 Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3575 if (IsSet) {
3576 MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
3577 {DstPtrCopy, SizeCopy, SrcValCopy});
3578 } else {
3579 Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
3580 MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
3581 {DstPtrCopy, SrcValCopy, SizeCopy});
3582 }
3583
3584 GI.eraseFromParent();
3585 return true;
3586}
3587
3588 bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3589 MachineRegisterInfo &MRI) {
3590 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3591 Register JTAddr = I.getOperand(0).getReg();
3592 unsigned JTI = I.getOperand(1).getIndex();
3593 Register Index = I.getOperand(2).getReg();
3594
3595 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3596 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3597
3598 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3599 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3600 {TargetReg, ScratchReg}, {JTAddr, Index})
3601 .addJumpTableIndex(JTI);
3602 // Save the jump table info.
3603 MIB.buildInstr(TargetOpcode::JUMP_TABLE_DEBUG_INFO, {},
3604 {static_cast<int64_t>(JTI)});
3605 // Build the indirect branch.
3606 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3607 I.eraseFromParent();
3608 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3609}
3610
3611 bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3612 MachineRegisterInfo &MRI) {
3613 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3614 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3615
3616 Register DstReg = I.getOperand(0).getReg();
3617 unsigned JTI = I.getOperand(1).getIndex();
3618 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
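// For illustration, MOVaddrJT later expands to roughly (the label name is a
// placeholder):
//   adrp xN, .LJTI0_0
//   add  xN, xN, :lo12:.LJTI0_0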
3619 auto MovMI =
3620 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3621 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3622 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3623 I.eraseFromParent();
3624 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3625}
3626
3627 bool AArch64InstructionSelector::selectTLSGlobalValue(
3628 MachineInstr &I, MachineRegisterInfo &MRI) {
3629 if (!STI.isTargetMachO())
3630 return false;
3631 MachineFunction &MF = *I.getParent()->getParent();
3632 MF.getFrameInfo().setAdjustsStack(true);
3633
3634 const auto &GlobalOp = I.getOperand(1);
3635 assert(GlobalOp.getOffset() == 0 &&
3636 "Shouldn't have an offset on TLS globals!");
3637 const GlobalValue &GV = *GlobalOp.getGlobal();
3638
3639 auto LoadGOT =
3640 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3641 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3642
3643 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3644 {LoadGOT.getReg(0)})
3645 .addImm(0);
3646
3647 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3648 // TLS calls preserve all registers except those that absolutely must be
3649 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3650 // silly).
3651 MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
3652 .addUse(AArch64::X0, RegState::Implicit)
3653 .addDef(AArch64::X0, RegState::Implicit)
3654 .addRegMask(TRI.getTLSCallPreservedMask());
3655
3656 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3657 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3658 MRI);
3659 I.eraseFromParent();
3660 return true;
3661}
3662
3663MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3664 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3665 MachineIRBuilder &MIRBuilder) const {
3666 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3667
3668 auto BuildFn = [&](unsigned SubregIndex) {
3669 auto Ins =
3670 MIRBuilder
3671 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3672 .addImm(SubregIndex);
3673 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3674 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3675 return &*Ins;
3676 };
3677
3678 switch (EltSize) {
3679 case 8:
3680 return BuildFn(AArch64::bsub);
3681 case 16:
3682 return BuildFn(AArch64::hsub);
3683 case 32:
3684 return BuildFn(AArch64::ssub);
3685 case 64:
3686 return BuildFn(AArch64::dsub);
3687 default:
3688 return nullptr;
3689 }
3690}
3691
3692 MachineInstr *
3693 AArch64InstructionSelector::emitNarrowVector(Register DstReg, Register SrcReg,
3694 MachineIRBuilder &MIB,
3695 MachineRegisterInfo &MRI) const {
3696 LLT DstTy = MRI.getType(DstReg);
3697 const TargetRegisterClass *RC =
3698 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(SrcReg, MRI, TRI));
3699 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3700 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3701 return nullptr;
3702 }
3703 unsigned SubReg = 0;
3704 if (!getSubRegForClass(RC, TRI, SubReg))
3705 return nullptr;
3706 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3707 LLVM_DEBUG(dbgs() << "Unsupported destination size! ("
3708 << DstTy.getSizeInBits() << "\n");
3709 return nullptr;
3710 }
3711 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3712 .addReg(SrcReg, 0, SubReg);
3713 RBI.constrainGenericRegister(DstReg, *RC, MRI);
3714 return Copy;
3715}
3716
3717 bool AArch64InstructionSelector::selectMergeValues(
3718 MachineInstr &I, MachineRegisterInfo &MRI) {
3719 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3720 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3721 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3722 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3723 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3724
3725 if (I.getNumOperands() != 3)
3726 return false;
3727
3728 // Merging 2 s64s into an s128.
3729 if (DstTy == LLT::scalar(128)) {
3730 if (SrcTy.getSizeInBits() != 64)
3731 return false;
3732 Register DstReg = I.getOperand(0).getReg();
3733 Register Src1Reg = I.getOperand(1).getReg();
3734 Register Src2Reg = I.getOperand(2).getReg();
3735 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3736 MachineInstr *InsMI = emitLaneInsert(std::nullopt, Tmp.getReg(0), Src1Reg,
3737 /* LaneIdx */ 0, RB, MIB);
3738 if (!InsMI)
3739 return false;
3740 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
3741 Src2Reg, /* LaneIdx */ 1, RB, MIB);
3742 if (!Ins2MI)
3743 return false;
3744 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3745 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
3746 I.eraseFromParent();
3747 return true;
3748 }
3749
3750 if (RB.getID() != AArch64::GPRRegBankID)
3751 return false;
3752
3753 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3754 return false;
3755
3756 auto *DstRC = &AArch64::GPR64RegClass;
3757 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
3758 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3759 TII.get(TargetOpcode::SUBREG_TO_REG))
3760 .addDef(SubToRegDef)
3761 .addImm(0)
3762 .addUse(I.getOperand(1).getReg())
3763 .addImm(AArch64::sub_32);
3764 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
3765 // Need to anyext the second scalar before we can use bfm
3766 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3767 TII.get(TargetOpcode::SUBREG_TO_REG))
3768 .addDef(SubToRegDef2)
3769 .addImm(0)
3770 .addUse(I.getOperand(2).getReg())
3771 .addImm(AArch64::sub_32);
3772 MachineInstr &BFM =
3773 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
3774 .addDef(I.getOperand(0).getReg())
3775 .addUse(SubToRegDef)
3776 .addUse(SubToRegDef2)
3777 .addImm(32)
3778 .addImm(31);
3779 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
3780 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
3781 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
3782 I.eraseFromParent();
3783 return true;
3784}
3785
3786static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3787 const unsigned EltSize) {
3788 // Choose a lane copy opcode and subregister based off of the size of the
3789 // vector's elements.
3790 switch (EltSize) {
3791 case 8:
3792 CopyOpc = AArch64::DUPi8;
3793 ExtractSubReg = AArch64::bsub;
3794 break;
3795 case 16:
3796 CopyOpc = AArch64::DUPi16;
3797 ExtractSubReg = AArch64::hsub;
3798 break;
3799 case 32:
3800 CopyOpc = AArch64::DUPi32;
3801 ExtractSubReg = AArch64::ssub;
3802 break;
3803 case 64:
3804 CopyOpc = AArch64::DUPi64;
3805 ExtractSubReg = AArch64::dsub;
3806 break;
3807 default:
3808 // Unknown size, bail out.
3809 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
3810 return false;
3811 }
3812 return true;
3813}
3814
3815MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3816 std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
3817 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
3818 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3819 unsigned CopyOpc = 0;
3820 unsigned ExtractSubReg = 0;
3821 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
3822 LLVM_DEBUG(
3823 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
3824 return nullptr;
3825 }
3826
3827 const TargetRegisterClass *DstRC =
3828 getRegClassForTypeOnBank(ScalarTy, DstRB, true);
3829 if (!DstRC) {
3830 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
3831 return nullptr;
3832 }
3833
3834 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
3835 const LLT &VecTy = MRI.getType(VecReg);
3836 const TargetRegisterClass *VecRC =
3837 getRegClassForTypeOnBank(VecTy, VecRB, true);
3838 if (!VecRC) {
3839 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3840 return nullptr;
3841 }
3842
3843 // The register that we're going to copy into.
3844 Register InsertReg = VecReg;
3845 if (!DstReg)
3846 DstReg = MRI.createVirtualRegister(DstRC);
3847 // If the lane index is 0, we just use a subregister COPY.
3848 if (LaneIdx == 0) {
3849 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
3850 .addReg(VecReg, 0, ExtractSubReg);
3851 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3852 return &*Copy;
3853 }
3854
3855 // Lane copies require 128-bit wide registers. If we're dealing with an
3856 // unpacked vector, then we need to move up to that width. Insert an implicit
3857 // def and a subregister insert to get us there.
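// For illustration (register names are placeholders), extracting lane 1 from
// a 64-bit vector roughly becomes:
//   %undef:fpr128 = IMPLICIT_DEF
//   %full:fpr128 = INSERT_SUBREG %undef, %vec, %subreg.dsub
//   %dst = DUPi32 %full, 1              ; opcode depends on the element size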
3858 if (VecTy.getSizeInBits() != 128) {
3859 MachineInstr *ScalarToVector = emitScalarToVector(
3860 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
3861 if (!ScalarToVector)
3862 return nullptr;
3863 InsertReg = ScalarToVector->getOperand(0).getReg();
3864 }
3865
3866 MachineInstr *LaneCopyMI =
3867 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
3868 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
3869
3870 // Make sure that we actually constrain the initial copy.
3871 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3872 return LaneCopyMI;
3873}
3874
3875 bool AArch64InstructionSelector::selectExtractElt(
3876 MachineInstr &I, MachineRegisterInfo &MRI) {
3877 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
3878 "unexpected opcode!");
3879 Register DstReg = I.getOperand(0).getReg();
3880 const LLT NarrowTy = MRI.getType(DstReg);
3881 const Register SrcReg = I.getOperand(1).getReg();
3882 const LLT WideTy = MRI.getType(SrcReg);
3883 (void)WideTy;
3884 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
3885 "source register size too small!");
3886 assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
3887
3888 // Need the lane index to determine the correct copy opcode.
3889 MachineOperand &LaneIdxOp = I.getOperand(2);
3890 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
3891
3892 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
3893 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
3894 return false;
3895 }
3896
3897 // Find the index to extract from.
3898 auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
3899 if (!VRegAndVal)
3900 return false;
3901 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
3902
3903
3904 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3905 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
3906 LaneIdx, MIB);
3907 if (!Extract)
3908 return false;
3909
3910 I.eraseFromParent();
3911 return true;
3912}
3913
3914 bool AArch64InstructionSelector::selectSplitVectorUnmerge(
3915 MachineInstr &I, MachineRegisterInfo &MRI) {
3916 unsigned NumElts = I.getNumOperands() - 1;
3917 Register SrcReg = I.getOperand(NumElts).getReg();
3918 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
3919 const LLT SrcTy = MRI.getType(SrcReg);
3920
3921 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
3922 if (SrcTy.getSizeInBits() > 128) {
3923 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
3924 return false;
3925 }
3926
3927 // We implement a split vector operation by treating the sub-vectors as
3928 // scalars and extracting them.
3929 const RegisterBank &DstRB =
3930 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
3931 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
3932 Register Dst = I.getOperand(OpIdx).getReg();
3933 MachineInstr *Extract =
3934 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
3935 if (!Extract)
3936 return false;
3937 }
3938 I.eraseFromParent();
3939 return true;
3940}
3941
3942 bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
3943 MachineRegisterInfo &MRI) {
3944 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
3945 "unexpected opcode");
3946
3947 // TODO: Handle unmerging into GPRs and from scalars to scalars.
3948 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
3949 AArch64::FPRRegBankID ||
3950 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3951 AArch64::FPRRegBankID) {
3952 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
3953 "currently unsupported.\n");
3954 return false;
3955 }
3956
3957 // The last operand is the vector source register, and every other operand is
3958 // a register to unpack into.
3959 unsigned NumElts = I.getNumOperands() - 1;
3960 Register SrcReg = I.getOperand(NumElts).getReg();
3961 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
3962 const LLT WideTy = MRI.getType(SrcReg);
3963 (void)WideTy;
3964 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
3965 "can only unmerge from vector or s128 types!");
3966 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
3967 "source register size too small!");
3968
3969 if (!NarrowTy.isScalar())
3970 return selectSplitVectorUnmerge(I, MRI);
3971
3972 // Choose a lane copy opcode and subregister based off of the size of the
3973 // vector's elements.
3974 unsigned CopyOpc = 0;
3975 unsigned ExtractSubReg = 0;
3976 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
3977 return false;
3978
3979 // Set up for the lane copies.
3980 MachineBasicBlock &MBB = *I.getParent();
3981
3982 // Stores the registers we'll be copying from.
3983 SmallVector<Register, 4> InsertRegs;
3984
3985 // We'll use the first register twice, so we only need NumElts-1 registers.
3986 unsigned NumInsertRegs = NumElts - 1;
3987
3988 // If our elements fit into exactly 128 bits, then we can copy from the source
3989 // directly. Otherwise, we need to do a bit of setup with some subregister
3990 // inserts.
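// For example, unmerging a v4s32 into four s32 values can copy lanes straight
// out of the 128-bit source, while a v2s32 source (64 bits) first has to be
// widened with the subregister inserts below.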
3991 if (NarrowTy.getSizeInBits() * NumElts == 128) {
3992 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
3993 } else {
3994 // No. We have to perform subregister inserts. For each insert, create an
3995 // implicit def and a subregister insert, and save the register we create.
3996 const TargetRegisterClass *RC = getRegClassForTypeOnBank(
3997 LLT::fixed_vector(NumElts, WideTy.getScalarSizeInBits()),
3998 *RBI.getRegBank(SrcReg, MRI, TRI));
3999 unsigned SubReg = 0;
4000 bool Found = getSubRegForClass(RC, TRI, SubReg);
4001 (void)Found;
4002 assert(Found && "expected to find last operand's subreg idx");
4003 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4004 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4005 MachineInstr &ImpDefMI =
4006 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
4007 ImpDefReg);
4008
4009 // Now, create the subregister insert from SrcReg.
4010 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4011 MachineInstr &InsMI =
4012 *BuildMI(MBB, I, I.getDebugLoc(),
4013 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4014 .addUse(ImpDefReg)
4015 .addUse(SrcReg)
4016 .addImm(SubReg);
4017
4018 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
4019 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
4020
4021 // Save the register so that we can copy from it after.
4022 InsertRegs.push_back(InsertReg);
4023 }
4024 }
4025
4026 // Now that we've created any necessary subregister inserts, we can
4027 // create the copies.
4028 //
4029 // Perform the first copy separately as a subregister copy.
4030 Register CopyTo = I.getOperand(0).getReg();
4031 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4032 .addReg(InsertRegs[0], 0, ExtractSubReg);
4033 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
4034
4035 // Now, perform the remaining copies as vector lane copies.
4036 unsigned LaneIdx = 1;
4037 for (Register InsReg : InsertRegs) {
4038 Register CopyTo = I.getOperand(LaneIdx).getReg();
4039 MachineInstr &CopyInst =
4040 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
4041 .addUse(InsReg)
4042 .addImm(LaneIdx);
4043 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
4044 ++LaneIdx;
4045 }
4046
4047 // Separately constrain the first copy's destination. Because of the
4048 // limitation in constrainOperandRegClass, we can't guarantee that this will
4049 // actually be constrained. So, do it ourselves using the second operand.
4050 const TargetRegisterClass *RC =
4051 MRI.getRegClassOrNull(I.getOperand(1).getReg());
4052 if (!RC) {
4053 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
4054 return false;
4055 }
4056
4057 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
4058 I.eraseFromParent();
4059 return true;
4060}
4061
4062 bool AArch64InstructionSelector::selectConcatVectors(
4063 MachineInstr &I, MachineRegisterInfo &MRI) {
4064 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4065 "Unexpected opcode");
4066 Register Dst = I.getOperand(0).getReg();
4067 Register Op1 = I.getOperand(1).getReg();
4068 Register Op2 = I.getOperand(2).getReg();
4069 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4070 if (!ConcatMI)
4071 return false;
4072 I.eraseFromParent();
4073 return true;
4074}
4075
4076unsigned
4077AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4078 MachineFunction &MF) const {
4079 Type *CPTy = CPVal->getType();
4080 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
4081
4082 MachineConstantPool *MCP = MF.getConstantPool();
4083 return MCP->getConstantPoolIndex(CPVal, Alignment);
4084}
4085
4086MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4087 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4088 const TargetRegisterClass *RC;
4089 unsigned Opc;
4090 bool IsTiny = TM.getCodeModel() == CodeModel::Tiny;
4091 unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
4092 switch (Size) {
4093 case 16:
4094 RC = &AArch64::FPR128RegClass;
4095 Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
4096 break;
4097 case 8:
4098 RC = &AArch64::FPR64RegClass;
4099 Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
4100 break;
4101 case 4:
4102 RC = &AArch64::FPR32RegClass;
4103 Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
4104 break;
4105 case 2:
4106 RC = &AArch64::FPR16RegClass;
4107 Opc = AArch64::LDRHui;
4108 break;
4109 default:
4110 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4111 << *CPVal->getType());
4112 return nullptr;
4113 }
4114
4115 MachineInstr *LoadMI = nullptr;
4116 auto &MF = MIRBuilder.getMF();
4117 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4118 if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) {
4119 // Use load(literal) for tiny code model.
4120 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {}).addConstantPoolIndex(CPIdx);
4121 } else {
4122 auto Adrp =
4123 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4124 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
4125
4126 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {Adrp})
4127 .addConstantPoolIndex(
4128 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4129
4130 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
4131 }
4132
4133 MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4134 LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
4135 MachineMemOperand::MOLoad,
4136 Size, Align(Size)));
4137 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
4138 return LoadMI;
4139}
4140
4141 /// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
4142/// size and RB.
4143static std::pair<unsigned, unsigned>
4144getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4145 unsigned Opc, SubregIdx;
4146 if (RB.getID() == AArch64::GPRRegBankID) {
4147 if (EltSize == 8) {
4148 Opc = AArch64::INSvi8gpr;
4149 SubregIdx = AArch64::bsub;
4150 } else if (EltSize == 16) {
4151 Opc = AArch64::INSvi16gpr;
4152 SubregIdx = AArch64::ssub;
4153 } else if (EltSize == 32) {
4154 Opc = AArch64::INSvi32gpr;
4155 SubregIdx = AArch64::ssub;
4156 } else if (EltSize == 64) {
4157 Opc = AArch64::INSvi64gpr;
4158 SubregIdx = AArch64::dsub;
4159 } else {
4160 llvm_unreachable("invalid elt size!");
4161 }
4162 } else {
4163 if (EltSize == 8) {
4164 Opc = AArch64::INSvi8lane;
4165 SubregIdx = AArch64::bsub;
4166 } else if (EltSize == 16) {
4167 Opc = AArch64::INSvi16lane;
4168 SubregIdx = AArch64::hsub;
4169 } else if (EltSize == 32) {
4170 Opc = AArch64::INSvi32lane;
4171 SubregIdx = AArch64::ssub;
4172 } else if (EltSize == 64) {
4173 Opc = AArch64::INSvi64lane;
4174 SubregIdx = AArch64::dsub;
4175 } else {
4176 llvm_unreachable("invalid elt size!");
4177 }
4178 }
4179 return std::make_pair(Opc, SubregIdx);
4180}
4181
4182MachineInstr *AArch64InstructionSelector::emitInstr(
4183 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4184 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4185 const ComplexRendererFns &RenderFns) const {
4186 assert(Opcode && "Expected an opcode?");
4187 assert(!isPreISelGenericOpcode(Opcode) &&
4188 "Function should only be used to produce selected instructions!");
4189 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4190 if (RenderFns)
4191 for (auto &Fn : *RenderFns)
4192 Fn(MI);
4193 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4194 return &*MI;
4195}
4196
4197MachineInstr *AArch64InstructionSelector::emitAddSub(
4198 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4199 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4200 MachineIRBuilder &MIRBuilder) const {
4201 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4202 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4203 auto Ty = MRI.getType(LHS.getReg());
4204 assert(!Ty.isVector() && "Expected a scalar or pointer?");
4205 unsigned Size = Ty.getSizeInBits();
4206 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4207 bool Is32Bit = Size == 32;
4208
4209 // INSTRri form with positive arithmetic immediate.
4210 if (auto Fns = selectArithImmed(RHS))
4211 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4212 MIRBuilder, Fns);
4213
4214 // INSTRri form with negative arithmetic immediate.
4215 if (auto Fns = selectNegArithImmed(RHS))
4216 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4217 MIRBuilder, Fns);
4218
4219 // INSTRrx form.
4220 if (auto Fns = selectArithExtendedRegister(RHS))
4221 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4222 MIRBuilder, Fns);
4223
4224 // INSTRrs form.
4225 if (auto Fns = selectShiftedRegister(RHS))
4226 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4227 MIRBuilder, Fns);
4228 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4229 MIRBuilder);
4230}
4231
4232 MachineInstr *
4233 AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4234 MachineOperand &RHS,
4235 MachineIRBuilder &MIRBuilder) const {
4236 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4237 {{AArch64::ADDXri, AArch64::ADDWri},
4238 {AArch64::ADDXrs, AArch64::ADDWrs},
4239 {AArch64::ADDXrr, AArch64::ADDWrr},
4240 {AArch64::SUBXri, AArch64::SUBWri},
4241 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4242 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4243}
4244
4245 MachineInstr *
4246 AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4247 MachineOperand &RHS,
4248 MachineIRBuilder &MIRBuilder) const {
4249 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4250 {{AArch64::ADDSXri, AArch64::ADDSWri},
4251 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4252 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4253 {AArch64::SUBSXri, AArch64::SUBSWri},
4254 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4255 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4256}
4257
4258 MachineInstr *
4259 AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4260 MachineOperand &RHS,
4261 MachineIRBuilder &MIRBuilder) const {
4262 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4263 {{AArch64::SUBSXri, AArch64::SUBSWri},
4264 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4265 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4266 {AArch64::ADDSXri, AArch64::ADDSWri},
4267 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4268 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4269}
4270
4271 MachineInstr *
4272 AArch64InstructionSelector::emitADCS(Register Dst, MachineOperand &LHS,
4273 MachineOperand &RHS,
4274 MachineIRBuilder &MIRBuilder) const {
4275 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4276 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4277 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4278 static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr};
4279 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4280}
4281
4282 MachineInstr *
4283 AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS,
4284 MachineOperand &RHS,
4285 MachineIRBuilder &MIRBuilder) const {
4286 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4287 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4288 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4289 static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr};
4290 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4291}
4292
4293 MachineInstr *
4294 AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4295 MachineIRBuilder &MIRBuilder) const {
4296 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4297 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4298 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4299 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4300}
4301
4302 MachineInstr *
4303 AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4304 MachineIRBuilder &MIRBuilder) const {
4305 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4306 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4307 LLT Ty = MRI.getType(LHS.getReg());
4308 unsigned RegSize = Ty.getSizeInBits();
4309 bool Is32Bit = (RegSize == 32);
4310 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4311 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4312 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4313 // ANDS needs a logical immediate for its immediate form. Check if we can
4314 // fold one in.
4315 if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4316 int64_t Imm = ValAndVReg->Value.getSExtValue();
4317
4318 if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
4319 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4320 TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
4321 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4322 return &*TstMI;
4323 }
4324 }
4325
4326 if (auto Fns = selectLogicalShiftedRegister(RHS))
4327 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4328 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4329}
4330
4331MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4332 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4333 MachineIRBuilder &MIRBuilder) const {
4334 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4335 assert(Predicate.isPredicate() && "Expected predicate?");
4336 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4337 LLT CmpTy = MRI.getType(LHS.getReg());
4338 assert(!CmpTy.isVector() && "Expected scalar or pointer");
4339 unsigned Size = CmpTy.getSizeInBits();
4340 (void)Size;
4341 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4342 // Fold the compare into a cmn or tst if possible.
4343 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4344 return FoldCmp;
4345 auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
4346 return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4347}
4348
4349MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4350 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4351 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4352#ifndef NDEBUG
4353 LLT Ty = MRI.getType(Dst);
4354 assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4355 "Expected a 32-bit scalar register?");
4356#endif
4357 const Register ZReg = AArch64::WZR;
4358 AArch64CC::CondCode CC1, CC2;
4359 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
4360 auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
4361 if (CC2 == AArch64CC::AL)
4362 return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
4363 MIRBuilder);
4364 const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4365 Register Def1Reg = MRI.createVirtualRegister(RC);
4366 Register Def2Reg = MRI.createVirtualRegister(RC);
4367 auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
4368 emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
4369 emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
4370 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4371 constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4372 return &*OrMI;
4373}
4374
4375MachineInstr *AArch64InstructionSelector::emitFPCompare(
4376 Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
4377 std::optional<CmpInst::Predicate> Pred) const {
4378 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4379 LLT Ty = MRI.getType(LHS);
4380 if (Ty.isVector())
4381 return nullptr;
4382 unsigned OpSize = Ty.getSizeInBits();
4383 assert(OpSize == 16 || OpSize == 32 || OpSize == 64);
4384
4385 // If this is a compare against +0.0, then we don't have
4386 // to explicitly materialize a constant.
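// For example, a G_FCMP against +0.0 can use the immediate form directly
// (roughly "fcmp s0, #0.0", i.e. FCMPSri) instead of first materializing the
// zero in a register.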
4387 const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4388 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4389
4390 auto IsEqualityPred = [](CmpInst::Predicate P) {
4391 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4392 P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4393 };
4394 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4395 // Try commutating the operands.
4396 const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4397 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4398 ShouldUseImm = true;
4399 std::swap(LHS, RHS);
4400 }
4401 }
4402 unsigned CmpOpcTbl[2][3] = {
4403 {AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr},
4404 {AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}};
4405 unsigned CmpOpc =
4406 CmpOpcTbl[ShouldUseImm][OpSize == 16 ? 0 : (OpSize == 32 ? 1 : 2)];
4407
4408 // Partially build the compare. Decide if we need to add a use for the
4409 // third operand based off whether or not we're comparing against 0.0.
4410 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4411 CmpMI.setMIFlags(MachineInstr::NoFPExcept);
4412 if (!ShouldUseImm)
4413 CmpMI.addUse(RHS);
4414 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4415 return &*CmpMI;
4416}
4417
4418MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4419 std::optional<Register> Dst, Register Op1, Register Op2,
4420 MachineIRBuilder &MIRBuilder) const {
4421 // We implement a vector concat by:
4422 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4423 // 2. Insert the upper vector into the destination's upper element
4424 // TODO: some of this code is common with G_BUILD_VECTOR handling.
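// For illustration (register names are placeholders), concatenating two
// 64-bit vectors roughly becomes:
//   %lo:fpr128 = INSERT_SUBREG (IMPLICIT_DEF), %op1, %subreg.dsub
//   %hi:fpr128 = INSERT_SUBREG (IMPLICIT_DEF), %op2, %subreg.dsub
//   %dst = INSvi64lane %lo, 1, %hi, 0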
4425 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4426
4427 const LLT Op1Ty = MRI.getType(Op1);
4428 const LLT Op2Ty = MRI.getType(Op2);
4429
4430 if (Op1Ty != Op2Ty) {
4431 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4432 return nullptr;
4433 }
4434 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4435
4436 if (Op1Ty.getSizeInBits() >= 128) {
4437 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4438 return nullptr;
4439 }
4440
4441 // At the moment we just support 64 bit vector concats.
4442 if (Op1Ty.getSizeInBits() != 64) {
4443 LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
4444 return nullptr;
4445 }
4446
4447 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4448 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4449 const TargetRegisterClass *DstRC =
4450 getRegClassForTypeOnBank(Op1Ty.multiplyElements(2), FPRBank);
4451
4452 MachineInstr *WidenedOp1 =
4453 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4454 MachineInstr *WidenedOp2 =
4455 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4456 if (!WidenedOp1 || !WidenedOp2) {
4457 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4458 return nullptr;
4459 }
4460
4461 // Now do the insert of the upper element.
4462 unsigned InsertOpc, InsSubRegIdx;
4463 std::tie(InsertOpc, InsSubRegIdx) =
4464 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4465
4466 if (!Dst)
4467 Dst = MRI.createVirtualRegister(DstRC);
4468 auto InsElt =
4469 MIRBuilder
4470 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4471 .addImm(1) /* Lane index */
4472 .addUse(WidenedOp2->getOperand(0).getReg())
4473 .addImm(0);
4474 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4475 return &*InsElt;
4476}
4477
4478 MachineInstr *
4479 AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
4480 Register Src2, AArch64CC::CondCode Pred,
4481 MachineIRBuilder &MIRBuilder) const {
4482 auto &MRI = *MIRBuilder.getMRI();
4483 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
4484 // If we used a register class, then this won't necessarily have an LLT.
4485 // Compute the size based off whether or not we have a class or bank.
4486 unsigned Size;
4487 if (const auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
4488 Size = TRI.getRegSizeInBits(*RC);
4489 else
4490 Size = MRI.getType(Dst).getSizeInBits();
4491 // Some opcodes use s1.
4492 assert(Size <= 64 && "Expected 64 bits or less only!");
4493 static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4494 unsigned Opc = OpcTable[Size == 64];
4495 auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
4496 constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
4497 return &*CSINC;
4498}
4499
4500MachineInstr *AArch64InstructionSelector::emitCarryIn(MachineInstr &I,
4501 Register CarryReg) {
4502 MachineRegisterInfo *MRI = MIB.getMRI();
4503 unsigned Opcode = I.getOpcode();
4504
4505 // If the instruction is a SUB, we need to negate the carry,
4506 // because borrowing is indicated by carry-flag == 0.
4507 bool NeedsNegatedCarry =
4508 (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE);
4509
4510 // If the previous instruction will already produce the correct carry, do not
4511 // emit a carry generating instruction. E.g. for G_UADDE/G_USUBE sequences
4512 // generated during legalization of wide add/sub. This optimization depends on
4513 // these sequences not being interrupted by other instructions.
4514 // We have to select the previous instruction before the carry-using
4515 // instruction is deleted by the calling function, otherwise the previous
4516 // instruction might become dead and would get deleted.
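// For example, in a chain produced when legalizing a wide add (operands are
// placeholders):
//   %lo:_(s64), %c:_(s1) = G_UADDO %a0, %b0
//   %hi:_(s64), %c2:_(s1) = G_UADDE %a1, %b1, %c
// the ADDS selected for the G_UADDO already leaves the needed carry in NZCV,
// so no extra carry-setting instruction is emitted here.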
4517 MachineInstr *SrcMI = MRI->getVRegDef(CarryReg);
4518 if (SrcMI == I.getPrevNode()) {
4519 if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(SrcMI)) {
4520 bool ProducesNegatedCarry = CarrySrcMI->isSub();
4521 if (NeedsNegatedCarry == ProducesNegatedCarry &&
4522 CarrySrcMI->isUnsigned() &&
4523 CarrySrcMI->getCarryOutReg() == CarryReg &&
4524 selectAndRestoreState(*SrcMI))
4525 return nullptr;
4526 }
4527 }
4528
4529 Register DeadReg = MRI->createVirtualRegister(&AArch64::GPR32RegClass);
4530
4531 if (NeedsNegatedCarry) {
4532 // (0 - Carry) sets !C in NZCV when Carry == 1
4533 Register ZReg = AArch64::WZR;
4534 return emitInstr(AArch64::SUBSWrr, {DeadReg}, {ZReg, CarryReg}, MIB);
4535 }
4536
4537 // (Carry - 1) sets !C in NZCV when Carry == 0
4538 auto Fns = select12BitValueWithLeftShift(1);
4539 return emitInstr(AArch64::SUBSWri, {DeadReg}, {CarryReg}, MIB, Fns);
4540}
4541
4542 bool AArch64InstructionSelector::selectOverflowOp(MachineInstr &I,
4543 MachineRegisterInfo &MRI) {
4544 auto &CarryMI = cast<GAddSubCarryOut>(I);
4545
4546 if (auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(&I)) {
4547 // Set NZCV carry according to carry-in VReg
4548 emitCarryIn(I, CarryInMI->getCarryInReg());
4549 }
4550
4551 // Emit the operation and get the correct condition code.
4552 auto OpAndCC = emitOverflowOp(I.getOpcode(), CarryMI.getDstReg(),
4553 CarryMI.getLHS(), CarryMI.getRHS(), MIB);
4554
4555 Register CarryOutReg = CarryMI.getCarryOutReg();
4556
4557 // Don't convert carry-out to VReg if it is never used
4558 if (!MRI.use_nodbg_empty(CarryOutReg)) {
4559 // Now, put the overflow result in the register given by the first operand
4560 // to the overflow op. CSINC increments the result when the predicate is
4561 // false, so to get the increment when it's true, we need to use the
4562 // inverse. In this case, we want to increment when carry is set.
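// For example, for G_UADDO (carry condition HS) this roughly emits:
//   %carry:gpr32 = CSINC $wzr, $wzr, LO    ; 1 when HS holds, 0 otherwise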
4563 Register ZReg = AArch64::WZR;
4564 emitCSINC(/*Dst=*/CarryOutReg, /*Src1=*/ZReg, /*Src2=*/ZReg,
4565 getInvertedCondCode(OpAndCC.second), MIB);
4566 }
4567
4568 I.eraseFromParent();
4569 return true;
4570}
4571
4572std::pair<MachineInstr *, AArch64CC::CondCode>
4573AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4574 MachineOperand &LHS,
4575 MachineOperand &RHS,
4576 MachineIRBuilder &MIRBuilder) const {
4577 switch (Opcode) {
4578 default:
4579 llvm_unreachable("Unexpected opcode!");
4580 case TargetOpcode::G_SADDO:
4581 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4582 case TargetOpcode::G_UADDO:
4583 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4584 case TargetOpcode::G_SSUBO:
4585 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4586 case TargetOpcode::G_USUBO:
4587 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4588 case TargetOpcode::G_SADDE:
4589 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4590 case TargetOpcode::G_UADDE:
4591 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4592 case TargetOpcode::G_SSUBE:
4593 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4594 case TargetOpcode::G_USUBE:
4595 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4596 }
4597}
4598
4599/// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
4600/// expressed as a conjunction.
4601/// \param CanNegate Set to true if we can negate the whole sub-tree just by
4602/// changing the conditions on the CMP tests.
4603/// (this means we can call emitConjunctionRec() with
4604/// Negate==true on this sub-tree)
4605/// \param MustBeFirst Set to true if this subtree needs to be negated and we
4606/// cannot do the negation naturally. We are required to
4607/// emit the subtree first in this case.
4608 /// \param WillNegate Is true if we are called when the result of this
4609/// subexpression must be negated. This happens when the
4610/// outer expression is an OR. We can use this fact to know
4611/// that we have a double negation (or (or ...) ...) that
4612/// can be implemented for free.
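/// For example, a tree such as (G_AND (G_ICMP ...), (G_OR (G_ICMP ...),
/// (G_FCMP ...))) can be lowered to one CMP/FCMP followed by conditional
/// compares (CCMP/FCCMP), without materializing intermediate booleans.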
4613static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
4614 bool WillNegate, MachineRegisterInfo &MRI,
4615 unsigned Depth = 0) {
4616 if (!MRI.hasOneNonDBGUse(Val))
4617 return false;
4618 MachineInstr *ValDef = MRI.getVRegDef(Val);
4619 unsigned Opcode = ValDef->getOpcode();
4620 if (isa<GAnyCmp>(ValDef)) {
4621 CanNegate = true;
4622 MustBeFirst = false;
4623 return true;
4624 }
4625 // Protect against exponential runtime and stack overflow.
4626 if (Depth > 6)
4627 return false;
4628 if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
4629 bool IsOR = Opcode == TargetOpcode::G_OR;
4630 Register O0 = ValDef->getOperand(1).getReg();
4631 Register O1 = ValDef->getOperand(2).getReg();
4632 bool CanNegateL;
4633 bool MustBeFirstL;
4634 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, MRI, Depth + 1))
4635 return false;
4636 bool CanNegateR;
4637 bool MustBeFirstR;
4638 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, MRI, Depth + 1))
4639 return false;
4640
4641 if (MustBeFirstL && MustBeFirstR)
4642 return false;
4643
4644 if (IsOR) {
4645 // For an OR expression we need to be able to naturally negate at least
4646 // one side or we cannot do the transformation at all.
4647 if (!CanNegateL && !CanNegateR)
4648 return false;
4649 // If the result of the OR will be negated and we can naturally negate
4650 // the leaves, then this sub-tree as a whole negates naturally.
4651 CanNegate = WillNegate && CanNegateL && CanNegateR;
4652 // If we cannot naturally negate the whole sub-tree, then this must be
4653 // emitted first.
4654 MustBeFirst = !CanNegate;
4655 } else {
4656 assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
4657 // We cannot naturally negate an AND operation.
4658 CanNegate = false;
4659 MustBeFirst = MustBeFirstL || MustBeFirstR;
4660 }
4661 return true;
4662 }
4663 return false;
4664}
4665
4666 MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
4667 Register LHS, Register RHS, CmpInst::Predicate CC,
4668 AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
4669 MachineIRBuilder &MIB) const {
4670 auto &MRI = *MIB.getMRI();
4671 LLT OpTy = MRI.getType(LHS);
4672 unsigned CCmpOpc;
4673 std::optional<ValueAndVReg> C;
4674 if (CmpInst::isIntPredicate(CC)) {
4675 assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
4676 C = getIConstantVRegValWithLookThrough(RHS, MRI);
4677 if (!C || C->Value.sgt(31) || C->Value.slt(-31))
4678 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
4679 else if (C->Value.ule(31))
4680 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
4681 else
4682 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMNWi : AArch64::CCMNXi;
4683 } else {
4684 assert(OpTy.getSizeInBits() == 16 || OpTy.getSizeInBits() == 32 ||
4685 OpTy.getSizeInBits() == 64);
4686 switch (OpTy.getSizeInBits()) {
4687 case 16:
4688 assert(STI.hasFullFP16() && "Expected Full FP16 for fp16 comparisons");
4689 CCmpOpc = AArch64::FCCMPHrr;
4690 break;
4691 case 32:
4692 CCmpOpc = AArch64::FCCMPSrr;
4693 break;
4694 case 64:
4695 CCmpOpc = AArch64::FCCMPDrr;
4696 break;