1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
15#include "AArch64InstrInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
41#include "llvm/IR/Constants.h"
44#include "llvm/IR/IntrinsicsAArch64.h"
46#include "llvm/IR/Type.h"
47#include "llvm/Pass.h"
48#include "llvm/Support/Debug.h"
50#include <optional>
51
52#define DEBUG_TYPE "aarch64-isel"
53
54using namespace llvm;
55using namespace MIPatternMatch;
56using namespace AArch64GISelUtils;
57
58namespace llvm {
59class BlockFrequencyInfo;
60class ProfileSummaryInfo;
61}
62
63namespace {
64
65#define GET_GLOBALISEL_PREDICATE_BITSET
66#include "AArch64GenGlobalISel.inc"
67#undef GET_GLOBALISEL_PREDICATE_BITSET
68
69
70class AArch64InstructionSelector : public InstructionSelector {
71public:
72 AArch64InstructionSelector(const AArch64TargetMachine &TM,
73 const AArch64Subtarget &STI,
74 const AArch64RegisterBankInfo &RBI);
75
76 bool select(MachineInstr &I) override;
77 static const char *getName() { return DEBUG_TYPE; }
78
79 void setupMF(MachineFunction &MF, GISelKnownBits *KB,
80 CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI,
81 BlockFrequencyInfo *BFI) override {
82 InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
83 MIB.setMF(MF);
84
85 // hasFnAttribute() is expensive to call on every BRCOND selection, so
86 // cache it here for each run of the selector.
87 ProduceNonFlagSettingCondBr =
88 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
89 MFReturnAddr = Register();
90
91 processPHIs(MF);
92 }
93
94private:
95 /// tblgen-erated 'select' implementation, used as the initial selector for
96 /// the patterns that don't require complex C++.
97 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
98
99 // A lowering phase that runs before any selection attempts.
100 // Returns true if the instruction was modified.
101 bool preISelLower(MachineInstr &I);
102
103 // An early selection function that runs before the selectImpl() call.
104 bool earlySelect(MachineInstr &I);
105
106 /// Save state that is shared between select calls, call select on \p I and
107 /// then restore the saved state. This can be used to recursively call select
108 /// within a select call.
109 bool selectAndRestoreState(MachineInstr &I);
110
111 // Do some preprocessing of G_PHIs before we begin selection.
112 void processPHIs(MachineFunction &MF);
113
114 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
115
116 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
117 bool contractCrossBankCopyIntoStore(MachineInstr &I,
118 MachineRegisterInfo &MRI);
119
120 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
121
122 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
123 MachineRegisterInfo &MRI) const;
124 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
125 MachineRegisterInfo &MRI) const;
126
127 ///@{
128 /// Helper functions for selectCompareBranch.
129 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
130 MachineIRBuilder &MIB) const;
131 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
132 MachineIRBuilder &MIB) const;
133 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
134 MachineIRBuilder &MIB) const;
135 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
136 MachineBasicBlock *DstMBB,
137 MachineIRBuilder &MIB) const;
138 ///@}
139
140 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
141 MachineRegisterInfo &MRI);
142
143 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
144 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
145
146 // Helper to generate an equivalent of scalar_to_vector into a new register,
147 // returned via 'Dst'.
148 MachineInstr *emitScalarToVector(unsigned EltSize,
149 const TargetRegisterClass *DstRC,
150 Register Scalar,
151 MachineIRBuilder &MIRBuilder) const;
152 /// Helper to narrow vector that was widened by emitScalarToVector.
153 /// Copy lowest part of 128-bit or 64-bit vector to 64-bit or 32-bit
154 /// vector, correspondingly.
155 MachineInstr *emitNarrowVector(Register DstReg, Register SrcReg,
156 MachineIRBuilder &MIRBuilder,
157 MachineRegisterInfo &MRI) const;
158
159 /// Emit a lane insert into \p DstReg, or a new vector register if
160 /// std::nullopt is provided.
161 ///
162 /// The lane inserted into is defined by \p LaneIdx. The vector source
163 /// register is given by \p SrcReg. The register containing the element is
164 /// given by \p EltReg.
165 MachineInstr *emitLaneInsert(std::optional<Register> DstReg, Register SrcReg,
166 Register EltReg, unsigned LaneIdx,
167 const RegisterBank &RB,
168 MachineIRBuilder &MIRBuilder) const;
169
170 /// Emit a sequence of instructions representing a constant \p CV for a
171 /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
172 ///
173 /// \returns the last instruction in the sequence on success, and nullptr
174 /// otherwise.
175 MachineInstr *emitConstantVector(Register Dst, Constant *CV,
176 MachineIRBuilder &MIRBuilder,
177 MachineRegisterInfo &MRI);
178
179 MachineInstr *tryAdvSIMDModImm8(Register Dst, unsigned DstSize, APInt Bits,
180 MachineIRBuilder &MIRBuilder);
181
182 MachineInstr *tryAdvSIMDModImm16(Register Dst, unsigned DstSize, APInt Bits,
183 MachineIRBuilder &MIRBuilder, bool Inv);
184
185 MachineInstr *tryAdvSIMDModImm32(Register Dst, unsigned DstSize, APInt Bits,
186 MachineIRBuilder &MIRBuilder, bool Inv);
187 MachineInstr *tryAdvSIMDModImm64(Register Dst, unsigned DstSize, APInt Bits,
188 MachineIRBuilder &MIRBuilder);
189 MachineInstr *tryAdvSIMDModImm321s(Register Dst, unsigned DstSize, APInt Bits,
190 MachineIRBuilder &MIRBuilder, bool Inv);
191 MachineInstr *tryAdvSIMDModImmFP(Register Dst, unsigned DstSize, APInt Bits,
192 MachineIRBuilder &MIRBuilder);
193
194 bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI);
195 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
196 MachineRegisterInfo &MRI);
197 /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
198 /// SUBREG_TO_REG.
199 bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
200 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
201 bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
202 bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
203
204 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
205 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
206 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
207 bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
208
209 /// Helper function to select vector load intrinsics like
210 /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
211 /// \p Opc is the opcode that the selected instruction should use.
212 /// \p NumVecs is the number of vector destinations for the instruction.
213 /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
214 bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
215 MachineInstr &I);
216 bool selectVectorLoadLaneIntrinsic(unsigned Opc, unsigned NumVecs,
217 MachineInstr &I);
218 void selectVectorStoreIntrinsic(MachineInstr &I, unsigned NumVecs,
219 unsigned Opc);
220 bool selectVectorStoreLaneIntrinsic(MachineInstr &I, unsigned NumVecs,
221 unsigned Opc);
222 bool selectIntrinsicWithSideEffects(MachineInstr &I,
223 MachineRegisterInfo &MRI);
224 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
225 bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI);
226 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
227 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
228 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
229 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
230 bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
231 bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
232
233 bool selectIndexedExtLoad(MachineInstr &I, MachineRegisterInfo &MRI);
234 bool selectIndexedLoad(MachineInstr &I, MachineRegisterInfo &MRI);
235 bool selectIndexedStore(GIndexedStore &I, MachineRegisterInfo &MRI);
236
237 unsigned emitConstantPoolEntry(const Constant *CPVal,
238 MachineFunction &MF) const;
239 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
240 MachineIRBuilder &MIRBuilder) const;
241
242 // Emit a vector concat operation.
243 MachineInstr *emitVectorConcat(std::optional<Register> Dst, Register Op1,
244 Register Op2,
245 MachineIRBuilder &MIRBuilder) const;
246
247 // Emit an integer compare between LHS and RHS, which checks for Predicate.
248 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
249 MachineOperand &Predicate,
250 MachineIRBuilder &MIRBuilder) const;
251
252 /// Emit a floating point comparison between \p LHS and \p RHS.
253 /// \p Pred if given is the intended predicate to use.
254 MachineInstr *
255 emitFPCompare(Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
256 std::optional<CmpInst::Predicate> = std::nullopt) const;
257
258 MachineInstr *
259 emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
260 std::initializer_list<llvm::SrcOp> SrcOps,
261 MachineIRBuilder &MIRBuilder,
262 const ComplexRendererFns &RenderFns = std::nullopt) const;
263 /// Helper function to emit an add or sub instruction.
264 ///
265 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
266 /// in a specific order.
267 ///
268 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
269 ///
270 /// \code
271 /// const std::array<std::array<unsigned, 2>, 5> Table {
272 /// {{AArch64::ADDXri, AArch64::ADDWri},
273 /// {AArch64::ADDXrs, AArch64::ADDWrs},
274 /// {AArch64::ADDXrr, AArch64::ADDWrr},
275 /// {AArch64::SUBXri, AArch64::SUBWri},
276 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
277 /// \endcode
278 ///
279 /// Each row in the table corresponds to a different addressing mode. Each
280 /// column corresponds to a different register size.
281 ///
282 /// \attention Rows must be structured as follows:
283 /// - Row 0: The ri opcode variants
284 /// - Row 1: The rs opcode variants
285 /// - Row 2: The rr opcode variants
286 /// - Row 3: The ri opcode variants for negative immediates
287 /// - Row 4: The rx opcode variants
288 ///
289 /// \attention Columns must be structured as follows:
290 /// - Column 0: The 64-bit opcode variants
291 /// - Column 1: The 32-bit opcode variants
292 ///
293 /// \p Dst is the destination register of the binop to emit.
294 /// \p LHS is the left-hand operand of the binop to emit.
295 /// \p RHS is the right-hand operand of the binop to emit.
296 MachineInstr *emitAddSub(
297 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
298 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
299 MachineIRBuilder &MIRBuilder) const;
300 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
301 MachineOperand &RHS,
302 MachineIRBuilder &MIRBuilder) const;
303 MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
304 MachineIRBuilder &MIRBuilder) const;
305 MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
306 MachineIRBuilder &MIRBuilder) const;
307 MachineInstr *emitADCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
308 MachineIRBuilder &MIRBuilder) const;
309 MachineInstr *emitSBCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
310 MachineIRBuilder &MIRBuilder) const;
311 MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
312 MachineIRBuilder &MIRBuilder) const;
313 MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
314 MachineIRBuilder &MIRBuilder) const;
315 MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
316 AArch64CC::CondCode CC,
317 MachineIRBuilder &MIRBuilder) const;
318 MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,
319 const RegisterBank &DstRB, LLT ScalarTy,
320 Register VecReg, unsigned LaneIdx,
321 MachineIRBuilder &MIRBuilder) const;
322 MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
323 AArch64CC::CondCode Pred,
324 MachineIRBuilder &MIRBuilder) const;
325 /// Emit a CSet for a FP compare.
326 ///
327 /// \p Dst is expected to be a 32-bit scalar register.
328 MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
329 MachineIRBuilder &MIRBuilder) const;
330
331 /// Emit an instruction that sets NZCV to the carry-in expected by \p I.
332 /// Might elide the instruction if the previous instruction already sets NZCV
333 /// correctly.
334 MachineInstr *emitCarryIn(MachineInstr &I, Register CarryReg);
335
336 /// Emit the overflow op for \p Opcode.
337 ///
338 /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
339 /// G_USUBO, etc.
340 std::pair<MachineInstr *, AArch64CC::CondCode>
341 emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
342 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
343
344 bool selectOverflowOp(MachineInstr &I, MachineRegisterInfo &MRI);
345
346 /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
347 /// In some cases this is even possible with OR operations in the expression.
348 MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC,
349 MachineIRBuilder &MIB) const;
350 MachineInstr *emitConditionalComparison(Register LHS, Register RHS,
351 CmpInst::Predicate CC,
352 AArch64CC::CondCode Predicate,
353 AArch64CC::CondCode OutCC,
354 MachineIRBuilder &MIB) const;
355 MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC,
356 bool Negate, Register CCOp,
357 AArch64CC::CondCode Predicate,
358 MachineIRBuilder &MIB) const;
359
360 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
361 /// \p IsNegative is true if the test should be "not zero".
362 /// This will also optimize the test bit instruction when possible.
363 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
364 MachineBasicBlock *DstMBB,
365 MachineIRBuilder &MIB) const;
366
367 /// Emit a CB(N)Z instruction which branches to \p DestMBB.
368 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
369 MachineBasicBlock *DestMBB,
370 MachineIRBuilder &MIB) const;
371
372 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
373 // We use these manually instead of using the importer since it doesn't
374 // support SDNodeXForm.
375 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
376 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
377 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
378 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
379
380 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
381 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
382 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
383
384 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
385 unsigned Size) const;
386
387 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
388 return selectAddrModeUnscaled(Root, 1);
389 }
390 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
391 return selectAddrModeUnscaled(Root, 2);
392 }
393 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
394 return selectAddrModeUnscaled(Root, 4);
395 }
396 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
397 return selectAddrModeUnscaled(Root, 8);
398 }
399 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
400 return selectAddrModeUnscaled(Root, 16);
401 }
402
403 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
404 /// from complex pattern matchers like selectAddrModeIndexed().
405 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
406 MachineRegisterInfo &MRI) const;
407
408 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
409 unsigned Size) const;
410 template <int Width>
411 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
412 return selectAddrModeIndexed(Root, Width / 8);
413 }
414
415 bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
416 const MachineRegisterInfo &MRI) const;
417 ComplexRendererFns
418 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
419 unsigned SizeInBytes) const;
420
421 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
422 /// or not a shift + extend should be folded into an addressing mode. Returns
423 /// None when this is not profitable or possible.
424 ComplexRendererFns
425 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
426 MachineOperand &Offset, unsigned SizeInBytes,
427 bool WantsExt) const;
428 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
429 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
430 unsigned SizeInBytes) const;
431 template <int Width>
432 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
433 return selectAddrModeXRO(Root, Width / 8);
434 }
435
436 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
437 unsigned SizeInBytes) const;
438 template <int Width>
439 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
440 return selectAddrModeWRO(Root, Width / 8);
441 }
442
443 ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
444 bool AllowROR = false) const;
445
446 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
447 return selectShiftedRegister(Root);
448 }
449
450 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
451 return selectShiftedRegister(Root, true);
452 }
453
454 /// Given an extend instruction, determine the correct shift-extend type for
455 /// that instruction.
456 ///
457 /// If the instruction is going to be used in a load or store, pass
458 /// \p IsLoadStore = true.
459 AArch64_AM::ShiftExtendType
460 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
461 bool IsLoadStore = false) const;
462
463 /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
464 ///
465 /// \returns Either \p Reg if no change was necessary, or the new register
466 /// created by moving \p Reg.
467 ///
468 /// Note: This uses emitCopy right now.
469 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
470 MachineIRBuilder &MIB) const;
471
472 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
473
474 ComplexRendererFns selectExtractHigh(MachineOperand &Root) const;
475
476 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
477 int OpIdx = -1) const;
478 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
479 int OpIdx = -1) const;
480 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
481 int OpIdx = -1) const;
482 void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
483 int OpIdx = -1) const;
484 void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
485 int OpIdx = -1) const;
486 void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
487 int OpIdx = -1) const;
488 void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
489 const MachineInstr &MI,
490 int OpIdx = -1) const;
491
492 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
493 void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
494
495 // Optimization methods.
496 bool tryOptSelect(GSelect &Sel);
497 bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
498 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
499 MachineOperand &Predicate,
500 MachineIRBuilder &MIRBuilder) const;
501
502 /// Return true if \p MI is a load or store of \p NumBytes bytes.
503 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
504
505 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
506 /// register zeroed out. In other words, the result of MI has been explicitly
507 /// zero extended.
508 bool isDef32(const MachineInstr &MI) const;
509
510 const AArch64TargetMachine &TM;
511 const AArch64Subtarget &STI;
512 const AArch64InstrInfo &TII;
513 const AArch64RegisterInfo &TRI;
514 const AArch64RegisterBankInfo &RBI;
515
516 bool ProduceNonFlagSettingCondBr = false;
517
518 // Some cached values used during selection.
519 // We use LR as a live-in register, and we keep track of it here as it can be
520 // clobbered by calls.
521 Register MFReturnAddr;
522
523 MachineIRBuilder MIB;
524
525#define GET_GLOBALISEL_PREDICATES_DECL
526#include "AArch64GenGlobalISel.inc"
527#undef GET_GLOBALISEL_PREDICATES_DECL
528
529// We declare the temporaries used by selectImpl() in the class to minimize the
530// cost of constructing placeholder values.
531#define GET_GLOBALISEL_TEMPORARIES_DECL
532#include "AArch64GenGlobalISel.inc"
533#undef GET_GLOBALISEL_TEMPORARIES_DECL
534};
535
536} // end anonymous namespace
537
538#define GET_GLOBALISEL_IMPL
539#include "AArch64GenGlobalISel.inc"
540#undef GET_GLOBALISEL_IMPL
541
542AArch64InstructionSelector::AArch64InstructionSelector(
543 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
544 const AArch64RegisterBankInfo &RBI)
545 : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
546 RBI(RBI),
547#define GET_GLOBALISEL_PREDICATES_INIT
548#include "AArch64GenGlobalISel.inc"
549#undef GET_GLOBALISEL_PREDICATES_INIT
550#define GET_GLOBALISEL_TEMPORARIES_INIT
551#include "AArch64GenGlobalISel.inc"
552#undef GET_GLOBALISEL_TEMPORARIES_INIT
553{
554}
555
556// FIXME: This should be target-independent, inferred from the types declared
557// for each class in the bank.
558//
559/// Given a register bank, and a type, return the smallest register class that
560/// can represent that combination.
561static const TargetRegisterClass *
562getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
563 bool GetAllRegSet = false) {
564 if (RB.getID() == AArch64::GPRRegBankID) {
565 if (Ty.getSizeInBits() <= 32)
566 return GetAllRegSet ? &AArch64::GPR32allRegClass
567 : &AArch64::GPR32RegClass;
568 if (Ty.getSizeInBits() == 64)
569 return GetAllRegSet ? &AArch64::GPR64allRegClass
570 : &AArch64::GPR64RegClass;
571 if (Ty.getSizeInBits() == 128)
572 return &AArch64::XSeqPairsClassRegClass;
573 return nullptr;
574 }
575
576 if (RB.getID() == AArch64::FPRRegBankID) {
577 switch (Ty.getSizeInBits()) {
578 case 8:
579 return &AArch64::FPR8RegClass;
580 case 16:
581 return &AArch64::FPR16RegClass;
582 case 32:
583 return &AArch64::FPR32RegClass;
584 case 64:
585 return &AArch64::FPR64RegClass;
586 case 128:
587 return &AArch64::FPR128RegClass;
588 }
589 return nullptr;
590 }
591
592 return nullptr;
593}
594
595/// Given a register bank, and size in bits, return the smallest register class
596/// that can represent that combination.
597static const TargetRegisterClass *
598getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
599 bool GetAllRegSet = false) {
600 unsigned RegBankID = RB.getID();
601
602 if (RegBankID == AArch64::GPRRegBankID) {
603 if (SizeInBits <= 32)
604 return GetAllRegSet ? &AArch64::GPR32allRegClass
605 : &AArch64::GPR32RegClass;
606 if (SizeInBits == 64)
607 return GetAllRegSet ? &AArch64::GPR64allRegClass
608 : &AArch64::GPR64RegClass;
609 if (SizeInBits == 128)
610 return &AArch64::XSeqPairsClassRegClass;
611 }
612
613 if (RegBankID == AArch64::FPRRegBankID) {
614 switch (SizeInBits) {
615 default:
616 return nullptr;
617 case 8:
618 return &AArch64::FPR8RegClass;
619 case 16:
620 return &AArch64::FPR16RegClass;
621 case 32:
622 return &AArch64::FPR32RegClass;
623 case 64:
624 return &AArch64::FPR64RegClass;
625 case 128:
626 return &AArch64::FPR128RegClass;
627 }
628 }
629
630 return nullptr;
631}
632
633/// Returns the correct subregister to use for a given register class.
634static bool getSubRegForClass(const TargetRegisterClass *RC,
635 const TargetRegisterInfo &TRI, unsigned &SubReg) {
636 switch (TRI.getRegSizeInBits(*RC)) {
637 case 8:
638 SubReg = AArch64::bsub;
639 break;
640 case 16:
641 SubReg = AArch64::hsub;
642 break;
643 case 32:
644 if (RC != &AArch64::FPR32RegClass)
645 SubReg = AArch64::sub_32;
646 else
647 SubReg = AArch64::ssub;
648 break;
649 case 64:
650 SubReg = AArch64::dsub;
651 break;
652 default:
653 LLVM_DEBUG(
654 dbgs() << "Couldn't find appropriate subregister for register class.");
655 return false;
656 }
657
658 return true;
659}
660
661/// Returns the minimum size the given register bank can hold.
662static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
663 switch (RB.getID()) {
664 case AArch64::GPRRegBankID:
665 return 32;
666 case AArch64::FPRRegBankID:
667 return 8;
668 default:
669 llvm_unreachable("Tried to get minimum size for unknown register bank.");
670 }
671}
672
673/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
674/// Helper function for functions like createDTuple and createQTuple.
675///
676/// \p RegClassIDs - The list of register class IDs available for some tuple of
677/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
678/// expected to contain between 2 and 4 tuple classes.
679///
680/// \p SubRegs - The list of subregister classes associated with each register
681/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
682/// subregister class. The index of each subregister class is expected to
683/// correspond with the index of each register class.
684///
685/// \returns Either the destination register of REG_SEQUENCE instruction that
686/// was created, or the 0th element of \p Regs if \p Regs contains a single
687/// element.
688static Register createTuple(ArrayRef<Register> Regs,
689 const unsigned RegClassIDs[],
690 const unsigned SubRegs[], MachineIRBuilder &MIB) {
691 unsigned NumRegs = Regs.size();
692 if (NumRegs == 1)
693 return Regs[0];
694 assert(NumRegs >= 2 && NumRegs <= 4 &&
695 "Only support between two and 4 registers in a tuple!");
696 const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
697 auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
698 auto RegSequence =
699 MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
700 for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
701 RegSequence.addUse(Regs[I]);
702 RegSequence.addImm(SubRegs[I]);
703 }
704 return RegSequence.getReg(0);
705}
706
707/// Create a tuple of D-registers using the registers in \p Regs.
708static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
709 static const unsigned RegClassIDs[] = {
710 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
711 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
712 AArch64::dsub2, AArch64::dsub3};
713 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
714}
715
716/// Create a tuple of Q-registers using the registers in \p Regs.
717static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
718 static const unsigned RegClassIDs[] = {
719 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
720 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
721 AArch64::qsub2, AArch64::qsub3};
722 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
723}
724
725static std::optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
726 auto &MI = *Root.getParent();
727 auto &MBB = *MI.getParent();
728 auto &MF = *MBB.getParent();
729 auto &MRI = MF.getRegInfo();
730 uint64_t Immed;
731 if (Root.isImm())
732 Immed = Root.getImm();
733 else if (Root.isCImm())
734 Immed = Root.getCImm()->getZExtValue();
735 else if (Root.isReg()) {
736 auto ValAndVReg =
737 getIConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
738 if (!ValAndVReg)
739 return std::nullopt;
740 Immed = ValAndVReg->Value.getSExtValue();
741 } else
742 return std::nullopt;
743 return Immed;
744}
745
746/// Check whether \p I is a currently unsupported binary operation:
747/// - it has an unsized type
748/// - an operand is not a vreg
749/// - all operands are not in the same bank
750/// These are checks that should someday live in the verifier, but right now,
751/// these are mostly limitations of the aarch64 selector.
752static bool unsupportedBinOp(const MachineInstr &I,
753 const AArch64RegisterBankInfo &RBI,
754 const MachineRegisterInfo &MRI,
755 const AArch64RegisterInfo &TRI) {
756 LLT Ty = MRI.getType(I.getOperand(0).getReg());
757 if (!Ty.isValid()) {
758 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
759 return true;
760 }
761
762 const RegisterBank *PrevOpBank = nullptr;
763 for (auto &MO : I.operands()) {
764 // FIXME: Support non-register operands.
765 if (!MO.isReg()) {
766 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
767 return true;
768 }
769
770 // FIXME: Can generic operations have physical registers operands? If
771 // so, this will need to be taught about that, and we'll need to get the
772 // bank out of the minimal class for the register.
773 // Either way, this needs to be documented (and possibly verified).
774 if (!MO.getReg().isVirtual()) {
775 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
776 return true;
777 }
778
779 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
780 if (!OpBank) {
781 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
782 return true;
783 }
784
785 if (PrevOpBank && OpBank != PrevOpBank) {
786 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
787 return true;
788 }
789 PrevOpBank = OpBank;
790 }
791 return false;
792}
793
794/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
795/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
796/// and of size \p OpSize.
797/// \returns \p GenericOpc if the combination is unsupported.
798static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
799 unsigned OpSize) {
800 switch (RegBankID) {
801 case AArch64::GPRRegBankID:
802 if (OpSize == 32) {
803 switch (GenericOpc) {
804 case TargetOpcode::G_SHL:
805 return AArch64::LSLVWr;
806 case TargetOpcode::G_LSHR:
807 return AArch64::LSRVWr;
808 case TargetOpcode::G_ASHR:
809 return AArch64::ASRVWr;
810 default:
811 return GenericOpc;
812 }
813 } else if (OpSize == 64) {
814 switch (GenericOpc) {
815 case TargetOpcode::G_PTR_ADD:
816 return AArch64::ADDXrr;
817 case TargetOpcode::G_SHL:
818 return AArch64::LSLVXr;
819 case TargetOpcode::G_LSHR:
820 return AArch64::LSRVXr;
821 case TargetOpcode::G_ASHR:
822 return AArch64::ASRVXr;
823 default:
824 return GenericOpc;
825 }
826 }
827 break;
828 case AArch64::FPRRegBankID:
829 switch (OpSize) {
830 case 32:
831 switch (GenericOpc) {
832 case TargetOpcode::G_FADD:
833 return AArch64::FADDSrr;
834 case TargetOpcode::G_FSUB:
835 return AArch64::FSUBSrr;
836 case TargetOpcode::G_FMUL:
837 return AArch64::FMULSrr;
838 case TargetOpcode::G_FDIV:
839 return AArch64::FDIVSrr;
840 default:
841 return GenericOpc;
842 }
843 case 64:
844 switch (GenericOpc) {
845 case TargetOpcode::G_FADD:
846 return AArch64::FADDDrr;
847 case TargetOpcode::G_FSUB:
848 return AArch64::FSUBDrr;
849 case TargetOpcode::G_FMUL:
850 return AArch64::FMULDrr;
851 case TargetOpcode::G_FDIV:
852 return AArch64::FDIVDrr;
853 case TargetOpcode::G_OR:
854 return AArch64::ORRv8i8;
855 default:
856 return GenericOpc;
857 }
858 }
859 break;
860 }
861 return GenericOpc;
862}
863
864/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
865/// appropriate for the (value) register bank \p RegBankID and of memory access
866/// size \p OpSize. This returns the variant with the base+unsigned-immediate
867/// addressing mode (e.g., LDRXui).
868/// \returns \p GenericOpc if the combination is unsupported.
869static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
870 unsigned OpSize) {
871 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
872 switch (RegBankID) {
873 case AArch64::GPRRegBankID:
874 switch (OpSize) {
875 case 8:
876 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
877 case 16:
878 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
879 case 32:
880 return isStore ? AArch64::STRWui : AArch64::LDRWui;
881 case 64:
882 return isStore ? AArch64::STRXui : AArch64::LDRXui;
883 }
884 break;
885 case AArch64::FPRRegBankID:
886 switch (OpSize) {
887 case 8:
888 return isStore ? AArch64::STRBui : AArch64::LDRBui;
889 case 16:
890 return isStore ? AArch64::STRHui : AArch64::LDRHui;
891 case 32:
892 return isStore ? AArch64::STRSui : AArch64::LDRSui;
893 case 64:
894 return isStore ? AArch64::STRDui : AArch64::LDRDui;
895 case 128:
896 return isStore ? AArch64::STRQui : AArch64::LDRQui;
897 }
898 break;
899 }
900 return GenericOpc;
901}
902
903/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
904/// to \p *To.
905///
906/// E.g "To = COPY SrcReg:SubReg"
907static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
908 const RegisterBankInfo &RBI, Register SrcReg,
909 const TargetRegisterClass *To, unsigned SubReg) {
910 assert(SrcReg.isValid() && "Expected a valid source register?");
911 assert(To && "Destination register class cannot be null");
912 assert(SubReg && "Expected a valid subregister");
913
914 MachineIRBuilder MIB(I);
915 auto SubRegCopy =
916 MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
917 MachineOperand &RegOp = I.getOperand(1);
918 RegOp.setReg(SubRegCopy.getReg(0));
919
920 // It's possible that the destination register won't be constrained. Make
921 // sure that happens.
922 if (!I.getOperand(0).getReg().isPhysical())
923 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
924
925 return true;
926}
927
928/// Helper function to get the source and destination register classes for a
929/// copy. Returns a std::pair containing the source register class for the
930/// copy, and the destination register class for the copy. If a register class
931/// cannot be determined, then it will be nullptr.
932static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
933getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
934 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
935 const RegisterBankInfo &RBI) {
936 Register DstReg = I.getOperand(0).getReg();
937 Register SrcReg = I.getOperand(1).getReg();
938 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
939 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
940 unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
941 unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
942
943 // Special casing for cross-bank copies of s1s. We can technically represent
944 // a 1-bit value with any size of register. The minimum size for a GPR is 32
945 // bits. So, we need to put the FPR on 32 bits as well.
946 //
947 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
948 // then we can pull it into the helpers that get the appropriate class for a
949 // register bank. Or make a new helper that carries along some constraint
950 // information.
951 if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
952 SrcSize = DstSize = 32;
953
954 return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
955 getMinClassForRegBank(DstRegBank, DstSize, true)};
956}
957
958// FIXME: We need some sort of API in RBI/TRI to allow generic code to
959// constrain operands of simple instructions given a TargetRegisterClass
960// and LLT
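/// Constrain each virtual register operand of \p I to a register class
/// derived from its register bank and type; operands that are not virtual
/// registers (or have no register at all) are left untouched. This is used
/// for DBG_VALUE-like instructions whose operands are otherwise unconstrained.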
961static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI,
962 const RegisterBankInfo &RBI) {
963 for (MachineOperand &MO : I.operands()) {
964 if (!MO.isReg())
965 continue;
966 Register Reg = MO.getReg();
967 if (!Reg)
968 continue;
969 if (Reg.isPhysical())
970 continue;
971 LLT Ty = MRI.getType(Reg);
972 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
973 const TargetRegisterClass *RC =
974 RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
975 if (!RC) {
976 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
977 RC = getRegClassForTypeOnBank(Ty, RB);
978 if (!RC) {
979 LLVM_DEBUG(
980 dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
981 break;
982 }
983 }
984 RBI.constrainGenericRegister(Reg, *RC, MRI);
985 }
986
987 return true;
988}
989
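/// Select a COPY (or a copy-like instruction such as a G_ZEXT that can be
/// reduced to a copy) by choosing register classes for both operands from
/// their banks and sizes, inserting a subregister copy or a SUBREG_TO_REG
/// promotion when the source and destination sizes differ.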
990static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
991 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
992 const RegisterBankInfo &RBI) {
993 Register DstReg = I.getOperand(0).getReg();
994 Register SrcReg = I.getOperand(1).getReg();
995 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
996 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
997
998 // Find the correct register classes for the source and destination registers.
999 const TargetRegisterClass *SrcRC;
1000 const TargetRegisterClass *DstRC;
1001 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
1002
1003 if (!DstRC) {
1004 LLVM_DEBUG(dbgs() << "Unexpected dest size "
1005 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
1006 return false;
1007 }
1008
1009 // Is this a copy? If so, then we may need to insert a subregister copy.
1010 if (I.isCopy()) {
1011 // Yes. Check if there's anything to fix up.
1012 if (!SrcRC) {
1013 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
1014 return false;
1015 }
1016
1017 unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
1018 unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
1019 unsigned SubReg;
1020
1021 // If the source bank doesn't support a subregister copy small enough,
1022 // then we first need to copy to the destination bank.
1023 if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
1024 const TargetRegisterClass *DstTempRC =
1025 getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
1026 getSubRegForClass(DstRC, TRI, SubReg);
1027
1028 MachineIRBuilder MIB(I);
1029 auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
1030 copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
1031 } else if (SrcSize > DstSize) {
1032 // If the source register is bigger than the destination we need to
1033 // perform a subregister copy.
1034 const TargetRegisterClass *SubRegRC =
1035 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1036 getSubRegForClass(SubRegRC, TRI, SubReg);
1037 copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
1038 } else if (DstSize > SrcSize) {
1039 // If the destination register is bigger than the source we need to do
1040 // a promotion using SUBREG_TO_REG.
1041 const TargetRegisterClass *PromotionRC =
1042 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1043 getSubRegForClass(SrcRC, TRI, SubReg);
1044
1045 Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
1046 BuildMI(*I.getParent(), I, I.getDebugLoc(),
1047 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
1048 .addImm(0)
1049 .addUse(SrcReg)
1050 .addImm(SubReg);
1051 MachineOperand &RegOp = I.getOperand(1);
1052 RegOp.setReg(PromoteReg);
1053 }
1054
1055 // If the destination is a physical register, then there's nothing to
1056 // change, so we're done.
1057 if (DstReg.isPhysical())
1058 return true;
1059 }
1060
1061 // No need to constrain SrcReg. It will get constrained when we hit another
1062 // of its use or its defs. Copies do not have constraints.
1063 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1064 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
1065 << " operand\n");
1066 return false;
1067 }
1068
1069 // If this is a GPR ZEXT that we want to just reduce down into a copy.
1070 // The sizes will be mismatched with the source < 32b but that's ok.
1071 if (I.getOpcode() == TargetOpcode::G_ZEXT) {
1072 I.setDesc(TII.get(AArch64::COPY));
1073 assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
1074 return selectCopy(I, TII, MRI, TRI, RBI);
1075 }
1076
1077 I.setDesc(TII.get(AArch64::COPY));
1078 return true;
1079}
1080
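/// Select the AArch64 opcode for a scalar conversion between floating-point
/// and integer values (G_SITOFP, G_UITOFP, G_FPTOSI, G_FPTOUI), based on the
/// destination and source scalar types.
/// \returns \p GenericOpc if the combination is unsupported.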
1081static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
1082 if (!DstTy.isScalar() || !SrcTy.isScalar())
1083 return GenericOpc;
1084
1085 const unsigned DstSize = DstTy.getSizeInBits();
1086 const unsigned SrcSize = SrcTy.getSizeInBits();
1087
1088 switch (DstSize) {
1089 case 32:
1090 switch (SrcSize) {
1091 case 32:
1092 switch (GenericOpc) {
1093 case TargetOpcode::G_SITOFP:
1094 return AArch64::SCVTFUWSri;
1095 case TargetOpcode::G_UITOFP:
1096 return AArch64::UCVTFUWSri;
1097 case TargetOpcode::G_FPTOSI:
1098 return AArch64::FCVTZSUWSr;
1099 case TargetOpcode::G_FPTOUI:
1100 return AArch64::FCVTZUUWSr;
1101 default:
1102 return GenericOpc;
1103 }
1104 case 64:
1105 switch (GenericOpc) {
1106 case TargetOpcode::G_SITOFP:
1107 return AArch64::SCVTFUXSri;
1108 case TargetOpcode::G_UITOFP:
1109 return AArch64::UCVTFUXSri;
1110 case TargetOpcode::G_FPTOSI:
1111 return AArch64::FCVTZSUWDr;
1112 case TargetOpcode::G_FPTOUI:
1113 return AArch64::FCVTZUUWDr;
1114 default:
1115 return GenericOpc;
1116 }
1117 default:
1118 return GenericOpc;
1119 }
1120 case 64:
1121 switch (SrcSize) {
1122 case 32:
1123 switch (GenericOpc) {
1124 case TargetOpcode::G_SITOFP:
1125 return AArch64::SCVTFUWDri;
1126 case TargetOpcode::G_UITOFP:
1127 return AArch64::UCVTFUWDri;
1128 case TargetOpcode::G_FPTOSI:
1129 return AArch64::FCVTZSUXSr;
1130 case TargetOpcode::G_FPTOUI:
1131 return AArch64::FCVTZUUXSr;
1132 default:
1133 return GenericOpc;
1134 }
1135 case 64:
1136 switch (GenericOpc) {
1137 case TargetOpcode::G_SITOFP:
1138 return AArch64::SCVTFUXDri;
1139 case TargetOpcode::G_UITOFP:
1140 return AArch64::UCVTFUXDri;
1141 case TargetOpcode::G_FPTOSI:
1142 return AArch64::FCVTZSUXDr;
1143 case TargetOpcode::G_FPTOUI:
1144 return AArch64::FCVTZUUXDr;
1145 default:
1146 return GenericOpc;
1147 }
1148 default:
1149 return GenericOpc;
1150 }
1151 default:
1152 return GenericOpc;
1153 };
1154 return GenericOpc;
1155}
1156
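/// Emit a CSEL or FCSEL that selects between \p True and \p False under
/// condition code \p CC, folding negate/invert/increment patterns into
/// CSNEG/CSINV/CSINC and constant true/false values into CSINC/CSINV forms
/// when profitable.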
1157MachineInstr *
1158AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1159 Register False, AArch64CC::CondCode CC,
1160 MachineIRBuilder &MIB) const {
1161 MachineRegisterInfo &MRI = *MIB.getMRI();
1162 assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
1163 RBI.getRegBank(True, MRI, TRI)->getID() &&
1164 "Expected both select operands to have the same regbank?");
1165 LLT Ty = MRI.getType(True);
1166 if (Ty.isVector())
1167 return nullptr;
1168 const unsigned Size = Ty.getSizeInBits();
1169 assert((Size == 32 || Size == 64) &&
1170 "Expected 32 bit or 64 bit select only?");
1171 const bool Is32Bit = Size == 32;
1172 if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1173 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1174 auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1175 constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1176 return &*FCSel;
1177 }
1178
1179 // By default, we'll try and emit a CSEL.
1180 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1181 bool Optimized = false;
1182 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1183 &Optimized](Register &Reg, Register &OtherReg,
1184 bool Invert) {
1185 if (Optimized)
1186 return false;
1187
1188 // Attempt to fold:
1189 //
1190 // %sub = G_SUB 0, %x
1191 // %select = G_SELECT cc, %reg, %sub
1192 //
1193 // Into:
1194 // %select = CSNEG %reg, %x, cc
1195 Register MatchReg;
1196 if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
1197 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1198 Reg = MatchReg;
1199 if (Invert) {
1200 CC = AArch64CC::getInvertedCondCode(CC);
1201 std::swap(Reg, OtherReg);
1202 }
1203 return true;
1204 }
1205
1206 // Attempt to fold:
1207 //
1208 // %xor = G_XOR %x, -1
1209 // %select = G_SELECT cc, %reg, %xor
1210 //
1211 // Into:
1212 // %select = CSINV %reg, %x, cc
1213 if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
1214 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1215 Reg = MatchReg;
1216 if (Invert) {
1217 CC = AArch64CC::getInvertedCondCode(CC);
1218 std::swap(Reg, OtherReg);
1219 }
1220 return true;
1221 }
1222
1223 // Attempt to fold:
1224 //
1225 // %add = G_ADD %x, 1
1226 // %select = G_SELECT cc, %reg, %add
1227 //
1228 // Into:
1229 // %select = CSINC %reg, %x, cc
1230 if (mi_match(Reg, MRI,
1231 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
1232 m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
1233 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1234 Reg = MatchReg;
1235 if (Invert) {
1236 CC = AArch64CC::getInvertedCondCode(CC);
1237 std::swap(Reg, OtherReg);
1238 }
1239 return true;
1240 }
1241
1242 return false;
1243 };
1244
1245 // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1246 // true/false values are constants.
1247 // FIXME: All of these patterns already exist in tablegen. We should be
1248 // able to import these.
1249 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1250 &Optimized]() {
1251 if (Optimized)
1252 return false;
1253 auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
1254 auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
1255 if (!TrueCst && !FalseCst)
1256 return false;
1257
1258 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1259 if (TrueCst && FalseCst) {
1260 int64_t T = TrueCst->Value.getSExtValue();
1261 int64_t F = FalseCst->Value.getSExtValue();
1262
1263 if (T == 0 && F == 1) {
1264 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1265 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1266 True = ZReg;
1267 False = ZReg;
1268 return true;
1269 }
1270
1271 if (T == 0 && F == -1) {
1272 // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
1273 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1274 True = ZReg;
1275 False = ZReg;
1276 return true;
1277 }
1278 }
1279
1280 if (TrueCst) {
1281 int64_t T = TrueCst->Value.getSExtValue();
1282 if (T == 1) {
1283 // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1284 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1285 True = False;
1286 False = ZReg;
1287 CC = AArch64CC::getInvertedCondCode(CC);
1288 return true;
1289 }
1290
1291 if (T == -1) {
1292 // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1293 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1294 True = False;
1295 False = ZReg;
1296 CC = AArch64CC::getInvertedCondCode(CC);
1297 return true;
1298 }
1299 }
1300
1301 if (FalseCst) {
1302 int64_t F = FalseCst->Value.getSExtValue();
1303 if (F == 1) {
1304 // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1305 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1306 False = ZReg;
1307 return true;
1308 }
1309
1310 if (F == -1) {
1311 // G_SELECT cc, t, -1 -> CSINC t, zreg, cc
1312 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1313 False = ZReg;
1314 return true;
1315 }
1316 }
1317 return false;
1318 };
1319
1320 Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1321 Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1322 Optimized |= TryOptSelectCst();
1323 auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1324 constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1325 return &*SelectInst;
1326}
1327
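/// Convert an IR integer comparison predicate to the corresponding AArch64
/// condition code.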
1328static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
1329 switch (P) {
1330 default:
1331 llvm_unreachable("Unknown condition code!");
1332 case CmpInst::ICMP_NE:
1333 return AArch64CC::NE;
1334 case CmpInst::ICMP_EQ:
1335 return AArch64CC::EQ;
1336 case CmpInst::ICMP_SGT:
1337 return AArch64CC::GT;
1338 case CmpInst::ICMP_SGE:
1339 return AArch64CC::GE;
1340 case CmpInst::ICMP_SLT:
1341 return AArch64CC::LT;
1342 case CmpInst::ICMP_SLE:
1343 return AArch64CC::LE;
1344 case CmpInst::ICMP_UGT:
1345 return AArch64CC::HI;
1346 case CmpInst::ICMP_UGE:
1347 return AArch64CC::HS;
1348 case CmpInst::ICMP_ULT:
1349 return AArch64CC::LO;
1350 case CmpInst::ICMP_ULE:
1351 return AArch64CC::LS;
1352 }
1353}
1354
1355/// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
1356static void changeFPCCToORAArch64CC(CmpInst::Predicate CC,
1357 AArch64CC::CondCode &CondCode,
1358 AArch64CC::CondCode &CondCode2) {
1359 CondCode2 = AArch64CC::AL;
1360 switch (CC) {
1361 default:
1362 llvm_unreachable("Unknown FP condition!");
1363 case CmpInst::FCMP_OEQ:
1364 CondCode = AArch64CC::EQ;
1365 break;
1366 case CmpInst::FCMP_OGT:
1367 CondCode = AArch64CC::GT;
1368 break;
1369 case CmpInst::FCMP_OGE:
1370 CondCode = AArch64CC::GE;
1371 break;
1372 case CmpInst::FCMP_OLT:
1373 CondCode = AArch64CC::MI;
1374 break;
1375 case CmpInst::FCMP_OLE:
1376 CondCode = AArch64CC::LS;
1377 break;
1378 case CmpInst::FCMP_ONE:
1379 CondCode = AArch64CC::MI;
1380 CondCode2 = AArch64CC::GT;
1381 break;
1382 case CmpInst::FCMP_ORD:
1383 CondCode = AArch64CC::VC;
1384 break;
1385 case CmpInst::FCMP_UNO:
1386 CondCode = AArch64CC::VS;
1387 break;
1388 case CmpInst::FCMP_UEQ:
1389 CondCode = AArch64CC::EQ;
1390 CondCode2 = AArch64CC::VS;
1391 break;
1392 case CmpInst::FCMP_UGT:
1393 CondCode = AArch64CC::HI;
1394 break;
1395 case CmpInst::FCMP_UGE:
1396 CondCode = AArch64CC::PL;
1397 break;
1398 case CmpInst::FCMP_ULT:
1399 CondCode = AArch64CC::LT;
1400 break;
1401 case CmpInst::FCMP_ULE:
1402 CondCode = AArch64CC::LE;
1403 break;
1404 case CmpInst::FCMP_UNE:
1405 CondCode = AArch64CC::NE;
1406 break;
1407 }
1408}
1409
1410/// Convert an IR fp condition code to an AArch64 CC.
1411/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1412/// should be AND'ed instead of OR'ed.
1413static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC,
1414 AArch64CC::CondCode &CondCode,
1415 AArch64CC::CondCode &CondCode2) {
1416 CondCode2 = AArch64CC::AL;
1417 switch (CC) {
1418 default:
1419 changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
1420 assert(CondCode2 == AArch64CC::AL);
1421 break;
1422 case CmpInst::FCMP_ONE:
1423 // (a one b)
1424 // == ((a olt b) || (a ogt b))
1425 // == ((a ord b) && (a une b))
1426 CondCode = AArch64CC::VC;
1427 CondCode2 = AArch64CC::NE;
1428 break;
1429 case CmpInst::FCMP_UEQ:
1430 // (a ueq b)
1431 // == ((a uno b) || (a oeq b))
1432 // == ((a ule b) && (a uge b))
1433 CondCode = AArch64CC::PL;
1434 CondCode2 = AArch64CC::LE;
1435 break;
1436 }
1437}
1438
1439/// Return a register which can be used as a bit to test in a TB(N)Z.
1440static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1441 MachineRegisterInfo &MRI) {
1442 assert(Reg.isValid() && "Expected valid register!");
1443 bool HasZext = false;
1444 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1445 unsigned Opc = MI->getOpcode();
1446
1447 if (!MI->getOperand(0).isReg() ||
1448 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1449 break;
1450
1451 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1452 //
1453 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1454 // on the truncated x is the same as the bit number on x.
1455 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1456 Opc == TargetOpcode::G_TRUNC) {
1457 if (Opc == TargetOpcode::G_ZEXT)
1458 HasZext = true;
1459
1460 Register NextReg = MI->getOperand(1).getReg();
1461 // Did we find something worth folding?
1462 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1463 break;
1464
1465 // NextReg is worth folding. Keep looking.
1466 Reg = NextReg;
1467 continue;
1468 }
1469
1470 // Attempt to find a suitable operation with a constant on one side.
1471 std::optional<uint64_t> C;
1472 Register TestReg;
1473 switch (Opc) {
1474 default:
1475 break;
1476 case TargetOpcode::G_AND:
1477 case TargetOpcode::G_XOR: {
1478 TestReg = MI->getOperand(1).getReg();
1479 Register ConstantReg = MI->getOperand(2).getReg();
1480 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1481 if (!VRegAndVal) {
1482 // AND commutes, check the other side for a constant.
1483 // FIXME: Can we canonicalize the constant so that it's always on the
1484 // same side at some point earlier?
1485 std::swap(ConstantReg, TestReg);
1486 VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1487 }
1488 if (VRegAndVal) {
1489 if (HasZext)
1490 C = VRegAndVal->Value.getZExtValue();
1491 else
1492 C = VRegAndVal->Value.getSExtValue();
1493 }
1494 break;
1495 }
1496 case TargetOpcode::G_ASHR:
1497 case TargetOpcode::G_LSHR:
1498 case TargetOpcode::G_SHL: {
1499 TestReg = MI->getOperand(1).getReg();
1500 auto VRegAndVal =
1501 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1502 if (VRegAndVal)
1503 C = VRegAndVal->Value.getSExtValue();
1504 break;
1505 }
1506 }
1507
1508 // Didn't find a constant or viable register. Bail out of the loop.
1509 if (!C || !TestReg.isValid())
1510 break;
1511
1512 // We found a suitable instruction with a constant. Check to see if we can
1513 // walk through the instruction.
1514 Register NextReg;
1515 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1516 switch (Opc) {
1517 default:
1518 break;
1519 case TargetOpcode::G_AND:
1520 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1521 if ((*C >> Bit) & 1)
1522 NextReg = TestReg;
1523 break;
1524 case TargetOpcode::G_SHL:
1525 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1526 // the type of the register.
1527 if (*C <= Bit && (Bit - *C) < TestRegSize) {
1528 NextReg = TestReg;
1529 Bit = Bit - *C;
1530 }
1531 break;
1532 case TargetOpcode::G_ASHR:
1533 // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1534 // in x
1535 NextReg = TestReg;
1536 Bit = Bit + *C;
1537 if (Bit >= TestRegSize)
1538 Bit = TestRegSize - 1;
1539 break;
1540 case TargetOpcode::G_LSHR:
1541 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1542 if ((Bit + *C) < TestRegSize) {
1543 NextReg = TestReg;
1544 Bit = Bit + *C;
1545 }
1546 break;
1547 case TargetOpcode::G_XOR:
1548 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1549 // appropriate.
1550 //
1551 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1552 //
1553 // tbz x', b -> tbnz x, b
1554 //
1555 // Because x' only has the b-th bit set if x does not.
1556 if ((*C >> Bit) & 1)
1557 Invert = !Invert;
1558 NextReg = TestReg;
1559 break;
1560 }
1561
1562 // Check if we found anything worth folding.
1563 if (!NextReg.isValid())
1564 return Reg;
1565 Reg = NextReg;
1566 }
1567
1568 return Reg;
1569}
1570
1571MachineInstr *AArch64InstructionSelector::emitTestBit(
1572 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1573 MachineIRBuilder &MIB) const {
1574 assert(TestReg.isValid());
1575 assert(ProduceNonFlagSettingCondBr &&
1576 "Cannot emit TB(N)Z with speculation tracking!");
1577 MachineRegisterInfo &MRI = *MIB.getMRI();
1578
1579 // Attempt to optimize the test bit by walking over instructions.
1580 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1581 LLT Ty = MRI.getType(TestReg);
1582 unsigned Size = Ty.getSizeInBits();
1583 assert(!Ty.isVector() && "Expected a scalar!");
1584 assert(Bit < 64 && "Bit is too large!");
1585
1586 // When the test register is a 64-bit register, we have to narrow to make
1587 // TBNZW work.
1588 bool UseWReg = Bit < 32;
1589 unsigned NecessarySize = UseWReg ? 32 : 64;
1590 if (Size != NecessarySize)
1591 TestReg = moveScalarRegClass(
1592 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1593 MIB);
1594
1595 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1596 {AArch64::TBZW, AArch64::TBNZW}};
1597 unsigned Opc = OpcTable[UseWReg][IsNegative];
1598 auto TestBitMI =
1599 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1600 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1601 return &*TestBitMI;
1602}
1603
1604bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1605 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1606 MachineIRBuilder &MIB) const {
1607 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1608 // Given something like this:
1609 //
1610 // %x = ...Something...
1611 // %one = G_CONSTANT i64 1
1612 // %zero = G_CONSTANT i64 0
1613 // %and = G_AND %x, %one
1614 // %cmp = G_ICMP intpred(ne), %and, %zero
1615 // %cmp_trunc = G_TRUNC %cmp
1616 // G_BRCOND %cmp_trunc, %bb.3
1617 //
1618 // We want to try and fold the AND into the G_BRCOND and produce either a
1619 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1620 //
1621 // In this case, we'd get
1622 //
1623 // TBNZ %x %bb.3
1624 //
1625
1626 // Check if the AND has a constant on its RHS which we can use as a mask.
1627 // If it's a power of 2, then it's the same as checking a specific bit.
1628 // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1629 auto MaybeBit = getIConstantVRegValWithLookThrough(
1630 AndInst.getOperand(2).getReg(), *MIB.getMRI());
1631 if (!MaybeBit)
1632 return false;
1633
1634 int32_t Bit = MaybeBit->Value.exactLogBase2();
1635 if (Bit < 0)
1636 return false;
1637
1638 Register TestReg = AndInst.getOperand(1).getReg();
1639
1640 // Emit a TB(N)Z.
1641 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1642 return true;
1643}
1644
1645MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1646 bool IsNegative,
1647 MachineBasicBlock *DestMBB,
1648 MachineIRBuilder &MIB) const {
1649 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1650 MachineRegisterInfo &MRI = *MIB.getMRI();
1651 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1652 AArch64::GPRRegBankID &&
1653 "Expected GPRs only?");
1654 auto Ty = MRI.getType(CompareReg);
1655 unsigned Width = Ty.getSizeInBits();
1656 assert(!Ty.isVector() && "Expected scalar only?");
1657 assert(Width <= 64 && "Expected width to be at most 64?");
1658 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1659 {AArch64::CBNZW, AArch64::CBNZX}};
1660 unsigned Opc = OpcTable[IsNegative][Width == 64];
1661 auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1662 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1663 return &*BranchMI;
1664}
1665
1666bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1667 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1668 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1669 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1670 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1671 // totally clean. Some of them require two branches to implement.
1672 auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1673 emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1674 Pred);
1675 AArch64CC::CondCode CC1, CC2;
1676 changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
1677 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1678 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1679 if (CC2 != AArch64CC::AL)
1680 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1681 I.eraseFromParent();
1682 return true;
1683}
1684
1685bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1686 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1687 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1688 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1689 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1690 //
1691 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1692 // instructions will not be produced, as they are conditional branch
1693 // instructions that do not set flags.
1694 if (!ProduceNonFlagSettingCondBr)
1695 return false;
1696
1697 MachineRegisterInfo &MRI = *MIB.getMRI();
1698 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1699 auto Pred =
1700 static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1701 Register LHS = ICmp.getOperand(2).getReg();
1702 Register RHS = ICmp.getOperand(3).getReg();
1703
1704 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1705 auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1706 MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1707
1708 // When we can emit a TB(N)Z, prefer that.
1709 //
1710 // Handle non-commutative condition codes first.
1711 // Note that we don't want to do this when we have a G_AND because it can
1712 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1713 if (VRegAndVal && !AndInst) {
1714 int64_t C = VRegAndVal->Value.getSExtValue();
1715
1716 // When we have a signed greater-than comparison against -1, we can just test
1717 // if the msb is zero.
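 // Added illustrative note (not from the upstream source): for an s64 %x,
 // "icmp sgt %x, -1" holds exactly when the sign bit is clear, so the branch
 // is emitted roughly as:
 //   TBZX %x, 63, %bb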
1718 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1719 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1720 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1721 I.eraseFromParent();
1722 return true;
1723 }
1724
1725 // When we have a signed less-than comparison against zero, we can just test
1726 // if the msb is not zero.
1727 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1728 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1729 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1730 I.eraseFromParent();
1731 return true;
1732 }
1733
1734 // Similarly, if we have a signed greater-than-or-equal comparison against
1735 // zero, we can test if the msb is zero.
1736 if (C == 0 && Pred == CmpInst::ICMP_SGE) {
1737 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1738 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1739 I.eraseFromParent();
1740 return true;
1741 }
1742 }
1743
1744 // Attempt to handle commutative condition codes. Right now, that's only
1745 // eq/ne.
1746 if (ICmpInst::isEquality(Pred)) {
1747 if (!VRegAndVal) {
1748 std::swap(RHS, LHS);
1749 VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1750 AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1751 }
1752
1753 if (VRegAndVal && VRegAndVal->Value == 0) {
1754 // If there's a G_AND feeding into this branch, try to fold it away by
1755 // emitting a TB(N)Z instead.
1756 //
1757 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1758 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1759 // would be redundant.
1760 if (AndInst &&
1761 tryOptAndIntoCompareBranch(
1762 *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1763 I.eraseFromParent();
1764 return true;
1765 }
1766
1767 // Otherwise, try to emit a CB(N)Z instead.
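 // Added illustrative note (not from the upstream source): e.g. an
 // eq-against-zero compare of an s64 value feeding the branch becomes
 // roughly "CBZX %x, %bb", and the ne form becomes CBNZX.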
1768 auto LHSTy = MRI.getType(LHS);
1769 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1770 emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1771 I.eraseFromParent();
1772 return true;
1773 }
1774 }
1775 }
1776
1777 return false;
1778}
1779
1780bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1781 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1782 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1783 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1784 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1785 return true;
1786
1787 // Couldn't optimize. Emit a compare + a Bcc.
1788 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1789 auto PredOp = ICmp.getOperand(1);
1790 emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1791 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1792 static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
1793 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1794 I.eraseFromParent();
1795 return true;
1796}
1797
1798bool AArch64InstructionSelector::selectCompareBranch(
1799 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
1800 Register CondReg = I.getOperand(0).getReg();
1801 MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1802 // Try to select the G_BRCOND using whatever is feeding the condition if
1803 // possible.
1804 unsigned CCMIOpc = CCMI->getOpcode();
1805 if (CCMIOpc == TargetOpcode::G_FCMP)
1806 return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1807 if (CCMIOpc == TargetOpcode::G_ICMP)
1808 return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1809
1810 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1811 // instructions will not be produced, as they are conditional branch
1812 // instructions that do not set flags.
1813 if (ProduceNonFlagSettingCondBr) {
1814 emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1815 I.getOperand(1).getMBB(), MIB);
1816 I.eraseFromParent();
1817 return true;
1818 }
1819
1820 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1821 auto TstMI =
1822 MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1823 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
1824 auto Bcc = MIB.buildInstr(AArch64::Bcc)
1825 .addImm(AArch64CC::NE)
1826 .addMBB(I.getOperand(1).getMBB());
1827 I.eraseFromParent();
1828 return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1829}
1830
1831/// Returns the element immediate value of a vector shift operand if found.
1832/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1833static std::optional<int64_t> getVectorShiftImm(Register Reg,
1834 MachineRegisterInfo &MRI) {
1835 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1836 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1837 return getAArch64VectorSplatScalar(*OpMI, MRI);
1838}
1839
1840/// Matches and returns the shift immediate value for a SHL instruction given
1841/// a shift operand.
1842static std::optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg,
1843 MachineRegisterInfo &MRI) {
1844 std::optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1845 if (!ShiftImm)
1846 return std::nullopt;
1847 // Check the immediate is in range for a SHL.
1848 int64_t Imm = *ShiftImm;
1849 if (Imm < 0)
1850 return std::nullopt;
1851 switch (SrcTy.getElementType().getSizeInBits()) {
1852 default:
1853 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1854 return std::nullopt;
1855 case 8:
1856 if (Imm > 7)
1857 return std::nullopt;
1858 break;
1859 case 16:
1860 if (Imm > 15)
1861 return std::nullopt;
1862 break;
1863 case 32:
1864 if (Imm > 31)
1865 return std::nullopt;
1866 break;
1867 case 64:
1868 if (Imm > 63)
1869 return std::nullopt;
1870 break;
1871 }
1872 return Imm;
1873}
1874
1875bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
1876 MachineRegisterInfo &MRI) {
1877 assert(I.getOpcode() == TargetOpcode::G_SHL);
1878 Register DstReg = I.getOperand(0).getReg();
1879 const LLT Ty = MRI.getType(DstReg);
1880 Register Src1Reg = I.getOperand(1).getReg();
1881 Register Src2Reg = I.getOperand(2).getReg();
1882
1883 if (!Ty.isVector())
1884 return false;
1885
1886 // Check if we have a vector of constants on RHS that we can select as the
1887 // immediate form.
1888 std::optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1889
1890 unsigned Opc = 0;
1891 if (Ty == LLT::fixed_vector(2, 64)) {
1892 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1893 } else if (Ty == LLT::fixed_vector(4, 32)) {
1894 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1895 } else if (Ty == LLT::fixed_vector(2, 32)) {
1896 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1897 } else if (Ty == LLT::fixed_vector(4, 16)) {
1898 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1899 } else if (Ty == LLT::fixed_vector(8, 16)) {
1900 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1901 } else if (Ty == LLT::fixed_vector(16, 8)) {
1902 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1903 } else if (Ty == LLT::fixed_vector(8, 8)) {
1904 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1905 } else {
1906 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1907 return false;
1908 }
1909
1910 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1911 if (ImmVal)
1912 Shl.addImm(*ImmVal);
1913 else
1914 Shl.addUse(Src2Reg);
1915 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1916 I.eraseFromParent();
1917 return true;
1918}
1919
1920bool AArch64InstructionSelector::selectVectorAshrLshr(
1921 MachineInstr &I, MachineRegisterInfo &MRI) {
1922 assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1923 I.getOpcode() == TargetOpcode::G_LSHR);
1924 Register DstReg = I.getOperand(0).getReg();
1925 const LLT Ty = MRI.getType(DstReg);
1926 Register Src1Reg = I.getOperand(1).getReg();
1927 Register Src2Reg = I.getOperand(2).getReg();
1928
1929 if (!Ty.isVector())
1930 return false;
1931
1932 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1933
1934 // We expect the immediate case to be lowered in the PostLegalCombiner to
1935 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1936
1937 // There is no vector shift-right-by-register instruction. Instead, the
1938 // shift-left-by-register instruction takes a signed shift amount, where
1939 // negative amounts specify a right shift.
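 // Added illustrative sketch (not from the upstream source): e.g. a
 // <4 x s32> G_ASHR %v, %amt is emitted roughly as
 //   %neg = NEGv4i32 %amt
 //   %dst = SSHLv4i32 %v, %neg
 // while the G_LSHR form uses USHLv4i32 instead.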
1940
1941 unsigned Opc = 0;
1942 unsigned NegOpc = 0;
1943 const TargetRegisterClass *RC =
1944 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
1945 if (Ty == LLT::fixed_vector(2, 64)) {
1946 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1947 NegOpc = AArch64::NEGv2i64;
1948 } else if (Ty == LLT::fixed_vector(4, 32)) {
1949 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1950 NegOpc = AArch64::NEGv4i32;
1951 } else if (Ty == LLT::fixed_vector(2, 32)) {
1952 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1953 NegOpc = AArch64::NEGv2i32;
1954 } else if (Ty == LLT::fixed_vector(4, 16)) {
1955 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1956 NegOpc = AArch64::NEGv4i16;
1957 } else if (Ty == LLT::fixed_vector(8, 16)) {
1958 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1959 NegOpc = AArch64::NEGv8i16;
1960 } else if (Ty == LLT::fixed_vector(16, 8)) {
1961 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1962 NegOpc = AArch64::NEGv16i8;
1963 } else if (Ty == LLT::fixed_vector(8, 8)) {
1964 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1965 NegOpc = AArch64::NEGv8i8;
1966 } else {
1967 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1968 return false;
1969 }
1970
1971 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1972 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1973 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1974 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1975 I.eraseFromParent();
1976 return true;
1977}
1978
1979bool AArch64InstructionSelector::selectVaStartAAPCS(
1980 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1981 return false;
1982}
1983
1984bool AArch64InstructionSelector::selectVaStartDarwin(
1985 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1986 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1987 Register ListReg = I.getOperand(0).getReg();
1988
1989 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1990
1991 int FrameIdx = FuncInfo->getVarArgsStackIndex();
1992 if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64(
1993 MF.getFunction().getCallingConv())) {
1994 FrameIdx = FuncInfo->getVarArgsGPRSize() > 0
1995 ? FuncInfo->getVarArgsGPRIndex()
1996 : FuncInfo->getVarArgsStackIndex();
1997 }
1998
1999 auto MIB =
2000 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
2001 .addDef(ArgsAddrReg)
2002 .addFrameIndex(FrameIdx)
2003 .addImm(0)
2004 .addImm(0);
2005
2006 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2007
2008 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
2009 .addUse(ArgsAddrReg)
2010 .addUse(ListReg)
2011 .addImm(0)
2012 .addMemOperand(*I.memoperands_begin());
2013
2014 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2015 I.eraseFromParent();
2016 return true;
2017}
2018
2019void AArch64InstructionSelector::materializeLargeCMVal(
2020 MachineInstr &I, const Value *V, unsigned OpFlags) {
2021 MachineBasicBlock &MBB = *I.getParent();
2022 MachineFunction &MF = *MBB.getParent();
2023 MachineRegisterInfo &MRI = MF.getRegInfo();
2024
2025 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
2026 MovZ->addOperand(MF, I.getOperand(1));
2027 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
2028 AArch64II::MO_NC);
2029 MovZ->addOperand(MF, MachineOperand::CreateImm(0));
2030 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
2031
2032 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
2033 Register ForceDstReg) {
2034 Register DstReg = ForceDstReg
2035 ? ForceDstReg
2036 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2037 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
2038 if (auto *GV = dyn_cast<GlobalValue>(V)) {
2039 MovI->addOperand(MF, MachineOperand::CreateGA(
2040 GV, MovZ->getOperand(1).getOffset(), Flags));
2041 } else {
2042 MovI->addOperand(
2043 MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
2044 MovZ->getOperand(1).getOffset(), Flags));
2045 }
2046 MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
2047 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
2048 return DstReg;
2049 };
2050 Register DstReg = BuildMovK(MovZ.getReg(0),
2051 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
2052 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
2053 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
2054}
2055
2056bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
2057 MachineBasicBlock &MBB = *I.getParent();
2058 MachineFunction &MF = *MBB.getParent();
2059 MachineRegisterInfo &MRI = MF.getRegInfo();
2060
2061 switch (I.getOpcode()) {
2062 case TargetOpcode::G_STORE: {
2063 bool Changed = contractCrossBankCopyIntoStore(I, MRI);
2064 MachineOperand &SrcOp = I.getOperand(0);
2065 if (MRI.getType(SrcOp.getReg()).isPointer()) {
2066 // Allow matching with imported patterns for stores of pointers. Unlike
2067 // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
2068 // and constrain.
2069 auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
2070 Register NewSrc = Copy.getReg(0);
2071 SrcOp.setReg(NewSrc);
2072 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
2073 Changed = true;
2074 }
2075 return Changed;
2076 }
2077 case TargetOpcode::G_PTR_ADD:
2078 return convertPtrAddToAdd(I, MRI);
2079 case TargetOpcode::G_LOAD: {
2080 // For scalar loads of pointers, we try to convert the dest type from p0
2081 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
2082 // conversion, this should be ok because all users should have been
2083 // selected already, so the type doesn't matter for them.
2084 Register DstReg = I.getOperand(0).getReg();
2085 const LLT DstTy = MRI.getType(DstReg);
2086 if (!DstTy.isPointer())
2087 return false;
2088 MRI.setType(DstReg, LLT::scalar(64));
2089 return true;
2090 }
2091 case AArch64::G_DUP: {
2092 // Convert the type from p0 to s64 to help selection.
2093 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2094 if (!DstTy.isPointerVector())
2095 return false;
2096 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
2097 MRI.setType(I.getOperand(0).getReg(),
2098 DstTy.changeElementType(LLT::scalar(64)));
2099 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2100 I.getOperand(1).setReg(NewSrc.getReg(0));
2101 return true;
2102 }
2103 case TargetOpcode::G_UITOFP:
2104 case TargetOpcode::G_SITOFP: {
2105 // If both source and destination regbanks are FPR, then convert the opcode
2106 // to G_SITOF so that the importer can select it to an fpr variant.
2107 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
2108 // copy.
2109 Register SrcReg = I.getOperand(1).getReg();
2110 LLT SrcTy = MRI.getType(SrcReg);
2111 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2112 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
2113 return false;
2114
2115 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
2116 if (I.getOpcode() == TargetOpcode::G_SITOFP)
2117 I.setDesc(TII.get(AArch64::G_SITOF));
2118 else
2119 I.setDesc(TII.get(AArch64::G_UITOF));
2120 return true;
2121 }
2122 return false;
2123 }
2124 default:
2125 return false;
2126 }
2127}
2128
2129/// This lowering tries to look for G_PTR_ADD instructions and then converts
2130/// them to a standard G_ADD with a COPY on the source.
2131///
2132/// The motivation behind this is to expose the add semantics to the imported
2133/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
2134/// because the selector works bottom up, uses before defs. By the time we
2135/// end up trying to select a G_PTR_ADD, we should have already attempted to
2136/// fold this into addressing modes and were therefore unsuccessful.
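/// A rough illustration (added note, not from the upstream source):
///   %dst:gpr(p0) = G_PTR_ADD %base:gpr(p0), %off(s64)
/// is rewritten to
///   %intbase:gpr(s64) = G_PTRTOINT %base(p0)
///   %dst:gpr(s64) = G_ADD %intbase, %off
/// (or to a G_SUB when %off matches the 0 - x negate idiom handled below),
/// and the new G_PTRTOINT is selected immediately.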
2137bool AArch64InstructionSelector::convertPtrAddToAdd(
2138 MachineInstr &I, MachineRegisterInfo &MRI) {
2139 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
2140 Register DstReg = I.getOperand(0).getReg();
2141 Register AddOp1Reg = I.getOperand(1).getReg();
2142 const LLT PtrTy = MRI.getType(DstReg);
2143 if (PtrTy.getAddressSpace() != 0)
2144 return false;
2145
2146 const LLT CastPtrTy =
2147 PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
2148 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
2149 // Set regbanks on the registers.
2150 if (PtrTy.isVector())
2151 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2152 else
2153 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2154
2155 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2156 // %dst(intty) = G_ADD %intbase, off
2157 I.setDesc(TII.get(TargetOpcode::G_ADD));
2158 MRI.setType(DstReg, CastPtrTy);
2159 I.getOperand(1).setReg(PtrToInt.getReg(0));
2160 if (!select(*PtrToInt)) {
2161 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
2162 return false;
2163 }
2164
2165 // Also take the opportunity here to try to do some optimization.
2166 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
2167 Register NegatedReg;
2168 if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
2169 return true;
2170 I.getOperand(2).setReg(NegatedReg);
2171 I.setDesc(TII.get(TargetOpcode::G_SUB));
2172 return true;
2173}
2174
2175bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
2176 MachineRegisterInfo &MRI) {
2177 // We try to match the immediate variant of LSL, which is actually an alias
2178 // for a special case of UBFM. Otherwise, we fall back to the imported
2179 // selector which will match the register variant.
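 // Added illustrative note (not from the upstream source): the alias means
 // e.g. "lsl w0, w1, #3" is encoded as "ubfm w0, w1, #29, #28", i.e. with
 // immr = (32 - 3) % 32 and imms = 31 - 3; the selectShiftA/selectShiftB
 // renderers below produce those two immediates.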
2180 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2181 const auto &MO = I.getOperand(2);
2182 auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
2183 if (!VRegAndVal)
2184 return false;
2185
2186 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2187 if (DstTy.isVector())
2188 return false;
2189 bool Is64Bit = DstTy.getSizeInBits() == 64;
2190 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2191 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2192
2193 if (!Imm1Fn || !Imm2Fn)
2194 return false;
2195
2196 auto NewI =
2197 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2198 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2199
2200 for (auto &RenderFn : *Imm1Fn)
2201 RenderFn(NewI);
2202 for (auto &RenderFn : *Imm2Fn)
2203 RenderFn(NewI);
2204
2205 I.eraseFromParent();
2206 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
2207}
2208
2209bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2210 MachineInstr &I, MachineRegisterInfo &MRI) {
2211 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2212 // If we're storing a scalar, it doesn't matter what register bank that
2213 // scalar is on. All that matters is the size.
2214 //
2215 // So, if we see something like this (with a 32-bit scalar as an example):
2216 //
2217 // %x:gpr(s32) = ... something ...
2218 // %y:fpr(s32) = COPY %x:gpr(s32)
2219 // G_STORE %y:fpr(s32)
2220 //
2221 // We can fix this up into something like this:
2222 //
2223 // G_STORE %x:gpr(s32)
2224 //
2225 // And then continue the selection process normally.
2226 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2227 if (!DefDstReg.isValid())
2228 return false;
2229 LLT DefDstTy = MRI.getType(DefDstReg);
2230 Register StoreSrcReg = I.getOperand(0).getReg();
2231 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2232
2233 // If we get something strange like a physical register, then we shouldn't
2234 // go any further.
2235 if (!DefDstTy.isValid())
2236 return false;
2237
2238 // Are the source and dst types the same size?
2239 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2240 return false;
2241
2242 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2243 RBI.getRegBank(DefDstReg, MRI, TRI))
2244 return false;
2245
2246 // We have a cross-bank copy, which is entering a store. Let's fold it.
2247 I.getOperand(0).setReg(DefDstReg);
2248 return true;
2249}
2250
2251bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2252 assert(I.getParent() && "Instruction should be in a basic block!");
2253 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2254
2255 MachineBasicBlock &MBB = *I.getParent();
2256 MachineFunction &MF = *MBB.getParent();
2257 MachineRegisterInfo &MRI = MF.getRegInfo();
2258
2259 switch (I.getOpcode()) {
2260 case AArch64::G_DUP: {
2261 // Before selecting a DUP instruction, check if it is better selected as a
2262 // MOV or load from a constant pool.
2263 Register Src = I.getOperand(1).getReg();
2264 auto ValAndVReg = getAnyConstantVRegValWithLookThrough(Src, MRI);
2265 if (!ValAndVReg)
2266 return false;
2267 LLVMContext &Ctx = MF.getFunction().getContext();
2268 Register Dst = I.getOperand(0).getReg();
2269 auto *CV = ConstantDataVector::getSplat(
2270 MRI.getType(Dst).getNumElements(),
2271 ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()),
2272 ValAndVReg->Value));
2273 if (!emitConstantVector(Dst, CV, MIB, MRI))
2274 return false;
2275 I.eraseFromParent();
2276 return true;
2277 }
2278 case TargetOpcode::G_SEXT:
2279 // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
2280 // over a normal extend.
2281 if (selectUSMovFromExtend(I, MRI))
2282 return true;
2283 return false;
2284 case TargetOpcode::G_BR:
2285 return false;
2286 case TargetOpcode::G_SHL:
2287 return earlySelectSHL(I, MRI);
2288 case TargetOpcode::G_CONSTANT: {
2289 bool IsZero = false;
2290 if (I.getOperand(1).isCImm())
2291 IsZero = I.getOperand(1).getCImm()->isZero();
2292 else if (I.getOperand(1).isImm())
2293 IsZero = I.getOperand(1).getImm() == 0;
2294
2295 if (!IsZero)
2296 return false;
2297
2298 Register DefReg = I.getOperand(0).getReg();
2299 LLT Ty = MRI.getType(DefReg);
2300 if (Ty.getSizeInBits() == 64) {
2301 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2302 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2303 } else if (Ty.getSizeInBits() == 32) {
2304 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2305 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2306 } else
2307 return false;
2308
2309 I.setDesc(TII.get(TargetOpcode::COPY));
2310 return true;
2311 }
2312
2313 case TargetOpcode::G_ADD: {
2314 // Check if this is being fed by a G_ICMP on either side.
2315 //
2316 // (cmp pred, x, y) + z
2317 //
2318 // In the above case, when the cmp is true, we increment z by 1. So, we can
2319 // fold the add into the cset for the cmp by using cinc.
2320 //
2321 // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
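 // Added illustrative sketch (not from the upstream source):
 //   %c(s32) = G_ICMP intpred(eq), %x, %y
 //   %sum = G_ADD %z, %c
 // is emitted roughly as a flag-setting compare followed by
 //   CSINC %sum, %z, %z, inv(eq)
 // so %sum is %z + 1 exactly when the compare is true, and %z otherwise.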
2322 Register AddDst = I.getOperand(0).getReg();
2323 Register AddLHS = I.getOperand(1).getReg();
2324 Register AddRHS = I.getOperand(2).getReg();
2325 // Only handle scalars.
2326 LLT Ty = MRI.getType(AddLHS);
2327 if (Ty.isVector())
2328 return false;
2329 // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
2330 // bits.
2331 unsigned Size = Ty.getSizeInBits();
2332 if (Size != 32 && Size != 64)
2333 return false;
2334 auto MatchCmp = [&](Register Reg) -> MachineInstr * {
2335 if (!MRI.hasOneNonDBGUse(Reg))
2336 return nullptr;
2337 // If the LHS of the add is 32 bits, then we want to fold a 32-bit
2338 // compare.
2339 if (Size == 32)
2340 return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
2341 // We model scalar compares using 32-bit destinations right now.
2342 // If it's a 64-bit compare, it'll have 64-bit sources.
2343 Register ZExt;
2344 if (!mi_match(Reg, MRI,
2345 m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt))))))
2346 return nullptr;
2347 auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
2348 if (!Cmp ||
2349 MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
2350 return nullptr;
2351 return Cmp;
2352 };
2353 // Try to match
2354 // z + (cmp pred, x, y)
2355 MachineInstr *Cmp = MatchCmp(AddRHS);
2356 if (!Cmp) {
2357 // (cmp pred, x, y) + z
2358 std::swap(AddLHS, AddRHS);
2359 Cmp = MatchCmp(AddRHS);
2360 if (!Cmp)
2361 return false;
2362 }
2363 auto &PredOp = Cmp->getOperand(1);
2364 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
2365 const AArch64CC::CondCode InvCC =
2366 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
2367 MIB.setInstrAndDebugLoc(I);
2368 emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
2369 /*RHS=*/Cmp->getOperand(3), PredOp, MIB);
2370 emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
2371 I.eraseFromParent();
2372 return true;
2373 }
2374 case TargetOpcode::G_OR: {
2375 // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2376 // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2377 // shifting and masking that we can replace with a BFI (encoded as a BFM).
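 // Added worked example (not from the upstream source): with Size = 32 and
 // ShiftImm = 8, the G_AND mask must be 0xff, and we emit BFMWri with
 // Immr = 32 - 8 = 24 and Imms = 32 - 8 - 1 = 23, which is the equivalent of
 // "bfi %dst, %ShiftSrc, #8, #24" with %dst tied to %MaskSrc.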
2378 Register Dst = I.getOperand(0).getReg();
2379 LLT Ty = MRI.getType(Dst);
2380
2381 if (!Ty.isScalar())
2382 return false;
2383
2384 unsigned Size = Ty.getSizeInBits();
2385 if (Size != 32 && Size != 64)
2386 return false;
2387
2388 Register ShiftSrc;
2389 int64_t ShiftImm;
2390 Register MaskSrc;
2391 int64_t MaskImm;
2392 if (!mi_match(
2393 Dst, MRI,
2394 m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
2395 m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
2396 return false;
2397
2398 if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2399 return false;
2400
2401 int64_t Immr = Size - ShiftImm;
2402 int64_t Imms = Size - ShiftImm - 1;
2403 unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2404 emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2405 I.eraseFromParent();
2406 return true;
2407 }
2408 case TargetOpcode::G_FENCE: {
2409 if (I.getOperand(1).getImm() == 0)
2410 BuildMI(MBB, I, MIMetadata(I), TII.get(TargetOpcode::MEMBARRIER));
2411 else
2412 BuildMI(MBB, I, MIMetadata(I), TII.get(AArch64::DMB))
2413 .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
2414 I.eraseFromParent();
2415 return true;
2416 }
2417 default:
2418 return false;
2419 }
2420}
2421
2422bool AArch64InstructionSelector::select(MachineInstr &I) {
2423 assert(I.getParent() && "Instruction should be in a basic block!");
2424 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2425
2426 MachineBasicBlock &MBB = *I.getParent();
2427 MachineFunction &MF = *MBB.getParent();
2428 MachineRegisterInfo &MRI = MF.getRegInfo();
2429
2430 const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
2431 if (Subtarget->requiresStrictAlign()) {
2432 // We don't support this feature yet.
2433 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2434 return false;
2435 }
2436
2437 MIB.setInstrAndDebugLoc(I);
2438
2439 unsigned Opcode = I.getOpcode();
2440 // G_PHI requires same handling as PHI
2441 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2442 // Certain non-generic instructions also need some special handling.
2443
2444 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2445 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2446
2447 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2448 const Register DefReg = I.getOperand(0).getReg();
2449 const LLT DefTy = MRI.getType(DefReg);
2450
2451 const RegClassOrRegBank &RegClassOrBank =
2452 MRI.getRegClassOrRegBank(DefReg);
2453
2454 const TargetRegisterClass *DefRC
2455 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
2456 if (!DefRC) {
2457 if (!DefTy.isValid()) {
2458 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2459 return false;
2460 }
2461 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
2462 DefRC = getRegClassForTypeOnBank(DefTy, RB);
2463 if (!DefRC) {
2464 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2465 return false;
2466 }
2467 }
2468
2469 I.setDesc(TII.get(TargetOpcode::PHI));
2470
2471 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2472 }
2473
2474 if (I.isCopy())
2475 return selectCopy(I, TII, MRI, TRI, RBI);
2476
2477 if (I.isDebugInstr())
2478 return selectDebugInstr(I, MRI, RBI);
2479
2480 return true;
2481 }
2482
2483
2484 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2485 LLVM_DEBUG(
2486 dbgs() << "Generic instruction has unexpected implicit operands\n");
2487 return false;
2488 }
2489
2490 // Try to do some lowering before we start instruction selecting. These
2491 // lowerings are purely transformations on the input G_MIR and so selection
2492 // must continue after any modification of the instruction.
2493 if (preISelLower(I)) {
2494 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2495 }
2496
2497 // There may be patterns where the importer can't deal with them optimally,
2498 // but does select it to a suboptimal sequence so our custom C++ selection
2499 // code later never has a chance to work on it. Therefore, we have an early
2500 // selection attempt here to give priority to certain selection routines
2501 // over the imported ones.
2502 if (earlySelect(I))
2503 return true;
2504
2505 if (selectImpl(I, *CoverageInfo))
2506 return true;
2507
2508 LLT Ty =
2509 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2510
2511 switch (Opcode) {
2512 case TargetOpcode::G_SBFX:
2513 case TargetOpcode::G_UBFX: {
2514 static const unsigned OpcTable[2][2] = {
2515 {AArch64::UBFMWri, AArch64::UBFMXri},
2516 {AArch64::SBFMWri, AArch64::SBFMXri}};
2517 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2518 unsigned Size = Ty.getSizeInBits();
2519 unsigned Opc = OpcTable[IsSigned][Size == 64];
2520 auto Cst1 =
2521 getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
2522 assert(Cst1 && "Should have gotten a constant for src 1?");
2523 auto Cst2 =
2524 getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
2525 assert(Cst2 && "Should have gotten a constant for src 2?");
2526 auto LSB = Cst1->Value.getZExtValue();
2527 auto Width = Cst2->Value.getZExtValue();
2528 auto BitfieldInst =
2529 MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
2530 .addImm(LSB)
2531 .addImm(LSB + Width - 1);
2532 I.eraseFromParent();
2533 return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
2534 }
2535 case TargetOpcode::G_BRCOND:
2536 return selectCompareBranch(I, MF, MRI);
2537
2538 case TargetOpcode::G_BRINDIRECT: {
2539 I.setDesc(TII.get(AArch64::BR));
2540 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2541 }
2542
2543 case TargetOpcode::G_BRJT:
2544 return selectBrJT(I, MRI);
2545
2546 case AArch64::G_ADD_LOW: {
2547 // This op may have been separated from its ADRP companion by the localizer
2548 // or some other code motion pass. Given that many CPUs will try to
2549 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2550 // which will later be expanded into an ADRP+ADD pair after scheduling.
2551 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2552 if (BaseMI->getOpcode() != AArch64::ADRP) {
2553 I.setDesc(TII.get(AArch64::ADDXri));
2554 I.addOperand(MachineOperand::CreateImm(0));
2555 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2556 }
2557 assert(TM.getCodeModel() == CodeModel::Small &&
2558 "Expected small code model");
2559 auto Op1 = BaseMI->getOperand(1);
2560 auto Op2 = I.getOperand(2);
2561 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2562 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2563 Op1.getTargetFlags())
2564 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2565 Op2.getTargetFlags());
2566 I.eraseFromParent();
2567 return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2568 }
2569
2570 case TargetOpcode::G_FCONSTANT:
2571 case TargetOpcode::G_CONSTANT: {
2572 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2573
2574 const LLT s8 = LLT::scalar(8);
2575 const LLT s16 = LLT::scalar(16);
2576 const LLT s32 = LLT::scalar(32);
2577 const LLT s64 = LLT::scalar(64);
2578 const LLT s128 = LLT::scalar(128);
2579 const LLT p0 = LLT::pointer(0, 64);
2580
2581 const Register DefReg = I.getOperand(0).getReg();
2582 const LLT DefTy = MRI.getType(DefReg);
2583 const unsigned DefSize = DefTy.getSizeInBits();
2584 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2585
2586 // FIXME: Redundant check, but even less readable when factored out.
2587 if (isFP) {
2588 if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2589 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2590 << " constant, expected: " << s16 << " or " << s32
2591 << " or " << s64 << " or " << s128 << '\n');
2592 return false;
2593 }
2594
2595 if (RB.getID() != AArch64::FPRRegBankID) {
2596 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2597 << " constant on bank: " << RB
2598 << ", expected: FPR\n");
2599 return false;
2600 }
2601
2602 // The case when we have 0.0 is covered by tablegen. Reject it here so we
2603 // can be sure tablegen works correctly and isn't rescued by this code.
2604 // 0.0 is not covered by tablegen for FP128. So we will handle this
2605 // scenario in the code here.
2606 if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2607 return false;
2608 } else {
2609 // s32 and s64 are covered by tablegen.
2610 if (Ty != p0 && Ty != s8 && Ty != s16) {
2611 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2612 << " constant, expected: " << s32 << ", " << s64
2613 << ", or " << p0 << '\n');
2614 return false;
2615 }
2616
2617 if (RB.getID() != AArch64::GPRRegBankID) {
2618 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2619 << " constant on bank: " << RB
2620 << ", expected: GPR\n");
2621 return false;
2622 }
2623 }
2624
2625 if (isFP) {
2626 const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB);
2627 // For 16, 64, and 128b values, emit a constant pool load.
2628 switch (DefSize) {
2629 default:
2630 llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
2631 case 32:
2632 case 64: {
2633 bool OptForSize = shouldOptForSize(&MF);
2634 const auto &TLI = MF.getSubtarget().getTargetLowering();
2635 // If TLI says that this fpimm is illegal, then we'll expand to a
2636 // constant pool load.
2637 if (TLI->isFPImmLegal(I.getOperand(1).getFPImm()->getValueAPF(),
2638 EVT::getFloatingPointVT(DefSize), OptForSize))
2639 break;
2640 [[fallthrough]];
2641 }
2642 case 16:
2643 case 128: {
2644 auto *FPImm = I.getOperand(1).getFPImm();
2645 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2646 if (!LoadMI) {
2647 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2648 return false;
2649 }
2650 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2651 I.eraseFromParent();
2652 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2653 }
2654 }
2655
2656 assert((DefSize == 32 || DefSize == 64) && "Unexpected const def size");
2657 // Either emit a FMOV, or emit a copy to emit a normal mov.
2658 const Register DefGPRReg = MRI.createVirtualRegister(
2659 DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2660 MachineOperand &RegOp = I.getOperand(0);
2661 RegOp.setReg(DefGPRReg);
2662 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2663 MIB.buildCopy({DefReg}, {DefGPRReg});
2664
2665 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2666 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2667 return false;
2668 }
2669
2670 MachineOperand &ImmOp = I.getOperand(1);
2671 // FIXME: Is going through int64_t always correct?
2672 ImmOp.ChangeToImmediate(
2673 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2674 } else if (I.getOperand(1).isCImm()) {
2675 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2676 I.getOperand(1).ChangeToImmediate(Val);
2677 } else if (I.getOperand(1).isImm()) {
2678 uint64_t Val = I.getOperand(1).getImm();
2679 I.getOperand(1).ChangeToImmediate(Val);
2680 }
2681
2682 const unsigned MovOpc =
2683 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2684 I.setDesc(TII.get(MovOpc));
2685 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2686 return true;
2687 }
2688 case TargetOpcode::G_EXTRACT: {
2689 Register DstReg = I.getOperand(0).getReg();
2690 Register SrcReg = I.getOperand(1).getReg();
2691 LLT SrcTy = MRI.getType(SrcReg);
2692 LLT DstTy = MRI.getType(DstReg);
2693 (void)DstTy;
2694 unsigned SrcSize = SrcTy.getSizeInBits();
2695
2696 if (SrcTy.getSizeInBits() > 64) {
2697 // This should be an extract of an s128, which is like a vector extract.
2698 if (SrcTy.getSizeInBits() != 128)
2699 return false;
2700 // Only support extracting 64 bits from an s128 at the moment.
2701 if (DstTy.getSizeInBits() != 64)
2702 return false;
2703
2704 unsigned Offset = I.getOperand(2).getImm();
2705 if (Offset % 64 != 0)
2706 return false;
2707
2708 // Check we have the right regbank always.
2709 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2710 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2711 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2712
2713 if (SrcRB.getID() == AArch64::GPRRegBankID) {
2714 auto NewI =
2715 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2716 .addUse(SrcReg, 0,
2717 Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2718 constrainOperandRegClass(MF, TRI, MRI, TII, RBI, *NewI,
2719 AArch64::GPR64RegClass, NewI->getOperand(0));
2720 I.eraseFromParent();
2721 return true;
2722 }
2723
2724 // Emit the same code as a vector extract.
2725 // Offset must be a multiple of 64.
2726 unsigned LaneIdx = Offset / 64;
2727 MachineInstr *Extract = emitExtractVectorElt(
2728 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2729 if (!Extract)
2730 return false;
2731 I.eraseFromParent();
2732 return true;
2733 }
2734
2735 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2736 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2737 Ty.getSizeInBits() - 1);
2738
2739 if (SrcSize < 64) {
2740 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2741 "unexpected G_EXTRACT types");
2742 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2743 }
2744
2745 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2746 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2747 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2748 .addReg(DstReg, 0, AArch64::sub_32);
2749 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2750 AArch64::GPR32RegClass, MRI);
2751 I.getOperand(0).setReg(DstReg);
2752
2753 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2754 }
2755
2756 case TargetOpcode::G_INSERT: {
2757 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2758 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2759 unsigned DstSize = DstTy.getSizeInBits();
2760 // Larger inserts are vectors; same-size ones should be something else by
2761 // now (split up or turned into COPYs).
2762 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2763 return false;
2764
2765 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2766 unsigned LSB = I.getOperand(3).getImm();
2767 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2768 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2769 MachineInstrBuilder(MF, I).addImm(Width - 1);
2770
2771 if (DstSize < 64) {
2772 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2773 "unexpected G_INSERT types");
2774 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2775 }
2776
2777 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2778 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2779 TII.get(AArch64::SUBREG_TO_REG))
2780 .addDef(SrcReg)
2781 .addImm(0)
2782 .addUse(I.getOperand(2).getReg())
2783 .addImm(AArch64::sub_32);
2784 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2785 AArch64::GPR32RegClass, MRI);
2786 I.getOperand(2).setReg(SrcReg);
2787
2788 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2789 }
2790 case TargetOpcode::G_FRAME_INDEX: {
2791 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2792 if (Ty != LLT::pointer(0, 64)) {
2793 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2794 << ", expected: " << LLT::pointer(0, 64) << '\n');
2795 return false;
2796 }
2797 I.setDesc(TII.get(AArch64::ADDXri));
2798
2799 // MOs for a #0 shifted immediate.
2800 I.addOperand(MachineOperand::CreateImm(0));
2801 I.addOperand(MachineOperand::CreateImm(0));
2802
2803 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2804 }
2805
2806 case TargetOpcode::G_GLOBAL_VALUE: {
2807 const GlobalValue *GV = nullptr;
2808 unsigned OpFlags;
2809 if (I.getOperand(1).isSymbol()) {
2810 OpFlags = I.getOperand(1).getTargetFlags();
2811 // Currently only used by "RtLibUseGOT".
2812 assert(OpFlags == AArch64II::MO_GOT);
2813 } else {
2814 GV = I.getOperand(1).getGlobal();
2815 if (GV->isThreadLocal())
2816 return selectTLSGlobalValue(I, MRI);
2817 OpFlags = STI.ClassifyGlobalReference(GV, TM);
2818 }
2819
2820 if (OpFlags & AArch64II::MO_GOT) {
2821 I.setDesc(TII.get(AArch64::LOADgot));
2822 I.getOperand(1).setTargetFlags(OpFlags);
2823 } else if (TM.getCodeModel() == CodeModel::Large &&
2824 !TM.isPositionIndependent()) {
2825 // Materialize the global using movz/movk instructions.
2826 materializeLargeCMVal(I, GV, OpFlags);
2827 I.eraseFromParent();
2828 return true;
2829 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2830 I.setDesc(TII.get(AArch64::ADR));
2831 I.getOperand(1).setTargetFlags(OpFlags);
2832 } else {
2833 I.setDesc(TII.get(AArch64::MOVaddr));
2834 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2835 MachineInstrBuilder MIB(MF, I);
2836 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2837 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2838 }
2839 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2840 }
2841
2842 case TargetOpcode::G_ZEXTLOAD:
2843 case TargetOpcode::G_LOAD:
2844 case TargetOpcode::G_STORE: {
2845 GLoadStore &LdSt = cast<GLoadStore>(I);
2846 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2847 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
2848
2849 if (PtrTy != LLT::pointer(0, 64)) {
2850 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
2851 << ", expected: " << LLT::pointer(0, 64) << '\n');
2852 return false;
2853 }
2854
2855 uint64_t MemSizeInBytes = LdSt.getMemSize();
2856 unsigned MemSizeInBits = LdSt.getMemSizeInBits();
2857 AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
2858
2859 // Need special instructions for atomics that affect ordering.
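 // Added illustrative note (not from the upstream source): e.g. a seq_cst
 // (load (s32)) selects to LDARW below, while an acquire load on a subtarget
 // with RCPC can use LDAPRW instead.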
2860 if (Order != AtomicOrdering::NotAtomic &&
2861 Order != AtomicOrdering::Unordered &&
2862 Order != AtomicOrdering::Monotonic) {
2863 assert(!isa<GZExtLoad>(LdSt));
2864 if (MemSizeInBytes > 64)
2865 return false;
2866
2867 if (isa<GLoad>(LdSt)) {
2868 static constexpr unsigned LDAPROpcodes[] = {
2869 AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
2870 static constexpr unsigned LDAROpcodes[] = {
2871 AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
2872 ArrayRef<unsigned> Opcodes =
2873 STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
2874 ? LDAPROpcodes
2875 : LDAROpcodes;
2876 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2877 } else {
2878 static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2879 AArch64::STLRW, AArch64::STLRX};
2880 Register ValReg = LdSt.getReg(0);
2881 if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
2882 // Emit a subreg copy of 32 bits.
2883 Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2884 MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
2885 .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
2886 I.getOperand(0).setReg(NewVal);
2887 }
2888 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2889 }
2890 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2891 return true;
2892 }
2893
2894#ifndef NDEBUG
2895 const Register PtrReg = LdSt.getPointerReg();
2896 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2897 // Check that the pointer register is valid.
2898 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2899 "Load/Store pointer operand isn't a GPR");
2900 assert(MRI.getType(PtrReg).isPointer() &&
2901 "Load/Store pointer operand isn't a pointer");
2902#endif
2903
2904 const Register ValReg = LdSt.getReg(0);
2905 const LLT ValTy = MRI.getType(ValReg);
2906 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2907
2908 // The code below doesn't support truncating stores, so we need to split it
2909 // again.
2910 if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2911 unsigned SubReg;
2912 LLT MemTy = LdSt.getMMO().getMemoryType();
2913 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2914 if (!getSubRegForClass(RC, TRI, SubReg))
2915 return false;
2916
2917 // Generate a subreg copy.
2918 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
2919 .addReg(ValReg, 0, SubReg)
2920 .getReg(0);
2921 RBI.constrainGenericRegister(Copy, *RC, MRI);
2922 LdSt.getOperand(0).setReg(Copy);
2923 } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2924 // If this is an any-extending load from the FPR bank, split it into a regular
2925 // load + extend.
2926 if (RB.getID() == AArch64::FPRRegBankID) {
2927 unsigned SubReg;
2928 LLT MemTy = LdSt.getMMO().getMemoryType();
2929 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2930 if (!getSubRegForClass(RC, TRI, SubReg))
2931 return false;
2932 Register OldDst = LdSt.getReg(0);
2933 Register NewDst =
2934 MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
2935 LdSt.getOperand(0).setReg(NewDst);
2936 MRI.setRegBank(NewDst, RB);
2937 // Generate a SUBREG_TO_REG to extend it.
2938 MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
2939 MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
2940 .addImm(0)
2941 .addUse(NewDst)
2942 .addImm(SubReg);
2943 auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB);
2944 RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
2945 MIB.setInstr(LdSt);
2946 }
2947 }
2948
2949 // Helper lambda for partially selecting I. Either returns the original
2950 // instruction with an updated opcode, or a new instruction.
2951 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
2952 bool IsStore = isa<GStore>(I);
2953 const unsigned NewOpc =
2954 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
2955 if (NewOpc == I.getOpcode())
2956 return nullptr;
2957 // Check if we can fold anything into the addressing mode.
2958 auto AddrModeFns =
2959 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
2960 if (!AddrModeFns) {
2961 // Can't fold anything. Use the original instruction.
2962 I.setDesc(TII.get(NewOpc));
2963 I.addOperand(MachineOperand::CreateImm(0));
2964 return &I;
2965 }
2966
2967 // Folded something. Create a new instruction and return it.
2968 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
2969 Register CurValReg = I.getOperand(0).getReg();
2970 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
2971 NewInst.cloneMemRefs(I);
2972 for (auto &Fn : *AddrModeFns)
2973 Fn(NewInst);
2974 I.eraseFromParent();
2975 return &*NewInst;
2976 };
2977
2978 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
2979 if (!LoadStore)
2980 return false;
2981
2982 // If we're storing a 0, use WZR/XZR.
2983 if (Opcode == TargetOpcode::G_STORE) {
2984 auto CVal = getIConstantVRegValWithLookThrough(
2985 LoadStore->getOperand(0).getReg(), MRI);
2986 if (CVal && CVal->Value == 0) {
2987 switch (LoadStore->getOpcode()) {
2988 case AArch64::STRWui:
2989 case AArch64::STRHHui:
2990 case AArch64::STRBBui:
2991 LoadStore->getOperand(0).setReg(AArch64::WZR);
2992 break;
2993 case AArch64::STRXui:
2994 LoadStore->getOperand(0).setReg(AArch64::XZR);
2995 break;
2996 }
2997 }
2998 }
2999
3000 if (IsZExtLoad || (Opcode == TargetOpcode::G_LOAD &&
3001 ValTy == LLT::scalar(64) && MemSizeInBits == 32)) {
3002 // The any/zextload from a smaller type to i32 should be handled by the
3003 // importer.
3004 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
3005 return false;
3006 // If we have an extending load then change the load's type to be a
3007 // narrower reg and zero_extend with SUBREG_TO_REG.
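 // Added illustrative sketch (not from the upstream source):
 //   %dst:gpr(s64) = G_ZEXTLOAD %p(p0) :: (load (s32))
 // is emitted roughly as
 //   %ld:gpr32 = LDRWui %p, 0
 //   %dst:gpr64 = SUBREG_TO_REG 0, %ld, %subreg.sub_32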
3008 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3009 Register DstReg = LoadStore->getOperand(0).getReg();
3010 LoadStore->getOperand(0).setReg(LdReg);
3011
3012 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
3013 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
3014 .addImm(0)
3015 .addUse(LdReg)
3016 .addImm(AArch64::sub_32);
3017 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3018 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
3019 MRI);
3020 }
3021 return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3022 }
3023
3024 case TargetOpcode::G_INDEXED_ZEXTLOAD:
3025 case TargetOpcode::G_INDEXED_SEXTLOAD:
3026 return selectIndexedExtLoad(I, MRI);
3027 case TargetOpcode::G_INDEXED_LOAD:
3028 return selectIndexedLoad(I, MRI);
3029 case TargetOpcode::G_INDEXED_STORE:
3030 return selectIndexedStore(cast<GIndexedStore>(I), MRI);
3031
3032 case TargetOpcode::G_LSHR:
3033 case TargetOpcode::G_ASHR:
3034 if (MRI.getType(I.getOperand(0).getReg()).isVector())
3035 return selectVectorAshrLshr(I, MRI);
3036 [[fallthrough]];
3037 case TargetOpcode::G_SHL:
3038 if (Opcode == TargetOpcode::G_SHL &&
3039 MRI.getType(I.getOperand(0).getReg()).isVector())
3040 return selectVectorSHL(I, MRI);
3041
3042 // These shifts were legalized to have 64 bit shift amounts because we
3043 // want to take advantage of the selection patterns that assume the
3044 // immediates are s64s. However, selectBinaryOp will assume both operands
3045 // will have the same bit size.
3046 {
3047 Register SrcReg = I.getOperand(1).getReg();
3048 Register ShiftReg = I.getOperand(2).getReg();
3049 const LLT ShiftTy = MRI.getType(ShiftReg);
3050 const LLT SrcTy = MRI.getType(SrcReg);
3051 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
3052 ShiftTy.getSizeInBits() == 64) {
3053 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
3054 // Insert a subregister copy to implement a 64->32 trunc
3055 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
3056 .addReg(ShiftReg, 0, AArch64::sub_32);
3057 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
3058 I.getOperand(2).setReg(Trunc.getReg(0));
3059 }
3060 }
3061 [[fallthrough]];
3062 case TargetOpcode::G_OR: {
3063 // Reject the various things we don't support yet.
3064 if (unsupportedBinOp(I, RBI, MRI, TRI))
3065 return false;
3066
3067 const unsigned OpSize = Ty.getSizeInBits();
3068
3069 const Register DefReg = I.getOperand(0).getReg();
3070 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
3071
3072 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
3073 if (NewOpc == I.getOpcode())
3074 return false;
3075
3076 I.setDesc(TII.get(NewOpc));
3077 // FIXME: Should the type be always reset in setDesc?
3078
3079 // Now that we selected an opcode, we need to constrain the register
3080 // operands to use appropriate classes.
3081 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3082 }
3083
3084 case TargetOpcode::G_PTR_ADD: {
3085 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
3086 I.eraseFromParent();
3087 return true;
3088 }
3089
3090 case TargetOpcode::G_SADDE:
3091 case TargetOpcode::G_UADDE:
3092 case TargetOpcode::G_SSUBE:
3093 case TargetOpcode::G_USUBE:
3094 case TargetOpcode::G_SADDO:
3095 case TargetOpcode::G_UADDO:
3096 case TargetOpcode::G_SSUBO:
3097 case TargetOpcode::G_USUBO:
3098 return selectOverflowOp(I, MRI);
3099
3100 case TargetOpcode::G_PTRMASK: {
3101 Register MaskReg = I.getOperand(2).getReg();
3102 std::optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
3103 // TODO: Implement arbitrary cases
3104 if (!MaskVal || !isShiftedMask_64(*MaskVal))
3105 return false;
3106
3107 uint64_t Mask = *MaskVal;
3108 I.setDesc(TII.get(AArch64::ANDXri));
3109 I.getOperand(2).ChangeToImmediate(
3110 AArch64_AM::encodeLogicalImmediate(Mask, 64));
3111
3112 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3113 }
3114 case TargetOpcode::G_PTRTOINT:
3115 case TargetOpcode::G_TRUNC: {
3116 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3117 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3118
3119 const Register DstReg = I.getOperand(0).getReg();
3120 const Register SrcReg = I.getOperand(1).getReg();
3121
3122 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3123 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3124
3125 if (DstRB.getID() != SrcRB.getID()) {
3126 LLVM_DEBUG(
3127 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
3128 return false;
3129 }
3130
3131 if (DstRB.getID() == AArch64::GPRRegBankID) {
3132 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3133 if (!DstRC)
3134 return false;
3135
3136 const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB);
3137 if (!SrcRC)
3138 return false;
3139
3140 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
3141 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
3142 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
3143 return false;
3144 }
3145
3146 if (DstRC == SrcRC) {
3147 // Nothing to be done
3148 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
3149 SrcTy == LLT::scalar(64)) {
3150 llvm_unreachable("TableGen can import this case");
3151 return false;
3152 } else if (DstRC == &AArch64::GPR32RegClass &&
3153 SrcRC == &AArch64::GPR64RegClass) {
3154 I.getOperand(1).setSubReg(AArch64::sub_32);
3155 } else {
3156 LLVM_DEBUG(
3157 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3158 return false;
3159 }
3160
3161 I.setDesc(TII.get(TargetOpcode::COPY));
3162 return true;
3163 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3164 if (DstTy == LLT::fixed_vector(4, 16) &&
3165 SrcTy == LLT::fixed_vector(4, 32)) {
3166 I.setDesc(TII.get(AArch64::XTNv4i16));
3167 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3168 return true;
3169 }
3170
3171 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3172 MachineInstr *Extract = emitExtractVectorElt(
3173 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
3174 if (!Extract)
3175 return false;
3176 I.eraseFromParent();
3177 return true;
3178 }
3179
3180 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3181 if (Opcode == TargetOpcode::G_PTRTOINT) {
3182 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
3183 I.setDesc(TII.get(TargetOpcode::COPY));
3184 return selectCopy(I, TII, MRI, TRI, RBI);
3185 }
3186 }
3187
3188 return false;
3189 }
3190
3191 case TargetOpcode::G_ANYEXT: {
3192 if (selectUSMovFromExtend(I, MRI))
3193 return true;
3194
3195 const Register DstReg = I.getOperand(0).getReg();
3196 const Register SrcReg = I.getOperand(1).getReg();
3197
3198 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
3199 if (RBDst.getID() != AArch64::GPRRegBankID) {
3200 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
3201 << ", expected: GPR\n");
3202 return false;
3203 }
3204
3205 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
3206 if (RBSrc.getID() != AArch64::GPRRegBankID) {
3207 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
3208 << ", expected: GPR\n");
3209 return false;
3210 }
3211
3212 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
3213
3214 if (DstSize == 0) {
3215 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
3216 return false;
3217 }
3218
3219 if (DstSize != 64 && DstSize > 32) {
3220 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
3221 << ", expected: 32 or 64\n");
3222 return false;
3223 }
3224 // At this point G_ANYEXT is just like a plain COPY, but we need
3225 // to explicitly form the 64-bit value if any.
3226 if (DstSize > 32) {
3227 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3228 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
3229 .addDef(ExtSrc)
3230 .addImm(0)
3231 .addUse(SrcReg)
3232 .addImm(AArch64::sub_32);
3233 I.getOperand(1).setReg(ExtSrc);
3234 }
3235 return selectCopy(I, TII, MRI, TRI, RBI);
3236 }
3237
3238 case TargetOpcode::G_ZEXT:
3239 case TargetOpcode::G_SEXT_INREG:
3240 case TargetOpcode::G_SEXT: {
3241 if (selectUSMovFromExtend(I, MRI))
3242 return true;
3243
3244 unsigned Opcode = I.getOpcode();
3245 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3246 const Register DefReg = I.getOperand(0).getReg();
3247 Register SrcReg = I.getOperand(1).getReg();
3248 const LLT DstTy = MRI.getType(DefReg);
3249 const LLT SrcTy = MRI.getType(SrcReg);
3250 unsigned DstSize = DstTy.getSizeInBits();
3251 unsigned SrcSize = SrcTy.getSizeInBits();
3252
3253 // SEXT_INREG has the same src reg size as dst, the size of the value to be
3254 // extended is encoded in the imm.
3255 if (Opcode == TargetOpcode::G_SEXT_INREG)
3256 SrcSize = I.getOperand(2).getImm();
3257
3258 if (DstTy.isVector())
3259 return false; // Should be handled by imported patterns.
3260
3261 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3262 AArch64::GPRRegBankID &&
3263 "Unexpected ext regbank");
3264
3265 MachineInstr *ExtI;
3266
3267 // If we're extending the result of a load whose destination type is smaller
3268 // than 32 bits, then this zext is redundant. GPR32 is the smallest
3269 // GPR register on AArch64 and all loads which are smaller automatically
3270 // zero-extend the upper bits. E.g.
3271 // %v(s8) = G_LOAD %p, :: (load 1)
3272 // %v2(s32) = G_ZEXT %v(s8)
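 // In that situation the selected load (e.g. an LDRB/LDRH form) has already
 // zeroed the upper bits of the 32-bit register, so the extend below collapses
 // to a plain COPY.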
3273 if (!IsSigned) {
3274 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
3275 bool IsGPR =
3276 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3277 if (LoadMI && IsGPR) {
3278 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3279 unsigned BytesLoaded = MemOp->getSize();
3280 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3281 return selectCopy(I, TII, MRI, TRI, RBI);
3282 }
3283
3284 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3285 // + SUBREG_TO_REG.
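 // Roughly (sketch of the emitted MIR):
 //   %t:gpr32 = ORRWrs $wzr, %src, 0 ; 32-bit mov already clears bits 32-63
 //   %dst:gpr64 = SUBREG_TO_REG 0, %t, %subreg.sub_32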
3286 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3287 Register SubregToRegSrc =
3288 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3289 const Register ZReg = AArch64::WZR;
3290 MIB.buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg})
3291 .addImm(0);
3292
3293 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3294 .addImm(0)
3295 .addUse(SubregToRegSrc)
3296 .addImm(AArch64::sub_32);
3297
3298 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3299 MRI)) {
3300 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3301 return false;
3302 }
3303
3304 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3305 MRI)) {
3306 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3307 return false;
3308 }
3309
3310 I.eraseFromParent();
3311 return true;
3312 }
3313 }
3314
3315 if (DstSize == 64) {
3316 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3317 // FIXME: Can we avoid manually doing this?
3318 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3319 MRI)) {
3320 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3321 << " operand\n");
3322 return false;
3323 }
3324 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
3325 {&AArch64::GPR64RegClass}, {})
3326 .addImm(0)
3327 .addUse(SrcReg)
3328 .addImm(AArch64::sub_32)
3329 .getReg(0);
3330 }
3331
3332 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3333 {DefReg}, {SrcReg})
3334 .addImm(0)
3335 .addImm(SrcSize - 1);
3336 } else if (DstSize <= 32) {
3337 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3338 {DefReg}, {SrcReg})
3339 .addImm(0)
3340 .addImm(SrcSize - 1);
3341 } else {
3342 return false;
3343 }
3344
3345 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
3346 I.eraseFromParent();
3347 return true;
3348 }
3349
3350 case TargetOpcode::G_SITOFP:
3351 case TargetOpcode::G_UITOFP:
3352 case TargetOpcode::G_FPTOSI:
3353 case TargetOpcode::G_FPTOUI: {
3354 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
3355 SrcTy = MRI.getType(I.getOperand(1).getReg());
3356 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
3357 if (NewOpc == Opcode)
3358 return false;
3359
3360 I.setDesc(TII.get(NewOpc));
3361 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3362 I.setFlags(MachineInstr::NoFPExcept);
3363
3364 return true;
3365 }
3366
3367 case TargetOpcode::G_FREEZE:
3368 return selectCopy(I, TII, MRI, TRI, RBI);
3369
3370 case TargetOpcode::G_INTTOPTR:
3371 // The importer is currently unable to import pointer types since they
3372 // didn't exist in SelectionDAG.
3373 return selectCopy(I, TII, MRI, TRI, RBI);
3374
3375 case TargetOpcode::G_BITCAST:
3376 // Imported SelectionDAG rules can handle every bitcast except those that
3377 // bitcast from a type to the same type. Ideally, these shouldn't occur
3378 // but we might not run an optimizer that deletes them. The other exception
3379 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3380 // of them.
3381 return selectCopy(I, TII, MRI, TRI, RBI);
3382
3383 case TargetOpcode::G_SELECT: {
3384 auto &Sel = cast<GSelect>(I);
3385 const Register CondReg = Sel.getCondReg();
3386 const Register TReg = Sel.getTrueReg();
3387 const Register FReg = Sel.getFalseReg();
3388
3389 if (tryOptSelect(Sel))
3390 return true;
3391
3392 // Make sure to use an unused vreg instead of wzr, so that the peephole
3393 // optimizations will be able to optimize these.
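 // Sketch of what is emitted when no earlier fold applies:
 //   %dead:gpr32 = ANDSWri %cond, (encoded imm 1) ; test bit 0, set NZCV
 //   %dst = CSEL/FCSEL %true, %false, NE          ; via emitSelect below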
3394 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3395 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3396 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3397 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3398 if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
3399 return false;
3400 Sel.eraseFromParent();
3401 return true;
3402 }
3403 case TargetOpcode::G_ICMP: {
3404 if (Ty.isVector())
3405 return selectVectorICmp(I, MRI);
3406
3407 if (Ty != LLT::scalar(32)) {
3408 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3409 << ", expected: " << LLT::scalar(32) << '\n');
3410 return false;
3411 }
3412
3413 auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3414 const AArch64CC::CondCode InvCC =
3415 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
3416 emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB);
3417 emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
3418 /*Src2=*/AArch64::WZR, InvCC, MIB);
3419 I.eraseFromParent();
3420 return true;
3421 }
3422
3423 case TargetOpcode::G_FCMP: {
3424 CmpInst::Predicate Pred =
3425 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3426 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
3427 Pred) ||
3428 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
3429 return false;
3430 I.eraseFromParent();
3431 return true;
3432 }
3433 case TargetOpcode::G_VASTART:
3434 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3435 : selectVaStartAAPCS(I, MF, MRI);
3436 case TargetOpcode::G_INTRINSIC:
3437 return selectIntrinsic(I, MRI);
3438 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3439 return selectIntrinsicWithSideEffects(I, MRI);
3440 case TargetOpcode::G_IMPLICIT_DEF: {
3441 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3442 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3443 const Register DstReg = I.getOperand(0).getReg();
3444 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3445 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3446 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3447 return true;
3448 }
3449 case TargetOpcode::G_BLOCK_ADDR: {
3450 if (TM.getCodeModel() == CodeModel::Large && !TM.isPositionIndependent()) {
3451 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3452 I.eraseFromParent();
3453 return true;
3454 } else {
3455 I.setDesc(TII.get(AArch64::MOVaddrBA));
3456 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3457 I.getOperand(0).getReg())
3458 .addBlockAddress(I.getOperand(1).getBlockAddress(),
3459 /* Offset */ 0, AArch64II::MO_PAGE)
3460 .addBlockAddress(
3461 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3462 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3463 I.eraseFromParent();
3464 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3465 }
3466 }
3467 case AArch64::G_DUP: {
3468 // When the scalar of G_DUP is an s8/s16 gpr, it can't be selected by
3469 // imported patterns. Do it manually here. Avoiding generating s16 gpr is
3470 // difficult because at RBS we may end up pessimizing the fpr case if we
3471 // decided to add an anyextend to fix this. Manual selection is the most
3472 // robust solution for now.
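 // e.g. a G_DUP of an s8 gpr scalar into <8 x s8> is selected below as
 // DUPv8i8gpr; scalars on the fpr bank fall through to the imported patterns.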
3473 if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3474 AArch64::GPRRegBankID)
3475 return false; // We expect the fpr regbank case to be imported.
3476 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3477 if (VecTy == LLT::fixed_vector(8, 8))
3478 I.setDesc(TII.get(AArch64::DUPv8i8gpr));
3479 else if (VecTy == LLT::fixed_vector(16, 8))
3480 I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3481 else if (VecTy == LLT::fixed_vector(4, 16))
3482 I.setDesc(TII.get(AArch64::DUPv4i16gpr));
3483 else if (VecTy == LLT::fixed_vector(8, 16))
3484 I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3485 else
3486 return false;
3487 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3488 }
3489 case TargetOpcode::G_BUILD_VECTOR:
3490 return selectBuildVector(I, MRI);
3491 case TargetOpcode::G_MERGE_VALUES:
3492 return selectMergeValues(I, MRI);
3493 case TargetOpcode::G_UNMERGE_VALUES:
3494 return selectUnmergeValues(I, MRI);
3495 case TargetOpcode::G_SHUFFLE_VECTOR:
3496 return selectShuffleVector(I, MRI);
3497 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3498 return selectExtractElt(I, MRI);
3499 case TargetOpcode::G_INSERT_VECTOR_ELT:
3500 return selectInsertElt(I, MRI);
3501 case TargetOpcode::G_CONCAT_VECTORS:
3502 return selectConcatVectors(I, MRI);
3503 case TargetOpcode::G_JUMP_TABLE:
3504 return selectJumpTable(I, MRI);
3505 case TargetOpcode::G_MEMCPY:
3506 case TargetOpcode::G_MEMCPY_INLINE:
3507 case TargetOpcode::G_MEMMOVE:
3508 case TargetOpcode::G_MEMSET:
3509 assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
3510 return selectMOPS(I, MRI);
3511 }
3512
3513 return false;
3514}
3515
3516bool AArch64InstructionSelector::selectAndRestoreState(MachineInstr &I) {
3517 MachineIRBuilderState OldMIBState = MIB.getState();
3518 bool Success = select(I);
3519 MIB.setState(OldMIBState);
3520 return Success;
3521}
3522
3523bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
3524 MachineRegisterInfo &MRI) {
3525 unsigned Mopcode;
3526 switch (GI.getOpcode()) {
3527 case TargetOpcode::G_MEMCPY:
3528 case TargetOpcode::G_MEMCPY_INLINE:
3529 Mopcode = AArch64::MOPSMemoryCopyPseudo;
3530 break;
3531 case TargetOpcode::G_MEMMOVE:
3532 Mopcode = AArch64::MOPSMemoryMovePseudo;
3533 break;
3534 case TargetOpcode::G_MEMSET:
3535 // For tagged memset see llvm.aarch64.mops.memset.tag
3536 Mopcode = AArch64::MOPSMemorySetPseudo;
3537 break;
3538 }
3539
3540 auto &DstPtr = GI.getOperand(0);
3541 auto &SrcOrVal = GI.getOperand(1);
3542 auto &Size = GI.getOperand(2);
3543
3544 // Create copies of the registers that can be clobbered.
3545 const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
3546 const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
3547 const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg());
3548
3549 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3550 const auto &SrcValRegClass =
3551 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3552
3553 // Constrain to specific registers
3554 RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
3555 RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
3556 RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);
3557
3558 MIB.buildCopy(DstPtrCopy, DstPtr);
3559 MIB.buildCopy(SrcValCopy, SrcOrVal);
3560 MIB.buildCopy(SizeCopy, Size);
3561
3562 // New instruction uses the copied registers because it must update them.
3563 // The defs are not used since they don't exist in G_MEM*. They are still
3564 // tied.
3565 // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE
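 // e.g. for a memcpy the pseudo is built (roughly) as
 //   %d, %s, %n = MOPSMemoryCopyPseudo %DstPtrCopy, %SrcValCopy, %SizeCopy
 // while the memset pseudo below takes (dst, size, value) instead.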
3566 Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
3567 Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3568 if (IsSet) {
3569 MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
3570 {DstPtrCopy, SizeCopy, SrcValCopy});
3571 } else {
3572 Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
3573 MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
3574 {DstPtrCopy, SrcValCopy, SizeCopy});
3575 }
3576
3577 GI.eraseFromParent();
3578 return true;
3579}
3580
3581bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3582 MachineRegisterInfo &MRI) {
3583 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3584 Register JTAddr = I.getOperand(0).getReg();
3585 unsigned JTI = I.getOperand(1).getIndex();
3586 Register Index = I.getOperand(2).getReg();
3587
3588 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3589 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3590
3591 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3592 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3593 {TargetReg, ScratchReg}, {JTAddr, Index})
3594 .addJumpTableIndex(JTI);
3595 // Save the jump table info.
3596 MIB.buildInstr(TargetOpcode::JUMP_TABLE_DEBUG_INFO, {},
3597 {static_cast<int64_t>(JTI)});
3598 // Build the indirect branch.
3599 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3600 I.eraseFromParent();
3601 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3602}
3603
3604bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3605 MachineRegisterInfo &MRI) {
3606 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3607 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3608
3609 Register DstReg = I.getOperand(0).getReg();
3610 unsigned JTI = I.getOperand(1).getIndex();
3611 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
3612 auto MovMI =
3613 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3614 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3615 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3616 I.eraseFromParent();
3617 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3618}
3619
3620bool AArch64InstructionSelector::selectTLSGlobalValue(
3621 MachineInstr &I, MachineRegisterInfo &MRI) {
3622 if (!STI.isTargetMachO())
3623 return false;
3624 MachineFunction &MF = *I.getParent()->getParent();
3625 MF.getFrameInfo().setAdjustsStack(true);
3626
3627 const auto &GlobalOp = I.getOperand(1);
3628 assert(GlobalOp.getOffset() == 0 &&
3629 "Shouldn't have an offset on TLS globals!");
3630 const GlobalValue &GV = *GlobalOp.getGlobal();
3631
3632 auto LoadGOT =
3633 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3634 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3635
3636 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3637 {LoadGOT.getReg(0)})
3638 .addImm(0);
3639
3640 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3641 // TLS calls preserve all registers except those that absolutely must be
3642 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3643 // silly).
3644 MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
3645 .addUse(AArch64::X0, RegState::Implicit)
3646 .addDef(AArch64::X0, RegState::Implicit)
3647 .addRegMask(TRI.getTLSCallPreservedMask());
3648
3649 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3650 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3651 MRI);
3652 I.eraseFromParent();
3653 return true;
3654}
3655
3656bool AArch64InstructionSelector::selectVectorICmp(
3657 MachineInstr &I, MachineRegisterInfo &MRI) {
3658 Register DstReg = I.getOperand(0).getReg();
3659 LLT DstTy = MRI.getType(DstReg);
3660 Register SrcReg = I.getOperand(2).getReg();
3661 Register Src2Reg = I.getOperand(3).getReg();
3662 LLT SrcTy = MRI.getType(SrcReg);
3663
3664 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
3665 unsigned NumElts = DstTy.getNumElements();
3666
3667 // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
3668 // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
3669 // Third index is cc opcode:
3670 // 0 == eq
3671 // 1 == ugt
3672 // 2 == uge
3673 // 3 == ult
3674 // 4 == ule
3675 // 5 == sgt
3676 // 6 == sge
3677 // 7 == slt
3678 // 8 == sle
3679 // ne is done by negating 'eq' result.
3680
3681 // This table below assumes that for some comparisons the operands will be
3682 // commuted.
3683 // ult op == commute + ugt op
3684 // ule op == commute + uge op
3685 // slt op == commute + sgt op
3686 // sle op == commute + sge op
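 // For example, an "ult" compare of <2 x s32> uses the CMHIv2i32 entry with
 // the operands swapped, and "ne" reuses the CMEQ entry and inverts the result
 // with a NOT afterwards.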
3687 unsigned PredIdx = 0;
3688 bool SwapOperands = false;
3689 CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
3690 switch (Pred) {
3691 case CmpInst::ICMP_NE:
3692 case CmpInst::ICMP_EQ:
3693 PredIdx = 0;
3694 break;
3695 case CmpInst::ICMP_UGT:
3696 PredIdx = 1;
3697 break;
3698 case CmpInst::ICMP_UGE:
3699 PredIdx = 2;
3700 break;
3701 case CmpInst::ICMP_ULT:
3702 PredIdx = 3;
3703 SwapOperands = true;
3704 break;
3705 case CmpInst::ICMP_ULE:
3706 PredIdx = 4;
3707 SwapOperands = true;
3708 break;
3709 case CmpInst::ICMP_SGT:
3710 PredIdx = 5;
3711 break;
3712 case CmpInst::ICMP_SGE:
3713 PredIdx = 6;
3714 break;
3715 case CmpInst::ICMP_SLT:
3716 PredIdx = 7;
3717 SwapOperands = true;
3718 break;
3719 case CmpInst::ICMP_SLE:
3720 PredIdx = 8;
3721 SwapOperands = true;
3722 break;
3723 default:
3724 llvm_unreachable("Unhandled icmp predicate");
3725 return false;
3726 }
3727
3728 // This table obviously should be tablegen'd when we have our GISel native
3729 // tablegen selector.
3730
3731 static const unsigned OpcTable[4][4][9] = {
3732 {
3733 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3734 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3735 0 /* invalid */},
3736 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3737 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3738 0 /* invalid */},
3739 {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
3740 AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
3741 AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
3742 {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
3743 AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
3744 AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
3745 },
3746 {
3747 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3748 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3749 0 /* invalid */},
3750 {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
3751 AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
3752 AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
3753 {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
3754 AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
3755 AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
3756 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3757 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3758 0 /* invalid */}
3759 },
3760 {
3761 {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
3762 AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
3763 AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
3764 {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
3765 AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
3766 AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
3767 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3768 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3769 0 /* invalid */},
3770 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3771 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3772 0 /* invalid */}
3773 },
3774 {
3775 {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
3776 AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
3777 AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
3778 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3779 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3780 0 /* invalid */},
3781 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3782 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3783 0 /* invalid */},
3784 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3785 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3786 0 /* invalid */}
3787 },
3788 };
3789 unsigned EltIdx = Log2_32(SrcEltSize / 8);
3790 unsigned NumEltsIdx = Log2_32(NumElts / 2);
3791 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
3792 if (!Opc) {
3793 LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
3794 return false;
3795 }
3796
3797 const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3798 const TargetRegisterClass *SrcRC =
3799 getRegClassForTypeOnBank(SrcTy, VecRB, true);
3800 if (!SrcRC) {
3801 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3802 return false;
3803 }
3804
3805 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
3806 if (SrcTy.getSizeInBits() == 128)
3807 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
3808
3809 if (SwapOperands)
3810 std::swap(SrcReg, Src2Reg);
3811
3812 auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
3813 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3814
3815 // Invert if we had a 'ne' cc.
3816 if (NotOpc) {
3817 Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
3818 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3819 } else {
3820 MIB.buildCopy(DstReg, Cmp.getReg(0));
3821 }
3822 RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
3823 I.eraseFromParent();
3824 return true;
3825}
3826
3827MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3828 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3829 MachineIRBuilder &MIRBuilder) const {
3830 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3831
3832 auto BuildFn = [&](unsigned SubregIndex) {
3833 auto Ins =
3834 MIRBuilder
3835 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3836 .addImm(SubregIndex);
3837 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3838 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3839 return &*Ins;
3840 };
3841
3842 switch (EltSize) {
3843 case 8:
3844 return BuildFn(AArch64::bsub);
3845 case 16:
3846 return BuildFn(AArch64::hsub);
3847 case 32:
3848 return BuildFn(AArch64::ssub);
3849 case 64:
3850 return BuildFn(AArch64::dsub);
3851 default:
3852 return nullptr;
3853 }
3854}
3855
3856MachineInstr *
3857AArch64InstructionSelector::emitNarrowVector(Register DstReg, Register SrcReg,
3858 MachineIRBuilder &MIB,
3859 MachineRegisterInfo &MRI) const {
3860 LLT DstTy = MRI.getType(DstReg);
3861 const TargetRegisterClass *RC =
3862 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(SrcReg, MRI, TRI));
3863 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3864 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3865 return nullptr;
3866 }
3867 unsigned SubReg = 0;
3868 if (!getSubRegForClass(RC, TRI, SubReg))
3869 return nullptr;
3870 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3871 LLVM_DEBUG(dbgs() << "Unsupported destination size! ("
3872 << DstTy.getSizeInBits() << "\n");
3873 return nullptr;
3874 }
3875 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3876 .addReg(SrcReg, 0, SubReg);
3877 RBI.constrainGenericRegister(DstReg, *RC, MRI);
3878 return Copy;
3879}
3880
3881bool AArch64InstructionSelector::selectMergeValues(
3882 MachineInstr &I, MachineRegisterInfo &MRI) {
3883 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3884 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3885 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3886 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3887 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3888
3889 if (I.getNumOperands() != 3)
3890 return false;
3891
3892 // Merging 2 s64s into an s128.
3893 if (DstTy == LLT::scalar(128)) {
3894 if (SrcTy.getSizeInBits() != 64)
3895 return false;
3896 Register DstReg = I.getOperand(0).getReg();
3897 Register Src1Reg = I.getOperand(1).getReg();
3898 Register Src2Reg = I.getOperand(2).getReg();
3899 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3900 MachineInstr *InsMI = emitLaneInsert(std::nullopt, Tmp.getReg(0), Src1Reg,
3901 /* LaneIdx */ 0, RB, MIB);
3902 if (!InsMI)
3903 return false;
3904 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
3905 Src2Reg, /* LaneIdx */ 1, RB, MIB);
3906 if (!Ins2MI)
3907 return false;
3908 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3909 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
3910 I.eraseFromParent();
3911 return true;
3912 }
3913
3914 if (RB.getID() != AArch64::GPRRegBankID)
3915 return false;
3916
3917 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3918 return false;
3919
3920 auto *DstRC = &AArch64::GPR64RegClass;
3921 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
3922 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3923 TII.get(TargetOpcode::SUBREG_TO_REG))
3924 .addDef(SubToRegDef)
3925 .addImm(0)
3926 .addUse(I.getOperand(1).getReg())
3927 .addImm(AArch64::sub_32);
3928 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
3929 // Need to anyext the second scalar before we can use bfm
3930 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3931 TII.get(TargetOpcode::SUBREG_TO_REG))
3932 .addDef(SubToRegDef2)
3933 .addImm(0)
3934 .addUse(I.getOperand(2).getReg())
3935 .addImm(AArch64::sub_32);
3936 MachineInstr &BFM =
3937 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
3938 .addDef(I.getOperand(0).getReg())
3939 .addUse(SubToRegDef)
3940 .addUse(SubToRegDef2)
3941 .addImm(32)
3942 .addImm(31);
3943 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
3944 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
3945 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
3946 I.eraseFromParent();
3947 return true;
3948}
3949
3950static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3951 const unsigned EltSize) {
3952 // Choose a lane copy opcode and subregister based off of the size of the
3953 // vector's elements.
3954 switch (EltSize) {
3955 case 8:
3956 CopyOpc = AArch64::DUPi8;
3957 ExtractSubReg = AArch64::bsub;
3958 break;
3959 case 16:
3960 CopyOpc = AArch64::DUPi16;
3961 ExtractSubReg = AArch64::hsub;
3962 break;
3963 case 32:
3964 CopyOpc = AArch64::DUPi32;
3965 ExtractSubReg = AArch64::ssub;
3966 break;
3967 case 64:
3968 CopyOpc = AArch64::DUPi64;
3969 ExtractSubReg = AArch64::dsub;
3970 break;
3971 default:
3972 // Unknown size, bail out.
3973 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
3974 return false;
3975 }
3976 return true;
3977}
3978
3979MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3980 std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
3981 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
3982 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3983 unsigned CopyOpc = 0;
3984 unsigned ExtractSubReg = 0;
3985 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
3986 LLVM_DEBUG(
3987 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
3988 return nullptr;
3989 }
3990
3991 const TargetRegisterClass *DstRC =
3992 getRegClassForTypeOnBank(ScalarTy, DstRB, true);
3993 if (!DstRC) {
3994 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
3995 return nullptr;
3996 }
3997
3998 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
3999 const LLT &VecTy = MRI.getType(VecReg);
4000 const TargetRegisterClass *VecRC =
4001 getRegClassForTypeOnBank(VecTy, VecRB, true);
4002 if (!VecRC) {
4003 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
4004 return nullptr;
4005 }
4006
4007 // The register that we're going to copy into.
4008 Register InsertReg = VecReg;
4009 if (!DstReg)
4010 DstReg = MRI.createVirtualRegister(DstRC);
4011 // If the lane index is 0, we just use a subregister COPY.
4012 if (LaneIdx == 0) {
4013 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
4014 .addReg(VecReg, 0, ExtractSubReg);
4015 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
4016 return &*Copy;
4017 }
4018
4019 // Lane copies require 128-bit wide registers. If we're dealing with an
4020 // unpacked vector, then we need to move up to that width. Insert an implicit
4021 // def and a subregister insert to get us there.
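 // Sketch for a 64-bit (unpacked) source vector:
 //   %undef:fpr128 = IMPLICIT_DEF
 //   %wide:fpr128 = INSERT_SUBREG %undef, %vec, %subreg.dsub
 //   %dst = DUPi<n> %wide, lane ; the lane copy built below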
4022 if (VecTy.getSizeInBits() != 128) {
4023 MachineInstr *ScalarToVector = emitScalarToVector(
4024 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
4025 if (!ScalarToVector)
4026 return nullptr;
4027 InsertReg = ScalarToVector->getOperand(0).getReg();
4028 }
4029
4030 MachineInstr *LaneCopyMI =
4031 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
4032 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
4033
4034 // Make sure that we actually constrain the initial copy.
4035 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
4036 return LaneCopyMI;
4037}
4038
4039bool AArch64InstructionSelector::selectExtractElt(
4040 MachineInstr &I, MachineRegisterInfo &MRI) {
4041 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
4042 "unexpected opcode!");
4043 Register DstReg = I.getOperand(0).getReg();
4044 const LLT NarrowTy = MRI.getType(DstReg);
4045 const Register SrcReg = I.getOperand(1).getReg();
4046 const LLT WideTy = MRI.getType(SrcReg);
4047 (void)WideTy;
4048 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
4049 "source register size too small!");
4050 assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
4051
4052 // Need the lane index to determine the correct copy opcode.
4053 MachineOperand &LaneIdxOp = I.getOperand(2);
4054 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
4055
4056 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4057 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
4058 return false;
4059 }
4060
4061 // Find the index to extract from.
4062 auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
4063 if (!VRegAndVal)
4064 return false;
4065 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4066
4067
4068 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
4069 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
4070 LaneIdx, MIB);
4071 if (!Extract)
4072 return false;
4073
4074 I.eraseFromParent();
4075 return true;
4076}
4077
4078bool AArch64InstructionSelector::selectSplitVectorUnmerge(
4079 MachineInstr &I, MachineRegisterInfo &MRI) {
4080 unsigned NumElts = I.getNumOperands() - 1;
4081 Register SrcReg = I.getOperand(NumElts).getReg();
4082 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4083 const LLT SrcTy = MRI.getType(SrcReg);
4084
4085 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
4086 if (SrcTy.getSizeInBits() > 128) {
4087 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
4088 return false;
4089 }
4090
4091 // We implement a split vector operation by treating the sub-vectors as
4092 // scalars and extracting them.
4093 const RegisterBank &DstRB =
4094 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
4095 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4096 Register Dst = I.getOperand(OpIdx).getReg();
4097 MachineInstr *Extract =
4098 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
4099 if (!Extract)
4100 return false;
4101 }
4102 I.eraseFromParent();
4103 return true;
4104}
4105
4106bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
4107 MachineRegisterInfo &MRI) {
4108 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4109 "unexpected opcode");
4110
4111 // TODO: Handle unmerging into GPRs and from scalars to scalars.
4112 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
4113 AArch64::FPRRegBankID ||
4114 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
4115 AArch64::FPRRegBankID) {
4116 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
4117 "currently unsupported.\n");
4118 return false;
4119 }
4120
4121 // The last operand is the vector source register, and every other operand is
4122 // a register to unpack into.
4123 unsigned NumElts = I.getNumOperands() - 1;
4124 Register SrcReg = I.getOperand(NumElts).getReg();
4125 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4126 const LLT WideTy = MRI.getType(SrcReg);
4127 (void)WideTy;
4128 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
4129 "can only unmerge from vector or s128 types!");
4130 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
4131 "source register size too small!");
4132
4133 if (!NarrowTy.isScalar())
4134 return selectSplitVectorUnmerge(I, MRI);
4135
4136 // Choose a lane copy opcode and subregister based off of the size of the
4137 // vector's elements.
4138 unsigned CopyOpc = 0;
4139 unsigned ExtractSubReg = 0;
4140 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
4141 return false;
4142
4143 // Set up for the lane copies.
4144 MachineBasicBlock &MBB = *I.getParent();
4145
4146 // Stores the registers we'll be copying from.
4147 SmallVector<Register, 4> InsertRegs;
4148
4149 // We'll use the first register twice, so we only need NumElts-1 registers.
4150 unsigned NumInsertRegs = NumElts - 1;
4151
4152 // If our elements fit into exactly 128 bits, then we can copy from the source
4153 // directly. Otherwise, we need to do a bit of setup with some subregister
4154 // inserts.
4155 if (NarrowTy.getSizeInBits() * NumElts == 128) {
4156 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
4157 } else {
4158 // No. We have to perform subregister inserts. For each insert, create an
4159 // implicit def and a subregister insert, and save the register we create.
4160 const TargetRegisterClass *RC = getRegClassForTypeOnBank(
4161 LLT::fixed_vector(NumElts, WideTy.getScalarSizeInBits()),
4162 *RBI.getRegBank(SrcReg, MRI, TRI));
4163 unsigned SubReg = 0;
4164 bool Found = getSubRegForClass(RC, TRI, SubReg);
4165 (void)Found;
4166 assert(Found && "expected to find last operand's subreg idx");
4167 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4168 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4169 MachineInstr &ImpDefMI =
4170 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
4171 ImpDefReg);
4172
4173 // Now, create the subregister insert from SrcReg.
4174 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4175 MachineInstr &InsMI =
4176 *BuildMI(MBB, I, I.getDebugLoc(),
4177 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4178 .addUse(ImpDefReg)
4179 .addUse(SrcReg)
4180 .addImm(SubReg);
4181
4182 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
4183 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
4184
4185 // Save the register so that we can copy from it after.
4186 InsertRegs.push_back(InsertReg);
4187 }
4188 }
4189
4190 // Now that we've created any necessary subregister inserts, we can
4191 // create the copies.
4192 //
4193 // Perform the first copy separately as a subregister copy.
4194 Register CopyTo = I.getOperand(0).getReg();
4195 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4196 .addReg(InsertRegs[0], 0, ExtractSubReg);
4197 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
4198
4199 // Now, perform the remaining copies as vector lane copies.
4200 unsigned LaneIdx = 1;
4201 for (Register InsReg : InsertRegs) {
4202 Register CopyTo = I.getOperand(LaneIdx).getReg();
4203 MachineInstr &CopyInst =
4204 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
4205 .addUse(InsReg)
4206 .addImm(LaneIdx);
4207 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
4208 ++LaneIdx;
4209 }
4210
4211 // Separately constrain the first copy's destination. Because of the
4212 // limitation in constrainOperandRegClass, we can't guarantee that this will
4213 // actually be constrained. So, do it ourselves using the second operand.
4214 const TargetRegisterClass *RC =
4215 MRI.getRegClassOrNull(I.getOperand(1).getReg());
4216 if (!RC) {
4217 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
4218 return false;
4219 }
4220
4221 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
4222 I.eraseFromParent();
4223 return true;
4224}
4225
4226bool AArch64InstructionSelector::selectConcatVectors(
4227 MachineInstr &I, MachineRegisterInfo &MRI) {
4228 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4229 "Unexpected opcode");
4230 Register Dst = I.getOperand(0).getReg();
4231 Register Op1 = I.getOperand(1).getReg();
4232 Register Op2 = I.getOperand(2).getReg();
4233 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4234 if (!ConcatMI)
4235 return false;
4236 I.eraseFromParent();
4237 return true;
4238}
4239
4240unsigned
4241AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4242 MachineFunction &MF) const {
4243 Type *CPTy = CPVal->getType();
4244 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
4245
4246 MachineConstantPool *MCP = MF.getConstantPool();
4247 return MCP->getConstantPoolIndex(CPVal, Alignment);
4248}
4249
4250MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4251 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4252 const TargetRegisterClass *RC;
4253 unsigned Opc;
4254 bool IsTiny = TM.getCodeModel() == CodeModel::Tiny;
4255 unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
4256 switch (Size) {
4257 case 16:
4258 RC = &AArch64::FPR128RegClass;
4259 Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
4260 break;
4261 case 8:
4262 RC = &AArch64::FPR64RegClass;
4263 Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
4264 break;
4265 case 4:
4266 RC = &AArch64::FPR32RegClass;
4267 Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
4268 break;
4269 case 2:
4270 RC = &AArch64::FPR16RegClass;
4271 Opc = AArch64::LDRHui;
4272 break;
4273 default:
4274 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4275 << *CPVal->getType());
4276 return nullptr;
4277 }
4278
4279 MachineInstr *LoadMI = nullptr;
4280 auto &MF = MIRBuilder.getMF();
4281 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4282 if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) {
4283 // Use load(literal) for tiny code model.
4284 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {}).addConstantPoolIndex(CPIdx);
4285 } else {
4286 auto Adrp =
4287 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4288 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
4289
4290 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {Adrp})
4291 .addConstantPoolIndex(
4292 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4293
4294 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
4295 }
4296
4297 MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4298 LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
4299 MachineMemOperand::MOLoad,
4300 Size, Align(Size)));
4301 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
4302 return LoadMI;
4303}
4304
4305/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
4306/// size and RB.
4307static std::pair<unsigned, unsigned>
4308getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4309 unsigned Opc, SubregIdx;
4310 if (RB.getID() == AArch64::GPRRegBankID) {
4311 if (EltSize == 8) {
4312 Opc = AArch64::INSvi8gpr;
4313 SubregIdx = AArch64::bsub;
4314 } else if (EltSize == 16) {
4315 Opc = AArch64::INSvi16gpr;
4316 SubregIdx = AArch64::ssub;
4317 } else if (EltSize == 32) {
4318 Opc = AArch64::INSvi32gpr;
4319 SubregIdx = AArch64::ssub;
4320 } else if (EltSize == 64) {
4321 Opc = AArch64::INSvi64gpr;
4322 SubregIdx = AArch64::dsub;
4323 } else {
4324 llvm_unreachable("invalid elt size!");
4325 }
4326 } else {
4327 if (EltSize == 8) {
4328 Opc = AArch64::INSvi8lane;
4329 SubregIdx = AArch64::bsub;
4330 } else if (EltSize == 16) {
4331 Opc = AArch64::INSvi16lane;
4332 SubregIdx = AArch64::hsub;
4333 } else if (EltSize == 32) {
4334 Opc = AArch64::INSvi32lane;
4335 SubregIdx = AArch64::ssub;
4336 } else if (EltSize == 64) {
4337 Opc = AArch64::INSvi64lane;
4338 SubregIdx = AArch64::dsub;
4339 } else {
4340 llvm_unreachable("invalid elt size!");
4341 }
4342 }
4343 return std::make_pair(Opc, SubregIdx);
4344}
4345
4346MachineInstr *AArch64InstructionSelector::emitInstr(
4347 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4348 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4349 const ComplexRendererFns &RenderFns) const {
4350 assert(Opcode && "Expected an opcode?");
4351 assert(!isPreISelGenericOpcode(Opcode) &&
4352 "Function should only be used to produce selected instructions!");
4353 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4354 if (RenderFns)
4355 for (auto &Fn : *RenderFns)
4356 Fn(MI);
4357 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4358 return &*MI;
4359}
4360
4361MachineInstr *AArch64InstructionSelector::emitAddSub(
4362 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4363 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4364 MachineIRBuilder &MIRBuilder) const {
4365 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4366 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4367 auto Ty = MRI.getType(LHS.getReg());
4368 assert(!Ty.isVector() && "Expected a scalar or pointer?");
4369 unsigned Size = Ty.getSizeInBits();
4370 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4371 bool Is32Bit = Size == 32;
4372
4373 // INSTRri form with positive arithmetic immediate.
4374 if (auto Fns = selectArithImmed(RHS))
4375 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4376 MIRBuilder, Fns);
4377
4378 // INSTRri form with negative arithmetic immediate.
4379 if (auto Fns = selectNegArithImmed(RHS))
4380 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4381 MIRBuilder, Fns);
4382
4383 // INSTRrx form.
4384 if (auto Fns = selectArithExtendedRegister(RHS))
4385 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4386 MIRBuilder, Fns);
4387
4388 // INSTRrs form.
4389 if (auto Fns = selectShiftedRegister(RHS))
4390 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4391 MIRBuilder, Fns);
4392 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4393 MIRBuilder);
4394}
4395
4396MachineInstr *
4397AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4398 MachineOperand &RHS,
4399 MachineIRBuilder &MIRBuilder) const {
4400 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4401 {{AArch64::ADDXri, AArch64::ADDWri},
4402 {AArch64::ADDXrs, AArch64::ADDWrs},
4403 {AArch64::ADDXrr, AArch64::ADDWrr},
4404 {AArch64::SUBXri, AArch64::SUBWri},
4405 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4406 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4407}
4408
4409MachineInstr *
4410AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4411 MachineOperand &RHS,
4412 MachineIRBuilder &MIRBuilder) const {
4413 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4414 {{AArch64::ADDSXri, AArch64::ADDSWri},
4415 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4416 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4417 {AArch64::SUBSXri, AArch64::SUBSWri},
4418 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4419 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4420}
4421
4422MachineInstr *
4423AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4424 MachineOperand &RHS,
4425 MachineIRBuilder &MIRBuilder) const {
4426 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4427 {{AArch64::SUBSXri, AArch64::SUBSWri},
4428 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4429 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4430 {AArch64::ADDSXri, AArch64::ADDSWri},
4431 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4432 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4433}
4434
4435MachineInstr *
4436AArch64InstructionSelector::emitADCS(Register Dst, MachineOperand &LHS,
4437 MachineOperand &RHS,
4438 MachineIRBuilder &MIRBuilder) const {
4439 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4440 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4441 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4442 static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr};
4443 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4444}
4445
4446MachineInstr *
4447AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS,
4448 MachineOperand &RHS,
4449 MachineIRBuilder &MIRBuilder) const {
4450 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4451 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4452 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4453 static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr};
4454 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4455}
4456
4457MachineInstr *
4458AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4459 MachineIRBuilder &MIRBuilder) const {
4460 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4461 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4462 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4463 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4464}
4465
4466MachineInstr *
4467AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4468 MachineIRBuilder &MIRBuilder) const {
4469 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4470 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4471 LLT Ty = MRI.getType(LHS.getReg());
4472 unsigned RegSize = Ty.getSizeInBits();
4473 bool Is32Bit = (RegSize == 32);
4474 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4475 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4476 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4477 // ANDS needs a logical immediate for its immediate form. Check if we can
4478 // fold one in.
4479 if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4480 int64_t Imm = ValAndVReg->Value.getSExtValue();
4481
4482 if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
4483 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4484 TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
4485 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4486 return &*TstMI;
4487 }
4488 }
4489
4490 if (auto Fns = selectLogicalShiftedRegister(RHS))
4491 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4492 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4493}
4494
4495MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4496 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4497 MachineIRBuilder &MIRBuilder) const {
4498 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4499 assert(Predicate.isPredicate() && "Expected predicate?");
4500 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4501 LLT CmpTy = MRI.getType(LHS.getReg());
4502 assert(!CmpTy.isVector() && "Expected scalar or pointer");
4503 unsigned Size = CmpTy.getSizeInBits();
4504 (void)Size;
4505 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4506 // Fold the compare into a cmn or tst if possible.
4507 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4508 return FoldCmp;
4509 auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
4510 return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4511}
4512
4513MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4514 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4515 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4516#ifndef NDEBUG
4517 LLT Ty = MRI.getType(Dst);
4518 assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4519 "Expected a 32-bit scalar register?");
4520#endif
4521 const Register ZReg = AArch64::WZR;
4522 AArch64CC::CondCode CC1, CC2;
4523 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
4524 auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
4525 if (CC2 == AArch64CC::AL)
4526 return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
4527 MIRBuilder);
4528 const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4529 Register Def1Reg = MRI.createVirtualRegister(RC);
4530 Register Def2Reg = MRI.createVirtualRegister(RC);
4531 auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
4532 emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
4533 emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
4534 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4535 constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4536 return &*OrMI;
4537}
4538
4539MachineInstr *AArch64InstructionSelector::emitFPCompare(
4540 Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
4541 std::optional<CmpInst::Predicate> Pred) const {
4542 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4543 LLT Ty = MRI.getType(LHS);
4544 if (Ty.isVector())
4545 return nullptr;
4546 unsigned OpSize = Ty.getSizeInBits();
4547 assert(OpSize == 16 || OpSize == 32 || OpSize == 64);
4548
4549 // If this is a compare against +0.0, then we don't have
4550 // to explicitly materialize a constant.
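 // e.g. a compare of %x against +0.0 can use the FCMP?ri forms
 // (FCMPHri/FCMPSri/FCMPDri, compare with #0.0) rather than materializing the
 // zero in a register first.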
4551 const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4552 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4553
4554 auto IsEqualityPred = [](CmpInst::Predicate P) {
4555 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4556 P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4557 };
4558 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4559 // Try commutating the operands.
4560 const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4561 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4562 ShouldUseImm = true;
4563 std::swap(LHS, RHS);
4564 }
4565 }
4566 unsigned CmpOpcTbl[2][3] = {
4567 {AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr},
4568 {AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}};
4569 unsigned CmpOpc =
4570 CmpOpcTbl[ShouldUseImm][OpSize == 16 ? 0 : (OpSize == 32 ? 1 : 2)];
4571
4572 // Partially build the compare. Decide if we need to add a use for the
4573 // third operand based off whether or not we're comparing against 0.0.
4574 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4575 CmpMI.setMIFlags(MachineInstr::NoFPExcept);
4576 if (!ShouldUseImm)
4577 CmpMI.addUse(RHS);
4578 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4579 return &*CmpMI;
4580}
4581
4582MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4583 std::optional<Register> Dst, Register Op1, Register Op2,
4584 MachineIRBuilder &MIRBuilder) const {
4585 // We implement a vector concat by:
4586 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4587 // 2. Insert the upper vector into the destination's upper element
4588 // TODO: some of this code is common with G_BUILD_VECTOR handling.
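 // Rough shape of the output for two 64-bit vectors (a sketch):
 //   %w1:fpr128 = INSERT_SUBREG (IMPLICIT_DEF), %op1, %subreg.dsub
 //   %w2:fpr128 = INSERT_SUBREG (IMPLICIT_DEF), %op2, %subreg.dsub
 //   %dst = INSvi64lane %w1, 1, %w2, 0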
4589 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4590
4591 const LLT Op1Ty = MRI.getType(Op1);
4592 const LLT Op2Ty = MRI.getType(Op2);
4593
4594 if (Op1Ty != Op2Ty) {
4595 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4596 return nullptr;
4597 }
4598 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4599
4600 if (Op1Ty.getSizeInBits() >= 128) {
4601 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4602 return nullptr;
4603 }
4604
4605 // At the moment we just support 64 bit vector concats.
4606 if (Op1Ty.getSizeInBits() != 64) {
4607 LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
4608 return nullptr;
4609 }
4610
4611 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4612 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4613 const TargetRegisterClass *DstRC =
4614 getRegClassForTypeOnBank(Op1Ty.multiplyElements(2), FPRBank);
4615
4616 MachineInstr *WidenedOp1 =
4617 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4618 MachineInstr *WidenedOp2 =
4619 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4620 if (!WidenedOp1 || !WidenedOp2) {
4621 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4622 return nullptr;
4623 }
4624
4625 // Now do the insert of the upper element.
4626 unsigned InsertOpc, InsSubRegIdx;
4627 std::tie(InsertOpc, InsSubRegIdx) =
4628 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4629
4630 if (!Dst)
4631 Dst = MRI.createVirtualRegister(DstRC);
4632 auto InsElt =
4633 MIRBuilder
4634 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4635 .addImm(1) /* Lane index */
4636 .addUse(WidenedOp2->getOperand(0).getReg())
4637 .addImm(0);
4638 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4639 return &*InsElt;
4640}
4641
4642MachineInstr *
4643AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
4644 Register Src2, AArch64CC::CondCode Pred,
4645 MachineIRBuilder &MIRBuilder) const {
4646 auto &MRI = *MIRBuilder.getMRI();
4647 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
4648 // If we used a register class, then this won't necessarily have an LLT.
4649 // Compute the size based off whether or not we have a class or bank.
4650 unsigned Size;
4651 if (const auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
4652 Size = TRI.getRegSizeInBits(*RC);
4653 else
4654 Size = MRI.getType(Dst).getSizeInBits();
4655 // Some opcodes use s1.
4656 assert(Size <= 64 && "Expected 64 bits or less only!");
4657 static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4658 unsigned Opc = OpcTable[Size == 64];
4659 auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
4660 constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
4661 return &*CSINC;
4662}
4663
4664MachineInstr *AArch64InstructionSelector::emitCarryIn(MachineInstr &I,
4665 Register CarryReg) {
4666 MachineRegisterInfo *MRI = MIB.getMRI();
4667 unsigned Opcode = I.getOpcode();
4668
4669 // If the instruction is a SUB, we need to negate the carry,
4670 // because borrowing is indicated by carry-flag == 0.
4671 bool NeedsNegatedCarry =
4672 (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE);
4673
4674 // If the previous instruction will already produce the correct carry, do not
4675 // emit a carry generating instruction. E.g. for G_UADDE/G_USUBE sequences
4676 // generated during legalization of wide add/sub. This optimization depends on
4677 // these sequences not being interrupted by other instructions.
4678 // We have to select the previous instruction before the carry-using
4679 // instruction is deleted by the calling function, otherwise the previous
4680 // instruction might become dead and would get deleted.
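 // Example (informal): in a 128-bit add legalized to G_UADDO + G_UADDE, the
 // ADDS selected for the G_UADDO already leaves the right carry in NZCV, so no
 // extra carry-setting instruction is needed for the G_UADDE.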
4681 MachineInstr *SrcMI = MRI->getVRegDef(CarryReg);
4682 if (SrcMI == I.getPrevNode()) {
4683 if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(SrcMI)) {
4684 bool ProducesNegatedCarry = CarrySrcMI->isSub();
4685 if (NeedsNegatedCarry == ProducesNegatedCarry &&
4686 CarrySrcMI->isUnsigned() &&
4687 CarrySrcMI->getCarryOutReg() == CarryReg &&
4688 selectAndRestoreState(*SrcMI))
4689 return nullptr;
4690 }
4691 }
4692
4693 Register DeadReg = MRI->createVirtualRegister(&AArch64::GPR32RegClass);
4694
4695 if (NeedsNegatedCarry) {
4696 // (0 - Carry) sets !C in NZCV when Carry == 1
4697 Register ZReg = AArch64::WZR;
4698 return emitInstr(AArch64::SUBSWrr, {DeadReg}, {ZReg, CarryReg}, MIB);
4699 }
4700
4701 // (Carry - 1) sets !C in NZCV when Carry == 0
4702 auto Fns = select12BitValueWithLeftShift(1);
4703 return emitInstr(AArch64::SUBSWri, {DeadReg}, {CarryReg}, MIB, Fns);
4704}
4705
4706bool AArch64InstructionSelector::selectOverflowOp(MachineInstr &I,
4707 MachineRegisterInfo &MRI) {
4708 auto &CarryMI = cast<GAddSubCarryOut>(I);
4709
4710 if (auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(&I)) {
4711 // Set NZCV carry according to carry-in VReg
4712 emitCarryIn(I, CarryInMI->getCarryInReg());
4713 }
4714
4715 // Emit the operation and get the correct condition code.
4716 auto OpAndCC = emitOverflowOp(I.getOpcode(), CarryMI.getDstReg(),
4717 CarryMI.getLHS(), CarryMI.getRHS(), MIB);
4718
4719 Register CarryOutReg = CarryMI.getCarryOutReg();
4720
4721 // Don't convert carry-out to VReg if it is never used
4722 if (!MRI.use_nodbg_empty(CarryOutReg)) {
4723 // Now, put the overflow result in the register given by the first operand
4724 // to the overflow op. CSINC increments the result when the predicate is
4725 // false, so to get the increment when it's true, we need to use the
4726 // inverse. In this case, we want to increment when carry is set.
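 // e.g. for G_UADDO the reported condition is HS, so we emit (sketch)
 //   %carry_out = CSINC $wzr, $wzr, LO
 // which produces 1 exactly when the HS (carry set) condition held.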
4727 Register ZReg = AArch64::WZR;
4728 emitCSINC(/*Dst=*/CarryOutReg, /*Src1=*/ZReg, /*Src2=*/ZReg,
4729 getInvertedCondCode(OpAndCC.second), MIB);
4730 }
4731
4732 I.eraseFromParent();
4733 return true;
4734}
4735
4736std::pair<MachineInstr *, AArch64CC::CondCode>
4737AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4738 MachineOperand &LHS,
4739 MachineOperand &RHS,
4740 MachineIRBuilder &MIRBuilder) const {
4741 switch (Opcode) {
4742 default:
4743 llvm_unreachable("Unexpected opcode!");
4744 case TargetOpcode::G_SADDO:
4745 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4746 case TargetOpcode::G_UADDO:
4747 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4748 case TargetOpcode::G_SSUBO:
4749 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4750 case TargetOpcode::G_USUBO:
4751 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4752 case TargetOpcode::G_SADDE:
4753 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4754 case TargetOpcode::G_UADDE:
4755 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4756 case TargetOpcode::G_SSUBE:
4757 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4758 case TargetOpcode::G_USUBE:
4759 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4760 }
4761}
4762
4763/// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
4764/// expressed as a conjunction.
4765/// \param CanNegate Set to true if we can negate the whole sub-tree just by
4766/// changing the conditions on the CMP tests.
4767/// (this means we can call emitConjunctionRec() with
4768/// Negate==true on this sub-tree)
4769/// \param MustBeFirst Set to true if this subtree needs to be negated and we
4770/// cannot do the negation naturally. We are required to
4771/// emit the subtree first in this case.
4772/// \param WillNegate Is true if we are called when the result of this
4773/// subexpression must be negated. This happens when the
4774/// outer expression is an OR. We can use this fact to know
4775/// that we have a double negation (or (or ...) ...) that
4776/// can be implemented for free.
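/// For example, a chain such as (G_AND (G_ICMP ...), (G_FCMP ...)) can be
/// lowered into one ordinary compare followed by a conditional compare
/// (CCMP/FCCMP), so the second test only affects the flags when the first
/// condition held.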
4777static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
4778 bool WillNegate, MachineRegisterInfo &MRI,
4779 unsigned Depth = 0) {
4780 if (!MRI.hasOneNonDBGUse(Val))
4781 return false;
4782 MachineInstr *ValDef = MRI.getVRegDef(Val);
4783 unsigned Opcode = ValDef->getOpcode();
4784 if (isa<GAnyCmp>(ValDef)) {
4785 CanNegate = true;
4786 MustBeFirst = false;
4787 return true;
4788 }
4789 // Protect against exponential runtime and stack overflow.
4790 if (Depth > 6)
4791 return false;
4792 if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
4793 bool IsOR = Opcode == TargetOpcode::G_OR;
4794 Register O0 = ValDef->getOperand(1).getReg();
4795 Register O1 = ValDef->getOperand(2).getReg();
4796 bool CanNegateL;
4797 bool MustBeFirstL;
4798 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, MRI, Depth + 1))
4799 return false;
4800 bool CanNegateR;
4801 bool MustBeFirstR;
4802 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, MRI, Depth + 1))
4803 return false;
4804
4805 if (MustBeFirstL && MustBeFirstR)
4806 return false;
4807
4808 if (IsOR) {
4809 // For an OR expression we need to be able to naturally negate at least
4810 // one side or we cannot do the transformation at all.
4811 if (!CanNegateL && !CanNegateR)
4812 return false;
4813 // If the result of the OR will be negated and we can naturally negate
4814 // the leaves, then this sub-tree as a whole negates naturally.
4815 CanNegate = WillNegate && CanNegateL && CanNegateR;
4816 // If we cannot naturally negate the whole sub-tree, then this must be
4817 // emitted first.
4818 MustBeFirst = !CanNegate;
4819 } else {
4820 assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
4821 // We cannot naturally negate an AND operation.
4822 CanNegate = false;
4823 MustBeFirst = MustBeFirstL || MustBeFirstR;
4824 }
4825 return true;
4826 }
4827 return false;
4828}
4829
4830MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
4831 Register LHS, Register RHS, CmpInst::Predicate CC,
4832 AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
4833 MachineIRBuilder &MIB) const {
4834 // TODO: emit CMN as an optimization.
4835 auto &MRI = *MIB.getMRI();
4836 LLT OpTy = MRI.getType(LHS);
4837 unsigned CCmpOpc;
4838 std::optional<ValueAndVReg> C;
4839 if (CmpInst::isIntPredicate(CC)) {
4840 assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
4841 C = getIConstantVRegValWithLookThrough(RHS, MRI);
4842 if (C && C->Value.ult(32))
4843 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
4844 else
4845 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
4846 } else {
4847 assert(OpTy.getSizeInBits() == 16 || OpTy.getSizeInBits() == 32 ||
4848 OpTy.getSizeInBits() == 64);
4849 switch (OpTy.getSizeInBits()) {
4850 case 16:
4851 assert(STI.hasFullFP16() && "Expected Full FP16 for fp16 comparisons");
4852 CCmpOpc = AArch64::FCCMPHrr;
4853 break;
4854 case 32:
4855 CCmpOpc = AArch64::FCCMPSrr;
4856 break;
4857 case 64:
4858 CCmpOpc = AArch64::FCCMPDrr;
4859 break;
4860 default:
4861 return nullptr;
4862 }
4863 }
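 // CCMP/FCCMP performs the compare only when Predicate holds; otherwise it
 // sets the flags to the NZCV immediate. Using the flags that satisfy the
 // inverted OutCC makes the chained condition read as false whenever the
 // earlier test already failed.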
4864 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
4865 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
4866 auto CCmp =
4867 MIB.buildInstr(CCmpOpc, {}, {LHS});
4868 if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
4869 CCmp.addImm(C->Value.getZExtValue());
4870 else
4871 CCmp.addReg(RHS);
4872 CCmp.addImm(NZCV).addImm(Predicate);
4874 return &*CCmp;
4875}
4876
4877MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
4878 Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
4879 AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
4880 // We're at a tree leaf, produce a conditional comparison operation.
4881 auto &MRI = *MIB.getMRI();
4882 MachineInstr *ValDef = MRI.getVRegDef(Val);
4883 unsigned Opcode = ValDef->getOpcode();
4884 if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
4885 Register LHS = Cmp->getLHSReg();
4886 Register RHS = Cmp->getRHSReg();
4887 CmpInst::Predicate CC = Cmp->getCond();
4888 if (Negate)
4889 CC = CmpInst::getInversePredicate(CC);
4890 if (isa<GICmp>(Cmp)) {
4891 OutCC = changeICMPPredToAArch64CC(CC);
4892 } else {
4893 // Handle special FP cases.
4894 AArch64CC::CondCode ExtraCC;
4895 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
4896 // Some floating point conditions can't be tested with a single condition
4897 // code. Construct an additional comparison in this case.
4898 if (ExtraCC != AArch64CC::AL) {
4899 MachineInstr *ExtraCmp;
4900 if (!CCOp)
4901 ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC);
4902 else
4903 ExtraCmp =
4904 emitConditionalComparison(LHS, RHS, CC, Predicate, ExtraCC, MIB);
4905 CCOp = ExtraCmp->getOperand(0).getReg();
4906 Predicate = ExtraCC;
4907 }
4908 }
4909
4910 // Produce a normal comparison if we are first in the chain
4911 if (!CCOp) {
4912 auto Dst = MRI.cloneVirtualRegister(LHS);
4913 if (isa<GICmp>(Cmp))
4914 return emitSUBS(Dst, Cmp->getOperand(2), Cmp->getOperand(3), MIB);
4915 return emitFPCompare(Cmp->getOperand(2).getReg(),
4916 Cmp->getOperand(3).getReg(), MIB);
4917 }
4918 // Otherwise produce a ccmp.
4919 return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB);
4920 }
4921 assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");
4922
4923 bool IsOR = Opcode == TargetOpcode::G_OR;
4924
4925 Register LHS = ValDef->getOperand(1).getReg();
4926 bool CanNegateL;
4927 bool MustBeFirstL;
4928 bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR, MRI);
4929 assert(ValidL && "Valid conjunction/disjunction tree");
4930 (void)ValidL;
4931
4932 Register RHS = ValDef->getOperand(2).getReg();
4933 bool CanNegateR;
4934 bool MustBeFirstR;
4935 bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR, MRI);
4936 assert(ValidR && "Valid conjunction/disjunction tree");
4937 (void)ValidR;
4938
4939 // Swap sub-tree that must come first to the right side.
4940 if (MustBeFirstL) {
4941 assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
4942 std::swap(LHS, RHS);
4943 std::swap(CanNegateL, CanNegateR);
4944 std::swap(MustBeFirstL, MustBeFirstR);
4945 }
4946
4947 bool NegateR;
4948 bool NegateAfterR;
4949 bool NegateL;
4950 bool NegateAfterAll;
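 // A G_OR is realized via De Morgan's law: negate the leaves where we can do
 // so for free and invert the final condition code, rather than negating the
 // whole sub-tree.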
4951 if (Opcode == TargetOpcode::G_OR) {
4952 // Swap the sub-tree that we can negate naturally to the left.
4953 if (!CanNegateL) {
4954 assert(CanNegateR && "at least one side must be negatable");
4955 assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
4956 assert(!Negate);
4957 std::swap(LHS, RHS);
4958 NegateR = false;
4959 NegateAfterR = true;
4960 } else {
4961 // Negate the left sub-tree if possible, otherwise negate the result.
4962 NegateR = CanNegateR;
4963 NegateAfterR = !CanNegateR;
4964 }
4965 NegateL = true;
4966 NegateAfterAll = !Negate;
4967 } else {
4968 assert(Opcode == TargetOpcode::G_AND &&
4969 "Valid conjunction/disjunction tree");
4970 assert(!Negate && "Valid conjunction/disjunction tree");
4971
4972 NegateL = false;
4973 NegateR = false;
4974 NegateAfterR = false;
4975 NegateAfterAll = false;
4976 }
4977
4978 // Emit sub-trees.
4979 AArch64CC::CondCode RHSCC;
4980 MachineInstr *CmpR =
4981 emitConjunctionRec(RHS, RHSCC, NegateR, CCOp, Predicate, MIB);
4982 if (NegateAfterR)
4983 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
4984 MachineInstr *CmpL = emitConjunctionRec(
4985 LHS, OutCC, NegateL, CmpR->getOperand(0).getReg(), RHSCC, MIB);
4986 if (NegateAfterAll)
4987 OutCC = AArch64CC::getInvertedCondCode(OutCC);
4988 return CmpL;
4989}
4990
4991MachineInstr *AArch64InstructionSelector::emitConjunction(
4992 Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {
4993 bool DummyCanNegate;
4994 bool DummyMustBeFirst;
4995 if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false,
4996 *MIB.getMRI()))
4997 return nullptr;
4998 return emitConjunctionRec(Val, OutCC, false, Register(), AArch64CC::AL, MIB);
4999}
5000
5001bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
5002 MachineInstr &CondMI) {
5003 AArch64CC::CondCode AArch64CC;
5004 MachineInstr *ConjMI = emitConjunction(SelI.getCondReg(), AArch64CC, MIB);
5005 if (!ConjMI)
5006 return false;
5007
5008 emitSelect(SelI.getReg(0), SelI.getTrueReg(), SelI.getFalseReg(), AArch64CC, MIB);
5009 SelI.eraseFromParent();
5010 return true;
5011}
5012
5013bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
5014 MachineRegisterInfo &MRI = *MIB.getMRI();
5015 // We want to recognize this pattern:
5016 //
5017 // $z = G_FCMP pred, $x, $y
5018 // ...
5019 // $w = G_SELECT $z, $a, $b
5020 //
5021 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
5022 // some copies/truncs in between).
5023 //
5024 // If we see this, then we can emit something like this:
5025 //
5026 // fcmp $x, $y
5027 // fcsel $w, $a, $b, pred
5028 //
5029 // Rather than emitting both of the rather long sequences in the standard
5030 // G_FCMP/G_SELECT select methods.
5031
5032 // First, check if the condition is defined by a compare.
5033 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
5034
5035 // We can only fold if all of the defs have one use.
5036 Register CondDefReg = CondDef->getOperand(0).getReg();
5037 if (!MRI.hasOneNonDBGUse(CondDefReg)) {
5038 // Unless it's another select.
5039 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
5040 if (CondDef == &UI)
5041 continue;
5042 if (UI.getOpcode() != TargetOpcode::G_SELECT)
5043 return false;
5044 }
5045 }
5046
5047 // Is the condition defined by a compare?
5048 unsigned CondOpc = CondDef->getOpcode();
5049 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
5050 if (tryOptSelectConjunction(I, *CondDef))
5051 return true;
5052 return false;
5053 }
5054
5055 AArch64CC::CondCode CondCode;
5056 if (CondOpc == TargetOpcode::G_ICMP) {
5057 auto Pred =
5058 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
5059 CondCode = changeICMPPredToAArch64CC(Pred);
5060 emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
5061 CondDef->getOperand(1), MIB);
5062 } else {
5063 // Get the condition code for the select.
5064 auto Pred =
5065 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
5066 AArch64CC::CondCode CondCode2;
5067 changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
5068
5069 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
5070 // instructions to emit the comparison.
5071 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
5072 // unnecessary.
5073 if (CondCode2 != AArch64CC::AL)
5074 return false;
5075
5076 if (!emitFPCompare(CondDef->getOperand(2).getReg(),
5077 CondDef->getOperand(3).getReg(), MIB)) {
5078 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
5079 return false;
5080 }
5081 }
5082
5083 // Emit the select.
5084 emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
5085 I.getOperand(3).getReg(), CondCode, MIB);
5086 I.eraseFromParent();
5087 return true;
5088}
5089
5090MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
5091 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
5092 MachineIRBuilder &MIRBuilder) const {
5093 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
5094 "Unexpected MachineOperand");
5095 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5096 // We want to find this sort of thing:
5097 // x = G_SUB 0, y
5098 // G_ICMP z, x
5099 //
5100 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
5101 // e.g:
5102 //
5103 // cmn z, y
5104
5105 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
5106 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
5107 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
5108 auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
5109 // Given this:
5110 //
5111 // x = G_SUB 0, y
5112 // G_ICMP x, z
5113 //
5114 // Produce this:
5115 //
5116 // cmn y, z
5117 if (isCMN(LHSDef, P, MRI))
5118 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
5119
5120 // Same idea here, but with the RHS of the compare instead:
5121 //
5122 // Given this:
5123 //
5124 // x = G_SUB 0, y
5125 // G_ICMP z, x
5126 //
5127 // Produce this:
5128 //
5129 // cmn z, y
5130 if (isCMN(RHSDef, P, MRI))
5131 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
5132
5133 // Given this:
5134 //
5135 // z = G_AND x, y
5136 // G_ICMP z, 0
5137 //
5138 // Produce this if the compare is signed:
5139 //
5140 // tst x, y
5141 if (!CmpInst::isUnsigned(P) && LHSDef &&
5142 LHSDef->getOpcode() == TargetOpcode::G_AND) {
5143 // Make sure that the RHS is 0.
5144 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
5145 if (!ValAndVReg || ValAndVReg->Value != 0)
5146 return nullptr;
5147
5148 return emitTST(LHSDef->getOperand(1),
5149 LHSDef->getOperand(2), MIRBuilder);
5150 }
5151
5152 return nullptr;
5153}
5154
5155bool AArch64InstructionSelector::selectShuffleVector(
5156 MachineInstr &I, MachineRegisterInfo &MRI) {
5157 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5158 Register Src1Reg = I.getOperand(1).getReg();
5159 const LLT Src1Ty = MRI.getType(Src1Reg);
5160 Register Src2Reg = I.getOperand(2).getReg();
5161 const LLT Src2Ty = MRI.getType(Src2Reg);
5162 ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
5163
5164 MachineBasicBlock &MBB = *I.getParent();
5165 MachineFunction &MF = *MBB.getParent();
5166 LLVMContext &Ctx = MF.getFunction().getContext();
5167
5168 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
5169 // it's originated from a <1 x T> type. Those should have been lowered into
5170 // G_BUILD_VECTOR earlier.
5171 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
5172 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
5173 return false;
5174 }
5175
5176 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
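 // Each mask element expands to BytesPerElt byte indices into the concatenated
 // sources; e.g. with 32-bit elements, mask value 1 contributes bytes 4..7 and
 // mask value 0 contributes bytes 0..3 of the TBL index vector.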
5177
5178 SmallVector<Constant *, 64> CstIdxs;
5179 for (int Val : Mask) {
5180 // For now, we'll just assume any undef indexes to be 0. This should be
5181 // optimized in the future, e.g. to select DUP etc.
5182 Val = Val < 0 ? 0 : Val;
5183 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
5184 unsigned Offset = Byte + Val * BytesPerElt;
5185 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
5186 }
5187 }
5188
5189 // Use a constant pool to load the index vector for TBL.
5190 Constant *CPVal = ConstantVector::get(CstIdxs);
5191 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB);
5192 if (!IndexLoad) {
5193 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
5194 return false;
5195 }
5196
5197 if (DstTy.getSizeInBits() != 128) {
5198 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
5199 // This case can be done with TBL1.
5200 MachineInstr *Concat =
5201 emitVectorConcat(std::nullopt, Src1Reg, Src2Reg, MIB);
5202 if (!Concat) {
5203 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
5204 return false;
5205 }
5206
5207 // The constant pool load will be 64 bits, so we need to convert it to an FPR128 reg.
5208 IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
5209 IndexLoad->getOperand(0).getReg(), MIB);
5210
5211 auto TBL1 = MIB.buildInstr(
5212 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
5213 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
5215
5216 auto Copy =
5217 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
5218 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
5219 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
5220 I.eraseFromParent();
5221 return true;
5222 }
5223
5224 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
5225 // Q registers for regalloc.
5226 SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
5227 auto RegSeq = createQTuple(Regs, MIB);
5228 auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
5229 {RegSeq, IndexLoad->getOperand(0)});
5231 I.eraseFromParent();
5232 return true;
5233}
5234
5235MachineInstr *AArch64InstructionSelector::emitLaneInsert(
5236 std::optional<Register> DstReg, Register SrcReg, Register EltReg,
5237 unsigned LaneIdx, const RegisterBank &RB,
5238 MachineIRBuilder &MIRBuilder) const {
5239 MachineInstr *InsElt = nullptr;
5240 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5241 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5242
5243 // Create a register to define with the insert if one wasn't passed in.
5244 if (!DstReg)
5245 DstReg = MRI.createVirtualRegister(DstRC);
5246
5247 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
5248 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
5249
5250 if (RB.getID() == AArch64::FPRRegBankID) {
5251 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
5252 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5253 .addImm(LaneIdx)
5254 .addUse(InsSub->getOperand(0).getReg())
5255 .addImm(0);
5256 } else {
5257 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5258 .addImm(LaneIdx)
5259 .addUse(EltReg);
5260 }
5261
5263 return InsElt;
5264}
5265
5266bool AArch64InstructionSelector::selectUSMovFromExtend(
5267 MachineInstr &MI, MachineRegisterInfo &MRI) {
5268 if (MI.getOpcode() != TargetOpcode::G_SEXT &&
5269 MI.getOpcode() != TargetOpcode::G_ZEXT &&
5270 MI.getOpcode() != TargetOpcode::G_ANYEXT)
5271 return false;
5272 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
5273 const Register DefReg = MI.getOperand(0).getReg();
5274 const LLT DstTy = MRI.getType(DefReg);
5275 unsigned DstSize = DstTy.getSizeInBits();
5276
5277 if (DstSize != 32 && DstSize != 64)
5278 return false;
5279
5280 MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,
5281 MI.getOperand(1).getReg(), MRI);
5282 int64_t Lane;
5283 if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane)))
5284 return false;
5285 Register Src0 = Extract->getOperand(1).getReg();
5286
5287 const LLT &VecTy = MRI.getType(Src0);
5288
5289 if (VecTy.getSizeInBits() != 128) {
5290 const MachineInstr *ScalarToVector = emitScalarToVector(
5291 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
5292 assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
5293 Src0 = ScalarToVector->getOperand(0).getReg();
5294 }
5295
5296 unsigned Opcode;
5297 if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
5298 Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
5299 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
5300 Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
5301 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
5302 Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
5303 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
5304 Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
5305 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
5306 Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
5307 else
5308 llvm_unreachable("Unexpected type combo for S/UMov!");
5309
5310 // We may need to generate one of these, depending on the type and sign of the
5311 // input:
5312 // DstReg = SMOV Src0, Lane;
5313 // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
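 // A UMOV into a W register already zeroes bits [63:32], so for an unsigned
 // 64-bit destination we move into a 32-bit register and widen the result with
 // SUBREG_TO_REG rather than using a dedicated 64-bit UMOV.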
5314 MachineInstr *ExtI = nullptr;
5315 if (DstSize == 64 && !IsSigned) {
5316 Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
5317 MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
5318 ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
5319 .addImm(0)
5320 .addUse(NewReg)
5321 .addImm(AArch64::sub_32);
5322 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
5323 } else
5324 ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
5325
5327 MI.eraseFromParent();
5328 return true;
5329}
5330
5331bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I,
5332 MachineRegisterInfo &MRI) {
5333 assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
5334
5335 // Get information on the destination.
5336 Register DstReg = I.getOperand(0).getReg();
5337 const LLT DstTy = MRI.getType(DstReg);
5338 unsigned VecSize = DstTy.getSizeInBits();
5339
5340 // Get information on the element we want to insert into the destination.
5341 Register EltReg = I.getOperand(2).getReg();
5342 const LLT EltTy = MRI.getType(EltReg);
5343 unsigned EltSize = EltTy.getSizeInBits();
5344 if (EltSize < 8 || EltSize > 64)
5345 return false;
5346
5347 // Find the definition of the index. Bail out if it's not defined by a
5348 // G_CONSTANT.
5349 Register IdxReg = I.getOperand(3).getReg();
5350 auto VRegAndVal = getIConstantVRegValWithLookThrough(IdxReg, MRI);
5351 if (!VRegAndVal)
5352 return false;
5353 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
5354
5355 // Perform the lane insert.
5356 Register SrcReg = I.getOperand(1).getReg();
5357 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5358
5359 if (VecSize < 128) {
5360 // If the vector we're inserting into is smaller than 128 bits, widen it
5361 // to 128 to do the insert.
5362 MachineInstr *ScalarToVec =
5363 emitScalarToVector(VecSize, &AArch64::FPR128RegClass, SrcReg, MIB);
5364 if (!ScalarToVec)
5365 return false;
5366 SrcReg = ScalarToVec->getOperand(0).getReg();
5367 }
5368
5369 // Create an insert into a new FPR128 register.
5370 // Note that if our vector is already 128 bits, we end up emitting an extra
5371 // register.
5372 MachineInstr *InsMI =
5373 emitLaneInsert(std::nullopt, SrcReg, EltReg, LaneIdx, EltRB, MIB);
5374
5375 if (VecSize < 128) {
5376 // If we had to widen to perform the insert, then we have to demote back to
5377 // the original size to get the result we want.
5378 if (!emitNarrowVector(DstReg, InsMI->getOperand(0).getReg(), MIB, MRI))
5379 return false;
5380 } else {
5381 // No widening needed.
5382 InsMI->getOperand(0).setReg(DstReg);
5384 }
5385
5386 I.eraseFromParent();
5387 return true;
5388}
5389
5390MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(
5391 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5392 unsigned int Op;
5393 if (DstSize == 128) {
5394 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5395 return nullptr;
5396 Op = AArch64::MOVIv16b_ns;
5397 } else {
5398 Op = AArch64::MOVIv8b_ns;
5399 }
5400
5401 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5402
5403 if (AArch64_AM::isAdvSIMDModImmType9(Val)) {
5404 Val = AArch64_AM::encodeAdvSIMDModImmType9(Val);
5405 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5407 return &*Mov;
5408 }
5409 return nullptr;
5410}
5411
5412MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16(
5413 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5414 bool Inv) {
5415
5416 unsigned int Op;
5417 if (DstSize == 128) {
5418 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5419 return nullptr;
5420 Op = Inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16;
5421 } else {
5422 Op = Inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16;
5423 }
5424
5425 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5426 uint64_t Shift;
5427
5428 if (AArch64_AM::isAdvSIMDModImmType5(Val)) {
5429 Val = AArch64_AM::encodeAdvSIMDModImmType5(Val);
5430 Shift = 0;
5431 } else if (AArch64_AM::isAdvSIMDModImmType6(Val)) {
5432 Val = AArch64_AM::encodeAdvSIMDModImmType6(Val);
5433 Shift = 8;
5434 } else
5435 return nullptr;
5436
5437 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5439 return &*Mov;
5440}
5441
5442MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32(
5443 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5444 bool Inv) {
5445
5446 unsigned int Op;
5447 if (DstSize == 128) {
5448 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5449 return nullptr;
5450 Op = Inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32;
5451 } else {
5452 Op = Inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32;
5453 }
5454
5455 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5456 uint64_t Shift;
5457
5458 if ((AArch64_AM::isAdvSIMDModImmType1(Val))) {
5459 Val = AArch64_AM::encodeAdvSIMDModImmType1(Val);
5460 Shift = 0;
5461 } else if ((AArch64_AM::isAdvSIMDModImmType2(Val))) {
5462 Val = AArch64_AM::encodeAdvSIMDModImmType2(Val);
5463 Shift = 8;
5464 } else if ((AArch64_AM::isAdvSIMDModImmType3(Val))) {
5465 Val = AArch64_AM::encodeAdvSIMDModImmType3(Val);
5466 Shift = 16;
5467 } else if ((AArch64_AM::isAdvSIMDModImmType4(Val))) {
5468 Val = AArch64_AM::encodeAdvSIMDModImmType4(Val);
5469 Shift = 24;
5470 } else
5471 return nullptr;
5472
5473 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5475 return &*Mov;
5476}
5477
5478MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64(
5479 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5480
5481 unsigned int Op;
5482 if (DstSize == 128) {
5483 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5484 return nullptr;
5485 Op = AArch64::MOVIv2d_ns;
5486 } else {
5487 Op = AArch64::MOVID;
5488 }
5489
5490 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5491 if (AArch64_AM::isAdvSIMDModImmType10(Val)) {
5492 Val = AArch64_AM::encodeAdvSIMDModImmType10(Val);
5493 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5495 return &*Mov;
5496 }
5497 return nullptr;
5498}
5499
5500MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s(
5501 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5502 bool Inv) {
5503
5504 unsigned int Op;
5505 if (DstSize == 128) {
5506 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5507 return nullptr;
5508 Op = Inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl;
5509 } else {
5510 Op = Inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl;
5511 }
5512
5513 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5514 uint64_t Shift;
5515
5516 if (AArch64_AM::isAdvSIMDModImmType7(Val)) {
5517 Val = AArch64_AM::encodeAdvSIMDModImmType7(Val);
5518 Shift = 264;
5519 } else if (AArch64_AM::isAdvSIMDModImmType8(Val)) {
5520 Val = AArch64_AM::encodeAdvSIMDModImmType8(Val);
5521 Shift = 272;
5522 } else
5523 return nullptr;
5524
5525 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5527 return &*Mov;
5528}
5529
5530MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
5531 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5532
5533 unsigned int Op;
5534 bool IsWide = false;
5535 if (DstSize == 128) {
5536 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5537 return nullptr;
5538 Op = AArch64::FMOVv4f32_ns;
5539 IsWide = true;
5540 } else {
5541 Op = AArch64::FMOVv2f32_ns;
5542 }
5543
5544 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5545
5546 if (AArch64_AM::isAdvSIMDModImmType11(Val)) {
5547 Val = AArch64_AM::encodeAdvSIMDModImmType11(Val);
5548 } else if (IsWide && AArch64_AM::isAdvSIMDModImmType12(Val)) {
5549 Val = AArch64_AM::encodeAdvSIMDModImmType12(Val);
5550 Op = AArch64::FMOVv2f64_ns;
5551 } else
5552 return nullptr;
5553
5554 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5556 return &*Mov;
5557}
5558
5559bool AArch64InstructionSelector::selectIndexedExtLoad(
5560 MachineInstr &MI, MachineRegisterInfo &MRI) {
5561 auto &ExtLd = cast<GIndexedAnyExtLoad>(MI);
5562 Register Dst = ExtLd.getDstReg();
5563 Register WriteBack = ExtLd.getWritebackReg();
5564 Register Base = ExtLd.getBaseReg();
5565 Register Offset = ExtLd.getOffsetReg();
5566 LLT Ty = MRI.getType(Dst);
5567 assert(Ty.getSizeInBits() <= 64); // Only for scalar GPRs.
5568 unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();
5569 bool IsPre = ExtLd.isPre();
5570 bool IsSExt = isa<GIndexedSExtLoad>(ExtLd);
5571 bool InsertIntoXReg = false;
5572 bool IsDst64 = Ty.getSizeInBits() == 64;
5573
5574 unsigned Opc = 0;
5575 LLT NewLdDstTy;
5576 LLT s32 = LLT::scalar(32);
5577 LLT s64 = LLT::scalar(64);
5578
5579 if (MemSizeBits == 8) {
5580 if (IsSExt) {
5581 if (IsDst64)
5582 Opc = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
5583 else
5584 Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
5585 NewLdDstTy = IsDst64 ? s64 : s32;
5586 } else {
5587 Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
5588 InsertIntoXReg = IsDst64;
5589 NewLdDstTy = s32;
5590 }
5591 } else if (MemSizeBits == 16) {
5592 if (IsSExt) {
5593 if (IsDst64)
5594 Opc = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
5595 else
5596 Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
5597 NewLdDstTy = IsDst64 ? s64 : s32;
5598 } else {
5599 Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
5600 InsertIntoXReg = IsDst64;
5601 NewLdDstTy = s32;
5602 }
5603 } else if (MemSizeBits == 32) {
5604 if (IsSExt) {
5605 Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
5606 NewLdDstTy = s64;
5607 } else {
5608 Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
5609 InsertIntoXReg = IsDst64;
5610 NewLdDstTy = s32;
5611 }
5612 } else {
5613 llvm_unreachable("Unexpected size for indexed load");
5614 }
5615
5616 if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5617 return false; // We should be on gpr.
5618
5619 auto Cst = getIConstantVRegVal(Offset, MRI);
5620 if (!Cst)
5621 return false; // Shouldn't happen, but just in case.
5622
5623 auto LdMI = MIB.buildInstr(Opc, {WriteBack, NewLdDstTy}, {Base})
5624 .addImm(Cst->getSExtValue());
5625 LdMI.cloneMemRefs(ExtLd);
5627 // Make sure to select the load with the MemTy as the dest type, and then
5628 // insert into X reg if needed.
5629 if (InsertIntoXReg) {
5630 // Generate a SUBREG_TO_REG.
5631 auto SubToReg = MIB.buildInstr(TargetOpcode::SUBREG_TO_REG, {Dst}, {})
5632 .addImm(0)
5633 .addUse(LdMI.getReg(1))
5634 .addImm(AArch64::sub_32);
5635 RBI.constrainGenericRegister(SubToReg.getReg(0), AArch64::GPR64RegClass,
5636 MRI);
5637 } else {
5638 auto Copy = MIB.buildCopy(Dst, LdMI.getReg(1));
5639 selectCopy(*Copy, TII, MRI, TRI, RBI);
5640 }
5641 MI.eraseFromParent();
5642
5643 return true;
5644}
5645
5646bool AArch64InstructionSelector::selectIndexedLoad(MachineInstr &MI,
5647 MachineRegisterInfo &MRI) {
5648 auto &Ld = cast<GIndexedLoad>(MI);
5649 Register Dst = Ld.getDstReg();
5650 Register WriteBack = Ld.getWritebackReg();
5651 Register Base = Ld.getBaseReg();
5652 Register Offset = Ld.getOffsetReg();
5653 assert(MRI.getType(Dst).getSizeInBits() <= 128 &&
5654 "Unexpected type for indexed load");
5655 unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();
5656
5657 if (MemSize < MRI.getType(Dst).getSizeInBytes())
5658 return selectIndexedExtLoad(MI, MRI);
5659
5660 unsigned Opc = 0;
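 // The opcode tables below are indexed by log2 of the access size in bytes:
 // B, H, W, X for GPR loads and B, H, S, D, Q for FPR loads.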
5661 if (Ld.isPre()) {
5662 static constexpr unsigned GPROpcodes[] = {
5663 AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre,
5664 AArch64::LDRXpre};
5665 static constexpr unsigned FPROpcodes[] = {
5666 AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre,
5667 AArch64::LDRQpre};
5668 if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5669 Opc = FPROpcodes[Log2_32(MemSize)];
5670 else
5671 Opc = GPROpcodes[Log2_32(MemSize)];
5672 } else {
5673 static constexpr unsigned GPROpcodes[] = {
5674 AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
5675 AArch64::LDRXpost};
5676 static constexpr unsigned FPROpcodes[] = {
5677 AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
5678 AArch64::LDRDpost, AArch64::LDRQpost};
5679 if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5680 Opc = FPROpcodes[Log2_32(MemSize)];
5681 else
5682 Opc = GPROpcodes[Log2_32(MemSize)];
5683 }
5684 auto Cst = getIConstantVRegVal(Offset, MRI);
5685 if (!Cst)
5686 return false; // Shouldn't happen, but just in case.
5687 auto LdMI =
5688 MIB.buildInstr(Opc, {WriteBack, Dst}, {Base}).addImm(Cst->getSExtValue());
5689 LdMI.cloneMemRefs(Ld);
5691 MI.eraseFromParent();
5692 return true;
5693}
5694
5695bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I,
5696 MachineRegisterInfo &MRI) {
5697 Register Dst = I.getWritebackReg();
5698 Register Val = I.getValueReg();
5699 Register Base = I.getBaseReg();
5700 Register Offset = I.getOffsetReg();
5701 LLT ValTy = MRI.getType(Val);
5702 assert(ValTy.getSizeInBits() <= 128 && "Unexpected type for indexed store");
5703
5704 unsigned Opc = 0;
5705 if (I.isPre()) {
5706 static constexpr unsigned GPROpcodes[] = {
5707 AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre,
5708 AArch64::STRXpre};
5709 static constexpr unsigned FPROpcodes[] = {
5710 AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre,
5711 AArch64::STRQpre};
5712
5713 if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5714 Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5715 else
5716 Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5717 } else {
5718 static constexpr unsigned GPROpcodes[] = {
5719 AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
5720 AArch64::STRXpost};
5721 static constexpr unsigned FPROpcodes[] = {
5722 AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,
5723 AArch64::STRDpost, AArch64::STRQpost};
5724
5725 if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5726 Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5727 else
5728 Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5729 }
5730
5731 auto Cst = getIConstantVRegVal(Offset, MRI);
5732 if (!Cst)
5733 return false; // Shouldn't happen, but just in case.
5734 auto Str =
5735 MIB.buildInstr(Opc, {Dst}, {Val, Base}).addImm(Cst->getSExtValue());
5736 Str.cloneMemRefs(I);
5738 I.eraseFromParent();
5739 return true;
5740}
5741
5742MachineInstr *
5743AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
5744 MachineIRBuilder &MIRBuilder,
5745 MachineRegisterInfo &MRI) {
5746 LLT DstTy = MRI.getType(Dst);
5747 unsigned DstSize = DstTy.getSizeInBits();
5748 if (CV->isNullValue()) {
5749 if (DstSize == 128) {
5750 auto Mov =
5751 MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
5753 return &*Mov;
5754 }
5755
5756 if (DstSize == 64) {
5757 auto Mov =
5758 MIRBuilder
5759 .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
5760 .addImm(0);
5761 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
5762 .addReg(Mov.getReg(0), 0, AArch64::dsub);
5763 RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
5764 return &*Copy;
5765 }
5766 }
5767
5768 if (CV->getSplatValue()) {
5769 APInt DefBits = APInt::getSplat(DstSize, CV->getUniqueInteger());
5770 auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * {
5771 MachineInstr *NewOp;
5772 bool Inv = false;
5773 if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, DefBits, MIRBuilder)) ||
5774 (NewOp =
5775 tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5776 (NewOp =
5777 tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5778 (NewOp =
5779 tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5780 (NewOp = tryAdvSIMDModImm8(Dst, DstSize, DefBits, MIRBuilder)) ||
5781 (NewOp = tryAdvSIMDModImmFP(Dst, DstSize, DefBits, MIRBuilder)))
5782 return NewOp;
5783
5784 DefBits = ~DefBits;
5785 Inv = true;
5786 if ((NewOp =
5787 tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5788 (NewOp =
5789 tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5790 (NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)))
5791 return NewOp;
5792 return nullptr;
5793 };
5794
5795 if (auto *NewOp = TryMOVIWithBits(DefBits))
5796 return NewOp;
5797
5798 // See if a fneg of the constant can be materialized with a MOVI, etc
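 // E.g. a splat of -0.0 only has the per-element sign bits set, which MOVI
 // cannot encode directly; flipping those bits yields an all-zero pattern that
 // MOVI can build, and a following FNEG restores the original constant.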
5799 auto TryWithFNeg = [&](APInt DefBits, int NumBits,
5800 unsigned NegOpc) -> MachineInstr * {
5801 // FNegate each sub-element of the constant
5802 APInt Neg = APInt::getHighBitsSet(NumBits, 1).zext(DstSize);
5803 APInt NegBits(DstSize, 0);
5804 unsigned NumElts = DstSize / NumBits;
5805 for (unsigned i = 0; i < NumElts; i++)
5806 NegBits |= Neg << (NumBits * i);
5807 NegBits = DefBits ^ NegBits;
5808
5809 // Try to create the new constants with MOVI, and if so generate a fneg
5810 // for it.
5811 if (auto *NewOp = TryMOVIWithBits(NegBits)) {
5812 Register NewDst = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
5813 NewOp->getOperand(0).setReg(NewDst);
5814 return MIRBuilder.buildInstr(NegOpc, {Dst}, {NewDst});
5815 }
5816 return nullptr;
5817 };
5818 MachineInstr *R;
5819 if ((R = TryWithFNeg(DefBits, 32, AArch64::FNEGv4f32)) ||
5820 (R = TryWithFNeg(DefBits, 64, AArch64::FNEGv2f64)) ||
5821 (STI.hasFullFP16() &&
5822 (R = TryWithFNeg(DefBits, 16, AArch64::FNEGv8f16))))
5823 return R;
5824 }
5825
5826 auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
5827 if (!CPLoad) {
5828 LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
5829 return nullptr;
5830 }
5831
5832 auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
5833 RBI.constrainGenericRegister(
5834 Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
5835 return &*Copy;
5836}
5837
5838bool AArch64InstructionSelector::tryOptConstantBuildVec(
5839 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
5840 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5841 unsigned DstSize = DstTy.getSizeInBits();
5842 assert(DstSize <= 128 && "Unexpected build_vec type!");
5843 if (DstSize < 32)
5844 return false;
5845 // Check if we're building a constant vector, in which case we want to
5846 // generate a constant pool load instead of a vector insert sequence.
5847 SmallVector<Constant *, 16> Csts;
5848 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
5849 // Try to find G_CONSTANT or G_FCONSTANT
5850 auto *OpMI =
5851 getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
5852 if (OpMI)
5853 Csts.emplace_back(
5854 const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
5855 else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
5856 I.getOperand(Idx).getReg(), MRI)))
5857 Csts.emplace_back(
5858 const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
5859 else
5860 return false;
5861 }
5862 Constant *CV = ConstantVector::get(Csts);
5863 if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
5864 return false;
5865 I.eraseFromParent();
5866 return true;
5867}
5868
5869bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
5870 MachineInstr &I, MachineRegisterInfo &MRI) {
5871 // Given:
5872 // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
5873 //
5874 // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
5875 Register Dst = I.getOperand(0).getReg();
5876 Register EltReg = I.getOperand(1).getReg();
5877 LLT EltTy = MRI.getType(EltReg);
5878 // If the index isn't on the same bank as its elements, then this can't be a
5879 // SUBREG_TO_REG.
5880 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5881 const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
5882 if (EltRB != DstRB)
5883 return false;
5884 if (any_of(drop_begin(I.operands(), 2), [&MRI](const MachineOperand &Op) {
5885 return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(), MRI);
5886 }))
5887 return false;
5888 unsigned SubReg;
5889 const TargetRegisterClass *EltRC = getRegClassForTypeOnBank(EltTy, EltRB);
5890 if (!EltRC)
5891 return false;
5892 const TargetRegisterClass *DstRC =
5893 getRegClassForTypeOnBank(MRI.getType(Dst), DstRB);
5894 if (!DstRC)
5895 return false;
5896 if (!getSubRegForClass(EltRC, TRI, SubReg))
5897 return false;
5898 auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
5899 .addImm(0)
5900 .addUse(EltReg)
5901 .addImm(SubReg);
5902 I.eraseFromParent();
5903 constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);
5904 return RBI.constrainGenericRegister(Dst, *DstRC, MRI);
5905}
5906
5907bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
5908 MachineRegisterInfo &MRI) {
5909 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5910 // Until we port more of the optimized selections, for now just use a vector
5911 // insert sequence.
5912 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5913 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
5914 unsigned EltSize = EltTy.getSizeInBits();
5915
5916 if (tryOptConstantBuildVec(I, DstTy, MRI))
5917 return true;
5918 if (tryOptBuildVecToSubregToReg(I, MRI))
5919 return true;
5920
5921 if (EltSize != 8 && EltSize != 16 && EltSize != 32 && EltSize != 64)
5922 return false; // Don't support all element types yet.
5923 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
5924
5925 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5926 MachineInstr *ScalarToVec =
5927 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
5928 I.getOperand(1).getReg(), MIB);
5929 if (!ScalarToVec)
5930 return false;
5931
5932 Register DstVec = ScalarToVec->getOperand(0).getReg();
5933 unsigned DstSize = DstTy.getSizeInBits();
5934
5935 // Keep track of the last MI we inserted. Later on, we might be able to save
5936 // a copy using it.
5937 MachineInstr *PrevMI = ScalarToVec;
5938 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
5939 // Note that if we don't do a subregister copy, we can end up making an
5940 // extra register.
5941 Register OpReg = I.getOperand(i).getReg();
5942 // Do not emit inserts for undefs
5943 if (!getOpcodeDef<GImplicitDef>(OpReg, MRI)) {
5944 PrevMI = &*emitLaneInsert(std::nullopt, DstVec, OpReg, i - 1, RB, MIB);
5945 DstVec = PrevMI->getOperand(0).getReg();
5946 }
5947 }
5948
5949 // If DstTy's size in bits is less than 128, then emit a subregister copy
5950 // from DstVec to the last register we've defined.
5951 if (DstSize < 128) {
5952 // Force this to be FPR using the destination vector.
5953 const TargetRegisterClass *RC =
5954 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5955 if (!RC)
5956 return false;
5957 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5958 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5959 return false;
5960 }
5961
5962 unsigned SubReg = 0;
5963 if (!getSubRegForClass(RC, TRI, SubReg))
5964 return false;
5965 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5966 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
5967 << "\n");
5968 return false;
5969 }
5970
5971 Register Reg = MRI.createVirtualRegister(RC);
5972 Register DstReg = I.getOperand(0).getReg();
5973
5974 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0, SubReg);
5975 MachineOperand &RegOp = I.getOperand(1);
5976 RegOp.setReg(Reg);
5977 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5978 } else {
5979 // We either have a vector with all elements (except the first one) undef or
5980 // at least one non-undef non-first element. In the first case, we need to
5981 // constrain the output register ourselves as we may have generated an
5982 // INSERT_SUBREG operation which is a generic operation for which the
5983 // output regclass cannot be automatically chosen.
5984 //
5985 // In the second case, there is no need to do this as it may generate an
5986 // instruction like INSvi32gpr where the regclass can be automatically
5987 // chosen.
5988 //
5989 // Also, we save a copy by re-using the destination register on the final
5990 // insert.
5991 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
5993
5994 Register DstReg = PrevMI->getOperand(0).getReg();
5995 if (PrevMI == ScalarToVec && DstReg.isVirtual()) {
5996 const TargetRegisterClass *RC =
5997 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5998 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5999 }
6000 }
6001
6002 I.eraseFromParent();
6003 return true;
6004}
6005
6006bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
6007 unsigned NumVecs,
6008 MachineInstr &I) {
6009 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
6010 assert(Opc && "Expected an opcode?");
6011 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
6012 auto &MRI = *MIB.getMRI();
6013 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6014 unsigned Size = Ty.getSizeInBits();
6015 assert((Size == 64 || Size == 128) &&
6016 "Destination must be 64 bits or 128 bits?");
6017 unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
6018 auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg();
6019 assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
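 // The LDn instruction defines a register tuple; each destination vector is
 // then copied out of consecutive dsub0+Idx / qsub0+Idx subregisters of that
 // tuple and selected immediately below.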
6020 auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr});
6021 Load.cloneMemRefs(I);
6023 Register SelectedLoadDst = Load->getOperand(0).getReg();
6024 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
6025 auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {})
6026 .addReg(SelectedLoadDst, 0, SubReg + Idx);
6027 // Emit the subreg copies and immediately select them.
6028 // FIXME: We should refactor our copy code into an emitCopy helper and
6029 // clean up uses of this pattern elsewhere in the selector.
6030 selectCopy(*Vec, TII, MRI, TRI, RBI);
6031 }
6032 return true;
6033}
6034
6035bool AArch64InstructionSelector::selectVectorLoadLaneIntrinsic(
6036 unsigned Opc, unsigned NumVecs, MachineInstr &I) {
6037 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
6038 assert(Opc && "Expected an opcode?");
6039 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
6040 auto &MRI = *MIB.getMRI();
6041 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6042 bool Narrow = Ty.getSizeInBits() == 64;
6043
6044 auto FirstSrcRegIt = I.operands_begin() + NumVecs + 1;
6045 SmallVector<Register, 4> Regs(NumVecs);
6046 std::transform(FirstSrcRegIt, FirstSrcRegIt + NumVecs, Regs.begin(),
6047 [](auto MO) { return MO.getReg(); });
6048
6049 if (Narrow) {
6050 transform(Regs, Regs.begin(), [this](Register Reg) {
6051 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
6052 ->getOperand(0)
6053 .getReg();
6054 });
6055 Ty = Ty.multiplyElements(2);
6056 }
6057
6058 Register Tuple = createQTuple(Regs, MIB);
6059 auto LaneNo = getIConstantVRegVal((FirstSrcRegIt + NumVecs)->getReg(), MRI);
6060 if (!LaneNo)
6061 return false;
6062
6063 Register Ptr = (FirstSrcRegIt + NumVecs + 1)->getReg();
6064 auto Load = MIB.buildInstr(Opc, {Ty}, {})
6065 .addReg(Tuple)
6066 .addImm(LaneNo->getZExtValue())
6067 .addReg(Ptr);
6068 Load.cloneMemRefs(I);
6070 Register SelectedLoadDst = Load->getOperand(0).getReg();
6071 unsigned SubReg = AArch64::qsub0;
6072 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
6073 auto Vec = MIB.buildInstr(TargetOpcode::COPY,
6074 {Narrow ? DstOp(&AArch64::FPR128RegClass)
6075 : DstOp(I.getOperand(Idx).getReg())},
6076 {})
6077 .addReg(SelectedLoadDst, 0, SubReg + Idx);
6078 Register WideReg = Vec.getReg(0);
6079 // Emit the subreg copies and immediately select them.
6080 selectCopy(*Vec, TII, MRI, TRI, RBI);
6081 if (Narrow &&
6082 !emitNarrowVector(I.getOperand(Idx).getReg(), WideReg, MIB, MRI))
6083 return false;
6084 }
6085 return true;
6086}
6087
6088void AArch64InstructionSelector::selectVectorStoreIntrinsic(MachineInstr &I,
6089 unsigned NumVecs,
6090 unsigned Opc) {
6091 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
6092 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6093 Register Ptr = I.getOperand(1 + NumVecs).getReg();
6094
6095 SmallVector<Register, 2> Regs(NumVecs);
6096 std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
6097 Regs.begin(), [](auto MO) { return MO.getReg(); });
6098
6099 Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
6100 : createDTuple(Regs, MIB);
6101 auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});
6102 Store.cloneMemRefs(I);
6104}
6105
6106bool AArch64InstructionSelector::selectVectorStoreLaneIntrinsic(
6107 MachineInstr &I, unsigned NumVecs, unsigned Opc) {
6108 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
6109 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6110 bool Narrow = Ty.getSizeInBits() == 64;
6111
6112 SmallVector<Register, 2> Regs(NumVecs);
6113 std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
6114 Regs.begin(), [](auto MO) { return MO.getReg(); });
6115
6116 if (Narrow)
6117 transform(Regs, Regs.begin(), [this](Register Reg) {
6118 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
6119 ->getOperand(0)
6120 .getReg();
6121 });
6122
6123 Register Tuple = createQTuple(Regs, MIB);
6124
6125 auto LaneNo = getIConstantVRegVal(I.getOperand(1 + NumVecs).getReg(), MRI);
6126 if (!LaneNo)
6127 return false;
6128 Register Ptr = I.getOperand(1 + NumVecs + 1).getReg();
6129 auto Store = MIB.buildInstr(Opc, {}, {})
6130 .addReg(Tuple)
6131 .addImm(LaneNo->getZExtValue())
6132 .addReg(Ptr);
6133 Store.cloneMemRefs(I);
6135 return true;
6136}
6137
6138bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
6139 MachineInstr &I, MachineRegisterInfo &MRI) {
6140 // Find the intrinsic ID.
6141 unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
6142
6143 const LLT S8 = LLT::scalar(8);
6144 const LLT S16 = LLT::scalar(16);
6145 const LLT S32 = LLT::scalar(32);
6146 const LLT S64 = LLT::scalar(64);
6147 const LLT P0 = LLT::pointer(0, 64);
6148 // Select the instruction.
6149 switch (IntrinID) {
6150 default:
6151 return false;
6152 case Intrinsic::aarch64_ldxp:
6153 case Intrinsic::aarch64_ldaxp: {
6154 auto NewI = MIB.buildInstr(
6155 IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
6156 {I.getOperand(0).getReg(), I.getOperand(1).getReg()},
6157 {I.getOperand(3)});
6158 NewI.cloneMemRefs(I);
6160 break;
6161 }
6162 case Intrinsic::trap:
6163 MIB.buildInstr(AArch64::BRK, {}, {}).addImm(1);
6164 break;
6165 case Intrinsic::debugtrap:
6166 MIB.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
6167 break;
6168 case Intrinsic::ubsantrap:
6169 MIB.buildInstr(AArch64::BRK, {}, {})
6170 .addImm(I.getOperand(1).getImm() | ('U' << 8));
6171 break;
6172 case Intrinsic::aarch64_neon_ld1x2: {
6173 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6174 unsigned Opc = 0;
6175 if (Ty == LLT::fixed_vector(8, S8))
6176 Opc = AArch64::LD1Twov8b;
6177 else if (Ty == LLT::fixed_vector(16, S8))
6178 Opc = AArch64::LD1Twov16b;
6179 else if (Ty == LLT::fixed_vector(4, S16))
6180 Opc = AArch64::LD1Twov4h;
6181 else if (Ty == LLT::fixed_vector(8, S16))
6182 Opc = AArch64::LD1Twov8h;
6183 else if (Ty == LLT::fixed_vector(2, S32))
6184 Opc = AArch64::LD1Twov2s;
6185 else if (Ty == LLT::fixed_vector(4, S32))
6186 Opc = AArch64::LD1Twov4s;
6187 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6188 Opc = AArch64::LD1Twov2d;
6189 else if (Ty == S64 || Ty == P0)
6190 Opc = AArch64::LD1Twov1d;
6191 else
6192 llvm_unreachable("Unexpected type for ld1x2!");
6193 selectVectorLoadIntrinsic(Opc, 2, I);
6194 break;
6195 }
6196 case Intrinsic::aarch64_neon_ld1x3: {
6197 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6198 unsigned Opc = 0;
6199 if (Ty == LLT::fixed_vector(8, S8))
6200 Opc = AArch64::LD1Threev8b;
6201 else if (Ty == LLT::fixed_vector(16, S8))
6202 Opc = AArch64::LD1Threev16b;
6203 else if (Ty == LLT::fixed_vector(4, S16))
6204 Opc = AArch64::LD1Threev4h;
6205 else if (Ty == LLT::fixed_vector(8, S16))
6206 Opc = AArch64::LD1Threev8h;
6207 else if (Ty == LLT::fixed_vector(2, S32))
6208 Opc = AArch64::LD1Threev2s;
6209 else if (Ty == LLT::fixed_vector(4, S32))
6210 Opc = AArch64::LD1Threev4s;
6211 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6212 Opc = AArch64::LD1Threev2d;
6213 else if (Ty == S64 || Ty == P0)
6214 Opc = AArch64::LD1Threev1d;
6215 else
6216 llvm_unreachable("Unexpected type for ld1x3!");
6217 selectVectorLoadIntrinsic(Opc, 3, I);
6218 break;
6219 }
6220 case Intrinsic::aarch64_neon_ld1x4: {
6221 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6222 unsigned Opc = 0;
6223 if (Ty == LLT::fixed_vector(8, S8))
6224 Opc = AArch64::LD1Fourv8b;
6225 else if (Ty == LLT::fixed_vector(16, S8))
6226 Opc = AArch64::LD1Fourv16b;
6227 else if (Ty == LLT::fixed_vector(4, S16))
6228 Opc = AArch64::LD1Fourv4h;
6229 else if (Ty == LLT::fixed_vector(8, S16))
6230 Opc = AArch64::LD1Fourv8h;
6231 else if (Ty == LLT::fixed_vector(2, S32))
6232 Opc = AArch64::LD1Fourv2s;
6233 else if (Ty == LLT::fixed_vector(4, S32))
6234 Opc = AArch64::LD1Fourv4s;
6235 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6236 Opc = AArch64::LD1Fourv2d;
6237 else if (Ty == S64 || Ty == P0)
6238 Opc = AArch64::LD1Fourv1d;
6239 else
6240 llvm_unreachable("Unexpected type for ld1x4!");
6241 selectVectorLoadIntrinsic(Opc, 4, I);
6242 break;
6243 }
6244 case Intrinsic::aarch64_neon_ld2: {
6245 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6246 unsigned Opc = 0;
6247 if (Ty == LLT::fixed_vector(8, S8))
6248 Opc = AArch64::LD2Twov8b;
6249 else if (Ty == LLT::fixed_vector(16, S8))
6250 Opc = AArch64::LD2Twov16b;
6251 else if (Ty == LLT::fixed_vector(4, S16))
6252 Opc = AArch64::LD2Twov4h;
6253 else if (Ty == LLT::fixed_vector(8, S16))
6254 Opc = AArch64::LD2Twov8h;
6255 else if (Ty == LLT::fixed_vector(2, S32))
6256 Opc = AArch64::LD2Twov2s;
6257 else if (Ty == LLT::fixed_vector(4, S32))
6258 Opc = AArch64::LD2Twov4s;
6259 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6260 Opc = AArch64::LD2Twov2d;
6261 else if (Ty == S64 || Ty == P0)
6262 Opc = AArch64::LD1Twov1d;
6263 else
6264 llvm_unreachable("Unexpected type for ld2!");
6265 selectVectorLoadIntrinsic(Opc, 2, I);
6266 break;
6267 }
6268 case Intrinsic::aarch64_neon_ld2lane: {
6269 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6270 unsigned Opc;
6271 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6272 Opc = AArch64::LD2i8;
6273 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6274 Opc = AArch64::LD2i16;
6275 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6276 Opc = AArch64::LD2i32;
6277 else if (Ty == LLT::fixed_vector(2, S64) ||
6278 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6279 Opc = AArch64::LD2i64;
6280 else
6281 llvm_unreachable("Unexpected type for ld2lane!");
6282 if (!selectVectorLoadLaneIntrinsic(Opc, 2, I))
6283 return false;
6284 break;
6285 }
6286 case Intrinsic::aarch64_neon_ld2r: {
6287 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6288 unsigned Opc = 0;
6289 if (Ty == LLT::fixed_vector(8, S8))
6290 Opc = AArch64::LD2Rv8b;
6291 else if (Ty == LLT::fixed_vector(16, S8))
6292 Opc = AArch64::LD2Rv16b;
6293 else if (Ty == LLT::fixed_vector(4, S16))
6294 Opc = AArch64::LD2Rv4h;
6295 else if (Ty == LLT::fixed_vector(8, S16))
6296 Opc = AArch64::LD2Rv8h;
6297 else if (Ty == LLT::fixed_vector(2, S32))
6298 Opc = AArch64::LD2Rv2s;
6299 else if (Ty == LLT::fixed_vector(4, S32))
6300 Opc = AArch64::LD2Rv4s;
6301 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6302 Opc = AArch64::LD2Rv2d;
6303 else if (Ty == S64 || Ty == P0)
6304 Opc = AArch64::LD2Rv1d;
6305 else
6306 llvm_unreachable("Unexpected type for ld2r!");
6307 selectVectorLoadIntrinsic(Opc, 2, I);
6308 break;
6309 }
6310 case Intrinsic::aarch64_neon_ld3: {
6311 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6312 unsigned Opc = 0;
6313 if (Ty == LLT::fixed_vector(8, S8))
6314 Opc = AArch64::LD3Threev8b;
6315 else if (Ty == LLT::fixed_vector(16, S8))
6316 Opc = AArch64::LD3Threev16b;
6317 else if (Ty == LLT::fixed_vector(4, S16))
6318 Opc = AArch64::LD3Threev4h;
6319 else if (Ty == LLT::fixed_vector(8, S16))
6320 Opc = AArch64::LD3Threev8h;
6321 else if (Ty == LLT::fixed_vector(2, S32))
6322 Opc = AArch64::LD3Threev2s;
6323 else if (Ty == LLT::fixed_vector(4, S32))
6324 Opc = AArch64::LD3Threev4s;
6325 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6326 Opc = AArch64::LD3Threev2d;
6327 else if (Ty == S64 || Ty == P0)
6328 Opc = AArch64::LD1Threev1d;
6329 else
6330 llvm_unreachable("Unexpected type for ld3!");
6331 selectVectorLoadIntrinsic(Opc, 3, I);
6332 break;
6333 }
6334 case Intrinsic::aarch64_neon_ld3lane: {
6335 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6336 unsigned Opc;
6337 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6338 Opc = AArch64::LD3i8;
6339 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6340 Opc = AArch64::LD3i16;
6341 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6342 Opc = AArch64::LD3i32;
6343 else if (Ty == LLT::fixed_vector(2, S64) ||
6344 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6345 Opc = AArch64::LD3i64;
6346 else
6347 llvm_unreachable("Unexpected type for ld3lane!");
6348 if (!selectVectorLoadLaneIntrinsic(Opc, 3, I))
6349 return false;
6350 break;
6351 }
6352 case Intrinsic::aarch64_neon_ld3r: {
6353 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6354 unsigned Opc = 0;
6355 if (Ty == LLT::fixed_vector(8, S8))
6356 Opc = AArch64::LD3Rv8b;
6357 else if (Ty == LLT::fixed_vector(16, S8))
6358 Opc = AArch64::LD3Rv16b;
6359 else if (Ty == LLT::fixed_vector(4, S16))
6360 Opc = AArch64::LD3Rv4h;
6361 else if (Ty == LLT::fixed_vector(8, S16))
6362 Opc = AArch64::LD3Rv8h;
6363 else if (Ty == LLT::fixed_vector(2, S32))
6364 Opc = AArch64::LD3Rv2s;
6365 else if (Ty == LLT::fixed_vector(4, S32))
6366 Opc = AArch64::LD3Rv4s;
6367 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6368 Opc = AArch64::LD3Rv2d;
6369 else if (Ty == S64 || Ty == P0)
6370 Opc = AArch64::LD3Rv1d;
6371 else
6372 llvm_unreachable("Unexpected type for ld3r!");
6373 selectVectorLoadIntrinsic(Opc, 3, I);
6374 break;
6375 }
6376 case Intrinsic::aarch64_neon_ld4: {
6377 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6378 unsigned Opc = 0;
6379 if (Ty == LLT::fixed_vector(8, S8))
6380 Opc = AArch64::LD4Fourv8b;
6381 else if (Ty == LLT::fixed_vector(16, S8))
6382 Opc = AArch64::LD4Fourv16b;
6383 else if (Ty == LLT::fixed_vector(4, S16))
6384 Opc = AArch64::LD4Fourv4h;
6385 else if (Ty == LLT::fixed_vector(8, S16))
6386 Opc = AArch64::LD4Fourv8h;
6387 else if (Ty == LLT::fixed_vector(2, S32))
6388 Opc = AArch64::LD4Fourv2s;
6389 else if (Ty == LLT::fixed_vector(4, S32))
6390 Opc = AArch64::LD4Fourv4s;
6391 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6392 Opc = AArch64::LD4Fourv2d;
6393 else if (Ty == S64 || Ty == P0)
6394 Opc = AArch64::LD1Fourv1d;
6395 else
6396 llvm_unreachable("Unexpected type for ld4!");
6397 selectVectorLoadIntrinsic(Opc, 4, I);
6398 break;
6399 }
6400 case Intrinsic::aarch64_neon_ld4lane: {
6401 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6402 unsigned Opc;
6403 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6404 Opc = AArch64::LD4i8;
6405 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6406 Opc = AArch64::LD4i16;
6407 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6408 Opc = AArch64::LD4i32;
6409 else if (Ty == LLT::fixed_vector(2, S64) ||
6410 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6411 Opc = AArch64::LD4i64;
6412 else
6413 llvm_unreachable("Unexpected type for ld4lane!");
6414 if (!selectVectorLoadLaneIntrinsic(Opc, 4, I))
6415 return false;
6416 break;
6417 }
6418 case Intrinsic::aarch64_neon_ld4r: {
6419 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6420 unsigned Opc = 0;
6421 if (Ty == LLT::fixed_vector(8, S8))
6422 Opc = AArch64::LD4Rv8b;
6423 else if (Ty == LLT::fixed_vector(16, S8))
6424 Opc = AArch64::LD4Rv16b;
6425 else if (Ty == LLT::fixed_vector(4, S16))
6426 Opc = AArch64::LD4Rv4h;
6427 else if (Ty == LLT::fixed_vector(8, S16))
6428 Opc = AArch64::LD4Rv8h;
6429 else if (Ty == LLT::fixed_vector(2, S32))
6430 Opc = AArch64::LD4Rv2s;
6431 else if (Ty == LLT::fixed_vector(4, S32))
6432 Opc = AArch64::LD4Rv4s;
6433 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6434 Opc = AArch64::LD4Rv2d;
6435 else if (Ty == S64 || Ty == P0)
6436 Opc = AArch64::LD4Rv1d;
6437 else
6438 llvm_unreachable("Unexpected type for ld4r!");
6439 selectVectorLoadIntrinsic(Opc, 4, I);
6440 break;
6441 }
6442 case Intrinsic::aarch64_neon_st1x2: {
6443 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6444 unsigned Opc;
6445 if (Ty == LLT::fixed_vector(8, S8))
6446 Opc = AArch64::ST1Twov8b;
6447 else if (Ty == LLT::fixed_vector(16, S8))
6448 Opc = AArch64::ST1Twov16b;
6449 else if (Ty == LLT::fixed_vector(4, S16))
6450 Opc = AArch64::ST1Twov4h;
6451 else if (Ty == LLT::fixed_vector(8, S16))
6452 Opc = AArch64::ST1Twov8h;
6453 else if (Ty == LLT::fixed_vector(2, S32))
6454 Opc = AArch64::ST1Twov2s;
6455 else if (Ty == LLT::fixed_vector(4, S32))
6456 Opc = AArch64::ST1Twov4s;
6457 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6458 Opc = AArch64::ST1Twov2d;
6459 else if (Ty == S64 || Ty == P0)
6460 Opc = AArch64::ST1Twov1d;
6461 else
6462 llvm_unreachable("Unexpected type for st1x2!");
6463 selectVectorStoreIntrinsic(I, 2, Opc);
6464 break;
6465 }
6466 case Intrinsic::aarch64_neon_st1x3: {
6467 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6468 unsigned Opc;
6469 if (Ty == LLT::fixed_vector(8, S8))
6470 Opc = AArch64::ST1Threev8b;
6471 else if (Ty == LLT::fixed_vector(16, S8))
6472 Opc = AArch64::ST1Threev16b;
6473 else if (Ty == LLT::fixed_vector(4, S16))
6474 Opc = AArch64::ST1Threev4h;
6475 else if (Ty == LLT::fixed_vector(8, S16))
6476 Opc = AArch64::ST1Threev8h;
6477 else if (Ty == LLT::fixed_vector(2, S32))
6478 Opc = AArch64::ST1Threev2s;
6479 else if (Ty == LLT::fixed_vector(4, S32))
6480 Opc = AArch64::ST1Threev4s;
6481 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6482 Opc = AArch64::ST1Threev2d;
6483 else if (Ty == S64 || Ty == P0)
6484 Opc = AArch64::ST1Threev1d;
6485 else
6486 llvm_unreachable("Unexpected type for st1x3!");
6487 selectVectorStoreIntrinsic(I, 3, Opc);
6488 break;
6489 }
6490 case Intrinsic::aarch64_neon_st1x4: {
6491 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6492 unsigned Opc;
6493 if (Ty == LLT::fixed_vector(8, S8))
6494 Opc = AArch64::ST1Fourv8b;
6495 else if (Ty == LLT::fixed_vector(16, S8))
6496 Opc = AArch64::ST1Fourv16b;
6497 else if (Ty == LLT::fixed_vector(4, S16))
6498 Opc = AArch64::ST1Fourv4h;
6499 else if (Ty == LLT::fixed_vector(8, S16))
6500 Opc = AArch64::ST1Fourv8h;
6501 else if (Ty == LLT::fixed_vector(2, S32))
6502 Opc = AArch64::ST1Fourv2s;
6503 else if (Ty == LLT::fixed_vector(4, S32))
6504 Opc = AArch64::ST1Fourv4s;
6505 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6506 Opc = AArch64::ST1Fourv2d;
6507 else if (Ty == S64 || Ty == P0)
6508 Opc = AArch64::ST1Fourv1d;
6509 else
6510 llvm_unreachable("Unexpected type for st1x4!");
6511 selectVectorStoreIntrinsic(I, 4, Opc);
6512 break;
6513 }
6514 case Intrinsic::aarch64_neon_st2: {
6515 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6516 unsigned Opc;
6517 if (Ty == LLT::fixed_vector(8, S8))
6518 Opc = AArch64::ST2Twov8b;
6519 else if (Ty == LLT::fixed_vector(16, S8))
6520 Opc = AArch64::ST2Twov16b;
6521 else if (Ty == LLT::fixed_vector(4, S16))
6522 Opc = AArch64::ST2Twov4h;
6523 else if (Ty == LLT::fixed_vector(8, S16))
6524 Opc = AArch64::ST2Twov8h;
6525 else if (Ty == LLT::fixed_vector(2, S32))
6526 Opc = AArch64::ST2Twov2s;
6527 else if (Ty == LLT::fixed_vector(4, S32))
6528 Opc = AArch64::ST2Twov4s;
6529 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6530 Opc = AArch64::ST2Twov2d;
6531 else if (Ty == S64 || Ty == P0)
6532 Opc = AArch64::ST1Twov1d;
6533 else
6534 llvm_unreachable("Unexpected type for st2!");
6535 selectVectorStoreIntrinsic(I, 2, Opc);
6536 break;
6537 }
6538 case Intrinsic::aarch64_neon_st3: {
6539 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6540 unsigned Opc;
6541 if (Ty == LLT::fixed_vector(8, S8))
6542 Opc = AArch64::ST3Threev8b;
6543 else if (Ty == LLT::fixed_vector(16, S8))
6544 Opc = AArch64::ST3Threev16b;
6545 else if (Ty == LLT::fixed_vector(4, S16))
6546 Opc = AArch64::ST3Threev4h;
6547 else if (Ty == LLT::fixed_vector(8, S16))
6548 Opc = AArch64::ST3Threev8h;
6549 else if (Ty == LLT::fixed_vector(2, S32))
6550 Opc = AArch64::ST3Threev2s;
6551 else if (Ty == LLT::fixed_vector(4, S32))
6552 Opc = AArch64::ST3Threev4s;
6553 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6554 Opc = AArch64::ST3Threev2d;
6555 else if (Ty == S64 || Ty == P0)
6556 Opc = AArch64::ST1Threev1d;
6557 else
6558 llvm_unreachable("Unexpected type for st3!");
6559 selectVectorStoreIntrinsic(I, 3, Opc);
6560 break;
6561 }
6562 case Intrinsic::aarch64_neon_st4: {
6563 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6564 unsigned Opc;
6565 if (Ty == LLT::fixed_vector(8, S8))
6566 Opc = AArch64::ST4Fourv8b;
6567 else if (Ty == LLT::fixed_vector(16, S8))
6568 Opc = AArch64::ST4Fourv16b;
6569 else if (Ty == LLT::fixed_vector(4, S16))
6570 Opc = AArch64::ST4Fourv4h;
6571 else if (Ty == LLT::fixed_vector(8, S16))
6572 Opc = AArch64::ST4Fourv8h;
6573 else if (Ty == LLT::fixed_vector(2, S32))
6574 Opc = AArch64::ST4Fourv2s;
6575 else if (Ty == LLT::fixed_vector(4, S32))
6576 Opc = AArch64::ST4Fourv4s;
6577 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6578 Opc = AArch64::ST4Fourv2d;
6579 else if (Ty == S64 || Ty == P0)
6580 Opc = AArch64::ST1Fourv1d;
6581 else
6582 llvm_unreachable("Unexpected type for st4!");
6583 selectVectorStoreIntrinsic(I, 4, Opc);
6584 break;
6585 }
6586 case Intrinsic::aarch64_neon_st2lane: {
6587 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6588 unsigned Opc;
6589 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6590 Opc = AArch64::ST2i8;
6591 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6592 Opc = AArch64::ST2i16;
6593 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6594 Opc = AArch64::ST2i32;
6595 else if (Ty == LLT::fixed_vector(2, S64) ||
6596 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6597 Opc = AArch64::ST2i64;
6598 else
6599 llvm_unreachable("Unexpected type for st2lane!");
6600 if (!selectVectorStoreLaneIntrinsic(I, 2, Opc))
6601 return false;
6602 break;
6603 }
6604 case Intrinsic::aarch64_neon_st3lane: {
6605 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6606 unsigned Opc;
6607 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6608 Opc = AArch64::ST3i8;
6609 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6610 Opc = AArch64::ST3i16;
6611 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6612 Opc = AArch64::ST3i32;
6613 else if (Ty == LLT::fixed_vector(2, S64) ||
6614 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6615 Opc = AArch64::ST3i64;
6616 else
6617 llvm_unreachable("Unexpected type for st3lane!");
6618 if (!selectVectorStoreLaneIntrinsic(I, 3, Opc))
6619 return false;
6620 break;
6621 }
6622 case Intrinsic::aarch64_neon_st4lane: {
6623 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6624 unsigned Opc;
6625 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6626 Opc = AArch64::ST4i8;
6627 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6628 Opc = AArch64::ST4i16;
6629 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6630 Opc = AArch64::ST4i32;
6631 else if (Ty == LLT::fixed_vector(2, S64) ||
6632 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6633 Opc = AArch64::ST4i64;
6634 else
6635 llvm_unreachable("Unexpected type for st4lane!");
6636 if (!selectVectorStoreLaneIntrinsic(I, 4, Opc))
6637 return false;
6638 break;
6639 }
6640 case Intrinsic::aarch64_mops_memset_tag: {
6641 // Transform
6642 // %dst:gpr(p0) = \
6643 // G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.mops.memset.tag),
6644 // \ %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64)
6645 // where %dst is updated, into
6646 // %Rd:GPR64common, %Rn:GPR64) = \
6647 // MOPSMemorySetTaggingPseudo \
6648 // %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64
6649 // where Rd and Rn are tied.
6650 // It is expected that %val has been extended to s64 in legalization.
6651 // Note that the order of the size/value operands is swapped.
6652
6653 Register DstDef = I.getOperand(0).getReg();
6654 // I.getOperand(1) is the intrinsic function
6655 Register DstUse = I.getOperand(2).getReg();
6656 Register ValUse = I.getOperand(3).getReg();
6657 Register SizeUse = I.getOperand(4).getReg();
6658
6659 // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one.
6660 // Therefore an additional virtual register is required for the updated size
6661 // operand. This value is not accessible via the semantics of the intrinsic.
6662 Register SizeDef = MRI.createGenericVirtualRegister(LLT::scalar(64));
6663
6664 auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo,
6665 {DstDef, SizeDef}, {DstUse, SizeUse, ValUse});
6666 Memset.cloneMemRefs(I);
6667 constrainSelectedInstRegOperands(*Memset, TII, TRI, RBI);
6668 break;
6669 }
6670 }
6671
6672 I.eraseFromParent();
6673 return true;
6674}
6675
6676bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
6677 MachineRegisterInfo &MRI) {
6678 unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
6679
6680 switch (IntrinID) {
6681 default:
6682 break;
6683 case Intrinsic::aarch64_crypto_sha1h: {
6684 Register DstReg = I.getOperand(0).getReg();
6685 Register SrcReg = I.getOperand(2).getReg();
6686
6687 // FIXME: Should this be an assert?
6688 if (MRI.getType(DstReg).getSizeInBits() != 32 ||
6689 MRI.getType(SrcReg).getSizeInBits() != 32)
6690 return false;
6691
6692 // The operation has to happen on FPRs. Set up some new FPR registers for
6693 // the source and destination if they are on GPRs.
6694 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
6695 SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
6696 MIB.buildCopy({SrcReg}, {I.getOperand(2)});
6697
6698 // Make sure the copy ends up getting constrained properly.
6699 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
6700 AArch64::GPR32RegClass, MRI);
6701 }
6702
6703 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
6704 DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
6705
6706 // Actually insert the instruction.
6707 auto SHA1Inst = MIB.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
6708 constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
6709
6710 // Did we create a new register for the destination?
6711 if (DstReg != I.getOperand(0).getReg()) {
6712 // Yep. Copy the result of the instruction back into the original
6713 // destination.
6714 MIB.buildCopy({I.getOperand(0)}, {DstReg});
6715 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
6716 AArch64::GPR32RegClass, MRI);
6717 }
6718
6719 I.eraseFromParent();
6720 return true;
6721 }
6722 case Intrinsic::frameaddress:
6723 case Intrinsic::returnaddress: {
6724 MachineFunction &MF = *I.getParent()->getParent();
6725 MachineFrameInfo &MFI = MF.getFrameInfo();
6726
6727 unsigned Depth = I.getOperand(2).getImm();
6728 Register DstReg = I.getOperand(0).getReg();
6729 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6730
6731 if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
6732 if (!MFReturnAddr) {
6733 // Insert the copy from LR/X30 into the entry block, before it can be
6734 // clobbered by anything.
6735 MFI.setReturnAddressIsTaken(true);
6736 MFReturnAddr = getFunctionLiveInPhysReg(
6737 MF, TII, AArch64::LR, AArch64::GPR64RegClass, I.getDebugLoc());
6738 }
6739
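// Note: with pointer authentication enabled, the value in LR may carry a
// pointer authentication code. XPACI (or the XPACLRI fallback, which operates
// implicitly on LR/X30) strips that code so the caller receives the raw
// return address.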
6740 if (STI.hasPAuth()) {
6741 MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
6742 } else {
6743 MIB.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
6744 MIB.buildInstr(AArch64::XPACLRI);
6745 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
6746 }
6747
6748 I.eraseFromParent();
6749 return true;
6750 }
6751
6752 MFI.setFrameAddressIsTaken(true);
6753 Register FrameAddr(AArch64::FP);
6754 while (Depth--) {
6755 Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
6756 auto Ldr =
6757 MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
6758 constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI);
6759 FrameAddr = NextFrame;
6760 }
6761
6762 if (IntrinID == Intrinsic::frameaddress)
6763 MIB.buildCopy({DstReg}, {FrameAddr});
6764 else {
6765 MFI.setReturnAddressIsTaken(true);
6766
6767 if (STI.hasPAuth()) {
6768 Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
6769 MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
6770 MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
6771 } else {
6772 MIB.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr})
6773 .addImm(1);
6774 MIB.buildInstr(AArch64::XPACLRI);
6775 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
6776 }
6777 }
6778
6779 I.eraseFromParent();
6780 return true;
6781 }
6782 case Intrinsic::swift_async_context_addr:
6783 auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()},
6784 {Register(AArch64::FP)})
6785 .addImm(8)
6786 .addImm(0);
6787 constrainSelectedInstRegOperands(*Sub, TII, TRI, RBI);
6788
6790 MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
6791 I.eraseFromParent();
6792 return true;
6793 }
6794 return false;
6795}
6796
6797InstructionSelector::ComplexRendererFns
6798AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
6799 auto MaybeImmed = getImmedFromMO(Root);
6800 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6801 return std::nullopt;
6802 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
6803 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6804}
6805
6806InstructionSelector::ComplexRendererFns
6807AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
6808 auto MaybeImmed = getImmedFromMO(Root);
6809 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6810 return std::nullopt;
6811 uint64_t Enc = 31 - *MaybeImmed;
6812 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6813}
6814
6815InstructionSelector::ComplexRendererFns
6816AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
6817 auto MaybeImmed = getImmedFromMO(Root);
6818 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
6819 return std::nullopt;
6820 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
6821 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6822}
6823
6824InstructionSelector::ComplexRendererFns
6825AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
6826 auto MaybeImmed = getImmedFromMO(Root);
6827 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
6828 return std::nullopt;
6829 uint64_t Enc = 63 - *MaybeImmed;
6830 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6831}
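// Worked example for the four renderers above: for a 32-bit shift amount of 5,
// selectShiftA_32 renders (32 - 5) & 0x1f = 27 and selectShiftB_32 renders
// 31 - 5 = 26; the 64-bit variants compute (64 - Imm) & 0x3f and 63 - Imm in
// the same way.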
6832
6833/// Helper to select an immediate value that can be represented as a 12-bit
6834/// value shifted left by either 0 or 12. If it is possible to do so, return
6835/// the immediate and shift value. If not, return std::nullopt.
6836///
6837/// Used by selectArithImmed and selectNegArithImmed.
6838InstructionSelector::ComplexRendererFns
6839AArch64InstructionSelector::select12BitValueWithLeftShift(
6840 uint64_t Immed) const {
6841 unsigned ShiftAmt;
6842 if (Immed >> 12 == 0) {
6843 ShiftAmt = 0;
6844 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
6845 ShiftAmt = 12;
6846 Immed = Immed >> 12;
6847 } else
6848 return std::nullopt;
6849
6850 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
6851 return {{
6852 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
6853 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
6854 }};
6855}
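// Worked example: an immediate of 0x977 is rendered as (0x977, LSL #0) and
// 0x456000 as (0x456, LSL #12); a value such as 0x456001 has bits set in both
// ranges, so std::nullopt is returned.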
6856
6857/// SelectArithImmed - Select an immediate value that can be represented as
6858/// a 12-bit value shifted left by either 0 or 12. If so, return true with
6859/// Val set to the 12-bit value and Shift set to the shifter operand.
6860InstructionSelector::ComplexRendererFns
6861AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
6862 // This function is called from the addsub_shifted_imm ComplexPattern,
6863 // which lists [imm] as the list of opcodes it's interested in; however,
6864 // we still need to check whether the operand is actually an immediate
6865 // here, because the ComplexPattern opcode list is only used in
6866 // root-level opcode matching.
6867 auto MaybeImmed = getImmedFromMO(Root);
6868 if (MaybeImmed == std::nullopt)
6869 return std::nullopt;
6870 return select12BitValueWithLeftShift(*MaybeImmed);
6871}
6872
6873/// SelectNegArithImmed - As above, but negates the value before trying to
6874/// select it.
6875InstructionSelector::ComplexRendererFns
6876AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
6877 // We need a register here, because we need to know if we have a 64 or 32
6878 // bit immediate.
6879 if (!Root.isReg())
6880 return std::nullopt;
6881 auto MaybeImmed = getImmedFromMO(Root);
6882 if (MaybeImmed == std::nullopt)
6883 return std::nullopt;
6884 uint64_t Immed = *MaybeImmed;
6885
6886 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
6887 // have the opposite effect on the C flag, so this pattern mustn't match under
6888 // those circumstances.
6889 if (Immed == 0)
6890 return std::nullopt;
6891
6892 // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
6893 // the root.
6895 if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
6896 Immed = ~((uint32_t)Immed) + 1;
6897 else
6898 Immed = ~Immed + 1ULL;
6899
6900 if (Immed & 0xFFFFFFFFFF000000ULL)
6901 return std::nullopt;
6902
6903 Immed &= 0xFFFFFFULL;
6904 return select12BitValueWithLeftShift(Immed);
6905}
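// Worked example: for a 32-bit root with immediate 0xFFFFFFF0 (-16), the
// negation above yields 16, which select12BitValueWithLeftShift accepts as
// (16, LSL #0); this is what allows, e.g., a compare against -16 to be
// selected with the complementary instruction form using immediate 16.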
6906
6907/// Return true if it is worth folding MI into an extended register. That is,
6908/// if it's safe to pull it into the addressing mode of a load or store as a
6909/// shift.
6910bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
6911 MachineInstr &MI, const MachineRegisterInfo &MRI) const {
6912 // Always fold if there is one use, or if we're optimizing for size.
6913 Register DefReg = MI.getOperand(0).getReg();
6914 if (MRI.hasOneNonDBGUse(DefReg) ||
6915 MI.getParent()->getParent()->getFunction().hasOptSize())
6916 return true;
6917
6918 // It's better to avoid folding and recomputing shifts when we don't have a
6919 // fastpath.
6920 if (!STI.hasAddrLSLFast())
6921 return false;
6922
6923 // We have a fastpath, so folding a shift in and potentially computing it
6924 // many times may be beneficial. Check if this is only used in memory ops.
6925 // If it is, then we should fold.
6926 return all_of(MRI.use_nodbg_instructions(DefReg),
6927 [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
6928}
6929
6930static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) {
6931 switch (Type) {
6932 case AArch64_AM::SXTB:
6933 case AArch64_AM::SXTH:
6934 case AArch64_AM::SXTW:
6935 return true;
6936 default:
6937 return false;
6938 }
6939}
6940
6941InstructionSelector::ComplexRendererFns
6942AArch64InstructionSelector::selectExtendedSHL(
6943 MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
6944 unsigned SizeInBytes, bool WantsExt) const {
6945 assert(Base.isReg() && "Expected base to be a register operand");
6946 assert(Offset.isReg() && "Expected offset to be a register operand");
6947
6949 MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
6950
6951 unsigned OffsetOpc = OffsetInst->getOpcode();
6952 bool LookedThroughZExt = false;
6953 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
6954 // Try to look through a ZEXT.
6955 if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
6956 return std::nullopt;
6957
6958 OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg());
6959 OffsetOpc = OffsetInst->getOpcode();
6960 LookedThroughZExt = true;
6961
6962 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
6963 return std::nullopt;
6964 }
6965 // Make sure that the memory op is a valid size.
6966 int64_t LegalShiftVal = Log2_32(SizeInBytes);
6967 if (LegalShiftVal == 0)
6968 return std::nullopt;
6969 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
6970 return std::nullopt;
6971
6972 // Now, try to find the specific G_CONSTANT. Start by assuming that the
6973 // register we will offset is the LHS, and the register containing the
6974 // constant is the RHS.
6975 Register OffsetReg = OffsetInst->getOperand(1).getReg();
6976 Register ConstantReg = OffsetInst->getOperand(2).getReg();
6977 auto ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
6978 if (!ValAndVReg) {
6979 // We didn't get a constant on the RHS. If the opcode is a shift, then
6980 // we're done.
6981 if (OffsetOpc == TargetOpcode::G_SHL)
6982 return std::nullopt;
6983
6984 // If we have a G_MUL, we can use either register. Try looking at the RHS.
6985 std::swap(OffsetReg, ConstantReg);
6986 ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
6987 if (!ValAndVReg)
6988 return std::nullopt;
6989 }
6990
6991 // The value must fit into 3 bits, and must be positive. Make sure that is
6992 // true.
6993 int64_t ImmVal = ValAndVReg->Value.getSExtValue();
6994
6995 // Since we're going to pull this into a shift, the constant value must be
6996 // a power of 2. If we got a multiply, then we need to check this.
6997 if (OffsetOpc == TargetOpcode::G_MUL) {
6998 if (!llvm::has_single_bit<uint32_t>(ImmVal))
6999 return std::nullopt;
7000
7001 // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
7002 ImmVal = Log2_32(ImmVal);
7003 }
7004
7005 if ((ImmVal & 0x7) != ImmVal)
7006 return std::nullopt;
7007
7008 // We are only allowed to shift by LegalShiftVal. This shift value is built
7009 // into the instruction, so we can't just use whatever we want.
7010 if (ImmVal != LegalShiftVal)
7011 return std::nullopt;
7012
7013 unsigned SignExtend = 0;
7014 if (WantsExt) {
7015 // Check if the offset is defined by an extend, unless we looked through a
7016 // G_ZEXT earlier.
7017 if (!LookedThroughZExt) {
7018 MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
7019 auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
7020 if (Ext == AArch64_AM::InvalidShiftExtend)
7021 return std::nullopt;
7022
7023 SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
7024 // We only support SXTW for signed extension here.
7025 if (SignExtend && Ext != AArch64_AM::SXTW)
7026 return std::nullopt;
7027 OffsetReg = ExtInst->getOperand(1).getReg();
7028 }
7029
7030 // Need a 32-bit wide register here.
7031 MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
7032 OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
7033 }
7034
7035 // We can use the LHS of the GEP as the base, and the LHS of the shift as an
7036 // offset. Signify that we are shifting by setting the shift flag to 1.
7037 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
7038 [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
7039 [=](MachineInstrBuilder &MIB) {
7040 // Need to add both immediates here to make sure that they are both
7041 // added to the instruction.
7042 MIB.addImm(SignExtend);
7043 MIB.addImm(1);
7044 }}};
7045}
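// Worked example: for a 4-byte access (SizeInBytes = 4, LegalShiftVal = 2),
// an offset defined by either "G_SHL %idx, 2" or "G_MUL %idx, 4" is folded so
// the address becomes [base, %idx, lsl #2] (with sxtw/uxtw instead of lsl
// when WantsExt is set and the index was extended from 32 bits).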
7046
7047/// This is used for computing addresses like this:
7048///
7049/// ldr x1, [x2, x3, lsl #3]
7050///
7051/// Where x2 is the base register, and x3 is an offset register. The shift-left
7052/// is a constant value specific to this load instruction. That is, we'll never
7053/// see anything other than a 3 here (which corresponds to the size of the
7054/// element being loaded.)
7056AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
7057 MachineOperand &Root, unsigned SizeInBytes) const {
7058 if (!Root.isReg())
7059 return std::nullopt;
7061
7062 // We want to find something like this:
7063 //
7064 // val = G_CONSTANT LegalShiftVal
7065 // shift = G_SHL off_reg val
7066 // ptr = G_PTR_ADD base_reg shift
7067 // x = G_LOAD ptr
7068 //
7069 // And fold it into this addressing mode:
7070 //
7071 // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
7072
7073 // Check if we can find the G_PTR_ADD.
7074 MachineInstr *PtrAdd =
7075 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7076 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
7077 return std::nullopt;
7078
7079 // Now, try to match an opcode which will match our specific offset.
7080 // We want a G_SHL or a G_MUL.
7081 MachineInstr *OffsetInst =
7082 getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
7083 return selectExtendedSHL(Root, PtrAdd->getOperand(1),
7084 OffsetInst->getOperand(0), SizeInBytes,
7085 /*WantsExt=*/false);
7086}
7087
7088/// This is used for computing addresses like this:
7089///
7090/// ldr x1, [x2, x3]
7091///
7092/// Where x2 is the base register, and x3 is an offset register.
7093///
7094/// When possible (or profitable) to fold a G_PTR_ADD into the address
7095/// calculation, this will do so. Otherwise, it will return std::nullopt.
7096InstructionSelector::ComplexRendererFns
7097AArch64InstructionSelector::selectAddrModeRegisterOffset(
7098 MachineOperand &Root) const {
7100
7101 // We need a GEP.
7102 MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
7103 if (Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
7104 return std::nullopt;
7105
7106 // If this is used more than once, let's not bother folding.
7107 // TODO: Check if they are memory ops. If they are, then we can still fold
7108 // without having to recompute anything.
7109 if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
7110 return std::nullopt;
7111
7112 // Base is the GEP's LHS, offset is its RHS.
7113 return {{[=](MachineInstrBuilder &MIB) {
7114 MIB.addUse(Gep->getOperand(1).getReg());
7115 },
7116 [=](MachineInstrBuilder &MIB) {
7117 MIB.addUse(Gep->getOperand(2).getReg());
7118 },
7119 [=](MachineInstrBuilder &MIB) {
7120 // Need to add both immediates here to make sure that they are both
7121 // added to the instruction.
7122 MIB.addImm(0);
7123 MIB.addImm(0);
7124 }}};
7125}
7126
7127/// This is intended to be equivalent to selectAddrModeXRO in
7128/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
7129InstructionSelector::ComplexRendererFns
7130AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
7131 unsigned SizeInBytes) const {
7133 if (!Root.isReg())
7134 return std::nullopt;
7135 MachineInstr *PtrAdd =
7136 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7137 if (!PtrAdd)
7138 return std::nullopt;
7139
7140 // Check for immediates which cannot be encoded in the [base + imm]
7141 // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
7142 // end up with code like:
7143 //
7144 // mov x0, wide
7145 // add x1 base, x0
7146 // ldr x2, [x1, x0]
7147 //
7148 // In this situation, we can use the [base, xreg] addressing mode to save an
7149 // add/sub:
7150 //
7151 // mov x0, wide
7152 // ldr x2, [base, x0]
7153 auto ValAndVReg =
7154 getIConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
7155 if (ValAndVReg) {
7156 unsigned Scale = Log2_32(SizeInBytes);
7157 int64_t ImmOff = ValAndVReg->Value.getSExtValue();
7158
7159 // Skip immediates that can be selected in the load/store addressing
7160 // mode.
7161 if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
7162 ImmOff < (0x1000 << Scale))
7163 return std::nullopt;
7164
7165 // Helper lambda to decide whether or not it is preferable to emit an add.
7166 auto isPreferredADD = [](int64_t ImmOff) {
7167 // Constants in [0x0, 0xfff] can be encoded in an add.
7168 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
7169 return true;
7170
7171 // Can it be encoded in an add lsl #12?
7172 if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
7173 return false;
7174
7175 // It can be encoded in an add lsl #12, but we may not want to. If it is
7176 // possible to select this as a single movz, then prefer that. A single
7177 // movz is faster than an add with a shift.
7178 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
7179 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
7180 };
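// For example, isPreferredADD(0x456000) is true: the low 12 bits are clear
// and the value fits in 24 bits, so a single "add ..., lsl #12" can
// materialise it. isPreferredADD(0x450000) is false because that constant is
// a single "movz ..., lsl #16", so the [base, xreg] form is kept instead.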
7181
7182 // If the immediate can be encoded in a single add/sub, then bail out.
7183 if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
7184 return std::nullopt;
7185 }
7186
7187 // Try to fold shifts into the addressing mode.
7188 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
7189 if (AddrModeFns)
7190 return AddrModeFns;
7191
7192 // If that doesn't work, see if it's possible to fold in registers from
7193 // a GEP.
7194 return selectAddrModeRegisterOffset(Root);
7195}
7196
7197/// This is used for computing addresses like this:
7198///
7199/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
7200///
7201/// Where we have a 64-bit base register, a 32-bit offset register, and an
7202/// extend (which may or may not be signed).
7203InstructionSelector::ComplexRendererFns
7204AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
7205 unsigned SizeInBytes) const {
7207
7208 MachineInstr *PtrAdd =
7209 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7210 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
7211 return std::nullopt;
7212
7213 MachineOperand &LHS = PtrAdd->getOperand(1);
7214 MachineOperand &RHS = PtrAdd->getOperand(2);
7215 MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
7216
7217 // The first case is the same as selectAddrModeXRO, except we need an extend.
7218 // In this case, we try to find a shift and extend, and fold them into the
7219 // addressing mode.
7220 //
7221 // E.g.
7222 //
7223 // off_reg = G_Z/S/ANYEXT ext_reg
7224 // val = G_CONSTANT LegalShiftVal
7225 // shift = G_SHL off_reg val
7226 // ptr = G_PTR_ADD base_reg shift
7227 // x = G_LOAD ptr
7228 //
7229 // In this case we can get a load like this:
7230 //
7231 // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
7232 auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
7233 SizeInBytes, /*WantsExt=*/true);
7234 if (ExtendedShl)
7235 return ExtendedShl;
7236
7237 // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
7238 //
7239 // e.g.
7240 // ldr something, [base_reg, ext_reg, sxtw]
7241 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
7242 return std::nullopt;
7243
7244 // Check if this is an extend. We'll get an extend type if it is.
7245 AArch64_AM::ShiftExtendType Ext =
7246 getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
7247 if (Ext == AArch64_AM::InvalidShiftExtend)
7248 return std::nullopt;
7249
7250 // Need a 32-bit wide register.
7251 MachineIRBuilder MIB(*PtrAdd);
7252 Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
7253 AArch64::GPR32RegClass, MIB);
7254 unsigned SignExtend = Ext == AArch64_AM::SXTW;
7255
7256 // Base is LHS, offset is ExtReg.
7257 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
7258 [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
7259 [=](MachineInstrBuilder &MIB) {
7260 MIB.addImm(SignExtend);
7261 MIB.addImm(0);
7262 }}};
7263}
7264
7265/// Select a "register plus unscaled signed 9-bit immediate" address. This
7266/// should only match when there is an offset that is not valid for a scaled
7267/// immediate addressing mode. The "Size" argument is the size in bytes of the
7268/// memory reference, which is needed here to know what is valid for a scaled
7269/// immediate.
7270InstructionSelector::ComplexRendererFns
7271AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
7272 unsigned Size) const {
7273 MachineRegisterInfo &MRI =
7274 Root.getParent()->getParent()->getParent()->getRegInfo();
7275
7276 if (!Root.isReg())
7277 return std::nullopt;
7278
7279 if (!isBaseWithConstantOffset(Root, MRI))
7280 return std::nullopt;
7281
7282 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
7283
7284 MachineOperand &OffImm = RootDef->getOperand(2);
7285 if (!OffImm.isReg())
7286 return std::nullopt;
7287 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
7288 if (RHS->getOpcode() != TargetOpcode::G_CONSTANT)
7289 return std::nullopt;
7290 int64_t RHSC;
7291 MachineOperand &RHSOp1 = RHS->getOperand(1);
7292 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
7293 return std::nullopt;
7294 RHSC = RHSOp1.getCImm()->getSExtValue();
7295
7296 if (RHSC >= -256 && RHSC < 256) {
7297 MachineOperand &Base = RootDef->getOperand(1);
7298 return {{
7299 [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
7300 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
7301 }};
7302 }
7303 return std::nullopt;
7304}
7305
7306InstructionSelector::ComplexRendererFns
7307AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
7308 unsigned Size,
7309 MachineRegisterInfo &MRI) const {
7310 if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
7311 return std::nullopt;
7312 MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
7313 if (Adrp.getOpcode() != AArch64::ADRP)
7314 return std::nullopt;
7315
7316 // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
7317 auto Offset = Adrp.getOperand(1).getOffset();
7318 if (Offset % Size != 0)
7319 return std::nullopt;
7320
7321 auto GV = Adrp.getOperand(1).getGlobal();
7322 if (GV->isThreadLocal())
7323 return std::nullopt;
7324
7325 auto &MF = *RootDef.getParent()->getParent();
7326 if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
7327 return std::nullopt;
7328
7329 unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
7330 MachineIRBuilder MIRBuilder(RootDef);
7331 Register AdrpReg = Adrp.getOperand(0).getReg();
7332 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
7333 [=](MachineInstrBuilder &MIB) {
7334 MIB.addGlobalAddress(GV, Offset,
7335 OpFlags | AArch64II::MO_PAGEOFF |
7336 AArch64II::MO_NC);
7337 }}};
7338}
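// The net effect of the fold above: an "adrp x8, sym; add x8, x8, :lo12:sym;
// ldr x0, [x8]" sequence becomes "adrp x8, sym; ldr x0, [x8, :lo12:sym]",
// i.e. the G_ADD_LOW is absorbed into the load/store's immediate field.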
7339
7340/// Select a "register plus scaled unsigned 12-bit immediate" address. The
7341/// "Size" argument is the size in bytes of the memory reference, which
7342/// determines the scale.
7343InstructionSelector::ComplexRendererFns
7344AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
7345 unsigned Size) const {
7346 MachineFunction &MF = *Root.getParent()->getParent()->getParent();
7347 MachineRegisterInfo &MRI = MF.getRegInfo();
7348
7349 if (!Root.isReg())
7350 return std::nullopt;
7351
7352 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
7353 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
7354 return {{
7355 [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
7356 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
7357 }};
7358 }
7359
7360 CodeModel::Model CM = MF.getTarget().getCodeModel();
7361 // Check if we can fold in the ADD of small code model ADRP + ADD address.
7362 if (CM == CodeModel::Small) {
7363 auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
7364 if (OpFns)
7365 return OpFns;
7366 }
7367
7368 if (isBaseWithConstantOffset(Root, MRI)) {
7369 MachineOperand &LHS = RootDef->getOperand(1);
7370 MachineOperand &RHS = RootDef->getOperand(2);
7371 MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
7372 MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
7373
7374 int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
7375 unsigned Scale = Log2_32(Size);
7376 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
7377 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
7378 return {{
7379 [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
7380 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
7381 }};
7382
7383 return {{
7384 [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
7385 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
7386 }};
7387 }
7388 }
7389
7390 // Before falling back to our general case, check if the unscaled
7391 // instructions can handle this. If so, that's preferable.
7392 if (selectAddrModeUnscaled(Root, Size))
7393 return std::nullopt;
7394
7395 return {{
7396 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
7397 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
7398 }};
7399}
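// Worked example: for an 8-byte access (Size = 8, Scale = 3) with a constant
// offset of 48, the offset is aligned and in range, so the rendered operands
// are the base register and 48 >> 3 = 6, matching the scaled uimm12 field.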
7400
7401/// Given a shift instruction, return the correct shift type for that
7402/// instruction.
7403static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
7404 switch (MI.getOpcode()) {
7405 default:
7406 return AArch64_AM::InvalidShiftExtend;
7407 case TargetOpcode::G_SHL:
7408 return AArch64_AM::LSL;
7409 case TargetOpcode::G_LSHR:
7410 return AArch64_AM::LSR;
7411 case TargetOpcode::G_ASHR:
7412 return AArch64_AM::ASR;
7413 case TargetOpcode::G_ROTR:
7414 return AArch64_AM::ROR;
7415 }
7416}
7417
7418/// Select a "shifted register" operand. If the value is not shifted, set the
7419/// shift operand to a default value of "lsl 0".
7420InstructionSelector::ComplexRendererFns
7421AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
7422 bool AllowROR) const {
7423 if (!Root.isReg())
7424 return std::nullopt;
7425 MachineRegisterInfo &MRI =
7426 Root.getParent()->getParent()->getParent()->getRegInfo();
7427
7428 // Check if the operand is defined by an instruction which corresponds to
7429 // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
7430 MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
7431 AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
7432 if (ShType == AArch64_AM::InvalidShiftExtend)
7433 return std::nullopt;
7434 if (ShType == AArch64_AM::ROR && !AllowROR)
7435 return std::nullopt;
7436 if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
7437 return std::nullopt;
7438
7439 // Need an immediate on the RHS.
7440 MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
7441 auto Immed = getImmedFromMO(ShiftRHS);
7442 if (!Immed)
7443 return std::nullopt;
7444
7445 // We have something that we can fold. Fold in the shift's LHS and RHS into
7446 // the instruction.
7447 MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
7448 Register ShiftReg = ShiftLHS.getReg();
7449
7450 unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
7451 unsigned Val = *Immed & (NumBits - 1);
7452 unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);
7453
7454 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
7455 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
7456}
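// Worked example: if Root is defined by a G_SHL of %x by a constant 3, this
// renders %x plus the shifter immediate for "lsl #3", so a user such as
// G_ADD can be selected as "add x0, x1, x2, lsl #3".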
7457
7458AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
7459 MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
7460 unsigned Opc = MI.getOpcode();
7461
7462 // Handle explicit extend instructions first.
7463 if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
7464 unsigned Size;
7465 if (Opc == TargetOpcode::G_SEXT)
7466 Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7467 else
7468 Size = MI.getOperand(2).getImm();
7469 assert(Size != 64 && "Extend from 64 bits?");
7470 switch (Size) {
7471 case 8:
7472 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB;
7473 case 16:
7474 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH;
7475 case 32:
7476 return AArch64_AM::SXTW;
7477 default:
7478 return AArch64_AM::InvalidShiftExtend;
7479 }
7480 }
7481
7482 if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
7483 unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7484 assert(Size != 64 && "Extend from 64 bits?");
7485 switch (Size) {
7486 case 8:
7487 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB;
7488 case 16:
7489 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH;
7490 case 32:
7491 return AArch64_AM::UXTW;
7492 default:
7493 return AArch64_AM::InvalidShiftExtend;
7494 }
7495 }
7496
7497 // Don't have an explicit extend. Try to handle a G_AND with a constant mask
7498 // on the RHS.
7499 if (Opc != TargetOpcode::G_AND)
7500 return AArch64_AM::InvalidShiftExtend;
7501
7502 std::optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
7503 if (!MaybeAndMask)
7504 return AArch64_AM::InvalidShiftExtend;
7505 uint64_t AndMask = *MaybeAndMask;
7506 switch (AndMask) {
7507 default:
7508 return AArch64_AM::InvalidShiftExtend;
7509 case 0xFF:
7510 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
7511 case 0xFFFF:
7512 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
7513 case 0xFFFFFFFF:
7514 return AArch64_AM::UXTW;
7515 }
7516}
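// Example: a G_AND with mask 0xFF is treated as an implicit UXTB, 0xFFFF as
// UXTH and 0xFFFFFFFF as UXTW; the byte/halfword forms are only usable as
// arithmetic operands, not in load/store addressing, hence the IsLoadStore
// checks above.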
7517
7518Register AArch64InstructionSelector::moveScalarRegClass(
7519 Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
7520 MachineRegisterInfo &MRI = *MIB.getMRI();
7521 auto Ty = MRI.getType(Reg);
7522 assert(!Ty.isVector() && "Expected scalars only!");
7523 if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
7524 return Reg;
7525
7526 // Create a copy and immediately select it.
7527 // FIXME: We should have an emitCopy function?
7528 auto Copy = MIB.buildCopy({&RC}, {Reg});
7529 selectCopy(*Copy, TII, MRI, TRI, RBI);
7530 return Copy.getReg(0);
7531}
7532
7533/// Select an "extended register" operand. This operand folds in an extend
7534/// followed by an optional left shift.
7535InstructionSelector::ComplexRendererFns
7536AArch64InstructionSelector::selectArithExtendedRegister(
7537 MachineOperand &Root) const {
7538 if (!Root.isReg())
7539 return std::nullopt;
7540 MachineRegisterInfo &MRI =
7541 Root.getParent()->getParent()->getParent()->getRegInfo();
7542
7543 uint64_t ShiftVal = 0;
7544 Register ExtReg;
7545 AArch64_AM::ShiftExtendType Ext;
7546 MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
7547 if (!RootDef)
7548 return std::nullopt;
7549
7550 if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI))
7551 return std::nullopt;
7552
7553 // Check if we can fold a shift and an extend.
7554 if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
7555 // Look for a constant on the RHS of the shift.
7556 MachineOperand &RHS = RootDef->getOperand(2);
7557 std::optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
7558 if (!MaybeShiftVal)
7559 return std::nullopt;
7560 ShiftVal = *MaybeShiftVal;
7561 if (ShiftVal > 4)
7562 return std::nullopt;
7563 // Look for a valid extend instruction on the LHS of the shift.
7564 MachineOperand &LHS = RootDef->getOperand(1);
7565 MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
7566 if (!ExtDef)
7567 return std::nullopt;
7568 Ext = getExtendTypeForInst(*ExtDef, MRI);
7569 if (Ext == AArch64_AM::InvalidShiftExtend)
7570 return std::nullopt;
7571 ExtReg = ExtDef->getOperand(1).getReg();
7572 } else {
7573 // Didn't get a shift. Try just folding an extend.
7574 Ext = getExtendTypeForInst(*RootDef, MRI);
7575 if (Ext == AArch64_AM::InvalidShiftExtend)
7576 return std::nullopt;
7577 ExtReg = RootDef->getOperand(1).getReg();
7578
7579 // If we have a 32 bit instruction which zeroes out the high half of a
7580 // register, we get an implicit zero extend for free. Check if we have one.
7581 // FIXME: We actually emit the extend right now even though we don't have
7582 // to.
7583 if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
7584 MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
7585 if (isDef32(*ExtInst))
7586 return std::nullopt;
7587 }
7588 }
7589
7590 // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
7591 // copy.
7592 MachineIRBuilder MIB(*RootDef);
7593 ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);
7594
7595 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
7596 [=](MachineInstrBuilder &MIB) {
7597 MIB.addImm(getArithExtendImm(Ext, ShiftVal));
7598 }}};
7599}
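// Worked example: with Root defined by a G_SHL by 2 of a G_SEXT_INREG from
// 16 bits, this folds to the equivalent of "sxth #2", so the user can be
// selected as e.g. "add x0, x1, w2, sxth #2".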
7600
7601InstructionSelector::ComplexRendererFns
7602AArch64InstructionSelector::selectExtractHigh(MachineOperand &Root) const {
7603 if (!Root.isReg())
7604 return std::nullopt;
7605 MachineRegisterInfo &MRI =
7606 Root.getParent()->getParent()->getParent()->getRegInfo();
7607
7608 auto Extract = getDefSrcRegIgnoringCopies(Root.getReg(), MRI);
7609 while (Extract && Extract->MI->getOpcode() == TargetOpcode::G_BITCAST &&
7610 STI.isLittleEndian())
7611 Extract =
7612 getDefSrcRegIgnoringCopies(Extract->MI->getOperand(1).getReg(), MRI);
7613 if (!Extract)
7614 return std::nullopt;
7615
7616 if (Extract->MI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
7617 if (Extract->Reg == Extract->MI->getOperand(1).getReg()) {
7618 Register ExtReg = Extract->MI->getOperand(2).getReg();
7619 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
7620 }
7621 }
7622 if (Extract->MI->getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) {
7623 LLT SrcTy = MRI.getType(Extract->MI->getOperand(1).getReg());
7624 auto LaneIdx = getIConstantVRegValWithLookThrough(
7625 Extract->MI->getOperand(2).getReg(), MRI);
7626 if (LaneIdx && SrcTy == LLT::fixed_vector(2, 64) &&
7627 LaneIdx->Value.getSExtValue() == 1) {
7628 Register ExtReg = Extract->MI->getOperand(1).getReg();
7629 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
7630 }
7631 }
7632
7633 return std::nullopt;
7634}
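// Example: given "%lo:fpr(s64), %hi:fpr(s64) = G_UNMERGE_VALUES %v:fpr(<2 x s64>)",
// selecting with Root = %hi renders the whole vector %v, so a pattern that
// wants the high half (e.g. the "2" forms of widening instructions) can use
// %v's upper half directly.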
7635
7636void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
7637 const MachineInstr &MI,
7638 int OpIdx) const {
7639 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
7640 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7641 "Expected G_CONSTANT");
7642 std::optional<int64_t> CstVal =
7643 getIConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
7644 assert(CstVal && "Expected constant value");
7645 MIB.addImm(*CstVal);
7646}
7647
7648void AArch64InstructionSelector::renderLogicalImm32(
7649 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7650 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7651 "Expected G_CONSTANT");
7652 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
7653 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
7654 MIB.addImm(Enc);
7655}
7656
7657void AArch64InstructionSelector::renderLogicalImm64(
7658 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7659 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7660 "Expected G_CONSTANT");
7661 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
7662 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
7663 MIB.addImm(Enc);
7664}
7665
7666void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB,
7667 const MachineInstr &MI,
7668 int OpIdx) const {
7669 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7670 "Expected G_FCONSTANT");
7671 MIB.addImm(
7672 AArch64_AM::getFP16Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7673}
7674
7675void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB,
7676 const MachineInstr &MI,
7677 int OpIdx) const {
7678 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7679 "Expected G_FCONSTANT");
7680 MIB.addImm(
7681 AArch64_AM::getFP32Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7682}
7683
7684void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
7685 const MachineInstr &MI,
7686 int OpIdx) const {
7687 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7688 "Expected G_FCONSTANT");
7689 MIB.addImm(
7690 AArch64_AM::getFP64Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7691}
7692
7693void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
7694 MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
7695 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7696 "Expected G_FCONSTANT");
7697 MIB.addImm(AArch64_AM::encodeAdvSIMDModImmType4(MI.getOperand(1)
7698 .getFPImm()
7699 ->getValueAPF()
7700 .bitcastToAPInt()
7701 .getZExtValue()));
7702}
7703
7704bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
7705 const MachineInstr &MI, unsigned NumBytes) const {
7706 if (!MI.mayLoadOrStore())
7707 return false;
7708 assert(MI.hasOneMemOperand() &&
7709 "Expected load/store to have only one mem op!");
7710 return (*MI.memoperands_begin())->getSize() == NumBytes;
7711}
7712
7713bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
7714 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
7715 if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
7716 return false;
7717
7718 // Only return true if we know the operation will zero-out the high half of
7719 // the 64-bit register. Truncates can be subregister copies, which don't
7720 // zero out the high bits. Copies and other copy-like instructions can be
7721 // fed by truncates, or could be lowered as subregister copies.
7722 switch (MI.getOpcode()) {
7723 default:
7724 return true;
7725 case TargetOpcode::COPY:
7726 case TargetOpcode::G_BITCAST:
7727 case TargetOpcode::G_TRUNC:
7728 case TargetOpcode::G_PHI:
7729 return false;
7730 }
7731}
7732
7733
7734// Perform fixups on the given PHI instruction's operands to force them all
7735// to be the same as the destination regbank.
7736static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
7737 const AArch64RegisterBankInfo &RBI) {
7738 assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
7739 Register DstReg = MI.getOperand(0).getReg();
7740 const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
7741 assert(DstRB && "Expected PHI dst to have regbank assigned");
7742 MachineIRBuilder MIB(MI);
7743
7744 // Go through each operand and ensure it has the same regbank.
7745 for (MachineOperand &MO : llvm::drop_begin(MI.operands())) {
7746 if (!MO.isReg())
7747 continue;
7748 Register OpReg = MO.getReg();
7749 const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
7750 if (RB != DstRB) {
7751 // Insert a cross-bank copy.
7752 auto *OpDef = MRI.getVRegDef(OpReg);
7753 const LLT &Ty = MRI.getType(OpReg);
7754 MachineBasicBlock &OpDefBB = *OpDef->getParent();
7755
7756 // Any instruction we insert must appear after all PHIs in the block
7757 // for the block to be valid MIR.
7758 MachineBasicBlock::iterator InsertPt = std::next(OpDef->getIterator());
7759 if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
7760 InsertPt = OpDefBB.getFirstNonPHI();
7761 MIB.setInsertPt(*OpDef->getParent(), InsertPt);
7762 auto Copy = MIB.buildCopy(Ty, OpReg);
7763 MRI.setRegBank(Copy.getReg(0), *DstRB);
7764 MO.setReg(Copy.getReg(0));
7765 }
7766 }
7767}
7768
7769void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
7770 // We're looking for PHIs, build a list so we don't invalidate iterators.
7771 MachineRegisterInfo &MRI = MF.getRegInfo();
7772 SmallVector<MachineInstr *, 32> Phis;
7773 for (auto &BB : MF) {
7774 for (auto &MI : BB) {
7775 if (MI.getOpcode() == TargetOpcode::G_PHI)
7776 Phis.emplace_back(&MI);
7777 }
7778 }
7779
7780 for (auto *MI : Phis) {
7781 // We need to do some work here if the operand types are < 16 bit and they
7782 // are split across fpr/gpr banks. Since all types <32b on gpr
7783 // end up being assigned gpr32 regclasses, we can end up with PHIs here
7784 // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
7785 // be selecting heterogeneous regbanks for operands if possible, but we
7786 // still need to be able to deal with it here.
7787 //
7788 // To fix this, if we have a gpr-bank operand < 32b in size and at least
7789 // one other operand is on the fpr bank, then we add cross-bank copies
7790 // to homogenize the operand banks. For simplicity the bank that we choose
7791 // to settle on is whatever bank the def operand has. For example:
7792 //
7793 // %endbb:
7794 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
7795 // =>
7796 // %bb2:
7797 // ...
7798 // %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
7799 // ...
7800 // %endbb:
7801 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
7802 bool HasGPROp = false, HasFPROp = false;
7803 for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
7804 if (!MO.isReg())
7805 continue;
7806 const LLT &Ty = MRI.getType(MO.getReg());
7807 if (!Ty.isValid() || !Ty.isScalar())
7808 break;
7809 if (Ty.getSizeInBits() >= 32)
7810 break;
7811 const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
7812 // If for some reason we don't have a regbank yet, don't try anything.
7813 if (!RB)
7814 break;
7815
7816 if (RB->getID() == AArch64::GPRRegBankID)
7817 HasGPROp = true;
7818 else
7819 HasFPROp = true;
7820 }
7821 // We have heterogeneous regbanks, so we need to fix them up.
7822 if (HasGPROp && HasFPROp)
7823 fixupPHIOpBanks(*MI, MRI, RBI);
7824 }
7825}
7826
7827namespace llvm {
7828InstructionSelector *
7829createAArch64InstructionSelector(const AArch64TargetMachine &TM,
7830 AArch64Subtarget &Subtarget,
7831 AArch64RegisterBankInfo &RBI) {
7832 return new AArch64InstructionSelector(TM, Subtarget, RBI);
7833}
7834}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
#define Success
static bool isPreferredADD(int64_t ImmOff)
static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue CCOp, AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC, const SDLoc &DL, SelectionDAG &DAG)
can be transformed to: not (and (not (and (setCC (cmp C)) (setCD (cmp D)))) (and (not (setCA (cmp A))...
static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue tryAdvSIMDModImm8(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp, AArch64CC::CondCode Predicate)
Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain of CCMP/CFCMP ops.
static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static bool isCMN(SDValue Op, ISD::CondCode CC)
static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC)
Emit expression as a conjunction (a series of CCMP/CFCMP ops).
static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy)
#define GET_GLOBALISEL_PREDICATES_INIT
static std::pair< const TargetRegisterClass *, const TargetRegisterClass * > getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Helper function to get the source and destination register classes for a copy.
#define GET_GLOBALISEL_TEMPORARIES_INIT
static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Convert an IR fp condition code to an AArch64 CC.
static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert, MachineRegisterInfo &MRI)
Return a register which can be used as a bit to test in a TB(N)Z.
static unsigned getMinSizeForRegBank(const RegisterBank &RB)
Returns the minimum size the given register bank can hold.
static std::optional< int64_t > getVectorShiftImm(Register Reg, MachineRegisterInfo &MRI)
Returns the element immediate value of a vector shift operand if found.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the G_LOAD or G_STORE operation GenericOpc, appropriate for the (value)...
static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P)
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the basic binary operation GenericOpc (such as G_OR or G_SDIV),...
static bool getSubRegForClass(const TargetRegisterClass *RC, const TargetRegisterInfo &TRI, unsigned &SubReg)
Returns the correct subregister to use for a given register class.
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI, const RegisterBankInfo &RBI, Register SrcReg, const TargetRegisterClass *To, unsigned SubReg)
Helper function for selectCopy.
static const TargetRegisterClass * getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits, bool GetAllRegSet=false)
Given a register bank, and size in bits, return the smallest register class that can represent that c...
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI, const AArch64RegisterBankInfo &RBI)
static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI, const RegisterBankInfo &RBI)
static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type)
static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI)
Given a shift instruction, return the correct shift type for that instruction.
static bool unsupportedBinOp(const MachineInstr &I, const AArch64RegisterBankInfo &RBI, const MachineRegisterInfo &MRI, const AArch64RegisterInfo &TRI)
Check whether I is a currently unsupported binary operation:
static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst, bool WillNegate, MachineRegisterInfo &MRI, unsigned Depth=0)
Returns true if Val is a tree of AND/OR/CMP operations that can be expressed as a conjunction.
static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg, const unsigned EltSize)
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static std::optional< uint64_t > getImmedFromMO(const MachineOperand &Root)
static std::pair< unsigned, unsigned > getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize)
Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given size and RB.
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
static std::optional< int64_t > getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI)
Matches and returns the shift immediate value for a SHL instruction given a shift operand.
static void changeFPCCToORAArch64CC(CmpInst::Predicate CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
unsigned RegSize
This file declares the targeting of the RegisterBankInfo class for AArch64.
MachineBasicBlock & MBB
static const LLT S64
static const LLT S32
static const LLT S16
static const LLT S8
static bool isStore(int Opcode)
static bool selectMergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
static bool selectUnmergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx; should correspond to the result type of an ExtractValue instruction executed with just that one Idx.
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file contains constants used for implementing Dwarf debug support.
uint64_t Size
#define DEBUG_TYPE
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
unsigned Reg
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define P(N)
const char LLVMTargetMachineRef TM
static StringRef getName(Value *V)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static constexpr int Concat[]
Value * RHS
Value * LHS
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
This class provides the information for the target register banks.
bool isCallingConvWin64(CallingConv::ID CC) const
APInt bitcastToAPInt() const
Definition: APFloat.h:1210
Class for arbitrary precision integers.
Definition: APInt.h:76
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:620
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:274
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:965
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:968
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:994
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:995
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:971
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:980
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:969
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:970
@ ICMP_UGE
unsigned greater or equal
Definition: InstrTypes.h:989
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:988
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:992
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:979
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:973
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:976
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:990
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:977
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:972
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition: InstrTypes.h:974
@ ICMP_EQ
equal
Definition: InstrTypes.h:986
@ ICMP_NE
not equal
Definition: InstrTypes.h:987
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:993
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:981
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:991
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:978
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:975
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:1090
bool isIntPredicate() const
Definition: InstrTypes.h:1084
bool isUnsigned() const
Definition: InstrTypes.h:1232
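A short sketch of querying the CmpInst::Predicate helpers above (illustrative; it assumes the static forms of these helpers, which mirror the member functions listed here):
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

void predicateExamples() {
  CmpInst::Predicate P = CmpInst::ICMP_SLT;                 // signed less than
  bool IsInt = CmpInst::isIntPredicate(P);                  // true: integer predicate
  bool IsUnsigned = CmpInst::isUnsigned(P);                 // false: SLT is signed
  CmpInst::Predicate Inv = CmpInst::getInversePredicate(P); // ICMP_SGE
  (void)IsInt; (void)IsUnsigned; (void)Inv;
}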
static Constant * getSplat(unsigned NumElts, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
Definition: Constants.cpp:2954
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:267
const APFloat & getValueAPF() const
Definition: Constants.h:310
bool isNegative() const
Return true if the sign bit is set.
Definition: Constants.h:317
bool isZero() const
Return true if the value is positive or negative zero.
Definition: Constants.h:314
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:159
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition: Constants.h:147
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:153
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1398
This is an important base class in LLVM.
Definition: Constant.h:41
Constant * getSplatValue(bool AllowUndefs=false) const
If all elements of the vector constant have the same value, return that value.
Definition: Constants.cpp:1699
const APInt & getUniqueInteger() const
If C is a constant integer then return its value, otherwise C must be a vector of constant integers,...
Definition: Constants.cpp:1758
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
This class represents an Operation in the Expression.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Definition: DataLayout.h:472
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:874
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this function...
Definition: Function.h:262
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:342
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:669
std::optional< SmallVector< std::function< void(MachineInstrBuilder &)>, 4 > > ComplexRendererFns
Represents indexed stores.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
uint64_t getMemSizeInBits() const
Returns the size in bits of the memory access.
uint64_t getMemSize() const
Returns the size in bytes of the memory access.
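An illustrative use of the generic load/store wrapper described above (a sketch; the helper name and the 64-bit cutoff are invented for this example, and the accessor signatures are taken as listed here):
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
using namespace llvm;

// Treat MI as a generic load/store and reject large or atomic accesses.
static bool isSmallNonAtomicAccess(MachineInstr &MI) {
  auto *LdSt = dyn_cast<GLoadStore>(&MI);
  if (!LdSt)
    return false;                              // not a G_LOAD/G_STORE
  MachineMemOperand &MMO = LdSt->getMMO();     // attached memory operand
  return LdSt->getMemSizeInBits() <= 64 &&
         MMO.getSuccessOrdering() == AtomicOrdering::NotAtomic;
}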
Represents a G_SELECT.
Register getCondReg() const
Register getFalseReg() const
Register getTrueReg() const
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:263
bool isEquality() const
Return true if this predicate is either EQ or NE.
constexpr unsigned getScalarSizeInBits() const
Definition: LowLevelType.h:267
constexpr bool isScalar() const
Definition: LowLevelType.h:146
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
Definition: LowLevelType.h:214
constexpr LLT multiplyElements(int Factor) const
Produce a vector type that is Factor times bigger, preserving the element type.
Definition: LowLevelType.h:254
constexpr bool isPointerVector() const
Definition: LowLevelType.h:152
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
constexpr bool isValid() const
Definition: LowLevelType.h:145
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
Definition: LowLevelType.h:159
constexpr bool isVector() const
Definition: LowLevelType.h:148
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
Definition: LowLevelType.h:57
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition: LowLevelType.h:193
constexpr bool isPointer() const
Definition: LowLevelType.h:149
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
Definition: LowLevelType.h:290
constexpr unsigned getAddressSpace() const
Definition: LowLevelType.h:280
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
Definition: LowLevelType.h:100
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
Definition: LowLevelType.h:203
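A short sketch of constructing and querying LLT values with the constructors listed above (illustrative; the header path is the one used in recent trees and may differ across versions):
#include "llvm/CodeGenTypes/LowLevelType.h"
using namespace llvm;

void lltExamples() {
  LLT S64 = LLT::scalar(64);                  // 64-bit scalar: s64
  LLT P0 = LLT::pointer(0, 64);               // 64-bit pointer in address space 0
  LLT V4S32 = LLT::fixed_vector(4, 32);       // fixed-width vector: <4 x s32>
  unsigned NumElts = V4S32.getNumElements();  // 4
  LLT EltTy = V4S32.getElementType();         // s32
  TypeSize Bits = V4S32.getSizeInBits();      // 128 bits in total
  (void)S64; (void)P0; (void)NumElts; (void)EltTy; (void)Bits;
}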
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Set of metadata that should be preserved when using BuildMI().
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
void setFrameAddressIsTaken(bool T)
void setReturnAddressIsTaken(bool s)
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
Helper class to build MachineInstr.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
void setInstr(MachineInstr &MI)
Set the insertion point to before MI.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineFunction & getMF()
Getter for the function we currently build.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineIRBuilderState & getState()
Getter for the State.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
void setState(const MachineIRBuilderState &NewState)
Setter for the State.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addBlockAddress(const BlockAddress *BA, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addJumpTableIndex(unsigned Idx, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
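An illustrative sketch of the builder APIs above: create a generic add and a copy with MachineIRBuilder, filling explicit operands via addDef/addUse (the emitDouble helper is invented; MIB is assumed to already have its insertion point set):
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
using namespace llvm;

static Register emitDouble(MachineIRBuilder &MIB, Register Src) {
  MachineRegisterInfo &MRI = *MIB.getMRI();
  LLT Ty = MRI.getType(Src);
  Register Sum = MRI.createGenericVirtualRegister(Ty);
  MIB.buildInstr(TargetOpcode::G_ADD) // empty instruction, operands added below
      .addDef(Sum)                    // result
      .addUse(Src)                    // first source
      .addUse(Src);                   // second source
  // buildCopy returns a MachineInstrBuilder; getReg(0) is the copy's def.
  return MIB.buildCopy(MRI.createGenericVirtualRegister(Ty), Sum).getReg(0);
}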
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:544
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:327
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:554
void addMemOperand(MachineFunction &MF, MachineMemOperand *MO)
Add a MachineMemOperand to the machine instruction.
A description of a memory reference used in the backend.
LLT getMemoryType() const
Return the memory type of the memory reference.
@ MOLoad
The memory access reads data.
AtomicOrdering getSuccessOrdering() const
Return the atomic ordering requirements for this memory operation.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
const ConstantInt * getCImm() const
bool isCImm() const
isCImm - Test if this is a MO_CImmediate operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
static MachineOperand CreateBA(const BlockAddress *BA, int64_t Offset, unsigned TargetFlags=0)
const ConstantFP * getFPImm() const
unsigned getPredicate() const
int64_t getOffset() const
Return the offset from the symbol in this operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
T get() const
Returns the value of the specified pointer type.
Definition: PointerUnion.h:155
T dyn_cast() const
Returns the current pointer if it is of the specified pointer type, otherwise returns null.
Definition: PointerUnion.h:162
Analysis providing profile information.
Holds all the information related to register banks.
static const TargetRegisterClass * constrainGenericRegister(Register Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI)
Constrain the (possibly generic) virtual register Reg to RC.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
TypeSize getSizeInBits(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
Get the size in bits of Reg.
This class implements the register bank concept.
Definition: RegisterBank.h:28
unsigned getID() const
Get the identifier of this register bank.
Definition: RegisterBank.h:45
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
This class represents the LLVM 'select' instruction.
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
Register getReg() const
TargetInstrInfo - Interface to description of machine instruction set.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetLowering * getTargetLowering() const
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static IntegerType * getInt8Ty(LLVMContext &C)
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:926
self_iterator getIterator()
Definition: ilist_node.h:109
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCode getInvertedCondCode(CondCode Code)
static unsigned getNZCVToSatisfyCondCode(CondCode Code)
Given a condition code, return NZCV flags that would satisfy that condition.
void changeFCMPPredToAArch64CC(const CmpInst::Predicate P, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Find the AArch64 condition codes necessary to represent P for a scalar floating point comparison.
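A hedged sketch of how the helper above is typically consumed (illustrative; the include, the extra using-directive, the AL "no second code" convention, and the EQ/VS pairing for FCMP_UEQ are assumptions based on the usual AArch64 FP lowering, not statements about this file):
#include "AArch64GlobalISelUtils.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;
using namespace AArch64GISelUtils;

static void exampleFcmpLowering() {
  AArch64CC::CondCode CC1, CC2;
  changeFCMPPredToAArch64CC(CmpInst::FCMP_UEQ, CC1, CC2);
  // Unordered predicates may need two condition codes (e.g. EQ plus VS);
  // callers then emit one conditional op per code and combine the results.
  if (CC2 != AArch64CC::AL) {
    // second condition required
  }
}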
std::optional< int64_t > getAArch64VectorSplatScalar(const MachineInstr &MI, const MachineRegisterInfo &MRI)
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_TLS
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static uint8_t encodeAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType9(uint64_t Imm)
static bool isAdvSIMDModImmType4(uint64_t Imm)
static bool isAdvSIMDModImmType5(uint64_t Imm)
static int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
static uint8_t encodeAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType10(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType9(uint64_t Imm)
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
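A small sketch of the logical-immediate helpers above (illustrative; the include path is the target-local MCTargetDesc header used elsewhere in the AArch64 backend, and the sample constant is just a repeating 0x00FF pattern):
#include "MCTargetDesc/AArch64AddressingModes.h"
#include <cstdint>

void logicalImmExample() {
  uint64_t Imm = 0x00FF00FF00FF00FFULL;   // repeating 16-bit pattern -> encodable
  if (llvm::AArch64_AM::isLogicalImmediate(Imm, 64)) {
    // Encoded N:immr:imms form, suitable as the immediate of e.g. an ANDXri.
    uint64_t Enc = llvm::AArch64_AM::encodeLogicalImmediate(Imm, 64);
    (void)Enc;
  }
}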
static bool isAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType5(uint64_t Imm)
static int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
static bool isAdvSIMDModImmType10(uint64_t Imm)
static int getFP16Imm(const APInt &Imm)
getFP16Imm - Return an 8-bit floating-point version of the 16-bit floating-point value.
static uint8_t encodeAdvSIMDModImmType8(uint64_t Imm)
static bool isAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType11(uint64_t Imm)
static bool isAdvSIMDModImmType11(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType6(uint64_t Imm)
static bool isAdvSIMDModImmType8(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType4(uint64_t Imm)
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount (imm: 6-bit shift amount; shifter: 000 ==> lsl, 001 ==> ...).
static bool isAdvSIMDModImmType6(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType1(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType3(uint64_t Imm)
static bool isAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType3(uint64_t Imm)
static bool isAdvSIMDModImmType1(uint64_t Imm)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1523
operand_type_match m_Reg()
SpecificConstantMatch m_SpecificICst(int64_t RequestedValue)
Matches a constant equal to RequestedValue.
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
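An illustrative pattern-match sketch using the matchers above (the matchShlByConstant helper is invented for this note; mi_match walks SSA defs through MRI):
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
using namespace MIPatternMatch;

// Match Reg = G_SHL(Src, Cst) where the shift amount is a constant.
static bool matchShlByConstant(Register Reg, const MachineRegisterInfo &MRI,
                               Register &Src, APInt &ShiftAmt) {
  return mi_match(Reg, MRI, m_GShl(m_Reg(Src), m_ICst(ShiftAmt)));
}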
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
constexpr double e
Definition: MathExtras.h:31
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
Register getFunctionLiveInPhysReg(MachineFunction &MF, const TargetInstrInfo &TII, MCRegister PhysReg, const TargetRegisterClass &RC, const DebugLoc &DL, LLT RegTy=LLT())
Return a virtual register corresponding to the incoming argument register PhysReg.
Definition: Utils.cpp:882
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:456
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1731
Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed...
Definition: Utils.cpp:54
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition: Utils.cpp:625
const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:438
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition: Utils.cpp:293
bool constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
Definition: Utils.cpp:153
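An illustrative tail of a hand-written selection routine using the helper above (a sketch; selectAdd64 and its operand layout are invented, while AArch64::ADDXrr and the listed utility calls are real):
#include "AArch64InstrInfo.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
using namespace llvm;

static bool selectAdd64(MachineInstr &I, MachineIRBuilder &MIB,
                        const TargetInstrInfo &TII,
                        const TargetRegisterInfo &TRI,
                        const RegisterBankInfo &RBI) {
  MIB.setInstrAndDebugLoc(I);            // insert where I currently lives
  MachineInstr &Add = *MIB.buildInstr(AArch64::ADDXrr,
                                      {I.getOperand(0).getReg()},
                                      {I.getOperand(1).getReg(),
                                       I.getOperand(2).getReg()})
                           .getInstr();
  if (!constrainSelectedInstRegOperands(Add, TII, TRI, RBI))
    return false;                        // operands must fit ADDXrr's classes
  I.eraseFromParent();                   // the generic instruction is replaced
  return true;
}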
bool isPreISelGenericOpcode(unsigned Opcode)
Check whether the given Opcode is a generic opcode that is not supposed to appear after ISel.
Definition: TargetOpcodes.h:30
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition: Utils.cpp:465
std::optional< int64_t > getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT that fits in int64_t, returns it.
Definition: Utils.cpp:305
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition: MathExtras.h:258
bool shouldOptForSize(const MachineBasicBlock &MBB, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
Returns true if the given block should be optimized for size.
Definition: Utils.cpp:1540
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1937
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1738
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:313
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
std::optional< ValueAndVReg > getAnyConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true, bool LookThroughAnyExt=false)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT or G_FCONST...
Definition: Utils.cpp:419
AtomicOrdering
Atomic ordering for LLVM's memory model.
InstructionSelector * createAArch64InstructionSelector(const AArch64TargetMachine &, AArch64Subtarget &, AArch64RegisterBankInfo &)
DWARFExpression::Operation Op
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition: Utils.cpp:413
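An illustrative use of the constant-lookup helper above (a sketch; the getConstantShiftAmount name is invented):
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include <optional>
using namespace llvm;

// Return the shift amount only if it is a compile-time constant, looking
// through copies rooted on a G_CONSTANT.
static std::optional<int64_t>
getConstantShiftAmount(Register AmtReg, const MachineRegisterInfo &MRI) {
  if (auto ValAndVReg = getIConstantVRegValWithLookThrough(AmtReg, MRI))
    return ValAndVReg->Value.getSExtValue(); // ValueAndVReg::Value is an APInt
  return std::nullopt;
}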
std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, and underlying value Register folding away any copies.
Definition: Utils.cpp:446
Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
Definition: Utils.cpp:472
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition: ValueTypes.h:58
Class which stores all the state required in a MachineIRBuilder.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.