1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
15#include "AArch64InstrInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
42#include "llvm/IR/Constants.h"
45#include "llvm/IR/IntrinsicsAArch64.h"
46#include "llvm/IR/Type.h"
47#include "llvm/Pass.h"
48#include "llvm/Support/Debug.h"
50#include <optional>
51
52#define DEBUG_TYPE "aarch64-isel"
53
54using namespace llvm;
55using namespace MIPatternMatch;
56using namespace AArch64GISelUtils;
57
58namespace llvm {
59class BlockFrequencyInfo;
60class ProfileSummaryInfo;
61}
62
63namespace {
64
65#define GET_GLOBALISEL_PREDICATE_BITSET
66#include "AArch64GenGlobalISel.inc"
67#undef GET_GLOBALISEL_PREDICATE_BITSET
68
69
70class AArch64InstructionSelector : public InstructionSelector {
71public:
72 AArch64InstructionSelector(const AArch64TargetMachine &TM,
73 const AArch64Subtarget &STI,
74 const AArch64RegisterBankInfo &RBI);
75
76 bool select(MachineInstr &I) override;
77 static const char *getName() { return DEBUG_TYPE; }
78
79 void setupMF(MachineFunction &MF, GISelValueTracking *VT,
80 CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI,
81 BlockFrequencyInfo *BFI) override {
82 InstructionSelector::setupMF(MF, VT, CoverageInfo, PSI, BFI);
83 MIB.setMF(MF);
84
85 // hasFnAttribute() is expensive to call on every BRCOND selection, so
86 // cache it here for each run of the selector.
87 ProduceNonFlagSettingCondBr =
88 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
89 MFReturnAddr = Register();
90
91 processPHIs(MF);
92 }
93
94private:
95 /// tblgen-erated 'select' implementation, used as the initial selector for
96 /// the patterns that don't require complex C++.
97 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
98
99 // A lowering phase that runs before any selection attempts.
100 // Returns true if the instruction was modified.
101 bool preISelLower(MachineInstr &I);
102
103 // An early selection function that runs before the selectImpl() call.
104 bool earlySelect(MachineInstr &I);
105
106 /// Save state that is shared between select calls, call select on \p I and
107 /// then restore the saved state. This can be used to recursively call select
108 /// within a select call.
109 bool selectAndRestoreState(MachineInstr &I);
110
111 // Do some preprocessing of G_PHIs before we begin selection.
112 void processPHIs(MachineFunction &MF);
113
114 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
115
116 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
117 bool contractCrossBankCopyIntoStore(MachineInstr &I,
118 MachineRegisterInfo &MRI);
119
120 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
121
122 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
123 MachineRegisterInfo &MRI) const;
124 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
125 MachineRegisterInfo &MRI) const;
126
127 ///@{
128 /// Helper functions for selectCompareBranch.
129 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
130 MachineIRBuilder &MIB) const;
131 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
132 MachineIRBuilder &MIB) const;
133 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
134 MachineIRBuilder &MIB) const;
135 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
136 MachineBasicBlock *DstMBB,
137 MachineIRBuilder &MIB) const;
138 ///@}
139
140 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
141 MachineRegisterInfo &MRI);
142
143 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
144 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
145
146 // Helper to generate an equivalent of scalar_to_vector into a new register,
147 // returned via 'Dst'.
148 MachineInstr *emitScalarToVector(unsigned EltSize,
149 const TargetRegisterClass *DstRC,
150 Register Scalar,
151 MachineIRBuilder &MIRBuilder) const;
152 /// Helper to narrow vector that was widened by emitScalarToVector.
153 /// Copy lowest part of 128-bit or 64-bit vector to 64-bit or 32-bit
154 /// vector, respectively.
155 MachineInstr *emitNarrowVector(Register DstReg, Register SrcReg,
156 MachineIRBuilder &MIRBuilder,
157 MachineRegisterInfo &MRI) const;
158
159 /// Emit a lane insert into \p DstReg, or a new vector register if
160 /// std::nullopt is provided.
161 ///
162 /// The lane inserted into is defined by \p LaneIdx. The vector source
163 /// register is given by \p SrcReg. The register containing the element is
164 /// given by \p EltReg.
165 MachineInstr *emitLaneInsert(std::optional<Register> DstReg, Register SrcReg,
166 Register EltReg, unsigned LaneIdx,
167 const RegisterBank &RB,
168 MachineIRBuilder &MIRBuilder) const;
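 // Illustrative sketch (added comment, not from the original source; the
 // register and builder names are hypothetical): inserting the scalar in
 // EltReg into lane 1 of SrcReg while letting the helper allocate the
 // destination register could look like
 //   MachineInstr *Ins = emitLaneInsert(std::nullopt, SrcReg, EltReg,
 //                                      /*LaneIdx=*/1, RB, MIRBuilder);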
169
170 /// Emit a sequence of instructions representing a constant \p CV for a
171 /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
172 ///
173 /// \returns the last instruction in the sequence on success, and nullptr
174 /// otherwise.
175 MachineInstr *emitConstantVector(Register Dst, Constant *CV,
176 MachineIRBuilder &MIRBuilder,
177 MachineRegisterInfo &MRI) const;
178
179 MachineInstr *tryAdvSIMDModImm8(Register Dst, unsigned DstSize, APInt Bits,
180 MachineIRBuilder &MIRBuilder);
181
182 MachineInstr *tryAdvSIMDModImm16(Register Dst, unsigned DstSize, APInt Bits,
183 MachineIRBuilder &MIRBuilder, bool Inv);
184
185 MachineInstr *tryAdvSIMDModImm32(Register Dst, unsigned DstSize, APInt Bits,
186 MachineIRBuilder &MIRBuilder, bool Inv);
187 MachineInstr *tryAdvSIMDModImm64(Register Dst, unsigned DstSize, APInt Bits,
188 MachineIRBuilder &MIRBuilder);
189 MachineInstr *tryAdvSIMDModImm321s(Register Dst, unsigned DstSize, APInt Bits,
190 MachineIRBuilder &MIRBuilder, bool Inv);
191 MachineInstr *tryAdvSIMDModImmFP(Register Dst, unsigned DstSize, APInt Bits,
192 MachineIRBuilder &MIRBuilder);
193
194 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
195 MachineRegisterInfo &MRI);
196 /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
197 /// SUBREG_TO_REG.
198 bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
199 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
202
203 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
204 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
205 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
206 bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
207
208 /// Helper function to select vector load intrinsics like
209 /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
210 /// \p Opc is the opcode that the selected instruction should use.
211 /// \p NumVecs is the number of vector destinations for the instruction.
212 /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
213 bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
214 MachineInstr &I);
215 bool selectVectorLoadLaneIntrinsic(unsigned Opc, unsigned NumVecs,
216 MachineInstr &I);
217 void selectVectorStoreIntrinsic(MachineInstr &I, unsigned NumVecs,
218 unsigned Opc);
219 bool selectVectorStoreLaneIntrinsic(MachineInstr &I, unsigned NumVecs,
220 unsigned Opc);
221 bool selectIntrinsicWithSideEffects(MachineInstr &I,
222 MachineRegisterInfo &MRI);
223 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
224 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
225 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
226 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
227 bool selectPtrAuthGlobalValue(MachineInstr &I,
228 MachineRegisterInfo &MRI) const;
229 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
230 bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
231 bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
232 void SelectTable(MachineInstr &I, MachineRegisterInfo &MRI, unsigned NumVecs,
233 unsigned Opc1, unsigned Opc2, bool isExt);
234
235 bool selectIndexedExtLoad(MachineInstr &I, MachineRegisterInfo &MRI);
236 bool selectIndexedLoad(MachineInstr &I, MachineRegisterInfo &MRI);
237 bool selectIndexedStore(GIndexedStore &I, MachineRegisterInfo &MRI);
238
239 unsigned emitConstantPoolEntry(const Constant *CPVal,
240 MachineFunction &MF) const;
241 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
242 MachineIRBuilder &MIRBuilder) const;
243
244 // Emit a vector concat operation.
245 MachineInstr *emitVectorConcat(std::optional<Register> Dst, Register Op1,
246 Register Op2,
247 MachineIRBuilder &MIRBuilder) const;
248
249 // Emit an integer compare between LHS and RHS, which checks for Predicate.
250 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
251 MachineOperand &Predicate,
252 MachineIRBuilder &MIRBuilder) const;
253
254 /// Emit a floating point comparison between \p LHS and \p RHS.
255 /// \p Pred if given is the intended predicate to use.
256 MachineInstr *
257 emitFPCompare(Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
258 std::optional<CmpInst::Predicate> = std::nullopt) const;
259
260 MachineInstr *
261 emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
262 std::initializer_list<llvm::SrcOp> SrcOps,
263 MachineIRBuilder &MIRBuilder,
264 const ComplexRendererFns &RenderFns = std::nullopt) const;
265 /// Helper function to emit an add or sub instruction.
266 ///
267 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
268 /// in a specific order.
269 ///
270 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
271 ///
272 /// \code
273 /// const std::array<std::array<unsigned, 2>, 4> Table {
274 /// {{AArch64::ADDXri, AArch64::ADDWri},
275 /// {AArch64::ADDXrs, AArch64::ADDWrs},
276 /// {AArch64::ADDXrr, AArch64::ADDWrr},
277 /// {AArch64::SUBXri, AArch64::SUBWri},
278 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
279 /// \endcode
280 ///
281 /// Each row in the table corresponds to a different addressing mode. Each
282 /// column corresponds to a different register size.
283 ///
284 /// \attention Rows must be structured as follows:
285 /// - Row 0: The ri opcode variants
286 /// - Row 1: The rs opcode variants
287 /// - Row 2: The rr opcode variants
288 /// - Row 3: The ri opcode variants for negative immediates
289 /// - Row 4: The rx opcode variants
290 ///
291 /// \attention Columns must be structured as follows:
292 /// - Column 0: The 64-bit opcode variants
293 /// - Column 1: The 32-bit opcode variants
294 ///
295 /// \p Dst is the destination register of the binop to emit.
296 /// \p LHS is the left-hand operand of the binop to emit.
297 /// \p RHS is the right-hand operand of the binop to emit.
298 MachineInstr *emitAddSub(
299 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
300 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
301 MachineIRBuilder &MIRBuilder) const;
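 // Illustrative call sketch (added comment, not from the original source):
 // with the example table above, a plain 32-bit register-register add
 // resolves to Row 2 / Column 1, i.e. AArch64::ADDWrr, while a 64-bit add of
 // an encodable immediate resolves to Row 0 / Column 0, i.e. AArch64::ADDXri:
 //   MachineInstr *Add = emitAddSub(Table, Dst, LHS, RHS, MIRBuilder);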
302 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
304 MachineIRBuilder &MIRBuilder) const;
306 MachineIRBuilder &MIRBuilder) const;
308 MachineIRBuilder &MIRBuilder) const;
310 MachineIRBuilder &MIRBuilder) const;
312 MachineIRBuilder &MIRBuilder) const;
314 MachineIRBuilder &MIRBuilder) const;
316 MachineIRBuilder &MIRBuilder) const;
318 MachineIRBuilder &MIRBuilder) const;
319 MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
320 AArch64CC::CondCode CC,
321 MachineIRBuilder &MIRBuilder) const;
322 MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,
323 const RegisterBank &DstRB, LLT ScalarTy,
324 Register VecReg, unsigned LaneIdx,
325 MachineIRBuilder &MIRBuilder) const;
326 MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
327 AArch64CC::CondCode Pred,
328 MachineIRBuilder &MIRBuilder) const;
329 /// Emit a CSet for a FP compare.
330 ///
331 /// \p Dst is expected to be a 32-bit scalar register.
332 MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
333 MachineIRBuilder &MIRBuilder) const;
334
335 /// Emit an instruction that sets NZCV to the carry-in expected by \p I.
336 /// Might elide the instruction if the previous instruction already sets NZCV
337 /// correctly.
338 MachineInstr *emitCarryIn(MachineInstr &I, Register CarryReg);
339
340 /// Emit the overflow op for \p Opcode.
341 ///
342 /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
343 /// G_USUBO, etc.
344 std::pair<MachineInstr *, AArch64CC::CondCode>
345 emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
346 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
347
348 bool selectOverflowOp(MachineInstr &I, MachineRegisterInfo &MRI);
349
350 /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
351 /// In some cases this is even possible with OR operations in the expression.
352 MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC,
353 MachineIRBuilder &MIB) const;
354 MachineInstr *emitConditionalComparison(Register LHS, Register RHS,
355 CmpInst::Predicate CC,
356 AArch64CC::CondCode Predicate,
357 AArch64CC::CondCode OutCC,
358 MachineIRBuilder &MIB) const;
359 MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC,
360 bool Negate, Register CCOp,
361 AArch64CC::CondCode Predicate,
362 MachineIRBuilder &MIB) const;
363
364 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
365 /// \p IsNegative is true if the test should be "not zero".
366 /// This will also optimize the test bit instruction when possible.
367 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
368 MachineBasicBlock *DstMBB,
369 MachineIRBuilder &MIB) const;
370
371 /// Emit a CB(N)Z instruction which branches to \p DestMBB.
372 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
373 MachineBasicBlock *DestMBB,
374 MachineIRBuilder &MIB) const;
375
376 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
377 // We use these manually instead of using the importer since it doesn't
378 // support SDNodeXForm.
379 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
380 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
381 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
382 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
383
384 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
385 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
386 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
387
388 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
389 unsigned Size) const;
390
391 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
392 return selectAddrModeUnscaled(Root, 1);
393 }
394 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
395 return selectAddrModeUnscaled(Root, 2);
396 }
397 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
398 return selectAddrModeUnscaled(Root, 4);
399 }
400 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
401 return selectAddrModeUnscaled(Root, 8);
402 }
403 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
404 return selectAddrModeUnscaled(Root, 16);
405 }
406
407 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
408 /// from complex pattern matchers like selectAddrModeIndexed().
409 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
410 MachineRegisterInfo &MRI) const;
411
412 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
413 unsigned Size) const;
414 template <int Width>
415 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
416 return selectAddrModeIndexed(Root, Width / 8);
417 }
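 // For example (added comment, illustrative only): a pattern operating on
 // 8-byte accesses instantiates selectAddrModeIndexed<64>, which forwards
 // Size = 64 / 8 = 8 bytes to the non-template overload above:
 //   ComplexRendererFns Fns = selectAddrModeIndexed<64>(Root);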
418
419 std::optional<bool>
420 isWorthFoldingIntoAddrMode(const MachineInstr &MI,
421 const MachineRegisterInfo &MRI) const;
422
423 bool isWorthFoldingIntoExtendedReg(const MachineInstr &MI,
424 const MachineRegisterInfo &MRI,
425 bool IsAddrOperand) const;
426 ComplexRendererFns
427 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
428 unsigned SizeInBytes) const;
429
430 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
431 /// or not a shift + extend should be folded into an addressing mode. Returns
432 /// None when this is not profitable or possible.
433 ComplexRendererFns
434 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
435 MachineOperand &Offset, unsigned SizeInBytes,
436 bool WantsExt) const;
437 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
438 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
439 unsigned SizeInBytes) const;
440 template <int Width>
441 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
442 return selectAddrModeXRO(Root, Width / 8);
443 }
444
445 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
446 unsigned SizeInBytes) const;
447 template <int Width>
448 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
449 return selectAddrModeWRO(Root, Width / 8);
450 }
451
452 ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
453 bool AllowROR = false) const;
454
455 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
456 return selectShiftedRegister(Root);
457 }
458
459 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
460 return selectShiftedRegister(Root, true);
461 }
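 // Note (added for illustration, not from the original source): AArch64
 // arithmetic instructions (ADD/SUB and friends) only accept LSL/LSR/ASR
 // shifted operands, so selectArithShiftedRegister leaves AllowROR at its
 // default of false, whereas logical instructions (AND/ORR/EOR/EON) also
 // accept ROR, so selectLogicalShiftedRegister passes AllowROR = true.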
462
463 /// Given an extend instruction, determine the correct shift-extend type for
464 /// that instruction.
465 ///
466 /// If the instruction is going to be used in a load or store, pass
467 /// \p IsLoadStore = true.
468 AArch64_AM::ShiftExtendType
469 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
470 bool IsLoadStore = false) const;
471
472 /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
473 ///
474 /// \returns Either \p Reg if no change was necessary, or the new register
475 /// created by moving \p Reg.
476 ///
477 /// Note: This uses emitCopy right now.
478 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
479 MachineIRBuilder &MIB) const;
480
481 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
482
483 ComplexRendererFns selectExtractHigh(MachineOperand &Root) const;
484
485 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
486 int OpIdx = -1) const;
487 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
488 int OpIdx = -1) const;
489 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
490 int OpIdx = -1) const;
491 void renderUbsanTrap(MachineInstrBuilder &MIB, const MachineInstr &MI,
492 int OpIdx) const;
493 void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
494 int OpIdx = -1) const;
495 void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
496 int OpIdx = -1) const;
497 void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
498 int OpIdx = -1) const;
499 void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
500 const MachineInstr &MI,
501 int OpIdx = -1) const;
502
503 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
504 void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
505
506 // Optimization methods.
507 bool tryOptSelect(GSelect &Sel);
508 bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
509 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
511 MachineIRBuilder &MIRBuilder) const;
512
513 /// Return true if \p MI is a load or store of \p NumBytes bytes.
514 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
515
516 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
517 /// register zeroed out. In other words, the result of MI has been explicitly
518 /// zero extended.
519 bool isDef32(const MachineInstr &MI) const;
520
521 const AArch64TargetMachine &TM;
522 const AArch64Subtarget &STI;
523 const AArch64InstrInfo &TII;
524 const AArch64RegisterInfo &TRI;
525 const AArch64RegisterBankInfo &RBI;
526
527 bool ProduceNonFlagSettingCondBr = false;
528
529 // Some cached values used during selection.
530 // We use LR as a live-in register, and we keep track of it here as it can be
531 // clobbered by calls.
532 Register MFReturnAddr;
533
535
536#define GET_GLOBALISEL_PREDICATES_DECL
537#include "AArch64GenGlobalISel.inc"
538#undef GET_GLOBALISEL_PREDICATES_DECL
539
540// We declare the temporaries used by selectImpl() in the class to minimize the
541// cost of constructing placeholder values.
542#define GET_GLOBALISEL_TEMPORARIES_DECL
543#include "AArch64GenGlobalISel.inc"
544#undef GET_GLOBALISEL_TEMPORARIES_DECL
545};
546
547} // end anonymous namespace
548
549#define GET_GLOBALISEL_IMPL
550#include "AArch64GenGlobalISel.inc"
551#undef GET_GLOBALISEL_IMPL
552
553AArch64InstructionSelector::AArch64InstructionSelector(
554 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
555 const AArch64RegisterBankInfo &RBI)
556 : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
557 RBI(RBI),
559#include "AArch64GenGlobalISel.inc"
562#include "AArch64GenGlobalISel.inc"
564{
565}
566
567// FIXME: This should be target-independent, inferred from the types declared
568// for each class in the bank.
569//
570/// Given a register bank, and a type, return the smallest register class that
571/// can represent that combination.
572static const TargetRegisterClass *
573getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
574 bool GetAllRegSet = false) {
575 if (RB.getID() == AArch64::GPRRegBankID) {
576 if (Ty.getSizeInBits() <= 32)
577 return GetAllRegSet ? &AArch64::GPR32allRegClass
578 : &AArch64::GPR32RegClass;
579 if (Ty.getSizeInBits() == 64)
580 return GetAllRegSet ? &AArch64::GPR64allRegClass
581 : &AArch64::GPR64RegClass;
582 if (Ty.getSizeInBits() == 128)
583 return &AArch64::XSeqPairsClassRegClass;
584 return nullptr;
585 }
586
587 if (RB.getID() == AArch64::FPRRegBankID) {
588 switch (Ty.getSizeInBits()) {
589 case 8:
590 return &AArch64::FPR8RegClass;
591 case 16:
592 return &AArch64::FPR16RegClass;
593 case 32:
594 return &AArch64::FPR32RegClass;
595 case 64:
596 return &AArch64::FPR64RegClass;
597 case 128:
598 return &AArch64::FPR128RegClass;
599 }
600 return nullptr;
601 }
602
603 return nullptr;
604}
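// Illustrative mappings (added comment, derived from the function above):
// LLT::scalar(64) on the GPR bank yields &AArch64::GPR64RegClass, a 128-bit
// vector such as v4s32 on the FPR bank yields &AArch64::FPR128RegClass, and
// any unsupported combination returns nullptr.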
605
606/// Given a register bank, and size in bits, return the smallest register class
607/// that can represent that combination.
608 static const TargetRegisterClass *
609 getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits,
610 bool GetAllRegSet = false) {
611 if (SizeInBits.isScalable()) {
612 assert(RB.getID() == AArch64::FPRRegBankID &&
613 "Expected FPR regbank for scalable type size");
614 return &AArch64::ZPRRegClass;
615 }
616
617 unsigned RegBankID = RB.getID();
618
619 if (RegBankID == AArch64::GPRRegBankID) {
620 assert(!SizeInBits.isScalable() && "Unexpected scalable register size");
621 if (SizeInBits <= 32)
622 return GetAllRegSet ? &AArch64::GPR32allRegClass
623 : &AArch64::GPR32RegClass;
624 if (SizeInBits == 64)
625 return GetAllRegSet ? &AArch64::GPR64allRegClass
626 : &AArch64::GPR64RegClass;
627 if (SizeInBits == 128)
628 return &AArch64::XSeqPairsClassRegClass;
629 }
630
631 if (RegBankID == AArch64::FPRRegBankID) {
632 if (SizeInBits.isScalable()) {
633 assert(SizeInBits == TypeSize::getScalable(128) &&
634 "Unexpected scalable register size");
635 return &AArch64::ZPRRegClass;
636 }
637
638 switch (SizeInBits) {
639 default:
640 return nullptr;
641 case 8:
642 return &AArch64::FPR8RegClass;
643 case 16:
644 return &AArch64::FPR16RegClass;
645 case 32:
646 return &AArch64::FPR32RegClass;
647 case 64:
648 return &AArch64::FPR64RegClass;
649 case 128:
650 return &AArch64::FPR128RegClass;
651 }
652 }
653
654 return nullptr;
655}
656
657 /// Returns the correct subregister to use for a given register class.
658 static bool getSubRegForClass(const TargetRegisterClass *RC,
659 const TargetRegisterInfo &TRI, unsigned &SubReg) {
660 switch (TRI.getRegSizeInBits(*RC)) {
661 case 8:
662 SubReg = AArch64::bsub;
663 break;
664 case 16:
665 SubReg = AArch64::hsub;
666 break;
667 case 32:
668 if (RC != &AArch64::FPR32RegClass)
669 SubReg = AArch64::sub_32;
670 else
671 SubReg = AArch64::ssub;
672 break;
673 case 64:
674 SubReg = AArch64::dsub;
675 break;
676 default:
677 LLVM_DEBUG(
678 dbgs() << "Couldn't find appropriate subregister for register class.");
679 return false;
680 }
681
682 return true;
683}
684
685/// Returns the minimum size the given register bank can hold.
686static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
687 switch (RB.getID()) {
688 case AArch64::GPRRegBankID:
689 return 32;
690 case AArch64::FPRRegBankID:
691 return 8;
692 default:
693 llvm_unreachable("Tried to get minimum size for unknown register bank.");
694 }
695}
696
697/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
698/// Helper function for functions like createDTuple and createQTuple.
699///
700/// \p RegClassIDs - The list of register class IDs available for some tuple of
701/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
702/// expected to contain between 2 and 4 tuple classes.
703///
704/// \p SubRegs - The list of subregister classes associated with each register
705/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
706/// subregister class. The index of each subregister class is expected to
707/// correspond with the index of each register class.
708///
709/// \returns Either the destination register of REG_SEQUENCE instruction that
710/// was created, or the 0th element of \p Regs if \p Regs contains a single
711 /// element.
712 static Register createTuple(ArrayRef<Register> Regs,
713 const unsigned RegClassIDs[],
714 const unsigned SubRegs[], MachineIRBuilder &MIB) {
715 unsigned NumRegs = Regs.size();
716 if (NumRegs == 1)
717 return Regs[0];
718 assert(NumRegs >= 2 && NumRegs <= 4 &&
719 "Only support between two and 4 registers in a tuple!");
721 auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
722 auto RegSequence =
723 MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
724 for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
725 RegSequence.addUse(Regs[I]);
726 RegSequence.addImm(SubRegs[I]);
727 }
728 return RegSequence.getReg(0);
729}
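// Illustrative result (added comment; hypothetical MIR, not from the original
// source): for two Q-register inputs, createQTuple below builds roughly
//   %tup:qq = REG_SEQUENCE %vec0, %subreg.qsub0, %vec1, %subreg.qsub1
// and returns %tup, which tuple-consuming instructions (e.g. the LD2/ST2
// family) then take as a single operand.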
730
731 /// Create a tuple of D-registers using the registers in \p Regs.
732 static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
733 static const unsigned RegClassIDs[] = {
734 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
735 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
736 AArch64::dsub2, AArch64::dsub3};
737 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
738}
739
740 /// Create a tuple of Q-registers using the registers in \p Regs.
741 static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
742 static const unsigned RegClassIDs[] = {
743 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
744 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
745 AArch64::qsub2, AArch64::qsub3};
746 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
747}
748
749static std::optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
750 auto &MI = *Root.getParent();
751 auto &MBB = *MI.getParent();
752 auto &MF = *MBB.getParent();
753 auto &MRI = MF.getRegInfo();
754 uint64_t Immed;
755 if (Root.isImm())
756 Immed = Root.getImm();
757 else if (Root.isCImm())
758 Immed = Root.getCImm()->getZExtValue();
759 else if (Root.isReg()) {
760 auto ValAndVReg =
761 getIConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
762 if (!ValAndVReg)
763 return std::nullopt;
764 Immed = ValAndVReg->Value.getSExtValue();
765 } else
766 return std::nullopt;
767 return Immed;
768}
769
770/// Check whether \p I is a currently unsupported binary operation:
771/// - it has an unsized type
772/// - an operand is not a vreg
773 /// - its operands are not all in the same register bank
774/// These are checks that should someday live in the verifier, but right now,
775/// these are mostly limitations of the aarch64 selector.
776static bool unsupportedBinOp(const MachineInstr &I,
777 const AArch64RegisterBankInfo &RBI,
778 const MachineRegisterInfo &MRI,
779 const AArch64RegisterInfo &TRI) {
780 LLT Ty = MRI.getType(I.getOperand(0).getReg());
781 if (!Ty.isValid()) {
782 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
783 return true;
784 }
785
786 const RegisterBank *PrevOpBank = nullptr;
787 for (auto &MO : I.operands()) {
788 // FIXME: Support non-register operands.
789 if (!MO.isReg()) {
790 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
791 return true;
792 }
793
794 // FIXME: Can generic operations have physical registers operands? If
795 // so, this will need to be taught about that, and we'll need to get the
796 // bank out of the minimal class for the register.
797 // Either way, this needs to be documented (and possibly verified).
798 if (!MO.getReg().isVirtual()) {
799 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
800 return true;
801 }
802
803 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
804 if (!OpBank) {
805 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
806 return true;
807 }
808
809 if (PrevOpBank && OpBank != PrevOpBank) {
810 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
811 return true;
812 }
813 PrevOpBank = OpBank;
814 }
815 return false;
816}
817
818/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
819/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
820/// and of size \p OpSize.
821/// \returns \p GenericOpc if the combination is unsupported.
822static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
823 unsigned OpSize) {
824 switch (RegBankID) {
825 case AArch64::GPRRegBankID:
826 if (OpSize == 32) {
827 switch (GenericOpc) {
828 case TargetOpcode::G_SHL:
829 return AArch64::LSLVWr;
830 case TargetOpcode::G_LSHR:
831 return AArch64::LSRVWr;
832 case TargetOpcode::G_ASHR:
833 return AArch64::ASRVWr;
834 default:
835 return GenericOpc;
836 }
837 } else if (OpSize == 64) {
838 switch (GenericOpc) {
839 case TargetOpcode::G_PTR_ADD:
840 return AArch64::ADDXrr;
841 case TargetOpcode::G_SHL:
842 return AArch64::LSLVXr;
843 case TargetOpcode::G_LSHR:
844 return AArch64::LSRVXr;
845 case TargetOpcode::G_ASHR:
846 return AArch64::ASRVXr;
847 default:
848 return GenericOpc;
849 }
850 }
851 break;
852 case AArch64::FPRRegBankID:
853 switch (OpSize) {
854 case 32:
855 switch (GenericOpc) {
856 case TargetOpcode::G_FADD:
857 return AArch64::FADDSrr;
858 case TargetOpcode::G_FSUB:
859 return AArch64::FSUBSrr;
860 case TargetOpcode::G_FMUL:
861 return AArch64::FMULSrr;
862 case TargetOpcode::G_FDIV:
863 return AArch64::FDIVSrr;
864 default:
865 return GenericOpc;
866 }
867 case 64:
868 switch (GenericOpc) {
869 case TargetOpcode::G_FADD:
870 return AArch64::FADDDrr;
871 case TargetOpcode::G_FSUB:
872 return AArch64::FSUBDrr;
873 case TargetOpcode::G_FMUL:
874 return AArch64::FMULDrr;
875 case TargetOpcode::G_FDIV:
876 return AArch64::FDIVDrr;
877 case TargetOpcode::G_OR:
878 return AArch64::ORRv8i8;
879 default:
880 return GenericOpc;
881 }
882 }
883 break;
884 }
885 return GenericOpc;
886}
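// For example (added comment, illustrative only): a 64-bit G_ASHR on the GPR
// bank maps to AArch64::ASRVXr and a 32-bit G_FDIV on the FPR bank maps to
// AArch64::FDIVSrr; any combination not listed falls through and returns
// GenericOpc unchanged so the caller can detect the unsupported case.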
887
888/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
889/// appropriate for the (value) register bank \p RegBankID and of memory access
890/// size \p OpSize. This returns the variant with the base+unsigned-immediate
891/// addressing mode (e.g., LDRXui).
892/// \returns \p GenericOpc if the combination is unsupported.
893static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
894 unsigned OpSize) {
895 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
896 switch (RegBankID) {
897 case AArch64::GPRRegBankID:
898 switch (OpSize) {
899 case 8:
900 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
901 case 16:
902 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
903 case 32:
904 return isStore ? AArch64::STRWui : AArch64::LDRWui;
905 case 64:
906 return isStore ? AArch64::STRXui : AArch64::LDRXui;
907 }
908 break;
909 case AArch64::FPRRegBankID:
910 switch (OpSize) {
911 case 8:
912 return isStore ? AArch64::STRBui : AArch64::LDRBui;
913 case 16:
914 return isStore ? AArch64::STRHui : AArch64::LDRHui;
915 case 32:
916 return isStore ? AArch64::STRSui : AArch64::LDRSui;
917 case 64:
918 return isStore ? AArch64::STRDui : AArch64::LDRDui;
919 case 128:
920 return isStore ? AArch64::STRQui : AArch64::LDRQui;
921 }
922 break;
923 }
924 return GenericOpc;
925}
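// For example (added comment, illustrative only): a 32-bit value loaded into
// the GPR bank selects LDRWui, while the same-width load into the FPR bank
// selects LDRSui; the "ui" forms all use a base register plus a scaled,
// unsigned 12-bit immediate offset.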
926
927/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
928/// to \p *To.
929///
930/// E.g "To = COPY SrcReg:SubReg"
932 const RegisterBankInfo &RBI, Register SrcReg,
933 const TargetRegisterClass *To, unsigned SubReg) {
934 assert(SrcReg.isValid() && "Expected a valid source register?");
935 assert(To && "Destination register class cannot be null");
936 assert(SubReg && "Expected a valid subregister");
937
938 MachineIRBuilder MIB(I);
939 auto SubRegCopy =
940 MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
941 MachineOperand &RegOp = I.getOperand(1);
942 RegOp.setReg(SubRegCopy.getReg(0));
943
944 // It's possible that the destination register won't be constrained. Make
945 // sure that happens.
946 if (!I.getOperand(0).getReg().isPhysical())
947 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
948
949 return true;
950}
951
952/// Helper function to get the source and destination register classes for a
953/// copy. Returns a std::pair containing the source register class for the
954/// copy, and the destination register class for the copy. If a register class
955/// cannot be determined, then it will be nullptr.
956 static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
957 getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
958 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
959 const RegisterBankInfo &RBI) {
960 Register DstReg = I.getOperand(0).getReg();
961 Register SrcReg = I.getOperand(1).getReg();
962 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
963 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
964
965 TypeSize DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
966 TypeSize SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
967
968 // Special casing for cross-bank copies of s1s. We can technically represent
969 // a 1-bit value with any size of register. The minimum size for a GPR is 32
970 // bits. So, we need to put the FPR on 32 bits as well.
971 //
972 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
973 // then we can pull it into the helpers that get the appropriate class for a
974 // register bank. Or make a new helper that carries along some constraint
975 // information.
976 if (SrcRegBank != DstRegBank &&
977 (DstSize == TypeSize::getFixed(1) && SrcSize == TypeSize::getFixed(1)))
978 SrcSize = DstSize = TypeSize::getFixed(32);
979
980 return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
981 getMinClassForRegBank(DstRegBank, DstSize, true)};
982}
983
984// FIXME: We need some sort of API in RBI/TRI to allow generic code to
985// constrain operands of simple instructions given a TargetRegisterClass
986 // and LLT
987 static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI,
988 const RegisterBankInfo &RBI) {
989 for (MachineOperand &MO : I.operands()) {
990 if (!MO.isReg())
991 continue;
992 Register Reg = MO.getReg();
993 if (!Reg)
994 continue;
995 if (Reg.isPhysical())
996 continue;
997 LLT Ty = MRI.getType(Reg);
998 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
999 const TargetRegisterClass *RC =
1000 dyn_cast_if_present<const TargetRegisterClass *>(RegClassOrBank);
1001 if (!RC) {
1002 const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
1003 RC = getRegClassForTypeOnBank(Ty, RB);
1004 if (!RC) {
1005 LLVM_DEBUG(
1006 dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
1007 break;
1008 }
1009 }
1010 RBI.constrainGenericRegister(Reg, *RC, MRI);
1011 }
1012
1013 return true;
1014}
1015
1016 static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
1017 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
1018 const RegisterBankInfo &RBI) {
1019 Register DstReg = I.getOperand(0).getReg();
1020 Register SrcReg = I.getOperand(1).getReg();
1021 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
1022 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
1023
1024 // Find the correct register classes for the source and destination registers.
1025 const TargetRegisterClass *SrcRC;
1026 const TargetRegisterClass *DstRC;
1027 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
1028
1029 if (!DstRC) {
1030 LLVM_DEBUG(dbgs() << "Unexpected dest size "
1031 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
1032 return false;
1033 }
1034
1035 // Is this a copy? If so, then we may need to insert a subregister copy.
1036 if (I.isCopy()) {
1037 // Yes. Check if there's anything to fix up.
1038 if (!SrcRC) {
1039 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
1040 return false;
1041 }
1042
1043 const TypeSize SrcSize = TRI.getRegSizeInBits(*SrcRC);
1044 const TypeSize DstSize = TRI.getRegSizeInBits(*DstRC);
1045 unsigned SubReg;
1046
1047 // If the source bank doesn't support a subregister copy small enough,
1048 // then we first need to copy to the destination bank.
1049 if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
1050 const TargetRegisterClass *DstTempRC =
1051 getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
1052 getSubRegForClass(DstRC, TRI, SubReg);
1053
1054 MachineIRBuilder MIB(I);
1055 auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
1056 copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
1057 } else if (SrcSize > DstSize) {
1058 // If the source register is bigger than the destination we need to
1059 // perform a subregister copy.
1060 const TargetRegisterClass *SubRegRC =
1061 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1062 getSubRegForClass(SubRegRC, TRI, SubReg);
1063 copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
1064 } else if (DstSize > SrcSize) {
1065 // If the destination register is bigger than the source we need to do
1066 // a promotion using SUBREG_TO_REG.
1067 const TargetRegisterClass *PromotionRC =
1068 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1069 getSubRegForClass(SrcRC, TRI, SubReg);
1070
1071 Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
1072 BuildMI(*I.getParent(), I, I.getDebugLoc(),
1073 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
1074 .addImm(0)
1075 .addUse(SrcReg)
1076 .addImm(SubReg);
1077 MachineOperand &RegOp = I.getOperand(1);
1078 RegOp.setReg(PromoteReg);
1079 }
1080
1081 // If the destination is a physical register, then there's nothing to
1082 // change, so we're done.
1083 if (DstReg.isPhysical())
1084 return true;
1085 }
1086
1087 // No need to constrain SrcReg. It will get constrained when we hit another
1088 // of its uses or defs. Copies do not have constraints.
1089 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1090 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
1091 << " operand\n");
1092 return false;
1093 }
1094
1095 // If this is a GPR ZEXT, we want to just reduce it down into a copy.
1096 // The sizes will be mismatched with the source < 32b but that's ok.
1097 if (I.getOpcode() == TargetOpcode::G_ZEXT) {
1098 I.setDesc(TII.get(AArch64::COPY));
1099 assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
1100 return selectCopy(I, TII, MRI, TRI, RBI);
1101 }
1102
1103 I.setDesc(TII.get(AArch64::COPY));
1104 return true;
1105}
1106
1107 MachineInstr *
1108AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1109 Register False, AArch64CC::CondCode CC,
1110 MachineIRBuilder &MIB) const {
1111 MachineRegisterInfo &MRI = *MIB.getMRI();
1112 assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
1113 RBI.getRegBank(True, MRI, TRI)->getID() &&
1114 "Expected both select operands to have the same regbank?");
1115 LLT Ty = MRI.getType(True);
1116 if (Ty.isVector())
1117 return nullptr;
1118 const unsigned Size = Ty.getSizeInBits();
1119 assert((Size == 32 || Size == 64) &&
1120 "Expected 32 bit or 64 bit select only?");
1121 const bool Is32Bit = Size == 32;
1122 if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1123 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1124 auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1125 constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1126 return &*FCSel;
1127 }
1128
1129 // By default, we'll try and emit a CSEL.
1130 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1131 bool Optimized = false;
1132 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1133 &Optimized](Register &Reg, Register &OtherReg,
1134 bool Invert) {
1135 if (Optimized)
1136 return false;
1137
1138 // Attempt to fold:
1139 //
1140 // %sub = G_SUB 0, %x
1141 // %select = G_SELECT cc, %reg, %sub
1142 //
1143 // Into:
1144 // %select = CSNEG %reg, %x, cc
1145 Register MatchReg;
1146 if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
1147 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1148 Reg = MatchReg;
1149 if (Invert) {
1150 CC = AArch64CC::getInvertedCondCode(CC);
1151 std::swap(Reg, OtherReg);
1152 }
1153 return true;
1154 }
1155
1156 // Attempt to fold:
1157 //
1158 // %xor = G_XOR %x, -1
1159 // %select = G_SELECT cc, %reg, %xor
1160 //
1161 // Into:
1162 // %select = CSINV %reg, %x, cc
1163 if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
1164 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1165 Reg = MatchReg;
1167 if (Invert) {
1168 CC = AArch64CC::getInvertedCondCode(CC);
1168 std::swap(Reg, OtherReg);
1169 }
1170 return true;
1171 }
1172
1173 // Attempt to fold:
1174 //
1175 // %add = G_ADD %x, 1
1176 // %select = G_SELECT cc, %reg, %add
1177 //
1178 // Into:
1179 // %select = CSINC %reg, %x, cc
1180 if (mi_match(Reg, MRI,
1181 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
1182 m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
1183 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1184 Reg = MatchReg;
1186 if (Invert) {
1187 CC = AArch64CC::getInvertedCondCode(CC);
1187 std::swap(Reg, OtherReg);
1188 }
1189 return true;
1190 }
1191
1192 return false;
1193 };
1194
1195 // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1196 // true/false values are constants.
1197 // FIXME: All of these patterns already exist in tablegen. We should be
1198 // able to import these.
1199 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1200 &Optimized]() {
1201 if (Optimized)
1202 return false;
1203 auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
1204 auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
1205 if (!TrueCst && !FalseCst)
1206 return false;
1207
1208 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1209 if (TrueCst && FalseCst) {
1210 int64_t T = TrueCst->Value.getSExtValue();
1211 int64_t F = FalseCst->Value.getSExtValue();
1212
1213 if (T == 0 && F == 1) {
1214 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1215 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1216 True = ZReg;
1217 False = ZReg;
1218 return true;
1219 }
1220
1221 if (T == 0 && F == -1) {
1222 // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
1223 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1224 True = ZReg;
1225 False = ZReg;
1226 return true;
1227 }
1228 }
1229
1230 if (TrueCst) {
1231 int64_t T = TrueCst->Value.getSExtValue();
1232 if (T == 1) {
1233 // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1234 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1235 True = False;
1236 False = ZReg;
1237 CC = AArch64CC::getInvertedCondCode(CC);
1238 return true;
1239 }
1240
1241 if (T == -1) {
1242 // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1243 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1244 True = False;
1245 False = ZReg;
1246 CC = AArch64CC::getInvertedCondCode(CC);
1247 return true;
1248 }
1249 }
1250
1251 if (FalseCst) {
1252 int64_t F = FalseCst->Value.getSExtValue();
1253 if (F == 1) {
1254 // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1255 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1256 False = ZReg;
1257 return true;
1258 }
1259
1260 if (F == -1) {
1261 // G_SELECT cc, t, -1 -> CSINV t, zreg, cc
1262 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1263 False = ZReg;
1264 return true;
1265 }
1266 }
1267 return false;
1268 };
1269
1270 Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1271 Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1272 Optimized |= TryOptSelectCst();
1273 auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1274 constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1275 return &*SelectInst;
1276}
1277
1278 static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P,
1279 Register RHS = Register(),
1280 MachineRegisterInfo *MRI = nullptr) {
1281 switch (P) {
1282 default:
1283 llvm_unreachable("Unknown condition code!");
1284 case CmpInst::ICMP_NE:
1285 return AArch64CC::NE;
1286 case CmpInst::ICMP_EQ:
1287 return AArch64CC::EQ;
1288 case CmpInst::ICMP_SGT:
1289 return AArch64CC::GT;
1290 case CmpInst::ICMP_SGE:
1291 if (RHS && MRI) {
1292 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, *MRI);
1293 if (ValAndVReg && ValAndVReg->Value == 0)
1294 return AArch64CC::PL;
1295 }
1296 return AArch64CC::GE;
1297 case CmpInst::ICMP_SLT:
1298 if (RHS && MRI) {
1299 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, *MRI);
1300 if (ValAndVReg && ValAndVReg->Value == 0)
1301 return AArch64CC::MI;
1302 }
1303 return AArch64CC::LT;
1304 case CmpInst::ICMP_SLE:
1305 return AArch64CC::LE;
1306 case CmpInst::ICMP_UGT:
1307 return AArch64CC::HI;
1308 case CmpInst::ICMP_UGE:
1309 return AArch64CC::HS;
1310 case CmpInst::ICMP_ULT:
1311 return AArch64CC::LO;
1312 case CmpInst::ICMP_ULE:
1313 return AArch64CC::LS;
1314 }
1315}
1316
1317 /// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
1318 static void changeFPCCToORAArch64CC(CmpInst::Predicate CC,
1319 AArch64CC::CondCode &CondCode,
1320 AArch64CC::CondCode &CondCode2) {
1321 CondCode2 = AArch64CC::AL;
1322 switch (CC) {
1323 default:
1324 llvm_unreachable("Unknown FP condition!");
1325 case CmpInst::FCMP_OEQ:
1326 CondCode = AArch64CC::EQ;
1327 break;
1328 case CmpInst::FCMP_OGT:
1329 CondCode = AArch64CC::GT;
1330 break;
1331 case CmpInst::FCMP_OGE:
1332 CondCode = AArch64CC::GE;
1333 break;
1334 case CmpInst::FCMP_OLT:
1335 CondCode = AArch64CC::MI;
1336 break;
1337 case CmpInst::FCMP_OLE:
1338 CondCode = AArch64CC::LS;
1339 break;
1340 case CmpInst::FCMP_ONE:
1341 CondCode = AArch64CC::MI;
1342 CondCode2 = AArch64CC::GT;
1343 break;
1344 case CmpInst::FCMP_ORD:
1345 CondCode = AArch64CC::VC;
1346 break;
1347 case CmpInst::FCMP_UNO:
1348 CondCode = AArch64CC::VS;
1349 break;
1350 case CmpInst::FCMP_UEQ:
1351 CondCode = AArch64CC::EQ;
1352 CondCode2 = AArch64CC::VS;
1353 break;
1354 case CmpInst::FCMP_UGT:
1355 CondCode = AArch64CC::HI;
1356 break;
1357 case CmpInst::FCMP_UGE:
1358 CondCode = AArch64CC::PL;
1359 break;
1360 case CmpInst::FCMP_ULT:
1361 CondCode = AArch64CC::LT;
1362 break;
1363 case CmpInst::FCMP_ULE:
1364 CondCode = AArch64CC::LE;
1365 break;
1366 case CmpInst::FCMP_UNE:
1367 CondCode = AArch64CC::NE;
1368 break;
1369 }
1370}
1371
1372/// Convert an IR fp condition code to an AArch64 CC.
1373/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1374/// should be AND'ed instead of OR'ed.
1376 AArch64CC::CondCode &CondCode,
1377 AArch64CC::CondCode &CondCode2) {
1378 CondCode2 = AArch64CC::AL;
1379 switch (CC) {
1380 default:
1381 changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
1382 assert(CondCode2 == AArch64CC::AL);
1383 break;
1384 case CmpInst::FCMP_ONE:
1385 // (a one b)
1386 // == ((a olt b) || (a ogt b))
1387 // == ((a ord b) && (a une b))
1388 CondCode = AArch64CC::VC;
1389 CondCode2 = AArch64CC::NE;
1390 break;
1391 case CmpInst::FCMP_UEQ:
1392 // (a ueq b)
1393 // == ((a uno b) || (a oeq b))
1394 // == ((a ule b) && (a uge b))
1395 CondCode = AArch64CC::PL;
1396 CondCode2 = AArch64CC::LE;
1397 break;
1398 }
1399}
1400
1401/// Return a register which can be used as a bit to test in a TB(N)Z.
1402 static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1403 MachineRegisterInfo &MRI) {
1404 assert(Reg.isValid() && "Expected valid register!");
1405 bool HasZext = false;
1406 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1407 unsigned Opc = MI->getOpcode();
1408
1409 if (!MI->getOperand(0).isReg() ||
1410 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1411 break;
1412
1413 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1414 //
1415 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1416 // on the truncated x is the same as the bit number on x.
1417 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1418 Opc == TargetOpcode::G_TRUNC) {
1419 if (Opc == TargetOpcode::G_ZEXT)
1420 HasZext = true;
1421
1422 Register NextReg = MI->getOperand(1).getReg();
1423 // Did we find something worth folding?
1424 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1425 break;
1426
1427 // NextReg is worth folding. Keep looking.
1428 Reg = NextReg;
1429 continue;
1430 }
1431
1432 // Attempt to find a suitable operation with a constant on one side.
1433 std::optional<uint64_t> C;
1434 Register TestReg;
1435 switch (Opc) {
1436 default:
1437 break;
1438 case TargetOpcode::G_AND:
1439 case TargetOpcode::G_XOR: {
1440 TestReg = MI->getOperand(1).getReg();
1441 Register ConstantReg = MI->getOperand(2).getReg();
1442 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1443 if (!VRegAndVal) {
1444 // AND commutes, check the other side for a constant.
1445 // FIXME: Can we canonicalize the constant so that it's always on the
1446 // same side at some point earlier?
1447 std::swap(ConstantReg, TestReg);
1448 VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1449 }
1450 if (VRegAndVal) {
1451 if (HasZext)
1452 C = VRegAndVal->Value.getZExtValue();
1453 else
1454 C = VRegAndVal->Value.getSExtValue();
1455 }
1456 break;
1457 }
1458 case TargetOpcode::G_ASHR:
1459 case TargetOpcode::G_LSHR:
1460 case TargetOpcode::G_SHL: {
1461 TestReg = MI->getOperand(1).getReg();
1462 auto VRegAndVal =
1463 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1464 if (VRegAndVal)
1465 C = VRegAndVal->Value.getSExtValue();
1466 break;
1467 }
1468 }
1469
1470 // Didn't find a constant or viable register. Bail out of the loop.
1471 if (!C || !TestReg.isValid())
1472 break;
1473
1474 // We found a suitable instruction with a constant. Check to see if we can
1475 // walk through the instruction.
1476 Register NextReg;
1477 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1478 switch (Opc) {
1479 default:
1480 break;
1481 case TargetOpcode::G_AND:
1482 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1483 if ((*C >> Bit) & 1)
1484 NextReg = TestReg;
1485 break;
1486 case TargetOpcode::G_SHL:
1487 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1488 // the type of the register.
1489 if (*C <= Bit && (Bit - *C) < TestRegSize) {
1490 NextReg = TestReg;
1491 Bit = Bit - *C;
1492 }
1493 break;
1494 case TargetOpcode::G_ASHR:
1495 // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1496 // in x
1497 NextReg = TestReg;
1498 Bit = Bit + *C;
1499 if (Bit >= TestRegSize)
1500 Bit = TestRegSize - 1;
1501 break;
1502 case TargetOpcode::G_LSHR:
1503 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1504 if ((Bit + *C) < TestRegSize) {
1505 NextReg = TestReg;
1506 Bit = Bit + *C;
1507 }
1508 break;
1509 case TargetOpcode::G_XOR:
1510 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1511 // appropriate.
1512 //
1513 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1514 //
1515 // tbz x', b -> tbnz x, b
1516 //
1517 // Because x' only has the b-th bit set if x does not.
1518 if ((*C >> Bit) & 1)
1519 Invert = !Invert;
1520 NextReg = TestReg;
1521 break;
1522 }
1523
1524 // Check if we found anything worth folding.
1525 if (!NextReg.isValid())
1526 return Reg;
1527 Reg = NextReg;
1528 }
1529
1530 return Reg;
1531}
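// Worked example (added comment; hypothetical MIR, not from the original
// source): given
//   %shl = G_SHL %x, 2
// a request to test bit 3 of %shl hits the G_SHL case above and becomes a
// test of bit 3 - 2 = 1 of %x, so the eventual TB(N)Z reads %x directly and
// the shift does not need to be materialized for the branch.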
1532
1533MachineInstr *AArch64InstructionSelector::emitTestBit(
1534 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1535 MachineIRBuilder &MIB) const {
1536 assert(TestReg.isValid());
1537 assert(ProduceNonFlagSettingCondBr &&
1538 "Cannot emit TB(N)Z with speculation tracking!");
1539 MachineRegisterInfo &MRI = *MIB.getMRI();
1540
1541 // Attempt to optimize the test bit by walking over instructions.
1542 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1543 LLT Ty = MRI.getType(TestReg);
1544 unsigned Size = Ty.getSizeInBits();
1545 assert(!Ty.isVector() && "Expected a scalar!");
1546 assert(Bit < 64 && "Bit is too large!");
1547
1548 // When the test register is a 64-bit register, we have to narrow to make
1549 // TBNZW work.
1550 bool UseWReg = Bit < 32;
1551 unsigned NecessarySize = UseWReg ? 32 : 64;
1552 if (Size != NecessarySize)
1553 TestReg = moveScalarRegClass(
1554 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1555 MIB);
1556
1557 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1558 {AArch64::TBZW, AArch64::TBNZW}};
1559 unsigned Opc = OpcTable[UseWReg][IsNegative];
1560 auto TestBitMI =
1561 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1562 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1563 return &*TestBitMI;
1564}
1565
1566bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1567 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1568 MachineIRBuilder &MIB) const {
1569 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1570 // Given something like this:
1571 //
1572 // %x = ...Something...
1573 // %one = G_CONSTANT i64 1
1574 // %zero = G_CONSTANT i64 0
1575 // %and = G_AND %x, %one
1576 // %cmp = G_ICMP intpred(ne), %and, %zero
1577 // %cmp_trunc = G_TRUNC %cmp
1578 // G_BRCOND %cmp_trunc, %bb.3
1579 //
1580 // We want to try and fold the AND into the G_BRCOND and produce either a
1581 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1582 //
1583 // In this case, we'd get
1584 //
1585 // TBNZ %x %bb.3
1586 //
1587
1588 // Check if the AND has a constant on its RHS which we can use as a mask.
1589 // If it's a power of 2, then it's the same as checking a specific bit.
1590 // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1591 auto MaybeBit = getIConstantVRegValWithLookThrough(
1592 AndInst.getOperand(2).getReg(), *MIB.getMRI());
1593 if (!MaybeBit)
1594 return false;
1595
1596 int32_t Bit = MaybeBit->Value.exactLogBase2();
1597 if (Bit < 0)
1598 return false;
1599
1600 Register TestReg = AndInst.getOperand(1).getReg();
1601
1602 // Emit a TB(N)Z.
1603 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1604 return true;
1605}
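// Concrete illustration (added comment, not from the original source): with a
// mask constant of 8 (0b1000), exactLogBase2() returns 3, so the emitted
// TB(N)Z tests bit 3 of the AND's other operand; the caller passes
// Invert = true for intpred(ne), which selects TBNZ, and false for
// intpred(eq), which selects TBZ.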
1606
1607MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1608 bool IsNegative,
1609 MachineBasicBlock *DestMBB,
1610 MachineIRBuilder &MIB) const {
1611 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1612 MachineRegisterInfo &MRI = *MIB.getMRI();
1613 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1614 AArch64::GPRRegBankID &&
1615 "Expected GPRs only?");
1616 auto Ty = MRI.getType(CompareReg);
1617 unsigned Width = Ty.getSizeInBits();
1618 assert(!Ty.isVector() && "Expected scalar only?");
1619 assert(Width <= 64 && "Expected width to be at most 64?");
1620 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1621 {AArch64::CBNZW, AArch64::CBNZX}};
1622 unsigned Opc = OpcTable[IsNegative][Width == 64];
1623 auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1624 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1625 return &*BranchMI;
1626}
1627
1628bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1629 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1630 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1631 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1632 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1633 // totally clean. Some of them require two branches to implement.
1634 auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1635 emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1636 Pred);
1637 AArch64CC::CondCode CC1, CC2;
1638 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
1639 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1640 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1641 if (CC2 != AArch64CC::AL)
1642 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1643 I.eraseFromParent();
1644 return true;
1645}
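// Illustrative example (added comment, not from the original source): a
// predicate such as FCMP_UEQ has no single AArch64 condition code; following
// the OR-style mapping above (EQ plus a second VS check), this path emits the
// FCMP followed by "Bcc EQ" and "Bcc VS" to the same destination block.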
1646
1647bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1648 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1649 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1650 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1651 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1652 //
1653 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1654 // instructions will not be produced, as they are conditional branch
1655 // instructions that do not set flags.
1656 if (!ProduceNonFlagSettingCondBr)
1657 return false;
1658
1659 MachineRegisterInfo &MRI = *MIB.getMRI();
1660 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1661 auto Pred =
1662 static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1663 Register LHS = ICmp.getOperand(2).getReg();
1664 Register RHS = ICmp.getOperand(3).getReg();
1665
1666 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1667 auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1668 MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1669
1670 // When we can emit a TB(N)Z, prefer that.
1671 //
1672 // Handle non-commutative condition codes first.
1673 // Note that we don't want to do this when we have a G_AND because it can
1674 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1675 if (VRegAndVal && !AndInst) {
1676 int64_t C = VRegAndVal->Value.getSExtValue();
1677
1678 // When we have a greater-than comparison, we can just test if the msb is
1679 // zero.
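 // (x s> -1) holds exactly when the sign bit of x is clear, so a TBZ on the
 // top bit is sufficient.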
1680 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1681 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1682 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1683 I.eraseFromParent();
1684 return true;
1685 }
1686
1687 // When we have a less-than comparison, we can just test if the msb is not
1688 // zero.
1689 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1690 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1691 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1692 I.eraseFromParent();
1693 return true;
1694 }
1695
1696 // Conversely, if we have a signed greater-than-or-equal comparison to zero,
1697 // we can test if the msb is zero.
1698 if (C == 0 && Pred == CmpInst::ICMP_SGE) {
1699 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1700 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1701 I.eraseFromParent();
1702 return true;
1703 }
1704 }
1705
1706 // Attempt to handle commutative condition codes. Right now, that's only
1707 // eq/ne.
1708 if (ICmpInst::isEquality(Pred)) {
1709 if (!VRegAndVal) {
1710 std::swap(RHS, LHS);
1711 VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1712 AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1713 }
1714
1715 if (VRegAndVal && VRegAndVal->Value == 0) {
1716 // If there's a G_AND feeding into this branch, try to fold it away by
1717 // emitting a TB(N)Z instead.
1718 //
1719 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1720 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1721 // would be redundant.
1722 if (AndInst &&
1723 tryOptAndIntoCompareBranch(
1724 *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1725 I.eraseFromParent();
1726 return true;
1727 }
1728
1729 // Otherwise, try to emit a CB(N)Z instead.
1730 auto LHSTy = MRI.getType(LHS);
1731 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1732 emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1733 I.eraseFromParent();
1734 return true;
1735 }
1736 }
1737 }
1738
1739 return false;
1740}
1741
1742bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1743 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1744 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1745 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1746 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1747 return true;
1748
1749 // Couldn't optimize. Emit a compare + a Bcc.
1750 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1751 auto &PredOp = ICmp.getOperand(1);
1752 emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1753 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1754 static_cast<CmpInst::Predicate>(PredOp.getPredicate()),
1755 ICmp.getOperand(3).getReg(), MIB.getMRI());
1756 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1757 I.eraseFromParent();
1758 return true;
1759}
1760
1761bool AArch64InstructionSelector::selectCompareBranch(
1762 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
1763 Register CondReg = I.getOperand(0).getReg();
1764 MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1765 // Try to select the G_BRCOND using whatever is feeding the condition if
1766 // possible.
1767 unsigned CCMIOpc = CCMI->getOpcode();
1768 if (CCMIOpc == TargetOpcode::G_FCMP)
1769 return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1770 if (CCMIOpc == TargetOpcode::G_ICMP)
1771 return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1772
1773 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1774 // instructions will not be produced, as they are conditional branch
1775 // instructions that do not set flags.
1776 if (ProduceNonFlagSettingCondBr) {
1777 emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1778 I.getOperand(1).getMBB(), MIB);
1779 I.eraseFromParent();
1780 return true;
1781 }
1782
1783 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1784 auto TstMI =
1785 MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1786 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
1787 auto Bcc = MIB.buildInstr(AArch64::Bcc)
1788 .addImm(AArch64CC::NE)
1789 .addMBB(I.getOperand(1).getMBB());
1790 I.eraseFromParent();
1791 return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1792}
1793
1794/// Returns the element immediate value of a vector shift operand if found.
1795/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1796static std::optional<int64_t> getVectorShiftImm(Register Reg,
1797 MachineRegisterInfo &MRI) {
1798 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1799 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1800 return getAArch64VectorSplatScalar(*OpMI, MRI);
1801}
1802
1803/// Matches and returns the shift immediate value for a SHL instruction given
1804/// a shift operand.
1805static std::optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg,
1806 MachineRegisterInfo &MRI) {
1807 std::optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1808 if (!ShiftImm)
1809 return std::nullopt;
1810 // Check the immediate is in range for a SHL.
1811 int64_t Imm = *ShiftImm;
1812 if (Imm < 0)
1813 return std::nullopt;
1814 switch (SrcTy.getElementType().getSizeInBits()) {
1815 default:
1816 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1817 return std::nullopt;
1818 case 8:
1819 if (Imm > 7)
1820 return std::nullopt;
1821 break;
1822 case 16:
1823 if (Imm > 15)
1824 return std::nullopt;
1825 break;
1826 case 32:
1827 if (Imm > 31)
1828 return std::nullopt;
1829 break;
1830 case 64:
1831 if (Imm > 63)
1832 return std::nullopt;
1833 break;
1834 }
1835 return Imm;
1836}
1837
1838bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
1839 MachineRegisterInfo &MRI) {
1840 assert(I.getOpcode() == TargetOpcode::G_SHL);
1841 Register DstReg = I.getOperand(0).getReg();
1842 const LLT Ty = MRI.getType(DstReg);
1843 Register Src1Reg = I.getOperand(1).getReg();
1844 Register Src2Reg = I.getOperand(2).getReg();
1845
1846 if (!Ty.isVector())
1847 return false;
1848
1849 // Check if we have a vector of constants on RHS that we can select as the
1850 // immediate form.
1851 std::optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1852
1853 unsigned Opc = 0;
1854 if (Ty == LLT::fixed_vector(2, 64)) {
1855 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1856 } else if (Ty == LLT::fixed_vector(4, 32)) {
1857 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1858 } else if (Ty == LLT::fixed_vector(2, 32)) {
1859 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1860 } else if (Ty == LLT::fixed_vector(4, 16)) {
1861 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1862 } else if (Ty == LLT::fixed_vector(8, 16)) {
1863 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1864 } else if (Ty == LLT::fixed_vector(16, 8)) {
1865 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1866 } else if (Ty == LLT::fixed_vector(8, 8)) {
1867 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1868 } else {
1869 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1870 return false;
1871 }
1872
1873 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1874 if (ImmVal)
1875 Shl.addImm(*ImmVal);
1876 else
1877 Shl.addUse(Src2Reg);
1878 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1879 I.eraseFromParent();
1880 return true;
1881}
1882
1883bool AArch64InstructionSelector::selectVectorAshrLshr(
1884 MachineInstr &I, MachineRegisterInfo &MRI) {
1885 assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1886 I.getOpcode() == TargetOpcode::G_LSHR);
1887 Register DstReg = I.getOperand(0).getReg();
1888 const LLT Ty = MRI.getType(DstReg);
1889 Register Src1Reg = I.getOperand(1).getReg();
1890 Register Src2Reg = I.getOperand(2).getReg();
1891
1892 if (!Ty.isVector())
1893 return false;
1894
1895 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1896
1897 // We expect the immediate case to be lowered in the PostLegalCombiner to
1898 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1899
1900 // There is no shift-right-by-register instruction, but the shift-left-by-
1901 // register instruction takes a signed shift amount, where a negative value
1902 // specifies a right shift.
1903
1904 unsigned Opc = 0;
1905 unsigned NegOpc = 0;
1906 const TargetRegisterClass *RC =
1907 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
1908 if (Ty == LLT::fixed_vector(2, 64)) {
1909 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1910 NegOpc = AArch64::NEGv2i64;
1911 } else if (Ty == LLT::fixed_vector(4, 32)) {
1912 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1913 NegOpc = AArch64::NEGv4i32;
1914 } else if (Ty == LLT::fixed_vector(2, 32)) {
1915 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1916 NegOpc = AArch64::NEGv2i32;
1917 } else if (Ty == LLT::fixed_vector(4, 16)) {
1918 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1919 NegOpc = AArch64::NEGv4i16;
1920 } else if (Ty == LLT::fixed_vector(8, 16)) {
1921 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1922 NegOpc = AArch64::NEGv8i16;
1923 } else if (Ty == LLT::fixed_vector(16, 8)) {
1924 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1925 NegOpc = AArch64::NEGv16i8;
1926 } else if (Ty == LLT::fixed_vector(8, 8)) {
1927 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1928 NegOpc = AArch64::NEGv8i8;
1929 } else {
1930 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1931 return false;
1932 }
1933
1934 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1935 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1936 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1937 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1938 I.eraseFromParent();
1939 return true;
1940}
1941
1942bool AArch64InstructionSelector::selectVaStartAAPCS(
1943 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1944
1945 if (STI.isCallingConvWin64(MF.getFunction().getCallingConv(),
1946 MF.getFunction().isVarArg()))
1947 return false;
1948
1949 // The layout of the va_list struct is specified in the AArch64 Procedure Call
1950 // Standard, section 10.1.5.
1951
1952 const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1953 const unsigned PtrSize = STI.isTargetILP32() ? 4 : 8;
1954 const auto *PtrRegClass =
1955 STI.isTargetILP32() ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
1956
1957 const MCInstrDesc &MCIDAddAddr =
1958 TII.get(STI.isTargetILP32() ? AArch64::ADDWri : AArch64::ADDXri);
1959 const MCInstrDesc &MCIDStoreAddr =
1960 TII.get(STI.isTargetILP32() ? AArch64::STRWui : AArch64::STRXui);
1961
1962 /*
1963 * typedef struct va_list {
1964 * void * stack; // next stack param
1965 * void * gr_top; // end of GP arg reg save area
1966 * void * vr_top; // end of FP/SIMD arg reg save area
1967 * int gr_offs; // offset from gr_top to next GP register arg
1968 * int vr_offs; // offset from vr_top to next FP/SIMD register arg
1969 * } va_list;
1970 */
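 // Note: the STRXui/STRWui stores emitted below take an unsigned *scaled*
 // immediate offset, which is why the operand is OffsetBytes / PtrSize (and
 // OffsetBytes / IntSize for the two int fields).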
1971 const auto VAList = I.getOperand(0).getReg();
1972
1973 // Our current offset in bytes from the va_list struct (VAList).
1974 unsigned OffsetBytes = 0;
1975
1976 // Helper function to store (FrameIndex + Imm) to VAList at offset OffsetBytes
1977 // and increment OffsetBytes by PtrSize.
1978 const auto PushAddress = [&](const int FrameIndex, const int64_t Imm) {
1979 const Register Top = MRI.createVirtualRegister(PtrRegClass);
1980 auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), MCIDAddAddr)
1981 .addDef(Top)
1982 .addFrameIndex(FrameIndex)
1983 .addImm(Imm)
1984 .addImm(0);
1985 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1986 
1987 const auto *MMO = *I.memoperands_begin();
1988 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), MCIDStoreAddr)
1989 .addUse(Top)
1990 .addUse(VAList)
1991 .addImm(OffsetBytes / PtrSize)
1992 .addMemOperand(MF.getMachineMemOperand(
1993 MMO->getPointerInfo().getWithOffset(OffsetBytes),
1994 MachineMemOperand::MOStore, PtrSize, MMO->getBaseAlign()));
1995 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1996 
1997 OffsetBytes += PtrSize;
1998 };
1999
2000 // void* stack at offset 0
2001 PushAddress(FuncInfo->getVarArgsStackIndex(), 0);
2002
2003 // void* gr_top at offset 8 (4 on ILP32)
2004 const unsigned GPRSize = FuncInfo->getVarArgsGPRSize();
2005 PushAddress(FuncInfo->getVarArgsGPRIndex(), GPRSize);
2006
2007 // void* vr_top at offset 16 (8 on ILP32)
2008 const unsigned FPRSize = FuncInfo->getVarArgsFPRSize();
2009 PushAddress(FuncInfo->getVarArgsFPRIndex(), FPRSize);
2010
2011 // Helper function to store a 4-byte integer constant to VAList at offset
2012 // OffsetBytes, and increment OffsetBytes by 4.
2013 const auto PushIntConstant = [&](const int32_t Value) {
2014 constexpr int IntSize = 4;
2015 const Register Temp = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2016 auto MIB =
2017 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::MOVi32imm))
2018 .addDef(Temp)
2019 .addImm(Value);
2020 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2021 
2022 const auto *MMO = *I.memoperands_begin();
2023 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRWui))
2024 .addUse(Temp)
2025 .addUse(VAList)
2026 .addImm(OffsetBytes / IntSize)
2027 .addMemOperand(MF.getMachineMemOperand(
2028 MMO->getPointerInfo().getWithOffset(OffsetBytes),
2029 MachineMemOperand::MOStore, IntSize, MMO->getBaseAlign()));
2030 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2031 OffsetBytes += IntSize;
2032 };
2033
2034 // int gr_offs at offset 24 (12 on ILP32)
2035 PushIntConstant(-static_cast<int32_t>(GPRSize));
2036
2037 // int vr_offs at offset 28 (16 on ILP32)
2038 PushIntConstant(-static_cast<int32_t>(FPRSize));
2039
2040 assert(OffsetBytes == (STI.isTargetILP32() ? 20 : 32) && "Unexpected offset");
2041
2042 I.eraseFromParent();
2043 return true;
2044}
2045
2046bool AArch64InstructionSelector::selectVaStartDarwin(
2047 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
2048 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
2049 Register ListReg = I.getOperand(0).getReg();
2050
2051 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2052
2053 int FrameIdx = FuncInfo->getVarArgsStackIndex();
2054 if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64(
2055 MF.getFunction().getCallingConv(), MF.getFunction().isVarArg())) {
2056 FrameIdx = FuncInfo->getVarArgsGPRSize() > 0
2057 ? FuncInfo->getVarArgsGPRIndex()
2058 : FuncInfo->getVarArgsStackIndex();
2059 }
2060
2061 auto MIB =
2062 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
2063 .addDef(ArgsAddrReg)
2064 .addFrameIndex(FrameIdx)
2065 .addImm(0)
2066 .addImm(0);
2067
2068 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2069 
2070 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
2071 .addUse(ArgsAddrReg)
2072 .addUse(ListReg)
2073 .addImm(0)
2074 .addMemOperand(*I.memoperands_begin());
2075
2076 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2077 I.eraseFromParent();
2078 return true;
2079}
2080
2081void AArch64InstructionSelector::materializeLargeCMVal(
2082 MachineInstr &I, const Value *V, unsigned OpFlags) {
2083 MachineBasicBlock &MBB = *I.getParent();
2084 MachineFunction &MF = *MBB.getParent();
2085 MachineRegisterInfo &MRI = MF.getRegInfo();
2086
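 // Build the 64-bit value 16 bits at a time: a MOVZ seeds bits 0-15 (MO_G0),
 // and three MOVKs fill in bits 16-31, 32-47 and 48-63 (MO_G1..MO_G3).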
2087 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
2088 MovZ->addOperand(MF, I.getOperand(1));
2089 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
2090 AArch64II::MO_NC);
2091 MovZ->addOperand(MF, MachineOperand::CreateImm(0));
2092 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
2093 
2094 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
2095 Register ForceDstReg) {
2096 Register DstReg = ForceDstReg
2097 ? ForceDstReg
2098 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2099 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
2100 if (auto *GV = dyn_cast<GlobalValue>(V)) {
2101 MovI->addOperand(MF, MachineOperand::CreateGA(
2102 GV, MovZ->getOperand(1).getOffset(), Flags));
2103 } else {
2104 MovI->addOperand(
2105 MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
2106 MovZ->getOperand(1).getOffset(), Flags));
2107 }
2108 MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
2109 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
2110 return DstReg;
2111 };
2112 Register DstReg = BuildMovK(MovZ.getReg(0),
2113 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
2114 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
2115 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
2116}
2117
2118bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
2119 MachineBasicBlock &MBB = *I.getParent();
2120 MachineFunction &MF = *MBB.getParent();
2121 MachineRegisterInfo &MRI = MF.getRegInfo();
2122
2123 switch (I.getOpcode()) {
2124 case TargetOpcode::G_STORE: {
2125 bool Changed = contractCrossBankCopyIntoStore(I, MRI);
2126 MachineOperand &SrcOp = I.getOperand(0);
2127 if (MRI.getType(SrcOp.getReg()).isPointer()) {
2128 // Allow matching with imported patterns for stores of pointers. Unlike
2129 // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
2130 // and constrain.
2131 auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
2132 Register NewSrc = Copy.getReg(0);
2133 SrcOp.setReg(NewSrc);
2134 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
2135 Changed = true;
2136 }
2137 return Changed;
2138 }
2139 case TargetOpcode::G_PTR_ADD: {
2140 // If Checked Pointer Arithmetic (FEAT_CPA) is present, preserve the pointer
2141 // arithmetic semantics instead of falling back to regular arithmetic.
2142 const auto &TL = STI.getTargetLowering();
2143 if (TL->shouldPreservePtrArith(MF.getFunction(), EVT()))
2144 return false;
2145 return convertPtrAddToAdd(I, MRI);
2146 }
2147 case TargetOpcode::G_LOAD: {
2148 // For scalar loads of pointers, we try to convert the dest type from p0
2149 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
2150 // conversion, this should be ok because all users should have been
2151 // selected already, so the type doesn't matter for them.
2152 Register DstReg = I.getOperand(0).getReg();
2153 const LLT DstTy = MRI.getType(DstReg);
2154 if (!DstTy.isPointer())
2155 return false;
2156 MRI.setType(DstReg, LLT::scalar(64));
2157 return true;
2158 }
2159 case AArch64::G_DUP: {
2160 // Convert the type from p0 to s64 to help selection.
2161 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2162 if (!DstTy.isPointerVector())
2163 return false;
2164 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
2165 MRI.setType(I.getOperand(0).getReg(),
2166 DstTy.changeElementType(LLT::scalar(64)));
2167 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2168 I.getOperand(1).setReg(NewSrc.getReg(0));
2169 return true;
2170 }
2171 case AArch64::G_INSERT_VECTOR_ELT: {
2172 // Convert the type from p0 to s64 to help selection.
2173 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2174 LLT SrcVecTy = MRI.getType(I.getOperand(1).getReg());
2175 if (!SrcVecTy.isPointerVector())
2176 return false;
2177 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(2).getReg());
2178 MRI.setType(I.getOperand(1).getReg(),
2179 DstTy.changeElementType(LLT::scalar(64)));
2180 MRI.setType(I.getOperand(0).getReg(),
2181 DstTy.changeElementType(LLT::scalar(64)));
2182 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2183 I.getOperand(2).setReg(NewSrc.getReg(0));
2184 return true;
2185 }
2186 case TargetOpcode::G_UITOFP:
2187 case TargetOpcode::G_SITOFP: {
2188 // If both source and destination regbanks are FPR, then convert the opcode
2189 // to G_SITOF so that the importer can select it to an fpr variant.
2190 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
2191 // copy.
2192 Register SrcReg = I.getOperand(1).getReg();
2193 LLT SrcTy = MRI.getType(SrcReg);
2194 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2195 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
2196 return false;
2197
2198 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
2199 if (I.getOpcode() == TargetOpcode::G_SITOFP)
2200 I.setDesc(TII.get(AArch64::G_SITOF));
2201 else
2202 I.setDesc(TII.get(AArch64::G_UITOF));
2203 return true;
2204 }
2205 return false;
2206 }
2207 default:
2208 return false;
2209 }
2210}
2211
2212/// This lowering tries to look for G_PTR_ADD instructions and then converts
2213/// them to a standard G_ADD with a COPY on the source.
2214///
2215/// The motivation behind this is to expose the add semantics to the imported
2216/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
2217/// because the selector works bottom up, uses before defs. By the time we
2218/// end up trying to select a G_PTR_ADD, we should have already attempted to
2219/// fold this into addressing modes and were therefore unsuccessful.
2220bool AArch64InstructionSelector::convertPtrAddToAdd(
2221 MachineInstr &I, MachineRegisterInfo &MRI) {
2222 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
2223 Register DstReg = I.getOperand(0).getReg();
2224 Register AddOp1Reg = I.getOperand(1).getReg();
2225 const LLT PtrTy = MRI.getType(DstReg);
2226 if (PtrTy.getAddressSpace() != 0)
2227 return false;
2228
2229 const LLT CastPtrTy =
2230 PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
2231 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
2232 // Set regbanks on the registers.
2233 if (PtrTy.isVector())
2234 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2235 else
2236 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2237
2238 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2239 // %dst(intty) = G_ADD %intbase, off
2240 I.setDesc(TII.get(TargetOpcode::G_ADD));
2241 MRI.setType(DstReg, CastPtrTy);
2242 I.getOperand(1).setReg(PtrToInt.getReg(0));
2243 if (!select(*PtrToInt)) {
2244 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
2245 return false;
2246 }
2247
2248 // Also take the opportunity here to try to do some optimization.
2249 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
2250 Register NegatedReg;
2251 if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
2252 return true;
2253 I.getOperand(2).setReg(NegatedReg);
2254 I.setDesc(TII.get(TargetOpcode::G_SUB));
2255 return true;
2256}
2257
2258bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
2259 MachineRegisterInfo &MRI) {
2260 // We try to match the immediate variant of LSL, which is actually an alias
2261 // for a special case of UBFM. Otherwise, we fall back to the imported
2262 // selector which will match the register variant.
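 // For reference, the 64-bit alias is lsl Xd, Xn, #s == ubfm Xd, Xn,
 // #((64 - s) % 64), #(63 - s); the two immediate renderers below supply the
 // corresponding UBFM operands.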
2263 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2264 const auto &MO = I.getOperand(2);
2265 auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
2266 if (!VRegAndVal)
2267 return false;
2268
2269 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2270 if (DstTy.isVector())
2271 return false;
2272 bool Is64Bit = DstTy.getSizeInBits() == 64;
2273 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2274 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2275
2276 if (!Imm1Fn || !Imm2Fn)
2277 return false;
2278
2279 auto NewI =
2280 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2281 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2282
2283 for (auto &RenderFn : *Imm1Fn)
2284 RenderFn(NewI);
2285 for (auto &RenderFn : *Imm2Fn)
2286 RenderFn(NewI);
2287
2288 I.eraseFromParent();
2289 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
2290}
2291
2292bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2293 MachineInstr &I, MachineRegisterInfo &MRI) {
2294 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2295 // If we're storing a scalar, it doesn't matter what register bank that
2296 // scalar is on. All that matters is the size.
2297 //
2298 // So, if we see something like this (with a 32-bit scalar as an example):
2299 //
2300 // %x:gpr(s32) = ... something ...
2301 // %y:fpr(s32) = COPY %x:gpr(s32)
2302 // G_STORE %y:fpr(s32)
2303 //
2304 // We can fix this up into something like this:
2305 //
2306 // G_STORE %x:gpr(s32)
2307 //
2308 // And then continue the selection process normally.
2309 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2310 if (!DefDstReg.isValid())
2311 return false;
2312 LLT DefDstTy = MRI.getType(DefDstReg);
2313 Register StoreSrcReg = I.getOperand(0).getReg();
2314 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2315
2316 // If we get something strange like a physical register, then we shouldn't
2317 // go any further.
2318 if (!DefDstTy.isValid())
2319 return false;
2320
2321 // Are the source and dst types the same size?
2322 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2323 return false;
2324
2325 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2326 RBI.getRegBank(DefDstReg, MRI, TRI))
2327 return false;
2328
2329 // We have a cross-bank copy, which is entering a store. Let's fold it.
2330 I.getOperand(0).setReg(DefDstReg);
2331 return true;
2332}
2333
2334bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2335 assert(I.getParent() && "Instruction should be in a basic block!");
2336 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2337
2338 MachineBasicBlock &MBB = *I.getParent();
2339 MachineFunction &MF = *MBB.getParent();
2340 MachineRegisterInfo &MRI = MF.getRegInfo();
2341
2342 switch (I.getOpcode()) {
2343 case AArch64::G_DUP: {
2344 // Before selecting a DUP instruction, check if it is better selected as a
2345 // MOV or load from a constant pool.
2346 Register Src = I.getOperand(1).getReg();
2347 auto ValAndVReg = getAnyConstantVRegValWithLookThrough(Src, MRI);
2348 if (!ValAndVReg)
2349 return false;
2350 LLVMContext &Ctx = MF.getFunction().getContext();
2351 Register Dst = I.getOperand(0).getReg();
2352 auto *CV = ConstantDataVector::getSplat(
2353 MRI.getType(Dst).getNumElements(),
2354 ConstantInt::get(
2355 Type::getIntNTy(Ctx, MRI.getType(Dst).getScalarSizeInBits()),
2356 ValAndVReg->Value.trunc(MRI.getType(Dst).getScalarSizeInBits())));
2357 if (!emitConstantVector(Dst, CV, MIB, MRI))
2358 return false;
2359 I.eraseFromParent();
2360 return true;
2361 }
2362 case TargetOpcode::G_SEXT:
2363 // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
2364 // over a normal extend.
2365 if (selectUSMovFromExtend(I, MRI))
2366 return true;
2367 return false;
2368 case TargetOpcode::G_BR:
2369 return false;
2370 case TargetOpcode::G_SHL:
2371 return earlySelectSHL(I, MRI);
2372 case TargetOpcode::G_CONSTANT: {
2373 bool IsZero = false;
2374 if (I.getOperand(1).isCImm())
2375 IsZero = I.getOperand(1).getCImm()->isZero();
2376 else if (I.getOperand(1).isImm())
2377 IsZero = I.getOperand(1).getImm() == 0;
2378
2379 if (!IsZero)
2380 return false;
2381
2382 Register DefReg = I.getOperand(0).getReg();
2383 LLT Ty = MRI.getType(DefReg);
2384 if (Ty.getSizeInBits() == 64) {
2385 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2386 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2387 } else if (Ty.getSizeInBits() == 32) {
2388 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2389 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2390 } else
2391 return false;
2392
2393 I.setDesc(TII.get(TargetOpcode::COPY));
2394 return true;
2395 }
2396
2397 case TargetOpcode::G_ADD: {
2398 // Check if this is being fed by a G_ICMP on either side.
2399 //
2400 // (cmp pred, x, y) + z
2401 //
2402 // In the above case, when the cmp is true, we increment z by 1. So, we can
2403 // fold the add into the cset for the cmp by using cinc.
2404 //
2405 // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
2406 Register AddDst = I.getOperand(0).getReg();
2407 Register AddLHS = I.getOperand(1).getReg();
2408 Register AddRHS = I.getOperand(2).getReg();
2409 // Only handle scalars.
2410 LLT Ty = MRI.getType(AddLHS);
2411 if (Ty.isVector())
2412 return false;
2413 // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
2414 // bits.
2415 unsigned Size = Ty.getSizeInBits();
2416 if (Size != 32 && Size != 64)
2417 return false;
2418 auto MatchCmp = [&](Register Reg) -> MachineInstr * {
2419 if (!MRI.hasOneNonDBGUse(Reg))
2420 return nullptr;
2421 // If the LHS of the add is 32 bits, then we want to fold a 32-bit
2422 // compare.
2423 if (Size == 32)
2424 return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
2425 // We model scalar compares using 32-bit destinations right now.
2426 // If it's a 64-bit compare, it'll have 64-bit sources.
2427 Register ZExt;
2428 if (!mi_match(Reg, MRI,
2429 m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt))))))
2430 return nullptr;
2431 auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
2432 if (!Cmp ||
2433 MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
2434 return nullptr;
2435 return Cmp;
2436 };
2437 // Try to match
2438 // z + (cmp pred, x, y)
2439 MachineInstr *Cmp = MatchCmp(AddRHS);
2440 if (!Cmp) {
2441 // (cmp pred, x, y) + z
2442 std::swap(AddLHS, AddRHS);
2443 Cmp = MatchCmp(AddRHS);
2444 if (!Cmp)
2445 return false;
2446 }
2447 auto &PredOp = Cmp->getOperand(1);
2449 emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
2450 /*RHS=*/Cmp->getOperand(3), PredOp, MIB);
2451 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
2452 const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
2453 CmpInst::getInversePredicate(Pred), Cmp->getOperand(3).getReg(), &MRI);
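 // CSINC Dst, AddLHS, AddLHS, InvCC yields AddLHS when InvCC holds and
 // AddLHS + 1 otherwise, i.e. AddLHS incremented exactly when the compare is
 // true.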
2454 emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
2455 I.eraseFromParent();
2456 return true;
2457 }
2458 case TargetOpcode::G_OR: {
2459 // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2460 // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2461 // shifting and masking that we can replace with a BFI (encoded as a BFM).
2462 Register Dst = I.getOperand(0).getReg();
2463 LLT Ty = MRI.getType(Dst);
2464
2465 if (!Ty.isScalar())
2466 return false;
2467
2468 unsigned Size = Ty.getSizeInBits();
2469 if (Size != 32 && Size != 64)
2470 return false;
2471
2472 Register ShiftSrc;
2473 int64_t ShiftImm;
2474 Register MaskSrc;
2475 int64_t MaskImm;
2476 if (!mi_match(
2477 Dst, MRI,
2478 m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
2479 m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
2480 return false;
2481
2482 if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2483 return false;
2484
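 // This is BFI Dst, ShiftSrc, #ShiftImm, #(Size - ShiftImm), expressed
 // directly as a BFM with immr = Size - ShiftImm and imms = Size - ShiftImm - 1.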
2485 int64_t Immr = Size - ShiftImm;
2486 int64_t Imms = Size - ShiftImm - 1;
2487 unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2488 emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2489 I.eraseFromParent();
2490 return true;
2491 }
2492 case TargetOpcode::G_FENCE: {
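 // Operand 1 is the synchronization scope: 0 (singlethread) only needs a
 // compiler barrier. Otherwise emit a DMB: 0x9 (ISHLD) for an acquire-only
 // fence (ordering == 4), 0xb (ISH) for anything stronger.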
2493 if (I.getOperand(1).getImm() == 0)
2494 BuildMI(MBB, I, MIMetadata(I), TII.get(TargetOpcode::MEMBARRIER));
2495 else
2496 BuildMI(MBB, I, MIMetadata(I), TII.get(AArch64::DMB))
2497 .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
2498 I.eraseFromParent();
2499 return true;
2500 }
2501 default:
2502 return false;
2503 }
2504}
2505
2506bool AArch64InstructionSelector::select(MachineInstr &I) {
2507 assert(I.getParent() && "Instruction should be in a basic block!");
2508 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2509
2510 MachineBasicBlock &MBB = *I.getParent();
2511 MachineFunction &MF = *MBB.getParent();
2512 MachineRegisterInfo &MRI = MF.getRegInfo();
2513
2514 const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
2515 if (Subtarget->requiresStrictAlign()) {
2516 // We don't support this feature yet.
2517 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2518 return false;
2519 }
2520
2521 MIB.setInstrAndDebugLoc(I);
2522 
2523 unsigned Opcode = I.getOpcode();
2524 // G_PHI requires same handling as PHI
2525 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2526 // Certain non-generic instructions also need some special handling.
2527
2528 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2529 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2530 
2531 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2532 const Register DefReg = I.getOperand(0).getReg();
2533 const LLT DefTy = MRI.getType(DefReg);
2534
2535 const RegClassOrRegBank &RegClassOrBank =
2536 MRI.getRegClassOrRegBank(DefReg);
2537
2538 const TargetRegisterClass *DefRC =
2539 dyn_cast_if_present<const TargetRegisterClass *>(RegClassOrBank);
2540 if (!DefRC) {
2541 if (!DefTy.isValid()) {
2542 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2543 return false;
2544 }
2545 const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
2546 DefRC = getRegClassForTypeOnBank(DefTy, RB);
2547 if (!DefRC) {
2548 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2549 return false;
2550 }
2551 }
2552
2553 I.setDesc(TII.get(TargetOpcode::PHI));
2554
2555 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2556 }
2557
2558 if (I.isCopy())
2559 return selectCopy(I, TII, MRI, TRI, RBI);
2560
2561 if (I.isDebugInstr())
2562 return selectDebugInstr(I, MRI, RBI);
2563
2564 return true;
2565 }
2566
2567
2568 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2569 LLVM_DEBUG(
2570 dbgs() << "Generic instruction has unexpected implicit operands\n");
2571 return false;
2572 }
2573
2574 // Try to do some lowering before we start instruction selecting. These
2575 // lowerings are purely transformations on the input G_MIR and so selection
2576 // must continue after any modification of the instruction.
2577 if (preISelLower(I)) {
2578 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2579 }
2580
2581 // There may be patterns that the importer can't handle optimally: it
2582 // selects them to a suboptimal sequence, so our custom C++ selection code
2583 // later never has a chance to work on them. Therefore, we have an early
2584 // selection attempt here to give priority to certain selection routines
2585 // over the imported ones.
2586 if (earlySelect(I))
2587 return true;
2588
2589 if (selectImpl(I, *CoverageInfo))
2590 return true;
2591
2592 LLT Ty =
2593 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2594
2595 switch (Opcode) {
2596 case TargetOpcode::G_SBFX:
2597 case TargetOpcode::G_UBFX: {
2598 static const unsigned OpcTable[2][2] = {
2599 {AArch64::UBFMWri, AArch64::UBFMXri},
2600 {AArch64::SBFMWri, AArch64::SBFMXri}};
2601 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2602 unsigned Size = Ty.getSizeInBits();
2603 unsigned Opc = OpcTable[IsSigned][Size == 64];
2604 auto Cst1 =
2605 getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
2606 assert(Cst1 && "Should have gotten a constant for src 1?");
2607 auto Cst2 =
2608 getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
2609 assert(Cst2 && "Should have gotten a constant for src 2?");
2610 auto LSB = Cst1->Value.getZExtValue();
2611 auto Width = Cst2->Value.getZExtValue();
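 // SBFM/UBFM take immr = LSB and imms = LSB + Width - 1, i.e. the positions
 // of the first and last bits of the field being extracted.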
2612 auto BitfieldInst =
2613 MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
2614 .addImm(LSB)
2615 .addImm(LSB + Width - 1);
2616 I.eraseFromParent();
2617 return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
2618 }
2619 case TargetOpcode::G_BRCOND:
2620 return selectCompareBranch(I, MF, MRI);
2621
2622 case TargetOpcode::G_BRINDIRECT: {
2623 const Function &Fn = MF.getFunction();
2624 if (std::optional<uint16_t> BADisc =
2625 STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(Fn)) {
2626 auto MI = MIB.buildInstr(AArch64::BRA, {}, {I.getOperand(0).getReg()});
2627 MI.addImm(AArch64PACKey::IA);
2628 MI.addImm(*BADisc);
2629 MI.addReg(/*AddrDisc=*/AArch64::XZR);
2630 I.eraseFromParent();
2631 return constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
2632 }
2633 I.setDesc(TII.get(AArch64::BR));
2634 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2635 }
2636
2637 case TargetOpcode::G_BRJT:
2638 return selectBrJT(I, MRI);
2639
2640 case AArch64::G_ADD_LOW: {
2641 // This op may have been separated from its ADRP companion by the localizer
2642 // or some other code motion pass. Given that many CPUs will try to
2643 // macro-fuse these operations anyway, select this into a MOVaddr pseudo
2644 // which will later be expanded into an ADRP+ADD pair after scheduling.
2645 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2646 if (BaseMI->getOpcode() != AArch64::ADRP) {
2647 I.setDesc(TII.get(AArch64::ADDXri));
2648 I.addOperand(MachineOperand::CreateImm(0));
2649 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2650 }
2652 "Expected small code model");
2653 auto Op1 = BaseMI->getOperand(1);
2654 auto Op2 = I.getOperand(2);
2655 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2656 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2657 Op1.getTargetFlags())
2658 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2659 Op2.getTargetFlags());
2660 I.eraseFromParent();
2661 return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2662 }
2663
2664 case TargetOpcode::G_FCONSTANT:
2665 case TargetOpcode::G_CONSTANT: {
2666 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2667
2668 const LLT s8 = LLT::scalar(8);
2669 const LLT s16 = LLT::scalar(16);
2670 const LLT s32 = LLT::scalar(32);
2671 const LLT s64 = LLT::scalar(64);
2672 const LLT s128 = LLT::scalar(128);
2673 const LLT p0 = LLT::pointer(0, 64);
2674
2675 const Register DefReg = I.getOperand(0).getReg();
2676 const LLT DefTy = MRI.getType(DefReg);
2677 const unsigned DefSize = DefTy.getSizeInBits();
2678 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2679
2680 // FIXME: Redundant check, but even less readable when factored out.
2681 if (isFP) {
2682 if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2683 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2684 << " constant, expected: " << s16 << " or " << s32
2685 << " or " << s64 << " or " << s128 << '\n');
2686 return false;
2687 }
2688
2689 if (RB.getID() != AArch64::FPRRegBankID) {
2690 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2691 << " constant on bank: " << RB
2692 << ", expected: FPR\n");
2693 return false;
2694 }
2695
2696 // The case when we have 0.0 is covered by tablegen. Reject it here so we
2697 // can be sure tablegen works correctly and isn't rescued by this code.
2698 // 0.0 is not covered by tablegen for FP128. So we will handle this
2699 // scenario in the code here.
2700 if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2701 return false;
2702 } else {
2703 // s32 and s64 are covered by tablegen.
2704 if (Ty != p0 && Ty != s8 && Ty != s16) {
2705 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2706 << " constant, expected: " << s32 << ", " << s64
2707 << ", or " << p0 << '\n');
2708 return false;
2709 }
2710
2711 if (RB.getID() != AArch64::GPRRegBankID) {
2712 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2713 << " constant on bank: " << RB
2714 << ", expected: GPR\n");
2715 return false;
2716 }
2717 }
2718
2719 if (isFP) {
2720 const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB);
2721 // For 16, 64, and 128b values, emit a constant pool load.
2722 switch (DefSize) {
2723 default:
2724 llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
2725 case 32:
2726 case 64: {
2727 bool OptForSize = shouldOptForSize(&MF);
2728 const auto &TLI = MF.getSubtarget().getTargetLowering();
2729 // If TLI says that this fpimm is illegal, then we'll expand to a
2730 // constant pool load.
2731 if (TLI->isFPImmLegal(I.getOperand(1).getFPImm()->getValueAPF(),
2732 EVT::getFloatingPointVT(DefSize), OptForSize))
2733 break;
2734 [[fallthrough]];
2735 }
2736 case 16:
2737 case 128: {
2738 auto *FPImm = I.getOperand(1).getFPImm();
2739 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2740 if (!LoadMI) {
2741 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2742 return false;
2743 }
2744 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2745 I.eraseFromParent();
2746 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2747 }
2748 }
2749
2750 assert((DefSize == 32 || DefSize == 64) && "Unexpected const def size");
2751 // Either emit a FMOV, or emit a copy to emit a normal mov.
2752 const Register DefGPRReg = MRI.createVirtualRegister(
2753 DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2754 MachineOperand &RegOp = I.getOperand(0);
2755 RegOp.setReg(DefGPRReg);
2756 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2757 MIB.buildCopy({DefReg}, {DefGPRReg});
2758
2759 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2760 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2761 return false;
2762 }
2763
2764 MachineOperand &ImmOp = I.getOperand(1);
2765 // FIXME: Is going through int64_t always correct?
2766 ImmOp.ChangeToImmediate(
2767 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2768 } else if (I.getOperand(1).isCImm()) {
2769 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2770 I.getOperand(1).ChangeToImmediate(Val);
2771 } else if (I.getOperand(1).isImm()) {
2772 uint64_t Val = I.getOperand(1).getImm();
2773 I.getOperand(1).ChangeToImmediate(Val);
2774 }
2775
2776 const unsigned MovOpc =
2777 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2778 I.setDesc(TII.get(MovOpc));
2779 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2780 return true;
2781 }
2782 case TargetOpcode::G_EXTRACT: {
2783 Register DstReg = I.getOperand(0).getReg();
2784 Register SrcReg = I.getOperand(1).getReg();
2785 LLT SrcTy = MRI.getType(SrcReg);
2786 LLT DstTy = MRI.getType(DstReg);
2787 (void)DstTy;
2788 unsigned SrcSize = SrcTy.getSizeInBits();
2789
2790 if (SrcTy.getSizeInBits() > 64) {
2791 // This should be an extract of an s128, which is like a vector extract.
2792 if (SrcTy.getSizeInBits() != 128)
2793 return false;
2794 // Only support extracting 64 bits from an s128 at the moment.
2795 if (DstTy.getSizeInBits() != 64)
2796 return false;
2797
2798 unsigned Offset = I.getOperand(2).getImm();
2799 if (Offset % 64 != 0)
2800 return false;
2801
2802 // Check we have the right regbank always.
2803 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2804 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2805 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2806
2807 if (SrcRB.getID() == AArch64::GPRRegBankID) {
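 // On the GPR bank an s128 value lives in a 64-bit register pair, so the
 // extract is just a subregister copy: sube64 is the low half, subo64 the
 // high half.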
2808 auto NewI =
2809 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2810 .addUse(SrcReg, 0,
2811 Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2812 constrainOperandRegClass(MF, TRI, MRI, TII, RBI, *NewI,
2813 AArch64::GPR64RegClass, NewI->getOperand(0));
2814 I.eraseFromParent();
2815 return true;
2816 }
2817
2818 // Emit the same code as a vector extract.
2819 // Offset must be a multiple of 64.
2820 unsigned LaneIdx = Offset / 64;
2821 MachineInstr *Extract = emitExtractVectorElt(
2822 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2823 if (!Extract)
2824 return false;
2825 I.eraseFromParent();
2826 return true;
2827 }
2828
2829 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2830 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2831 Ty.getSizeInBits() - 1);
2832
2833 if (SrcSize < 64) {
2834 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2835 "unexpected G_EXTRACT types");
2836 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2837 }
2838
2839 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2840 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2841 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2842 .addReg(DstReg, 0, AArch64::sub_32);
2843 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2844 AArch64::GPR32RegClass, MRI);
2845 I.getOperand(0).setReg(DstReg);
2846
2847 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2848 }
2849
2850 case TargetOpcode::G_INSERT: {
2851 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2852 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2853 unsigned DstSize = DstTy.getSizeInBits();
2854 // Larger inserts are vectors, same-size ones should be something else by
2855 // now (split up or turned into COPYs).
2856 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2857 return false;
2858
2859 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2860 unsigned LSB = I.getOperand(3).getImm();
2861 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2862 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2863 MachineInstrBuilder(MF, I).addImm(Width - 1);
2864
2865 if (DstSize < 64) {
2866 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2867 "unexpected G_INSERT types");
2868 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2869 }
2870
2871 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2872 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2873 TII.get(AArch64::SUBREG_TO_REG))
2874 .addDef(SrcReg)
2875 .addImm(0)
2876 .addUse(I.getOperand(2).getReg())
2877 .addImm(AArch64::sub_32);
2878 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2879 AArch64::GPR32RegClass, MRI);
2880 I.getOperand(2).setReg(SrcReg);
2881
2882 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2883 }
2884 case TargetOpcode::G_FRAME_INDEX: {
2885 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2886 if (Ty != LLT::pointer(0, 64)) {
2887 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2888 << ", expected: " << LLT::pointer(0, 64) << '\n');
2889 return false;
2890 }
2891 I.setDesc(TII.get(AArch64::ADDXri));
2892
2893 // MOs for a #0 shifted immediate.
2894 I.addOperand(MachineOperand::CreateImm(0));
2895 I.addOperand(MachineOperand::CreateImm(0));
2896
2897 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2898 }
2899
2900 case TargetOpcode::G_GLOBAL_VALUE: {
2901 const GlobalValue *GV = nullptr;
2902 unsigned OpFlags;
2903 if (I.getOperand(1).isSymbol()) {
2904 OpFlags = I.getOperand(1).getTargetFlags();
2905 // Currently only used by "RtLibUseGOT".
2906 assert(OpFlags == AArch64II::MO_GOT);
2907 } else {
2908 GV = I.getOperand(1).getGlobal();
2909 if (GV->isThreadLocal()) {
2910 // We don't support instructions with emulated TLS variables yet
2911 if (TM.useEmulatedTLS())
2912 return false;
2913 return selectTLSGlobalValue(I, MRI);
2914 }
2915 OpFlags = STI.ClassifyGlobalReference(GV, TM);
2916 }
2917
2918 if (OpFlags & AArch64II::MO_GOT) {
2919 bool IsGOTSigned = MF.getInfo<AArch64FunctionInfo>()->hasELFSignedGOT();
2920 I.setDesc(TII.get(IsGOTSigned ? AArch64::LOADgotAUTH : AArch64::LOADgot));
2921 I.getOperand(1).setTargetFlags(OpFlags);
2922 I.addImplicitDefUseOperands(MF);
2923 } else if (TM.getCodeModel() == CodeModel::Large &&
2924 !TM.isPositionIndependent()) {
2925 // Materialize the global using movz/movk instructions.
2926 materializeLargeCMVal(I, GV, OpFlags);
2927 I.eraseFromParent();
2928 return true;
2929 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2930 I.setDesc(TII.get(AArch64::ADR));
2931 I.getOperand(1).setTargetFlags(OpFlags);
2932 } else {
2933 I.setDesc(TII.get(AArch64::MOVaddr));
2934 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2935 MachineInstrBuilder MIB(MF, I);
2936 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2937 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2938 }
2939 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2940 }
2941
2942 case TargetOpcode::G_PTRAUTH_GLOBAL_VALUE:
2943 return selectPtrAuthGlobalValue(I, MRI);
2944
2945 case TargetOpcode::G_ZEXTLOAD:
2946 case TargetOpcode::G_LOAD:
2947 case TargetOpcode::G_STORE: {
2948 GLoadStore &LdSt = cast<GLoadStore>(I);
2949 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2950 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
2951
2952 // Can only handle AddressSpace 0, 64-bit pointers.
2953 if (PtrTy != LLT::pointer(0, 64)) {
2954 return false;
2955 }
2956
2957 uint64_t MemSizeInBytes = LdSt.getMemSize().getValue();
2958 unsigned MemSizeInBits = LdSt.getMemSizeInBits().getValue();
2959 AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
2960
2961 // Need special instructions for atomics that affect ordering.
2962 if (isStrongerThanMonotonic(Order)) {
2963 assert(!isa<GZExtLoad>(LdSt));
2964 assert(MemSizeInBytes <= 8 &&
2965 "128-bit atomics should already be custom-legalized");
2966
2967 if (isa<GLoad>(LdSt)) {
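 // Both opcode tables are indexed by Log2 of the access size in bytes:
 // 0 = byte, 1 = half, 2 = word, 3 = doubleword.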
2968 static constexpr unsigned LDAPROpcodes[] = {
2969 AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
2970 static constexpr unsigned LDAROpcodes[] = {
2971 AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
2972 ArrayRef<unsigned> Opcodes =
2973 STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
2974 ? LDAPROpcodes
2975 : LDAROpcodes;
2976 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2977 } else {
2978 static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2979 AArch64::STLRW, AArch64::STLRX};
2980 Register ValReg = LdSt.getReg(0);
2981 if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
2982 // Emit a subreg copy of 32 bits.
2983 Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2984 MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
2985 .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
2986 I.getOperand(0).setReg(NewVal);
2987 }
2988 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2989 }
2990 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2991 return true;
2992 }
2993
2994#ifndef NDEBUG
2995 const Register PtrReg = LdSt.getPointerReg();
2996 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2997 // Check that the pointer register is valid.
2998 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2999 "Load/Store pointer operand isn't a GPR");
3000 assert(MRI.getType(PtrReg).isPointer() &&
3001 "Load/Store pointer operand isn't a pointer");
3002#endif
3003
3004 const Register ValReg = LdSt.getReg(0);
3005 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
3006 LLT ValTy = MRI.getType(ValReg);
3007
3008 // The code below doesn't support truncating stores, so we need to split it
3009 // again.
3010 if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
3011 unsigned SubReg;
3012 LLT MemTy = LdSt.getMMO().getMemoryType();
3013 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
3014 if (!getSubRegForClass(RC, TRI, SubReg))
3015 return false;
3016
3017 // Generate a subreg copy.
3018 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
3019 .addReg(ValReg, 0, SubReg)
3020 .getReg(0);
3021 RBI.constrainGenericRegister(Copy, *RC, MRI);
3022 LdSt.getOperand(0).setReg(Copy);
3023 } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
3024 // If this is an any-extending load from the FPR bank, split it into a regular
3025 // load + extend.
3026 if (RB.getID() == AArch64::FPRRegBankID) {
3027 unsigned SubReg;
3028 LLT MemTy = LdSt.getMMO().getMemoryType();
3029 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
3030 if (!getSubRegForClass(RC, TRI, SubReg))
3031 return false;
3032 Register OldDst = LdSt.getReg(0);
3033 Register NewDst =
3034 MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
3035 LdSt.getOperand(0).setReg(NewDst);
3036 MRI.setRegBank(NewDst, RB);
3037 // Generate a SUBREG_TO_REG to extend it.
3038 MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
3039 MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
3040 .addImm(0)
3041 .addUse(NewDst)
3042 .addImm(SubReg);
3043 auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB);
3044 RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
3045 MIB.setInstr(LdSt);
3046 ValTy = MemTy; // This is no longer an extending load.
3047 }
3048 }
3049
3050 // Helper lambda for partially selecting I. Either returns the original
3051 // instruction with an updated opcode, or a new instruction.
3052 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
3053 bool IsStore = isa<GStore>(I);
3054 const unsigned NewOpc =
3055 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
3056 if (NewOpc == I.getOpcode())
3057 return nullptr;
3058 // Check if we can fold anything into the addressing mode.
3059 auto AddrModeFns =
3060 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
3061 if (!AddrModeFns) {
3062 // Can't fold anything. Use the original instruction.
3063 I.setDesc(TII.get(NewOpc));
3064 I.addOperand(MachineOperand::CreateImm(0));
3065 return &I;
3066 }
3067
3068 // Folded something. Create a new instruction and return it.
3069 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
3070 Register CurValReg = I.getOperand(0).getReg();
3071 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
3072 NewInst.cloneMemRefs(I);
3073 for (auto &Fn : *AddrModeFns)
3074 Fn(NewInst);
3075 I.eraseFromParent();
3076 return &*NewInst;
3077 };
3078
3079 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
3080 if (!LoadStore)
3081 return false;
3082
3083 // If we're storing a 0, use WZR/XZR.
3084 if (Opcode == TargetOpcode::G_STORE) {
3085 auto CVal = getIConstantVRegValWithLookThrough(
3086 LoadStore->getOperand(0).getReg(), MRI);
3087 if (CVal && CVal->Value == 0) {
3088 switch (LoadStore->getOpcode()) {
3089 case AArch64::STRWui:
3090 case AArch64::STRHHui:
3091 case AArch64::STRBBui:
3092 LoadStore->getOperand(0).setReg(AArch64::WZR);
3093 break;
3094 case AArch64::STRXui:
3095 LoadStore->getOperand(0).setReg(AArch64::XZR);
3096 break;
3097 }
3098 }
3099 }
3100
3101 if (IsZExtLoad || (Opcode == TargetOpcode::G_LOAD &&
3102 ValTy == LLT::scalar(64) && MemSizeInBits == 32)) {
3103 // The any/zextload from a smaller type to i32 should be handled by the
3104 // importer.
3105 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
3106 return false;
3107 // If we have an extending load then change the load's type to be a
3108 // narrower reg and zero_extend with SUBREG_TO_REG.
3109 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3110 Register DstReg = LoadStore->getOperand(0).getReg();
3111 LoadStore->getOperand(0).setReg(LdReg);
3112
3113 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
3114 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
3115 .addImm(0)
3116 .addUse(LdReg)
3117 .addImm(AArch64::sub_32);
3118 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3119 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
3120 MRI);
3121 }
3122 return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3123 }
3124
3125 case TargetOpcode::G_INDEXED_ZEXTLOAD:
3126 case TargetOpcode::G_INDEXED_SEXTLOAD:
3127 return selectIndexedExtLoad(I, MRI);
3128 case TargetOpcode::G_INDEXED_LOAD:
3129 return selectIndexedLoad(I, MRI);
3130 case TargetOpcode::G_INDEXED_STORE:
3131 return selectIndexedStore(cast<GIndexedStore>(I), MRI);
3132
3133 case TargetOpcode::G_LSHR:
3134 case TargetOpcode::G_ASHR:
3135 if (MRI.getType(I.getOperand(0).getReg()).isVector())
3136 return selectVectorAshrLshr(I, MRI);
3137 [[fallthrough]];
3138 case TargetOpcode::G_SHL:
3139 if (Opcode == TargetOpcode::G_SHL &&
3140 MRI.getType(I.getOperand(0).getReg()).isVector())
3141 return selectVectorSHL(I, MRI);
3142
3143 // These shifts were legalized to have 64-bit shift amounts because we
3144 // want to take advantage of the selection patterns that assume the
3145 // immediates are s64s; however, selectBinaryOp assumes both operands
3146 // have the same bit size.
3147 {
3148 Register SrcReg = I.getOperand(1).getReg();
3149 Register ShiftReg = I.getOperand(2).getReg();
3150 const LLT ShiftTy = MRI.getType(ShiftReg);
3151 const LLT SrcTy = MRI.getType(SrcReg);
3152 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
3153 ShiftTy.getSizeInBits() == 64) {
3154 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
3155 // Insert a subregister copy to implement a 64->32 trunc
3156 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
3157 .addReg(ShiftReg, 0, AArch64::sub_32);
3158 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
3159 I.getOperand(2).setReg(Trunc.getReg(0));
3160 }
3161 }
3162 [[fallthrough]];
3163 case TargetOpcode::G_OR: {
3164 // Reject the various things we don't support yet.
3165 if (unsupportedBinOp(I, RBI, MRI, TRI))
3166 return false;
3167
3168 const unsigned OpSize = Ty.getSizeInBits();
3169
3170 const Register DefReg = I.getOperand(0).getReg();
3171 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
3172
3173 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
3174 if (NewOpc == I.getOpcode())
3175 return false;
3176
3177 I.setDesc(TII.get(NewOpc));
3178 // FIXME: Should the type be always reset in setDesc?
3179
3180 // Now that we selected an opcode, we need to constrain the register
3181 // operands to use appropriate classes.
3182 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3183 }
3184
3185 case TargetOpcode::G_PTR_ADD: {
3186 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
3187 I.eraseFromParent();
3188 return true;
3189 }
3190
3191 case TargetOpcode::G_SADDE:
3192 case TargetOpcode::G_UADDE:
3193 case TargetOpcode::G_SSUBE:
3194 case TargetOpcode::G_USUBE:
3195 case TargetOpcode::G_SADDO:
3196 case TargetOpcode::G_UADDO:
3197 case TargetOpcode::G_SSUBO:
3198 case TargetOpcode::G_USUBO:
3199 return selectOverflowOp(I, MRI);
3200
3201 case TargetOpcode::G_PTRMASK: {
3202 Register MaskReg = I.getOperand(2).getReg();
3203 std::optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
3204 // TODO: Implement arbitrary cases
3205 if (!MaskVal || !isShiftedMask_64(*MaskVal))
3206 return false;
3207
3208 uint64_t Mask = *MaskVal;
3209 I.setDesc(TII.get(AArch64::ANDXri));
3210 I.getOperand(2).ChangeToImmediate(
3211 AArch64_AM::encodeLogicalImmediate(Mask, 64));
3212 
3213 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3214 }
3215 case TargetOpcode::G_PTRTOINT:
3216 case TargetOpcode::G_TRUNC: {
3217 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3218 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3219
3220 const Register DstReg = I.getOperand(0).getReg();
3221 const Register SrcReg = I.getOperand(1).getReg();
3222
3223 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3224 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3225
3226 if (DstRB.getID() != SrcRB.getID()) {
3227 LLVM_DEBUG(
3228 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
3229 return false;
3230 }
3231
3232 if (DstRB.getID() == AArch64::GPRRegBankID) {
3233 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3234 if (!DstRC)
3235 return false;
3236
3237 const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB);
3238 if (!SrcRC)
3239 return false;
3240
3241 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
3242 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
3243 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
3244 return false;
3245 }
3246
3247 if (DstRC == SrcRC) {
3248 // Nothing to be done
3249 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
3250 SrcTy == LLT::scalar(64)) {
3251 llvm_unreachable("TableGen can import this case");
3252 return false;
3253 } else if (DstRC == &AArch64::GPR32RegClass &&
3254 SrcRC == &AArch64::GPR64RegClass) {
3255 I.getOperand(1).setSubReg(AArch64::sub_32);
3256 } else {
3257 LLVM_DEBUG(
3258 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3259 return false;
3260 }
3261
3262 I.setDesc(TII.get(TargetOpcode::COPY));
3263 return true;
3264 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3265 if (DstTy == LLT::fixed_vector(4, 16) &&
3266 SrcTy == LLT::fixed_vector(4, 32)) {
3267 I.setDesc(TII.get(AArch64::XTNv4i16));
3268 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3269 return true;
3270 }
3271
3272 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3273 MachineInstr *Extract = emitExtractVectorElt(
3274 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
3275 if (!Extract)
3276 return false;
3277 I.eraseFromParent();
3278 return true;
3279 }
3280
3281 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3282 if (Opcode == TargetOpcode::G_PTRTOINT) {
3283 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
3284 I.setDesc(TII.get(TargetOpcode::COPY));
3285 return selectCopy(I, TII, MRI, TRI, RBI);
3286 }
3287 }
3288
3289 return false;
3290 }
3291
3292 case TargetOpcode::G_ANYEXT: {
3293 if (selectUSMovFromExtend(I, MRI))
3294 return true;
3295
3296 const Register DstReg = I.getOperand(0).getReg();
3297 const Register SrcReg = I.getOperand(1).getReg();
3298
3299 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
3300 if (RBDst.getID() != AArch64::GPRRegBankID) {
3301 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
3302 << ", expected: GPR\n");
3303 return false;
3304 }
3305
3306 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
3307 if (RBSrc.getID() != AArch64::GPRRegBankID) {
3308 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
3309 << ", expected: GPR\n");
3310 return false;
3311 }
3312
3313 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
3314
3315 if (DstSize == 0) {
3316 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
3317 return false;
3318 }
3319
3320 if (DstSize != 64 && DstSize > 32) {
3321 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
3322 << ", expected: 32 or 64\n");
3323 return false;
3324 }
3325 // At this point G_ANYEXT is just like a plain COPY, but we need
3326 // to explicitly form the 64-bit value if any.
3327 if (DstSize > 32) {
3328 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3329 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
3330 .addDef(ExtSrc)
3331 .addImm(0)
3332 .addUse(SrcReg)
3333 .addImm(AArch64::sub_32);
3334 I.getOperand(1).setReg(ExtSrc);
3335 }
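// e.g. %d:gpr(s64) = G_ANYEXT %s:gpr(s32) is handled by building
//   %t:gpr64all = SUBREG_TO_REG 0, %s, sub_32
// and then letting selectCopy below turn the remaining instruction into a
// plain COPY of %t into %d.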
3336 return selectCopy(I, TII, MRI, TRI, RBI);
3337 }
3338
3339 case TargetOpcode::G_ZEXT:
3340 case TargetOpcode::G_SEXT_INREG:
3341 case TargetOpcode::G_SEXT: {
3342 if (selectUSMovFromExtend(I, MRI))
3343 return true;
3344
3345 unsigned Opcode = I.getOpcode();
3346 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3347 const Register DefReg = I.getOperand(0).getReg();
3348 Register SrcReg = I.getOperand(1).getReg();
3349 const LLT DstTy = MRI.getType(DefReg);
3350 const LLT SrcTy = MRI.getType(SrcReg);
3351 unsigned DstSize = DstTy.getSizeInBits();
3352 unsigned SrcSize = SrcTy.getSizeInBits();
3353
3354 // SEXT_INREG has the same src reg size as dst, the size of the value to be
3355 // extended is encoded in the imm.
3356 if (Opcode == TargetOpcode::G_SEXT_INREG)
3357 SrcSize = I.getOperand(2).getImm();
3358
3359 if (DstTy.isVector())
3360 return false; // Should be handled by imported patterns.
3361
3362 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3363 AArch64::GPRRegBankID &&
3364 "Unexpected ext regbank");
3365
3366 MachineInstr *ExtI;
3367
3368 // First, check whether we're extending the result of a load whose destination
3369 // type is smaller than 32 bits; in that case this zext is redundant. GPR32 is
3370 // the smallest GPR register on AArch64, and all smaller loads automatically
3371 // zero-extend the upper bits. E.g.
3372 // %v(s8) = G_LOAD %p, :: (load 1)
3373 // %v2(s32) = G_ZEXT %v(s8)
3374 if (!IsSigned) {
3375 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
3376 bool IsGPR =
3377 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3378 if (LoadMI && IsGPR) {
3379 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3380 unsigned BytesLoaded = MemOp->getSize().getValue();
3381 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3382 return selectCopy(I, TII, MRI, TRI, RBI);
3383 }
3384
3385 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3386 // + SUBREG_TO_REG.
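// Roughly:
//   %tmp:gpr32 = ORRWrs $wzr, %src, 0   ; a 32-bit register move
//   %dst:gpr64 = SUBREG_TO_REG 0, %tmp, sub_32
// The W-register write implicitly zeroes the upper 32 bits, which provides
// the zero-extension for free.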
3387 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3388 Register SubregToRegSrc =
3389 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3390 const Register ZReg = AArch64::WZR;
3391 MIB.buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg})
3392 .addImm(0);
3393
3394 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3395 .addImm(0)
3396 .addUse(SubregToRegSrc)
3397 .addImm(AArch64::sub_32);
3398
3399 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3400 MRI)) {
3401 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3402 return false;
3403 }
3404
3405 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3406 MRI)) {
3407 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3408 return false;
3409 }
3410
3411 I.eraseFromParent();
3412 return true;
3413 }
3414 }
3415
3416 if (DstSize == 64) {
3417 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3418 // FIXME: Can we avoid manually doing this?
3419 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3420 MRI)) {
3421 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3422 << " operand\n");
3423 return false;
3424 }
3425 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
3426 {&AArch64::GPR64RegClass}, {})
3427 .addImm(0)
3428 .addUse(SrcReg)
3429 .addImm(AArch64::sub_32)
3430 .getReg(0);
3431 }
3432
3433 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3434 {DefReg}, {SrcReg})
3435 .addImm(0)
3436 .addImm(SrcSize - 1);
3437 } else if (DstSize <= 32) {
3438 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3439 {DefReg}, {SrcReg})
3440 .addImm(0)
3441 .addImm(SrcSize - 1);
3442 } else {
3443 return false;
3444 }
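// Note: UBFM/SBFM with immr=0 and imms=SrcSize-1 are the underlying encodings
// of the UXTB/UXTH and SXTB/SXTH/SXTW aliases, so the two bitfield-move forms
// above cover all the scalar extension widths handled here.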
3445
3446 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
3447 I.eraseFromParent();
3448 return true;
3449 }
3450
3451 case TargetOpcode::G_FREEZE:
3452 return selectCopy(I, TII, MRI, TRI, RBI);
3453
3454 case TargetOpcode::G_INTTOPTR:
3455 // The importer is currently unable to import pointer types since they
3456 // didn't exist in SelectionDAG.
3457 return selectCopy(I, TII, MRI, TRI, RBI);
3458
3459 case TargetOpcode::G_BITCAST:
3460 // Imported SelectionDAG rules can handle every bitcast except those that
3461 // bitcast from a type to the same type. Ideally, these shouldn't occur
3462 // but we might not run an optimizer that deletes them. The other exception
3463 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3464 // of them.
3465 return selectCopy(I, TII, MRI, TRI, RBI);
3466
3467 case TargetOpcode::G_SELECT: {
3468 auto &Sel = cast<GSelect>(I);
3469 const Register CondReg = Sel.getCondReg();
3470 const Register TReg = Sel.getTrueReg();
3471 const Register FReg = Sel.getFalseReg();
3472
3473 if (tryOptSelect(Sel))
3474 return true;
3475
3476 // Make sure to use an unused vreg instead of wzr, so that the peephole
3477 // optimizations will be able to optimize these.
3478 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3479 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3480 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3481 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3482 if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
3483 return false;
3484 Sel.eraseFromParent();
3485 return true;
3486 }
3487 case TargetOpcode::G_ICMP: {
3488 if (Ty.isVector())
3489 return false;
3490
3491 if (Ty != LLT::scalar(32)) {
3492 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3493 << ", expected: " << LLT::scalar(32) << '\n');
3494 return false;
3495 }
3496
3497 auto &PredOp = I.getOperand(1);
3498 emitIntegerCompare(I.getOperand(2), I.getOperand(3), PredOp, MIB);
3499 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
3500 const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
3501 CmpInst::getInversePredicate(Pred), I.getOperand(3).getReg(), &MRI);
3502 emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
3503 /*Src2=*/AArch64::WZR, InvCC, MIB);
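// CSINC Wd, WZR, WZR, <inverted cc> is the CSET alias, so this materializes
// the boolean result of the compare as 0/1 directly in the destination.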
3504 I.eraseFromParent();
3505 return true;
3506 }
3507
3508 case TargetOpcode::G_FCMP: {
3509 CmpInst::Predicate Pred =
3510 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3511 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
3512 Pred) ||
3513 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
3514 return false;
3515 I.eraseFromParent();
3516 return true;
3517 }
3518 case TargetOpcode::G_VASTART:
3519 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3520 : selectVaStartAAPCS(I, MF, MRI);
3521 case TargetOpcode::G_INTRINSIC:
3522 return selectIntrinsic(I, MRI);
3523 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3524 return selectIntrinsicWithSideEffects(I, MRI);
3525 case TargetOpcode::G_IMPLICIT_DEF: {
3526 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3527 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3528 const Register DstReg = I.getOperand(0).getReg();
3529 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3530 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3531 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3532 return true;
3533 }
3534 case TargetOpcode::G_BLOCK_ADDR: {
3535 Function *BAFn = I.getOperand(1).getBlockAddress()->getFunction();
3536 if (std::optional<uint16_t> BADisc =
3537 STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(*BAFn)) {
3538 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X16}, {});
3539 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
3540 MIB.buildInstr(AArch64::MOVaddrPAC)
3541 .addBlockAddress(I.getOperand(1).getBlockAddress())
3542 .addImm(AArch64PACKey::IA)
3543 .addReg(/*AddrDisc=*/AArch64::XZR)
3544 .addImm(*BADisc)
3545 .constrainAllUses(TII, TRI, RBI);
3546 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X16));
3547 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
3548 AArch64::GPR64RegClass, MRI);
3549 I.eraseFromParent();
3550 return true;
3551 }
3552 if (TM.getCodeModel() == CodeModel::Large && !TM.isPositionIndependent()) {
3553 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3554 I.eraseFromParent();
3555 return true;
3556 } else {
3557 I.setDesc(TII.get(AArch64::MOVaddrBA));
3558 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3559 I.getOperand(0).getReg())
3560 .addBlockAddress(I.getOperand(1).getBlockAddress(),
3561 /* Offset */ 0, AArch64II::MO_PAGE)
3562 .addBlockAddress(
3563 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3564 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3565 I.eraseFromParent();
3566 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3567 }
3568 }
3569 case AArch64::G_DUP: {
3570 // When the scalar of G_DUP is an s8/s16 gpr, it can't be selected by the
3571 // imported patterns, so do it manually here. Avoiding generating an s16 gpr
3572 // is difficult because at RBS we may end up pessimizing the fpr case if we
3573 // decide to add an anyextend to fix this. Manual selection is the most
3574 // robust solution for now.
3575 if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3576 AArch64::GPRRegBankID)
3577 return false; // We expect the fpr regbank case to be imported.
3578 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3579 if (VecTy == LLT::fixed_vector(8, 8))
3580 I.setDesc(TII.get(AArch64::DUPv8i8gpr));
3581 else if (VecTy == LLT::fixed_vector(16, 8))
3582 I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3583 else if (VecTy == LLT::fixed_vector(4, 16))
3584 I.setDesc(TII.get(AArch64::DUPv4i16gpr));
3585 else if (VecTy == LLT::fixed_vector(8, 16))
3586 I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3587 else
3588 return false;
3589 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3590 }
3591 case TargetOpcode::G_BUILD_VECTOR:
3592 return selectBuildVector(I, MRI);
3593 case TargetOpcode::G_MERGE_VALUES:
3594 return selectMergeValues(I, MRI);
3595 case TargetOpcode::G_UNMERGE_VALUES:
3596 return selectUnmergeValues(I, MRI);
3597 case TargetOpcode::G_SHUFFLE_VECTOR:
3598 return selectShuffleVector(I, MRI);
3599 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3600 return selectExtractElt(I, MRI);
3601 case TargetOpcode::G_CONCAT_VECTORS:
3602 return selectConcatVectors(I, MRI);
3603 case TargetOpcode::G_JUMP_TABLE:
3604 return selectJumpTable(I, MRI);
3605 case TargetOpcode::G_MEMCPY:
3606 case TargetOpcode::G_MEMCPY_INLINE:
3607 case TargetOpcode::G_MEMMOVE:
3608 case TargetOpcode::G_MEMSET:
3609 assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
3610 return selectMOPS(I, MRI);
3611 }
3612
3613 return false;
3614}
3615
3616bool AArch64InstructionSelector::selectAndRestoreState(MachineInstr &I) {
3617 MachineIRBuilderState OldMIBState = MIB.getState();
3618 bool Success = select(I);
3619 MIB.setState(OldMIBState);
3620 return Success;
3621}
3622
3623bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
3624 MachineRegisterInfo &MRI) {
3625 unsigned Mopcode;
3626 switch (GI.getOpcode()) {
3627 case TargetOpcode::G_MEMCPY:
3628 case TargetOpcode::G_MEMCPY_INLINE:
3629 Mopcode = AArch64::MOPSMemoryCopyPseudo;
3630 break;
3631 case TargetOpcode::G_MEMMOVE:
3632 Mopcode = AArch64::MOPSMemoryMovePseudo;
3633 break;
3634 case TargetOpcode::G_MEMSET:
3635 // For tagged memset see llvm.aarch64.mops.memset.tag
3636 Mopcode = AArch64::MOPSMemorySetPseudo;
3637 break;
3638 }
3639
3640 auto &DstPtr = GI.getOperand(0);
3641 auto &SrcOrVal = GI.getOperand(1);
3642 auto &Size = GI.getOperand(2);
3643
3644 // Create copies of the registers that can be clobbered.
3645 const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
3646 const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
3647 const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg());
3648
3649 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3650 const auto &SrcValRegClass =
3651 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3652
3653 // Constrain to specific registers
3654 RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
3655 RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
3656 RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);
3657
3658 MIB.buildCopy(DstPtrCopy, DstPtr);
3659 MIB.buildCopy(SrcValCopy, SrcOrVal);
3660 MIB.buildCopy(SizeCopy, Size);
3661
3662 // New instruction uses the copied registers because it must update them.
3663 // The defs are not used since they don't exist in G_MEM*. They are still
3664 // tied.
3665 // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE
3666 Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
3667 Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3668 if (IsSet) {
3669 MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
3670 {DstPtrCopy, SizeCopy, SrcValCopy});
3671 } else {
3672 Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
3673 MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
3674 {DstPtrCopy, SrcValCopy, SizeCopy});
3675 }
3676
3677 GI.eraseFromParent();
3678 return true;
3679}
3680
3681bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3682 MachineRegisterInfo &MRI) {
3683 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3684 Register JTAddr = I.getOperand(0).getReg();
3685 unsigned JTI = I.getOperand(1).getIndex();
3686 Register Index = I.getOperand(2).getReg();
3687
3688 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3689
3690 // With aarch64-jump-table-hardening, we only expand the jump table dispatch
3691 // sequence later, to guarantee the integrity of the intermediate values.
3692 if (MF->getFunction().hasFnAttribute("aarch64-jump-table-hardening")) {
3693 CodeModel::Model CM = TM.getCodeModel();
3694 if (STI.isTargetMachO()) {
3695 if (CM != CodeModel::Small && CM != CodeModel::Large)
3696 report_fatal_error("Unsupported code-model for hardened jump-table");
3697 } else {
3698 // Note that COFF support would likely also need JUMP_TABLE_DEBUG_INFO.
3699 assert(STI.isTargetELF() &&
3700 "jump table hardening only supported on MachO/ELF");
3701 if (CM != CodeModel::Small)
3702 report_fatal_error("Unsupported code-model for hardened jump-table");
3703 }
3704
3705 MIB.buildCopy({AArch64::X16}, I.getOperand(2).getReg());
3706 MIB.buildInstr(AArch64::BR_JumpTable)
3707 .addJumpTableIndex(I.getOperand(1).getIndex());
3708 I.eraseFromParent();
3709 return true;
3710 }
3711
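// Default (non-hardened) lowering: JumpTableDest32 is a pseudo that computes
// the branch target from the jump-table address and the entry index; the "32"
// appears to correspond to the 4-byte entry size registered above, and the
// pseudo is expanded into the concrete table-load/add sequence later.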
3712 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3713 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3714
3715 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3716 {TargetReg, ScratchReg}, {JTAddr, Index})
3717 .addJumpTableIndex(JTI);
3718 // Save the jump table info.
3719 MIB.buildInstr(TargetOpcode::JUMP_TABLE_DEBUG_INFO, {},
3720 {static_cast<int64_t>(JTI)});
3721 // Build the indirect branch.
3722 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3723 I.eraseFromParent();
3724 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3725}
3726
3727bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3728 MachineRegisterInfo &MRI) {
3729 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3730 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3731
3732 Register DstReg = I.getOperand(0).getReg();
3733 unsigned JTI = I.getOperand(1).getIndex();
3734 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
3735 auto MovMI =
3736 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3737 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3738 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3739 I.eraseFromParent();
3740 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3741}
3742
3743bool AArch64InstructionSelector::selectTLSGlobalValue(
3744 MachineInstr &I, MachineRegisterInfo &MRI) {
3745 if (!STI.isTargetMachO())
3746 return false;
3747 MachineFunction &MF = *I.getParent()->getParent();
3748 MF.getFrameInfo().setAdjustsStack(true);
3749
3750 const auto &GlobalOp = I.getOperand(1);
3751 assert(GlobalOp.getOffset() == 0 &&
3752 "Shouldn't have an offset on TLS globals!");
3753 const GlobalValue &GV = *GlobalOp.getGlobal();
3754
3755 auto LoadGOT =
3756 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3757 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3758
3759 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3760 {LoadGOT.getReg(0)})
3761 .addImm(0);
3762
3763 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3764 // TLS calls preserve all registers except those that absolutely must be
3765 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3766 // silly).
3767 unsigned Opcode = getBLRCallOpcode(MF);
3768
3769 // With ptrauth-calls, the tlv access thunk pointer is authenticated (IA, 0).
3770 if (MF.getFunction().hasFnAttribute("ptrauth-calls")) {
3771 assert(Opcode == AArch64::BLR);
3772 Opcode = AArch64::BLRAAZ;
3773 }
3774
3775 MIB.buildInstr(Opcode, {}, {Load})
3776 .addUse(AArch64::X0, RegState::Implicit)
3777 .addDef(AArch64::X0, RegState::Implicit)
3778 .addRegMask(TRI.getTLSCallPreservedMask());
3779
3780 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3781 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3782 MRI);
3783 I.eraseFromParent();
3784 return true;
3785}
3786
3787MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3788 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3789 MachineIRBuilder &MIRBuilder) const {
3790 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3791
3792 auto BuildFn = [&](unsigned SubregIndex) {
3793 auto Ins =
3794 MIRBuilder
3795 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3796 .addImm(SubregIndex);
3797 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3798 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3799 return &*Ins;
3800 };
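// For example, widening a 32-bit FPR scalar produces roughly:
//   %undef:fpr128 = IMPLICIT_DEF
//   %vec:fpr128 = INSERT_SUBREG %undef, %scalar, ssub
// i.e. the scalar lands in lane 0 and the remaining lanes are undefined.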
3801
3802 switch (EltSize) {
3803 case 8:
3804 return BuildFn(AArch64::bsub);
3805 case 16:
3806 return BuildFn(AArch64::hsub);
3807 case 32:
3808 return BuildFn(AArch64::ssub);
3809 case 64:
3810 return BuildFn(AArch64::dsub);
3811 default:
3812 return nullptr;
3813 }
3814}
3815
3816MachineInstr *
3817AArch64InstructionSelector::emitNarrowVector(Register DstReg, Register SrcReg,
3818 MachineIRBuilder &MIB,
3819 MachineRegisterInfo &MRI) const {
3820 LLT DstTy = MRI.getType(DstReg);
3821 const TargetRegisterClass *RC =
3822 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(SrcReg, MRI, TRI));
3823 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3824 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3825 return nullptr;
3826 }
3827 unsigned SubReg = 0;
3828 if (!getSubRegForClass(RC, TRI, SubReg))
3829 return nullptr;
3830 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3831 LLVM_DEBUG(dbgs() << "Unsupported destination size! ("
3832 << DstTy.getSizeInBits() << ")\n");
3833 return nullptr;
3834 }
3835 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3836 .addReg(SrcReg, 0, SubReg);
3837 RBI.constrainGenericRegister(DstReg, *RC, MRI);
3838 return Copy;
3839}
3840
3841bool AArch64InstructionSelector::selectMergeValues(
3842 MachineInstr &I, MachineRegisterInfo &MRI) {
3843 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3844 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3845 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3846 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3847 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3848
3849 if (I.getNumOperands() != 3)
3850 return false;
3851
3852 // Merging 2 s64s into an s128.
3853 if (DstTy == LLT::scalar(128)) {
3854 if (SrcTy.getSizeInBits() != 64)
3855 return false;
3856 Register DstReg = I.getOperand(0).getReg();
3857 Register Src1Reg = I.getOperand(1).getReg();
3858 Register Src2Reg = I.getOperand(2).getReg();
3859 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3860 MachineInstr *InsMI = emitLaneInsert(std::nullopt, Tmp.getReg(0), Src1Reg,
3861 /* LaneIdx */ 0, RB, MIB);
3862 if (!InsMI)
3863 return false;
3864 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
3865 Src2Reg, /* LaneIdx */ 1, RB, MIB);
3866 if (!Ins2MI)
3867 return false;
3868 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3869 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
3870 I.eraseFromParent();
3871 return true;
3872 }
3873
3874 if (RB.getID() != AArch64::GPRRegBankID)
3875 return false;
3876
3877 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3878 return false;
3879
3880 auto *DstRC = &AArch64::GPR64RegClass;
3881 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
3882 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3883 TII.get(TargetOpcode::SUBREG_TO_REG))
3884 .addDef(SubToRegDef)
3885 .addImm(0)
3886 .addUse(I.getOperand(1).getReg())
3887 .addImm(AArch64::sub_32);
3888 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
3889 // Need to anyext the second scalar before we can use bfm
3890 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3891 TII.get(TargetOpcode::SUBREG_TO_REG))
3892 .addDef(SubToRegDef2)
3893 .addImm(0)
3894 .addUse(I.getOperand(2).getReg())
3895 .addImm(AArch64::sub_32);
3896 MachineInstr &BFM =
3897 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
3898 .addDef(I.getOperand(0).getReg())
3899 .addUse(SubToRegDef)
3900 .addUse(SubToRegDef2)
3901 .addImm(32)
3902 .addImm(31);
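// BFMXri with immr=32 and imms=31 corresponds to the BFI alias: it inserts the
// low 32 bits of the second value into bits [63:32] of the first, completing
// the 2 x s32 -> s64 merge.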
3903 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
3904 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
3905 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
3906 I.eraseFromParent();
3907 return true;
3908}
3909
3910static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3911 const unsigned EltSize) {
3912 // Choose a lane copy opcode and subregister based off of the size of the
3913 // vector's elements.
3914 switch (EltSize) {
3915 case 8:
3916 CopyOpc = AArch64::DUPi8;
3917 ExtractSubReg = AArch64::bsub;
3918 break;
3919 case 16:
3920 CopyOpc = AArch64::DUPi16;
3921 ExtractSubReg = AArch64::hsub;
3922 break;
3923 case 32:
3924 CopyOpc = AArch64::DUPi32;
3925 ExtractSubReg = AArch64::ssub;
3926 break;
3927 case 64:
3928 CopyOpc = AArch64::DUPi64;
3929 ExtractSubReg = AArch64::dsub;
3930 break;
3931 default:
3932 // Unknown size, bail out.
3933 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
3934 return false;
3935 }
3936 return true;
3937}
3938
3939MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3940 std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
3941 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
3942 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3943 unsigned CopyOpc = 0;
3944 unsigned ExtractSubReg = 0;
3945 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
3946 LLVM_DEBUG(
3947 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
3948 return nullptr;
3949 }
3950
3951 const TargetRegisterClass *DstRC =
3952 getRegClassForTypeOnBank(ScalarTy, DstRB, true);
3953 if (!DstRC) {
3954 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
3955 return nullptr;
3956 }
3957
3958 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
3959 const LLT &VecTy = MRI.getType(VecReg);
3960 const TargetRegisterClass *VecRC =
3961 getRegClassForTypeOnBank(VecTy, VecRB, true);
3962 if (!VecRC) {
3963 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3964 return nullptr;
3965 }
3966
3967 // The register that we're going to copy into.
3968 Register InsertReg = VecReg;
3969 if (!DstReg)
3970 DstReg = MRI.createVirtualRegister(DstRC);
3971 // If the lane index is 0, we just use a subregister COPY.
3972 if (LaneIdx == 0) {
3973 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
3974 .addReg(VecReg, 0, ExtractSubReg);
3975 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3976 return &*Copy;
3977 }
3978
3979 // Lane copies require 128-bit wide registers. If we're dealing with an
3980 // unpacked vector, then we need to move up to that width. Insert an implicit
3981 // def and a subregister insert to get us there.
3982 if (VecTy.getSizeInBits() != 128) {
3983 MachineInstr *ScalarToVector = emitScalarToVector(
3984 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
3985 if (!ScalarToVector)
3986 return nullptr;
3987 InsertReg = ScalarToVector->getOperand(0).getReg();
3988 }
3989
3990 MachineInstr *LaneCopyMI =
3991 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
3992 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
3993
3994 // Make sure that we actually constrain the initial copy.
3995 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3996 return LaneCopyMI;
3997}
3998
3999bool AArch64InstructionSelector::selectExtractElt(
4000 MachineInstr &I, MachineRegisterInfo &MRI) {
4001 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
4002 "unexpected opcode!");
4003 Register DstReg = I.getOperand(0).getReg();
4004 const LLT NarrowTy = MRI.getType(DstReg);
4005 const Register SrcReg = I.getOperand(1).getReg();
4006 const LLT WideTy = MRI.getType(SrcReg);
4007 (void)WideTy;
4008 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
4009 "source register size too small!");
4010 assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
4011
4012 // Need the lane index to determine the correct copy opcode.
4013 MachineOperand &LaneIdxOp = I.getOperand(2);
4014 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
4015
4016 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4017 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
4018 return false;
4019 }
4020
4021 // Find the index to extract from.
4022 auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
4023 if (!VRegAndVal)
4024 return false;
4025 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4026
4027
4028 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
4029 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
4030 LaneIdx, MIB);
4031 if (!Extract)
4032 return false;
4033
4034 I.eraseFromParent();
4035 return true;
4036}
4037
4038bool AArch64InstructionSelector::selectSplitVectorUnmerge(
4039 MachineInstr &I, MachineRegisterInfo &MRI) {
4040 unsigned NumElts = I.getNumOperands() - 1;
4041 Register SrcReg = I.getOperand(NumElts).getReg();
4042 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4043 const LLT SrcTy = MRI.getType(SrcReg);
4044
4045 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
4046 if (SrcTy.getSizeInBits() > 128) {
4047 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
4048 return false;
4049 }
4050
4051 // We implement a split vector operation by treating the sub-vectors as
4052 // scalars and extracting them.
4053 const RegisterBank &DstRB =
4054 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
4055 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4056 Register Dst = I.getOperand(OpIdx).getReg();
4057 MachineInstr *Extract =
4058 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
4059 if (!Extract)
4060 return false;
4061 }
4062 I.eraseFromParent();
4063 return true;
4064}
4065
4066bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
4067 MachineRegisterInfo &MRI) {
4068 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4069 "unexpected opcode");
4070
4071 // TODO: Handle unmerging into GPRs and from scalars to scalars.
4072 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
4073 AArch64::FPRRegBankID ||
4074 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
4075 AArch64::FPRRegBankID) {
4076 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
4077 "currently unsupported.\n");
4078 return false;
4079 }
4080
4081 // The last operand is the vector source register, and every other operand is
4082 // a register to unpack into.
4083 unsigned NumElts = I.getNumOperands() - 1;
4084 Register SrcReg = I.getOperand(NumElts).getReg();
4085 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4086 const LLT WideTy = MRI.getType(SrcReg);
4087 (void)WideTy;
4088 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
4089 "can only unmerge from vector or s128 types!");
4090 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
4091 "source register size too small!");
4092
4093 if (!NarrowTy.isScalar())
4094 return selectSplitVectorUnmerge(I, MRI);
4095
4096 // Choose a lane copy opcode and subregister based off of the size of the
4097 // vector's elements.
4098 unsigned CopyOpc = 0;
4099 unsigned ExtractSubReg = 0;
4100 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
4101 return false;
4102
4103 // Set up for the lane copies.
4104 MachineBasicBlock &MBB = *I.getParent();
4105
4106 // Stores the registers we'll be copying from.
4107 SmallVector<Register, 4> InsertRegs;
4108
4109 // We'll use the first register twice, so we only need NumElts-1 registers.
4110 unsigned NumInsertRegs = NumElts - 1;
4111
4112 // If our elements fit into exactly 128 bits, then we can copy from the source
4113 // directly. Otherwise, we need to do a bit of setup with some subregister
4114 // inserts.
4115 if (NarrowTy.getSizeInBits() * NumElts == 128) {
4116 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
4117 } else {
4118 // No. We have to perform subregister inserts. For each insert, create an
4119 // implicit def and a subregister insert, and save the register we create.
4120 const TargetRegisterClass *RC = getRegClassForTypeOnBank(
4121 LLT::fixed_vector(NumElts, WideTy.getScalarSizeInBits()),
4122 *RBI.getRegBank(SrcReg, MRI, TRI));
4123 unsigned SubReg = 0;
4124 bool Found = getSubRegForClass(RC, TRI, SubReg);
4125 (void)Found;
4126 assert(Found && "expected to find last operand's subreg idx");
4127 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4128 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4129 MachineInstr &ImpDefMI =
4130 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
4131 ImpDefReg);
4132
4133 // Now, create the subregister insert from SrcReg.
4134 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4135 MachineInstr &InsMI =
4136 *BuildMI(MBB, I, I.getDebugLoc(),
4137 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4138 .addUse(ImpDefReg)
4139 .addUse(SrcReg)
4140 .addImm(SubReg);
4141
4142 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
4143 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
4144
4145 // Save the register so that we can copy from it after.
4146 InsertRegs.push_back(InsertReg);
4147 }
4148 }
4149
4150 // Now that we've created any necessary subregister inserts, we can
4151 // create the copies.
4152 //
4153 // Perform the first copy separately as a subregister copy.
4154 Register CopyTo = I.getOperand(0).getReg();
4155 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4156 .addReg(InsertRegs[0], 0, ExtractSubReg);
4157 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
4158
4159 // Now, perform the remaining copies as vector lane copies.
4160 unsigned LaneIdx = 1;
4161 for (Register InsReg : InsertRegs) {
4162 Register CopyTo = I.getOperand(LaneIdx).getReg();
4163 MachineInstr &CopyInst =
4164 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
4165 .addUse(InsReg)
4166 .addImm(LaneIdx);
4167 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
4168 ++LaneIdx;
4169 }
4170
4171 // Separately constrain the first copy's destination. Because of the
4172 // limitation in constrainOperandRegClass, we can't guarantee that this will
4173 // actually be constrained. So, do it ourselves using the second operand.
4174 const TargetRegisterClass *RC =
4175 MRI.getRegClassOrNull(I.getOperand(1).getReg());
4176 if (!RC) {
4177 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
4178 return false;
4179 }
4180
4181 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
4182 I.eraseFromParent();
4183 return true;
4184}
4185
4186bool AArch64InstructionSelector::selectConcatVectors(
4187 MachineInstr &I, MachineRegisterInfo &MRI) {
4188 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4189 "Unexpected opcode");
4190 Register Dst = I.getOperand(0).getReg();
4191 Register Op1 = I.getOperand(1).getReg();
4192 Register Op2 = I.getOperand(2).getReg();
4193 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4194 if (!ConcatMI)
4195 return false;
4196 I.eraseFromParent();
4197 return true;
4198}
4199
4200unsigned
4201AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4202 MachineFunction &MF) const {
4203 Type *CPTy = CPVal->getType();
4204 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
4205
4206 MachineConstantPool *MCP = MF.getConstantPool();
4207 return MCP->getConstantPoolIndex(CPVal, Alignment);
4208}
4209
4210MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4211 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4212 const TargetRegisterClass *RC;
4213 unsigned Opc;
4214 bool IsTiny = TM.getCodeModel() == CodeModel::Tiny;
4215 unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
4216 switch (Size) {
4217 case 16:
4218 RC = &AArch64::FPR128RegClass;
4219 Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
4220 break;
4221 case 8:
4222 RC = &AArch64::FPR64RegClass;
4223 Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
4224 break;
4225 case 4:
4226 RC = &AArch64::FPR32RegClass;
4227 Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
4228 break;
4229 case 2:
4230 RC = &AArch64::FPR16RegClass;
4231 Opc = AArch64::LDRHui;
4232 break;
4233 default:
4234 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4235 << *CPVal->getType());
4236 return nullptr;
4237 }
4238
4239 MachineInstr *LoadMI = nullptr;
4240 auto &MF = MIRBuilder.getMF();
4241 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4242 if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) {
4243 // Use load(literal) for tiny code model.
4244 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {}).addConstantPoolIndex(CPIdx);
4245 } else {
4246 auto Adrp =
4247 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4248 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
4249
4250 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {Adrp})
4251 .addConstantPoolIndex(
4252 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4253
4254 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
4255 }
4256
4257 MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4258 LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
4259 MachineMemOperand::MOLoad,
4260 Size, Align(Size)));
4261 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
4262 return LoadMI;
4263}
4264
4265 /// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
4266/// size and RB.
4267static std::pair<unsigned, unsigned>
4268getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4269 unsigned Opc, SubregIdx;
4270 if (RB.getID() == AArch64::GPRRegBankID) {
4271 if (EltSize == 8) {
4272 Opc = AArch64::INSvi8gpr;
4273 SubregIdx = AArch64::bsub;
4274 } else if (EltSize == 16) {
4275 Opc = AArch64::INSvi16gpr;
4276 SubregIdx = AArch64::ssub;
4277 } else if (EltSize == 32) {
4278 Opc = AArch64::INSvi32gpr;
4279 SubregIdx = AArch64::ssub;
4280 } else if (EltSize == 64) {
4281 Opc = AArch64::INSvi64gpr;
4282 SubregIdx = AArch64::dsub;
4283 } else {
4284 llvm_unreachable("invalid elt size!");
4285 }
4286 } else {
4287 if (EltSize == 8) {
4288 Opc = AArch64::INSvi8lane;
4289 SubregIdx = AArch64::bsub;
4290 } else if (EltSize == 16) {
4291 Opc = AArch64::INSvi16lane;
4292 SubregIdx = AArch64::hsub;
4293 } else if (EltSize == 32) {
4294 Opc = AArch64::INSvi32lane;
4295 SubregIdx = AArch64::ssub;
4296 } else if (EltSize == 64) {
4297 Opc = AArch64::INSvi64lane;
4298 SubregIdx = AArch64::dsub;
4299 } else {
4300 llvm_unreachable("invalid elt size!");
4301 }
4302 }
4303 return std::make_pair(Opc, SubregIdx);
4304}
4305
4306MachineInstr *AArch64InstructionSelector::emitInstr(
4307 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4308 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4309 const ComplexRendererFns &RenderFns) const {
4310 assert(Opcode && "Expected an opcode?");
4311 assert(!isPreISelGenericOpcode(Opcode) &&
4312 "Function should only be used to produce selected instructions!");
4313 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4314 if (RenderFns)
4315 for (auto &Fn : *RenderFns)
4316 Fn(MI);
4317 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4318 return &*MI;
4319}
4320
4321MachineInstr *AArch64InstructionSelector::emitAddSub(
4322 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4323 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4324 MachineIRBuilder &MIRBuilder) const {
4325 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4326 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4327 auto Ty = MRI.getType(LHS.getReg());
4328 assert(!Ty.isVector() && "Expected a scalar or pointer?");
4329 unsigned Size = Ty.getSizeInBits();
4330 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4331 bool Is32Bit = Size == 32;
4332
4333 // INSTRri form with positive arithmetic immediate.
4334 if (auto Fns = selectArithImmed(RHS))
4335 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4336 MIRBuilder, Fns);
4337
4338 // INSTRri form with negative arithmetic immediate.
4339 if (auto Fns = selectNegArithImmed(RHS))
4340 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4341 MIRBuilder, Fns);
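// i.e. an add of a negative constant whose negation fits the 12-bit immediate
// form is emitted as the matching SUB*ri (and vice versa for subtracts);
// that is what the inverted-opcode row [3] of the table is for.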
4342
4343 // INSTRrx form.
4344 if (auto Fns = selectArithExtendedRegister(RHS))
4345 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4346 MIRBuilder, Fns);
4347
4348 // INSTRrs form.
4349 if (auto Fns = selectShiftedRegister(RHS))
4350 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4351 MIRBuilder, Fns);
4352 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4353 MIRBuilder);
4354}
4355
4356MachineInstr *
4357AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4358 MachineOperand &RHS,
4359 MachineIRBuilder &MIRBuilder) const {
4360 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4361 {{AArch64::ADDXri, AArch64::ADDWri},
4362 {AArch64::ADDXrs, AArch64::ADDWrs},
4363 {AArch64::ADDXrr, AArch64::ADDWrr},
4364 {AArch64::SUBXri, AArch64::SUBWri},
4365 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4366 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4367}
4368
4369MachineInstr *
4370AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4371 MachineOperand &RHS,
4372 MachineIRBuilder &MIRBuilder) const {
4373 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4374 {{AArch64::ADDSXri, AArch64::ADDSWri},
4375 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4376 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4377 {AArch64::SUBSXri, AArch64::SUBSWri},
4378 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4379 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4380}
4381
4382MachineInstr *
4383AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4384 MachineOperand &RHS,
4385 MachineIRBuilder &MIRBuilder) const {
4386 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4387 {{AArch64::SUBSXri, AArch64::SUBSWri},
4388 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4389 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4390 {AArch64::ADDSXri, AArch64::ADDSWri},
4391 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4392 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4393}
4394
4395MachineInstr *
4396AArch64InstructionSelector::emitADCS(Register Dst, MachineOperand &LHS,
4397 MachineOperand &RHS,
4398 MachineIRBuilder &MIRBuilder) const {
4399 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4400 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4401 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4402 static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr};
4403 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4404}
4405
4406MachineInstr *
4407AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS,
4408 MachineOperand &RHS,
4409 MachineIRBuilder &MIRBuilder) const {
4410 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4411 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4412 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4413 static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr};
4414 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4415}
4416
4417MachineInstr *
4418AArch64InstructionSelector::emitCMP(MachineOperand &LHS, MachineOperand &RHS,
4419 MachineIRBuilder &MIRBuilder) const {
4420 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4421 bool Is32Bit = MRI.getType(LHS.getReg()).getSizeInBits() == 32;
4422 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4423 return emitSUBS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4424}
4425
4426MachineInstr *
4427AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4428 MachineIRBuilder &MIRBuilder) const {
4429 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4430 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4431 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4432 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4433}
4434
4435MachineInstr *
4436AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4437 MachineIRBuilder &MIRBuilder) const {
4438 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4439 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4440 LLT Ty = MRI.getType(LHS.getReg());
4441 unsigned RegSize = Ty.getSizeInBits();
4442 bool Is32Bit = (RegSize == 32);
4443 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4444 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4445 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
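// TST is simply ANDS with the result discarded: callers only consume NZCV,
// so the destination built below is a scratch vreg of the operand type.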
4446 // ANDS needs a logical immediate for its immediate form. Check if we can
4447 // fold one in.
4448 if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4449 int64_t Imm = ValAndVReg->Value.getSExtValue();
4450
4451 if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
4452 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4453 TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
4454 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4455 return &*TstMI;
4456 }
4457 }
4458
4459 if (auto Fns = selectLogicalShiftedRegister(RHS))
4460 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4461 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4462}
4463
4464MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4465 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4466 MachineIRBuilder &MIRBuilder) const {
4467 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4468 assert(Predicate.isPredicate() && "Expected predicate?");
4469 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4470 LLT CmpTy = MRI.getType(LHS.getReg());
4471 assert(!CmpTy.isVector() && "Expected scalar or pointer");
4472 unsigned Size = CmpTy.getSizeInBits();
4473 (void)Size;
4474 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4475 // Fold the compare into a cmn or tst if possible.
4476 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4477 return FoldCmp;
4478 return emitCMP(LHS, RHS, MIRBuilder);
4479}
4480
4481MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4482 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4483 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4484#ifndef NDEBUG
4485 LLT Ty = MRI.getType(Dst);
4486 assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4487 "Expected a 32-bit scalar register?");
4488#endif
4489 const Register ZReg = AArch64::WZR;
4490 AArch64CC::CondCode CC1, CC2;
4491 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
4492 auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
4493 if (CC2 == AArch64CC::AL)
4494 return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
4495 MIRBuilder);
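// Some FP predicates (e.g. ONE, UEQ) have no single AArch64 condition code
// and come back as a pair, so materialize both with CSINC and OR the results.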
4496 const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4497 Register Def1Reg = MRI.createVirtualRegister(RC);
4498 Register Def2Reg = MRI.createVirtualRegister(RC);
4499 auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
4500 emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
4501 emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
4502 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4503 constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4504 return &*OrMI;
4505}
4506
4507MachineInstr *AArch64InstructionSelector::emitFPCompare(
4508 Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
4509 std::optional<CmpInst::Predicate> Pred) const {
4510 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4511 LLT Ty = MRI.getType(LHS);
4512 if (Ty.isVector())
4513 return nullptr;
4514 unsigned OpSize = Ty.getSizeInBits();
4515 assert(OpSize == 16 || OpSize == 32 || OpSize == 64);
4516
4517 // If this is a compare against +0.0, then we don't have
4518 // to explicitly materialize a constant.
4519 const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4520 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4521
4522 auto IsEqualityPred = [](CmpInst::Predicate P) {
4523 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4524 P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4525 };
4526 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4527 // Try commutating the operands.
4528 const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4529 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4530 ShouldUseImm = true;
4531 std::swap(LHS, RHS);
4532 }
4533 }
4534 unsigned CmpOpcTbl[2][3] = {
4535 {AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr},
4536 {AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}};
4537 unsigned CmpOpc =
4538 CmpOpcTbl[ShouldUseImm][OpSize == 16 ? 0 : (OpSize == 32 ? 1 : 2)];
4539
4540 // Partially build the compare. Decide if we need to add a use for the
4541 // third operand based off whether or not we're comparing against 0.0.
4542 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4543 CmpMI.setMIFlags(MachineInstr::NoFPExcept);
4544 if (!ShouldUseImm)
4545 CmpMI.addUse(RHS);
4546 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4547 return &*CmpMI;
4548}
4549
4550MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4551 std::optional<Register> Dst, Register Op1, Register Op2,
4552 MachineIRBuilder &MIRBuilder) const {
4553 // We implement a vector concat by:
4554 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4555 // 2. Insert the upper vector into the destination's upper element
4556 // TODO: some of this code is common with G_BUILD_VECTOR handling.
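// For example, concatenating two v2s32 values: both 64-bit inputs are widened
// into FPR128 registers (the original d-register contents sit in lane 0), and
// an INSvi64lane then copies lane 0 of the second into lane 1 of the result.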
4557 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4558
4559 const LLT Op1Ty = MRI.getType(Op1);
4560 const LLT Op2Ty = MRI.getType(Op2);
4561
4562 if (Op1Ty != Op2Ty) {
4563 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4564 return nullptr;
4565 }
4566 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4567
4568 if (Op1Ty.getSizeInBits() >= 128) {
4569 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4570 return nullptr;
4571 }
4572
4573 // At the moment we just support 64 bit vector concats.
4574 if (Op1Ty.getSizeInBits() != 64) {
4575 LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
4576 return nullptr;
4577 }
4578
4579 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4580 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4581 const TargetRegisterClass *DstRC =
4582 getRegClassForTypeOnBank(Op1Ty.multiplyElements(2), FPRBank);
4583
4584 MachineInstr *WidenedOp1 =
4585 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4586 MachineInstr *WidenedOp2 =
4587 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4588 if (!WidenedOp1 || !WidenedOp2) {
4589 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4590 return nullptr;
4591 }
4592
4593 // Now do the insert of the upper element.
4594 unsigned InsertOpc, InsSubRegIdx;
4595 std::tie(InsertOpc, InsSubRegIdx) =
4596 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4597
4598 if (!Dst)
4599 Dst = MRI.createVirtualRegister(DstRC);
4600 auto InsElt =
4601 MIRBuilder
4602 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4603 .addImm(1) /* Lane index */
4604 .addUse(WidenedOp2->getOperand(0).getReg())
4605 .addImm(0);
4606 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4607 return &*InsElt;
4608}
4609
4610MachineInstr *
4611AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
4612 Register Src2, AArch64CC::CondCode Pred,
4613 MachineIRBuilder &MIRBuilder) const {
4614 auto &MRI = *MIRBuilder.getMRI();
4615 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
4616 // If we used a register class, then this won't necessarily have an LLT.
4617 // Compute the size based off whether or not we have a class or bank.
4618 unsigned Size;
4619 if (const auto *RC = dyn_cast<const TargetRegisterClass *>(RegClassOrBank))
4620 Size = TRI.getRegSizeInBits(*RC);
4621 else
4622 Size = MRI.getType(Dst).getSizeInBits();
4623 // Some opcodes use s1.
4624 assert(Size <= 64 && "Expected 64 bits or less only!");
4625 static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4626 unsigned Opc = OpcTable[Size == 64];
4627 auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
4628 constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
4629 return &*CSINC;
4630}
4631
4632MachineInstr *AArch64InstructionSelector::emitCarryIn(MachineInstr &I,
4633 Register CarryReg) {
4634 MachineRegisterInfo *MRI = MIB.getMRI();
4635 unsigned Opcode = I.getOpcode();
4636
4637 // If the instruction is a SUB, we need to negate the carry,
4638 // because borrowing is indicated by carry-flag == 0.
4639 bool NeedsNegatedCarry =
4640 (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE);
4641
4642 // If the previous instruction will already produce the correct carry, do not
4643 // emit a carry generating instruction. E.g. for G_UADDE/G_USUBE sequences
4644 // generated during legalization of wide add/sub. This optimization depends on
4645 // these sequences not being interrupted by other instructions.
4646 // We have to select the previous instruction before the carry-using
4647 // instruction is deleted by the calling function, otherwise the previous
4648 // instruction might become dead and would get deleted.
4649 MachineInstr *SrcMI = MRI->getVRegDef(CarryReg);
4650 if (SrcMI == I.getPrevNode()) {
4651 if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(SrcMI)) {
4652 bool ProducesNegatedCarry = CarrySrcMI->isSub();
4653 if (NeedsNegatedCarry == ProducesNegatedCarry &&
4654 CarrySrcMI->isUnsigned() &&
4655 CarrySrcMI->getCarryOutReg() == CarryReg &&
4656 selectAndRestoreState(*SrcMI))
4657 return nullptr;
4658 }
4659 }
4660
4661 Register DeadReg = MRI->createVirtualRegister(&AArch64::GPR32RegClass);
4662
4663 if (NeedsNegatedCarry) {
4664 // (0 - Carry) sets !C in NZCV when Carry == 1
4665 Register ZReg = AArch64::WZR;
4666 return emitInstr(AArch64::SUBSWrr, {DeadReg}, {ZReg, CarryReg}, MIB);
4667 }
4668
4669 // (Carry - 1) sets !C in NZCV when Carry == 0
4670 auto Fns = select12BitValueWithLeftShift(1);
4671 return emitInstr(AArch64::SUBSWri, {DeadReg}, {CarryReg}, MIB, Fns);
4672}
4673
4674bool AArch64InstructionSelector::selectOverflowOp(MachineInstr &I,
4675 MachineRegisterInfo &MRI) {
4676 auto &CarryMI = cast<GAddSubCarryOut>(I);
4677
4678 if (auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(&I)) {
4679 // Set NZCV carry according to carry-in VReg
4680 emitCarryIn(I, CarryInMI->getCarryInReg());
4681 }
4682
4683 // Emit the operation and get the correct condition code.
4684 auto OpAndCC = emitOverflowOp(I.getOpcode(), CarryMI.getDstReg(),
4685 CarryMI.getLHS(), CarryMI.getRHS(), MIB);
4686
4687 Register CarryOutReg = CarryMI.getCarryOutReg();
4688
4689 // Don't convert carry-out to VReg if it is never used
4690 if (!MRI.use_nodbg_empty(CarryOutReg)) {
4691 // Now, put the overflow result in the register given by the first operand
4692 // to the overflow op. CSINC increments the result when the predicate is
4693 // false, so to get the increment when it's true, we need to use the
4694 // inverse. In this case, we want to increment when carry is set.
4695 Register ZReg = AArch64::WZR;
4696 emitCSINC(/*Dst=*/CarryOutReg, /*Src1=*/ZReg, /*Src2=*/ZReg,
4697 getInvertedCondCode(OpAndCC.second), MIB);
4698 }
4699
4700 I.eraseFromParent();
4701 return true;
4702}
4703
4704std::pair<MachineInstr *, AArch64CC::CondCode>
4705AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4706 MachineOperand &LHS,
4707 MachineOperand &RHS,
4708 MachineIRBuilder &MIRBuilder) const {
4709 switch (Opcode) {
4710 default:
4711 llvm_unreachable("Unexpected opcode!");
4712 case TargetOpcode::G_SADDO:
4713 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4714 case TargetOpcode::G_UADDO:
4715 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4716 case TargetOpcode::G_SSUBO:
4717 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4718 case TargetOpcode::G_USUBO:
4719 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4720 case TargetOpcode::G_SADDE:
4721 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4722 case TargetOpcode::G_UADDE:
4723 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4724 case TargetOpcode::G_SSUBE:
4725 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4726 case TargetOpcode::G_USUBE:
4727 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4728 }
4729}
4730
4731/// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
4732/// expressed as a conjunction.
4733/// \param CanNegate Set to true if we can negate the whole sub-tree just by
4734/// changing the conditions on the CMP tests.
4735/// (this means we can call emitConjunctionRec() with
4736/// Negate==true on this sub-tree)
4737/// \param MustBeFirst Set to true if this subtree needs to be negated and we
4738/// cannot do the negation naturally. We are required to
4739/// emit the subtree first in this case.
4740/// \param WillNegate Is true if are called when the result of this
4741/// subexpression must be negated. This happens when the
4742/// outer expression is an OR. We can use this fact to know
4743/// that we have a double negation (or (or ...) ...) that
4744/// can be implemented for free.
4745static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
4746 bool WillNegate, MachineRegisterInfo &MRI,
4747 unsigned Depth = 0) {
4748 if (!MRI.hasOneNonDBGUse(Val))
4749 return false;
4750 MachineInstr *ValDef = MRI.getVRegDef(Val);
4751 unsigned Opcode = ValDef->getOpcode();
4752 if (isa<GAnyCmp>(ValDef)) {
4753 CanNegate = true;
4754 MustBeFirst = false;
4755 return true;
4756 }
4757 // Protect against exponential runtime and stack overflow.
4758 if (Depth > 6)
4759 return false;
4760 if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
4761 bool IsOR = Opcode == TargetOpcode::G_OR;
4762 Register O0 = ValDef->getOperand(1).getReg();
4763 Register O1 = ValDef->getOperand(2).getReg();
4764 bool CanNegateL;
4765 bool MustBeFirstL;
4766 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, MRI, Depth + 1))
4767 return false;
4768 bool CanNegateR;
4769 bool MustBeFirstR;
4770 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, MRI, Depth + 1))
4771 return false;
4772
4773 if (MustBeFirstL && MustBeFirstR)
4774 return false;
4775
4776 if (IsOR) {
4777 // For an OR expression we need to be able to naturally negate at least
4778 // one side or we cannot do the transformation at all.
4779 if (!CanNegateL && !CanNegateR)
4780 return false;
4781 // If the result of the OR will be negated and we can naturally negate
4782 // the leaves, then this sub-tree as a whole negates naturally.
4783 CanNegate = WillNegate && CanNegateL && CanNegateR;
4784 // If we cannot naturally negate the whole sub-tree, then this must be
4785 // emitted first.
4786 MustBeFirst = !CanNegate;
4787 } else {
4788 assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
4789 // We cannot naturally negate an AND operation.
4790 CanNegate = false;
4791 MustBeFirst = MustBeFirstL || MustBeFirstR;
4792 }
4793 return true;
4794 }
4795 return false;
4796}
4797
4798 MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
4799 Register LHS, Register RHS, CmpInst::Predicate CC,
4800 AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
4801 MachineIRBuilder &MIB) const {
4802 auto &MRI = *MIB.getMRI();
4803 LLT OpTy = MRI.getType(LHS);
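// CCMP/CCMN is a conditional compare: if the current flags satisfy Predicate
// it performs the compare and updates NZCV, otherwise it sets NZCV directly
// to the immediate encoded below. That immediate is chosen to satisfy the
// inverse of OutCC, so the overall condition reads as false whenever
// Predicate does not hold. CCMN with an immediate is used for small negative
// RHS constants, since CCMN x, #imm compares x against -imm.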
4804 unsigned CCmpOpc;
4805 std::optional<ValueAndVReg> C;
4806 if (CmpInst::isIntPredicate(CC)) {
4807 assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
4808 C = getIConstantVRegValWithLookThrough(RHS, MRI);
4809 if (!C || C->Value.sgt(31) || C->Value.slt(-31))
4810 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
4811 else if (C->Value.ule(31))
4812 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
4813 else
4814 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMNWi : AArch64::CCMNXi;
4815 } else {
4816 assert(OpTy.getSizeInBits() == 16 || OpTy.getSizeInBits() == 32 ||
4817 OpTy.getSizeInBits() == 64);
4818 switch (OpTy.getSizeInBits()) {
4819 case 16:
4820 assert(STI.hasFullFP16() && "Expected Full FP16 for fp16 comparisons");
4821 CCmpOpc = AArch64::FCCMPHrr;
4822 break;
4823 case 32:
4824 CCmpOpc = AArch64::FCCMPSrr;
4825 break;
4826 case 64:
4827 CCmpOpc = AArch64::FCCMPDrr;
4828 break;
4829 default:
4830 return nullptr;
4831 }
4832 }
4833 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
4834 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
4835 auto CCmp =
4836 MIB.buildInstr(CCmpOpc, {}, {LHS});
4837 if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
4838 CCmp.addImm(C->Value.getZExtValue());
4839 else if (CCmpOpc == AArch64::CCMNWi || CCmpOpc == AArch64::CCMNXi)
4840 CCmp.addImm(C->Value.abs().getZExtValue());
4841 else
4842 CCmp.addReg(RHS);
4843 CCmp.addImm(NZCV).addImm(Predicate);
4844 constrainSelectedInstRegOperands(*CCmp, TII, TRI, RBI);
4845 return &*CCmp;
4846}
4847
4848MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
4849 Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
4850 AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
4851 // We're at a tree leaf, produce a conditional comparison operation.
4852 auto &MRI = *MIB.getMRI();
4853 MachineInstr *ValDef = MRI.getVRegDef(Val);
4854 unsigned Opcode = ValDef->getOpcode();
4855 if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
4856 Register LHS = Cmp->getLHSReg();
4857 Register RHS = Cmp->getRHSReg();
4858 CmpInst::Predicate CC = Cmp->getCond();
4859 if (Negate)
4860 CC = CmpInst::getInversePredicate(CC);
4861 if (isa<GICmp>(Cmp)) {
4862 OutCC = changeICMPPredToAArch64CC(CC, RHS, MIB.getMRI());
4863 } else {
4864 // Handle special FP cases.
4865 AArch64CC::CondCode ExtraCC;
4866 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
4867 // Some floating point conditions can't be tested with a single condition
4868 // code. Construct an additional comparison in this case.
4869 if (ExtraCC != AArch64CC::AL) {
4870 MachineInstr *ExtraCmp;
4871 if (!CCOp)
4872 ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC);
4873 else
4874 ExtraCmp =
4875 emitConditionalComparison(LHS, RHS, CC, Predicate, ExtraCC, MIB);
4876 CCOp = ExtraCmp->getOperand(0).getReg();
4877 Predicate = ExtraCC;
4878 }
4879 }
4880
4881 // Produce a normal comparison if we are first in the chain
4882 if (!CCOp) {
4883 if (isa<GICmp>(Cmp))
4884 return emitCMP(Cmp->getOperand(2), Cmp->getOperand(3), MIB);
4885 return emitFPCompare(Cmp->getOperand(2).getReg(),
4886 Cmp->getOperand(3).getReg(), MIB);
4887 }
4888 // Otherwise produce a ccmp.
4889 return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB);
4890 }
4891 assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");
4892
4893 bool IsOR = Opcode == TargetOpcode::G_OR;
4894
4895 Register LHS = ValDef->getOperand(1).getReg();
4896 bool CanNegateL;
4897 bool MustBeFirstL;
4898 bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR, MRI);
4899 assert(ValidL && "Valid conjunction/disjunction tree");
4900 (void)ValidL;
4901
4902 Register RHS = ValDef->getOperand(2).getReg();
4903 bool CanNegateR;
4904 bool MustBeFirstR;
4905 bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR, MRI);
4906 assert(ValidR && "Valid conjunction/disjunction tree");
4907 (void)ValidR;
4908
4909 // Swap sub-tree that must come first to the right side.
4910 if (MustBeFirstL) {
4911 assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
4912 std::swap(LHS, RHS);
4913 std::swap(CanNegateL, CanNegateR);
4914 std::swap(MustBeFirstL, MustBeFirstR);
4915 }
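// Emission order below: the right-hand subtree is emitted first and yields
// RHSCC, and the left-hand subtree is then emitted as a conditional compare
// predicated on RHSCC. A subtree flagged MustBeFirst therefore has to sit in
// the right-hand (emitted-first) position.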
4916
4917 bool NegateR;
4918 bool NegateAfterR;
4919 bool NegateL;
4920 bool NegateAfterAll;
4921 if (Opcode == TargetOpcode::G_OR) {
4922 // Swap the sub-tree that we can negate naturally to the left.
4923 if (!CanNegateL) {
4924 assert(CanNegateR && "at least one side must be negatable");
4925 assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
4926 assert(!Negate);
4927 std::swap(LHS, RHS);
4928 NegateR = false;
4929 NegateAfterR = true;
4930 } else {
4931 // Negate the left sub-tree if possible, otherwise negate the result.
4932 NegateR = CanNegateR;
4933 NegateAfterR = !CanNegateR;
4934 }
4935 NegateL = true;
4936 NegateAfterAll = !Negate;
4937 } else {
4938 assert(Opcode == TargetOpcode::G_AND &&
4939 "Valid conjunction/disjunction tree");
4940 assert(!Negate && "Valid conjunction/disjunction tree");
4941
4942 NegateL = false;
4943 NegateR = false;
4944 NegateAfterR = false;
4945 NegateAfterAll = false;
4946 }
4947
4948 // Emit sub-trees.
4949 AArch64CC::CondCode RHSCC;
4950 MachineInstr *CmpR =
4951 emitConjunctionRec(RHS, RHSCC, NegateR, CCOp, Predicate, MIB);
4952 if (NegateAfterR)
4953 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
4954 MachineInstr *CmpL = emitConjunctionRec(
4955 LHS, OutCC, NegateL, CmpR->getOperand(0).getReg(), RHSCC, MIB);
4956 if (NegateAfterAll)
4957 OutCC = AArch64CC::getInvertedCondCode(OutCC);
4958 return CmpL;
4959}
4960
4961MachineInstr *AArch64InstructionSelector::emitConjunction(
4962 Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {
4963 bool DummyCanNegate;
4964 bool DummyMustBeFirst;
4965 if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false,
4966 *MIB.getMRI()))
4967 return nullptr;
4968 return emitConjunctionRec(Val, OutCC, false, Register(), AArch64CC::AL, MIB);
4969}
4970
4971bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
4972 MachineInstr &CondMI) {
4973 AArch64CC::CondCode AArch64CC;
4974 MachineInstr *ConjMI = emitConjunction(SelI.getCondReg(), AArch64CC, MIB);
4975 if (!ConjMI)
4976 return false;
4977
4978 emitSelect(SelI.getReg(0), SelI.getTrueReg(), SelI.getFalseReg(), AArch64CC, MIB);
4979 SelI.eraseFromParent();
4980 return true;
4981}
4982
4983bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
4984 MachineRegisterInfo &MRI = *MIB.getMRI();
4985 // We want to recognize this pattern:
4986 //
4987 // $z = G_FCMP pred, $x, $y
4988 // ...
4989 // $w = G_SELECT $z, $a, $b
4990 //
4991 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
4992 // some copies/truncs in between.)
4993 //
4994 // If we see this, then we can emit something like this:
4995 //
4996 // fcmp $x, $y
4997 // fcsel $w, $a, $b, pred
4998 //
4999 // Rather than emitting both of the rather long sequences in the standard
5000 // G_FCMP/G_SELECT select methods.
5001
5002 // First, check if the condition is defined by a compare.
5003 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
5004
5005 // We can only fold if all of the defs have one use.
5006 Register CondDefReg = CondDef->getOperand(0).getReg();
5007 if (!MRI.hasOneNonDBGUse(CondDefReg)) {
5008 // Unless it's another select.
5009 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
5010 if (CondDef == &UI)
5011 continue;
5012 if (UI.getOpcode() != TargetOpcode::G_SELECT)
5013 return false;
5014 }
5015 }
5016
5017 // Is the condition defined by a compare?
5018 unsigned CondOpc = CondDef->getOpcode();
5019 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
5020 if (tryOptSelectConjunction(I, *CondDef))
5021 return true;
5022 return false;
5023 }
5024
5025 AArch64CC::CondCode CondCode;
5026 if (CondOpc == TargetOpcode::G_ICMP) {
5027 auto &PredOp = CondDef->getOperand(1);
5028 emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3), PredOp,
5029 MIB);
5030 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
5031 CondCode =
5032 changeICMPPredToAArch64CC(Pred, CondDef->getOperand(3).getReg(), &MRI);
5033 } else {
5034 // Get the condition code for the select.
5035 auto Pred =
5036 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
5037 AArch64CC::CondCode CondCode2;
5038 changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
5039
5040 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
5041 // instructions to emit the comparison.
5042 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
5043 // unnecessary.
5044 if (CondCode2 != AArch64CC::AL)
5045 return false;
5046
5047 if (!emitFPCompare(CondDef->getOperand(2).getReg(),
5048 CondDef->getOperand(3).getReg(), MIB)) {
5049 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
5050 return false;
5051 }
5052 }
5053
5054 // Emit the select.
5055 emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
5056 I.getOperand(3).getReg(), CondCode, MIB);
5057 I.eraseFromParent();
5058 return true;
5059}
5060
5061MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
5062 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
5063 MachineIRBuilder &MIRBuilder) const {
5064 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
5065 "Unexpected MachineOperand");
5066 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5067 // We want to find this sort of thing:
5068 // x = G_SUB 0, y
5069 // G_ICMP z, x
5070 //
5071 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
5072 // e.g:
5073 //
5074 // cmn z, y
5075
5076 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
5077 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
5078 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
5079 auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
5080
5081 // Given this:
5082 //
5083 // x = G_SUB 0, y
5084 // G_ICMP z, x
5085 //
5086 // Produce this:
5087 //
5088 // cmn z, y
5089 if (isCMN(RHSDef, P, MRI))
5090 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
5091
5092 // Same idea here, but with the LHS of the compare instead:
5093 //
5094 // Given this:
5095 //
5096 // x = G_SUB 0, y
5097 // G_ICMP x, z
5098 //
5099 // Produce this:
5100 //
5101 // cmn y, z
5102 //
5103 // But be careful! We need to swap the predicate!
5104 if (isCMN(LHSDef, P, MRI)) {
5105 if (!CmpInst::isEquality(P)) {
5106 P = CmpInst::getSwappedPredicate(P);
5107 Predicate.setPredicate(P);
5108 }
5109 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
5110 }
5111
5112 // Given this:
5113 //
5114 // z = G_AND x, y
5115 // G_ICMP z, 0
5116 //
5117 // Produce this if the compare is not unsigned (equality or signed):
5118 //
5119 // tst x, y (a flag-setting AND whose integer result is discarded)
5120 if (!CmpInst::isUnsigned(P) && LHSDef &&
5121 LHSDef->getOpcode() == TargetOpcode::G_AND) {
5122 // Make sure that the RHS is 0.
5123 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
5124 if (!ValAndVReg || ValAndVReg->Value != 0)
5125 return nullptr;
5126
5127 return emitTST(LHSDef->getOperand(1),
5128 LHSDef->getOperand(2), MIRBuilder);
5129 }
5130
5131 return nullptr;
5132}
5133
5134bool AArch64InstructionSelector::selectShuffleVector(
5135 MachineInstr &I, MachineRegisterInfo &MRI) {
5136 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5137 Register Src1Reg = I.getOperand(1).getReg();
5138 Register Src2Reg = I.getOperand(2).getReg();
5139 ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
5140
5141 MachineBasicBlock &MBB = *I.getParent();
5142 MachineFunction &MF = *MBB.getParent();
5143 LLVMContext &Ctx = MF.getFunction().getContext();
5144
5145 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
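// G_SHUFFLE_VECTOR is selected to TBL, which picks each destination byte
// from the concatenated source registers using a per-byte index vector.
// Expand every element index in the mask into BytesPerElt consecutive byte
// indices; the resulting index vector is then loaded from the constant pool.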
5146
5147 SmallVector<Constant *, 16> CstIdxs;
5148 for (int Val : Mask) {
5149 // For now, treat any undef index as 0. This should be optimized in the
5150 // future, e.g. to select DUP etc.
5151 Val = Val < 0 ? 0 : Val;
5152 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
5153 unsigned Offset = Byte + Val * BytesPerElt;
5154 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
5155 }
5156 }
5157
5158 // Use a constant pool to load the index vector for TBL.
5159 Constant *CPVal = ConstantVector::get(CstIdxs);
5160 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB);
5161 if (!IndexLoad) {
5162 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
5163 return false;
5164 }
5165
5166 if (DstTy.getSizeInBits() != 128) {
5167 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
5168 // This case can be done with TBL1.
5169 MachineInstr *Concat =
5170 emitVectorConcat(std::nullopt, Src1Reg, Src2Reg, MIB);
5171 if (!Concat) {
5172 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
5173 return false;
5174 }
5175
5176 // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
5177 IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
5178 IndexLoad->getOperand(0).getReg(), MIB);
5179
5180 auto TBL1 = MIB.buildInstr(
5181 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
5182 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
5183 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
5184
5185 auto Copy =
5186 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
5187 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
5188 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
5189 I.eraseFromParent();
5190 return true;
5191 }
5192
5193 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
5194 // Q registers for regalloc.
5195 SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
5196 auto RegSeq = createQTuple(Regs, MIB);
5197 auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
5198 {RegSeq, IndexLoad->getOperand(0)});
5199 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
5200 I.eraseFromParent();
5201 return true;
5202}
5203
5204MachineInstr *AArch64InstructionSelector::emitLaneInsert(
5205 std::optional<Register> DstReg, Register SrcReg, Register EltReg,
5206 unsigned LaneIdx, const RegisterBank &RB,
5207 MachineIRBuilder &MIRBuilder) const {
5208 MachineInstr *InsElt = nullptr;
5209 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5210 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5211
5212 // Create a register to define with the insert if one wasn't passed in.
5213 if (!DstReg)
5214 DstReg = MRI.createVirtualRegister(DstRC);
5215
5216 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
5217 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
5218
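// For FPR elements, the value is first placed into lane 0 of a 128-bit
// register and then copied into the target lane with the element-to-element
// form of INS (hence the extra source lane index of 0). GPR elements can be
// inserted directly with the GPR form of INS.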
5219 if (RB.getID() == AArch64::FPRRegBankID) {
5220 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
5221 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5222 .addImm(LaneIdx)
5223 .addUse(InsSub->getOperand(0).getReg())
5224 .addImm(0);
5225 } else {
5226 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5227 .addImm(LaneIdx)
5228 .addUse(EltReg);
5229 }
5230
5231 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
5232 return InsElt;
5233}
5234
5235bool AArch64InstructionSelector::selectUSMovFromExtend(
5236 MachineInstr &MI, MachineRegisterInfo &MRI) {
5237 if (MI.getOpcode() != TargetOpcode::G_SEXT &&
5238 MI.getOpcode() != TargetOpcode::G_ZEXT &&
5239 MI.getOpcode() != TargetOpcode::G_ANYEXT)
5240 return false;
5241 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
5242 const Register DefReg = MI.getOperand(0).getReg();
5243 const LLT DstTy = MRI.getType(DefReg);
5244 unsigned DstSize = DstTy.getSizeInBits();
5245
5246 if (DstSize != 32 && DstSize != 64)
5247 return false;
5248
5249 MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,
5250 MI.getOperand(1).getReg(), MRI);
5251 int64_t Lane;
5252 if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane)))
5253 return false;
5254 Register Src0 = Extract->getOperand(1).getReg();
5255
5256 const LLT VecTy = MRI.getType(Src0);
5257 if (VecTy.isScalableVector())
5258 return false;
5259
5260 if (VecTy.getSizeInBits() != 128) {
5261 const MachineInstr *ScalarToVector = emitScalarToVector(
5262 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
5263 assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
5264 Src0 = ScalarToVector->getOperand(0).getReg();
5265 }
5266
5267 unsigned Opcode;
5268 if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
5269 Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
5270 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
5271 Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
5272 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
5273 Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
5274 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
5275 Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
5276 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
5277 Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
5278 else
5279 llvm_unreachable("Unexpected type combo for S/UMov!");
5280
5281 // We may need to generate one of these, depending on the type and sign of the
5282 // input:
5283 // DstReg = SMOV Src0, Lane;
5284 // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
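// Note: a 64-bit unsigned extract does not need a dedicated UMOV-to-64 form.
// UMOV into a W register already zeroes bits [63:32], so the 64-bit result
// is formed by inserting that W register into sub_32 of an X register via
// SUBREG_TO_REG.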
5285 MachineInstr *ExtI = nullptr;
5286 if (DstSize == 64 && !IsSigned) {
5287 Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
5288 MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
5289 ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
5290 .addImm(0)
5291 .addUse(NewReg)
5292 .addImm(AArch64::sub_32);
5293 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
5294 } else
5295 ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
5296
5298 MI.eraseFromParent();
5299 return true;
5300}
5301
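// The tryAdvSIMDModImm* helpers below try to materialize a splatted constant
// with a single MOVI/MVNI/FMOV (vector, immediate). Each
// AArch64_AM::isAdvSIMDModImmTypeN predicate corresponds to one of the
// AdvSIMD "modified immediate" encodings (replicated bytes, 16/32-bit values
// with a byte shift, the shifted-ones MSL forms, and the FP immediates), and
// the matching encode routine produces the 8-bit immediate payload for the
// instruction.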
5302MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(
5303 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5304 unsigned int Op;
5305 if (DstSize == 128) {
5306 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5307 return nullptr;
5308 Op = AArch64::MOVIv16b_ns;
5309 } else {
5310 Op = AArch64::MOVIv8b_ns;
5311 }
5312
5313 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5314
5315 if (AArch64_AM::isAdvSIMDModImmType9(Val)) {
5316 Val = AArch64_AM::encodeAdvSIMDModImmType9(Val);
5317 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5318 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5319 return &*Mov;
5320 }
5321 return nullptr;
5322}
5323
5324MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16(
5325 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5326 bool Inv) {
5327
5328 unsigned int Op;
5329 if (DstSize == 128) {
5330 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5331 return nullptr;
5332 Op = Inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16;
5333 } else {
5334 Op = Inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16;
5335 }
5336
5337 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5338 uint64_t Shift;
5339
5340 if (AArch64_AM::isAdvSIMDModImmType5(Val)) {
5341 Val = AArch64_AM::encodeAdvSIMDModImmType5(Val);
5342 Shift = 0;
5343 } else if (AArch64_AM::isAdvSIMDModImmType6(Val)) {
5344 Val = AArch64_AM::encodeAdvSIMDModImmType6(Val);
5345 Shift = 8;
5346 } else
5347 return nullptr;
5348
5349 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5350 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5351 return &*Mov;
5352}
5353
5354MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32(
5355 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5356 bool Inv) {
5357
5358 unsigned int Op;
5359 if (DstSize == 128) {
5360 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5361 return nullptr;
5362 Op = Inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32;
5363 } else {
5364 Op = Inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32;
5365 }
5366
5367 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5368 uint64_t Shift;
5369
5370 if (AArch64_AM::isAdvSIMDModImmType1(Val)) {
5371 Val = AArch64_AM::encodeAdvSIMDModImmType1(Val);
5372 Shift = 0;
5373 } else if ((AArch64_AM::isAdvSIMDModImmType2(Val))) {
5374 Val = AArch64_AM::encodeAdvSIMDModImmType2(Val);
5375 Shift = 8;
5376 } else if ((AArch64_AM::isAdvSIMDModImmType3(Val))) {
5377 Val = AArch64_AM::encodeAdvSIMDModImmType3(Val);
5378 Shift = 16;
5379 } else if ((AArch64_AM::isAdvSIMDModImmType4(Val))) {
5380 Val = AArch64_AM::encodeAdvSIMDModImmType4(Val);
5381 Shift = 24;
5382 } else
5383 return nullptr;
5384
5385 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5386 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5387 return &*Mov;
5388}
5389
5390MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64(
5391 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5392
5393 unsigned int Op;
5394 if (DstSize == 128) {
5395 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5396 return nullptr;
5397 Op = AArch64::MOVIv2d_ns;
5398 } else {
5399 Op = AArch64::MOVID;
5400 }
5401
5402 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5403 if (AArch64_AM::isAdvSIMDModImmType10(Val)) {
5404 Val = AArch64_AM::encodeAdvSIMDModImmType10(Val);
5405 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5406 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5407 return &*Mov;
5408 }
5409 return nullptr;
5410}
5411
5412MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s(
5413 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5414 bool Inv) {
5415
5416 unsigned int Op;
5417 if (DstSize == 128) {
5418 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5419 return nullptr;
5420 Op = Inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl;
5421 } else {
5422 Op = Inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl;
5423 }
5424
5425 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5426 uint64_t Shift;
5427
5428 if (AArch64_AM::isAdvSIMDModImmType7(Val)) {
5429 Val = AArch64_AM::encodeAdvSIMDModImmType7(Val);
5430 Shift = 264;
5431 } else if (AArch64_AM::isAdvSIMDModImmType8(Val)) {
5432 Val = AArch64_AM::encodeAdvSIMDModImmType8(Val);
5433 Shift = 272;
5434 } else
5435 return nullptr;
5436
5437 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5438 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5439 return &*Mov;
5440}
5441
5442MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
5443 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5444
5445 unsigned int Op;
5446 bool IsWide = false;
5447 if (DstSize == 128) {
5448 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5449 return nullptr;
5450 Op = AArch64::FMOVv4f32_ns;
5451 IsWide = true;
5452 } else {
5453 Op = AArch64::FMOVv2f32_ns;
5454 }
5455
5456 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5457
5458 if (AArch64_AM::isAdvSIMDModImmType11(Val)) {
5459 Val = AArch64_AM::encodeAdvSIMDModImmType11(Val);
5460 } else if (IsWide && AArch64_AM::isAdvSIMDModImmType12(Val)) {
5461 Val = AArch64_AM::encodeAdvSIMDModImmType12(Val);
5462 Op = AArch64::FMOVv2f64_ns;
5463 } else
5464 return nullptr;
5465
5466 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5467 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5468 return &*Mov;
5469}
5470
5471bool AArch64InstructionSelector::selectIndexedExtLoad(
5472 MachineInstr &MI, MachineRegisterInfo &MRI) {
5473 auto &ExtLd = cast<GIndexedAnyExtLoad>(MI);
5474 Register Dst = ExtLd.getDstReg();
5475 Register WriteBack = ExtLd.getWritebackReg();
5476 Register Base = ExtLd.getBaseReg();
5477 Register Offset = ExtLd.getOffsetReg();
5478 LLT Ty = MRI.getType(Dst);
5479 assert(Ty.getSizeInBits() <= 64); // Only for scalar GPRs.
5480 unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();
5481 bool IsPre = ExtLd.isPre();
5482 bool IsSExt = isa<GIndexedSExtLoad>(ExtLd);
5483 unsigned InsertIntoSubReg = 0;
5484 bool IsDst64 = Ty.getSizeInBits() == 64;
5485
5486 // ZExt/SExt results should be on GPR, but we can also handle any-extending
5487 // and zero-extending loads into FPR, as long as the result is scalar.
5488 bool IsFPR = RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID;
5489 if ((IsSExt && IsFPR) || Ty.isVector())
5490 return false;
5491
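// InsertIntoSubReg records which subregister the raw load result has to be
// inserted into when the load's natural result is narrower than Dst: sub_32
// for a 64-bit GPR destination (the 32-bit load already zeroes the top
// half), or bsub/hsub/ssub when the value is loaded directly into an FPR.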
5492 unsigned Opc = 0;
5493 LLT NewLdDstTy;
5494 LLT s32 = LLT::scalar(32);
5495 LLT s64 = LLT::scalar(64);
5496
5497 if (MemSizeBits == 8) {
5498 if (IsSExt) {
5499 if (IsDst64)
5500 Opc = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
5501 else
5502 Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
5503 NewLdDstTy = IsDst64 ? s64 : s32;
5504 } else if (IsFPR) {
5505 Opc = IsPre ? AArch64::LDRBpre : AArch64::LDRBpost;
5506 InsertIntoSubReg = AArch64::bsub;
5507 NewLdDstTy = LLT::scalar(MemSizeBits);
5508 } else {
5509 Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
5510 InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
5511 NewLdDstTy = s32;
5512 }
5513 } else if (MemSizeBits == 16) {
5514 if (IsSExt) {
5515 if (IsDst64)
5516 Opc = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
5517 else
5518 Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
5519 NewLdDstTy = IsDst64 ? s64 : s32;
5520 } else if (IsFPR) {
5521 Opc = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
5522 InsertIntoSubReg = AArch64::hsub;
5523 NewLdDstTy = LLT::scalar(MemSizeBits);
5524 } else {
5525 Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
5526 InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
5527 NewLdDstTy = s32;
5528 }
5529 } else if (MemSizeBits == 32) {
5530 if (IsSExt) {
5531 Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
5532 NewLdDstTy = s64;
5533 } else if (IsFPR) {
5534 Opc = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
5535 InsertIntoSubReg = AArch64::ssub;
5536 NewLdDstTy = LLT::scalar(MemSizeBits);
5537 } else {
5538 Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
5539 InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
5540 NewLdDstTy = s32;
5541 }
5542 } else {
5543 llvm_unreachable("Unexpected size for indexed load");
5544 }
5545
5546 auto Cst = getIConstantVRegVal(Offset, MRI);
5547 if (!Cst)
5548 return false; // Shouldn't happen, but just in case.
5549
5550 auto LdMI = MIB.buildInstr(Opc, {WriteBack, NewLdDstTy}, {Base})
5551 .addImm(Cst->getSExtValue());
5552 LdMI.cloneMemRefs(ExtLd);
5553 constrainSelectedInstRegOperands(*LdMI, TII, TRI, RBI);
5554 // Make sure to select the load with the MemTy as the dest type, and then
5555 // insert into a larger reg if needed.
5556 if (InsertIntoSubReg) {
5557 // Generate a SUBREG_TO_REG.
5558 auto SubToReg = MIB.buildInstr(TargetOpcode::SUBREG_TO_REG, {Dst}, {})
5559 .addImm(0)
5560 .addUse(LdMI.getReg(1))
5561 .addImm(InsertIntoSubReg);
5562 RBI.constrainGenericRegister(
5563 SubToReg.getReg(0),
5564 *getRegClassForTypeOnBank(MRI.getType(Dst),
5565 *RBI.getRegBank(Dst, MRI, TRI)),
5566 MRI);
5567 } else {
5568 auto Copy = MIB.buildCopy(Dst, LdMI.getReg(1));
5569 selectCopy(*Copy, TII, MRI, TRI, RBI);
5570 }
5571 MI.eraseFromParent();
5572
5573 return true;
5574}
5575
5576bool AArch64InstructionSelector::selectIndexedLoad(MachineInstr &MI,
5577 MachineRegisterInfo &MRI) {
5578 auto &Ld = cast<GIndexedLoad>(MI);
5579 Register Dst = Ld.getDstReg();
5580 Register WriteBack = Ld.getWritebackReg();
5581 Register Base = Ld.getBaseReg();
5582 Register Offset = Ld.getOffsetReg();
5583 assert(MRI.getType(Dst).getSizeInBits() <= 128 &&
5584 "Unexpected type for indexed load");
5585 unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();
5586
5587 if (MemSize < MRI.getType(Dst).getSizeInBytes())
5588 return selectIndexedExtLoad(MI, MRI);
5589
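// Pre/post-indexed loads define two results: the write-back (updated base)
// pointer and the loaded value. The opcode tables below are indexed by log2
// of the access size in bytes, and the offset must already have been folded
// to a constant, which is all the immediate forms can encode.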
5590 unsigned Opc = 0;
5591 if (Ld.isPre()) {
5592 static constexpr unsigned GPROpcodes[] = {
5593 AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre,
5594 AArch64::LDRXpre};
5595 static constexpr unsigned FPROpcodes[] = {
5596 AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre,
5597 AArch64::LDRQpre};
5598 if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5599 Opc = FPROpcodes[Log2_32(MemSize)];
5600 else
5601 Opc = GPROpcodes[Log2_32(MemSize)];
5602 } else {
5603 static constexpr unsigned GPROpcodes[] = {
5604 AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
5605 AArch64::LDRXpost};
5606 static constexpr unsigned FPROpcodes[] = {
5607 AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
5608 AArch64::LDRDpost, AArch64::LDRQpost};
5609 if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5610 Opc = FPROpcodes[Log2_32(MemSize)];
5611 else
5612 Opc = GPROpcodes[Log2_32(MemSize)];
5613 }
5614 auto Cst = getIConstantVRegVal(Offset, MRI);
5615 if (!Cst)
5616 return false; // Shouldn't happen, but just in case.
5617 auto LdMI =
5618 MIB.buildInstr(Opc, {WriteBack, Dst}, {Base}).addImm(Cst->getSExtValue());
5619 LdMI.cloneMemRefs(Ld);
5620 constrainSelectedInstRegOperands(*LdMI, TII, TRI, RBI);
5621 MI.eraseFromParent();
5622 return true;
5623}
5624
5625bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I,
5626 MachineRegisterInfo &MRI) {
5627 Register Dst = I.getWritebackReg();
5628 Register Val = I.getValueReg();
5629 Register Base = I.getBaseReg();
5630 Register Offset = I.getOffsetReg();
5631 LLT ValTy = MRI.getType(Val);
5632 assert(ValTy.getSizeInBits() <= 128 && "Unexpected type for indexed store");
5633
5634 unsigned Opc = 0;
5635 if (I.isPre()) {
5636 static constexpr unsigned GPROpcodes[] = {
5637 AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre,
5638 AArch64::STRXpre};
5639 static constexpr unsigned FPROpcodes[] = {
5640 AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre,
5641 AArch64::STRQpre};
5642
5643 if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5644 Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5645 else
5646 Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5647 } else {
5648 static constexpr unsigned GPROpcodes[] = {
5649 AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
5650 AArch64::STRXpost};
5651 static constexpr unsigned FPROpcodes[] = {
5652 AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,
5653 AArch64::STRDpost, AArch64::STRQpost};
5654
5655 if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5656 Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5657 else
5658 Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5659 }
5660
5661 auto Cst = getIConstantVRegVal(Offset, MRI);
5662 if (!Cst)
5663 return false; // Shouldn't happen, but just in case.
5664 auto Str =
5665 MIB.buildInstr(Opc, {Dst}, {Val, Base}).addImm(Cst->getSExtValue());
5666 Str.cloneMemRefs(I);
5667 constrainSelectedInstRegOperands(*Str, TII, TRI, RBI);
5668 I.eraseFromParent();
5669 return true;
5670}
5671
5672MachineInstr *
5673AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
5674 MachineIRBuilder &MIRBuilder,
5675 MachineRegisterInfo &MRI) {
5676 LLT DstTy = MRI.getType(Dst);
5677 unsigned DstSize = DstTy.getSizeInBits();
5678 assert((DstSize == 64 || DstSize == 128) &&
5679 "Unexpected vector constant size");
5680
5681 if (CV->isNullValue()) {
5682 if (DstSize == 128) {
5683 auto Mov =
5684 MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
5685 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5686 return &*Mov;
5687 }
5688
5689 if (DstSize == 64) {
5690 auto Mov =
5691 MIRBuilder
5692 .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
5693 .addImm(0);
5694 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
5695 .addReg(Mov.getReg(0), 0, AArch64::dsub);
5696 RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
5697 return &*Copy;
5698 }
5699 }
5700
5701 if (Constant *SplatValue = CV->getSplatValue()) {
5702 APInt SplatValueAsInt =
5703 isa<ConstantFP>(SplatValue)
5704 ? cast<ConstantFP>(SplatValue)->getValueAPF().bitcastToAPInt()
5705 : SplatValue->getUniqueInteger();
5706 APInt DefBits = APInt::getSplat(
5707 DstSize, SplatValueAsInt.trunc(DstTy.getScalarSizeInBits()));
5708 auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * {
5709 MachineInstr *NewOp;
5710 bool Inv = false;
5711 if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, DefBits, MIRBuilder)) ||
5712 (NewOp =
5713 tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5714 (NewOp =
5715 tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5716 (NewOp =
5717 tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5718 (NewOp = tryAdvSIMDModImm8(Dst, DstSize, DefBits, MIRBuilder)) ||
5719 (NewOp = tryAdvSIMDModImmFP(Dst, DstSize, DefBits, MIRBuilder)))
5720 return NewOp;
5721
5722 DefBits = ~DefBits;
5723 Inv = true;
5724 if ((NewOp =
5725 tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5726 (NewOp =
5727 tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5728 (NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)))
5729 return NewOp;
5730 return nullptr;
5731 };
5732
5733 if (auto *NewOp = TryMOVIWithBits(DefBits))
5734 return NewOp;
5735
5736 // See if a fneg of the constant can be materialized with a MOVI, etc
5737 auto TryWithFNeg = [&](APInt DefBits, int NumBits,
5738 unsigned NegOpc) -> MachineInstr * {
5739 // FNegate each sub-element of the constant
5740 APInt Neg = APInt::getHighBitsSet(NumBits, 1).zext(DstSize);
5741 APInt NegBits(DstSize, 0);
5742 unsigned NumElts = DstSize / NumBits;
5743 for (unsigned i = 0; i < NumElts; i++)
5744 NegBits |= Neg << (NumBits * i);
5745 NegBits = DefBits ^ NegBits;
5746
5747 // Try to create the new constants with MOVI, and if so generate a fneg
5748 // for it.
5749 if (auto *NewOp = TryMOVIWithBits(NegBits)) {
5750 Register NewDst = MRI.createVirtualRegister(
5751 DstSize == 64 ? &AArch64::FPR64RegClass : &AArch64::FPR128RegClass);
5752 NewOp->getOperand(0).setReg(NewDst);
5753 return MIRBuilder.buildInstr(NegOpc, {Dst}, {NewDst});
5754 }
5755 return nullptr;
5756 };
5757 MachineInstr *R;
5758 if ((R = TryWithFNeg(DefBits, 32,
5759 DstSize == 64 ? AArch64::FNEGv2f32
5760 : AArch64::FNEGv4f32)) ||
5761 (R = TryWithFNeg(DefBits, 64,
5762 DstSize == 64 ? AArch64::FNEGDr
5763 : AArch64::FNEGv2f64)) ||
5764 (STI.hasFullFP16() &&
5765 (R = TryWithFNeg(DefBits, 16,
5766 DstSize == 64 ? AArch64::FNEGv4f16
5767 : AArch64::FNEGv8f16))))
5768 return R;
5769 }
5770
5771 auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
5772 if (!CPLoad) {
5773 LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
5774 return nullptr;
5775 }
5776
5777 auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
5778 RBI.constrainGenericRegister(
5779 Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
5780 return &*Copy;
5781}
5782
5783bool AArch64InstructionSelector::tryOptConstantBuildVec(
5784 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
5785 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5786 unsigned DstSize = DstTy.getSizeInBits();
5787 assert(DstSize <= 128 && "Unexpected build_vec type!");
5788 if (DstSize < 32)
5789 return false;
5790 // Check if we're building a constant vector, in which case we want to
5791 // generate a constant pool load instead of a vector insert sequence.
5792 SmallVector<Constant *, 16> Csts;
5793 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
5794 // Try to find G_CONSTANT or G_FCONSTANT
5795 auto *OpMI =
5796 getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
5797 if (OpMI)
5798 Csts.emplace_back(
5799 const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
5800 else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
5801 I.getOperand(Idx).getReg(), MRI)))
5802 Csts.emplace_back(
5803 const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
5804 else
5805 return false;
5806 }
5807 Constant *CV = ConstantVector::get(Csts);
5808 if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
5809 return false;
5810 I.eraseFromParent();
5811 return true;
5812}
5813
5814bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
5815 MachineInstr &I, MachineRegisterInfo &MRI) {
5816 // Given:
5817 // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
5818 //
5819 // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
5820 Register Dst = I.getOperand(0).getReg();
5821 Register EltReg = I.getOperand(1).getReg();
5822 LLT EltTy = MRI.getType(EltReg);
5823 // If the index isn't on the same bank as its elements, then this can't be a
5824 // SUBREG_TO_REG.
5825 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5826 const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
5827 if (EltRB != DstRB)
5828 return false;
5829 if (any_of(drop_begin(I.operands(), 2), [&MRI](const MachineOperand &Op) {
5830 return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(), MRI);
5831 }))
5832 return false;
5833 unsigned SubReg;
5834 const TargetRegisterClass *EltRC = getRegClassForTypeOnBank(EltTy, EltRB);
5835 if (!EltRC)
5836 return false;
5837 const TargetRegisterClass *DstRC =
5838 getRegClassForTypeOnBank(MRI.getType(Dst), DstRB);
5839 if (!DstRC)
5840 return false;
5841 if (!getSubRegForClass(EltRC, TRI, SubReg))
5842 return false;
5843 auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
5844 .addImm(0)
5845 .addUse(EltReg)
5846 .addImm(SubReg);
5847 I.eraseFromParent();
5848 constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);
5849 return RBI.constrainGenericRegister(Dst, *DstRC, MRI);
5850}
5851
5852bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
5853 MachineRegisterInfo &MRI) {
5854 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5855 // Until we port more of the optimized selections, for now just use a vector
5856 // insert sequence.
5857 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5858 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
5859 unsigned EltSize = EltTy.getSizeInBits();
5860
5861 if (tryOptConstantBuildVec(I, DstTy, MRI))
5862 return true;
5863 if (tryOptBuildVecToSubregToReg(I, MRI))
5864 return true;
5865
5866 if (EltSize != 8 && EltSize != 16 && EltSize != 32 && EltSize != 64)
5867 return false; // Don't support all element types yet.
5868 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
5869
5870 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5871 MachineInstr *ScalarToVec =
5872 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
5873 I.getOperand(1).getReg(), MIB);
5874 if (!ScalarToVec)
5875 return false;
5876
5877 Register DstVec = ScalarToVec->getOperand(0).getReg();
5878 unsigned DstSize = DstTy.getSizeInBits();
5879
5880 // Keep track of the last MI we inserted. Later on, we might be able to save
5881 // a copy using it.
5882 MachineInstr *PrevMI = ScalarToVec;
5883 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
5884 // Note that if we don't do a subregister copy, we can end up making an
5885 // extra register.
5886 Register OpReg = I.getOperand(i).getReg();
5887 // Do not emit inserts for undefs
5888 if (!getOpcodeDef<GImplicitDef>(OpReg, MRI)) {
5889 PrevMI = &*emitLaneInsert(std::nullopt, DstVec, OpReg, i - 1, RB, MIB);
5890 DstVec = PrevMI->getOperand(0).getReg();
5891 }
5892 }
5893
5894 // If DstTy's size in bits is less than 128, then emit a subregister copy
5895 // from DstVec to the last register we've defined.
5896 if (DstSize < 128) {
5897 // Force this to be FPR using the destination vector.
5898 const TargetRegisterClass *RC =
5899 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5900 if (!RC)
5901 return false;
5902 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5903 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5904 return false;
5905 }
5906
5907 unsigned SubReg = 0;
5908 if (!getSubRegForClass(RC, TRI, SubReg))
5909 return false;
5910 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5911 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
5912 << "\n");
5913 return false;
5914 }
5915
5916 Register Reg = MRI.createVirtualRegister(RC);
5917 Register DstReg = I.getOperand(0).getReg();
5918
5919 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0, SubReg);
5920 MachineOperand &RegOp = I.getOperand(1);
5921 RegOp.setReg(Reg);
5922 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5923 } else {
5924 // We either have a vector with all elements (except the first one) undef or
5925 // at least one non-undef non-first element. In the first case, we need to
5926 // constrain the output register ourselves as we may have generated an
5927 // INSERT_SUBREG operation which is a generic operation for which the
5928 // output regclass cannot be automatically chosen.
5929 //
5930 // In the second case, there is no need to do this as it may generate an
5931 // instruction like INSvi32gpr where the regclass can be automatically
5932 // chosen.
5933 //
5934 // Also, we save a copy by re-using the destination register on the final
5935 // insert.
5936 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
5937 constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
5938
5939 Register DstReg = PrevMI->getOperand(0).getReg();
5940 if (PrevMI == ScalarToVec && DstReg.isVirtual()) {
5941 const TargetRegisterClass *RC =
5942 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5943 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5944 }
5945 }
5946
5947 I.eraseFromParent();
5948 return true;
5949}
5950
5951bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
5952 unsigned NumVecs,
5953 MachineInstr &I) {
5954 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5955 assert(Opc && "Expected an opcode?");
5956 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5957 auto &MRI = *MIB.getMRI();
5958 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5959 unsigned Size = Ty.getSizeInBits();
5960 assert((Size == 64 || Size == 128) &&
5961 "Destination must be 64 bits or 128 bits?");
5962 unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
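// The LDn instruction defines a single register-tuple result; the individual
// vectors are then peeled off with subregister copies using the consecutive
// dsub0+Idx / qsub0+Idx indices.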
5963 auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg();
5964 assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
5965 auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr});
5966 Load.cloneMemRefs(I);
5967 constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
5968 Register SelectedLoadDst = Load->getOperand(0).getReg();
5969 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
5970 auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {})
5971 .addReg(SelectedLoadDst, 0, SubReg + Idx);
5972 // Emit the subreg copies and immediately select them.
5973 // FIXME: We should refactor our copy code into an emitCopy helper and
5974 // clean up uses of this pattern elsewhere in the selector.
5975 selectCopy(*Vec, TII, MRI, TRI, RBI);
5976 }
5977 return true;
5978}
5979
5980bool AArch64InstructionSelector::selectVectorLoadLaneIntrinsic(
5981 unsigned Opc, unsigned NumVecs, MachineInstr &I) {
5982 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5983 assert(Opc && "Expected an opcode?");
5984 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5985 auto &MRI = *MIB.getMRI();
5986 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5987 bool Narrow = Ty.getSizeInBits() == 64;
5988
5989 auto FirstSrcRegIt = I.operands_begin() + NumVecs + 1;
5990 SmallVector<Register, 4> Regs(NumVecs);
5991 std::transform(FirstSrcRegIt, FirstSrcRegIt + NumVecs, Regs.begin(),
5992 [](auto MO) { return MO.getReg(); });
5993
5994 if (Narrow) {
5995 transform(Regs, Regs.begin(), [this](Register Reg) {
5996 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
5997 ->getOperand(0)
5998 .getReg();
5999 });
6000 Ty = Ty.multiplyElements(2);
6001 }
6002
6003 Register Tuple = createQTuple(Regs, MIB);
6004 auto LaneNo = getIConstantVRegVal((FirstSrcRegIt + NumVecs)->getReg(), MRI);
6005 if (!LaneNo)
6006 return false;
6007
6008 Register Ptr = (FirstSrcRegIt + NumVecs + 1)->getReg();
6009 auto Load = MIB.buildInstr(Opc, {Ty}, {})
6010 .addReg(Tuple)
6011 .addImm(LaneNo->getZExtValue())
6012 .addReg(Ptr);
6013 Load.cloneMemRefs(I);
6014 constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
6015 Register SelectedLoadDst = Load->getOperand(0).getReg();
6016 unsigned SubReg = AArch64::qsub0;
6017 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
6018 auto Vec = MIB.buildInstr(TargetOpcode::COPY,
6019 {Narrow ? DstOp(&AArch64::FPR128RegClass)
6020 : DstOp(I.getOperand(Idx).getReg())},
6021 {})
6022 .addReg(SelectedLoadDst, 0, SubReg + Idx);
6023 Register WideReg = Vec.getReg(0);
6024 // Emit the subreg copies and immediately select them.
6025 selectCopy(*Vec, TII, MRI, TRI, RBI);
6026 if (Narrow &&
6027 !emitNarrowVector(I.getOperand(Idx).getReg(), WideReg, MIB, MRI))
6028 return false;
6029 }
6030 return true;
6031}
6032
6033void AArch64InstructionSelector::selectVectorStoreIntrinsic(MachineInstr &I,
6034 unsigned NumVecs,
6035 unsigned Opc) {
6036 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
6037 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6038 Register Ptr = I.getOperand(1 + NumVecs).getReg();
6039
6040 SmallVector<Register, 2> Regs(NumVecs);
6041 std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
6042 Regs.begin(), [](auto MO) { return MO.getReg(); });
6043
6044 Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
6045 : createDTuple(Regs, MIB);
6046 auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});
6047 Store.cloneMemRefs(I);
6048 constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
6049}
6050
6051bool AArch64InstructionSelector::selectVectorStoreLaneIntrinsic(
6052 MachineInstr &I, unsigned NumVecs, unsigned Opc) {
6053 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
6054 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6055 bool Narrow = Ty.getSizeInBits() == 64;
6056
6057 SmallVector<Register, 2> Regs(NumVecs);
6058 std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
6059 Regs.begin(), [](auto MO) { return MO.getReg(); });
6060
6061 if (Narrow)
6062 transform(Regs, Regs.begin(), [this](Register Reg) {
6063 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
6064 ->getOperand(0)
6065 .getReg();
6066 });
6067
6068 Register Tuple = createQTuple(Regs, MIB);
6069
6070 auto LaneNo = getIConstantVRegVal(I.getOperand(1 + NumVecs).getReg(), MRI);
6071 if (!LaneNo)
6072 return false;
6073 Register Ptr = I.getOperand(1 + NumVecs + 1).getReg();
6074 auto Store = MIB.buildInstr(Opc, {}, {})
6075 .addReg(Tuple)
6076 .addImm(LaneNo->getZExtValue())
6077 .addReg(Ptr);
6078 Store.cloneMemRefs(I);
6079 constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
6080 return true;
6081}
6082
6083bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
6084 MachineInstr &I, MachineRegisterInfo &MRI) {
6085 // Find the intrinsic ID.
6086 unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
6087
6088 const LLT S8 = LLT::scalar(8);
6089 const LLT S16 = LLT::scalar(16);
6090 const LLT S32 = LLT::scalar(32);
6091 const LLT S64 = LLT::scalar(64);
6092 const LLT P0 = LLT::pointer(0, 64);
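// Each NEON structure load/store intrinsic below is mapped to a concrete
// LD1/LD2/LD3/LD4 (or STn) opcode purely from the fixed vector type of the
// value being loaded or stored; the "Ty == S64 || Ty == P0" cases cover the
// single-element 1d forms.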
6093 // Select the instruction.
6094 switch (IntrinID) {
6095 default:
6096 return false;
6097 case Intrinsic::aarch64_ldxp:
6098 case Intrinsic::aarch64_ldaxp: {
6099 auto NewI = MIB.buildInstr(
6100 IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
6101 {I.getOperand(0).getReg(), I.getOperand(1).getReg()},
6102 {I.getOperand(3)});
6103 NewI.cloneMemRefs(I);
6104 constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
6105 break;
6106 }
6107 case Intrinsic::aarch64_neon_ld1x2: {
6108 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6109 unsigned Opc = 0;
6110 if (Ty == LLT::fixed_vector(8, S8))
6111 Opc = AArch64::LD1Twov8b;
6112 else if (Ty == LLT::fixed_vector(16, S8))
6113 Opc = AArch64::LD1Twov16b;
6114 else if (Ty == LLT::fixed_vector(4, S16))
6115 Opc = AArch64::LD1Twov4h;
6116 else if (Ty == LLT::fixed_vector(8, S16))
6117 Opc = AArch64::LD1Twov8h;
6118 else if (Ty == LLT::fixed_vector(2, S32))
6119 Opc = AArch64::LD1Twov2s;
6120 else if (Ty == LLT::fixed_vector(4, S32))
6121 Opc = AArch64::LD1Twov4s;
6122 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6123 Opc = AArch64::LD1Twov2d;
6124 else if (Ty == S64 || Ty == P0)
6125 Opc = AArch64::LD1Twov1d;
6126 else
6127 llvm_unreachable("Unexpected type for ld1x2!");
6128 selectVectorLoadIntrinsic(Opc, 2, I);
6129 break;
6130 }
6131 case Intrinsic::aarch64_neon_ld1x3: {
6132 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6133 unsigned Opc = 0;
6134 if (Ty == LLT::fixed_vector(8, S8))
6135 Opc = AArch64::LD1Threev8b;
6136 else if (Ty == LLT::fixed_vector(16, S8))
6137 Opc = AArch64::LD1Threev16b;
6138 else if (Ty == LLT::fixed_vector(4, S16))
6139 Opc = AArch64::LD1Threev4h;
6140 else if (Ty == LLT::fixed_vector(8, S16))
6141 Opc = AArch64::LD1Threev8h;
6142 else if (Ty == LLT::fixed_vector(2, S32))
6143 Opc = AArch64::LD1Threev2s;
6144 else if (Ty == LLT::fixed_vector(4, S32))
6145 Opc = AArch64::LD1Threev4s;
6146 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6147 Opc = AArch64::LD1Threev2d;
6148 else if (Ty == S64 || Ty == P0)
6149 Opc = AArch64::LD1Threev1d;
6150 else
6151 llvm_unreachable("Unexpected type for ld1x3!");
6152 selectVectorLoadIntrinsic(Opc, 3, I);
6153 break;
6154 }
6155 case Intrinsic::aarch64_neon_ld1x4: {
6156 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6157 unsigned Opc = 0;
6158 if (Ty == LLT::fixed_vector(8, S8))
6159 Opc = AArch64::LD1Fourv8b;
6160 else if (Ty == LLT::fixed_vector(16, S8))
6161 Opc = AArch64::LD1Fourv16b;
6162 else if (Ty == LLT::fixed_vector(4, S16))
6163 Opc = AArch64::LD1Fourv4h;
6164 else if (Ty == LLT::fixed_vector(8, S16))
6165 Opc = AArch64::LD1Fourv8h;
6166 else if (Ty == LLT::fixed_vector(2, S32))
6167 Opc = AArch64::LD1Fourv2s;
6168 else if (Ty == LLT::fixed_vector(4, S32))
6169 Opc = AArch64::LD1Fourv4s;
6170 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6171 Opc = AArch64::LD1Fourv2d;
6172 else if (Ty == S64 || Ty == P0)
6173 Opc = AArch64::LD1Fourv1d;
6174 else
6175 llvm_unreachable("Unexpected type for ld1x4!");
6176 selectVectorLoadIntrinsic(Opc, 4, I);
6177 break;
6178 }
6179 case Intrinsic::aarch64_neon_ld2: {
6180 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6181 unsigned Opc = 0;
6182 if (Ty == LLT::fixed_vector(8, S8))
6183 Opc = AArch64::LD2Twov8b;
6184 else if (Ty == LLT::fixed_vector(16, S8))
6185 Opc = AArch64::LD2Twov16b;
6186 else if (Ty == LLT::fixed_vector(4, S16))
6187 Opc = AArch64::LD2Twov4h;
6188 else if (Ty == LLT::fixed_vector(8, S16))
6189 Opc = AArch64::LD2Twov8h;
6190 else if (Ty == LLT::fixed_vector(2, S32))
6191 Opc = AArch64::LD2Twov2s;
6192 else if (Ty == LLT::fixed_vector(4, S32))
6193 Opc = AArch64::LD2Twov4s;
6194 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6195 Opc = AArch64::LD2Twov2d;
6196 else if (Ty == S64 || Ty == P0)
6197 Opc = AArch64::LD1Twov1d;
6198 else
6199 llvm_unreachable("Unexpected type for ld2!");
6200 selectVectorLoadIntrinsic(Opc, 2, I);
6201 break;
6202 }
6203 case Intrinsic::aarch64_neon_ld2lane: {
6204 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6205 unsigned Opc;
6206 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6207 Opc = AArch64::LD2i8;
6208 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6209 Opc = AArch64::LD2i16;
6210 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6211 Opc = AArch64::LD2i32;
6212 else if (Ty == LLT::fixed_vector(2, S64) ||
6213 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6214 Opc = AArch64::LD2i64;
6215 else
6216 llvm_unreachable("Unexpected type for ld2lane!");
6217 if (!selectVectorLoadLaneIntrinsic(Opc, 2, I))
6218 return false;
6219 break;
6220 }
6221 case Intrinsic::aarch64_neon_ld2r: {
6222 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6223 unsigned Opc = 0;
6224 if (Ty == LLT::fixed_vector(8, S8))
6225 Opc = AArch64::LD2Rv8b;
6226 else if (Ty == LLT::fixed_vector(16, S8))
6227 Opc = AArch64::LD2Rv16b;
6228 else if (Ty == LLT::fixed_vector(4, S16))
6229 Opc = AArch64::LD2Rv4h;
6230 else if (Ty == LLT::fixed_vector(8, S16))
6231 Opc = AArch64::LD2Rv8h;
6232 else if (Ty == LLT::fixed_vector(2, S32))
6233 Opc = AArch64::LD2Rv2s;
6234 else if (Ty == LLT::fixed_vector(4, S32))
6235 Opc = AArch64::LD2Rv4s;
6236 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6237 Opc = AArch64::LD2Rv2d;
6238 else if (Ty == S64 || Ty == P0)
6239 Opc = AArch64::LD2Rv1d;
6240 else
6241 llvm_unreachable("Unexpected type for ld2r!");
6242 selectVectorLoadIntrinsic(Opc, 2, I);
6243 break;
6244 }
6245 case Intrinsic::aarch64_neon_ld3: {
6246 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6247 unsigned Opc = 0;
6248 if (Ty == LLT::fixed_vector(8, S8))
6249 Opc = AArch64::LD3Threev8b;
6250 else if (Ty == LLT::fixed_vector(16, S8))
6251 Opc = AArch64::LD3Threev16b;
6252 else if (Ty == LLT::fixed_vector(4, S16))
6253 Opc = AArch64::LD3Threev4h;
6254 else if (Ty == LLT::fixed_vector(8, S16))
6255 Opc = AArch64::LD3Threev8h;
6256 else if (Ty == LLT::fixed_vector(2, S32))
6257 Opc = AArch64::LD3Threev2s;
6258 else if (Ty == LLT::fixed_vector(4, S32))
6259 Opc = AArch64::LD3Threev4s;
6260 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6261 Opc = AArch64::LD3Threev2d;
6262 else if (Ty == S64 || Ty == P0)
6263 Opc = AArch64::LD1Threev1d;
6264 else
6265 llvm_unreachable("Unexpected type for ld3!");
6266 selectVectorLoadIntrinsic(Opc, 3, I);
6267 break;
6268 }
6269 case Intrinsic::aarch64_neon_ld3lane: {
6270 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6271 unsigned Opc;
6272 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6273 Opc = AArch64::LD3i8;
6274 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6275 Opc = AArch64::LD3i16;
6276 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6277 Opc = AArch64::LD3i32;
6278 else if (Ty == LLT::fixed_vector(2, S64) ||
6279 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6280 Opc = AArch64::LD3i64;
6281 else
6282 llvm_unreachable("Unexpected type for ld3lane!");
6283 if (!selectVectorLoadLaneIntrinsic(Opc, 3, I))
6284 return false;
6285 break;
6286 }
6287 case Intrinsic::aarch64_neon_ld3r: {
6288 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6289 unsigned Opc = 0;
6290 if (Ty == LLT::fixed_vector(8, S8))
6291 Opc = AArch64::LD3Rv8b;
6292 else if (Ty == LLT::fixed_vector(16, S8))
6293 Opc = AArch64::LD3Rv16b;
6294 else if (Ty == LLT::fixed_vector(4, S16))
6295 Opc = AArch64::LD3Rv4h;
6296 else if (Ty == LLT::fixed_vector(8, S16))
6297 Opc = AArch64::LD3Rv8h;
6298 else if (Ty == LLT::fixed_vector(2, S32))
6299 Opc = AArch64::LD3Rv2s;
6300 else if (Ty == LLT::fixed_vector(4, S32))
6301 Opc = AArch64::LD3Rv4s;
6302 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6303 Opc = AArch64::LD3Rv2d;
6304 else if (Ty == S64 || Ty == P0)
6305 Opc = AArch64::LD3Rv1d;
6306 else
6307 llvm_unreachable("Unexpected type for ld3r!");
6308 selectVectorLoadIntrinsic(Opc, 3, I);
6309 break;
6310 }
6311 case Intrinsic::aarch64_neon_ld4: {
6312 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6313 unsigned Opc = 0;
6314 if (Ty == LLT::fixed_vector(8, S8))
6315 Opc = AArch64::LD4Fourv8b;
6316 else if (Ty == LLT::fixed_vector(16, S8))
6317 Opc = AArch64::LD4Fourv16b;
6318 else if (Ty == LLT::fixed_vector(4, S16))
6319 Opc = AArch64::LD4Fourv4h;
6320 else if (Ty == LLT::fixed_vector(8, S16))
6321 Opc = AArch64::LD4Fourv8h;
6322 else if (Ty == LLT::fixed_vector(2, S32))
6323 Opc = AArch64::LD4Fourv2s;
6324 else if (Ty == LLT::fixed_vector(4, S32))
6325 Opc = AArch64::LD4Fourv4s;
6326 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6327 Opc = AArch64::LD4Fourv2d;
6328 else if (Ty == S64 || Ty == P0)
6329 Opc = AArch64::LD1Fourv1d;
6330 else
6331 llvm_unreachable("Unexpected type for ld4!");
6332 selectVectorLoadIntrinsic(Opc, 4, I);
6333 break;
6334 }
6335 case Intrinsic::aarch64_neon_ld4lane: {
6336 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6337 unsigned Opc;
6338 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6339 Opc = AArch64::LD4i8;
6340 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6341 Opc = AArch64::LD4i16;
6342 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6343 Opc = AArch64::LD4i32;
6344 else if (Ty == LLT::fixed_vector(2, S64) ||
6345 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6346 Opc = AArch64::LD4i64;
6347 else
6348 llvm_unreachable("Unexpected type for ld4lane!");
6349 if (!selectVectorLoadLaneIntrinsic(Opc, 4, I))
6350 return false;
6351 break;
6352 }
6353 case Intrinsic::aarch64_neon_ld4r: {
6354 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6355 unsigned Opc = 0;
6356 if (Ty == LLT::fixed_vector(8, S8))
6357 Opc = AArch64::LD4Rv8b;
6358 else if (Ty == LLT::fixed_vector(16, S8))
6359 Opc = AArch64::LD4Rv16b;
6360 else if (Ty == LLT::fixed_vector(4, S16))
6361 Opc = AArch64::LD4Rv4h;
6362 else if (Ty == LLT::fixed_vector(8, S16))
6363 Opc = AArch64::LD4Rv8h;
6364 else if (Ty == LLT::fixed_vector(2, S32))
6365 Opc = AArch64::LD4Rv2s;
6366 else if (Ty == LLT::fixed_vector(4, S32))
6367 Opc = AArch64::LD4Rv4s;
6368 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6369 Opc = AArch64::LD4Rv2d;
6370 else if (Ty == S64 || Ty == P0)
6371 Opc = AArch64::LD4Rv1d;
6372 else
6373 llvm_unreachable("Unexpected type for ld4r!");
6374 selectVectorLoadIntrinsic(Opc, 4, I);
6375 break;
6376 }
6377 case Intrinsic::aarch64_neon_st1x2: {
6378 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6379 unsigned Opc;
6380 if (Ty == LLT::fixed_vector(8, S8))
6381 Opc = AArch64::ST1Twov8b;
6382 else if (Ty == LLT::fixed_vector(16, S8))
6383 Opc = AArch64::ST1Twov16b;
6384 else if (Ty == LLT::fixed_vector(4, S16))
6385 Opc = AArch64::ST1Twov4h;
6386 else if (Ty == LLT::fixed_vector(8, S16))
6387 Opc = AArch64::ST1Twov8h;
6388 else if (Ty == LLT::fixed_vector(2, S32))
6389 Opc = AArch64::ST1Twov2s;
6390 else if (Ty == LLT::fixed_vector(4, S32))
6391 Opc = AArch64::ST1Twov4s;
6392 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6393 Opc = AArch64::ST1Twov2d;
6394 else if (Ty == S64 || Ty == P0)
6395 Opc = AArch64::ST1Twov1d;
6396 else
6397 llvm_unreachable("Unexpected type for st1x2!");
6398 selectVectorStoreIntrinsic(I, 2, Opc);
6399 break;
6400 }
6401 case Intrinsic::aarch64_neon_st1x3: {
6402 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6403 unsigned Opc;
6404 if (Ty == LLT::fixed_vector(8, S8))
6405 Opc = AArch64::ST1Threev8b;
6406 else if (Ty == LLT::fixed_vector(16, S8))
6407 Opc = AArch64::ST1Threev16b;
6408 else if (Ty == LLT::fixed_vector(4, S16))
6409 Opc = AArch64::ST1Threev4h;
6410 else if (Ty == LLT::fixed_vector(8, S16))
6411 Opc = AArch64::ST1Threev8h;
6412 else if (Ty == LLT::fixed_vector(2, S32))
6413 Opc = AArch64::ST1Threev2s;
6414 else if (Ty == LLT::fixed_vector(4, S32))
6415 Opc = AArch64::ST1Threev4s;
6416 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6417 Opc = AArch64::ST1Threev2d;
6418 else if (Ty == S64 || Ty == P0)
6419 Opc = AArch64::ST1Threev1d;
6420 else
6421 llvm_unreachable("Unexpected type for st1x3!");
6422 selectVectorStoreIntrinsic(I, 3, Opc);
6423 break;
6424 }
6425 case Intrinsic::aarch64_neon_st1x4: {
6426 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6427 unsigned Opc;
6428 if (Ty == LLT::fixed_vector(8, S8))
6429 Opc = AArch64::ST1Fourv8b;
6430 else if (Ty == LLT::fixed_vector(16, S8))
6431 Opc = AArch64::ST1Fourv16b;
6432 else if (Ty == LLT::fixed_vector(4, S16))
6433 Opc = AArch64::ST1Fourv4h;
6434 else if (Ty == LLT::fixed_vector(8, S16))
6435 Opc = AArch64::ST1Fourv8h;
6436 else if (Ty == LLT::fixed_vector(2, S32))
6437 Opc = AArch64::ST1Fourv2s;
6438 else if (Ty == LLT::fixed_vector(4, S32))
6439 Opc = AArch64::ST1Fourv4s;
6440 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6441 Opc = AArch64::ST1Fourv2d;
6442 else if (Ty == S64 || Ty == P0)
6443 Opc = AArch64::ST1Fourv1d;
6444 else
6445 llvm_unreachable("Unexpected type for st1x4!");
6446 selectVectorStoreIntrinsic(I, 4, Opc);
6447 break;
6448 }
6449 case Intrinsic::aarch64_neon_st2: {
6450 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6451 unsigned Opc;
6452 if (Ty == LLT::fixed_vector(8, S8))
6453 Opc = AArch64::ST2Twov8b;
6454 else if (Ty == LLT::fixed_vector(16, S8))
6455 Opc = AArch64::ST2Twov16b;
6456 else if (Ty == LLT::fixed_vector(4, S16))
6457 Opc = AArch64::ST2Twov4h;
6458 else if (Ty == LLT::fixed_vector(8, S16))
6459 Opc = AArch64::ST2Twov8h;
6460 else if (Ty == LLT::fixed_vector(2, S32))
6461 Opc = AArch64::ST2Twov2s;
6462 else if (Ty == LLT::fixed_vector(4, S32))
6463 Opc = AArch64::ST2Twov4s;
6464 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6465 Opc = AArch64::ST2Twov2d;
6466 else if (Ty == S64 || Ty == P0)
6467 Opc = AArch64::ST1Twov1d;
6468 else
6469 llvm_unreachable("Unexpected type for st2!");
6470 selectVectorStoreIntrinsic(I, 2, Opc);
6471 break;
6472 }
6473 case Intrinsic::aarch64_neon_st3: {
6474 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6475 unsigned Opc;
6476 if (Ty == LLT::fixed_vector(8, S8))
6477 Opc = AArch64::ST3Threev8b;
6478 else if (Ty == LLT::fixed_vector(16, S8))
6479 Opc = AArch64::ST3Threev16b;
6480 else if (Ty == LLT::fixed_vector(4, S16))
6481 Opc = AArch64::ST3Threev4h;
6482 else if (Ty == LLT::fixed_vector(8, S16))
6483 Opc = AArch64::ST3Threev8h;
6484 else if (Ty == LLT::fixed_vector(2, S32))
6485 Opc = AArch64::ST3Threev2s;
6486 else if (Ty == LLT::fixed_vector(4, S32))
6487 Opc = AArch64::ST3Threev4s;
6488 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6489 Opc = AArch64::ST3Threev2d;
6490 else if (Ty == S64 || Ty == P0)
6491 Opc = AArch64::ST1Threev1d;
6492 else
6493 llvm_unreachable("Unexpected type for st3!");
6494 selectVectorStoreIntrinsic(I, 3, Opc);
6495 break;
6496 }
6497 case Intrinsic::aarch64_neon_st4: {
6498 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6499 unsigned Opc;
6500 if (Ty == LLT::fixed_vector(8, S8))
6501 Opc = AArch64::ST4Fourv8b;
6502 else if (Ty == LLT::fixed_vector(16, S8))
6503 Opc = AArch64::ST4Fourv16b;
6504 else if (Ty == LLT::fixed_vector(4, S16))
6505 Opc = AArch64::ST4Fourv4h;
6506 else if (Ty == LLT::fixed_vector(8, S16))
6507 Opc = AArch64::ST4Fourv8h;
6508 else if (Ty == LLT::fixed_vector(2, S32))
6509 Opc = AArch64::ST4Fourv2s;
6510 else if (Ty == LLT::fixed_vector(4, S32))
6511 Opc = AArch64::ST4Fourv4s;
6512 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6513 Opc = AArch64::ST4Fourv2d;
6514 else if (Ty == S64 || Ty == P0)
6515 Opc = AArch64::ST1Fourv1d;
6516 else
6517 llvm_unreachable("Unexpected type for st4!");
6518 selectVectorStoreIntrinsic(I, 4, Opc);
6519 break;
6520 }
6521 case Intrinsic::aarch64_neon_st2lane: {
6522 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6523 unsigned Opc;
6524 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6525 Opc = AArch64::ST2i8;
6526 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6527 Opc = AArch64::ST2i16;
6528 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6529 Opc = AArch64::ST2i32;
6530 else if (Ty == LLT::fixed_vector(2, S64) ||
6531 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6532 Opc = AArch64::ST2i64;
6533 else
6534 llvm_unreachable("Unexpected type for st2lane!");
6535 if (!selectVectorStoreLaneIntrinsic(I, 2, Opc))
6536 return false;
6537 break;
6538 }
6539 case Intrinsic::aarch64_neon_st3lane: {
6540 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6541 unsigned Opc;
6542 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6543 Opc = AArch64::ST3i8;
6544 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6545 Opc = AArch64::ST3i16;
6546 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6547 Opc = AArch64::ST3i32;
6548 else if (Ty == LLT::fixed_vector(2, S64) ||
6549 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6550 Opc = AArch64::ST3i64;
6551 else
6552 llvm_unreachable("Unexpected type for st3lane!");
6553 if (!selectVectorStoreLaneIntrinsic(I, 3, Opc))
6554 return false;
6555 break;
6556 }
6557 case Intrinsic::aarch64_neon_st4lane: {
6558 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6559 unsigned Opc;
6560 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6561 Opc = AArch64::ST4i8;
6562 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6563 Opc = AArch64::ST4i16;
6564 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6565 Opc = AArch64::ST4i32;
6566 else if (Ty == LLT::fixed_vector(2, S64) ||
6567 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6568 Opc = AArch64::ST4i64;
6569 else
6570 llvm_unreachable("Unexpected type for st4lane!");
6571 if (!selectVectorStoreLaneIntrinsic(I, 4, Opc))
6572 return false;
6573 break;
6574 }
6575 case Intrinsic::aarch64_mops_memset_tag: {
6576 // Transform
6577 // %dst:gpr(p0) = \
6578 // G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.mops.memset.tag),
6579 // \ %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64)
6580 // where %dst is updated, into
6581 // (%Rd:GPR64common, %Rn:GPR64) = \
6582 // MOPSMemorySetTaggingPseudo \
6583 // %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64
6584 // where Rd and Rn are tied.
6585 // It is expected that %val has been extended to s64 in legalization.
6586 // Note that the order of the size/value operands is swapped.
6587
6588 Register DstDef = I.getOperand(0).getReg();
6589 // I.getOperand(1) is the intrinsic function
6590 Register DstUse = I.getOperand(2).getReg();
6591 Register ValUse = I.getOperand(3).getReg();
6592 Register SizeUse = I.getOperand(4).getReg();
6593
6594 // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one.
6595 // Therefore an additional virtual register is required for the updated size
6596 // operand. This value is not accessible via the semantics of the intrinsic.
6597 Register SizeDef = MRI.createGenericVirtualRegister(LLT::scalar(64));
6598
6599 auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo,
6600 {DstDef, SizeDef}, {DstUse, SizeUse, ValUse});
6601 Memset.cloneMemRefs(I);
6602 constrainSelectedInstRegOperands(*Memset, TII, TRI, RBI);
6603 break;
6604 }
6605 }
6606
6607 I.eraseFromParent();
6608 return true;
6609}
6610
6611bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
6612 MachineRegisterInfo &MRI) {
6613 unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
6614
6615 switch (IntrinID) {
6616 default:
6617 break;
6618 case Intrinsic::ptrauth_resign: {
6619 Register DstReg = I.getOperand(0).getReg();
6620 Register ValReg = I.getOperand(2).getReg();
6621 uint64_t AUTKey = I.getOperand(3).getImm();
6622 Register AUTDisc = I.getOperand(4).getReg();
6623 uint64_t PACKey = I.getOperand(5).getImm();
6624 Register PACDisc = I.getOperand(6).getReg();
6625
6626 Register AUTAddrDisc = AUTDisc;
6627 uint16_t AUTConstDiscC = 0;
6628 std::tie(AUTConstDiscC, AUTAddrDisc) =
6629 extractPtrauthBlendDiscriminators(AUTDisc, MRI);
6630
6631 Register PACAddrDisc = PACDisc;
6632 uint16_t PACConstDiscC = 0;
6633 std::tie(PACConstDiscC, PACAddrDisc) =
6634 extractPtrauthBlendDiscriminators(PACDisc, MRI);
6635
6636 MIB.buildCopy({AArch64::X16}, {ValReg});
6637 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6638 MIB.buildInstr(AArch64::AUTPAC)
6639 .addImm(AUTKey)
6640 .addImm(AUTConstDiscC)
6641 .addUse(AUTAddrDisc)
6642 .addImm(PACKey)
6643 .addImm(PACConstDiscC)
6644 .addUse(PACAddrDisc)
6645 .constrainAllUses(TII, TRI, RBI);
6646 MIB.buildCopy({DstReg}, Register(AArch64::X16));
6647
6648 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6649 I.eraseFromParent();
6650 return true;
6651 }
6652 case Intrinsic::ptrauth_auth: {
6653 Register DstReg = I.getOperand(0).getReg();
6654 Register ValReg = I.getOperand(2).getReg();
6655 uint64_t AUTKey = I.getOperand(3).getImm();
6656 Register AUTDisc = I.getOperand(4).getReg();
6657
6658 Register AUTAddrDisc = AUTDisc;
6659 uint16_t AUTConstDiscC = 0;
6660 std::tie(AUTConstDiscC, AUTAddrDisc) =
6661 extractPtrauthBlendDiscriminators(AUTDisc, MRI);
6662
6663 if (STI.isX16X17Safer()) {
6664 MIB.buildCopy({AArch64::X16}, {ValReg});
6665 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6666 MIB.buildInstr(AArch64::AUTx16x17)
6667 .addImm(AUTKey)
6668 .addImm(AUTConstDiscC)
6669 .addUse(AUTAddrDisc)
6670 .constrainAllUses(TII, TRI, RBI);
6671 MIB.buildCopy({DstReg}, Register(AArch64::X16));
6672 } else {
6673 Register ScratchReg =
6674 MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
6675 MIB.buildInstr(AArch64::AUTxMxN)
6676 .addDef(DstReg)
6677 .addDef(ScratchReg)
6678 .addUse(ValReg)
6679 .addImm(AUTKey)
6680 .addImm(AUTConstDiscC)
6681 .addUse(AUTAddrDisc)
6682 .constrainAllUses(TII, TRI, RBI);
6683 }
6684
6685 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6686 I.eraseFromParent();
6687 return true;
6688 }
6689 case Intrinsic::frameaddress:
6690 case Intrinsic::returnaddress: {
6691 MachineFunction &MF = *I.getParent()->getParent();
6692 MachineFrameInfo &MFI = MF.getFrameInfo();
6693
6694 unsigned Depth = I.getOperand(2).getImm();
6695 Register DstReg = I.getOperand(0).getReg();
6696 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6697
6698 if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
6699 if (!MFReturnAddr) {
6700 // Insert the copy from LR/X30 into the entry block, before it can be
6701 // clobbered by anything.
6702 MFI.setReturnAddressIsTaken(true);
6703 MFReturnAddr = getFunctionLiveInPhysReg(
6704 MF, TII, AArch64::LR, AArch64::GPR64RegClass, I.getDebugLoc());
6705 }
6706
6707 if (STI.hasPAuth()) {
6708 MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
6709 } else {
6710 MIB.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
6711 MIB.buildInstr(AArch64::XPACLRI);
6712 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
6713 }
6714
6715 I.eraseFromParent();
6716 return true;
6717 }
6718
6719 MFI.setFrameAddressIsTaken(true);
6720 Register FrameAddr(AArch64::FP);
6721 while (Depth--) {
6722 Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
6723 auto Ldr =
6724 MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
6725 constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI);
6726 FrameAddr = NextFrame;
6727 }
6728
6729 if (IntrinID == Intrinsic::frameaddress)
6730 MIB.buildCopy({DstReg}, {FrameAddr});
6731 else {
6732 MFI.setReturnAddressIsTaken(true);
6733
6734 if (STI.hasPAuth()) {
6735 Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
6736 MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
6737 MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
6738 } else {
6739 MIB.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr})
6740 .addImm(1);
6741 MIB.buildInstr(AArch64::XPACLRI);
6742 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
6743 }
6744 }
6745
6746 I.eraseFromParent();
6747 return true;
6748 }
6749 case Intrinsic::aarch64_neon_tbl2:
6750 SelectTable(I, MRI, 2, AArch64::TBLv8i8Two, AArch64::TBLv16i8Two, false);
6751 return true;
6752 case Intrinsic::aarch64_neon_tbl3:
6753 SelectTable(I, MRI, 3, AArch64::TBLv8i8Three, AArch64::TBLv16i8Three,
6754 false);
6755 return true;
6756 case Intrinsic::aarch64_neon_tbl4:
6757 SelectTable(I, MRI, 4, AArch64::TBLv8i8Four, AArch64::TBLv16i8Four, false);
6758 return true;
6759 case Intrinsic::aarch64_neon_tbx2:
6760 SelectTable(I, MRI, 2, AArch64::TBXv8i8Two, AArch64::TBXv16i8Two, true);
6761 return true;
6762 case Intrinsic::aarch64_neon_tbx3:
6763 SelectTable(I, MRI, 3, AArch64::TBXv8i8Three, AArch64::TBXv16i8Three, true);
6764 return true;
6765 case Intrinsic::aarch64_neon_tbx4:
6766 SelectTable(I, MRI, 4, AArch64::TBXv8i8Four, AArch64::TBXv16i8Four, true);
6767 return true;
6768 case Intrinsic::swift_async_context_addr:
6769 auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()},
6770 {Register(AArch64::FP)})
6771 .addImm(8)
6772 .addImm(0);
6773 constrainSelectedInstRegOperands(*Sub, TII, TRI, RBI);
6774
6775 MachineFunction *MF = I.getParent()->getParent();
6776 MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
6777 I.eraseFromParent();
6778 return true;
6779 }
6780 return false;
6781}
6782
6783// G_PTRAUTH_GLOBAL_VALUE lowering
6784//
6785// We have 3 lowering alternatives to choose from:
6786// - MOVaddrPAC: similar to MOVaddr, with added PAC.
6787// If the GV doesn't need a GOT load (i.e., is locally defined)
6788// materialize the pointer using adrp+add+pac. See LowerMOVaddrPAC.
6789//
6790// - LOADgotPAC: similar to LOADgot, with added PAC.
6791// If the GV needs a GOT load, materialize the pointer using the usual
6792 // GOT adrp+ldr, +pac. Pointers in the GOT are assumed to be not signed; the
6793 // GOT section is assumed to be read-only (for example, via the relro
6794 // mechanism). See LowerMOVaddrPAC.
6795//
6796 // - LOADauthptrstatic: similar to LOADgot, but uses a
6797 // special stub slot instead of a GOT slot.
6798 // Load a signed pointer for symbol 'sym' from a stub slot named
6799 // 'sym$auth_ptr$key$disc', filled in by the dynamic linker during relocation
6800 // resolution. This usually lowers to adrp+ldr, but also emits an entry into
6801 // .data with an @AUTH relocation. See LowerLOADauthptrstatic.
6803//
6804 // All 3 are pseudos that are expanded late into longer sequences: this lets us
6805// provide integrity guarantees on the to-be-signed intermediate values.
6806//
6807// LOADauthptrstatic is undesirable because it requires a large section filled
6808// with often similarly-signed pointers, making it a good harvesting target.
6809 // Thus, it's only used for ptrauth references to extern_weak symbols, to avoid
6810 // null checks.
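//
// In short, the selection below is:
//   locally materializable GV            -> MOVaddrPAC
//   GV that needs a GOT load             -> LOADgotPAC
//   extern_weak GV (no offset/addr-disc) -> LOADauthptrstatic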
6811
6812bool AArch64InstructionSelector::selectPtrAuthGlobalValue(
6813 MachineInstr &I, MachineRegisterInfo &MRI) const {
6814 Register DefReg = I.getOperand(0).getReg();
6815 Register Addr = I.getOperand(1).getReg();
6816 uint64_t Key = I.getOperand(2).getImm();
6817 Register AddrDisc = I.getOperand(3).getReg();
6818 uint64_t Disc = I.getOperand(4).getImm();
6819 int64_t Offset = 0;
6820
6821 if (Key > AArch64PACKey::LAST)
6822 report_fatal_error("key in ptrauth global out of range [0, " +
6823 Twine((int)AArch64PACKey::LAST) + "]");
6824
6825 // Blend only works if the integer discriminator is 16 bits wide.
6826 if (!isUInt<16>(Disc))
6827 report_fatal_error(
6828 "constant discriminator in ptrauth global out of range [0, 0xffff]");
6829
6830 // Choosing between 3 lowering alternatives is target-specific.
6831 if (!STI.isTargetELF() && !STI.isTargetMachO())
6832 report_fatal_error("ptrauth global lowering only supported on MachO/ELF");
6833
6834 if (!MRI.hasOneDef(Addr))
6835 return false;
6836
6837 // First match any offset we take from the real global.
6838 const MachineInstr *DefMI = &*MRI.def_instr_begin(Addr);
6839 if (DefMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
6840 Register OffsetReg = DefMI->getOperand(2).getReg();
6841 if (!MRI.hasOneDef(OffsetReg))
6842 return false;
6843 const MachineInstr &OffsetMI = *MRI.def_instr_begin(OffsetReg);
6844 if (OffsetMI.getOpcode() != TargetOpcode::G_CONSTANT)
6845 return false;
6846
6847 Addr = DefMI->getOperand(1).getReg();
6848 if (!MRI.hasOneDef(Addr))
6849 return false;
6850
6851 DefMI = &*MRI.def_instr_begin(Addr);
6852 Offset = OffsetMI.getOperand(1).getCImm()->getSExtValue();
6853 }
6854
6855 // We should be left with a genuine unauthenticated GlobalValue.
6856 const GlobalValue *GV;
6857 if (DefMI->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) {
6858 GV = DefMI->getOperand(1).getGlobal();
6860 } else if (DefMI->getOpcode() == AArch64::G_ADD_LOW) {
6861 GV = DefMI->getOperand(2).getGlobal();
6863 } else {
6864 return false;
6865 }
6866
6867 MachineIRBuilder MIB(I);
6868
6869 // Classify the reference to determine whether it needs a GOT load.
6870 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
6871 const bool NeedsGOTLoad = ((OpFlags & AArch64II::MO_GOT) != 0);
6872 assert(((OpFlags & (~AArch64II::MO_GOT)) == 0) &&
6873 "unsupported non-GOT op flags on ptrauth global reference");
6874 assert((!GV->hasExternalWeakLinkage() || NeedsGOTLoad) &&
6875 "unsupported non-GOT reference to weak ptrauth global");
6876
6877 std::optional<APInt> AddrDiscVal = getIConstantVRegVal(AddrDisc, MRI);
6878 bool HasAddrDisc = !AddrDiscVal || *AddrDiscVal != 0;
6879
6880 // Non-extern_weak:
6881 // - No GOT load needed -> MOVaddrPAC
6882 // - GOT load for non-extern_weak -> LOADgotPAC
6883 // Note that we disallow extern_weak refs to avoid null checks later.
6884 if (!GV->hasExternalWeakLinkage()) {
6885 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X16}, {});
6886 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6887 MIB.buildInstr(NeedsGOTLoad ? AArch64::LOADgotPAC : AArch64::MOVaddrPAC)
6888 .addGlobalAddress(GV, Offset)
6889 .addImm(Key)
6890 .addReg(HasAddrDisc ? AddrDisc : AArch64::XZR)
6891 .addImm(Disc)
6892 .constrainAllUses(TII, TRI, RBI);
6893 MIB.buildCopy(DefReg, Register(AArch64::X16));
6894 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
6895 I.eraseFromParent();
6896 return true;
6897 }
6898
6899 // extern_weak -> LOADauthptrstatic
6900
6901 // Offsets and extern_weak don't mix well: ptrauth aside, you'd get the
6902 // offset alone as a pointer if the symbol wasn't available, which would
6903 // probably break null checks in users. Ptrauth complicates things further:
6904 // error out.
6905 if (Offset != 0)
6906 report_fatal_error(
6907 "unsupported non-zero offset in weak ptrauth global reference");
6908
6909 if (HasAddrDisc)
6910 report_fatal_error("unsupported weak addr-div ptrauth global");
6911
6912 MIB.buildInstr(AArch64::LOADauthptrstatic, {DefReg}, {})
6913 .addGlobalAddress(GV, Offset)
6914 .addImm(Key)
6915 .addImm(Disc);
6916 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
6917
6918 I.eraseFromParent();
6919 return true;
6920}
6921
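// Select a NEON TBL/TBX table-lookup intrinsic: the NumVec table registers are
// packed into a Q-register tuple via a REG_SEQUENCE, Opc1/Opc2 are the 64-bit
// and 128-bit result variants, and isExt selects TBX, which takes the original
// destination value as an extra pass-through source operand.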
6922void AArch64InstructionSelector::SelectTable(MachineInstr &I,
6923 MachineRegisterInfo &MRI,
6924 unsigned NumVec, unsigned Opc1,
6925 unsigned Opc2, bool isExt) {
6926 Register DstReg = I.getOperand(0).getReg();
6927 unsigned Opc = MRI.getType(DstReg) == LLT::fixed_vector(8, 8) ? Opc1 : Opc2;
6928
6929 // Create the REG_SEQUENCE
6930 SmallVector<Register, 4> Regs;
6931 for (unsigned i = 0; i < NumVec; i++)
6932 Regs.push_back(I.getOperand(i + 2 + isExt).getReg());
6933 Register RegSeq = createQTuple(Regs, MIB);
6934
6935 Register IdxReg = I.getOperand(2 + NumVec + isExt).getReg();
6936 MachineInstrBuilder Instr;
6937 if (isExt) {
6938 Register Reg = I.getOperand(2).getReg();
6939 Instr = MIB.buildInstr(Opc, {DstReg}, {Reg, RegSeq, IdxReg});
6940 } else
6941 Instr = MIB.buildInstr(Opc, {DstReg}, {RegSeq, IdxReg});
6942 constrainSelectedInstRegOperands(*Instr, TII, TRI, RBI);
6943 I.eraseFromParent();
6944}
6945
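// The selectShiftA_*/selectShiftB_* renderers below encode immediate shift
// amounts for the tblgen-erated patterns: the "A" forms emit
// (BitWidth - Imm) & (BitWidth - 1), the "B" forms emit (BitWidth - 1) - Imm.
// For example, a 32-bit shift amount of 8 is rendered as 24 by selectShiftA_32
// and as 23 by selectShiftB_32.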
6946InstructionSelector::ComplexRendererFns
6947AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
6948 auto MaybeImmed = getImmedFromMO(Root);
6949 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6950 return std::nullopt;
6951 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
6952 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6953}
6954
6955InstructionSelector::ComplexRendererFns
6956AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
6957 auto MaybeImmed = getImmedFromMO(Root);
6958 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6959 return std::nullopt;
6960 uint64_t Enc = 31 - *MaybeImmed;
6961 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6962}
6963
6964InstructionSelector::ComplexRendererFns
6965AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
6966 auto MaybeImmed = getImmedFromMO(Root);
6967 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
6968 return std::nullopt;
6969 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
6970 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6971}
6972
6973InstructionSelector::ComplexRendererFns
6974AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
6975 auto MaybeImmed = getImmedFromMO(Root);
6976 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
6977 return std::nullopt;
6978 uint64_t Enc = 63 - *MaybeImmed;
6979 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6980}
6981
6982/// Helper to select an immediate value that can be represented as a 12-bit
6983/// value shifted left by either 0 or 12. If it is possible to do so, return
6984/// the immediate and shift value. If not, return std::nullopt.
6985///
6986/// Used by selectArithImmed and selectNegArithImmed.
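/// For example, 0xabc yields (0xabc, LSL #0) and 0xabc000 yields
/// (0xabc, LSL #12), while 0xabc0 fits neither form and yields std::nullopt.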
6987InstructionSelector::ComplexRendererFns
6988AArch64InstructionSelector::select12BitValueWithLeftShift(
6989 uint64_t Immed) const {
6990 unsigned ShiftAmt;
6991 if (Immed >> 12 == 0) {
6992 ShiftAmt = 0;
6993 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
6994 ShiftAmt = 12;
6995 Immed = Immed >> 12;
6996 } else
6997 return std::nullopt;
6998
6999 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
7000 return {{
7001 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
7002 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
7003 }};
7004}
7005
7006/// SelectArithImmed - Select an immediate value that can be represented as
7007/// a 12-bit value shifted left by either 0 or 12. If so, return true with
7008/// Val set to the 12-bit value and Shift set to the shifter operand.
7009InstructionSelector::ComplexRendererFns
7010AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
7011 // This function is called from the addsub_shifted_imm ComplexPattern,
7012 // which lists [imm] as the list of opcodes it's interested in; however,
7013 // we still need to check whether the operand is actually an immediate
7014 // here because the ComplexPattern opcode list is only used in
7015 // root-level opcode matching.
7016 auto MaybeImmed = getImmedFromMO(Root);
7017 if (MaybeImmed == std::nullopt)
7018 return std::nullopt;
7019 return select12BitValueWithLeftShift(*MaybeImmed);
7020}
7021
7022/// SelectNegArithImmed - As above, but negates the value before trying to
7023/// select it.
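/// For example, a 32-bit G_CONSTANT of -5 is negated to 5 and encoded as
/// (5, LSL #0), allowing e.g. an add/cmp of -5 to be selected as a sub/cmn
/// of 5.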
7024InstructionSelector::ComplexRendererFns
7025AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
7026 // We need a register here, because we need to know if we have a 64 or 32
7027 // bit immediate.
7028 if (!Root.isReg())
7029 return std::nullopt;
7030 auto MaybeImmed = getImmedFromMO(Root);
7031 if (MaybeImmed == std::nullopt)
7032 return std::nullopt;
7033 uint64_t Immed = *MaybeImmed;
7034
7035 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
7036 // have the opposite effect on the C flag, so this pattern mustn't match under
7037 // those circumstances.
7038 if (Immed == 0)
7039 return std::nullopt;
7040
7041 // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
7042 // the root.
7043 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7044 if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
7045 Immed = ~((uint32_t)Immed) + 1;
7046 else
7047 Immed = ~Immed + 1ULL;
7048
7049 if (Immed & 0xFFFFFFFFFF000000ULL)
7050 return std::nullopt;
7051
7052 Immed &= 0xFFFFFFULL;
7053 return select12BitValueWithLeftShift(Immed);
7054}
7055
7056/// Checks if we are sure that folding MI into a load/store addressing mode is
7057/// beneficial or not.
7058///
7059/// Returns:
7060/// - true if folding MI would be beneficial.
7061/// - false if folding MI would be bad.
7062/// - std::nullopt if it is not sure whether folding MI is beneficial.
7063///
7064/// \p MI can be the offset operand of G_PTR_ADD, e.g. G_SHL in the example:
7065///
7066/// %13:gpr(s64) = G_CONSTANT i64 1
7067/// %8:gpr(s64) = G_SHL %6, %13(s64)
7068/// %9:gpr(p0) = G_PTR_ADD %0, %8(s64)
7069/// %12:gpr(s32) = G_LOAD %9(p0) :: (load (s16))
7070std::optional<bool> AArch64InstructionSelector::isWorthFoldingIntoAddrMode(
7071 const MachineInstr &MI, const MachineRegisterInfo &MRI) const {
7072 if (MI.getOpcode() == AArch64::G_SHL) {
7073 // Address operands with shifts are free, except when running on subtargets
7074 // with AddrLSLSlow14.
7075 if (const auto ValAndVeg = getIConstantVRegValWithLookThrough(
7076 MI.getOperand(2).getReg(), MRI)) {
7077 const APInt ShiftVal = ValAndVeg->Value;
7078
7079 // Don't fold if we know this will be slow.
7080 return !(STI.hasAddrLSLSlow14() && (ShiftVal == 1 || ShiftVal == 4));
7081 }
7082 }
7083 return std::nullopt;
7084}
7085
7086/// Return true if it is worth folding MI into an extended register. That is,
7087/// if it's safe to pull it into the addressing mode of a load or store as a
7088/// shift.
7089/// \p IsAddrOperand whether the def of MI is used as an address operand
7090/// (e.g. feeding into an LDR/STR).
7091bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
7092 const MachineInstr &MI, const MachineRegisterInfo &MRI,
7093 bool IsAddrOperand) const {
7094
7095 // Always fold if there is one use, or if we're optimizing for size.
7096 Register DefReg = MI.getOperand(0).getReg();
7097 if (MRI.hasOneNonDBGUse(DefReg) ||
7098 MI.getParent()->getParent()->getFunction().hasOptSize())
7099 return true;
7100
7101 if (IsAddrOperand) {
7102 // If we are already sure that folding MI is good or bad, return the result.
7103 if (const auto Worth = isWorthFoldingIntoAddrMode(MI, MRI))
7104 return *Worth;
7105
7106 // Fold G_PTR_ADD if its offset operand can be folded
7107 if (MI.getOpcode() == AArch64::G_PTR_ADD) {
7108 MachineInstr *OffsetInst =
7109 getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
7110
7111 // Note, we already know G_PTR_ADD is used by at least two instructions.
7112 // If we are also sure about whether folding is beneficial or not,
7113 // return the result.
7114 if (const auto Worth = isWorthFoldingIntoAddrMode(*OffsetInst, MRI))
7115 return *Worth;
7116 }
7117 }
7118
7119 // FIXME: Consider checking HasALULSLFast as appropriate.
7120
7121 // We have a fastpath, so folding a shift in and potentially computing it
7122 // many times may be beneficial. Check if this is only used in memory ops.
7123 // If it is, then we should fold.
7124 return all_of(MRI.use_nodbg_instructions(DefReg),
7125 [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
7126}
7127
7128static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) {
7129 switch (Type) {
7130 case AArch64_AM::SXTB:
7131 case AArch64_AM::SXTH:
7132 case AArch64_AM::SXTW:
7133 return true;
7134 default:
7135 return false;
7136 }
7137}
7138
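// selectExtendedSHL matches an offset of the form
// (G_SHL reg, log2(SizeInBytes)) or (G_MUL reg, SizeInBytes), optionally
// behind a G_ZEXT when WantsExt is set, and renders it as a scaled register
// offset. For example, with SizeInBytes = 8, multiplying the offset register
// by 8 folds into the "[Base, Offset, lsl #3]" form.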
7139InstructionSelector::ComplexRendererFns
7140AArch64InstructionSelector::selectExtendedSHL(
7141 MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
7142 unsigned SizeInBytes, bool WantsExt) const {
7143 assert(Base.isReg() && "Expected base to be a register operand");
7144 assert(Offset.isReg() && "Expected offset to be a register operand");
7145
7146 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7147 MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
7148
7149 unsigned OffsetOpc = OffsetInst->getOpcode();
7150 bool LookedThroughZExt = false;
7151 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
7152 // Try to look through a ZEXT.
7153 if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
7154 return std::nullopt;
7155
7156 OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg());
7157 OffsetOpc = OffsetInst->getOpcode();
7158 LookedThroughZExt = true;
7159
7160 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
7161 return std::nullopt;
7162 }
7163 // Make sure that the memory op is a valid size.
7164 int64_t LegalShiftVal = Log2_32(SizeInBytes);
7165 if (LegalShiftVal == 0)
7166 return std::nullopt;
7167 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))
7168 return std::nullopt;
7169
7170 // Now, try to find the specific G_CONSTANT. Start by assuming that the
7171 // register we will offset is the LHS, and the register containing the
7172 // constant is the RHS.
7173 Register OffsetReg = OffsetInst->getOperand(1).getReg();
7174 Register ConstantReg = OffsetInst->getOperand(2).getReg();
7175 auto ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
7176 if (!ValAndVReg) {
7177 // We didn't get a constant on the RHS. If the opcode is a shift, then
7178 // we're done.
7179 if (OffsetOpc == TargetOpcode::G_SHL)
7180 return std::nullopt;
7181
7182 // If we have a G_MUL, we can use either register. Try looking at the RHS.
7183 std::swap(OffsetReg, ConstantReg);
7184 ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
7185 if (!ValAndVReg)
7186 return std::nullopt;
7187 }
7188
7189 // The value must fit into 3 bits, and must be positive. Make sure that is
7190 // true.
7191 int64_t ImmVal = ValAndVReg->Value.getSExtValue();
7192
7193 // Since we're going to pull this into a shift, the constant value must be
7194 // a power of 2. If we got a multiply, then we need to check this.
7195 if (OffsetOpc == TargetOpcode::G_MUL) {
7196 if (!llvm::has_single_bit<uint32_t>(ImmVal))
7197 return std::nullopt;
7198
7199 // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
7200 ImmVal = Log2_32(ImmVal);
7201 }
7202
7203 if ((ImmVal & 0x7) != ImmVal)
7204 return std::nullopt;
7205
7206 // We are only allowed to shift by LegalShiftVal. This shift value is built
7207 // into the instruction, so we can't just use whatever we want.
7208 if (ImmVal != LegalShiftVal)
7209 return std::nullopt;
7210
7211 unsigned SignExtend = 0;
7212 if (WantsExt) {
7213 // Check if the offset is defined by an extend, unless we looked through a
7214 // G_ZEXT earlier.
7215 if (!LookedThroughZExt) {
7216 MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
7217 auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
7218 if (Ext == AArch64_AM::InvalidShiftExtend)
7219 return std::nullopt;
7220
7221 SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
7222 // We only support SXTW for signed extension here.
7223 if (SignExtend && Ext != AArch64_AM::SXTW)
7224 return std::nullopt;
7225 OffsetReg = ExtInst->getOperand(1).getReg();
7226 }
7227
7228 // Need a 32-bit wide register here.
7229 MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
7230 OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
7231 }
7232
7233 // We can use the LHS of the GEP as the base, and the LHS of the shift as an
7234 // offset. Signify that we are shifting by setting the shift flag to 1.
7235 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
7236 [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
7237 [=](MachineInstrBuilder &MIB) {
7238 // Need to add both immediates here to make sure that they are both
7239 // added to the instruction.
7240 MIB.addImm(SignExtend);
7241 MIB.addImm(1);
7242 }}};
7243}
7244
7245/// This is used for computing addresses like this:
7246///
7247/// ldr x1, [x2, x3, lsl #3]
7248///
7249/// Where x2 is the base register, and x3 is an offset register. The shift-left
7250/// is a constant value specific to this load instruction. That is, we'll never
7251/// see anything other than a 3 here (which corresponds to the size of the
7252/// element being loaded.)
7253InstructionSelector::ComplexRendererFns
7254AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
7255 MachineOperand &Root, unsigned SizeInBytes) const {
7256 if (!Root.isReg())
7257 return std::nullopt;
7258 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7259
7260 // We want to find something like this:
7261 //
7262 // val = G_CONSTANT LegalShiftVal
7263 // shift = G_SHL off_reg val
7264 // ptr = G_PTR_ADD base_reg shift
7265 // x = G_LOAD ptr
7266 //
7267 // And fold it into this addressing mode:
7268 //
7269 // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
7270
7271 // Check if we can find the G_PTR_ADD.
7272 MachineInstr *PtrAdd =
7273 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7274 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))
7275 return std::nullopt;
7276
7277 // Now, try to match an opcode which will match our specific offset.
7278 // We want a G_SHL or a G_MUL.
7279 MachineInstr *OffsetInst =
7280 getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
7281 return selectExtendedSHL(Root, PtrAdd->getOperand(1),
7282 OffsetInst->getOperand(0), SizeInBytes,
7283 /*WantsExt=*/false);
7284}
7285
7286/// This is used for computing addresses like this:
7287///
7288/// ldr x1, [x2, x3]
7289///
7290/// Where x2 is the base register, and x3 is an offset register.
7291///
7292/// When possible (or profitable) to fold a G_PTR_ADD into the address
7293/// calculation, this will do so. Otherwise, it will return std::nullopt.
7294InstructionSelector::ComplexRendererFns
7295AArch64InstructionSelector::selectAddrModeRegisterOffset(
7296 MachineOperand &Root) const {
7297 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7298
7299 // We need a GEP.
7300 MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
7301 if (Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
7302 return std::nullopt;
7303
7304 // If this is used more than once, let's not bother folding.
7305 // TODO: Check if they are memory ops. If they are, then we can still fold
7306 // without having to recompute anything.
7307 if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
7308 return std::nullopt;
7309
7310 // Base is the GEP's LHS, offset is its RHS.
7311 return {{[=](MachineInstrBuilder &MIB) {
7312 MIB.addUse(Gep->getOperand(1).getReg());
7313 },
7314 [=](MachineInstrBuilder &MIB) {
7315 MIB.addUse(Gep->getOperand(2).getReg());
7316 },
7317 [=](MachineInstrBuilder &MIB) {
7318 // Need to add both immediates here to make sure that they are both
7319 // added to the instruction.
7320 MIB.addImm(0);
7321 MIB.addImm(0);
7322 }}};
7323}
7324
7325/// This is intended to be equivalent to selectAddrModeXRO in
7326/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
7327InstructionSelector::ComplexRendererFns
7328AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
7329 unsigned SizeInBytes) const {
7330 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7331 if (!Root.isReg())
7332 return std::nullopt;
7333 MachineInstr *PtrAdd =
7334 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7335 if (!PtrAdd)
7336 return std::nullopt;
7337
7338 // Check for immediates which cannot be encoded in the [base + imm]
7339 // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
7340 // end up with code like:
7341 //
7342 // mov x0, wide
7343 // add x1 base, x0
7344 // ldr x2, [x1, x0]
7345 //
7346 // In this situation, we can use the [base, xreg] addressing mode to save an
7347 // add/sub:
7348 //
7349 // mov x0, wide
7350 // ldr x2, [base, x0]
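// For example, a byte offset of 0x123456 on an 8-byte access is neither a
// valid scaled immediate nor encodable by a single add/sub (with or without
// lsl #12), so keeping it in a register and using [base, xreg] saves the add.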
7351 auto ValAndVReg =
7352 getIConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
7353 if (ValAndVReg) {
7354 unsigned Scale = Log2_32(SizeInBytes);
7355 int64_t ImmOff = ValAndVReg->Value.getSExtValue();
7356
7357 // Skip immediates that can be selected in the load/store addressing
7358 // mode.
7359 if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
7360 ImmOff < (0x1000 << Scale))
7361 return std::nullopt;
7362
7363 // Helper lambda to decide whether or not it is preferable to emit an add.
7364 auto isPreferredADD = [](int64_t ImmOff) {
7365 // Constants in [0x0, 0xfff] can be encoded in an add.
7366 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
7367 return true;
7368
7369 // Can it be encoded in an add lsl #12?
7370 if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
7371 return false;
7372
7373 // It can be encoded in an add lsl #12, but we may not want to. If it is
7374 // possible to select this as a single movz, then prefer that. A single
7375 // movz is faster than an add with a shift.
7376 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
7377 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
7378 };
7379
7380 // If the immediate can be encoded in a single add/sub, then bail out.
7381 if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
7382 return std::nullopt;
7383 }
7384
7385 // Try to fold shifts into the addressing mode.
7386 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
7387 if (AddrModeFns)
7388 return AddrModeFns;
7389
7390 // If that doesn't work, see if it's possible to fold in registers from
7391 // a GEP.
7392 return selectAddrModeRegisterOffset(Root);
7393}
7394
7395/// This is used for computing addresses like this:
7396///
7397/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
7398///
7399/// Where we have a 64-bit base register, a 32-bit offset register, and an
7400/// extend (which may or may not be signed).
7401InstructionSelector::ComplexRendererFns
7402AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
7403 unsigned SizeInBytes) const {
7404 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7405
7406 MachineInstr *PtrAdd =
7407 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7408 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))
7409 return std::nullopt;
7410
7411 MachineOperand &LHS = PtrAdd->getOperand(1);
7412 MachineOperand &RHS = PtrAdd->getOperand(2);
7413 MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
7414
7415 // The first case is the same as selectAddrModeXRO, except we need an extend.
7416 // In this case, we try to find a shift and extend, and fold them into the
7417 // addressing mode.
7418 //
7419 // E.g.
7420 //
7421 // off_reg = G_Z/S/ANYEXT ext_reg
7422 // val = G_CONSTANT LegalShiftVal
7423 // shift = G_SHL off_reg val
7424 // ptr = G_PTR_ADD base_reg shift
7425 // x = G_LOAD ptr
7426 //
7427 // In this case we can get a load like this:
7428 //
7429 // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
7430 auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
7431 SizeInBytes, /*WantsExt=*/true);
7432 if (ExtendedShl)
7433 return ExtendedShl;
7434
7435 // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
7436 //
7437 // e.g.
7438 // ldr something, [base_reg, ext_reg, sxtw]
7439 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))
7440 return std::nullopt;
7441
7442 // Check if this is an extend. We'll get an extend type if it is.
7443 AArch64_AM::ShiftExtendType Ext =
7444 getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
7445 if (Ext == AArch64_AM::InvalidShiftExtend)
7446 return std::nullopt;
7447
7448 // Need a 32-bit wide register.
7449 MachineIRBuilder MIB(*PtrAdd);
7450 Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
7451 AArch64::GPR32RegClass, MIB);
7452 unsigned SignExtend = Ext == AArch64_AM::SXTW;
7453
7454 // Base is LHS, offset is ExtReg.
7455 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
7456 [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
7457 [=](MachineInstrBuilder &MIB) {
7458 MIB.addImm(SignExtend);
7459 MIB.addImm(0);
7460 }}};
7461}
7462
7463/// Select a "register plus unscaled signed 9-bit immediate" address. This
7464/// should only match when there is an offset that is not valid for a scaled
7465/// immediate addressing mode. The "Size" argument is the size in bytes of the
7466/// memory reference, which is needed here to know what is valid for a scaled
7467/// immediate.
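/// For example, an offset of -17 on a 4-byte access cannot use the scaled
/// unsigned-immediate form, but it fits the signed 9-bit range [-256, 256)
/// accepted here and can be selected as an unscaled ldur/stur.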
7468InstructionSelector::ComplexRendererFns
7469AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
7470 unsigned Size) const {
7471 MachineRegisterInfo &MRI =
7472 Root.getParent()->getParent()->getParent()->getRegInfo();
7473
7474 if (!Root.isReg())
7475 return std::nullopt;
7476
7477 if (!isBaseWithConstantOffset(Root, MRI))
7478 return std::nullopt;
7479
7480 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
7481
7482 MachineOperand &OffImm = RootDef->getOperand(2);
7483 if (!OffImm.isReg())
7484 return std::nullopt;
7485 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
7486 if (RHS->getOpcode() != TargetOpcode::G_CONSTANT)
7487 return std::nullopt;
7488 int64_t RHSC;
7489 MachineOperand &RHSOp1 = RHS->getOperand(1);
7490 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
7491 return std::nullopt;
7492 RHSC = RHSOp1.getCImm()->getSExtValue();
7493
7494 if (RHSC >= -256 && RHSC < 256) {
7495 MachineOperand &Base = RootDef->getOperand(1);
7496 return {{
7497 [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
7498 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
7499 }};
7500 }
7501 return std::nullopt;
7502}
7503
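// tryFoldAddLowIntoImm folds a G_ADD_LOW fed by an ADRP into the immediate
// operand of a load/store, giving the usual
// "adrp xN, sym; ldr/str ..., [xN, :lo12:sym]" pair, provided the global's
// alignment and offset are compatible with the access size.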
7504InstructionSelector::ComplexRendererFns
7505AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
7506 unsigned Size,
7507 MachineRegisterInfo &MRI) const {
7508 if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
7509 return std::nullopt;
7510 MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
7511 if (Adrp.getOpcode() != AArch64::ADRP)
7512 return std::nullopt;
7513
7514 // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
7515 auto Offset = Adrp.getOperand(1).getOffset();
7516 if (Offset % Size != 0)
7517 return std::nullopt;
7518
7519 auto GV = Adrp.getOperand(1).getGlobal();
7520 if (GV->isThreadLocal())
7521 return std::nullopt;
7522
7523 auto &MF = *RootDef.getParent()->getParent();
7524 if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
7525 return std::nullopt;
7526
7527 unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
7528 MachineIRBuilder MIRBuilder(RootDef);
7529 Register AdrpReg = Adrp.getOperand(0).getReg();
7530 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
7531 [=](MachineInstrBuilder &MIB) {
7532 MIB.addGlobalAddress(GV, Offset,
7533 OpFlags | AArch64II::MO_PAGEOFF |
7534 AArch64II::MO_NC);
7535 }}};
7536}
7537
7538/// Select a "register plus scaled unsigned 12-bit immediate" address. The
7539/// "Size" argument is the size in bytes of the memory reference, which
7540/// determines the scale.
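/// For example, with a 4-byte (s32) access, Size is 4, so a constant byte
/// offset of 32 is rendered as the scaled immediate 8 (32 >> Log2_32(4)).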
7541InstructionSelector::ComplexRendererFns
7542AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
7543 unsigned Size) const {
7544 MachineFunction &MF = *Root.getParent()->getParent()->getParent();
7545 MachineRegisterInfo &MRI = MF.getRegInfo();
7546
7547 if (!Root.isReg())
7548 return std::nullopt;
7549
7550 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
7551 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
7552 return {{
7553 [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
7554 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
7555 }};
7556 }
7557
7558 CodeModel::Model CM = MF.getTarget().getCodeModel();
7559 // Check if we can fold in the ADD of small code model ADRP + ADD address.
7560 // HACK: ld64 on Darwin doesn't support relocations on PRFM, so we can't fold
7561 // globals into the offset.
7562 MachineInstr *RootParent = Root.getParent();
7563 if (CM == CodeModel::Small &&
7564 !(RootParent->getOpcode() == AArch64::G_AARCH64_PREFETCH &&
7565 STI.isTargetDarwin())) {
7566 auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
7567 if (OpFns)
7568 return OpFns;
7569 }
7570
7571 if (isBaseWithConstantOffset(Root, MRI)) {
7572 MachineOperand &LHS = RootDef->getOperand(1);
7573 MachineOperand &RHS = RootDef->getOperand(2);
7574 MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
7575 MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
7576
7577 int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
7578 unsigned Scale = Log2_32(Size);
7579 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
7580 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
7581 return {{
7582 [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
7583 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
7584 }};
7585
7586 return {{
7587 [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
7588 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
7589 }};
7590 }
7591 }
7592
7593 // Before falling back to our general case, check if the unscaled
7594 // instructions can handle this. If so, that's preferable.
7595 if (selectAddrModeUnscaled(Root, Size))
7596 return std::nullopt;
7597
7598 return {{
7599 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
7600 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
7601 }};
7602}
7603
7604/// Given a shift instruction, return the correct shift type for that
7605/// instruction.
7606static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
7607 switch (MI.getOpcode()) {
7608 default:
7609 return AArch64_AM::InvalidShiftExtend;
7610 case TargetOpcode::G_SHL:
7611 return AArch64_AM::LSL;
7612 case TargetOpcode::G_LSHR:
7613 return AArch64_AM::LSR;
7614 case TargetOpcode::G_ASHR:
7615 return AArch64_AM::ASR;
7616 case TargetOpcode::G_ROTR:
7617 return AArch64_AM::ROR;
7618 }
7619}
7620
7621/// Select a "shifted register" operand. If the value is not shifted, set the
7622/// shift operand to a default value of "lsl 0".
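/// For example, when folding is considered profitable, a G_ASHR of a 64-bit
/// value by the constant 3 is rendered as the shifted-register operand
/// "Xn, ASR #3" via getShifterImm(ASR, 3).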
7623InstructionSelector::ComplexRendererFns
7624AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
7625 bool AllowROR) const {
7626 if (!Root.isReg())
7627 return std::nullopt;
7628 MachineRegisterInfo &MRI =
7629 Root.getParent()->getParent()->getParent()->getRegInfo();
7630
7631 // Check if the operand is defined by an instruction which corresponds to
7632 // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
7633 MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
7634 AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
7635 if (ShType == AArch64_AM::InvalidShiftExtend)
7636 return std::nullopt;
7637 if (ShType == AArch64_AM::ROR && !AllowROR)
7638 return std::nullopt;
7639 if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI, false))
7640 return std::nullopt;
7641
7642 // Need an immediate on the RHS.
7643 MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
7644 auto Immed = getImmedFromMO(ShiftRHS);
7645 if (!Immed)
7646 return std::nullopt;
7647
7648 // We have something that we can fold. Fold in the shift's LHS and RHS into
7649 // the instruction.
7650 MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
7651 Register ShiftReg = ShiftLHS.getReg();
7652
7653 unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
7654 unsigned Val = *Immed & (NumBits - 1);
7655 unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);
7656
7657 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
7658 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
7659}
7660
7661AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
7662 MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
7663 unsigned Opc = MI.getOpcode();
7664
7665 // Handle explicit extend instructions first.
7666 if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
7667 unsigned Size;
7668 if (Opc == TargetOpcode::G_SEXT)
7669 Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7670 else
7671 Size = MI.getOperand(2).getImm();
7672 assert(Size != 64 && "Extend from 64 bits?");
7673 switch (Size) {
7674 case 8:
7675 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB;
7676 case 16:
7677 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH;
7678 case 32:
7679 return AArch64_AM::SXTW;
7680 default:
7681 return AArch64_AM::InvalidShiftExtend;
7682 }
7683 }
7684
7685 if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
7686 unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7687 assert(Size != 64 && "Extend from 64 bits?");
7688 switch (Size) {
7689 case 8:
7690 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB;
7691 case 16:
7692 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH;
7693 case 32:
7694 return AArch64_AM::UXTW;
7695 default:
7696 return AArch64_AM::InvalidShiftExtend;
7697 }
7698 }
7699
7700 // Don't have an explicit extend. Try to handle a G_AND with a constant mask
7701 // on the RHS.
7702 if (Opc != TargetOpcode::G_AND)
7703 return AArch64_AM::InvalidShiftExtend;
7704
7705 std::optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
7706 if (!MaybeAndMask)
7707 return AArch64_AM::InvalidShiftExtend;
7708 uint64_t AndMask = *MaybeAndMask;
7709 switch (AndMask) {
7710 default:
7711 return AArch64_AM::InvalidShiftExtend;
7712 case 0xFF:
7713 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
7714 case 0xFFFF:
7715 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
7716 case 0xFFFFFFFF:
7717 return AArch64_AM::UXTW;
7718 }
7719}
7720
7721Register AArch64InstructionSelector::moveScalarRegClass(
7722 Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
7723 MachineRegisterInfo &MRI = *MIB.getMRI();
7724 auto Ty = MRI.getType(Reg);
7725 assert(!Ty.isVector() && "Expected scalars only!");
7726 if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
7727 return Reg;
7728
7729 // Create a copy and immediately select it.
7730 // FIXME: We should have an emitCopy function?
7731 auto Copy = MIB.buildCopy({&RC}, {Reg});
7732 selectCopy(*Copy, TII, MRI, TRI, RBI);
7733 return Copy.getReg(0);
7734}
7735
7736/// Select an "extended register" operand. This operand folds in an extend
7737/// followed by an optional left shift.
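/// For example, a G_SHL by 2 whose operand is a G_SEXT from s32 folds to the
/// operand "Wn, sxtw #2", as used by add/sub with an extended register.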
7738InstructionSelector::ComplexRendererFns
7739AArch64InstructionSelector::selectArithExtendedRegister(
7740 MachineOperand &Root) const {
7741 if (!Root.isReg())
7742 return std::nullopt;
7743 MachineRegisterInfo &MRI =
7744 Root.getParent()->getParent()->getParent()->getRegInfo();
7745
7746 uint64_t ShiftVal = 0;
7747 Register ExtReg;
7748 AArch64_AM::ShiftExtendType Ext;
7749 MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
7750 if (!RootDef)
7751 return std::nullopt;
7752
7753 if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI, false))
7754 return std::nullopt;
7755
7756 // Check if we can fold a shift and an extend.
7757 if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
7758 // Look for a constant on the RHS of the shift.
7759 MachineOperand &RHS = RootDef->getOperand(2);
7760 std::optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
7761 if (!MaybeShiftVal)
7762 return std::nullopt;
7763 ShiftVal = *MaybeShiftVal;
7764 if (ShiftVal > 4)
7765 return std::nullopt;
7766 // Look for a valid extend instruction on the LHS of the shift.
7767 MachineOperand &LHS = RootDef->getOperand(1);
7768 MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
7769 if (!ExtDef)
7770 return std::nullopt;
7771 Ext = getExtendTypeForInst(*ExtDef, MRI);
7772 if (Ext == AArch64_AM::InvalidShiftExtend)
7773 return std::nullopt;
7774 ExtReg = ExtDef->getOperand(1).getReg();
7775 } else {
7776 // Didn't get a shift. Try just folding an extend.
7777 Ext = getExtendTypeForInst(*RootDef, MRI);
7778 if (Ext == AArch64_AM::InvalidShiftExtend)
7779 return std::nullopt;
7780 ExtReg = RootDef->getOperand(1).getReg();
7781
7782 // If we have a 32 bit instruction which zeroes out the high half of a
7783 // register, we get an implicit zero extend for free. Check if we have one.
7784 // FIXME: We actually emit the extend right now even though we don't have
7785 // to.
7786 if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
7787 MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
7788 if (isDef32(*ExtInst))
7789 return std::nullopt;
7790 }
7791 }
7792
7793 // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
7794 // copy.
7795 MachineIRBuilder MIB(*RootDef);
7796 ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);
7797
7798 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
7799 [=](MachineInstrBuilder &MIB) {
7800 MIB.addImm(getArithExtendImm(Ext, ShiftVal));
7801 }}};
7802}
7803
7804InstructionSelector::ComplexRendererFns
7805AArch64InstructionSelector::selectExtractHigh(MachineOperand &Root) const {
7806 if (!Root.isReg())
7807 return std::nullopt;
7808 MachineRegisterInfo &MRI =
7809 Root.getParent()->getParent()->getParent()->getRegInfo();
7810
7811 auto Extract = getDefSrcRegIgnoringCopies(Root.getReg(), MRI);
7812 while (Extract && Extract->MI->getOpcode() == TargetOpcode::G_BITCAST &&
7813 STI.isLittleEndian())
7814 Extract =
7815 getDefSrcRegIgnoringCopies(Extract->MI->getOperand(1).getReg(), MRI);
7816 if (!Extract)
7817 return std::nullopt;
7818
7819 if (Extract->MI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
7820 if (Extract->Reg == Extract->MI->getOperand(1).getReg()) {
7821 Register ExtReg = Extract->MI->getOperand(2).getReg();
7822 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
7823 }
7824 }
7825 if (Extract->MI->getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) {
7826 LLT SrcTy = MRI.getType(Extract->MI->getOperand(1).getReg());
7827 auto LaneIdx = getIConstantVRegValWithLookThrough(
7828 Extract->MI->getOperand(2).getReg(), MRI);
7829 if (LaneIdx && SrcTy == LLT::fixed_vector(2, 64) &&
7830 LaneIdx->Value.getSExtValue() == 1) {
7831 Register ExtReg = Extract->MI->getOperand(1).getReg();
7832 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
7833 }
7834 }
7835
7836 return std::nullopt;
7837}
7838
7839void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
7840 const MachineInstr &MI,
7841 int OpIdx) const {
7842 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
7843 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7844 "Expected G_CONSTANT");
7845 std::optional<int64_t> CstVal =
7846 getIConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
7847 assert(CstVal && "Expected constant value");
7848 MIB.addImm(*CstVal);
7849}
7850
7851void AArch64InstructionSelector::renderLogicalImm32(
7852 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7853 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7854 "Expected G_CONSTANT");
7855 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
7856 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
7857 MIB.addImm(Enc);
7858}
7859
7860void AArch64InstructionSelector::renderLogicalImm64(
7861 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7862 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7863 "Expected G_CONSTANT");
7864 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
7865 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
7866 MIB.addImm(Enc);
7867}
7868
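// For G_UBSANTRAP the check kind is combined with 'U' (0x55) shifted into the
// upper byte, which appears to form the immediate AArch64 uses for UBSan BRK
// traps.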
7869void AArch64InstructionSelector::renderUbsanTrap(MachineInstrBuilder &MIB,
7870 const MachineInstr &MI,
7871 int OpIdx) const {
7872 assert(MI.getOpcode() == TargetOpcode::G_UBSANTRAP && OpIdx == 0 &&
7873 "Expected G_UBSANTRAP");
7874 MIB.addImm(MI.getOperand(0).getImm() | ('U' << 8));
7875}
7876
7877void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB,
7878 const MachineInstr &MI,
7879 int OpIdx) const {
7880 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7881 "Expected G_FCONSTANT");
7882 MIB.addImm(
7883 AArch64_AM::getFP16Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7884}
7885
7886void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB,
7887 const MachineInstr &MI,
7888 int OpIdx) const {
7889 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7890 "Expected G_FCONSTANT");
7891 MIB.addImm(
7892 AArch64_AM::getFP32Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7893}
7894
7895void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
7896 const MachineInstr &MI,
7897 int OpIdx) const {
7898 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7899 "Expected G_FCONSTANT");
7900 MIB.addImm(
7901 AArch64_AM::getFP64Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7902}
7903
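// Unlike the scalar FP renderers above, the next renderer bitcasts the f32
// constant to its integer representation and emits the AdvSIMD "modified
// immediate" (type 4) encoding used by vector immediate moves.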
7904void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
7905 MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
7906 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7907 "Expected G_FCONSTANT");
7909 .getFPImm()
7910 ->getValueAPF()
7911 .bitcastToAPInt()
7912 .getZExtValue()));
7913}
7914
7915bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
7916 const MachineInstr &MI, unsigned NumBytes) const {
7917 if (!MI.mayLoadOrStore())
7918 return false;
7919 assert(MI.hasOneMemOperand() &&
7920 "Expected load/store to have only one mem op!");
7921 return (*MI.memoperands_begin())->getSize() == NumBytes;
7922}
7923
7924bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
7925 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
7926 if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
7927 return false;
7928
7929 // Only return true if we know the operation will zero-out the high half of
7930 // the 64-bit register. Truncates can be subregister copies, which don't
7931 // zero out the high bits. Copies and other copy-like instructions can be
7932 // fed by truncates, or could be lowered as subregister copies.
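// For example, %t:gpr(s32) = G_TRUNC %x:gpr(s64) may be selected as a plain
// subregister copy, so the upper 32 bits of the containing X register are not
// guaranteed to be zero.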
7933 switch (MI.getOpcode()) {
7934 default:
7935 return true;
7936 case TargetOpcode::COPY:
7937 case TargetOpcode::G_BITCAST:
7938 case TargetOpcode::G_TRUNC:
7939 case TargetOpcode::G_PHI:
7940 return false;
7941 }
7942}
7943
7944
7945// Perform fixups on the given PHI instruction's operands to force them all
7946// to be the same as the destination regbank.
7947 static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
7948 const AArch64RegisterBankInfo &RBI) {
7949 assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
7950 Register DstReg = MI.getOperand(0).getReg();
7951 const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
7952 assert(DstRB && "Expected PHI dst to have regbank assigned");
7953 MachineIRBuilder MIB(MI);
7954
7955 // Go through each operand and ensure it has the same regbank.
7956 for (MachineOperand &MO : llvm::drop_begin(MI.operands())) {
7957 if (!MO.isReg())
7958 continue;
7959 Register OpReg = MO.getReg();
7960 const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
7961 if (RB != DstRB) {
7962 // Insert a cross-bank copy.
7963 auto *OpDef = MRI.getVRegDef(OpReg);
7964 const LLT &Ty = MRI.getType(OpReg);
7965 MachineBasicBlock &OpDefBB = *OpDef->getParent();
7966
7967 // Any instruction we insert must appear after all PHIs in the block
7968 // for the block to be valid MIR.
7969 MachineBasicBlock::iterator InsertPt = std::next(OpDef->getIterator());
7970 if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
7971 InsertPt = OpDefBB.getFirstNonPHI();
7972 MIB.setInsertPt(*OpDef->getParent(), InsertPt);
7973 auto Copy = MIB.buildCopy(Ty, OpReg);
7974 MRI.setRegBank(Copy.getReg(0), *DstRB);
7975 MO.setReg(Copy.getReg(0));
7976 }
7977 }
7978}
7979
7980void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
7981 // We're looking for PHIs, build a list so we don't invalidate iterators.
7982 MachineRegisterInfo &MRI = MF.getRegInfo();
7983 SmallVector<MachineInstr *, 32> Phis;
7984 for (auto &BB : MF) {
7985 for (auto &MI : BB) {
7986 if (MI.getOpcode() == TargetOpcode::G_PHI)
7987 Phis.emplace_back(&MI);
7988 }
7989 }
7990
7991 for (auto *MI : Phis) {
7992 // We need to do some work here if the operand types are < 16 bit and they
7993 // are split across fpr/gpr banks. Since all types <32b on gpr
7994 // end up being assigned gpr32 regclasses, we can end up with PHIs here
7995 // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
7996 // be selecting heterogeneous regbanks for operands if possible, but we
7997 // still need to be able to deal with it here.
7998 //
7999 // To fix this, if we have a gpr-bank operand < 32b in size and at least
8000 // one other operand is on the fpr bank, then we add cross-bank copies
8001 // to homogenize the operand banks. For simplicity the bank that we choose
8002 // to settle on is whatever bank the def operand has. For example:
8003 //
8004 // %endbb:
8005 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
8006 // =>
8007 // %bb2:
8008 // ...
8009 // %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
8010 // ...
8011 // %endbb:
8012 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
8013 bool HasGPROp = false, HasFPROp = false;
8014 for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
8015 if (!MO.isReg())
8016 continue;
8017 const LLT &Ty = MRI.getType(MO.getReg());
8018 if (!Ty.isValid() || !Ty.isScalar())
8019 break;
8020 if (Ty.getSizeInBits() >= 32)
8021 break;
8022 const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
8023 // If for some reason we don't have a regbank yet, don't try anything.
8024 if (!RB)
8025 break;
8026
8027 if (RB->getID() == AArch64::GPRRegBankID)
8028 HasGPROp = true;
8029 else
8030 HasFPROp = true;
8031 }
8032 // We have heterogeneous regbanks; fix them up.
8033 if (HasGPROp && HasFPROp)
8034 fixupPHIOpBanks(*MI, MRI, RBI);
8035 }
8036}
8037
8038namespace llvm {
8039 InstructionSelector *
8040 createAArch64InstructionSelector(const AArch64TargetMachine &TM,
8041 const AArch64Subtarget &Subtarget,
8042 const AArch64RegisterBankInfo &RBI) {
8043 return new AArch64InstructionSelector(TM, Subtarget, RBI);
8044}
8045}
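// The factory above is what the AArch64 target hands to GlobalISel; it is
// typically invoked when the target configures the InstructionSelect pass,
// e.g. something along the lines of:
//   InstSelector.reset(createAArch64InstructionSelector(TM, Subtarget, RBI));
// (illustrative only; the exact call site lives in the AArch64 target setup.)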