1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
15#include "AArch64InstrInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
42#include "llvm/IR/Constants.h"
45#include "llvm/IR/IntrinsicsAArch64.h"
46#include "llvm/IR/Type.h"
47#include "llvm/Pass.h"
48#include "llvm/Support/Debug.h"
50#include <optional>
51
52#define DEBUG_TYPE "aarch64-isel"
53
54using namespace llvm;
55using namespace MIPatternMatch;
56using namespace AArch64GISelUtils;
57
58namespace llvm {
59class BlockFrequencyInfo;
60class ProfileSummaryInfo;
61}
62
63namespace {
64
65#define GET_GLOBALISEL_PREDICATE_BITSET
66#include "AArch64GenGlobalISel.inc"
67#undef GET_GLOBALISEL_PREDICATE_BITSET
68
69
70class AArch64InstructionSelector : public InstructionSelector {
71public:
72 AArch64InstructionSelector(const AArch64TargetMachine &TM,
73 const AArch64Subtarget &STI,
74 const AArch64RegisterBankInfo &RBI);
75
76 bool select(MachineInstr &I) override;
77 static const char *getName() { return DEBUG_TYPE; }
78
79 void setupMF(MachineFunction &MF, GISelValueTracking *VT,
80 CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI,
81 BlockFrequencyInfo *BFI) override {
82 InstructionSelector::setupMF(MF, VT, CoverageInfo, PSI, BFI);
83 MIB.setMF(MF);
84
85 // hasFnAttribute() is expensive to call on every BRCOND selection, so
86 // cache it here for each run of the selector.
87 ProduceNonFlagSettingCondBr =
88 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
89 MFReturnAddr = Register();
90
91 processPHIs(MF);
92 }
93
94private:
95 /// tblgen-erated 'select' implementation, used as the initial selector for
96 /// the patterns that don't require complex C++.
97 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
98
99 // A lowering phase that runs before any selection attempts.
100 // Returns true if the instruction was modified.
101 bool preISelLower(MachineInstr &I);
102
103 // An early selection function that runs before the selectImpl() call.
104 bool earlySelect(MachineInstr &I);
105
106 /// Save state that is shared between select calls, call select on \p I and
107 /// then restore the saved state. This can be used to recursively call select
108 /// within a select call.
109 bool selectAndRestoreState(MachineInstr &I);
110
111 // Do some preprocessing of G_PHIs before we begin selection.
112 void processPHIs(MachineFunction &MF);
113
114 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
115
116 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
117 bool contractCrossBankCopyIntoStore(MachineInstr &I,
118 MachineRegisterInfo &MRI);
119
120 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
121
122 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
123 MachineRegisterInfo &MRI) const;
124 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
125 MachineRegisterInfo &MRI) const;
126
127 ///@{
128 /// Helper functions for selectCompareBranch.
129 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
130 MachineIRBuilder &MIB) const;
131 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
132 MachineIRBuilder &MIB) const;
133 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
134 MachineIRBuilder &MIB) const;
135 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
136 MachineBasicBlock *DstMBB,
137 MachineIRBuilder &MIB) const;
138 ///@}
139
140 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
141 MachineRegisterInfo &MRI);
142
143 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
144 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
145
146 // Helper to generate an equivalent of scalar_to_vector into a new register,
147 // returned via 'Dst'.
148 MachineInstr *emitScalarToVector(unsigned EltSize,
149 const TargetRegisterClass *DstRC,
150 Register Scalar,
151 MachineIRBuilder &MIRBuilder) const;
152 /// Helper to narrow a vector that was widened by emitScalarToVector.
153 /// Copies the lowest part of a 128-bit or 64-bit vector to a 64-bit or
154 /// 32-bit vector, respectively.
155 MachineInstr *emitNarrowVector(Register DstReg, Register SrcReg,
156 MachineIRBuilder &MIRBuilder,
157 MachineRegisterInfo &MRI) const;
158
159 /// Emit a lane insert into \p DstReg, or a new vector register if
160 /// std::nullopt is provided.
161 ///
162 /// The lane inserted into is defined by \p LaneIdx. The vector source
163 /// register is given by \p SrcReg. The register containing the element is
164 /// given by \p EltReg.
165 MachineInstr *emitLaneInsert(std::optional<Register> DstReg, Register SrcReg,
166 Register EltReg, unsigned LaneIdx,
167 const RegisterBank &RB,
168 MachineIRBuilder &MIRBuilder) const;
169
170 /// Emit a sequence of instructions representing a constant \p CV for a
171 /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
172 ///
173 /// \returns the last instruction in the sequence on success, and nullptr
174 /// otherwise.
175 MachineInstr *emitConstantVector(Register Dst, Constant *CV,
176 MachineIRBuilder &MIRBuilder,
177 MachineRegisterInfo &MRI);
178
179 MachineInstr *tryAdvSIMDModImm8(Register Dst, unsigned DstSize, APInt Bits,
180 MachineIRBuilder &MIRBuilder);
181
182 MachineInstr *tryAdvSIMDModImm16(Register Dst, unsigned DstSize, APInt Bits,
183 MachineIRBuilder &MIRBuilder, bool Inv);
184
185 MachineInstr *tryAdvSIMDModImm32(Register Dst, unsigned DstSize, APInt Bits,
186 MachineIRBuilder &MIRBuilder, bool Inv);
187 MachineInstr *tryAdvSIMDModImm64(Register Dst, unsigned DstSize, APInt Bits,
188 MachineIRBuilder &MIRBuilder);
189 MachineInstr *tryAdvSIMDModImm321s(Register Dst, unsigned DstSize, APInt Bits,
190 MachineIRBuilder &MIRBuilder, bool Inv);
191 MachineInstr *tryAdvSIMDModImmFP(Register Dst, unsigned DstSize, APInt Bits,
192 MachineIRBuilder &MIRBuilder);
193
194 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
195 MachineRegisterInfo &MRI);
196 /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
197 /// SUBREG_TO_REG.
198 bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
199 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
200 bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
201 bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
202
203 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
204 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
205 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
206 bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
207
208 /// Helper function to select vector load intrinsics like
209 /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
210 /// \p Opc is the opcode that the selected instruction should use.
211 /// \p NumVecs is the number of vector destinations for the instruction.
212 /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
213 bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
214 MachineInstr &I);
215 bool selectVectorLoadLaneIntrinsic(unsigned Opc, unsigned NumVecs,
216 MachineInstr &I);
217 void selectVectorStoreIntrinsic(MachineInstr &I, unsigned NumVecs,
218 unsigned Opc);
219 bool selectVectorStoreLaneIntrinsic(MachineInstr &I, unsigned NumVecs,
220 unsigned Opc);
221 bool selectIntrinsicWithSideEffects(MachineInstr &I,
222 MachineRegisterInfo &MRI);
223 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
224 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
225 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
226 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
227 bool selectPtrAuthGlobalValue(MachineInstr &I,
228 MachineRegisterInfo &MRI) const;
229 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
230 bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
231 bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
232 void SelectTable(MachineInstr &I, MachineRegisterInfo &MRI, unsigned NumVecs,
233 unsigned Opc1, unsigned Opc2, bool isExt);
234
235 bool selectIndexedExtLoad(MachineInstr &I, MachineRegisterInfo &MRI);
236 bool selectIndexedLoad(MachineInstr &I, MachineRegisterInfo &MRI);
237 bool selectIndexedStore(GIndexedStore &I, MachineRegisterInfo &MRI);
238
239 unsigned emitConstantPoolEntry(const Constant *CPVal,
240 MachineFunction &MF) const;
241 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
242 MachineIRBuilder &MIRBuilder) const;
243
244 // Emit a vector concat operation.
245 MachineInstr *emitVectorConcat(std::optional<Register> Dst, Register Op1,
246 Register Op2,
247 MachineIRBuilder &MIRBuilder) const;
248
249 // Emit an integer compare between LHS and RHS, which checks for Predicate.
250 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
251 MachineOperand &Predicate,
252 MachineIRBuilder &MIRBuilder) const;
253
254 /// Emit a floating point comparison between \p LHS and \p RHS.
255 /// \p Pred, if given, is the intended predicate to use.
256 MachineInstr *
257 emitFPCompare(Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
258 std::optional<CmpInst::Predicate> = std::nullopt) const;
259
260 MachineInstr *
261 emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
262 std::initializer_list<llvm::SrcOp> SrcOps,
263 MachineIRBuilder &MIRBuilder,
264 const ComplexRendererFns &RenderFns = std::nullopt) const;
265 /// Helper function to emit an add or sub instruction.
266 ///
267 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
268 /// in a specific order.
269 ///
270 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
271 ///
272 /// \code
273 /// const std::array<std::array<unsigned, 2>, 5> Table {
274 /// {{AArch64::ADDXri, AArch64::ADDWri},
275 /// {AArch64::ADDXrs, AArch64::ADDWrs},
276 /// {AArch64::ADDXrr, AArch64::ADDWrr},
277 /// {AArch64::SUBXri, AArch64::SUBWri},
278 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
279 /// \endcode
280 ///
281 /// Each row in the table corresponds to a different addressing mode. Each
282 /// column corresponds to a different register size.
283 ///
284 /// \attention Rows must be structured as follows:
285 /// - Row 0: The ri opcode variants
286 /// - Row 1: The rs opcode variants
287 /// - Row 2: The rr opcode variants
288 /// - Row 3: The ri opcode variants for negative immediates
289 /// - Row 4: The rx opcode variants
290 ///
291 /// \attention Columns must be structured as follows:
292 /// - Column 0: The 64-bit opcode variants
293 /// - Column 1: The 32-bit opcode variants
294 ///
295 /// \p Dst is the destination register of the binop to emit.
296 /// \p LHS is the left-hand operand of the binop to emit.
297 /// \p RHS is the right-hand operand of the binop to emit.
298 MachineInstr *emitAddSub(
299 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
300 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
301 MachineIRBuilder &MIRBuilder) const;
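 /// For illustration only (a sketch; the table and operand names here are
 /// hypothetical, not part of the selector): a caller building an ADD would
 /// pass the ADD/SUB table described above, and emitAddSub picks the
 /// ri/rs/rr/rx variant based on what the RHS folds into.
 ///
 /// \code
 /// emitAddSub(AddSubTable, Dst, LHS, RHS, MIRBuilder);
 /// \endcode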
302 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
303 MachineOperand &RHS,
304 MachineIRBuilder &MIRBuilder) const;
305 MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
306 MachineIRBuilder &MIRBuilder) const;
307 MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
308 MachineIRBuilder &MIRBuilder) const;
309 MachineInstr *emitADCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
310 MachineIRBuilder &MIRBuilder) const;
311 MachineInstr *emitSBCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
312 MachineIRBuilder &MIRBuilder) const;
313 MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
314 MachineIRBuilder &MIRBuilder) const;
315 MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
316 MachineIRBuilder &MIRBuilder) const;
317 MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
318 AArch64CC::CondCode CC,
319 MachineIRBuilder &MIRBuilder) const;
320 MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,
321 const RegisterBank &DstRB, LLT ScalarTy,
322 Register VecReg, unsigned LaneIdx,
323 MachineIRBuilder &MIRBuilder) const;
324 MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
325 AArch64CC::CondCode Pred,
326 MachineIRBuilder &MIRBuilder) const;
327 /// Emit a CSet for a FP compare.
328 ///
329 /// \p Dst is expected to be a 32-bit scalar register.
330 MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
331 MachineIRBuilder &MIRBuilder) const;
332
333 /// Emit an instruction that sets NZCV to the carry-in expected by \p I.
334 /// Might elide the instruction if the previous instruction already sets NZCV
335 /// correctly.
336 MachineInstr *emitCarryIn(MachineInstr &I, Register CarryReg);
337
338 /// Emit the overflow op for \p Opcode.
339 ///
340 /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
341 /// G_USUBO, etc.
342 std::pair<MachineInstr *, AArch64CC::CondCode>
343 emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
344 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
345
346 bool selectOverflowOp(MachineInstr &I, MachineRegisterInfo &MRI);
347
348 /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
349 /// In some cases this is even possible with OR operations in the expression.
350 MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC,
351 MachineIRBuilder &MIB) const;
352 MachineInstr *emitConditionalComparison(Register LHS, Register RHS,
353 CmpInst::Predicate CC,
354 AArch64CC::CondCode Predicate,
355 AArch64CC::CondCode OutCC,
356 MachineIRBuilder &MIB) const;
357 MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC,
358 bool Negate, Register CCOp,
359 AArch64CC::CondCode Predicate,
360 MachineIRBuilder &MIB) const;
361
362 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
363 /// \p IsNegative is true if the test should be "not zero".
364 /// This will also optimize the test bit instruction when possible.
365 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
366 MachineBasicBlock *DstMBB,
367 MachineIRBuilder &MIB) const;
368
369 /// Emit a CB(N)Z instruction which branches to \p DestMBB.
370 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
371 MachineBasicBlock *DestMBB,
372 MachineIRBuilder &MIB) const;
373
374 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
375 // We use these manually instead of using the importer since it doesn't
376 // support SDNodeXForm.
377 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
378 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
379 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
380 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
381
382 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
383 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
384 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
385
386 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
387 unsigned Size) const;
388
389 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
390 return selectAddrModeUnscaled(Root, 1);
391 }
392 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
393 return selectAddrModeUnscaled(Root, 2);
394 }
395 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
396 return selectAddrModeUnscaled(Root, 4);
397 }
398 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
399 return selectAddrModeUnscaled(Root, 8);
400 }
401 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
402 return selectAddrModeUnscaled(Root, 16);
403 }
404
405 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
406 /// from complex pattern matchers like selectAddrModeIndexed().
407 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
408 MachineRegisterInfo &MRI) const;
409
410 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
411 unsigned Size) const;
412 template <int Width>
413 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
414 return selectAddrModeIndexed(Root, Width / 8);
415 }
416
417 std::optional<bool>
418 isWorthFoldingIntoAddrMode(MachineInstr &MI,
419 const MachineRegisterInfo &MRI) const;
420
421 bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
422 const MachineRegisterInfo &MRI,
423 bool IsAddrOperand) const;
424 ComplexRendererFns
425 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
426 unsigned SizeInBytes) const;
427
428 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
429 /// or not a shift + extend should be folded into an addressing mode. Returns
430 /// None when this is not profitable or possible.
431 ComplexRendererFns
432 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
433 MachineOperand &Offset, unsigned SizeInBytes,
434 bool WantsExt) const;
435 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
436 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
437 unsigned SizeInBytes) const;
438 template <int Width>
439 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
440 return selectAddrModeXRO(Root, Width / 8);
441 }
442
443 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
444 unsigned SizeInBytes) const;
445 template <int Width>
446 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
447 return selectAddrModeWRO(Root, Width / 8);
448 }
449
450 ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
451 bool AllowROR = false) const;
452
453 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
454 return selectShiftedRegister(Root);
455 }
456
457 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
458 return selectShiftedRegister(Root, true);
459 }
460
461 /// Given an extend instruction, determine the correct shift-extend type for
462 /// that instruction.
463 ///
464 /// If the instruction is going to be used in a load or store, pass
465 /// \p IsLoadStore = true.
466 AArch64_AM::ShiftExtendType
467 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
468 bool IsLoadStore = false) const;
469
470 /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
471 ///
472 /// \returns Either \p Reg if no change was necessary, or the new register
473 /// created by moving \p Reg.
474 ///
475 /// Note: This uses emitCopy right now.
476 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
477 MachineIRBuilder &MIB) const;
478
479 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
480
481 ComplexRendererFns selectExtractHigh(MachineOperand &Root) const;
482
483 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
484 int OpIdx = -1) const;
485 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
486 int OpIdx = -1) const;
487 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
488 int OpIdx = -1) const;
489 void renderUbsanTrap(MachineInstrBuilder &MIB, const MachineInstr &MI,
490 int OpIdx) const;
491 void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
492 int OpIdx = -1) const;
493 void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
494 int OpIdx = -1) const;
495 void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
496 int OpIdx = -1) const;
497 void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
498 const MachineInstr &MI,
499 int OpIdx = -1) const;
500
501 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
502 void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
503
504 // Optimization methods.
505 bool tryOptSelect(GSelect &Sel);
506 bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
507 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
508 MachineOperand &Predicate,
509 MachineIRBuilder &MIRBuilder) const;
510
511 /// Return true if \p MI is a load or store of \p NumBytes bytes.
512 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
513
514 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
515 /// register zeroed out. In other words, the result of MI has been explicitly
516 /// zero extended.
517 bool isDef32(const MachineInstr &MI) const;
518
519 const AArch64TargetMachine &TM;
520 const AArch64Subtarget &STI;
521 const AArch64InstrInfo &TII;
522 const AArch64RegisterInfo &TRI;
523 const AArch64RegisterBankInfo &RBI;
524
525 bool ProduceNonFlagSettingCondBr = false;
526
527 // Some cached values used during selection.
528 // We use LR as a live-in register, and we keep track of it here as it can be
529 // clobbered by calls.
530 Register MFReturnAddr;
531
532 MachineIRBuilder MIB;
533
534#define GET_GLOBALISEL_PREDICATES_DECL
535#include "AArch64GenGlobalISel.inc"
536#undef GET_GLOBALISEL_PREDICATES_DECL
537
538// We declare the temporaries used by selectImpl() in the class to minimize the
539// cost of constructing placeholder values.
540#define GET_GLOBALISEL_TEMPORARIES_DECL
541#include "AArch64GenGlobalISel.inc"
542#undef GET_GLOBALISEL_TEMPORARIES_DECL
543};
544
545} // end anonymous namespace
546
547#define GET_GLOBALISEL_IMPL
548#include "AArch64GenGlobalISel.inc"
549#undef GET_GLOBALISEL_IMPL
550
551AArch64InstructionSelector::AArch64InstructionSelector(
552 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
553 const AArch64RegisterBankInfo &RBI)
554 : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
555 RBI(RBI),
556#define GET_GLOBALISEL_PREDICATES_INIT
557#include "AArch64GenGlobalISel.inc"
558#undef GET_GLOBALISEL_PREDICATES_INIT
559#define GET_GLOBALISEL_TEMPORARIES_INIT
560#include "AArch64GenGlobalISel.inc"
561#undef GET_GLOBALISEL_TEMPORARIES_INIT
562{
563}
564
565// FIXME: This should be target-independent, inferred from the types declared
566// for each class in the bank.
567//
568/// Given a register bank, and a type, return the smallest register class that
569/// can represent that combination.
570static const TargetRegisterClass *
571getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
572 bool GetAllRegSet = false) {
573 if (RB.getID() == AArch64::GPRRegBankID) {
574 if (Ty.getSizeInBits() <= 32)
575 return GetAllRegSet ? &AArch64::GPR32allRegClass
576 : &AArch64::GPR32RegClass;
577 if (Ty.getSizeInBits() == 64)
578 return GetAllRegSet ? &AArch64::GPR64allRegClass
579 : &AArch64::GPR64RegClass;
580 if (Ty.getSizeInBits() == 128)
581 return &AArch64::XSeqPairsClassRegClass;
582 return nullptr;
583 }
584
585 if (RB.getID() == AArch64::FPRRegBankID) {
586 switch (Ty.getSizeInBits()) {
587 case 8:
588 return &AArch64::FPR8RegClass;
589 case 16:
590 return &AArch64::FPR16RegClass;
591 case 32:
592 return &AArch64::FPR32RegClass;
593 case 64:
594 return &AArch64::FPR64RegClass;
595 case 128:
596 return &AArch64::FPR128RegClass;
597 }
598 return nullptr;
599 }
600
601 return nullptr;
602}
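// Illustrative only (a sketch, not exercised by the selector itself): a few
// mappings implied by getRegClassForTypeOnBank, using example LLTs. Here
// GPRRegBank/FPRRegBank stand in for the corresponding RegisterBank objects.
//
// \code
//   getRegClassForTypeOnBank(LLT::scalar(32), GPRRegBank);          // &AArch64::GPR32RegClass
//   getRegClassForTypeOnBank(LLT::scalar(64), FPRRegBank);          // &AArch64::FPR64RegClass
//   getRegClassForTypeOnBank(LLT::fixed_vector(4, 32), FPRRegBank); // &AArch64::FPR128RegClass
// \endcode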
603
604/// Given a register bank, and size in bits, return the smallest register class
605/// that can represent that combination.
606static const TargetRegisterClass *
607getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits,
608 bool GetAllRegSet = false) {
609 if (SizeInBits.isScalable()) {
610 assert(RB.getID() == AArch64::FPRRegBankID &&
611 "Expected FPR regbank for scalable type size");
612 return &AArch64::ZPRRegClass;
613 }
614
615 unsigned RegBankID = RB.getID();
616
617 if (RegBankID == AArch64::GPRRegBankID) {
618 assert(!SizeInBits.isScalable() && "Unexpected scalable register size");
619 if (SizeInBits <= 32)
620 return GetAllRegSet ? &AArch64::GPR32allRegClass
621 : &AArch64::GPR32RegClass;
622 if (SizeInBits == 64)
623 return GetAllRegSet ? &AArch64::GPR64allRegClass
624 : &AArch64::GPR64RegClass;
625 if (SizeInBits == 128)
626 return &AArch64::XSeqPairsClassRegClass;
627 }
628
629 if (RegBankID == AArch64::FPRRegBankID) {
630 if (SizeInBits.isScalable()) {
631 assert(SizeInBits == TypeSize::getScalable(128) &&
632 "Unexpected scalable register size");
633 return &AArch64::ZPRRegClass;
634 }
635
636 switch (SizeInBits) {
637 default:
638 return nullptr;
639 case 8:
640 return &AArch64::FPR8RegClass;
641 case 16:
642 return &AArch64::FPR16RegClass;
643 case 32:
644 return &AArch64::FPR32RegClass;
645 case 64:
646 return &AArch64::FPR64RegClass;
647 case 128:
648 return &AArch64::FPR128RegClass;
649 }
650 }
651
652 return nullptr;
653}
654
655/// Returns the correct subregister to use for a given register class.
656static bool getSubRegForClass(const TargetRegisterClass *RC,
657 const TargetRegisterInfo &TRI, unsigned &SubReg) {
658 switch (TRI.getRegSizeInBits(*RC)) {
659 case 8:
660 SubReg = AArch64::bsub;
661 break;
662 case 16:
663 SubReg = AArch64::hsub;
664 break;
665 case 32:
666 if (RC != &AArch64::FPR32RegClass)
667 SubReg = AArch64::sub_32;
668 else
669 SubReg = AArch64::ssub;
670 break;
671 case 64:
672 SubReg = AArch64::dsub;
673 break;
674 default:
675 LLVM_DEBUG(
676 dbgs() << "Couldn't find appropriate subregister for register class.");
677 return false;
678 }
679
680 return true;
681}
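// For illustration (a sketch, not called by the selector; TRI is assumed to
// be in scope): the subregister chosen depends only on the class size.
//
// \code
//   unsigned SubReg;
//   getSubRegForClass(&AArch64::FPR32RegClass, TRI, SubReg); // SubReg == AArch64::ssub
//   getSubRegForClass(&AArch64::GPR32RegClass, TRI, SubReg); // SubReg == AArch64::sub_32
// \endcode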
682
683/// Returns the minimum size the given register bank can hold.
684static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
685 switch (RB.getID()) {
686 case AArch64::GPRRegBankID:
687 return 32;
688 case AArch64::FPRRegBankID:
689 return 8;
690 default:
691 llvm_unreachable("Tried to get minimum size for unknown register bank.");
692 }
693}
694
695/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
696/// Helper function for functions like createDTuple and createQTuple.
697///
698/// \p RegClassIDs - The list of register class IDs available for some tuple of
699/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
700/// expected to contain between 2 and 4 tuple classes.
701///
702/// \p SubRegs - The list of subregister classes associated with each register
703/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
704/// subregister class. The index of each subregister class is expected to
705/// correspond with the index of each register class.
706///
707/// \returns Either the destination register of REG_SEQUENCE instruction that
708/// was created, or the 0th element of \p Regs if \p Regs contains a single
709/// element.
710static Register createTuple(ArrayRef<Register> Regs,
711 const unsigned RegClassIDs[],
712 const unsigned SubRegs[], MachineIRBuilder &MIB) {
713 unsigned NumRegs = Regs.size();
714 if (NumRegs == 1)
715 return Regs[0];
716 assert(NumRegs >= 2 && NumRegs <= 4 &&
717 "Only support between two and 4 registers in a tuple!");
718 const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
719 auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
720 auto RegSequence =
721 MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
722 for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
723 RegSequence.addUse(Regs[I]);
724 RegSequence.addImm(SubRegs[I]);
725 }
726 return RegSequence.getReg(0);
727}
728
729/// Create a tuple of D-registers using the registers in \p Regs.
730static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
731 static const unsigned RegClassIDs[] = {
732 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
733 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
734 AArch64::dsub2, AArch64::dsub3};
735 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
736}
737
738/// Create a tuple of Q-registers using the registers in \p Regs.
739static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
740 static const unsigned RegClassIDs[] = {
741 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
742 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
743 AArch64::qsub2, AArch64::qsub3};
744 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
745}
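// A minimal usage sketch (the vreg names are hypothetical): three Q-register
// sized values become a single QQQ tuple through one REG_SEQUENCE.
//
// \code
//   Register Tuple = createQTuple({Q0, Q1, Q2}, MIB);
//   // Roughly: %tuple:qqq = REG_SEQUENCE %q0, qsub0, %q1, qsub1, %q2, qsub2
// \endcode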
746
747static std::optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
748 auto &MI = *Root.getParent();
749 auto &MBB = *MI.getParent();
750 auto &MF = *MBB.getParent();
751 auto &MRI = MF.getRegInfo();
752 uint64_t Immed;
753 if (Root.isImm())
754 Immed = Root.getImm();
755 else if (Root.isCImm())
756 Immed = Root.getCImm()->getZExtValue();
757 else if (Root.isReg()) {
758 auto ValAndVReg =
759 getIConstantVRegValWithLookThrough(Root.getReg(), MRI);
760 if (!ValAndVReg)
761 return std::nullopt;
762 Immed = ValAndVReg->Value.getSExtValue();
763 } else
764 return std::nullopt;
765 return Immed;
766}
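// Illustrative only: getImmedFromMO handles a plain immediate, a ConstantInt
// operand, or a register defined (looking through copies) by a G_CONSTANT.
// For the hypothetical operand %c below, it would return 42.
//
// \code
//   %c:_(s64) = G_CONSTANT i64 42
//   %sum:_(s64) = G_ADD %x, %c
// \endcode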
767
768/// Check whether \p I is a currently unsupported binary operation:
769/// - it has an unsized type
770/// - an operand is not a vreg
771/// - its operands are not all on the same register bank
772/// These are checks that should someday live in the verifier, but right now,
773/// these are mostly limitations of the aarch64 selector.
774static bool unsupportedBinOp(const MachineInstr &I,
775 const AArch64RegisterBankInfo &RBI,
776 const MachineRegisterInfo &MRI,
777 const AArch64RegisterInfo &TRI) {
778 LLT Ty = MRI.getType(I.getOperand(0).getReg());
779 if (!Ty.isValid()) {
780 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
781 return true;
782 }
783
784 const RegisterBank *PrevOpBank = nullptr;
785 for (auto &MO : I.operands()) {
786 // FIXME: Support non-register operands.
787 if (!MO.isReg()) {
788 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
789 return true;
790 }
791
792 // FIXME: Can generic operations have physical registers operands? If
793 // so, this will need to be taught about that, and we'll need to get the
794 // bank out of the minimal class for the register.
795 // Either way, this needs to be documented (and possibly verified).
796 if (!MO.getReg().isVirtual()) {
797 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
798 return true;
799 }
800
801 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
802 if (!OpBank) {
803 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
804 return true;
805 }
806
807 if (PrevOpBank && OpBank != PrevOpBank) {
808 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
809 return true;
810 }
811 PrevOpBank = OpBank;
812 }
813 return false;
814}
815
816/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
817/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
818/// and of size \p OpSize.
819/// \returns \p GenericOpc if the combination is unsupported.
820static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
821 unsigned OpSize) {
822 switch (RegBankID) {
823 case AArch64::GPRRegBankID:
824 if (OpSize == 32) {
825 switch (GenericOpc) {
826 case TargetOpcode::G_SHL:
827 return AArch64::LSLVWr;
828 case TargetOpcode::G_LSHR:
829 return AArch64::LSRVWr;
830 case TargetOpcode::G_ASHR:
831 return AArch64::ASRVWr;
832 default:
833 return GenericOpc;
834 }
835 } else if (OpSize == 64) {
836 switch (GenericOpc) {
837 case TargetOpcode::G_PTR_ADD:
838 return AArch64::ADDXrr;
839 case TargetOpcode::G_SHL:
840 return AArch64::LSLVXr;
841 case TargetOpcode::G_LSHR:
842 return AArch64::LSRVXr;
843 case TargetOpcode::G_ASHR:
844 return AArch64::ASRVXr;
845 default:
846 return GenericOpc;
847 }
848 }
849 break;
850 case AArch64::FPRRegBankID:
851 switch (OpSize) {
852 case 32:
853 switch (GenericOpc) {
854 case TargetOpcode::G_FADD:
855 return AArch64::FADDSrr;
856 case TargetOpcode::G_FSUB:
857 return AArch64::FSUBSrr;
858 case TargetOpcode::G_FMUL:
859 return AArch64::FMULSrr;
860 case TargetOpcode::G_FDIV:
861 return AArch64::FDIVSrr;
862 default:
863 return GenericOpc;
864 }
865 case 64:
866 switch (GenericOpc) {
867 case TargetOpcode::G_FADD:
868 return AArch64::FADDDrr;
869 case TargetOpcode::G_FSUB:
870 return AArch64::FSUBDrr;
871 case TargetOpcode::G_FMUL:
872 return AArch64::FMULDrr;
873 case TargetOpcode::G_FDIV:
874 return AArch64::FDIVDrr;
875 case TargetOpcode::G_OR:
876 return AArch64::ORRv8i8;
877 default:
878 return GenericOpc;
879 }
880 }
881 break;
882 }
883 return GenericOpc;
884}
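// For illustration (hypothetical queries, not part of the selector): a few
// mappings implied by the switch above.
//
// \code
//   selectBinaryOp(TargetOpcode::G_SHL, AArch64::GPRRegBankID, 64);  // AArch64::LSLVXr
//   selectBinaryOp(TargetOpcode::G_FADD, AArch64::FPRRegBankID, 32); // AArch64::FADDSrr
//   selectBinaryOp(TargetOpcode::G_MUL, AArch64::GPRRegBankID, 32);  // unsupported: returns G_MUL
// \endcode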
885
886/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
887/// appropriate for the (value) register bank \p RegBankID and of memory access
888/// size \p OpSize. This returns the variant with the base+unsigned-immediate
889/// addressing mode (e.g., LDRXui).
890/// \returns \p GenericOpc if the combination is unsupported.
891static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
892 unsigned OpSize) {
893 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
894 switch (RegBankID) {
895 case AArch64::GPRRegBankID:
896 switch (OpSize) {
897 case 8:
898 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
899 case 16:
900 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
901 case 32:
902 return isStore ? AArch64::STRWui : AArch64::LDRWui;
903 case 64:
904 return isStore ? AArch64::STRXui : AArch64::LDRXui;
905 }
906 break;
907 case AArch64::FPRRegBankID:
908 switch (OpSize) {
909 case 8:
910 return isStore ? AArch64::STRBui : AArch64::LDRBui;
911 case 16:
912 return isStore ? AArch64::STRHui : AArch64::LDRHui;
913 case 32:
914 return isStore ? AArch64::STRSui : AArch64::LDRSui;
915 case 64:
916 return isStore ? AArch64::STRDui : AArch64::LDRDui;
917 case 128:
918 return isStore ? AArch64::STRQui : AArch64::LDRQui;
919 }
920 break;
921 }
922 return GenericOpc;
923}
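// For illustration: sample mappings implied by the table above, all using the
// base + unsigned-immediate addressing mode.
//
// \code
//   selectLoadStoreUIOp(TargetOpcode::G_LOAD, AArch64::GPRRegBankID, 8);    // AArch64::LDRBBui
//   selectLoadStoreUIOp(TargetOpcode::G_LOAD, AArch64::GPRRegBankID, 64);   // AArch64::LDRXui
//   selectLoadStoreUIOp(TargetOpcode::G_STORE, AArch64::FPRRegBankID, 128); // AArch64::STRQui
// \endcode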
924
925/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
926/// to \p *To.
927///
928/// E.g., "To = COPY SrcReg:SubReg"
929static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
930 const RegisterBankInfo &RBI, Register SrcReg,
931 const TargetRegisterClass *To, unsigned SubReg) {
932 assert(SrcReg.isValid() && "Expected a valid source register?");
933 assert(To && "Destination register class cannot be null");
934 assert(SubReg && "Expected a valid subregister");
935
936 MachineIRBuilder MIB(I);
937 auto SubRegCopy =
938 MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
939 MachineOperand &RegOp = I.getOperand(1);
940 RegOp.setReg(SubRegCopy.getReg(0));
941
942 // It's possible that the destination register won't be constrained. Make
943 // sure that happens.
944 if (!I.getOperand(0).getReg().isPhysical())
945 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
946
947 return true;
948}
949
950/// Helper function to get the source and destination register classes for a
951/// copy. Returns a std::pair containing the source register class for the
952/// copy, and the destination register class for the copy. If a register class
953/// cannot be determined, then it will be nullptr.
954static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
955getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
956 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
957 const RegisterBankInfo &RBI) {
958 Register DstReg = I.getOperand(0).getReg();
959 Register SrcReg = I.getOperand(1).getReg();
960 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
961 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
962
963 TypeSize DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
964 TypeSize SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
965
966 // Special casing for cross-bank copies of s1s. We can technically represent
967 // a 1-bit value with any size of register. The minimum size for a GPR is 32
968 // bits. So, we need to put the FPR on 32 bits as well.
969 //
970 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
971 // then we can pull it into the helpers that get the appropriate class for a
972 // register bank. Or make a new helper that carries along some constraint
973 // information.
974 if (SrcRegBank != DstRegBank &&
975 (DstSize == TypeSize::getFixed(1) && SrcSize == TypeSize::getFixed(1)))
976 SrcSize = DstSize = TypeSize::getFixed(32);
977
978 return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
979 getMinClassForRegBank(DstRegBank, DstSize, true)};
980}
981
982// FIXME: We need some sort of API in RBI/TRI to allow generic code to
983// constrain operands of simple instructions given a TargetRegisterClass
984// and LLT
985static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI,
986 const RegisterBankInfo &RBI) {
987 for (MachineOperand &MO : I.operands()) {
988 if (!MO.isReg())
989 continue;
990 Register Reg = MO.getReg();
991 if (!Reg)
992 continue;
993 if (Reg.isPhysical())
994 continue;
995 LLT Ty = MRI.getType(Reg);
996 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
997 const TargetRegisterClass *RC =
998 dyn_cast_if_present<const TargetRegisterClass *>(RegClassOrBank);
999 if (!RC) {
1000 const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
1001 RC = getRegClassForTypeOnBank(Ty, RB);
1002 if (!RC) {
1003 LLVM_DEBUG(
1004 dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
1005 break;
1006 }
1007 }
1008 RBI.constrainGenericRegister(Reg, *RC, MRI);
1009 }
1010
1011 return true;
1012}
1013
1014static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
1015 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
1016 const RegisterBankInfo &RBI) {
1017 Register DstReg = I.getOperand(0).getReg();
1018 Register SrcReg = I.getOperand(1).getReg();
1019 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
1020 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
1021
1022 // Find the correct register classes for the source and destination registers.
1023 const TargetRegisterClass *SrcRC;
1024 const TargetRegisterClass *DstRC;
1025 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
1026
1027 if (!DstRC) {
1028 LLVM_DEBUG(dbgs() << "Unexpected dest size "
1029 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
1030 return false;
1031 }
1032
1033 // Is this a copy? If so, then we may need to insert a subregister copy.
1034 if (I.isCopy()) {
1035 // Yes. Check if there's anything to fix up.
1036 if (!SrcRC) {
1037 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
1038 return false;
1039 }
1040
1041 const TypeSize SrcSize = TRI.getRegSizeInBits(*SrcRC);
1042 const TypeSize DstSize = TRI.getRegSizeInBits(*DstRC);
1043 unsigned SubReg;
1044
1045 // If the source bank doesn't support a subregister copy small enough,
1046 // then we first need to copy to the destination bank.
1047 if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
1048 const TargetRegisterClass *DstTempRC =
1049 getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
1050 getSubRegForClass(DstRC, TRI, SubReg);
1051
1052 MachineIRBuilder MIB(I);
1053 auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
1054 copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
1055 } else if (SrcSize > DstSize) {
1056 // If the source register is bigger than the destination we need to
1057 // perform a subregister copy.
1058 const TargetRegisterClass *SubRegRC =
1059 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1060 getSubRegForClass(SubRegRC, TRI, SubReg);
1061 copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
1062 } else if (DstSize > SrcSize) {
1063 // If the destination register is bigger than the source we need to do
1064 // a promotion using SUBREG_TO_REG.
1065 const TargetRegisterClass *PromotionRC =
1066 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1067 getSubRegForClass(SrcRC, TRI, SubReg);
1068
1069 Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
1070 BuildMI(*I.getParent(), I, I.getDebugLoc(),
1071 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
1072 .addImm(0)
1073 .addUse(SrcReg)
1074 .addImm(SubReg);
1075 MachineOperand &RegOp = I.getOperand(1);
1076 RegOp.setReg(PromoteReg);
1077 }
1078
1079 // If the destination is a physical register, then there's nothing to
1080 // change, so we're done.
1081 if (DstReg.isPhysical())
1082 return true;
1083 }
1084
1085 // No need to constrain SrcReg. It will get constrained when we hit another
1086 // of its uses or defs. Copies do not have constraints.
1087 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1088 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
1089 << " operand\n");
1090 return false;
1091 }
1092
1093 // If this is a GPR ZEXT that we want to just reduce down into a copy.
1094 // The sizes will be mismatched with the source < 32b but that's ok.
1095 if (I.getOpcode() == TargetOpcode::G_ZEXT) {
1096 I.setDesc(TII.get(AArch64::COPY));
1097 assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
1098 return selectCopy(I, TII, MRI, TRI, RBI);
1099 }
1100
1101 I.setDesc(TII.get(AArch64::COPY));
1102 return true;
1103}
1104
1105MachineInstr *
1106AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1107 Register False, AArch64CC::CondCode CC,
1108 MachineIRBuilder &MIB) const {
1109 MachineRegisterInfo &MRI = *MIB.getMRI();
1110 assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
1111 RBI.getRegBank(True, MRI, TRI)->getID() &&
1112 "Expected both select operands to have the same regbank?");
1113 LLT Ty = MRI.getType(True);
1114 if (Ty.isVector())
1115 return nullptr;
1116 const unsigned Size = Ty.getSizeInBits();
1117 assert((Size == 32 || Size == 64) &&
1118 "Expected 32 bit or 64 bit select only?");
1119 const bool Is32Bit = Size == 32;
1120 if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1121 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1122 auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1123 constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1124 return &*FCSel;
1125 }
1126
1127 // By default, we'll try and emit a CSEL.
1128 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1129 bool Optimized = false;
1130 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1131 &Optimized](Register &Reg, Register &OtherReg,
1132 bool Invert) {
1133 if (Optimized)
1134 return false;
1135
1136 // Attempt to fold:
1137 //
1138 // %sub = G_SUB 0, %x
1139 // %select = G_SELECT cc, %reg, %sub
1140 //
1141 // Into:
1142 // %select = CSNEG %reg, %x, cc
1143 Register MatchReg;
1144 if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
1145 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1146 Reg = MatchReg;
1147 if (Invert) {
1148 CC = AArch64CC::getInvertedCondCode(CC);
1149 std::swap(Reg, OtherReg);
1150 }
1151 return true;
1152 }
1153
1154 // Attempt to fold:
1155 //
1156 // %xor = G_XOR %x, -1
1157 // %select = G_SELECT cc, %reg, %xor
1158 //
1159 // Into:
1160 // %select = CSINV %reg, %x, cc
1161 if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
1162 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1163 Reg = MatchReg;
1164 if (Invert) {
1165 CC = AArch64CC::getInvertedCondCode(CC);
1166 std::swap(Reg, OtherReg);
1167 }
1168 return true;
1169 }
1170
1171 // Attempt to fold:
1172 //
1173 // %add = G_ADD %x, 1
1174 // %select = G_SELECT cc, %reg, %add
1175 //
1176 // Into:
1177 // %select = CSINC %reg, %x, cc
1178 if (mi_match(Reg, MRI,
1179 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
1180 m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
1181 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1182 Reg = MatchReg;
1183 if (Invert) {
1184 CC = AArch64CC::getInvertedCondCode(CC);
1185 std::swap(Reg, OtherReg);
1186 }
1187 return true;
1188 }
1189
1190 return false;
1191 };
1192
1193 // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1194 // true/false values are constants.
1195 // FIXME: All of these patterns already exist in tablegen. We should be
1196 // able to import these.
1197 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1198 &Optimized]() {
1199 if (Optimized)
1200 return false;
1201 auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
1202 auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
1203 if (!TrueCst && !FalseCst)
1204 return false;
1205
1206 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1207 if (TrueCst && FalseCst) {
1208 int64_t T = TrueCst->Value.getSExtValue();
1209 int64_t F = FalseCst->Value.getSExtValue();
1210
1211 if (T == 0 && F == 1) {
1212 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1213 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1214 True = ZReg;
1215 False = ZReg;
1216 return true;
1217 }
1218
1219 if (T == 0 && F == -1) {
1220 // G_SELECT cc, 0, -1 -> CSINV zreg, zreg, cc
1221 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1222 True = ZReg;
1223 False = ZReg;
1224 return true;
1225 }
1226 }
1227
1228 if (TrueCst) {
1229 int64_t T = TrueCst->Value.getSExtValue();
1230 if (T == 1) {
1231 // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1232 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1233 True = False;
1234 False = ZReg;
1235 CC = AArch64CC::getInvertedCondCode(CC);
1236 return true;
1237 }
1238
1239 if (T == -1) {
1240 // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1241 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1242 True = False;
1243 False = ZReg;
1244 CC = AArch64CC::getInvertedCondCode(CC);
1245 return true;
1246 }
1247 }
1248
1249 if (FalseCst) {
1250 int64_t F = FalseCst->Value.getSExtValue();
1251 if (F == 1) {
1252 // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1253 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1254 False = ZReg;
1255 return true;
1256 }
1257
1258 if (F == -1) {
1259 // G_SELECT cc, t, -1 -> CSINV t, zreg, cc
1260 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1261 False = ZReg;
1262 return true;
1263 }
1264 }
1265 return false;
1266 };
1267
1268 Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1269 Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1270 Optimized |= TryOptSelectCst();
1271 auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1272 constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1273 return &*SelectInst;
1274}
1275
1276static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P,
1277 Register RHS = Register(),
1278 MachineRegisterInfo *MRI = nullptr) {
1279 switch (P) {
1280 default:
1281 llvm_unreachable("Unknown condition code!");
1282 case CmpInst::ICMP_NE:
1283 return AArch64CC::NE;
1284 case CmpInst::ICMP_EQ:
1285 return AArch64CC::EQ;
1286 case CmpInst::ICMP_SGT:
1287 return AArch64CC::GT;
1288 case CmpInst::ICMP_SGE:
1289 if (RHS && MRI) {
1290 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, *MRI);
1291 if (ValAndVReg && ValAndVReg->Value == 0)
1292 return AArch64CC::PL;
1293 }
1294 return AArch64CC::GE;
1295 case CmpInst::ICMP_SLT:
1296 if (RHS && MRI) {
1297 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, *MRI);
1298 if (ValAndVReg && ValAndVReg->Value == 0)
1299 return AArch64CC::MI;
1300 }
1301 return AArch64CC::LT;
1302 case CmpInst::ICMP_SLE:
1303 return AArch64CC::LE;
1304 case CmpInst::ICMP_UGT:
1305 return AArch64CC::HI;
1306 case CmpInst::ICMP_UGE:
1307 return AArch64CC::HS;
1308 case CmpInst::ICMP_ULT:
1309 return AArch64CC::LO;
1310 case CmpInst::ICMP_ULE:
1311 return AArch64CC::LS;
1312 }
1313}
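// For illustration (ZeroReg and MRI are assumed to exist, with ZeroReg defined
// by a G_CONSTANT of 0): signed comparisons against a constant zero can use
// the N flag directly.
//
// \code
//   changeICMPPredToAArch64CC(CmpInst::ICMP_SGE, ZeroReg, &MRI); // AArch64CC::PL
//   changeICMPPredToAArch64CC(CmpInst::ICMP_SLT, ZeroReg, &MRI); // AArch64CC::MI
//   changeICMPPredToAArch64CC(CmpInst::ICMP_UGT);                // AArch64CC::HI
// \endcode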
1314
1315/// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
1316static void changeFPCCToORAArch64CC(CmpInst::Predicate CC,
1317 AArch64CC::CondCode &CondCode,
1318 AArch64CC::CondCode &CondCode2) {
1319 CondCode2 = AArch64CC::AL;
1320 switch (CC) {
1321 default:
1322 llvm_unreachable("Unknown FP condition!");
1323 case CmpInst::FCMP_OEQ:
1324 CondCode = AArch64CC::EQ;
1325 break;
1326 case CmpInst::FCMP_OGT:
1327 CondCode = AArch64CC::GT;
1328 break;
1329 case CmpInst::FCMP_OGE:
1330 CondCode = AArch64CC::GE;
1331 break;
1332 case CmpInst::FCMP_OLT:
1333 CondCode = AArch64CC::MI;
1334 break;
1335 case CmpInst::FCMP_OLE:
1336 CondCode = AArch64CC::LS;
1337 break;
1338 case CmpInst::FCMP_ONE:
1339 CondCode = AArch64CC::MI;
1340 CondCode2 = AArch64CC::GT;
1341 break;
1342 case CmpInst::FCMP_ORD:
1343 CondCode = AArch64CC::VC;
1344 break;
1345 case CmpInst::FCMP_UNO:
1346 CondCode = AArch64CC::VS;
1347 break;
1348 case CmpInst::FCMP_UEQ:
1349 CondCode = AArch64CC::EQ;
1350 CondCode2 = AArch64CC::VS;
1351 break;
1352 case CmpInst::FCMP_UGT:
1353 CondCode = AArch64CC::HI;
1354 break;
1355 case CmpInst::FCMP_UGE:
1356 CondCode = AArch64CC::PL;
1357 break;
1358 case CmpInst::FCMP_ULT:
1359 CondCode = AArch64CC::LT;
1360 break;
1361 case CmpInst::FCMP_ULE:
1362 CondCode = AArch64CC::LE;
1363 break;
1364 case CmpInst::FCMP_UNE:
1365 CondCode = AArch64CC::NE;
1366 break;
1367 }
1368}
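// For illustration: the OR form may need two condition codes, which the
// compare-branch selection below turns into two Bcc instructions.
//
// \code
//   AArch64CC::CondCode CC1, CC2;
//   changeFPCCToORAArch64CC(CmpInst::FCMP_ONE, CC1, CC2); // CC1 = MI, CC2 = GT
//   changeFPCCToORAArch64CC(CmpInst::FCMP_OGT, CC1, CC2); // CC1 = GT, CC2 = AL (unused)
// \endcode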
1369
1370/// Convert an IR fp condition code to an AArch64 CC.
1371/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1372/// should be AND'ed instead of OR'ed.
1373static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC,
1374 AArch64CC::CondCode &CondCode,
1375 AArch64CC::CondCode &CondCode2) {
1376 CondCode2 = AArch64CC::AL;
1377 switch (CC) {
1378 default:
1379 changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
1380 assert(CondCode2 == AArch64CC::AL);
1381 break;
1382 case CmpInst::FCMP_ONE:
1383 // (a one b)
1384 // == ((a olt b) || (a ogt b))
1385 // == ((a ord b) && (a une b))
1386 CondCode = AArch64CC::VC;
1387 CondCode2 = AArch64CC::NE;
1388 break;
1389 case CmpInst::FCMP_UEQ:
1390 // (a ueq b)
1391 // == ((a uno b) || (a oeq b))
1392 // == ((a ule b) && (a uge b))
1393 CondCode = AArch64CC::PL;
1394 CondCode2 = AArch64CC::LE;
1395 break;
1396 }
1397}
1398
1399/// Return a register which can be used as a bit to test in a TB(N)Z.
1400static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1401 MachineRegisterInfo &MRI) {
1402 assert(Reg.isValid() && "Expected valid register!");
1403 bool HasZext = false;
1404 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1405 unsigned Opc = MI->getOpcode();
1406
1407 if (!MI->getOperand(0).isReg() ||
1408 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1409 break;
1410
1411 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1412 //
1413 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1414 // on the truncated x is the same as the bit number on x.
1415 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1416 Opc == TargetOpcode::G_TRUNC) {
1417 if (Opc == TargetOpcode::G_ZEXT)
1418 HasZext = true;
1419
1420 Register NextReg = MI->getOperand(1).getReg();
1421 // Did we find something worth folding?
1422 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1423 break;
1424
1425 // NextReg is worth folding. Keep looking.
1426 Reg = NextReg;
1427 continue;
1428 }
1429
1430 // Attempt to find a suitable operation with a constant on one side.
1431 std::optional<uint64_t> C;
1432 Register TestReg;
1433 switch (Opc) {
1434 default:
1435 break;
1436 case TargetOpcode::G_AND:
1437 case TargetOpcode::G_XOR: {
1438 TestReg = MI->getOperand(1).getReg();
1439 Register ConstantReg = MI->getOperand(2).getReg();
1440 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1441 if (!VRegAndVal) {
1442 // AND commutes, check the other side for a constant.
1443 // FIXME: Can we canonicalize the constant so that it's always on the
1444 // same side at some point earlier?
1445 std::swap(ConstantReg, TestReg);
1446 VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1447 }
1448 if (VRegAndVal) {
1449 if (HasZext)
1450 C = VRegAndVal->Value.getZExtValue();
1451 else
1452 C = VRegAndVal->Value.getSExtValue();
1453 }
1454 break;
1455 }
1456 case TargetOpcode::G_ASHR:
1457 case TargetOpcode::G_LSHR:
1458 case TargetOpcode::G_SHL: {
1459 TestReg = MI->getOperand(1).getReg();
1460 auto VRegAndVal =
1461 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1462 if (VRegAndVal)
1463 C = VRegAndVal->Value.getSExtValue();
1464 break;
1465 }
1466 }
1467
1468 // Didn't find a constant or viable register. Bail out of the loop.
1469 if (!C || !TestReg.isValid())
1470 break;
1471
1472 // We found a suitable instruction with a constant. Check to see if we can
1473 // walk through the instruction.
1474 Register NextReg;
1475 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1476 switch (Opc) {
1477 default:
1478 break;
1479 case TargetOpcode::G_AND:
1480 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1481 if ((*C >> Bit) & 1)
1482 NextReg = TestReg;
1483 break;
1484 case TargetOpcode::G_SHL:
1485 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1486 // the type of the register.
1487 if (*C <= Bit && (Bit - *C) < TestRegSize) {
1488 NextReg = TestReg;
1489 Bit = Bit - *C;
1490 }
1491 break;
1492 case TargetOpcode::G_ASHR:
1493 // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1494 // in x
1495 NextReg = TestReg;
1496 Bit = Bit + *C;
1497 if (Bit >= TestRegSize)
1498 Bit = TestRegSize - 1;
1499 break;
1500 case TargetOpcode::G_LSHR:
1501 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1502 if ((Bit + *C) < TestRegSize) {
1503 NextReg = TestReg;
1504 Bit = Bit + *C;
1505 }
1506 break;
1507 case TargetOpcode::G_XOR:
1508 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1509 // appropriate.
1510 //
1511 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1512 //
1513 // tbz x', b -> tbnz x, b
1514 //
1515 // Because x' only has the b-th bit set if x does not.
1516 if ((*C >> Bit) & 1)
1517 Invert = !Invert;
1518 NextReg = TestReg;
1519 break;
1520 }
1521
1522 // Check if we found anything worth folding.
1523 if (!NextReg.isValid())
1524 return Reg;
1525 Reg = NextReg;
1526 }
1527
1528 return Reg;
1529}
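// A worked example of the walk above (hypothetical vregs): testing bit 5 of a
// shift-left result is the same as testing bit 3 of the shift's input.
//
// \code
//   %shl:_(s64) = G_SHL %x, 2
//   ; getTestBitReg(%shl, /*Bit=*/5, Invert, MRI) returns %x and updates Bit to 3,
//   ; i.e. (tbz (shl x, 2), 5) -> (tbz x, 3)
// \endcode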
1530
1531MachineInstr *AArch64InstructionSelector::emitTestBit(
1532 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1533 MachineIRBuilder &MIB) const {
1534 assert(TestReg.isValid());
1535 assert(ProduceNonFlagSettingCondBr &&
1536 "Cannot emit TB(N)Z with speculation tracking!");
1537 MachineRegisterInfo &MRI = *MIB.getMRI();
1538
1539 // Attempt to optimize the test bit by walking over instructions.
1540 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1541 LLT Ty = MRI.getType(TestReg);
1542 unsigned Size = Ty.getSizeInBits();
1543 assert(!Ty.isVector() && "Expected a scalar!");
1544 assert(Bit < 64 && "Bit is too large!");
1545
1546 // When the test register is a 64-bit register, we have to narrow to make
1547 // TBNZW work.
1548 bool UseWReg = Bit < 32;
1549 unsigned NecessarySize = UseWReg ? 32 : 64;
1550 if (Size != NecessarySize)
1551 TestReg = moveScalarRegClass(
1552 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1553 MIB);
1554
1555 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1556 {AArch64::TBZW, AArch64::TBNZW}};
1557 unsigned Opc = OpcTable[UseWReg][IsNegative];
1558 auto TestBitMI =
1559 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1560 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1561 return &*TestBitMI;
1562}
1563
1564bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1565 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1566 MachineIRBuilder &MIB) const {
1567 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1568 // Given something like this:
1569 //
1570 // %x = ...Something...
1571 // %one = G_CONSTANT i64 1
1572 // %zero = G_CONSTANT i64 0
1573 // %and = G_AND %x, %one
1574 // %cmp = G_ICMP intpred(ne), %and, %zero
1575 // %cmp_trunc = G_TRUNC %cmp
1576 // G_BRCOND %cmp_trunc, %bb.3
1577 //
1578 // We want to try and fold the AND into the G_BRCOND and produce either a
1579 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1580 //
1581 // In this case, we'd get
1582 //
1583 // TBNZ %x %bb.3
1584 //
1585
1586 // Check if the AND has a constant on its RHS which we can use as a mask.
1587 // If it's a power of 2, then it's the same as checking a specific bit.
1588 // (e.g., ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1589 auto MaybeBit = getIConstantVRegValWithLookThrough(
1590 AndInst.getOperand(2).getReg(), *MIB.getMRI());
1591 if (!MaybeBit)
1592 return false;
1593
1594 int32_t Bit = MaybeBit->Value.exactLogBase2();
1595 if (Bit < 0)
1596 return false;
1597
1598 Register TestReg = AndInst.getOperand(1).getReg();
1599
1600 // Emit a TB(N)Z.
1601 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1602 return true;
1603}
1604
1605MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1606 bool IsNegative,
1607 MachineBasicBlock *DestMBB,
1608 MachineIRBuilder &MIB) const {
1609 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1610 MachineRegisterInfo &MRI = *MIB.getMRI();
1611 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1612 AArch64::GPRRegBankID &&
1613 "Expected GPRs only?");
1614 auto Ty = MRI.getType(CompareReg);
1615 unsigned Width = Ty.getSizeInBits();
1616 assert(!Ty.isVector() && "Expected scalar only?");
1617 assert(Width <= 64 && "Expected width to be at most 64?");
1618 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1619 {AArch64::CBNZW, AArch64::CBNZX}};
1620 unsigned Opc = OpcTable[IsNegative][Width == 64];
1621 auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1622 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1623 return &*BranchMI;
1624}
1625
1626bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1627 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1628 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1629 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1630 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1631 // totally clean. Some of them require two branches to implement.
1632 auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1633 emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1634 Pred);
1635 AArch64CC::CondCode CC1, CC2;
1636 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
1637 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1638 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1639 if (CC2 != AArch64CC::AL)
1640 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1641 I.eraseFromParent();
1642 return true;
1643}
1644
1645bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1646 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1647 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1648 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1649 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1650 //
1651 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1652 // instructions will not be produced, as they are conditional branch
1653 // instructions that do not set flags.
1654 if (!ProduceNonFlagSettingCondBr)
1655 return false;
1656
1657 MachineRegisterInfo &MRI = *MIB.getMRI();
1658 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1659 auto Pred =
1660 static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1661 Register LHS = ICmp.getOperand(2).getReg();
1662 Register RHS = ICmp.getOperand(3).getReg();
1663
1664 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1665 auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1666 MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1667
1668 // When we can emit a TB(N)Z, prefer that.
1669 //
1670 // Handle non-commutative condition codes first.
1671 // Note that we don't want to do this when we have a G_AND because it can
1672 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1673 if (VRegAndVal && !AndInst) {
1674 int64_t C = VRegAndVal->Value.getSExtValue();
1675
1676 // When we have a signed greater-than comparison against -1, we can just
1677 // test if the msb is zero.
1678 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1679 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1680 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1681 I.eraseFromParent();
1682 return true;
1683 }
1684
1685 // When we have a signed less-than comparison against zero, we can just
1686 // test if the msb is not zero.
1687 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1688 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1689 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1690 I.eraseFromParent();
1691 return true;
1692 }
1693
1694 // Inversely, if we have a signed greater-than-or-equal comparison to zero,
1695 // we can test if the msb is zero.
1696 if (C == 0 && Pred == CmpInst::ICMP_SGE) {
1697 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1698 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1699 I.eraseFromParent();
1700 return true;
1701 }
1702 }
1703
1704 // Attempt to handle commutative condition codes. Right now, that's only
1705 // eq/ne.
1706 if (ICmpInst::isEquality(Pred)) {
1707 if (!VRegAndVal) {
1708 std::swap(RHS, LHS);
1709 VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1710 AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1711 }
1712
1713 if (VRegAndVal && VRegAndVal->Value == 0) {
1714 // If there's a G_AND feeding into this branch, try to fold it away by
1715 // emitting a TB(N)Z instead.
1716 //
1717 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1718 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1719 // would be redundant.
1720 if (AndInst &&
1721 tryOptAndIntoCompareBranch(
1722 *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1723 I.eraseFromParent();
1724 return true;
1725 }
1726
1727 // Otherwise, try to emit a CB(N)Z instead.
1728 auto LHSTy = MRI.getType(LHS);
1729 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1730 emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1731 I.eraseFromParent();
1732 return true;
1733 }
1734 }
1735 }
1736
1737 return false;
1738}
1739
1740bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1741 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1742 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1743 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1744 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1745 return true;
1746
1747 // Couldn't optimize. Emit a compare + a Bcc.
1748 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1749 auto &PredOp = ICmp.getOperand(1);
1750 emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1751 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1752 static_cast<CmpInst::Predicate>(PredOp.getPredicate()),
1753 ICmp.getOperand(3).getReg(), MIB.getMRI());
1754 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1755 I.eraseFromParent();
1756 return true;
1757}
1758
1759bool AArch64InstructionSelector::selectCompareBranch(
1760 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
1761 Register CondReg = I.getOperand(0).getReg();
1762 MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1763 // Try to select the G_BRCOND using whatever is feeding the condition if
1764 // possible.
1765 unsigned CCMIOpc = CCMI->getOpcode();
1766 if (CCMIOpc == TargetOpcode::G_FCMP)
1767 return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1768 if (CCMIOpc == TargetOpcode::G_ICMP)
1769 return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1770
1771 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1772 // instructions will not be produced, as they are conditional branch
1773 // instructions that do not set flags.
1774 if (ProduceNonFlagSettingCondBr) {
1775 emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1776 I.getOperand(1).getMBB(), MIB);
1777 I.eraseFromParent();
1778 return true;
1779 }
1780
1781 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1782 auto TstMI =
1783 MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1784 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
1785 auto Bcc = MIB.buildInstr(AArch64::Bcc)
1786 .addImm(AArch64CC::NE)
1787 .addMBB(I.getOperand(1).getMBB());
1788 I.eraseFromParent();
1789 return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1790}
1791
1792/// Returns the element immediate value of a vector shift operand if found.
1793/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1794static std::optional<int64_t> getVectorShiftImm(Register Reg,
1795 MachineRegisterInfo &MRI) {
1796 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1797 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1798 return getAArch64VectorSplatScalar(*OpMI, MRI);
1799}
1800
1801/// Matches and returns the shift immediate value for a SHL instruction given
1802/// a shift operand.
1803static std::optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg,
1804 MachineRegisterInfo &MRI) {
1805 std::optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1806 if (!ShiftImm)
1807 return std::nullopt;
1808 // Check the immediate is in range for a SHL.
1809 int64_t Imm = *ShiftImm;
1810 if (Imm < 0)
1811 return std::nullopt;
1812 switch (SrcTy.getElementType().getSizeInBits()) {
1813 default:
1814 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1815 return std::nullopt;
1816 case 8:
1817 if (Imm > 7)
1818 return std::nullopt;
1819 break;
1820 case 16:
1821 if (Imm > 15)
1822 return std::nullopt;
1823 break;
1824 case 32:
1825 if (Imm > 31)
1826 return std::nullopt;
1827 break;
1828 case 64:
1829 if (Imm > 63)
1830 return std::nullopt;
1831 break;
1832 }
1833 return Imm;
1834}
1835
1836bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
1837 MachineRegisterInfo &MRI) {
1838 assert(I.getOpcode() == TargetOpcode::G_SHL);
1839 Register DstReg = I.getOperand(0).getReg();
1840 const LLT Ty = MRI.getType(DstReg);
1841 Register Src1Reg = I.getOperand(1).getReg();
1842 Register Src2Reg = I.getOperand(2).getReg();
1843
1844 if (!Ty.isVector())
1845 return false;
1846
1847 // Check if we have a vector of constants on RHS that we can select as the
1848 // immediate form.
1849 std::optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1850
1851 unsigned Opc = 0;
1852 if (Ty == LLT::fixed_vector(2, 64)) {
1853 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1854 } else if (Ty == LLT::fixed_vector(4, 32)) {
1855 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1856 } else if (Ty == LLT::fixed_vector(2, 32)) {
1857 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1858 } else if (Ty == LLT::fixed_vector(4, 16)) {
1859 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1860 } else if (Ty == LLT::fixed_vector(8, 16)) {
1861 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1862 } else if (Ty == LLT::fixed_vector(16, 8)) {
1863 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1864 } else if (Ty == LLT::fixed_vector(8, 8)) {
1865 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1866 } else {
1867 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1868 return false;
1869 }
1870
1871 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1872 if (ImmVal)
1873 Shl.addImm(*ImmVal);
1874 else
1875 Shl.addUse(Src2Reg);
1876 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1877 I.eraseFromParent();
1878 return true;
1879}
1880
1881bool AArch64InstructionSelector::selectVectorAshrLshr(
1882 MachineInstr &I, MachineRegisterInfo &MRI) {
1883 assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1884 I.getOpcode() == TargetOpcode::G_LSHR);
1885 Register DstReg = I.getOperand(0).getReg();
1886 const LLT Ty = MRI.getType(DstReg);
1887 Register Src1Reg = I.getOperand(1).getReg();
1888 Register Src2Reg = I.getOperand(2).getReg();
1889
1890 if (!Ty.isVector())
1891 return false;
1892
1893 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1894
1895 // We expect the immediate case to be lowered in the PostLegalCombiner to
1896 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1897
1898 // There is no shift-right-by-register instruction; instead, the shift-left-
1899 // by-register instruction takes a signed shift amount, where a negative
1900 // value specifies a right shift.
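// For example (illustrative): a v4i32 G_ASHR by %amt is selected as
//   %neg:fpr = NEGv4i32 %amt
//   %dst:fpr = SSHLv4i32 %src, %neg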
1901
1902 unsigned Opc = 0;
1903 unsigned NegOpc = 0;
1904 const TargetRegisterClass *RC =
1905 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
1906 if (Ty == LLT::fixed_vector(2, 64)) {
1907 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1908 NegOpc = AArch64::NEGv2i64;
1909 } else if (Ty == LLT::fixed_vector(4, 32)) {
1910 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1911 NegOpc = AArch64::NEGv4i32;
1912 } else if (Ty == LLT::fixed_vector(2, 32)) {
1913 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1914 NegOpc = AArch64::NEGv2i32;
1915 } else if (Ty == LLT::fixed_vector(4, 16)) {
1916 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1917 NegOpc = AArch64::NEGv4i16;
1918 } else if (Ty == LLT::fixed_vector(8, 16)) {
1919 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1920 NegOpc = AArch64::NEGv8i16;
1921 } else if (Ty == LLT::fixed_vector(16, 8)) {
1922 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1923 NegOpc = AArch64::NEGv16i8;
1924 } else if (Ty == LLT::fixed_vector(8, 8)) {
1925 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1926 NegOpc = AArch64::NEGv8i8;
1927 } else {
1928 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1929 return false;
1930 }
1931
1932 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1933 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1934 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1935 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1936 I.eraseFromParent();
1937 return true;
1938}
1939
1940bool AArch64InstructionSelector::selectVaStartAAPCS(
1941 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1942
1943 if (STI.isCallingConvWin64(MF.getFunction().getCallingConv(),
1944 MF.getFunction().isVarArg()))
1945 return false;
1946
1947 // The layout of the va_list struct is specified in the AArch64 Procedure Call
1948 // Standard, section 10.1.5.
1949
1950 const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1951 const unsigned PtrSize = STI.isTargetILP32() ? 4 : 8;
1952 const auto *PtrRegClass =
1953 STI.isTargetILP32() ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
1954
1955 const MCInstrDesc &MCIDAddAddr =
1956 TII.get(STI.isTargetILP32() ? AArch64::ADDWri : AArch64::ADDXri);
1957 const MCInstrDesc &MCIDStoreAddr =
1958 TII.get(STI.isTargetILP32() ? AArch64::STRWui : AArch64::STRXui);
1959
1960 /*
1961 * typedef struct va_list {
1962 * void * stack; // next stack param
1963 * void * gr_top; // end of GP arg reg save area
1964 * void * vr_top; // end of FP/SIMD arg reg save area
1965 * int gr_offs; // offset from gr_top to next GP register arg
1966 * int vr_offs; // offset from vr_top to next FP/SIMD register arg
1967 * } va_list;
1968 */
1969 const auto VAList = I.getOperand(0).getReg();
1970
1971 // Our current offset in bytes from the va_list struct (VAList).
1972 unsigned OffsetBytes = 0;
1973
1974 // Helper function to store (FrameIndex + Imm) to VAList at offset OffsetBytes
1975 // and increment OffsetBytes by PtrSize.
1976 const auto PushAddress = [&](const int FrameIndex, const int64_t Imm) {
1977 const Register Top = MRI.createVirtualRegister(PtrRegClass);
1978 auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), MCIDAddAddr)
1979 .addDef(Top)
1980 .addFrameIndex(FrameIndex)
1981 .addImm(Imm)
1982 .addImm(0);
1983 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1984
1985 const auto *MMO = *I.memoperands_begin();
1986 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), MCIDStoreAddr)
1987 .addUse(Top)
1988 .addUse(VAList)
1989 .addImm(OffsetBytes / PtrSize)
1990 .addMemOperand(MF.getMachineMemOperand(
1991 MMO->getPointerInfo().getWithOffset(OffsetBytes),
1992 MachineMemOperand::MOStore, PtrSize, MMO->getBaseAlign()));
1993 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1994
1995 OffsetBytes += PtrSize;
1996 };
1997
1998 // void* stack at offset 0
1999 PushAddress(FuncInfo->getVarArgsStackIndex(), 0);
2000
2001 // void* gr_top at offset 8 (4 on ILP32)
2002 const unsigned GPRSize = FuncInfo->getVarArgsGPRSize();
2003 PushAddress(FuncInfo->getVarArgsGPRIndex(), GPRSize);
2004
2005 // void* vr_top at offset 16 (8 on ILP32)
2006 const unsigned FPRSize = FuncInfo->getVarArgsFPRSize();
2007 PushAddress(FuncInfo->getVarArgsFPRIndex(), FPRSize);
2008
2009 // Helper function to store a 4-byte integer constant to VAList at offset
2010 // OffsetBytes, and increment OffsetBytes by 4.
2011 const auto PushIntConstant = [&](const int32_t Value) {
2012 constexpr int IntSize = 4;
2013 const Register Temp = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2014 auto MIB =
2015 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::MOVi32imm))
2016 .addDef(Temp)
2017 .addImm(Value);
2018 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2019
2020 const auto *MMO = *I.memoperands_begin();
2021 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRWui))
2022 .addUse(Temp)
2023 .addUse(VAList)
2024 .addImm(OffsetBytes / IntSize)
2025 .addMemOperand(MF.getMachineMemOperand(
2026 MMO->getPointerInfo().getWithOffset(OffsetBytes),
2027 MachineMemOperand::MOStore, IntSize, MMO->getBaseAlign()));
2028 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2029 OffsetBytes += IntSize;
2030 };
2031
2032 // int gr_offs at offset 24 (12 on ILP32)
2033 PushIntConstant(-static_cast<int32_t>(GPRSize));
2034
2035 // int vr_offs at offset 28 (16 on ILP32)
2036 PushIntConstant(-static_cast<int32_t>(FPRSize));
2037
2038 assert(OffsetBytes == (STI.isTargetILP32() ? 20 : 32) && "Unexpected offset");
2039
2040 I.eraseFromParent();
2041 return true;
2042}
2043
2044bool AArch64InstructionSelector::selectVaStartDarwin(
2045 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
2046 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
2047 Register ListReg = I.getOperand(0).getReg();
2048
2049 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2050
2051 int FrameIdx = FuncInfo->getVarArgsStackIndex();
2052 if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64(
2054 FrameIdx = FuncInfo->getVarArgsGPRSize() > 0
2055 ? FuncInfo->getVarArgsGPRIndex()
2056 : FuncInfo->getVarArgsStackIndex();
2057 }
2058
2059 auto MIB =
2060 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
2061 .addDef(ArgsAddrReg)
2062 .addFrameIndex(FrameIdx)
2063 .addImm(0)
2064 .addImm(0);
2065
2066 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2067
2068 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
2069 .addUse(ArgsAddrReg)
2070 .addUse(ListReg)
2071 .addImm(0)
2072 .addMemOperand(*I.memoperands_begin());
2073
2074 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2075 I.eraseFromParent();
2076 return true;
2077}
2078
2079void AArch64InstructionSelector::materializeLargeCMVal(
2080 MachineInstr &I, const Value *V, unsigned OpFlags) {
2081 MachineBasicBlock &MBB = *I.getParent();
2082 MachineFunction &MF = *MBB.getParent();
2083 MachineRegisterInfo &MRI = MF.getRegInfo();
2084
2085 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
2086 MovZ->addOperand(MF, I.getOperand(1));
2087 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
2088 AArch64II::MO_NC);
2089 MovZ->addOperand(MF, MachineOperand::CreateImm(0));
2090 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
2091
2092 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
2093 Register ForceDstReg) {
2094 Register DstReg = ForceDstReg
2095 ? ForceDstReg
2096 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2097 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
2098 if (auto *GV = dyn_cast<GlobalValue>(V)) {
2099 MovI->addOperand(MF, MachineOperand::CreateGA(
2100 GV, MovZ->getOperand(1).getOffset(), Flags));
2101 } else {
2102 MovI->addOperand(
2103 MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
2104 MovZ->getOperand(1).getOffset(), Flags));
2105 }
2106 MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
2107 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
2108 return DstReg;
2109 };
2110 Register DstReg = BuildMovK(MovZ.getReg(0),
2111 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
2112 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
2113 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
2114}
2115
2116bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
2117 MachineBasicBlock &MBB = *I.getParent();
2118 MachineFunction &MF = *MBB.getParent();
2119 MachineRegisterInfo &MRI = MF.getRegInfo();
2120
2121 switch (I.getOpcode()) {
2122 case TargetOpcode::G_STORE: {
2123 bool Changed = contractCrossBankCopyIntoStore(I, MRI);
2124 MachineOperand &SrcOp = I.getOperand(0);
2125 if (MRI.getType(SrcOp.getReg()).isPointer()) {
2126 // Allow matching with imported patterns for stores of pointers. Unlike
2127 // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
2128 // and constrain.
2129 auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
2130 Register NewSrc = Copy.getReg(0);
2131 SrcOp.setReg(NewSrc);
2132 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
2133 Changed = true;
2134 }
2135 return Changed;
2136 }
2137 case TargetOpcode::G_PTR_ADD: {
2138 // If Checked Pointer Arithmetic (FEAT_CPA) is present, preserve the pointer
2139 // arithmetic semantics instead of falling back to regular arithmetic.
2140 const auto &TL = STI.getTargetLowering();
2141 if (TL->shouldPreservePtrArith(MF.getFunction(), EVT()))
2142 return false;
2143 return convertPtrAddToAdd(I, MRI);
2144 }
2145 case TargetOpcode::G_LOAD: {
2146 // For scalar loads of pointers, we try to convert the dest type from p0
2147 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
2148 // conversion, this should be ok because all users should have been
2149 // selected already, so the type doesn't matter for them.
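// For example (illustrative): %v:gpr(p0) = G_LOAD %addr is retyped here as
// %v:gpr(s64) = G_LOAD %addr so the imported load patterns can match it.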
2150 Register DstReg = I.getOperand(0).getReg();
2151 const LLT DstTy = MRI.getType(DstReg);
2152 if (!DstTy.isPointer())
2153 return false;
2154 MRI.setType(DstReg, LLT::scalar(64));
2155 return true;
2156 }
2157 case AArch64::G_DUP: {
2158 // Convert the type from p0 to s64 to help selection.
2159 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2160 if (!DstTy.isPointerVector())
2161 return false;
2162 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
2163 MRI.setType(I.getOperand(0).getReg(),
2164 DstTy.changeElementType(LLT::scalar(64)));
2165 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2166 I.getOperand(1).setReg(NewSrc.getReg(0));
2167 return true;
2168 }
2169 case AArch64::G_INSERT_VECTOR_ELT: {
2170 // Convert the type from p0 to s64 to help selection.
2171 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2172 LLT SrcVecTy = MRI.getType(I.getOperand(1).getReg());
2173 if (!SrcVecTy.isPointerVector())
2174 return false;
2175 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(2).getReg());
2176 MRI.setType(I.getOperand(1).getReg(),
2177 DstTy.changeElementType(LLT::scalar(64)));
2178 MRI.setType(I.getOperand(0).getReg(),
2179 DstTy.changeElementType(LLT::scalar(64)));
2180 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2181 I.getOperand(2).setReg(NewSrc.getReg(0));
2182 return true;
2183 }
2184 case TargetOpcode::G_UITOFP:
2185 case TargetOpcode::G_SITOFP: {
2186 // If both source and destination regbanks are FPR, then convert the opcode
2187 // to G_SITOF/G_UITOF so that the importer can select it to an fpr variant.
2188 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
2189 // copy.
2190 Register SrcReg = I.getOperand(1).getReg();
2191 LLT SrcTy = MRI.getType(SrcReg);
2192 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2193 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
2194 return false;
2195
2196 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
2197 if (I.getOpcode() == TargetOpcode::G_SITOFP)
2198 I.setDesc(TII.get(AArch64::G_SITOF));
2199 else
2200 I.setDesc(TII.get(AArch64::G_UITOF));
2201 return true;
2202 }
2203 return false;
2204 }
2205 default:
2206 return false;
2207 }
2208}
2209
2210/// This lowering tries to look for G_PTR_ADD instructions and then converts
2211/// them to a standard G_ADD with a COPY on the source.
2212///
2213/// The motivation behind this is to expose the add semantics to the imported
2214/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
2215/// because the selector works bottom up, uses before defs. By the time we
2216/// end up trying to select a G_PTR_ADD, we should have already attempted to
2217/// fold this into addressing modes and were therefore unsuccessful.
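/// For example (illustrative):
///   %dst:gpr(p0) = G_PTR_ADD %base:gpr(p0), %off(s64)
/// is rewritten to
///   %baseint:gpr(s64) = G_PTRTOINT %base(p0)
///   %dst:gpr(s64) = G_ADD %baseint, %off
/// (or a G_SUB when %off is a 0-x negate idiom).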
2218bool AArch64InstructionSelector::convertPtrAddToAdd(
2219 MachineInstr &I, MachineRegisterInfo &MRI) {
2220 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
2221 Register DstReg = I.getOperand(0).getReg();
2222 Register AddOp1Reg = I.getOperand(1).getReg();
2223 const LLT PtrTy = MRI.getType(DstReg);
2224 if (PtrTy.getAddressSpace() != 0)
2225 return false;
2226
2227 const LLT CastPtrTy =
2228 PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
2229 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
2230 // Set regbanks on the registers.
2231 if (PtrTy.isVector())
2232 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2233 else
2234 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2235
2236 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2237 // %dst(intty) = G_ADD %intbase, off
2238 I.setDesc(TII.get(TargetOpcode::G_ADD));
2239 MRI.setType(DstReg, CastPtrTy);
2240 I.getOperand(1).setReg(PtrToInt.getReg(0));
2241 if (!select(*PtrToInt)) {
2242 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
2243 return false;
2244 }
2245
2246 // Also take the opportunity here to try to do some optimization.
2247 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
2248 Register NegatedReg;
2249 if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
2250 return true;
2251 I.getOperand(2).setReg(NegatedReg);
2252 I.setDesc(TII.get(TargetOpcode::G_SUB));
2253 return true;
2254}
2255
2256bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
2257 MachineRegisterInfo &MRI) {
2258 // We try to match the immediate variant of LSL, which is actually an alias
2259 // for a special case of UBFM. Otherwise, we fall back to the imported
2260 // selector which will match the register variant.
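// For example (illustrative): a 64-bit left shift by 4 uses the alias
//   lsl x0, x1, #4  ==  ubfm x0, x1, #60, #59
// so we render the two UBFM immediates below instead of a register amount.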
2261 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2262 const auto &MO = I.getOperand(2);
2263 auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
2264 if (!VRegAndVal)
2265 return false;
2266
2267 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2268 if (DstTy.isVector())
2269 return false;
2270 bool Is64Bit = DstTy.getSizeInBits() == 64;
2271 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2272 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2273
2274 if (!Imm1Fn || !Imm2Fn)
2275 return false;
2276
2277 auto NewI =
2278 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2279 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2280
2281 for (auto &RenderFn : *Imm1Fn)
2282 RenderFn(NewI);
2283 for (auto &RenderFn : *Imm2Fn)
2284 RenderFn(NewI);
2285
2286 I.eraseFromParent();
2287 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
2288}
2289
2290bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2291 MachineInstr &I, MachineRegisterInfo &MRI) {
2292 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2293 // If we're storing a scalar, it doesn't matter what register bank that
2294 // scalar is on. All that matters is the size.
2295 //
2296 // So, if we see something like this (with a 32-bit scalar as an example):
2297 //
2298 // %x:gpr(s32) = ... something ...
2299 // %y:fpr(s32) = COPY %x:gpr(s32)
2300 // G_STORE %y:fpr(s32)
2301 //
2302 // We can fix this up into something like this:
2303 //
2304 // G_STORE %x:gpr(s32)
2305 //
2306 // And then continue the selection process normally.
2307 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2308 if (!DefDstReg.isValid())
2309 return false;
2310 LLT DefDstTy = MRI.getType(DefDstReg);
2311 Register StoreSrcReg = I.getOperand(0).getReg();
2312 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2313
2314 // If we get something strange like a physical register, then we shouldn't
2315 // go any further.
2316 if (!DefDstTy.isValid())
2317 return false;
2318
2319 // Are the source and dst types the same size?
2320 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2321 return false;
2322
2323 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2324 RBI.getRegBank(DefDstReg, MRI, TRI))
2325 return false;
2326
2327 // We have a cross-bank copy, which is entering a store. Let's fold it.
2328 I.getOperand(0).setReg(DefDstReg);
2329 return true;
2330}
2331
2332bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2333 assert(I.getParent() && "Instruction should be in a basic block!");
2334 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2335
2336 MachineBasicBlock &MBB = *I.getParent();
2337 MachineFunction &MF = *MBB.getParent();
2338 MachineRegisterInfo &MRI = MF.getRegInfo();
2339
2340 switch (I.getOpcode()) {
2341 case AArch64::G_DUP: {
2342 // Before selecting a DUP instruction, check if it is better selected as a
2343 // MOV or load from a constant pool.
2344 Register Src = I.getOperand(1).getReg();
2345 auto ValAndVReg = getAnyConstantVRegValWithLookThrough(Src, MRI);
2346 if (!ValAndVReg)
2347 return false;
2348 LLVMContext &Ctx = MF.getFunction().getContext();
2349 Register Dst = I.getOperand(0).getReg();
2350 auto *CV = ConstantDataVector::getSplat(
2351 MRI.getType(Dst).getNumElements(),
2352 ConstantInt::get(
2353 Type::getIntNTy(Ctx, MRI.getType(Dst).getScalarSizeInBits()),
2354 ValAndVReg->Value.trunc(MRI.getType(Dst).getScalarSizeInBits())));
2355 if (!emitConstantVector(Dst, CV, MIB, MRI))
2356 return false;
2357 I.eraseFromParent();
2358 return true;
2359 }
2360 case TargetOpcode::G_SEXT:
2361 // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
2362 // over a normal extend.
2363 if (selectUSMovFromExtend(I, MRI))
2364 return true;
2365 return false;
2366 case TargetOpcode::G_BR:
2367 return false;
2368 case TargetOpcode::G_SHL:
2369 return earlySelectSHL(I, MRI);
2370 case TargetOpcode::G_CONSTANT: {
2371 bool IsZero = false;
2372 if (I.getOperand(1).isCImm())
2373 IsZero = I.getOperand(1).getCImm()->isZero();
2374 else if (I.getOperand(1).isImm())
2375 IsZero = I.getOperand(1).getImm() == 0;
2376
2377 if (!IsZero)
2378 return false;
2379
2380 Register DefReg = I.getOperand(0).getReg();
2381 LLT Ty = MRI.getType(DefReg);
2382 if (Ty.getSizeInBits() == 64) {
2383 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2384 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2385 } else if (Ty.getSizeInBits() == 32) {
2386 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2387 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2388 } else
2389 return false;
2390
2391 I.setDesc(TII.get(TargetOpcode::COPY));
2392 return true;
2393 }
2394
2395 case TargetOpcode::G_ADD: {
2396 // Check if this is being fed by a G_ICMP on either side.
2397 //
2398 // (cmp pred, x, y) + z
2399 //
2400 // In the above case, when the cmp is true, we increment z by 1. So, we can
2401 // fold the add into the cset for the cmp by using cinc.
2402 //
2403 // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
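// For example (illustrative, 32-bit case):
//   %a = G_ADD %z, (G_ICMP intpred(ne), %x, %y)
// selects to a flag-setting compare of %x, %y followed by CSINC %a, %z, %z, eq,
// i.e. %z is incremented only when the compare is true.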
2404 Register AddDst = I.getOperand(0).getReg();
2405 Register AddLHS = I.getOperand(1).getReg();
2406 Register AddRHS = I.getOperand(2).getReg();
2407 // Only handle scalars.
2408 LLT Ty = MRI.getType(AddLHS);
2409 if (Ty.isVector())
2410 return false;
2411 // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
2412 // bits.
2413 unsigned Size = Ty.getSizeInBits();
2414 if (Size != 32 && Size != 64)
2415 return false;
2416 auto MatchCmp = [&](Register Reg) -> MachineInstr * {
2417 if (!MRI.hasOneNonDBGUse(Reg))
2418 return nullptr;
2419 // If the LHS of the add is 32 bits, then we want to fold a 32-bit
2420 // compare.
2421 if (Size == 32)
2422 return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
2423 // We model scalar compares using 32-bit destinations right now.
2424 // If it's a 64-bit compare, it'll have 64-bit sources.
2425 Register ZExt;
2426 if (!mi_match(Reg, MRI,
2427 m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt))))))
2428 return nullptr;
2429 auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
2430 if (!Cmp ||
2431 MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
2432 return nullptr;
2433 return Cmp;
2434 };
2435 // Try to match
2436 // z + (cmp pred, x, y)
2437 MachineInstr *Cmp = MatchCmp(AddRHS);
2438 if (!Cmp) {
2439 // (cmp pred, x, y) + z
2440 std::swap(AddLHS, AddRHS);
2441 Cmp = MatchCmp(AddRHS);
2442 if (!Cmp)
2443 return false;
2444 }
2445 auto &PredOp = Cmp->getOperand(1);
2447 emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
2448 /*RHS=*/Cmp->getOperand(3), PredOp, MIB);
2449 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
2450 AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
2451 CmpInst::getInversePredicate(Pred), Cmp->getOperand(3).getReg(), &MRI);
2452 emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
2453 I.eraseFromParent();
2454 return true;
2455 }
2456 case TargetOpcode::G_OR: {
2457 // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2458 // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2459 // shifting and masking that we can replace with a BFI (encoded as a BFM).
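// For example (illustrative, 32-bit):
//   %or = G_OR (G_SHL %x, 24), (G_AND %y, 0xffffff)
// inserts the low 8 bits of %x above the low 24 bits of %y, i.e.
//   BFI %y, %x, #24, #8  (BFMWri with Immr = 8, Imms = 7).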
2460 Register Dst = I.getOperand(0).getReg();
2461 LLT Ty = MRI.getType(Dst);
2462
2463 if (!Ty.isScalar())
2464 return false;
2465
2466 unsigned Size = Ty.getSizeInBits();
2467 if (Size != 32 && Size != 64)
2468 return false;
2469
2470 Register ShiftSrc;
2471 int64_t ShiftImm;
2472 Register MaskSrc;
2473 int64_t MaskImm;
2474 if (!mi_match(
2475 Dst, MRI,
2476 m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
2477 m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
2478 return false;
2479
2480 if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2481 return false;
2482
2483 int64_t Immr = Size - ShiftImm;
2484 int64_t Imms = Size - ShiftImm - 1;
2485 unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2486 emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2487 I.eraseFromParent();
2488 return true;
2489 }
2490 case TargetOpcode::G_FENCE: {
2491 if (I.getOperand(1).getImm() == 0)
2492 BuildMI(MBB, I, MIMetadata(I), TII.get(TargetOpcode::MEMBARRIER));
2493 else
2494 BuildMI(MBB, I, MIMetadata(I), TII.get(AArch64::DMB))
2495 .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
2496 I.eraseFromParent();
2497 return true;
2498 }
2499 default:
2500 return false;
2501 }
2502}
2503
2504bool AArch64InstructionSelector::select(MachineInstr &I) {
2505 assert(I.getParent() && "Instruction should be in a basic block!");
2506 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2507
2508 MachineBasicBlock &MBB = *I.getParent();
2509 MachineFunction &MF = *MBB.getParent();
2510 MachineRegisterInfo &MRI = MF.getRegInfo();
2511
2512 const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
2513 if (Subtarget->requiresStrictAlign()) {
2514 // We don't support this feature yet.
2515 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2516 return false;
2517 }
2518
2519 MIB.setInstrAndDebugLoc(I);
2520
2521 unsigned Opcode = I.getOpcode();
2522 // G_PHI requires same handling as PHI
2523 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2524 // Certain non-generic instructions also need some special handling.
2525
2526 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2527 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2528
2529 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2530 const Register DefReg = I.getOperand(0).getReg();
2531 const LLT DefTy = MRI.getType(DefReg);
2532
2533 const RegClassOrRegBank &RegClassOrBank =
2534 MRI.getRegClassOrRegBank(DefReg);
2535
2536 const TargetRegisterClass *DefRC =
2537 dyn_cast_if_present<const TargetRegisterClass *>(RegClassOrBank);
2538 if (!DefRC) {
2539 if (!DefTy.isValid()) {
2540 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2541 return false;
2542 }
2543 const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
2544 DefRC = getRegClassForTypeOnBank(DefTy, RB);
2545 if (!DefRC) {
2546 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2547 return false;
2548 }
2549 }
2550
2551 I.setDesc(TII.get(TargetOpcode::PHI));
2552
2553 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2554 }
2555
2556 if (I.isCopy())
2557 return selectCopy(I, TII, MRI, TRI, RBI);
2558
2559 if (I.isDebugInstr())
2560 return selectDebugInstr(I, MRI, RBI);
2561
2562 return true;
2563 }
2564
2565
2566 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2567 LLVM_DEBUG(
2568 dbgs() << "Generic instruction has unexpected implicit operands\n");
2569 return false;
2570 }
2571
2572 // Try to do some lowering before we start instruction selecting. These
2573 // lowerings are purely transformations on the input G_MIR and so selection
2574 // must continue after any modification of the instruction.
2575 if (preISelLower(I)) {
2576 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2577 }
2578
2579 // There may be patterns where the importer can't deal with them optimally,
2580 // but does select it to a suboptimal sequence so our custom C++ selection
2581 // code later never has a chance to work on it. Therefore, we have an early
2582 // selection attempt here to give priority to certain selection routines
2583 // over the imported ones.
2584 if (earlySelect(I))
2585 return true;
2586
2587 if (selectImpl(I, *CoverageInfo))
2588 return true;
2589
2590 LLT Ty =
2591 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2592
2593 switch (Opcode) {
2594 case TargetOpcode::G_SBFX:
2595 case TargetOpcode::G_UBFX: {
2596 static const unsigned OpcTable[2][2] = {
2597 {AArch64::UBFMWri, AArch64::UBFMXri},
2598 {AArch64::SBFMWri, AArch64::SBFMXri}};
2599 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2600 unsigned Size = Ty.getSizeInBits();
2601 unsigned Opc = OpcTable[IsSigned][Size == 64];
2602 auto Cst1 =
2603 getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
2604 assert(Cst1 && "Should have gotten a constant for src 1?");
2605 auto Cst2 =
2606 getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
2607 assert(Cst2 && "Should have gotten a constant for src 2?");
2608 auto LSB = Cst1->Value.getZExtValue();
2609 auto Width = Cst2->Value.getZExtValue();
2610 auto BitfieldInst =
2611 MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
2612 .addImm(LSB)
2613 .addImm(LSB + Width - 1);
2614 I.eraseFromParent();
2615 return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
2616 }
2617 case TargetOpcode::G_BRCOND:
2618 return selectCompareBranch(I, MF, MRI);
2619
2620 case TargetOpcode::G_BRINDIRECT: {
2621 const Function &Fn = MF.getFunction();
2622 if (std::optional<uint16_t> BADisc =
2623 STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(Fn)) {
2624 auto MI = MIB.buildInstr(AArch64::BRA, {}, {I.getOperand(0).getReg()});
2625 MI.addImm(AArch64PACKey::IA);
2626 MI.addImm(*BADisc);
2627 MI.addReg(/*AddrDisc=*/AArch64::XZR);
2628 I.eraseFromParent();
2629 return constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
2630 }
2631 I.setDesc(TII.get(AArch64::BR));
2632 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2633 }
2634
2635 case TargetOpcode::G_BRJT:
2636 return selectBrJT(I, MRI);
2637
2638 case AArch64::G_ADD_LOW: {
2639 // This op may have been separated from its ADRP companion by the localizer
2640 // or some other code motion pass. Given that many CPUs will try to
2641 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2642 // which will later be expanded into an ADRP+ADD pair after scheduling.
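// Illustratively, the MOVaddr pseudo later expands to:
//   adrp x0, sym
//   add  x0, x0, :lo12:sym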
2643 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2644 if (BaseMI->getOpcode() != AArch64::ADRP) {
2645 I.setDesc(TII.get(AArch64::ADDXri));
2646 I.addOperand(MachineOperand::CreateImm(0));
2647 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2648 }
2649 assert(TM.getCodeModel() == CodeModel::Small &&
2650 "Expected small code model");
2651 auto Op1 = BaseMI->getOperand(1);
2652 auto Op2 = I.getOperand(2);
2653 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2654 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2655 Op1.getTargetFlags())
2656 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2657 Op2.getTargetFlags());
2658 I.eraseFromParent();
2659 return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2660 }
2661
2662 case TargetOpcode::G_FCONSTANT:
2663 case TargetOpcode::G_CONSTANT: {
2664 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2665
2666 const LLT s8 = LLT::scalar(8);
2667 const LLT s16 = LLT::scalar(16);
2668 const LLT s32 = LLT::scalar(32);
2669 const LLT s64 = LLT::scalar(64);
2670 const LLT s128 = LLT::scalar(128);
2671 const LLT p0 = LLT::pointer(0, 64);
2672
2673 const Register DefReg = I.getOperand(0).getReg();
2674 const LLT DefTy = MRI.getType(DefReg);
2675 const unsigned DefSize = DefTy.getSizeInBits();
2676 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2677
2678 // FIXME: Redundant check, but even less readable when factored out.
2679 if (isFP) {
2680 if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2681 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2682 << " constant, expected: " << s16 << " or " << s32
2683 << " or " << s64 << " or " << s128 << '\n');
2684 return false;
2685 }
2686
2687 if (RB.getID() != AArch64::FPRRegBankID) {
2688 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2689 << " constant on bank: " << RB
2690 << ", expected: FPR\n");
2691 return false;
2692 }
2693
2694 // The case when we have 0.0 is covered by tablegen. Reject it here so we
2695 // can be sure tablegen works correctly and isn't rescued by this code.
2696 // 0.0 is not covered by tablegen for FP128. So we will handle this
2697 // scenario in the code here.
2698 if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2699 return false;
2700 } else {
2701 // s32 and s64 are covered by tablegen.
2702 if (Ty != p0 && Ty != s8 && Ty != s16) {
2703 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2704 << " constant, expected: " << s32 << ", " << s64
2705 << ", or " << p0 << '\n');
2706 return false;
2707 }
2708
2709 if (RB.getID() != AArch64::GPRRegBankID) {
2710 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2711 << " constant on bank: " << RB
2712 << ", expected: GPR\n");
2713 return false;
2714 }
2715 }
2716
2717 if (isFP) {
2718 const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB);
2719 // For 16, 64, and 128b values, emit a constant pool load.
2720 switch (DefSize) {
2721 default:
2722 llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
2723 case 32:
2724 case 64: {
2725 bool OptForSize = shouldOptForSize(&MF);
2726 const auto &TLI = MF.getSubtarget().getTargetLowering();
2727 // If TLI says that this fpimm is illegal, then we'll expand to a
2728 // constant pool load.
2729 if (TLI->isFPImmLegal(I.getOperand(1).getFPImm()->getValueAPF(),
2730 EVT::getFloatingPointVT(DefSize), OptForSize))
2731 break;
2732 [[fallthrough]];
2733 }
2734 case 16:
2735 case 128: {
2736 auto *FPImm = I.getOperand(1).getFPImm();
2737 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2738 if (!LoadMI) {
2739 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2740 return false;
2741 }
2742 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2743 I.eraseFromParent();
2744 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2745 }
2746 }
2747
2748 assert((DefSize == 32 || DefSize == 64) && "Unexpected const def size");
2749 // Either emit a FMOV, or emit a copy to emit a normal mov.
2750 const Register DefGPRReg = MRI.createVirtualRegister(
2751 DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2752 MachineOperand &RegOp = I.getOperand(0);
2753 RegOp.setReg(DefGPRReg);
2754 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2755 MIB.buildCopy({DefReg}, {DefGPRReg});
2756
2757 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2758 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2759 return false;
2760 }
2761
2762 MachineOperand &ImmOp = I.getOperand(1);
2763 // FIXME: Is going through int64_t always correct?
2764 ImmOp.ChangeToImmediate(
2765 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2766 } else if (I.getOperand(1).isCImm()) {
2767 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2768 I.getOperand(1).ChangeToImmediate(Val);
2769 } else if (I.getOperand(1).isImm()) {
2770 uint64_t Val = I.getOperand(1).getImm();
2771 I.getOperand(1).ChangeToImmediate(Val);
2772 }
2773
2774 const unsigned MovOpc =
2775 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2776 I.setDesc(TII.get(MovOpc));
2777 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2778 return true;
2779 }
2780 case TargetOpcode::G_EXTRACT: {
2781 Register DstReg = I.getOperand(0).getReg();
2782 Register SrcReg = I.getOperand(1).getReg();
2783 LLT SrcTy = MRI.getType(SrcReg);
2784 LLT DstTy = MRI.getType(DstReg);
2785 (void)DstTy;
2786 unsigned SrcSize = SrcTy.getSizeInBits();
2787
2788 if (SrcTy.getSizeInBits() > 64) {
2789 // This should be an extract of an s128, which is like a vector extract.
2790 if (SrcTy.getSizeInBits() != 128)
2791 return false;
2792 // Only support extracting 64 bits from an s128 at the moment.
2793 if (DstTy.getSizeInBits() != 64)
2794 return false;
2795
2796 unsigned Offset = I.getOperand(2).getImm();
2797 if (Offset % 64 != 0)
2798 return false;
2799
2800 // Check we have the right regbank always.
2801 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2802 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2803 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2804
2805 if (SrcRB.getID() == AArch64::GPRRegBankID) {
2806 auto NewI =
2807 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2808 .addUse(SrcReg, 0,
2809 Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2810 constrainOperandRegClass(MF, TRI, MRI, TII, RBI, *NewI,
2811 AArch64::GPR64RegClass, NewI->getOperand(0));
2812 I.eraseFromParent();
2813 return true;
2814 }
2815
2816 // Emit the same code as a vector extract.
2817 // Offset must be a multiple of 64.
2818 unsigned LaneIdx = Offset / 64;
2819 MachineInstr *Extract = emitExtractVectorElt(
2820 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2821 if (!Extract)
2822 return false;
2823 I.eraseFromParent();
2824 return true;
2825 }
2826
2827 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2828 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2829 Ty.getSizeInBits() - 1);
2830
2831 if (SrcSize < 64) {
2832 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2833 "unexpected G_EXTRACT types");
2834 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2835 }
2836
2837 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2838 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2839 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2840 .addReg(DstReg, 0, AArch64::sub_32);
2841 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2842 AArch64::GPR32RegClass, MRI);
2843 I.getOperand(0).setReg(DstReg);
2844
2845 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2846 }
2847
2848 case TargetOpcode::G_INSERT: {
2849 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2850 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2851 unsigned DstSize = DstTy.getSizeInBits();
2852 // Larger inserts are vectors, same-size ones should be something else by
2853 // now (split up or turned into COPYs).
2854 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2855 return false;
2856
2857 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2858 unsigned LSB = I.getOperand(3).getImm();
2859 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2860 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2861 MachineInstrBuilder(MF, I).addImm(Width - 1);
2862
2863 if (DstSize < 64) {
2864 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2865 "unexpected G_INSERT types");
2866 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2867 }
2868
2869 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2870 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2871 TII.get(AArch64::SUBREG_TO_REG))
2872 .addDef(SrcReg)
2873 .addImm(0)
2874 .addUse(I.getOperand(2).getReg())
2875 .addImm(AArch64::sub_32);
2876 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2877 AArch64::GPR32RegClass, MRI);
2878 I.getOperand(2).setReg(SrcReg);
2879
2880 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2881 }
2882 case TargetOpcode::G_FRAME_INDEX: {
2883 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2884 if (Ty != LLT::pointer(0, 64)) {
2885 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2886 << ", expected: " << LLT::pointer(0, 64) << '\n');
2887 return false;
2888 }
2889 I.setDesc(TII.get(AArch64::ADDXri));
2890
2891 // MOs for a #0 shifted immediate.
2892 I.addOperand(MachineOperand::CreateImm(0));
2893 I.addOperand(MachineOperand::CreateImm(0));
2894
2895 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2896 }
2897
2898 case TargetOpcode::G_GLOBAL_VALUE: {
2899 const GlobalValue *GV = nullptr;
2900 unsigned OpFlags;
2901 if (I.getOperand(1).isSymbol()) {
2902 OpFlags = I.getOperand(1).getTargetFlags();
2903 // Currently only used by "RtLibUseGOT".
2904 assert(OpFlags == AArch64II::MO_GOT);
2905 } else {
2906 GV = I.getOperand(1).getGlobal();
2907 if (GV->isThreadLocal()) {
2908 // We don't support instructions with emulated TLS variables yet
2909 if (TM.useEmulatedTLS())
2910 return false;
2911 return selectTLSGlobalValue(I, MRI);
2912 }
2913 OpFlags = STI.ClassifyGlobalReference(GV, TM);
2914 }
2915
2916 if (OpFlags & AArch64II::MO_GOT) {
2917 bool IsGOTSigned = MF.getInfo<AArch64FunctionInfo>()->hasELFSignedGOT();
2918 I.setDesc(TII.get(IsGOTSigned ? AArch64::LOADgotAUTH : AArch64::LOADgot));
2919 I.getOperand(1).setTargetFlags(OpFlags);
2920 I.addImplicitDefUseOperands(MF);
2921 } else if (TM.getCodeModel() == CodeModel::Large &&
2922 !TM.isPositionIndependent()) {
2923 // Materialize the global using movz/movk instructions.
2924 materializeLargeCMVal(I, GV, OpFlags);
2925 I.eraseFromParent();
2926 return true;
2927 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2928 I.setDesc(TII.get(AArch64::ADR));
2929 I.getOperand(1).setTargetFlags(OpFlags);
2930 } else {
2931 I.setDesc(TII.get(AArch64::MOVaddr));
2932 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2933 MachineInstrBuilder MIB(MF, I);
2934 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2935 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2936 }
2937 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2938 }
2939
2940 case TargetOpcode::G_PTRAUTH_GLOBAL_VALUE:
2941 return selectPtrAuthGlobalValue(I, MRI);
2942
2943 case TargetOpcode::G_ZEXTLOAD:
2944 case TargetOpcode::G_LOAD:
2945 case TargetOpcode::G_STORE: {
2946 GLoadStore &LdSt = cast<GLoadStore>(I);
2947 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2948 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
2949
2950 // Can only handle AddressSpace 0, 64-bit pointers.
2951 if (PtrTy != LLT::pointer(0, 64)) {
2952 return false;
2953 }
2954
2955 uint64_t MemSizeInBytes = LdSt.getMemSize().getValue();
2956 unsigned MemSizeInBits = LdSt.getMemSizeInBits().getValue();
2957 AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
2958
2959 // Need special instructions for atomics that affect ordering.
2960 if (isStrongerThanMonotonic(Order)) {
2961 assert(!isa<GZExtLoad>(LdSt));
2962 assert(MemSizeInBytes <= 8 &&
2963 "128-bit atomics should already be custom-legalized");
2964
2965 if (isa<GLoad>(LdSt)) {
2966 static constexpr unsigned LDAPROpcodes[] = {
2967 AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
2968 static constexpr unsigned LDAROpcodes[] = {
2969 AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
2970 ArrayRef<unsigned> Opcodes =
2971 STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
2972 ? LDAPROpcodes
2973 : LDAROpcodes;
2974 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2975 } else {
2976 static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2977 AArch64::STLRW, AArch64::STLRX};
2978 Register ValReg = LdSt.getReg(0);
2979 if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
2980 // Emit a subreg copy of 32 bits.
2981 Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2982 MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
2983 .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
2984 I.getOperand(0).setReg(NewVal);
2985 }
2986 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2987 }
2988 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2989 return true;
2990 }
2991
2992#ifndef NDEBUG
2993 const Register PtrReg = LdSt.getPointerReg();
2994 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2995 // Check that the pointer register is valid.
2996 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2997 "Load/Store pointer operand isn't a GPR");
2998 assert(MRI.getType(PtrReg).isPointer() &&
2999 "Load/Store pointer operand isn't a pointer");
3000#endif
3001
3002 const Register ValReg = LdSt.getReg(0);
3003 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
3004 LLT ValTy = MRI.getType(ValReg);
3005
3006 // The code below doesn't support truncating stores, so we need to split it
3007 // again.
3008 if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
3009 unsigned SubReg;
3010 LLT MemTy = LdSt.getMMO().getMemoryType();
3011 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
3012 if (!getSubRegForClass(RC, TRI, SubReg))
3013 return false;
3014
3015 // Generate a subreg copy.
3016 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
3017 .addReg(ValReg, 0, SubReg)
3018 .getReg(0);
3019 RBI.constrainGenericRegister(Copy, *RC, MRI);
3020 LdSt.getOperand(0).setReg(Copy);
3021 } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
3022 // If this is an any-extending load from the FPR bank, split it into a regular
3023 // load + extend.
3024 if (RB.getID() == AArch64::FPRRegBankID) {
3025 unsigned SubReg;
3026 LLT MemTy = LdSt.getMMO().getMemoryType();
3027 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
3028 if (!getSubRegForClass(RC, TRI, SubReg))
3029 return false;
3030 Register OldDst = LdSt.getReg(0);
3031 Register NewDst =
3032 MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
3033 LdSt.getOperand(0).setReg(NewDst);
3034 MRI.setRegBank(NewDst, RB);
3035 // Generate a SUBREG_TO_REG to extend it.
3036 MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
3037 MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
3038 .addImm(0)
3039 .addUse(NewDst)
3040 .addImm(SubReg);
3041 auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB);
3042 RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
3043 MIB.setInstr(LdSt);
3044 ValTy = MemTy; // This is no longer an extending load.
3045 }
3046 }
3047
3048 // Helper lambda for partially selecting I. Either returns the original
3049 // instruction with an updated opcode, or a new instruction.
3050 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
3051 bool IsStore = isa<GStore>(I);
3052 const unsigned NewOpc =
3053 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
3054 if (NewOpc == I.getOpcode())
3055 return nullptr;
3056 // Check if we can fold anything into the addressing mode.
3057 auto AddrModeFns =
3058 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
3059 if (!AddrModeFns) {
3060 // Can't fold anything. Use the original instruction.
3061 I.setDesc(TII.get(NewOpc));
3062 I.addOperand(MachineOperand::CreateImm(0));
3063 return &I;
3064 }
3065
3066 // Folded something. Create a new instruction and return it.
3067 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
3068 Register CurValReg = I.getOperand(0).getReg();
3069 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
3070 NewInst.cloneMemRefs(I);
3071 for (auto &Fn : *AddrModeFns)
3072 Fn(NewInst);
3073 I.eraseFromParent();
3074 return &*NewInst;
3075 };
3076
3077 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
3078 if (!LoadStore)
3079 return false;
3080
3081 // If we're storing a 0, use WZR/XZR.
3082 if (Opcode == TargetOpcode::G_STORE) {
3083 auto CVal = getIConstantVRegValWithLookThrough(
3084 LoadStore->getOperand(0).getReg(), MRI);
3085 if (CVal && CVal->Value == 0) {
3086 switch (LoadStore->getOpcode()) {
3087 case AArch64::STRWui:
3088 case AArch64::STRHHui:
3089 case AArch64::STRBBui:
3090 LoadStore->getOperand(0).setReg(AArch64::WZR);
3091 break;
3092 case AArch64::STRXui:
3093 LoadStore->getOperand(0).setReg(AArch64::XZR);
3094 break;
3095 }
3096 }
3097 }
3098
3099 if (IsZExtLoad || (Opcode == TargetOpcode::G_LOAD &&
3100 ValTy == LLT::scalar(64) && MemSizeInBits == 32)) {
3101 // The any/zextload from a smaller type to i32 should be handled by the
3102 // importer.
3103 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
3104 return false;
3105 // If we have an extending load then change the load's type to be a
3106 // narrower reg and zero_extend with SUBREG_TO_REG.
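// For example (illustrative): a 32-bit zextload into a 64-bit register becomes
//   %ld:gpr32 = LDRWui %addr, 0
//   %dst:gpr64all = SUBREG_TO_REG 0, %ld, %subreg.sub_32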
3107 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3108 Register DstReg = LoadStore->getOperand(0).getReg();
3109 LoadStore->getOperand(0).setReg(LdReg);
3110
3111 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
3112 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
3113 .addImm(0)
3114 .addUse(LdReg)
3115 .addImm(AArch64::sub_32);
3116 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3117 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
3118 MRI);
3119 }
3120 return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3121 }
3122
3123 case TargetOpcode::G_INDEXED_ZEXTLOAD:
3124 case TargetOpcode::G_INDEXED_SEXTLOAD:
3125 return selectIndexedExtLoad(I, MRI);
3126 case TargetOpcode::G_INDEXED_LOAD:
3127 return selectIndexedLoad(I, MRI);
3128 case TargetOpcode::G_INDEXED_STORE:
3129 return selectIndexedStore(cast<GIndexedStore>(I), MRI);
3130
3131 case TargetOpcode::G_LSHR:
3132 case TargetOpcode::G_ASHR:
3133 if (MRI.getType(I.getOperand(0).getReg()).isVector())
3134 return selectVectorAshrLshr(I, MRI);
3135 [[fallthrough]];
3136 case TargetOpcode::G_SHL:
3137 if (Opcode == TargetOpcode::G_SHL &&
3138 MRI.getType(I.getOperand(0).getReg()).isVector())
3139 return selectVectorSHL(I, MRI);
3140
3141 // These shifts were legalized to have 64 bit shift amounts because we
3142 // want to take advantage of the selection patterns that assume the
3143 // immediates are s64s; however, selectBinaryOp will assume both operands
3144 // have the same bit size.
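// For example (illustrative): a 32-bit G_SHL whose amount is s64 first gets a
// sub_32 COPY of the amount so both operands are 32 bits wide.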
3145 {
3146 Register SrcReg = I.getOperand(1).getReg();
3147 Register ShiftReg = I.getOperand(2).getReg();
3148 const LLT ShiftTy = MRI.getType(ShiftReg);
3149 const LLT SrcTy = MRI.getType(SrcReg);
3150 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
3151 ShiftTy.getSizeInBits() == 64) {
3152 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
3153 // Insert a subregister copy to implement a 64->32 trunc
3154 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
3155 .addReg(ShiftReg, 0, AArch64::sub_32);
3156 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
3157 I.getOperand(2).setReg(Trunc.getReg(0));
3158 }
3159 }
3160 [[fallthrough]];
3161 case TargetOpcode::G_OR: {
3162 // Reject the various things we don't support yet.
3163 if (unsupportedBinOp(I, RBI, MRI, TRI))
3164 return false;
3165
3166 const unsigned OpSize = Ty.getSizeInBits();
3167
3168 const Register DefReg = I.getOperand(0).getReg();
3169 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
3170
3171 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
3172 if (NewOpc == I.getOpcode())
3173 return false;
3174
3175 I.setDesc(TII.get(NewOpc));
3176 // FIXME: Should the type be always reset in setDesc?
3177
3178 // Now that we selected an opcode, we need to constrain the register
3179 // operands to use appropriate classes.
3180 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3181 }
3182
3183 case TargetOpcode::G_PTR_ADD: {
3184 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
3185 I.eraseFromParent();
3186 return true;
3187 }
3188
3189 case TargetOpcode::G_SADDE:
3190 case TargetOpcode::G_UADDE:
3191 case TargetOpcode::G_SSUBE:
3192 case TargetOpcode::G_USUBE:
3193 case TargetOpcode::G_SADDO:
3194 case TargetOpcode::G_UADDO:
3195 case TargetOpcode::G_SSUBO:
3196 case TargetOpcode::G_USUBO:
3197 return selectOverflowOp(I, MRI);
3198
3199 case TargetOpcode::G_PTRMASK: {
3200 Register MaskReg = I.getOperand(2).getReg();
3201 std::optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
3202 // TODO: Implement arbitrary cases
3203 if (!MaskVal || !isShiftedMask_64(*MaskVal))
3204 return false;
3205
3206 uint64_t Mask = *MaskVal;
3207 I.setDesc(TII.get(AArch64::ANDXri));
3208 I.getOperand(2).ChangeToImmediate(
3209 AArch64_AM::encodeLogicalImmediate(Mask, 64));
3210
3211 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3212 }
3213 case TargetOpcode::G_PTRTOINT:
3214 case TargetOpcode::G_TRUNC: {
3215 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3216 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3217
3218 const Register DstReg = I.getOperand(0).getReg();
3219 const Register SrcReg = I.getOperand(1).getReg();
3220
3221 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3222 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3223
3224 if (DstRB.getID() != SrcRB.getID()) {
3225 LLVM_DEBUG(
3226 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
3227 return false;
3228 }
3229
3230 if (DstRB.getID() == AArch64::GPRRegBankID) {
3231 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3232 if (!DstRC)
3233 return false;
3234
3235 const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB);
3236 if (!SrcRC)
3237 return false;
3238
3239 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
3240 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
3241 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
3242 return false;
3243 }
3244
3245 if (DstRC == SrcRC) {
3246 // Nothing to be done
3247 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
3248 SrcTy == LLT::scalar(64)) {
3249 llvm_unreachable("TableGen can import this case");
3250 return false;
3251 } else if (DstRC == &AArch64::GPR32RegClass &&
3252 SrcRC == &AArch64::GPR64RegClass) {
3253 I.getOperand(1).setSubReg(AArch64::sub_32);
3254 } else {
3255 LLVM_DEBUG(
3256 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3257 return false;
3258 }
3259
3260 I.setDesc(TII.get(TargetOpcode::COPY));
3261 return true;
3262 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3263 if (DstTy == LLT::fixed_vector(4, 16) &&
3264 SrcTy == LLT::fixed_vector(4, 32)) {
3265 I.setDesc(TII.get(AArch64::XTNv4i16));
3266 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3267 return true;
3268 }
3269
3270 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3271 MachineInstr *Extract = emitExtractVectorElt(
3272 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
3273 if (!Extract)
3274 return false;
3275 I.eraseFromParent();
3276 return true;
3277 }
3278
3279 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3280 if (Opcode == TargetOpcode::G_PTRTOINT) {
3281 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
3282 I.setDesc(TII.get(TargetOpcode::COPY));
3283 return selectCopy(I, TII, MRI, TRI, RBI);
3284 }
3285 }
3286
3287 return false;
3288 }
3289
3290 case TargetOpcode::G_ANYEXT: {
3291 if (selectUSMovFromExtend(I, MRI))
3292 return true;
3293
3294 const Register DstReg = I.getOperand(0).getReg();
3295 const Register SrcReg = I.getOperand(1).getReg();
3296
3297 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
3298 if (RBDst.getID() != AArch64::GPRRegBankID) {
3299 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
3300 << ", expected: GPR\n");
3301 return false;
3302 }
3303
3304 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
3305 if (RBSrc.getID() != AArch64::GPRRegBankID) {
3306 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
3307 << ", expected: GPR\n");
3308 return false;
3309 }
3310
3311 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
3312
3313 if (DstSize == 0) {
3314 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
3315 return false;
3316 }
3317
3318 if (DstSize != 64 && DstSize > 32) {
3319 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
3320 << ", expected: 32 or 64\n");
3321 return false;
3322 }
3323 // At this point G_ANYEXT is just like a plain COPY, but we need
3324 // to explicitly form the 64-bit value if any.
3325 if (DstSize > 32) {
3326 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3327 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
3328 .addDef(ExtSrc)
3329 .addImm(0)
3330 .addUse(SrcReg)
3331 .addImm(AArch64::sub_32);
3332 I.getOperand(1).setReg(ExtSrc);
3333 }
3334 return selectCopy(I, TII, MRI, TRI, RBI);
3335 }
3336
3337 case TargetOpcode::G_ZEXT:
3338 case TargetOpcode::G_SEXT_INREG:
3339 case TargetOpcode::G_SEXT: {
3340 if (selectUSMovFromExtend(I, MRI))
3341 return true;
3342
3343 unsigned Opcode = I.getOpcode();
3344 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3345 const Register DefReg = I.getOperand(0).getReg();
3346 Register SrcReg = I.getOperand(1).getReg();
3347 const LLT DstTy = MRI.getType(DefReg);
3348 const LLT SrcTy = MRI.getType(SrcReg);
3349 unsigned DstSize = DstTy.getSizeInBits();
3350 unsigned SrcSize = SrcTy.getSizeInBits();
3351
3352 // SEXT_INREG has the same src reg size as dst, the size of the value to be
3353 // extended is encoded in the imm.
3354 if (Opcode == TargetOpcode::G_SEXT_INREG)
3355 SrcSize = I.getOperand(2).getImm();
3356
3357 if (DstTy.isVector())
3358 return false; // Should be handled by imported patterns.
3359
3360 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3361 AArch64::GPRRegBankID &&
3362 "Unexpected ext regbank");
3363
3364 MachineInstr *ExtI;
3365
3366 // First check if we're extending the result of a load which has a dest type
3367 // smaller than 32 bits; if so, this zext is redundant. GPR32 is the smallest
3368 // GPR register on AArch64 and all loads which are smaller automatically
3369 // zero-extend the upper bits. E.g.
3370 // %v(s8) = G_LOAD %p, :: (load 1)
3371 // %v2(s32) = G_ZEXT %v(s8)
3372 if (!IsSigned) {
3373 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
3374 bool IsGPR =
3375 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3376 if (LoadMI && IsGPR) {
3377 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3378 unsigned BytesLoaded = MemOp->getSize().getValue();
3379 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3380 return selectCopy(I, TII, MRI, TRI, RBI);
3381 }
3382
3383 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3384 // + SUBREG_TO_REG.
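// For example (illustrative, vreg names invented), a
//   %d:gpr(s64) = G_ZEXT %s:gpr(s32)
// roughly becomes:
//   %t:gpr32 = ORRWrs $wzr, %s, 0            ; 32-bit mov zeroes the top half
//   %d:gpr64 = SUBREG_TO_REG 0, %t, %subreg.sub_32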
3385 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3386 Register SubregToRegSrc =
3387 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3388 const Register ZReg = AArch64::WZR;
3389 MIB.buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg})
3390 .addImm(0);
3391
3392 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3393 .addImm(0)
3394 .addUse(SubregToRegSrc)
3395 .addImm(AArch64::sub_32);
3396
3397 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3398 MRI)) {
3399 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3400 return false;
3401 }
3402
3403 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3404 MRI)) {
3405 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3406 return false;
3407 }
3408
3409 I.eraseFromParent();
3410 return true;
3411 }
3412 }
3413
3414 if (DstSize == 64) {
3415 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3416 // FIXME: Can we avoid manually doing this?
3417 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3418 MRI)) {
3419 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3420 << " operand\n");
3421 return false;
3422 }
3423 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
3424 {&AArch64::GPR64RegClass}, {})
3425 .addImm(0)
3426 .addUse(SrcReg)
3427 .addImm(AArch64::sub_32)
3428 .getReg(0);
3429 }
3430
3431 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3432 {DefReg}, {SrcReg})
3433 .addImm(0)
3434 .addImm(SrcSize - 1);
3435 } else if (DstSize <= 32) {
3436 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3437 {DefReg}, {SrcReg})
3438 .addImm(0)
3439 .addImm(SrcSize - 1);
3440 } else {
3441 return false;
3442 }
3443
3444 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
3445 I.eraseFromParent();
3446 return true;
3447 }
3448
3449 case TargetOpcode::G_FREEZE:
3450 return selectCopy(I, TII, MRI, TRI, RBI);
3451
3452 case TargetOpcode::G_INTTOPTR:
3453 // The importer is currently unable to import pointer types since they
3454 // didn't exist in SelectionDAG.
3455 return selectCopy(I, TII, MRI, TRI, RBI);
3456
3457 case TargetOpcode::G_BITCAST:
3458 // Imported SelectionDAG rules can handle every bitcast except those that
3459 // bitcast from a type to the same type. Ideally, these shouldn't occur
3460 // but we might not run an optimizer that deletes them. The other exception
3461 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3462 // of them.
3463 return selectCopy(I, TII, MRI, TRI, RBI);
3464
3465 case TargetOpcode::G_SELECT: {
3466 auto &Sel = cast<GSelect>(I);
3467 const Register CondReg = Sel.getCondReg();
3468 const Register TReg = Sel.getTrueReg();
3469 const Register FReg = Sel.getFalseReg();
3470
3471 if (tryOptSelect(Sel))
3472 return true;
3473
3474 // Make sure to use an unused vreg instead of wzr, so that the peephole
3475 // optimizations will be able to optimize these.
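// Rough shape of the emitted code (illustrative; the exact select opcode is
// chosen by emitSelect):
//   %dead:gpr32 = ANDSWri %cond, 1           ; test bit 0, sets NZCV
//   %dst = CSEL %t, %f, ne                   ; or FCSEL/CSINC etc.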
3476 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3477 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3478 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3479 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3480 if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
3481 return false;
3482 Sel.eraseFromParent();
3483 return true;
3484 }
3485 case TargetOpcode::G_ICMP: {
3486 if (Ty.isVector())
3487 return false;
3488
3489 if (Ty != LLT::scalar(32)) {
3490 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3491 << ", expected: " << LLT::scalar(32) << '\n');
3492 return false;
3493 }
3494
3495 auto &PredOp = I.getOperand(1);
3496 emitIntegerCompare(I.getOperand(2), I.getOperand(3), PredOp, MIB);
3497 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
3498 const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
3499 CmpInst::getInversePredicate(Pred), I.getOperand(3).getReg(), &MRI);
3500 emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
3501 /*Src2=*/AArch64::WZR, InvCC, MIB);
3502 I.eraseFromParent();
3503 return true;
3504 }
3505
3506 case TargetOpcode::G_FCMP: {
3507 CmpInst::Predicate Pred =
3508 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3509 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
3510 Pred) ||
3511 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
3512 return false;
3513 I.eraseFromParent();
3514 return true;
3515 }
3516 case TargetOpcode::G_VASTART:
3517 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3518 : selectVaStartAAPCS(I, MF, MRI);
3519 case TargetOpcode::G_INTRINSIC:
3520 return selectIntrinsic(I, MRI);
3521 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3522 return selectIntrinsicWithSideEffects(I, MRI);
3523 case TargetOpcode::G_IMPLICIT_DEF: {
3524 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3525 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3526 const Register DstReg = I.getOperand(0).getReg();
3527 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3528 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3529 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3530 return true;
3531 }
3532 case TargetOpcode::G_BLOCK_ADDR: {
3533 Function *BAFn = I.getOperand(1).getBlockAddress()->getFunction();
3534 if (std::optional<uint16_t> BADisc =
3535 STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(*BAFn)) {
3536 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X16}, {});
3537 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
3538 MIB.buildInstr(AArch64::MOVaddrPAC)
3539 .addBlockAddress(I.getOperand(1).getBlockAddress())
3540 .addImm(AArch64PACKey::IA)
3541 .addReg(/*AddrDisc=*/AArch64::XZR)
3542 .addImm(*BADisc)
3543 .constrainAllUses(TII, TRI, RBI);
3544 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X16));
3545 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
3546 AArch64::GPR64RegClass, MRI);
3547 I.eraseFromParent();
3548 return true;
3549 }
3550 if (TM.getCodeModel() == CodeModel::Large && !TM.isPositionIndependent()) {
3551 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3552 I.eraseFromParent();
3553 return true;
3554 } else {
3555 I.setDesc(TII.get(AArch64::MOVaddrBA));
3556 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3557 I.getOperand(0).getReg())
3558 .addBlockAddress(I.getOperand(1).getBlockAddress(),
3559 /* Offset */ 0, AArch64II::MO_PAGE)
3560 .addBlockAddress(
3561 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3562 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3563 I.eraseFromParent();
3564 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3565 }
3566 }
3567 case AArch64::G_DUP: {
3568 // When the scalar of G_DUP is an s8/s16 gpr, it can't be selected by
3569 // imported patterns. Do it manually here. Avoiding generating s16 gpr is
3570 // difficult because at RBS we may end up pessimizing the fpr case if we
3571 // decided to add an anyextend to fix this. Manual selection is the most
3572 // robust solution for now.
3573 if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3574 AArch64::GPRRegBankID)
3575 return false; // We expect the fpr regbank case to be imported.
3576 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3577 if (VecTy == LLT::fixed_vector(8, 8))
3578 I.setDesc(TII.get(AArch64::DUPv8i8gpr));
3579 else if (VecTy == LLT::fixed_vector(16, 8))
3580 I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3581 else if (VecTy == LLT::fixed_vector(4, 16))
3582 I.setDesc(TII.get(AArch64::DUPv4i16gpr));
3583 else if (VecTy == LLT::fixed_vector(8, 16))
3584 I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3585 else
3586 return false;
3587 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3588 }
3589 case TargetOpcode::G_BUILD_VECTOR:
3590 return selectBuildVector(I, MRI);
3591 case TargetOpcode::G_MERGE_VALUES:
3592 return selectMergeValues(I, MRI);
3593 case TargetOpcode::G_UNMERGE_VALUES:
3594 return selectUnmergeValues(I, MRI);
3595 case TargetOpcode::G_SHUFFLE_VECTOR:
3596 return selectShuffleVector(I, MRI);
3597 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3598 return selectExtractElt(I, MRI);
3599 case TargetOpcode::G_CONCAT_VECTORS:
3600 return selectConcatVectors(I, MRI);
3601 case TargetOpcode::G_JUMP_TABLE:
3602 return selectJumpTable(I, MRI);
3603 case TargetOpcode::G_MEMCPY:
3604 case TargetOpcode::G_MEMCPY_INLINE:
3605 case TargetOpcode::G_MEMMOVE:
3606 case TargetOpcode::G_MEMSET:
3607 assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
3608 return selectMOPS(I, MRI);
3609 }
3610
3611 return false;
3612}
3613
3614bool AArch64InstructionSelector::selectAndRestoreState(MachineInstr &I) {
3615 MachineIRBuilderState OldMIBState = MIB.getState();
3616 bool Success = select(I);
3617 MIB.setState(OldMIBState);
3618 return Success;
3619}
3620
3621bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
3622 MachineRegisterInfo &MRI) {
3623 unsigned Mopcode;
3624 switch (GI.getOpcode()) {
3625 case TargetOpcode::G_MEMCPY:
3626 case TargetOpcode::G_MEMCPY_INLINE:
3627 Mopcode = AArch64::MOPSMemoryCopyPseudo;
3628 break;
3629 case TargetOpcode::G_MEMMOVE:
3630 Mopcode = AArch64::MOPSMemoryMovePseudo;
3631 break;
3632 case TargetOpcode::G_MEMSET:
3633 // For tagged memset see llvm.aarch64.mops.memset.tag
3634 Mopcode = AArch64::MOPSMemorySetPseudo;
3635 break;
3636 }
3637
3638 auto &DstPtr = GI.getOperand(0);
3639 auto &SrcOrVal = GI.getOperand(1);
3640 auto &Size = GI.getOperand(2);
3641
3642 // Create copies of the registers that can be clobbered.
3643 const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
3644 const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
3645 const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg());
3646
3647 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3648 const auto &SrcValRegClass =
3649 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3650
3651 // Constrain to specific registers
3652 RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
3653 RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
3654 RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);
3655
3656 MIB.buildCopy(DstPtrCopy, DstPtr);
3657 MIB.buildCopy(SrcValCopy, SrcOrVal);
3658 MIB.buildCopy(SizeCopy, Size);
3659
3660 // New instruction uses the copied registers because it must update them.
3661 // The defs are not used since they don't exist in G_MEM*. They are still
3662 // tied.
3663 // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE
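// For example (illustrative), a G_MEMSET %dst, %val, %size is rebuilt as
//   MOPSMemorySetPseudo %newdst, %newsize, %dstcopy, %sizecopy, %valcopy
// i.e. the size operand moves ahead of the value compared to G_MEMSET.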
3664 Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
3665 Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3666 if (IsSet) {
3667 MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
3668 {DstPtrCopy, SizeCopy, SrcValCopy});
3669 } else {
3670 Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
3671 MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
3672 {DstPtrCopy, SrcValCopy, SizeCopy});
3673 }
3674
3675 GI.eraseFromParent();
3676 return true;
3677}
3678
3679bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3680 MachineRegisterInfo &MRI) {
3681 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3682 Register JTAddr = I.getOperand(0).getReg();
3683 unsigned JTI = I.getOperand(1).getIndex();
3684 Register Index = I.getOperand(2).getReg();
3685
3686 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3687
3688 // With aarch64-jump-table-hardening, we only expand the jump table dispatch
3689 // sequence later, to guarantee the integrity of the intermediate values.
3690 if (MF->getFunction().hasFnAttribute("aarch64-jump-table-hardening")) {
3691 CodeModel::Model CM = TM.getCodeModel();
3692 if (STI.isTargetMachO()) {
3693 if (CM != CodeModel::Small && CM != CodeModel::Large)
3694 report_fatal_error("Unsupported code-model for hardened jump-table");
3695 } else {
3696 // Note that COFF support would likely also need JUMP_TABLE_DEBUG_INFO.
3697 assert(STI.isTargetELF() &&
3698 "jump table hardening only supported on MachO/ELF");
3699 if (CM != CodeModel::Small)
3700 report_fatal_error("Unsupported code-model for hardened jump-table");
3701 }
3702
3703 MIB.buildCopy({AArch64::X16}, I.getOperand(2).getReg());
3704 MIB.buildInstr(AArch64::BR_JumpTable)
3705 .addJumpTableIndex(I.getOperand(1).getIndex());
3706 I.eraseFromParent();
3707 return true;
3708 }
3709
3710 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3711 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3712
3713 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3714 {TargetReg, ScratchReg}, {JTAddr, Index})
3715 .addJumpTableIndex(JTI);
3716 // Save the jump table info.
3717 MIB.buildInstr(TargetOpcode::JUMP_TABLE_DEBUG_INFO, {},
3718 {static_cast<int64_t>(JTI)});
3719 // Build the indirect branch.
3720 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3721 I.eraseFromParent();
3722 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3723}
3724
3725bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3726 MachineRegisterInfo &MRI) {
3727 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3728 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3729
3730 Register DstReg = I.getOperand(0).getReg();
3731 unsigned JTI = I.getOperand(1).getIndex();
3732 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
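// Illustratively, the later expansion looks like (label/register invented):
//   adrp xN, .LJTI0_0
//   add  xN, xN, :lo12:.LJTI0_0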
3733 auto MovMI =
3734 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3735 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3736 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3737 I.eraseFromParent();
3738 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3739}
3740
3741bool AArch64InstructionSelector::selectTLSGlobalValue(
3742 MachineInstr &I, MachineRegisterInfo &MRI) {
3743 if (!STI.isTargetMachO())
3744 return false;
3745 MachineFunction &MF = *I.getParent()->getParent();
3746 MF.getFrameInfo().setAdjustsStack(true);
3747
3748 const auto &GlobalOp = I.getOperand(1);
3749 assert(GlobalOp.getOffset() == 0 &&
3750 "Shouldn't have an offset on TLS globals!");
3751 const GlobalValue &GV = *GlobalOp.getGlobal();
3752
3753 auto LoadGOT =
3754 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3755 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3756
3757 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3758 {LoadGOT.getReg(0)})
3759 .addImm(0);
3760
3761 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3762 // TLS calls preserve all registers except those that absolutely must be
3763 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3764 // silly).
3765 unsigned Opcode = getBLRCallOpcode(MF);
3766
3767 // With ptrauth-calls, the tlv access thunk pointer is authenticated (IA, 0).
3768 if (MF.getFunction().hasFnAttribute("ptrauth-calls")) {
3769 assert(Opcode == AArch64::BLR);
3770 Opcode = AArch64::BLRAAZ;
3771 }
3772
3773 MIB.buildInstr(Opcode, {}, {Load})
3774 .addUse(AArch64::X0, RegState::Implicit)
3775 .addDef(AArch64::X0, RegState::Implicit)
3776 .addRegMask(TRI.getTLSCallPreservedMask());
3777
3778 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3779 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3780 MRI);
3781 I.eraseFromParent();
3782 return true;
3783}
3784
3785MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3786 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3787 MachineIRBuilder &MIRBuilder) const {
3788 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3789
3790 auto BuildFn = [&](unsigned SubregIndex) {
3791 auto Ins =
3792 MIRBuilder
3793 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3794 .addImm(SubregIndex);
3795 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3796 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3797 return &*Ins;
3798 };
3799
3800 switch (EltSize) {
3801 case 8:
3802 return BuildFn(AArch64::bsub);
3803 case 16:
3804 return BuildFn(AArch64::hsub);
3805 case 32:
3806 return BuildFn(AArch64::ssub);
3807 case 64:
3808 return BuildFn(AArch64::dsub);
3809 default:
3810 return nullptr;
3811 }
3812}
3813
3814MachineInstr *
3815AArch64InstructionSelector::emitNarrowVector(Register DstReg, Register SrcReg,
3816 MachineIRBuilder &MIB,
3817 MachineRegisterInfo &MRI) const {
3818 LLT DstTy = MRI.getType(DstReg);
3819 const TargetRegisterClass *RC =
3820 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(SrcReg, MRI, TRI));
3821 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3822 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3823 return nullptr;
3824 }
3825 unsigned SubReg = 0;
3826 if (!getSubRegForClass(RC, TRI, SubReg))
3827 return nullptr;
3828 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3829 LLVM_DEBUG(dbgs() << "Unsupported destination size! ("
3830 << DstTy.getSizeInBits() << "\n");
3831 return nullptr;
3832 }
3833 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3834 .addReg(SrcReg, 0, SubReg);
3835 RBI.constrainGenericRegister(DstReg, *RC, MRI);
3836 return Copy;
3837}
3838
3839bool AArch64InstructionSelector::selectMergeValues(
3840 MachineInstr &I, MachineRegisterInfo &MRI) {
3841 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3842 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3843 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3844 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3845 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3846
3847 if (I.getNumOperands() != 3)
3848 return false;
3849
3850 // Merging 2 s64s into an s128.
3851 if (DstTy == LLT::scalar(128)) {
3852 if (SrcTy.getSizeInBits() != 64)
3853 return false;
3854 Register DstReg = I.getOperand(0).getReg();
3855 Register Src1Reg = I.getOperand(1).getReg();
3856 Register Src2Reg = I.getOperand(2).getReg();
3857 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3858 MachineInstr *InsMI = emitLaneInsert(std::nullopt, Tmp.getReg(0), Src1Reg,
3859 /* LaneIdx */ 0, RB, MIB);
3860 if (!InsMI)
3861 return false;
3862 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
3863 Src2Reg, /* LaneIdx */ 1, RB, MIB);
3864 if (!Ins2MI)
3865 return false;
3866 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3867 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
3868 I.eraseFromParent();
3869 return true;
3870 }
3871
3872 if (RB.getID() != AArch64::GPRRegBankID)
3873 return false;
3874
3875 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3876 return false;
3877
3878 auto *DstRC = &AArch64::GPR64RegClass;
3879 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
3880 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3881 TII.get(TargetOpcode::SUBREG_TO_REG))
3882 .addDef(SubToRegDef)
3883 .addImm(0)
3884 .addUse(I.getOperand(1).getReg())
3885 .addImm(AArch64::sub_32);
3886 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
3887 // Need to anyext the second scalar before we can use bfm
3888 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3889 TII.get(TargetOpcode::SUBREG_TO_REG))
3890 .addDef(SubToRegDef2)
3891 .addImm(0)
3892 .addUse(I.getOperand(2).getReg())
3893 .addImm(AArch64::sub_32);
3894 MachineInstr &BFM =
3895 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
3896 .addDef(I.getOperand(0).getReg())
3897 .addUse(SubToRegDef)
3898 .addUse(SubToRegDef2)
3899 .addImm(32)
3900 .addImm(31);
3901 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
3902 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
3903 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
3904 I.eraseFromParent();
3905 return true;
3906}
3907
3908static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3909 const unsigned EltSize) {
3910 // Choose a lane copy opcode and subregister based off of the size of the
3911 // vector's elements.
3912 switch (EltSize) {
3913 case 8:
3914 CopyOpc = AArch64::DUPi8;
3915 ExtractSubReg = AArch64::bsub;
3916 break;
3917 case 16:
3918 CopyOpc = AArch64::DUPi16;
3919 ExtractSubReg = AArch64::hsub;
3920 break;
3921 case 32:
3922 CopyOpc = AArch64::DUPi32;
3923 ExtractSubReg = AArch64::ssub;
3924 break;
3925 case 64:
3926 CopyOpc = AArch64::DUPi64;
3927 ExtractSubReg = AArch64::dsub;
3928 break;
3929 default:
3930 // Unknown size, bail out.
3931 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
3932 return false;
3933 }
3934 return true;
3935}
3936
3937MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3938 std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
3939 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
3940 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3941 unsigned CopyOpc = 0;
3942 unsigned ExtractSubReg = 0;
3943 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
3944 LLVM_DEBUG(
3945 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
3946 return nullptr;
3947 }
3948
3949 const TargetRegisterClass *DstRC =
3950 getRegClassForTypeOnBank(ScalarTy, DstRB, true);
3951 if (!DstRC) {
3952 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
3953 return nullptr;
3954 }
3955
3956 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
3957 const LLT &VecTy = MRI.getType(VecReg);
3958 const TargetRegisterClass *VecRC =
3959 getRegClassForTypeOnBank(VecTy, VecRB, true);
3960 if (!VecRC) {
3961 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3962 return nullptr;
3963 }
3964
3965 // The register that we're going to copy into.
3966 Register InsertReg = VecReg;
3967 if (!DstReg)
3968 DstReg = MRI.createVirtualRegister(DstRC);
3969 // If the lane index is 0, we just use a subregister COPY.
3970 if (LaneIdx == 0) {
3971 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
3972 .addReg(VecReg, 0, ExtractSubReg);
3973 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3974 return &*Copy;
3975 }
3976
3977 // Lane copies require 128-bit wide registers. If we're dealing with an
3978 // unpacked vector, then we need to move up to that width. Insert an implicit
3979 // def and a subregister insert to get us there.
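// For example (illustrative), extracting lane 1 of a 64-bit v2s32 value:
//   %wide:fpr128 = INSERT_SUBREG (IMPLICIT_DEF), %vec, %subreg.dsub
//   %elt:fpr32   = DUPi32 %wide, 1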
3980 if (VecTy.getSizeInBits() != 128) {
3981 MachineInstr *ScalarToVector = emitScalarToVector(
3982 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
3983 if (!ScalarToVector)
3984 return nullptr;
3985 InsertReg = ScalarToVector->getOperand(0).getReg();
3986 }
3987
3988 MachineInstr *LaneCopyMI =
3989 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
3990 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
3991
3992 // Make sure that we actually constrain the initial copy.
3993 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3994 return LaneCopyMI;
3995}
3996
3997bool AArch64InstructionSelector::selectExtractElt(
3998 MachineInstr &I, MachineRegisterInfo &MRI) {
3999 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
4000 "unexpected opcode!");
4001 Register DstReg = I.getOperand(0).getReg();
4002 const LLT NarrowTy = MRI.getType(DstReg);
4003 const Register SrcReg = I.getOperand(1).getReg();
4004 const LLT WideTy = MRI.getType(SrcReg);
4005 (void)WideTy;
4006 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
4007 "source register size too small!");
4008 assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
4009
4010 // Need the lane index to determine the correct copy opcode.
4011 MachineOperand &LaneIdxOp = I.getOperand(2);
4012 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
4013
4014 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4015 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
4016 return false;
4017 }
4018
4019 // Find the index to extract from.
4020 auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
4021 if (!VRegAndVal)
4022 return false;
4023 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4024
4025
4026 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
4027 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
4028 LaneIdx, MIB);
4029 if (!Extract)
4030 return false;
4031
4032 I.eraseFromParent();
4033 return true;
4034}
4035
4036bool AArch64InstructionSelector::selectSplitVectorUnmerge(
4037 MachineInstr &I, MachineRegisterInfo &MRI) {
4038 unsigned NumElts = I.getNumOperands() - 1;
4039 Register SrcReg = I.getOperand(NumElts).getReg();
4040 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4041 const LLT SrcTy = MRI.getType(SrcReg);
4042
4043 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
4044 if (SrcTy.getSizeInBits() > 128) {
4045 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
4046 return false;
4047 }
4048
4049 // We implement a split vector operation by treating the sub-vectors as
4050 // scalars and extracting them.
4051 const RegisterBank &DstRB =
4052 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
4053 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4054 Register Dst = I.getOperand(OpIdx).getReg();
4055 MachineInstr *Extract =
4056 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
4057 if (!Extract)
4058 return false;
4059 }
4060 I.eraseFromParent();
4061 return true;
4062}
4063
4064bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
4065 MachineRegisterInfo &MRI) {
4066 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4067 "unexpected opcode");
4068
4069 // TODO: Handle unmerging into GPRs and from scalars to scalars.
4070 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
4071 AArch64::FPRRegBankID ||
4072 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
4073 AArch64::FPRRegBankID) {
4074 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
4075 "currently unsupported.\n");
4076 return false;
4077 }
4078
4079 // The last operand is the vector source register, and every other operand is
4080 // a register to unpack into.
4081 unsigned NumElts = I.getNumOperands() - 1;
4082 Register SrcReg = I.getOperand(NumElts).getReg();
4083 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4084 const LLT WideTy = MRI.getType(SrcReg);
4085 (void)WideTy;
4086 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
4087 "can only unmerge from vector or s128 types!");
4088 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
4089 "source register size too small!");
4090
4091 if (!NarrowTy.isScalar())
4092 return selectSplitVectorUnmerge(I, MRI);
4093
4094 // Choose a lane copy opcode and subregister based off of the size of the
4095 // vector's elements.
4096 unsigned CopyOpc = 0;
4097 unsigned ExtractSubReg = 0;
4098 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
4099 return false;
4100
4101 // Set up for the lane copies.
4102 MachineBasicBlock &MBB = *I.getParent();
4103
4104 // Stores the registers we'll be copying from.
4105 SmallVector<Register, 4> InsertRegs;
4106
4107 // We'll use the first register twice, so we only need NumElts-1 registers.
4108 unsigned NumInsertRegs = NumElts - 1;
4109
4110 // If our elements fit into exactly 128 bits, then we can copy from the source
4111 // directly. Otherwise, we need to do a bit of setup with some subregister
4112 // inserts.
4113 if (NarrowTy.getSizeInBits() * NumElts == 128) {
4114 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
4115 } else {
4116 // No. We have to perform subregister inserts. For each insert, create an
4117 // implicit def and a subregister insert, and save the register we create.
4118 const TargetRegisterClass *RC = getRegClassForTypeOnBank(
4119 LLT::fixed_vector(NumElts, WideTy.getScalarSizeInBits()),
4120 *RBI.getRegBank(SrcReg, MRI, TRI));
4121 unsigned SubReg = 0;
4122 bool Found = getSubRegForClass(RC, TRI, SubReg);
4123 (void)Found;
4124 assert(Found && "expected to find last operand's subreg idx");
4125 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4126 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4127 MachineInstr &ImpDefMI =
4128 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
4129 ImpDefReg);
4130
4131 // Now, create the subregister insert from SrcReg.
4132 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4133 MachineInstr &InsMI =
4134 *BuildMI(MBB, I, I.getDebugLoc(),
4135 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4136 .addUse(ImpDefReg)
4137 .addUse(SrcReg)
4138 .addImm(SubReg);
4139
4140 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
4141 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
4142
4143 // Save the register so that we can copy from it after.
4144 InsertRegs.push_back(InsertReg);
4145 }
4146 }
4147
4148 // Now that we've created any necessary subregister inserts, we can
4149 // create the copies.
4150 //
4151 // Perform the first copy separately as a subregister copy.
4152 Register CopyTo = I.getOperand(0).getReg();
4153 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4154 .addReg(InsertRegs[0], 0, ExtractSubReg);
4155 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
4156
4157 // Now, perform the remaining copies as vector lane copies.
4158 unsigned LaneIdx = 1;
4159 for (Register InsReg : InsertRegs) {
4160 Register CopyTo = I.getOperand(LaneIdx).getReg();
4161 MachineInstr &CopyInst =
4162 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
4163 .addUse(InsReg)
4164 .addImm(LaneIdx);
4165 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
4166 ++LaneIdx;
4167 }
4168
4169 // Separately constrain the first copy's destination. Because of the
4170 // limitation in constrainOperandRegClass, we can't guarantee that this will
4171 // actually be constrained. So, do it ourselves using the second operand.
4172 const TargetRegisterClass *RC =
4173 MRI.getRegClassOrNull(I.getOperand(1).getReg());
4174 if (!RC) {
4175 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
4176 return false;
4177 }
4178
4179 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
4180 I.eraseFromParent();
4181 return true;
4182}
4183
4184bool AArch64InstructionSelector::selectConcatVectors(
4185 MachineInstr &I, MachineRegisterInfo &MRI) {
4186 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4187 "Unexpected opcode");
4188 Register Dst = I.getOperand(0).getReg();
4189 Register Op1 = I.getOperand(1).getReg();
4190 Register Op2 = I.getOperand(2).getReg();
4191 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4192 if (!ConcatMI)
4193 return false;
4194 I.eraseFromParent();
4195 return true;
4196}
4197
4198unsigned
4199AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4200 MachineFunction &MF) const {
4201 Type *CPTy = CPVal->getType();
4202 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
4203
4204 MachineConstantPool *MCP = MF.getConstantPool();
4205 return MCP->getConstantPoolIndex(CPVal, Alignment);
4206}
4207
4208MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4209 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4210 const TargetRegisterClass *RC;
4211 unsigned Opc;
4212 bool IsTiny = TM.getCodeModel() == CodeModel::Tiny;
4213 unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
4214 switch (Size) {
4215 case 16:
4216 RC = &AArch64::FPR128RegClass;
4217 Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
4218 break;
4219 case 8:
4220 RC = &AArch64::FPR64RegClass;
4221 Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
4222 break;
4223 case 4:
4224 RC = &AArch64::FPR32RegClass;
4225 Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
4226 break;
4227 case 2:
4228 RC = &AArch64::FPR16RegClass;
4229 Opc = AArch64::LDRHui;
4230 break;
4231 default:
4232 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4233 << *CPVal->getType());
4234 return nullptr;
4235 }
4236
4237 MachineInstr *LoadMI = nullptr;
4238 auto &MF = MIRBuilder.getMF();
4239 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4240 if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) {
4241 // Use load(literal) for tiny code model.
4242 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {}).addConstantPoolIndex(CPIdx);
4243 } else {
4244 auto Adrp =
4245 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4246 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
4247
4248 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {Adrp})
4249 .addConstantPoolIndex(
4250 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4251
4252 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
4253 }
4254
4255 MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4256 LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
4257 MachineMemOperand::MOLoad,
4258 Size, Align(Size)));
4259 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
4260 return LoadMI;
4261}
4262
4263 /// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
4264/// size and RB.
4265static std::pair<unsigned, unsigned>
4266getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4267 unsigned Opc, SubregIdx;
4268 if (RB.getID() == AArch64::GPRRegBankID) {
4269 if (EltSize == 8) {
4270 Opc = AArch64::INSvi8gpr;
4271 SubregIdx = AArch64::bsub;
4272 } else if (EltSize == 16) {
4273 Opc = AArch64::INSvi16gpr;
4274 SubregIdx = AArch64::ssub;
4275 } else if (EltSize == 32) {
4276 Opc = AArch64::INSvi32gpr;
4277 SubregIdx = AArch64::ssub;
4278 } else if (EltSize == 64) {
4279 Opc = AArch64::INSvi64gpr;
4280 SubregIdx = AArch64::dsub;
4281 } else {
4282 llvm_unreachable("invalid elt size!");
4283 }
4284 } else {
4285 if (EltSize == 8) {
4286 Opc = AArch64::INSvi8lane;
4287 SubregIdx = AArch64::bsub;
4288 } else if (EltSize == 16) {
4289 Opc = AArch64::INSvi16lane;
4290 SubregIdx = AArch64::hsub;
4291 } else if (EltSize == 32) {
4292 Opc = AArch64::INSvi32lane;
4293 SubregIdx = AArch64::ssub;
4294 } else if (EltSize == 64) {
4295 Opc = AArch64::INSvi64lane;
4296 SubregIdx = AArch64::dsub;
4297 } else {
4298 llvm_unreachable("invalid elt size!");
4299 }
4300 }
4301 return std::make_pair(Opc, SubregIdx);
4302}
4303
4304MachineInstr *AArch64InstructionSelector::emitInstr(
4305 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4306 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4307 const ComplexRendererFns &RenderFns) const {
4308 assert(Opcode && "Expected an opcode?");
4309 assert(!isPreISelGenericOpcode(Opcode) &&
4310 "Function should only be used to produce selected instructions!");
4311 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4312 if (RenderFns)
4313 for (auto &Fn : *RenderFns)
4314 Fn(MI);
4315 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4316 return &*MI;
4317}
4318
4319MachineInstr *AArch64InstructionSelector::emitAddSub(
4320 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4321 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4322 MachineIRBuilder &MIRBuilder) const {
4323 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4324 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4325 auto Ty = MRI.getType(LHS.getReg());
4326 assert(!Ty.isVector() && "Expected a scalar or pointer?");
4327 unsigned Size = Ty.getSizeInBits();
4328 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4329 bool Is32Bit = Size == 32;
4330
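// Row layout of AddrModeAndSizeToOpcode (see the tables in emitADD/emitADDS/
// emitSUBS): [0] immediate, [1] shifted register, [2] register-register,
// [3] negated immediate (the opposite opcode), [4] extended register; the
// second column of each row is the 32-bit variant.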
4331 // INSTRri form with positive arithmetic immediate.
4332 if (auto Fns = selectArithImmed(RHS))
4333 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4334 MIRBuilder, Fns);
4335
4336 // INSTRri form with negative arithmetic immediate.
4337 if (auto Fns = selectNegArithImmed(RHS))
4338 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4339 MIRBuilder, Fns);
4340
4341 // INSTRrx form.
4342 if (auto Fns = selectArithExtendedRegister(RHS))
4343 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4344 MIRBuilder, Fns);
4345
4346 // INSTRrs form.
4347 if (auto Fns = selectShiftedRegister(RHS))
4348 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4349 MIRBuilder, Fns);
4350 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4351 MIRBuilder);
4352}
4353
4354MachineInstr *
4355AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4356 MachineOperand &RHS,
4357 MachineIRBuilder &MIRBuilder) const {
4358 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4359 {{AArch64::ADDXri, AArch64::ADDWri},
4360 {AArch64::ADDXrs, AArch64::ADDWrs},
4361 {AArch64::ADDXrr, AArch64::ADDWrr},
4362 {AArch64::SUBXri, AArch64::SUBWri},
4363 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4364 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4365}
4366
4367MachineInstr *
4368AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4369 MachineOperand &RHS,
4370 MachineIRBuilder &MIRBuilder) const {
4371 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4372 {{AArch64::ADDSXri, AArch64::ADDSWri},
4373 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4374 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4375 {AArch64::SUBSXri, AArch64::SUBSWri},
4376 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4377 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4378}
4379
4380MachineInstr *
4381AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4382 MachineOperand &RHS,
4383 MachineIRBuilder &MIRBuilder) const {
4384 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4385 {{AArch64::SUBSXri, AArch64::SUBSWri},
4386 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4387 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4388 {AArch64::ADDSXri, AArch64::ADDSWri},
4389 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4390 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4391}
4392
4393MachineInstr *
4394AArch64InstructionSelector::emitADCS(Register Dst, MachineOperand &LHS,
4395 MachineOperand &RHS,
4396 MachineIRBuilder &MIRBuilder) const {
4397 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4398 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4399 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4400 static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr};
4401 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4402}
4403
4404MachineInstr *
4405AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS,
4406 MachineOperand &RHS,
4407 MachineIRBuilder &MIRBuilder) const {
4408 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4409 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4410 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4411 static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr};
4412 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4413}
4414
4415MachineInstr *
4416AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4417 MachineIRBuilder &MIRBuilder) const {
4418 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4419 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4420 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4421 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4422}
4423
4424MachineInstr *
4425AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4426 MachineIRBuilder &MIRBuilder) const {
4427 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4428 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4429 LLT Ty = MRI.getType(LHS.getReg());
4430 unsigned RegSize = Ty.getSizeInBits();
4431 bool Is32Bit = (RegSize == 32);
4432 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4433 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4434 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4435 // ANDS needs a logical immediate for its immediate form. Check if we can
4436 // fold one in.
4437 if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4438 int64_t Imm = ValAndVReg->Value.getSExtValue();
4439
4440 if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
4441 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4442 TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
4443 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4444 return &*TstMI;
4445 }
4446 }
4447
4448 if (auto Fns = selectLogicalShiftedRegister(RHS))
4449 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4450 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4451}
4452
4453MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4454 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4455 MachineIRBuilder &MIRBuilder) const {
4456 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4457 assert(Predicate.isPredicate() && "Expected predicate?");
4458 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4459 LLT CmpTy = MRI.getType(LHS.getReg());
4460 assert(!CmpTy.isVector() && "Expected scalar or pointer");
4461 unsigned Size = CmpTy.getSizeInBits();
4462 (void)Size;
4463 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4464 // Fold the compare into a cmn or tst if possible.
4465 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4466 return FoldCmp;
4467 auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
4468 return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4469}
4470
4471MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4472 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4473 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4474#ifndef NDEBUG
4475 LLT Ty = MRI.getType(Dst);
4476 assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4477 "Expected a 32-bit scalar register?");
4478#endif
4479 const Register ZReg = AArch64::WZR;
4480 AArch64CC::CondCode CC1, CC2;
4481 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
4482 auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
4483 if (CC2 == AArch64CC::AL)
4484 return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
4485 MIRBuilder);
4486 const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4487 Register Def1Reg = MRI.createVirtualRegister(RC);
4488 Register Def2Reg = MRI.createVirtualRegister(RC);
4489 auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
4490 emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
4491 emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
4492 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4493 constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4494 return &*OrMI;
4495}
4496
4497MachineInstr *AArch64InstructionSelector::emitFPCompare(
4498 Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
4499 std::optional<CmpInst::Predicate> Pred) const {
4500 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4501 LLT Ty = MRI.getType(LHS);
4502 if (Ty.isVector())
4503 return nullptr;
4504 unsigned OpSize = Ty.getSizeInBits();
4505 assert(OpSize == 16 || OpSize == 32 || OpSize == 64);
4506
4507 // If this is a compare against +0.0, then we don't have
4508 // to explicitly materialize a constant.
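// For example (illustrative), G_FCMP oeq %x:fpr(s32), 0.0 can be selected as
//   FCMPSri %x                               ; compares against #0.0
// rather than materializing 0.0 into an FPR and using FCMPSrr.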
4509 const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4510 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4511
4512 auto IsEqualityPred = [](CmpInst::Predicate P) {
4513 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4514 P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4515 };
4516 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4517 // Try commutating the operands.
4518 const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4519 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4520 ShouldUseImm = true;
4521 std::swap(LHS, RHS);
4522 }
4523 }
4524 unsigned CmpOpcTbl[2][3] = {
4525 {AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr},
4526 {AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}};
4527 unsigned CmpOpc =
4528 CmpOpcTbl[ShouldUseImm][OpSize == 16 ? 0 : (OpSize == 32 ? 1 : 2)];
4529
4530 // Partially build the compare. Decide if we need to add a use for the
4531 // third operand based off whether or not we're comparing against 0.0.
4532 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4533 CmpMI.setMIFlags(MachineInstr::NoFPExcept);
4534 if (!ShouldUseImm)
4535 CmpMI.addUse(RHS);
4536 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4537 return &*CmpMI;
4538}
4539
4540MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4541 std::optional<Register> Dst, Register Op1, Register Op2,
4542 MachineIRBuilder &MIRBuilder) const {
4543 // We implement a vector concat by:
4544 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4545 // 2. Insert the upper vector into the destination's upper element
4546 // TODO: some of this code is common with G_BUILD_VECTOR handling.
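// For example (illustrative), concatenating two v2s32 values:
//   %w1:fpr128  = INSERT_SUBREG (IMPLICIT_DEF), %op1, %subreg.dsub
//   %w2:fpr128  = INSERT_SUBREG (IMPLICIT_DEF), %op2, %subreg.dsub
//   %dst:fpr128 = INSvi64lane %w1, 1, %w2, 0  ; op2 becomes the upper half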
4547 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4548
4549 const LLT Op1Ty = MRI.getType(Op1);
4550 const LLT Op2Ty = MRI.getType(Op2);
4551
4552 if (Op1Ty != Op2Ty) {
4553 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4554 return nullptr;
4555 }
4556 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4557
4558 if (Op1Ty.getSizeInBits() >= 128) {
4559 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4560 return nullptr;
4561 }
4562
4563 // At the moment we just support 64 bit vector concats.
4564 if (Op1Ty.getSizeInBits() != 64) {
4565 LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors");
4566 return nullptr;
4567 }
4568
4569 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4570 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4571 const TargetRegisterClass *DstRC =
4572 getRegClassForTypeOnBank(Op1Ty.multiplyElements(2), FPRBank);
4573
4574 MachineInstr *WidenedOp1 =
4575 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4576 MachineInstr *WidenedOp2 =
4577 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4578 if (!WidenedOp1 || !WidenedOp2) {
4579 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4580 return nullptr;
4581 }
4582
4583 // Now do the insert of the upper element.
4584 unsigned InsertOpc, InsSubRegIdx;
4585 std::tie(InsertOpc, InsSubRegIdx) =
4586 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4587
4588 if (!Dst)
4589 Dst = MRI.createVirtualRegister(DstRC);
4590 auto InsElt =
4591 MIRBuilder
4592 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4593 .addImm(1) /* Lane index */
4594 .addUse(WidenedOp2->getOperand(0).getReg())
4595 .addImm(0);
4596 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4597 return &*InsElt;
4598}
4599
4600MachineInstr *
4601AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
4602 Register Src2, AArch64CC::CondCode Pred,
4603 MachineIRBuilder &MIRBuilder) const {
4604 auto &MRI = *MIRBuilder.getMRI();
4605 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
4606 // If we used a register class, then this won't necessarily have an LLT.
4607 // Compute the size based off whether or not we have a class or bank.
4608 unsigned Size;
4609 if (const auto *RC = dyn_cast<const TargetRegisterClass *>(RegClassOrBank))
4610 Size = TRI.getRegSizeInBits(*RC);
4611 else
4612 Size = MRI.getType(Dst).getSizeInBits();
4613 // Some opcodes use s1.
4614 assert(Size <= 64 && "Expected 64 bits or less only!");
4615 static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4616 unsigned Opc = OpcTable[Size == 64];
4617 auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
4618 constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
4619 return &*CSINC;
4620}
4621
4622MachineInstr *AArch64InstructionSelector::emitCarryIn(MachineInstr &I,
4623 Register CarryReg) {
4624 MachineRegisterInfo *MRI = MIB.getMRI();
4625 unsigned Opcode = I.getOpcode();
4626
4627 // If the instruction is a SUB, we need to negate the carry,
4628 // because borrowing is indicated by carry-flag == 0.
4629 bool NeedsNegatedCarry =
4630 (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE);
4631
4632 // If the previous instruction will already produce the correct carry, do not
4633 // emit a carry generating instruction. E.g. for G_UADDE/G_USUBE sequences
4634 // generated during legalization of wide add/sub. This optimization depends on
4635 // these sequences not being interrupted by other instructions.
4636 // We have to select the previous instruction before the carry-using
4637 // instruction is deleted by the calling function, otherwise the previous
4638 // instruction might become dead and would get deleted.
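// For example (illustrative), in a legalized 128-bit add
//   %lo, %c  = G_UADDO %a0, %b0
//   %hi, %c2 = G_UADDE %a1, %b1, %c
// the ADDS selected for the G_UADDO already leaves the desired carry in NZCV,
// so no extra carry-setting instruction is needed for the G_UADDE.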
4639 MachineInstr *SrcMI = MRI->getVRegDef(CarryReg);
4640 if (SrcMI == I.getPrevNode()) {
4641 if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(SrcMI)) {
4642 bool ProducesNegatedCarry = CarrySrcMI->isSub();
4643 if (NeedsNegatedCarry == ProducesNegatedCarry &&
4644 CarrySrcMI->isUnsigned() &&
4645 CarrySrcMI->getCarryOutReg() == CarryReg &&
4646 selectAndRestoreState(*SrcMI))
4647 return nullptr;
4648 }
4649 }
4650
4651 Register DeadReg = MRI->createVirtualRegister(&AArch64::GPR32RegClass);
4652
4653 if (NeedsNegatedCarry) {
4654 // (0 - Carry) sets !C in NZCV when Carry == 1
4655 Register ZReg = AArch64::WZR;
4656 return emitInstr(AArch64::SUBSWrr, {DeadReg}, {ZReg, CarryReg}, MIB);
4657 }
4658
4659 // (Carry - 1) sets !C in NZCV when Carry == 0
4660 auto Fns = select12BitValueWithLeftShift(1);
4661 return emitInstr(AArch64::SUBSWri, {DeadReg}, {CarryReg}, MIB, Fns);
4662}
4663
4664bool AArch64InstructionSelector::selectOverflowOp(MachineInstr &I,
4665 MachineRegisterInfo &MRI) {
4666 auto &CarryMI = cast<GAddSubCarryOut>(I);
4667
4668 if (auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(&I)) {
4669 // Set NZCV carry according to carry-in VReg
4670 emitCarryIn(I, CarryInMI->getCarryInReg());
4671 }
4672
4673 // Emit the operation and get the correct condition code.
4674 auto OpAndCC = emitOverflowOp(I.getOpcode(), CarryMI.getDstReg(),
4675 CarryMI.getLHS(), CarryMI.getRHS(), MIB);
4676
4677 Register CarryOutReg = CarryMI.getCarryOutReg();
4678
4679 // Don't convert carry-out to VReg if it is never used
4680 if (!MRI.use_nodbg_empty(CarryOutReg)) {
4681 // Now, put the overflow result in the register given by the first operand
4682 // to the overflow op. CSINC increments the result when the predicate is
4683 // false, so to get the increment when it's true, we need to use the
4684 // inverse. In this case, we want to increment when carry is set.
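// For example (illustrative), G_UADDO reports overflow on HS (carry set), so
// the boolean is produced as:
//   %ov:gpr32 = CSINCWr $wzr, $wzr, lo       ; lo is the inverse of hs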
4685 Register ZReg = AArch64::WZR;
4686 emitCSINC(/*Dst=*/CarryOutReg, /*Src1=*/ZReg, /*Src2=*/ZReg,
4687 getInvertedCondCode(OpAndCC.second), MIB);
4688 }
4689
4690 I.eraseFromParent();
4691 return true;
4692}
4693
4694std::pair<MachineInstr *, AArch64CC::CondCode>
4695AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4696 MachineOperand &LHS,
4697 MachineOperand &RHS,
4698 MachineIRBuilder &MIRBuilder) const {
4699 switch (Opcode) {
4700 default:
4701 llvm_unreachable("Unexpected opcode!");
4702 case TargetOpcode::G_SADDO:
4703 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4704 case TargetOpcode::G_UADDO:
4705 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4706 case TargetOpcode::G_SSUBO:
4707 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4708 case TargetOpcode::G_USUBO:
4709 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4710 case TargetOpcode::G_SADDE:
4711 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4712 case TargetOpcode::G_UADDE:
4713 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4714 case TargetOpcode::G_SSUBE:
4715 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4716 case TargetOpcode::G_USUBE:
4717 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4718 }
4719}
4720
4721/// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
4722/// expressed as a conjunction.
4723/// \param CanNegate Set to true if we can negate the whole sub-tree just by
4724/// changing the conditions on the CMP tests.
4725/// (this means we can call emitConjunctionRec() with
4726/// Negate==true on this sub-tree)
4727/// \param MustBeFirst Set to true if this subtree needs to be negated and we
4728/// cannot do the negation naturally. We are required to
4729/// emit the subtree first in this case.
4730 /// \param WillNegate Is true if we are called when the result of this
4731/// subexpression must be negated. This happens when the
4732/// outer expression is an OR. We can use this fact to know
4733/// that we have a double negation (or (or ...) ...) that
4734/// can be implemented for free.
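/// For example, (and (icmp a, b), (icmp c, d)) can be emitted as one ordinary
/// flag-setting compare followed by a conditional compare (CCMP/FCCMP) whose
/// flag update is predicated on the first comparison, so the final condition
/// code tests both predicates.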
4735static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
4736 bool WillNegate, MachineRegisterInfo &MRI,
4737 unsigned Depth = 0) {
4738 if (!MRI.hasOneNonDBGUse(Val))
4739 return false;
4740 MachineInstr *ValDef = MRI.getVRegDef(Val);
4741 unsigned Opcode = ValDef->getOpcode();
4742 if (isa<GAnyCmp>(ValDef)) {
4743 CanNegate = true;
4744 MustBeFirst = false;
4745 return true;
4746 }
4747 // Protect against exponential runtime and stack overflow.
4748 if (Depth > 6)
4749 return false;
4750 if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
4751 bool IsOR = Opcode == TargetOpcode::G_OR;
4752 Register O0 = ValDef->getOperand(1).getReg();
4753 Register O1 = ValDef->getOperand(2).getReg();
4754 bool CanNegateL;
4755 bool MustBeFirstL;
4756 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, MRI, Depth + 1))
4757 return false;
4758 bool CanNegateR;
4759 bool MustBeFirstR;
4760 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, MRI, Depth + 1))
4761 return false;
4762
4763 if (MustBeFirstL && MustBeFirstR)
4764 return false;
4765
4766 if (IsOR) {
4767 // For an OR expression we need to be able to naturally negate at least
4768 // one side or we cannot do the transformation at all.
4769 if (!CanNegateL && !CanNegateR)
4770 return false;
4771 // If the result of the OR will be negated and we can naturally negate
4772 // the leaves, then this sub-tree as a whole negates naturally.
4773 CanNegate = WillNegate && CanNegateL && CanNegateR;
4774 // If we cannot naturally negate the whole sub-tree, then this must be
4775 // emitted first.
4776 MustBeFirst = !CanNegate;
4777 } else {
4778 assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
4779 // We cannot naturally negate an AND operation.
4780 CanNegate = false;
4781 MustBeFirst = MustBeFirstL || MustBeFirstR;
4782 }
4783 return true;
4784 }
4785 return false;
4786}
4787
4788 MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
4789 Register LHS, Register RHS, CmpInst::Predicate CC,
4790 AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
4791 MachineIRBuilder &MIB) const {
4792 auto &MRI = *MIB.getMRI();
4793 LLT OpTy = MRI.getType(LHS);
4794 unsigned CCmpOpc;
4795 std::optional<ValueAndVReg> C;
4796 if (CmpInst::isIntPredicate(CC)) {
4797 assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
4798 C = getIConstantVRegValWithLookThrough(RHS, MRI);
4799 if (!C || C->Value.sgt(31) || C->Value.slt(-31))
4800 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
4801 else if (C->Value.ule(31))
4802 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
4803 else
4804 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMNWi : AArch64::CCMNXi;
4805 } else {
4806 assert(OpTy.getSizeInBits() == 16 || OpTy.getSizeInBits() == 32 ||
4807 OpTy.getSizeInBits() == 64);
4808 switch (OpTy.getSizeInBits()) {
4809 case 16:
4810 assert(STI.hasFullFP16() && "Expected Full FP16 for fp16 comparisons");
4811 CCmpOpc = AArch64::FCCMPHrr;
4812 break;
4813 case 32:
4814 CCmpOpc = AArch64::FCCMPSrr;
4815 break;
4816 case 64:
4817 CCmpOpc = AArch64::FCCMPDrr;
4818 break;
4819 default:
4820 return nullptr;
4821 }
4822 }
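// A conditional compare only performs the comparison when the current flags
// satisfy Predicate; otherwise it sets the flags to the immediate NZCV value
// computed below, which is chosen so that OutCC evaluates to false.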
4823 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
4824 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
4825 auto CCmp =
4826 MIB.buildInstr(CCmpOpc, {}, {LHS});
4827 if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
4828 CCmp.addImm(C->Value.getZExtValue());
4829 else if (CCmpOpc == AArch64::CCMNWi || CCmpOpc == AArch64::CCMNXi)
4830 CCmp.addImm(C->Value.abs().getZExtValue());
4831 else
4832 CCmp.addReg(RHS);
4833 CCmp.addImm(NZCV).addImm(Predicate);
4834 constrainSelectedInstRegOperands(*CCmp, TII, TRI, RBI);
4835 return &*CCmp;
4836}
4837
4838MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
4839 Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
4840 AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
4841 // We're at a tree leaf, produce a conditional comparison operation.
4842 auto &MRI = *MIB.getMRI();
4843 MachineInstr *ValDef = MRI.getVRegDef(Val);
4844 unsigned Opcode = ValDef->getOpcode();
4845 if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
4846 Register LHS = Cmp->getLHSReg();
4847 Register RHS = Cmp->getRHSReg();
4848 CmpInst::Predicate CC = Cmp->getCond();
4849 if (Negate)
4850 CC = CmpInst::getInversePredicate(CC);
4851 if (isa<GICmp>(Cmp)) {
4852 OutCC = changeICMPPredToAArch64CC(CC, RHS, MIB.getMRI());
4853 } else {
4854 // Handle special FP cases.
4855 AArch64CC::CondCode ExtraCC;
4856 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
4857 // Some floating point conditions can't be tested with a single condition
4858 // code. Construct an additional comparison in this case.
4859 if (ExtraCC != AArch64CC::AL) {
4860 MachineInstr *ExtraCmp;
4861 if (!CCOp)
4862 ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC);
4863 else
4864 ExtraCmp =
4865 emitConditionalComparison(LHS, RHS, CC, Predicate, ExtraCC, MIB);
4866 CCOp = ExtraCmp->getOperand(0).getReg();
4867 Predicate = ExtraCC;
4868 }
4869 }
4870
4871 // Produce a normal comparison if we are first in the chain
4872 if (!CCOp) {
4873 auto Dst = MRI.cloneVirtualRegister(LHS);
4874 if (isa<GICmp>(Cmp))
4875 return emitSUBS(Dst, Cmp->getOperand(2), Cmp->getOperand(3), MIB);
4876 return emitFPCompare(Cmp->getOperand(2).getReg(),
4877 Cmp->getOperand(3).getReg(), MIB);
4878 }
4879 // Otherwise produce a ccmp.
4880 return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB);
4881 }
4882 assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");
4883
4884 bool IsOR = Opcode == TargetOpcode::G_OR;
4885
4886 Register LHS = ValDef->getOperand(1).getReg();
4887 bool CanNegateL;
4888 bool MustBeFirstL;
4889 bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR, MRI);
4890 assert(ValidL && "Valid conjunction/disjunction tree");
4891 (void)ValidL;
4892
4893 Register RHS = ValDef->getOperand(2).getReg();
4894 bool CanNegateR;
4895 bool MustBeFirstR;
4896 bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR, MRI);
4897 assert(ValidR && "Valid conjunction/disjunction tree");
4898 (void)ValidR;
4899
4900 // Swap sub-tree that must come first to the right side.
4901 if (MustBeFirstL) {
4902 assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
4903 std::swap(LHS, RHS);
4904 std::swap(CanNegateL, CanNegateR);
4905 std::swap(MustBeFirstL, MustBeFirstR);
4906 }
4907
4908 bool NegateR;
4909 bool NegateAfterR;
4910 bool NegateL;
4911 bool NegateAfterAll;
4912 if (Opcode == TargetOpcode::G_OR) {
4913 // Swap the sub-tree that we can negate naturally to the left.
4914 if (!CanNegateL) {
4915 assert(CanNegateR && "at least one side must be negatable");
4916 assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
4917 assert(!Negate);
4918 std::swap(LHS, RHS);
4919 NegateR = false;
4920 NegateAfterR = true;
4921 } else {
4922 // Negate the left sub-tree if possible, otherwise negate the result.
4923 NegateR = CanNegateR;
4924 NegateAfterR = !CanNegateR;
4925 }
4926 NegateL = true;
4927 NegateAfterAll = !Negate;
4928 } else {
4929 assert(Opcode == TargetOpcode::G_AND &&
4930 "Valid conjunction/disjunction tree");
4931 assert(!Negate && "Valid conjunction/disjunction tree");
4932
4933 NegateL = false;
4934 NegateR = false;
4935 NegateAfterR = false;
4936 NegateAfterAll = false;
4937 }
4938
4939 // Emit sub-trees.
4940 AArch64CC::CondCode RHSCC;
4941 MachineInstr *CmpR =
4942 emitConjunctionRec(RHS, RHSCC, NegateR, CCOp, Predicate, MIB);
4943 if (NegateAfterR)
4944 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
4945 MachineInstr *CmpL = emitConjunctionRec(
4946 LHS, OutCC, NegateL, CmpR->getOperand(0).getReg(), RHSCC, MIB);
4947 if (NegateAfterAll)
4948 OutCC = AArch64CC::getInvertedCondCode(OutCC);
4949 return CmpL;
4950}
4951
4952MachineInstr *AArch64InstructionSelector::emitConjunction(
4953 Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {
4954 bool DummyCanNegate;
4955 bool DummyMustBeFirst;
4956 if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false,
4957 *MIB.getMRI()))
4958 return nullptr;
4959 return emitConjunctionRec(Val, OutCC, false, Register(), AArch64CC::AL, MIB);
4960}
4961
4962bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
4963 MachineInstr &CondMI) {
4964 AArch64CC::CondCode AArch64CC;
4965 MachineInstr *ConjMI = emitConjunction(SelI.getCondReg(), AArch64CC, MIB);
4966 if (!ConjMI)
4967 return false;
4968
4969 emitSelect(SelI.getReg(0), SelI.getTrueReg(), SelI.getFalseReg(), AArch64CC, MIB);
4970 SelI.eraseFromParent();
4971 return true;
4972}
4973
4974bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
4975 MachineRegisterInfo &MRI = *MIB.getMRI();
4976 // We want to recognize this pattern:
4977 //
4978 // $z = G_FCMP pred, $x, $y
4979 // ...
4980 // $w = G_SELECT $z, $a, $b
4981 //
4982 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
4983 // some copies/truncs in between.)
4984 //
4985 // If we see this, then we can emit something like this:
4986 //
4987 // fcmp $x, $y
4988 // fcsel $w, $a, $b, pred
4989 //
4990 // Rather than emitting both of the rather long sequences in the standard
4991 // G_FCMP/G_SELECT select methods.
4992
4993 // First, check if the condition is defined by a compare.
4994 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
4995
4996 // We can only fold if all of the defs have one use.
4997 Register CondDefReg = CondDef->getOperand(0).getReg();
4998 if (!MRI.hasOneNonDBGUse(CondDefReg)) {
4999 // Unless it's another select.
5000 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
5001 if (CondDef == &UI)
5002 continue;
5003 if (UI.getOpcode() != TargetOpcode::G_SELECT)
5004 return false;
5005 }
5006 }
5007
5008 // Is the condition defined by a compare?
5009 unsigned CondOpc = CondDef->getOpcode();
5010 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
5011 if (tryOptSelectConjunction(I, *CondDef))
5012 return true;
5013 return false;
5014 }
5015
5016 AArch64CC::CondCode CondCode;
5017 if (CondOpc == TargetOpcode::G_ICMP) {
5018 auto &PredOp = CondDef->getOperand(1);
5019 emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3), PredOp,
5020 MIB);
5021 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
5022 CondCode =
5023 changeICMPPredToAArch64CC(Pred, CondDef->getOperand(3).getReg(), &MRI);
5024 } else {
5025 // Get the condition code for the select.
5026 auto Pred =
5027 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
5028 AArch64CC::CondCode CondCode2;
5029 changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
5030
5031 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
5032 // instructions to emit the comparison.
5033 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
5034 // unnecessary.
5035 if (CondCode2 != AArch64CC::AL)
5036 return false;
5037
5038 if (!emitFPCompare(CondDef->getOperand(2).getReg(),
5039 CondDef->getOperand(3).getReg(), MIB)) {
5040 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
5041 return false;
5042 }
5043 }
5044
5045 // Emit the select.
5046 emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
5047 I.getOperand(3).getReg(), CondCode, MIB);
5048 I.eraseFromParent();
5049 return true;
5050}
5051
5052MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
5053 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
5054 MachineIRBuilder &MIRBuilder) const {
5055 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
5056 "Unexpected MachineOperand");
5057 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5058 // We want to find this sort of thing:
5059 // x = G_SUB 0, y
5060 // G_ICMP z, x
5061 //
5062 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
5063 // e.g:
5064 //
5065 // cmn z, y
5066
5067 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
5068 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
5069 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
5070 auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
5071
5072 // Given this:
5073 //
5074 // x = G_SUB 0, y
5075 // G_ICMP z, x
5076 //
5077 // Produce this:
5078 //
5079 // cmn z, y
5080 if (isCMN(RHSDef, P, MRI))
5081 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
5082
5083 // Same idea here, but with the LHS of the compare instead:
5084 //
5085 // Given this:
5086 //
5087 // x = G_SUB 0, y
5088 // G_ICMP x, z
5089 //
5090 // Produce this:
5091 //
5092 // cmn y, z
5093 //
5094 // But be careful! We need to swap the predicate!
5095 if (isCMN(LHSDef, P, MRI)) {
5096 if (!CmpInst::isEquality(P)) {
5097 P = CmpInst::getSwappedPredicate(P);
5098 Predicate = MachineOperand::CreatePredicate(P);
5099 }
5100 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
5101 }
5102
5103 // Given this:
5104 //
5105 // z = G_AND x, y
5106 // G_ICMP z, 0
5107 //
5108 // Produce this if the compare is signed:
5109 //
5110 // tst x, y
5111 if (!CmpInst::isUnsigned(P) && LHSDef &&
5112 LHSDef->getOpcode() == TargetOpcode::G_AND) {
5113 // Make sure that the RHS is 0.
5114 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
5115 if (!ValAndVReg || ValAndVReg->Value != 0)
5116 return nullptr;
5117
5118 return emitTST(LHSDef->getOperand(1),
5119 LHSDef->getOperand(2), MIRBuilder);
5120 }
5121
5122 return nullptr;
5123}
5124
5125bool AArch64InstructionSelector::selectShuffleVector(
5126 MachineInstr &I, MachineRegisterInfo &MRI) {
5127 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5128 Register Src1Reg = I.getOperand(1).getReg();
5129 Register Src2Reg = I.getOperand(2).getReg();
5130 ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
5131
5132 MachineBasicBlock &MBB = *I.getParent();
5133 MachineFunction &MF = *MBB.getParent();
5134 LLVMContext &Ctx = MF.getFunction().getContext();
5135
5136 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
5137
5138 SmallVector<Constant *, 64> CstIdxs;
5139 for (int Val : Mask) {
5140 // For now, any undef indexes we'll just assume to be 0. This should be
5141 // optimized in future, e.g. to select DUP etc.
5142 Val = Val < 0 ? 0 : Val;
5143 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
5144 unsigned Offset = Byte + Val * BytesPerElt;
5145 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
5146 }
5147 }
5148
5149 // Use a constant pool to load the index vector for TBL.
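// For example, a <4 x s32> shuffle with mask <1,0,3,2> produces the byte
// indices 4..7, 0..3, 12..15, 8..11 for the table lookup.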
5150 Constant *CPVal = ConstantVector::get(CstIdxs);
5151 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB);
5152 if (!IndexLoad) {
5153 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
5154 return false;
5155 }
5156
5157 if (DstTy.getSizeInBits() != 128) {
5158 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
5159 // This case can be done with TBL1.
5160 MachineInstr *Concat =
5161 emitVectorConcat(std::nullopt, Src1Reg, Src2Reg, MIB);
5162 if (!Concat) {
5163 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
5164 return false;
5165 }
5166
5167 // The constant pool load will be 64 bits, so we need to convert it to an FPR128 reg.
5168 IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
5169 IndexLoad->getOperand(0).getReg(), MIB);
5170
5171 auto TBL1 = MIB.buildInstr(
5172 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
5173 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
5174 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
5175
5176 auto Copy =
5177 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
5178 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
5179 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
5180 I.eraseFromParent();
5181 return true;
5182 }
5183
5184 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
5185 // Q registers for regalloc.
5186 SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
5187 auto RegSeq = createQTuple(Regs, MIB);
5188 auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
5189 {RegSeq, IndexLoad->getOperand(0)});
5190 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
5191 I.eraseFromParent();
5192 return true;
5193}
5194
5195MachineInstr *AArch64InstructionSelector::emitLaneInsert(
5196 std::optional<Register> DstReg, Register SrcReg, Register EltReg,
5197 unsigned LaneIdx, const RegisterBank &RB,
5198 MachineIRBuilder &MIRBuilder) const {
5199 MachineInstr *InsElt = nullptr;
5200 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5201 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5202
5203 // Create a register to define with the insert if one wasn't passed in.
5204 if (!DstReg)
5205 DstReg = MRI.createVirtualRegister(DstRC);
5206
5207 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
5208 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
5209
5210 if (RB.getID() == AArch64::FPRRegBankID) {
5211 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
5212 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5213 .addImm(LaneIdx)
5214 .addUse(InsSub->getOperand(0).getReg())
5215 .addImm(0);
5216 } else {
5217 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5218 .addImm(LaneIdx)
5219 .addUse(EltReg);
5220 }
5221
5222 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
5223 return InsElt;
5224}
5225
5226bool AArch64InstructionSelector::selectUSMovFromExtend(
5227 MachineInstr &MI, MachineRegisterInfo &MRI) {
5228 if (MI.getOpcode() != TargetOpcode::G_SEXT &&
5229 MI.getOpcode() != TargetOpcode::G_ZEXT &&
5230 MI.getOpcode() != TargetOpcode::G_ANYEXT)
5231 return false;
5232 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
5233 const Register DefReg = MI.getOperand(0).getReg();
5234 const LLT DstTy = MRI.getType(DefReg);
5235 unsigned DstSize = DstTy.getSizeInBits();
5236
5237 if (DstSize != 32 && DstSize != 64)
5238 return false;
5239
5240 MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,
5241 MI.getOperand(1).getReg(), MRI);
5242 int64_t Lane;
5243 if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane)))
5244 return false;
5245 Register Src0 = Extract->getOperand(1).getReg();
5246
5247 const LLT VecTy = MRI.getType(Src0);
5248 if (VecTy.isScalableVector())
5249 return false;
5250
5251 if (VecTy.getSizeInBits() != 128) {
5252 const MachineInstr *ScalarToVector = emitScalarToVector(
5253 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
5254 assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
5255 Src0 = ScalarToVector->getOperand(0).getReg();
5256 }
5257
5258 unsigned Opcode;
5259 if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
5260 Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
5261 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
5262 Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
5263 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
5264 Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
5265 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
5266 Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
5267 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
5268 Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
5269 else
5270 llvm_unreachable("Unexpected type combo for S/UMov!");
5271
5272 // We may need to generate one of these, depending on the type and sign of the
5273 // input:
5274 // DstReg = SMOV Src0, Lane;
5275 // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
5276 MachineInstr *ExtI = nullptr;
5277 if (DstSize == 64 && !IsSigned) {
5278 Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
5279 MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
5280 ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
5281 .addImm(0)
5282 .addUse(NewReg)
5283 .addImm(AArch64::sub_32);
5284 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
5285 } else
5286 ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
5287
5288 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
5289 MI.eraseFromParent();
5290 return true;
5291}
5292
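// The tryAdvSIMDModImm* helpers below try to materialize a splatted constant
// with a single MOVI/MVNI/FMOV modified-immediate instruction, returning
// nullptr when the bit pattern cannot be encoded so that emitConstantVector
// can fall back to a constant-pool load.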
5293MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(
5294 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5295 unsigned int Op;
5296 if (DstSize == 128) {
5297 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5298 return nullptr;
5299 Op = AArch64::MOVIv16b_ns;
5300 } else {
5301 Op = AArch64::MOVIv8b_ns;
5302 }
5303
5304 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5305
5306 if (AArch64_AM::isAdvSIMDModImmType9(Val)) {
5307 Val = AArch64_AM::encodeAdvSIMDModImmType9(Val);
5308 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5309 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5310 return &*Mov;
5311 }
5312 return nullptr;
5313}
5314
5315MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16(
5316 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5317 bool Inv) {
5318
5319 unsigned int Op;
5320 if (DstSize == 128) {
5321 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5322 return nullptr;
5323 Op = Inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16;
5324 } else {
5325 Op = Inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16;
5326 }
5327
5328 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5329 uint64_t Shift;
5330
5331 if (AArch64_AM::isAdvSIMDModImmType5(Val)) {
5332 Val = AArch64_AM::encodeAdvSIMDModImmType5(Val);
5333 Shift = 0;
5334 } else if (AArch64_AM::isAdvSIMDModImmType6(Val)) {
5335 Val = AArch64_AM::encodeAdvSIMDModImmType6(Val);
5336 Shift = 8;
5337 } else
5338 return nullptr;
5339
5340 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5341 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5342 return &*Mov;
5343}
5344
5345MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32(
5346 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5347 bool Inv) {
5348
5349 unsigned int Op;
5350 if (DstSize == 128) {
5351 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5352 return nullptr;
5353 Op = Inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32;
5354 } else {
5355 Op = Inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32;
5356 }
5357
5358 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5359 uint64_t Shift;
5360
5361 if (AArch64_AM::isAdvSIMDModImmType1(Val)) {
5362 Val = AArch64_AM::encodeAdvSIMDModImmType1(Val);
5363 Shift = 0;
5364 } else if ((AArch64_AM::isAdvSIMDModImmType2(Val))) {
5365 Val = AArch64_AM::encodeAdvSIMDModImmType2(Val);
5366 Shift = 8;
5367 } else if ((AArch64_AM::isAdvSIMDModImmType3(Val))) {
5368 Val = AArch64_AM::encodeAdvSIMDModImmType3(Val);
5369 Shift = 16;
5370 } else if ((AArch64_AM::isAdvSIMDModImmType4(Val))) {
5371 Val = AArch64_AM::encodeAdvSIMDModImmType4(Val);
5372 Shift = 24;
5373 } else
5374 return nullptr;
5375
5376 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5377 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5378 return &*Mov;
5379}
5380
5381MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64(
5382 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5383
5384 unsigned int Op;
5385 if (DstSize == 128) {
5386 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5387 return nullptr;
5388 Op = AArch64::MOVIv2d_ns;
5389 } else {
5390 Op = AArch64::MOVID;
5391 }
5392
5393 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5394 if (AArch64_AM::isAdvSIMDModImmType10(Val)) {
5395 Val = AArch64_AM::encodeAdvSIMDModImmType10(Val);
5396 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5397 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5398 return &*Mov;
5399 }
5400 return nullptr;
5401}
5402
5403MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s(
5404 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5405 bool Inv) {
5406
5407 unsigned int Op;
5408 if (DstSize == 128) {
5409 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5410 return nullptr;
5411 Op = Inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl;
5412 } else {
5413 Op = Inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl;
5414 }
5415
5416 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5417 uint64_t Shift;
5418
5419 if (AArch64_AM::isAdvSIMDModImmType7(Val)) {
5420 Val = AArch64_AM::encodeAdvSIMDModImmType7(Val);
5421 Shift = 264;
5422 } else if (AArch64_AM::isAdvSIMDModImmType8(Val)) {
5423 Val = AArch64_AM::encodeAdvSIMDModImmType8(Val);
5424 Shift = 272;
5425 } else
5426 return nullptr;
5427
5428 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5429 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5430 return &*Mov;
5431}
5432
5433MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
5434 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5435
5436 unsigned int Op;
5437 bool IsWide = false;
5438 if (DstSize == 128) {
5439 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5440 return nullptr;
5441 Op = AArch64::FMOVv4f32_ns;
5442 IsWide = true;
5443 } else {
5444 Op = AArch64::FMOVv2f32_ns;
5445 }
5446
5447 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5448
5449 if (AArch64_AM::isAdvSIMDModImmType11(Val)) {
5450 Val = AArch64_AM::encodeAdvSIMDModImmType11(Val);
5451 } else if (IsWide && AArch64_AM::isAdvSIMDModImmType12(Val)) {
5452 Val = AArch64_AM::encodeAdvSIMDModImmType12(Val);
5453 Op = AArch64::FMOVv2f64_ns;
5454 } else
5455 return nullptr;
5456
5457 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5458 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5459 return &*Mov;
5460}
5461
5462bool AArch64InstructionSelector::selectIndexedExtLoad(
5463 MachineInstr &MI, MachineRegisterInfo &MRI) {
5464 auto &ExtLd = cast<GIndexedAnyExtLoad>(MI);
5465 Register Dst = ExtLd.getDstReg();
5466 Register WriteBack = ExtLd.getWritebackReg();
5467 Register Base = ExtLd.getBaseReg();
5468 Register Offset = ExtLd.getOffsetReg();
5469 LLT Ty = MRI.getType(Dst);
5470 assert(Ty.getSizeInBits() <= 64); // Only for scalar GPRs.
5471 unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();
5472 bool IsPre = ExtLd.isPre();
5473 bool IsSExt = isa<GIndexedSExtLoad>(ExtLd);
5474 unsigned InsertIntoSubReg = 0;
5475 bool IsDst64 = Ty.getSizeInBits() == 64;
5476
5477 // ZExt/SExt loads should be on the GPR bank, but we can also handle extload
5478 // and zextload of FPR values, as long as they are scalar.
5479 bool IsFPR = RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID;
5480 if ((IsSExt && IsFPR) || Ty.isVector())
5481 return false;
5482
5483 unsigned Opc = 0;
5484 LLT NewLdDstTy;
5485 LLT s32 = LLT::scalar(32);
5486 LLT s64 = LLT::scalar(64);
5487
5488 if (MemSizeBits == 8) {
5489 if (IsSExt) {
5490 if (IsDst64)
5491 Opc = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
5492 else
5493 Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
5494 NewLdDstTy = IsDst64 ? s64 : s32;
5495 } else if (IsFPR) {
5496 Opc = IsPre ? AArch64::LDRBpre : AArch64::LDRBpost;
5497 InsertIntoSubReg = AArch64::bsub;
5498 NewLdDstTy = LLT::scalar(MemSizeBits);
5499 } else {
5500 Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
5501 InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
5502 NewLdDstTy = s32;
5503 }
5504 } else if (MemSizeBits == 16) {
5505 if (IsSExt) {
5506 if (IsDst64)
5507 Opc = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
5508 else
5509 Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
5510 NewLdDstTy = IsDst64 ? s64 : s32;
5511 } else if (IsFPR) {
5512 Opc = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
5513 InsertIntoSubReg = AArch64::hsub;
5514 NewLdDstTy = LLT::scalar(MemSizeBits);
5515 } else {
5516 Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
5517 InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
5518 NewLdDstTy = s32;
5519 }
5520 } else if (MemSizeBits == 32) {
5521 if (IsSExt) {
5522 Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
5523 NewLdDstTy = s64;
5524 } else if (IsFPR) {
5525 Opc = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
5526 InsertIntoSubReg = AArch64::ssub;
5527 NewLdDstTy = LLT::scalar(MemSizeBits);
5528 } else {
5529 Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
5530 InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
5531 NewLdDstTy = s32;
5532 }
5533 } else {
5534 llvm_unreachable("Unexpected size for indexed load");
5535 }
5536
5537 auto Cst = getIConstantVRegVal(Offset, MRI);
5538 if (!Cst)
5539 return false; // Shouldn't happen, but just in case.
5540
5541 auto LdMI = MIB.buildInstr(Opc, {WriteBack, NewLdDstTy}, {Base})
5542 .addImm(Cst->getSExtValue());
5543 LdMI.cloneMemRefs(ExtLd);
5544 constrainSelectedInstRegOperands(*LdMI, TII, TRI, RBI);
5545 // Make sure to select the load with the MemTy as the dest type, and then
5546 // insert into a larger reg if needed.
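// For example, an any-extending post-indexed load of an s8 value into a
// 64-bit GPR is selected as LDRBBpost (which defines a 32-bit register) and
// then widened with SUBREG_TO_REG into the sub_32 subregister.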
5547 if (InsertIntoSubReg) {
5548 // Generate a SUBREG_TO_REG.
5549 auto SubToReg = MIB.buildInstr(TargetOpcode::SUBREG_TO_REG, {Dst}, {})
5550 .addImm(0)
5551 .addUse(LdMI.getReg(1))
5552 .addImm(InsertIntoSubReg);
5553 RBI.constrainGenericRegister(
5554 SubToReg.getReg(0),
5555 *getRegClassForTypeOnBank(MRI.getType(Dst),
5556 *RBI.getRegBank(Dst, MRI, TRI)),
5557 MRI);
5558 } else {
5559 auto Copy = MIB.buildCopy(Dst, LdMI.getReg(1));
5560 selectCopy(*Copy, TII, MRI, TRI, RBI);
5561 }
5562 MI.eraseFromParent();
5563
5564 return true;
5565}
5566
5567bool AArch64InstructionSelector::selectIndexedLoad(MachineInstr &MI,
5568 MachineRegisterInfo &MRI) {
5569 auto &Ld = cast<GIndexedLoad>(MI);
5570 Register Dst = Ld.getDstReg();
5571 Register WriteBack = Ld.getWritebackReg();
5572 Register Base = Ld.getBaseReg();
5573 Register Offset = Ld.getOffsetReg();
5574 assert(MRI.getType(Dst).getSizeInBits() <= 128 &&
5575 "Unexpected type for indexed load");
5576 unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();
5577
5578 if (MemSize < MRI.getType(Dst).getSizeInBytes())
5579 return selectIndexedExtLoad(MI, MRI);
5580
5581 unsigned Opc = 0;
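// The opcode tables below are indexed by Log2_32 of the access size in bytes,
// covering 1, 2, 4 and 8 byte accesses (plus 16 bytes for the FPR bank).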
5582 if (Ld.isPre()) {
5583 static constexpr unsigned GPROpcodes[] = {
5584 AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre,
5585 AArch64::LDRXpre};
5586 static constexpr unsigned FPROpcodes[] = {
5587 AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre,
5588 AArch64::LDRQpre};
5589 if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5590 Opc = FPROpcodes[Log2_32(MemSize)];
5591 else
5592 Opc = GPROpcodes[Log2_32(MemSize)];
5593 } else {
5594 static constexpr unsigned GPROpcodes[] = {
5595 AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
5596 AArch64::LDRXpost};
5597 static constexpr unsigned FPROpcodes[] = {
5598 AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
5599 AArch64::LDRDpost, AArch64::LDRQpost};
5600 if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5601 Opc = FPROpcodes[Log2_32(MemSize)];
5602 else
5603 Opc = GPROpcodes[Log2_32(MemSize)];
5604 }
5605 auto Cst = getIConstantVRegVal(Offset, MRI);
5606 if (!Cst)
5607 return false; // Shouldn't happen, but just in case.
5608 auto LdMI =
5609 MIB.buildInstr(Opc, {WriteBack, Dst}, {Base}).addImm(Cst->getSExtValue());
5610 LdMI.cloneMemRefs(Ld);
5611 constrainSelectedInstRegOperands(*LdMI, TII, TRI, RBI);
5612 MI.eraseFromParent();
5613 return true;
5614}
5615
5616bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I,
5617 MachineRegisterInfo &MRI) {
5618 Register Dst = I.getWritebackReg();
5619 Register Val = I.getValueReg();
5620 Register Base = I.getBaseReg();
5621 Register Offset = I.getOffsetReg();
5622 LLT ValTy = MRI.getType(Val);
5623 assert(ValTy.getSizeInBits() <= 128 && "Unexpected type for indexed store");
5624
5625 unsigned Opc = 0;
5626 if (I.isPre()) {
5627 static constexpr unsigned GPROpcodes[] = {
5628 AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre,
5629 AArch64::STRXpre};
5630 static constexpr unsigned FPROpcodes[] = {
5631 AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre,
5632 AArch64::STRQpre};
5633
5634 if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5635 Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5636 else
5637 Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5638 } else {
5639 static constexpr unsigned GPROpcodes[] = {
5640 AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
5641 AArch64::STRXpost};
5642 static constexpr unsigned FPROpcodes[] = {
5643 AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,
5644 AArch64::STRDpost, AArch64::STRQpost};
5645
5646 if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5647 Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5648 else
5649 Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5650 }
5651
5652 auto Cst = getIConstantVRegVal(Offset, MRI);
5653 if (!Cst)
5654 return false; // Shouldn't happen, but just in case.
5655 auto Str =
5656 MIB.buildInstr(Opc, {Dst}, {Val, Base}).addImm(Cst->getSExtValue());
5657 Str.cloneMemRefs(I);
5658 constrainSelectedInstRegOperands(*Str, TII, TRI, RBI);
5659 I.eraseFromParent();
5660 return true;
5661}
5662
5663MachineInstr *
5664AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
5665 MachineIRBuilder &MIRBuilder,
5666 MachineRegisterInfo &MRI) {
5667 LLT DstTy = MRI.getType(Dst);
5668 unsigned DstSize = DstTy.getSizeInBits();
5669 if (CV->isNullValue()) {
5670 if (DstSize == 128) {
5671 auto Mov =
5672 MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
5673 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5674 return &*Mov;
5675 }
5676
5677 if (DstSize == 64) {
5678 auto Mov =
5679 MIRBuilder
5680 .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
5681 .addImm(0);
5682 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
5683 .addReg(Mov.getReg(0), 0, AArch64::dsub);
5684 RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
5685 return &*Copy;
5686 }
5687 }
5688
5689 if (Constant *SplatValue = CV->getSplatValue()) {
5690 APInt SplatValueAsInt =
5691 isa<ConstantFP>(SplatValue)
5692 ? cast<ConstantFP>(SplatValue)->getValueAPF().bitcastToAPInt()
5693 : SplatValue->getUniqueInteger();
5694 APInt DefBits = APInt::getSplat(
5695 DstSize, SplatValueAsInt.trunc(DstTy.getScalarSizeInBits()));
5696 auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * {
5697 MachineInstr *NewOp;
5698 bool Inv = false;
5699 if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, DefBits, MIRBuilder)) ||
5700 (NewOp =
5701 tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5702 (NewOp =
5703 tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5704 (NewOp =
5705 tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5706 (NewOp = tryAdvSIMDModImm8(Dst, DstSize, DefBits, MIRBuilder)) ||
5707 (NewOp = tryAdvSIMDModImmFP(Dst, DstSize, DefBits, MIRBuilder)))
5708 return NewOp;
5709
5710 DefBits = ~DefBits;
5711 Inv = true;
5712 if ((NewOp =
5713 tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5714 (NewOp =
5715 tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5716 (NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)))
5717 return NewOp;
5718 return nullptr;
5719 };
5720
5721 if (auto *NewOp = TryMOVIWithBits(DefBits))
5722 return NewOp;
5723
5724 // See if a fneg of the constant can be materialized with a MOVI, etc
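// e.g. a splat of -0.0f has only the sign bit set in each lane; XORing the
// sign bits away leaves 0, which MOVI can encode, so we emit a MOVI of 0
// followed by a vector FNEG.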
5725 auto TryWithFNeg = [&](APInt DefBits, int NumBits,
5726 unsigned NegOpc) -> MachineInstr * {
5727 // FNegate each sub-element of the constant
5728 APInt Neg = APInt::getHighBitsSet(NumBits, 1).zext(DstSize);
5729 APInt NegBits(DstSize, 0);
5730 unsigned NumElts = DstSize / NumBits;
5731 for (unsigned i = 0; i < NumElts; i++)
5732 NegBits |= Neg << (NumBits * i);
5733 NegBits = DefBits ^ NegBits;
5734
5735 // Try to create the new constants with MOVI, and if so generate a fneg
5736 // for it.
5737 if (auto *NewOp = TryMOVIWithBits(NegBits)) {
5738 Register NewDst = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
5739 NewOp->getOperand(0).setReg(NewDst);
5740 return MIRBuilder.buildInstr(NegOpc, {Dst}, {NewDst});
5741 }
5742 return nullptr;
5743 };
5744 MachineInstr *R;
5745 if ((R = TryWithFNeg(DefBits, 32, AArch64::FNEGv4f32)) ||
5746 (R = TryWithFNeg(DefBits, 64, AArch64::FNEGv2f64)) ||
5747 (STI.hasFullFP16() &&
5748 (R = TryWithFNeg(DefBits, 16, AArch64::FNEGv8f16))))
5749 return R;
5750 }
5751
5752 auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
5753 if (!CPLoad) {
5754 LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
5755 return nullptr;
5756 }
5757
5758 auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
5759 RBI.constrainGenericRegister(
5760 Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
5761 return &*Copy;
5762}
5763
5764bool AArch64InstructionSelector::tryOptConstantBuildVec(
5765 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
5766 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5767 unsigned DstSize = DstTy.getSizeInBits();
5768 assert(DstSize <= 128 && "Unexpected build_vec type!");
5769 if (DstSize < 32)
5770 return false;
5771 // Check if we're building a constant vector, in which case we want to
5772 // generate a constant pool load instead of a vector insert sequence.
5773 SmallVector<Constant *, 16> Csts;
5774 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
5775 // Try to find G_CONSTANT or G_FCONSTANT
5776 auto *OpMI =
5777 getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
5778 if (OpMI)
5779 Csts.emplace_back(
5780 const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
5781 else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
5782 I.getOperand(Idx).getReg(), MRI)))
5783 Csts.emplace_back(
5784 const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
5785 else
5786 return false;
5787 }
5788 Constant *CV = ConstantVector::get(Csts);
5789 if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
5790 return false;
5791 I.eraseFromParent();
5792 return true;
5793}
5794
5795bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
5796 MachineInstr &I, MachineRegisterInfo &MRI) {
5797 // Given:
5798 // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
5799 //
5800 // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
5801 Register Dst = I.getOperand(0).getReg();
5802 Register EltReg = I.getOperand(1).getReg();
5803 LLT EltTy = MRI.getType(EltReg);
5804 // If the index isn't on the same bank as its elements, then this can't be a
5805 // SUBREG_TO_REG.
5806 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5807 const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
5808 if (EltRB != DstRB)
5809 return false;
5810 if (any_of(drop_begin(I.operands(), 2), [&MRI](const MachineOperand &Op) {
5811 return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(), MRI);
5812 }))
5813 return false;
5814 unsigned SubReg;
5815 const TargetRegisterClass *EltRC = getRegClassForTypeOnBank(EltTy, EltRB);
5816 if (!EltRC)
5817 return false;
5818 const TargetRegisterClass *DstRC =
5819 getRegClassForTypeOnBank(MRI.getType(Dst), DstRB);
5820 if (!DstRC)
5821 return false;
5822 if (!getSubRegForClass(EltRC, TRI, SubReg))
5823 return false;
5824 auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
5825 .addImm(0)
5826 .addUse(EltReg)
5827 .addImm(SubReg);
5828 I.eraseFromParent();
5829 constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);
5830 return RBI.constrainGenericRegister(Dst, *DstRC, MRI);
5831}
5832
5833bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
5834 MachineRegisterInfo &MRI) {
5835 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5836 // Until we port more of the optimized selections, for now just use a vector
5837 // insert sequence.
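// e.g. a G_BUILD_VECTOR of s32 elements becomes an insert of the first
// element into an undef vector followed by one lane insert (INSvi32gpr or
// INSvi32lane, depending on the register bank) per remaining non-undef
// element.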
5838 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5839 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
5840 unsigned EltSize = EltTy.getSizeInBits();
5841
5842 if (tryOptConstantBuildVec(I, DstTy, MRI))
5843 return true;
5844 if (tryOptBuildVecToSubregToReg(I, MRI))
5845 return true;
5846
5847 if (EltSize != 8 && EltSize != 16 && EltSize != 32 && EltSize != 64)
5848 return false; // Don't support all element types yet.
5849 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
5850
5851 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5852 MachineInstr *ScalarToVec =
5853 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
5854 I.getOperand(1).getReg(), MIB);
5855 if (!ScalarToVec)
5856 return false;
5857
5858 Register DstVec = ScalarToVec->getOperand(0).getReg();
5859 unsigned DstSize = DstTy.getSizeInBits();
5860
5861 // Keep track of the last MI we inserted. Later on, we might be able to save
5862 // a copy using it.
5863 MachineInstr *PrevMI = ScalarToVec;
5864 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
5865 // Note that if we don't do a subregister copy, we can end up making an
5866 // extra register.
5867 Register OpReg = I.getOperand(i).getReg();
5868 // Do not emit inserts for undefs
5869 if (!getOpcodeDef<GImplicitDef>(OpReg, MRI)) {
5870 PrevMI = &*emitLaneInsert(std::nullopt, DstVec, OpReg, i - 1, RB, MIB);
5871 DstVec = PrevMI->getOperand(0).getReg();
5872 }
5873 }
5874
5875 // If DstTy's size in bits is less than 128, then emit a subregister copy
5876 // from DstVec to the last register we've defined.
5877 if (DstSize < 128) {
5878 // Force this to be FPR using the destination vector.
5879 const TargetRegisterClass *RC =
5880 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5881 if (!RC)
5882 return false;
5883 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5884 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5885 return false;
5886 }
5887
5888 unsigned SubReg = 0;
5889 if (!getSubRegForClass(RC, TRI, SubReg))
5890 return false;
5891 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5892 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
5893 << ")\n");
5894 return false;
5895 }
5896
5897 Register Reg = MRI.createVirtualRegister(RC);
5898 Register DstReg = I.getOperand(0).getReg();
5899
5900 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0, SubReg);
5901 MachineOperand &RegOp = I.getOperand(1);
5902 RegOp.setReg(Reg);
5903 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5904 } else {
5905 // We either have a vector with all elements (except the first one) undef or
5906 // at least one non-undef non-first element. In the first case, we need to
5907 // constrain the output register ourselves as we may have generated an
5908 // INSERT_SUBREG operation which is a generic operation for which the
5909 // output regclass cannot be automatically chosen.
5910 //
5911 // In the second case, there is no need to do this as it may generate an
5912 // instruction like INSvi32gpr where the regclass can be automatically
5913 // chosen.
5914 //
5915 // Also, we save a copy by re-using the destination register on the final
5916 // insert.
5917 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
5918 constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
5919
5920 Register DstReg = PrevMI->getOperand(0).getReg();
5921 if (PrevMI == ScalarToVec && DstReg.isVirtual()) {
5922 const TargetRegisterClass *RC =
5923 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5924 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5925 }
5926 }
5927
5928 I.eraseFromParent();
5929 return true;
5930}
5931
5932bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
5933 unsigned NumVecs,
5934 MachineInstr &I) {
5935 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5936 assert(Opc && "Expected an opcode?");
5937 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5938 auto &MRI = *MIB.getMRI();
5939 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5940 unsigned Size = Ty.getSizeInBits();
5941 assert((Size == 64 || Size == 128) &&
5942 "Destination must be 64 bits or 128 bits?");
5943 unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
5944 auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg();
5945 assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
5946 auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr});
5947 Load.cloneMemRefs(I);
5948 constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
5949 Register SelectedLoadDst = Load->getOperand(0).getReg();
5950 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
5951 auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {})
5952 .addReg(SelectedLoadDst, 0, SubReg + Idx);
5953 // Emit the subreg copies and immediately select them.
5954 // FIXME: We should refactor our copy code into an emitCopy helper and
5955 // clean up uses of this pattern elsewhere in the selector.
5956 selectCopy(*Vec, TII, MRI, TRI, RBI);
5957 }
5958 return true;
5959}
5960
5961bool AArch64InstructionSelector::selectVectorLoadLaneIntrinsic(
5962 unsigned Opc, unsigned NumVecs, MachineInstr &I) {
5963 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5964 assert(Opc && "Expected an opcode?");
5965 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5966 auto &MRI = *MIB.getMRI();
5967 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5968 bool Narrow = Ty.getSizeInBits() == 64;
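// 64-bit (D-register) operands are widened to Q registers for the lane load
// and the results are narrowed back to 64 bits after the subregister copies
// below.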
5969
5970 auto FirstSrcRegIt = I.operands_begin() + NumVecs + 1;
5971 SmallVector<Register, 4> Regs(NumVecs);
5972 std::transform(FirstSrcRegIt, FirstSrcRegIt + NumVecs, Regs.begin(),
5973 [](auto MO) { return MO.getReg(); });
5974
5975 if (Narrow) {
5976 transform(Regs, Regs.begin(), [this](Register Reg) {
5977 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
5978 ->getOperand(0)
5979 .getReg();
5980 });
5981 Ty = Ty.multiplyElements(2);
5982 }
5983
5984 Register Tuple = createQTuple(Regs, MIB);
5985 auto LaneNo = getIConstantVRegVal((FirstSrcRegIt + NumVecs)->getReg(), MRI);
5986 if (!LaneNo)
5987 return false;
5988
5989 Register Ptr = (FirstSrcRegIt + NumVecs + 1)->getReg();
5990 auto Load = MIB.buildInstr(Opc, {Ty}, {})
5991 .addReg(Tuple)
5992 .addImm(LaneNo->getZExtValue())
5993 .addReg(Ptr);
5994 Load.cloneMemRefs(I);
5995 constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
5996 Register SelectedLoadDst = Load->getOperand(0).getReg();
5997 unsigned SubReg = AArch64::qsub0;
5998 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
5999 auto Vec = MIB.buildInstr(TargetOpcode::COPY,
6000 {Narrow ? DstOp(&AArch64::FPR128RegClass)
6001 : DstOp(I.getOperand(Idx).getReg())},
6002 {})
6003 .addReg(SelectedLoadDst, 0, SubReg + Idx);
6004 Register WideReg = Vec.getReg(0);
6005 // Emit the subreg copies and immediately select them.
6006 selectCopy(*Vec, TII, MRI, TRI, RBI);
6007 if (Narrow &&
6008 !emitNarrowVector(I.getOperand(Idx).getReg(), WideReg, MIB, MRI))
6009 return false;
6010 }
6011 return true;
6012}
6013
6014void AArch64InstructionSelector::selectVectorStoreIntrinsic(MachineInstr &I,
6015 unsigned NumVecs,
6016 unsigned Opc) {
6017 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
6018 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6019 Register Ptr = I.getOperand(1 + NumVecs).getReg();
6020
6021 SmallVector<Register, 2> Regs(NumVecs);
6022 std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
6023 Regs.begin(), [](auto MO) { return MO.getReg(); });
6024
6025 Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
6026 : createDTuple(Regs, MIB);
6027 auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});
6028 Store.cloneMemRefs(I);
6029 constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
6030}
6031
6032bool AArch64InstructionSelector::selectVectorStoreLaneIntrinsic(
6033 MachineInstr &I, unsigned NumVecs, unsigned Opc) {
6034 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
6035 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6036 bool Narrow = Ty.getSizeInBits() == 64;
6037
6038 SmallVector<Register, 2> Regs(NumVecs);
6039 std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
6040 Regs.begin(), [](auto MO) { return MO.getReg(); });
6041
6042 if (Narrow)
6043 transform(Regs, Regs.begin(), [this](Register Reg) {
6044 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
6045 ->getOperand(0)
6046 .getReg();
6047 });
6048
6049 Register Tuple = createQTuple(Regs, MIB);
6050
6051 auto LaneNo = getIConstantVRegVal(I.getOperand(1 + NumVecs).getReg(), MRI);
6052 if (!LaneNo)
6053 return false;
6054 Register Ptr = I.getOperand(1 + NumVecs + 1).getReg();
6055 auto Store = MIB.buildInstr(Opc, {}, {})
6056 .addReg(Tuple)
6057 .addImm(LaneNo->getZExtValue())
6058 .addReg(Ptr);
6059 Store.cloneMemRefs(I);
6060 constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
6061 return true;
6062}
6063
6064bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
6065 MachineInstr &I, MachineRegisterInfo &MRI) {
6066 // Find the intrinsic ID.
6067 unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
6068
6069 const LLT S8 = LLT::scalar(8);
6070 const LLT S16 = LLT::scalar(16);
6071 const LLT S32 = LLT::scalar(32);
6072 const LLT S64 = LLT::scalar(64);
6073 const LLT P0 = LLT::pointer(0, 64);
6074 // Select the instruction.
6075 switch (IntrinID) {
6076 default:
6077 return false;
6078 case Intrinsic::aarch64_ldxp:
6079 case Intrinsic::aarch64_ldaxp: {
6080 auto NewI = MIB.buildInstr(
6081 IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
6082 {I.getOperand(0).getReg(), I.getOperand(1).getReg()},
6083 {I.getOperand(3)});
6084 NewI.cloneMemRefs(I);
6085 constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
6086 break;
6087 }
6088 case Intrinsic::aarch64_neon_ld1x2: {
6089 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6090 unsigned Opc = 0;
6091 if (Ty == LLT::fixed_vector(8, S8))
6092 Opc = AArch64::LD1Twov8b;
6093 else if (Ty == LLT::fixed_vector(16, S8))
6094 Opc = AArch64::LD1Twov16b;
6095 else if (Ty == LLT::fixed_vector(4, S16))
6096 Opc = AArch64::LD1Twov4h;
6097 else if (Ty == LLT::fixed_vector(8, S16))
6098 Opc = AArch64::LD1Twov8h;
6099 else if (Ty == LLT::fixed_vector(2, S32))
6100 Opc = AArch64::LD1Twov2s;
6101 else if (Ty == LLT::fixed_vector(4, S32))
6102 Opc = AArch64::LD1Twov4s;
6103 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6104 Opc = AArch64::LD1Twov2d;
6105 else if (Ty == S64 || Ty == P0)
6106 Opc = AArch64::LD1Twov1d;
6107 else
6108 llvm_unreachable("Unexpected type for ld1x2!");
6109 selectVectorLoadIntrinsic(Opc, 2, I);
6110 break;
6111 }
6112 case Intrinsic::aarch64_neon_ld1x3: {
6113 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6114 unsigned Opc = 0;
6115 if (Ty == LLT::fixed_vector(8, S8))
6116 Opc = AArch64::LD1Threev8b;
6117 else if (Ty == LLT::fixed_vector(16, S8))
6118 Opc = AArch64::LD1Threev16b;
6119 else if (Ty == LLT::fixed_vector(4, S16))
6120 Opc = AArch64::LD1Threev4h;
6121 else if (Ty == LLT::fixed_vector(8, S16))
6122 Opc = AArch64::LD1Threev8h;
6123 else if (Ty == LLT::fixed_vector(2, S32))
6124 Opc = AArch64::LD1Threev2s;
6125 else if (Ty == LLT::fixed_vector(4, S32))
6126 Opc = AArch64::LD1Threev4s;
6127 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6128 Opc = AArch64::LD1Threev2d;
6129 else if (Ty == S64 || Ty == P0)
6130 Opc = AArch64::LD1Threev1d;
6131 else
6132 llvm_unreachable("Unexpected type for ld1x3!");
6133 selectVectorLoadIntrinsic(Opc, 3, I);
6134 break;
6135 }
6136 case Intrinsic::aarch64_neon_ld1x4: {
6137 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6138 unsigned Opc = 0;
6139 if (Ty == LLT::fixed_vector(8, S8))
6140 Opc = AArch64::LD1Fourv8b;
6141 else if (Ty == LLT::fixed_vector(16, S8))
6142 Opc = AArch64::LD1Fourv16b;
6143 else if (Ty == LLT::fixed_vector(4, S16))
6144 Opc = AArch64::LD1Fourv4h;
6145 else if (Ty == LLT::fixed_vector(8, S16))
6146 Opc = AArch64::LD1Fourv8h;
6147 else if (Ty == LLT::fixed_vector(2, S32))
6148 Opc = AArch64::LD1Fourv2s;
6149 else if (Ty == LLT::fixed_vector(4, S32))
6150 Opc = AArch64::LD1Fourv4s;
6151 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6152 Opc = AArch64::LD1Fourv2d;
6153 else if (Ty == S64 || Ty == P0)
6154 Opc = AArch64::LD1Fourv1d;
6155 else
6156 llvm_unreachable("Unexpected type for ld1x4!");
6157 selectVectorLoadIntrinsic(Opc, 4, I);
6158 break;
6159 }
6160 case Intrinsic::aarch64_neon_ld2: {
6161 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6162 unsigned Opc = 0;
6163 if (Ty == LLT::fixed_vector(8, S8))
6164 Opc = AArch64::LD2Twov8b;
6165 else if (Ty == LLT::fixed_vector(16, S8))
6166 Opc = AArch64::LD2Twov16b;
6167 else if (Ty == LLT::fixed_vector(4, S16))
6168 Opc = AArch64::LD2Twov4h;
6169 else if (Ty == LLT::fixed_vector(8, S16))
6170 Opc = AArch64::LD2Twov8h;
6171 else if (Ty == LLT::fixed_vector(2, S32))
6172 Opc = AArch64::LD2Twov2s;
6173 else if (Ty == LLT::fixed_vector(4, S32))
6174 Opc = AArch64::LD2Twov4s;
6175 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6176 Opc = AArch64::LD2Twov2d;
6177 else if (Ty == S64 || Ty == P0)
6178 Opc = AArch64::LD1Twov1d;
6179 else
6180 llvm_unreachable("Unexpected type for ld2!");
6181 selectVectorLoadIntrinsic(Opc, 2, I);
6182 break;
6183 }
6184 case Intrinsic::aarch64_neon_ld2lane: {
6185 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6186 unsigned Opc;
6187 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6188 Opc = AArch64::LD2i8;
6189 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6190 Opc = AArch64::LD2i16;
6191 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6192 Opc = AArch64::LD2i32;
6193 else if (Ty == LLT::fixed_vector(2, S64) ||
6194 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6195 Opc = AArch64::LD2i64;
6196 else
6197 llvm_unreachable("Unexpected type for ld2lane!");
6198 if (!selectVectorLoadLaneIntrinsic(Opc, 2, I))
6199 return false;
6200 break;
6201 }
6202 case Intrinsic::aarch64_neon_ld2r: {
6203 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6204 unsigned Opc = 0;
6205 if (Ty == LLT::fixed_vector(8, S8))
6206 Opc = AArch64::LD2Rv8b;
6207 else if (Ty == LLT::fixed_vector(16, S8))
6208 Opc = AArch64::LD2Rv16b;
6209 else if (Ty == LLT::fixed_vector(4, S16))
6210 Opc = AArch64::LD2Rv4h;
6211 else if (Ty == LLT::fixed_vector(8, S16))
6212 Opc = AArch64::LD2Rv8h;
6213 else if (Ty == LLT::fixed_vector(2, S32))
6214 Opc = AArch64::LD2Rv2s;
6215 else if (Ty == LLT::fixed_vector(4, S32))
6216 Opc = AArch64::LD2Rv4s;
6217 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6218 Opc = AArch64::LD2Rv2d;
6219 else if (Ty == S64 || Ty == P0)
6220 Opc = AArch64::LD2Rv1d;
6221 else
6222 llvm_unreachable("Unexpected type for ld2r!");
6223 selectVectorLoadIntrinsic(Opc, 2, I);
6224 break;
6225 }
6226 case Intrinsic::aarch64_neon_ld3: {
6227 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6228 unsigned Opc = 0;
6229 if (Ty == LLT::fixed_vector(8, S8))
6230 Opc = AArch64::LD3Threev8b;
6231 else if (Ty == LLT::fixed_vector(16, S8))
6232 Opc = AArch64::LD3Threev16b;
6233 else if (Ty == LLT::fixed_vector(4, S16))
6234 Opc = AArch64::LD3Threev4h;
6235 else if (Ty == LLT::fixed_vector(8, S16))
6236 Opc = AArch64::LD3Threev8h;
6237 else if (Ty == LLT::fixed_vector(2, S32))
6238 Opc = AArch64::LD3Threev2s;
6239 else if (Ty == LLT::fixed_vector(4, S32))
6240 Opc = AArch64::LD3Threev4s;
6241 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6242 Opc = AArch64::LD3Threev2d;
6243 else if (Ty == S64 || Ty == P0)
6244 Opc = AArch64::LD1Threev1d;
6245 else
6246 llvm_unreachable("Unexpected type for ld3!");
6247 selectVectorLoadIntrinsic(Opc, 3, I);
6248 break;
6249 }
6250 case Intrinsic::aarch64_neon_ld3lane: {
6251 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6252 unsigned Opc;
6253 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6254 Opc = AArch64::LD3i8;
6255 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6256 Opc = AArch64::LD3i16;
6257 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6258 Opc = AArch64::LD3i32;
6259 else if (Ty == LLT::fixed_vector(2, S64) ||
6260 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6261 Opc = AArch64::LD3i64;
6262 else
6263 llvm_unreachable("Unexpected type for ld3lane!");
6264 if (!selectVectorLoadLaneIntrinsic(Opc, 3, I))
6265 return false;
6266 break;
6267 }
6268 case Intrinsic::aarch64_neon_ld3r: {
6269 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6270 unsigned Opc = 0;
6271 if (Ty == LLT::fixed_vector(8, S8))
6272 Opc = AArch64::LD3Rv8b;
6273 else if (Ty == LLT::fixed_vector(16, S8))
6274 Opc = AArch64::LD3Rv16b;
6275 else if (Ty == LLT::fixed_vector(4, S16))
6276 Opc = AArch64::LD3Rv4h;
6277 else if (Ty == LLT::fixed_vector(8, S16))
6278 Opc = AArch64::LD3Rv8h;
6279 else if (Ty == LLT::fixed_vector(2, S32))
6280 Opc = AArch64::LD3Rv2s;
6281 else if (Ty == LLT::fixed_vector(4, S32))
6282 Opc = AArch64::LD3Rv4s;
6283 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6284 Opc = AArch64::LD3Rv2d;
6285 else if (Ty == S64 || Ty == P0)
6286 Opc = AArch64::LD3Rv1d;
6287 else
6288 llvm_unreachable("Unexpected type for ld3r!");
6289 selectVectorLoadIntrinsic(Opc, 3, I);
6290 break;
6291 }
6292 case Intrinsic::aarch64_neon_ld4: {
6293 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6294 unsigned Opc = 0;
6295 if (Ty == LLT::fixed_vector(8, S8))
6296 Opc = AArch64::LD4Fourv8b;
6297 else if (Ty == LLT::fixed_vector(16, S8))
6298 Opc = AArch64::LD4Fourv16b;
6299 else if (Ty == LLT::fixed_vector(4, S16))
6300 Opc = AArch64::LD4Fourv4h;
6301 else if (Ty == LLT::fixed_vector(8, S16))
6302 Opc = AArch64::LD4Fourv8h;
6303 else if (Ty == LLT::fixed_vector(2, S32))
6304 Opc = AArch64::LD4Fourv2s;
6305 else if (Ty == LLT::fixed_vector(4, S32))
6306 Opc = AArch64::LD4Fourv4s;
6307 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6308 Opc = AArch64::LD4Fourv2d;
6309 else if (Ty == S64 || Ty == P0)
6310 Opc = AArch64::LD1Fourv1d;
6311 else
6312 llvm_unreachable("Unexpected type for ld4!");
6313 selectVectorLoadIntrinsic(Opc, 4, I);
6314 break;
6315 }
6316 case Intrinsic::aarch64_neon_ld4lane: {
6317 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6318 unsigned Opc;
6319 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6320 Opc = AArch64::LD4i8;
6321 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6322 Opc = AArch64::LD4i16;
6323 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6324 Opc = AArch64::LD4i32;
6325 else if (Ty == LLT::fixed_vector(2, S64) ||
6326 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6327 Opc = AArch64::LD4i64;
6328 else
6329 llvm_unreachable("Unexpected type for ld4lane!");
6330 if (!selectVectorLoadLaneIntrinsic(Opc, 4, I))
6331 return false;
6332 break;
6333 }
6334 case Intrinsic::aarch64_neon_ld4r: {
6335 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6336 unsigned Opc = 0;
6337 if (Ty == LLT::fixed_vector(8, S8))
6338 Opc = AArch64::LD4Rv8b;
6339 else if (Ty == LLT::fixed_vector(16, S8))
6340 Opc = AArch64::LD4Rv16b;
6341 else if (Ty == LLT::fixed_vector(4, S16))
6342 Opc = AArch64::LD4Rv4h;
6343 else if (Ty == LLT::fixed_vector(8, S16))
6344 Opc = AArch64::LD4Rv8h;
6345 else if (Ty == LLT::fixed_vector(2, S32))
6346 Opc = AArch64::LD4Rv2s;
6347 else if (Ty == LLT::fixed_vector(4, S32))
6348 Opc = AArch64::LD4Rv4s;
6349 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6350 Opc = AArch64::LD4Rv2d;
6351 else if (Ty == S64 || Ty == P0)
6352 Opc = AArch64::LD4Rv1d;
6353 else
6354 llvm_unreachable("Unexpected type for ld4r!");
6355 selectVectorLoadIntrinsic(Opc, 4, I);
6356 break;
6357 }
6358 case Intrinsic::aarch64_neon_st1x2: {
6359 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6360 unsigned Opc;
6361 if (Ty == LLT::fixed_vector(8, S8))
6362 Opc = AArch64::ST1Twov8b;
6363 else if (Ty == LLT::fixed_vector(16, S8))
6364 Opc = AArch64::ST1Twov16b;
6365 else if (Ty == LLT::fixed_vector(4, S16))
6366 Opc = AArch64::ST1Twov4h;
6367 else if (Ty == LLT::fixed_vector(8, S16))
6368 Opc = AArch64::ST1Twov8h;
6369 else if (Ty == LLT::fixed_vector(2, S32))
6370 Opc = AArch64::ST1Twov2s;
6371 else if (Ty == LLT::fixed_vector(4, S32))
6372 Opc = AArch64::ST1Twov4s;
6373 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6374 Opc = AArch64::ST1Twov2d;
6375 else if (Ty == S64 || Ty == P0)
6376 Opc = AArch64::ST1Twov1d;
6377 else
6378 llvm_unreachable("Unexpected type for st1x2!");
6379 selectVectorStoreIntrinsic(I, 2, Opc);
6380 break;
6381 }
6382 case Intrinsic::aarch64_neon_st1x3: {
6383 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6384 unsigned Opc;
6385 if (Ty == LLT::fixed_vector(8, S8))
6386 Opc = AArch64::ST1Threev8b;
6387 else if (Ty == LLT::fixed_vector(16, S8))
6388 Opc = AArch64::ST1Threev16b;
6389 else if (Ty == LLT::fixed_vector(4, S16))
6390 Opc = AArch64::ST1Threev4h;
6391 else if (Ty == LLT::fixed_vector(8, S16))
6392 Opc = AArch64::ST1Threev8h;
6393 else if (Ty == LLT::fixed_vector(2, S32))
6394 Opc = AArch64::ST1Threev2s;
6395 else if (Ty == LLT::fixed_vector(4, S32))
6396 Opc = AArch64::ST1Threev4s;
6397 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6398 Opc = AArch64::ST1Threev2d;
6399 else if (Ty == S64 || Ty == P0)
6400 Opc = AArch64::ST1Threev1d;
6401 else
6402 llvm_unreachable("Unexpected type for st1x3!");
6403 selectVectorStoreIntrinsic(I, 3, Opc);
6404 break;
6405 }
6406 case Intrinsic::aarch64_neon_st1x4: {
6407 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6408 unsigned Opc;
6409 if (Ty == LLT::fixed_vector(8, S8))
6410 Opc = AArch64::ST1Fourv8b;
6411 else if (Ty == LLT::fixed_vector(16, S8))
6412 Opc = AArch64::ST1Fourv16b;
6413 else if (Ty == LLT::fixed_vector(4, S16))
6414 Opc = AArch64::ST1Fourv4h;
6415 else if (Ty == LLT::fixed_vector(8, S16))
6416 Opc = AArch64::ST1Fourv8h;
6417 else if (Ty == LLT::fixed_vector(2, S32))
6418 Opc = AArch64::ST1Fourv2s;
6419 else if (Ty == LLT::fixed_vector(4, S32))
6420 Opc = AArch64::ST1Fourv4s;
6421 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6422 Opc = AArch64::ST1Fourv2d;
6423 else if (Ty == S64 || Ty == P0)
6424 Opc = AArch64::ST1Fourv1d;
6425 else
6426 llvm_unreachable("Unexpected type for st1x4!");
6427 selectVectorStoreIntrinsic(I, 4, Opc);
6428 break;
6429 }
6430 case Intrinsic::aarch64_neon_st2: {
6431 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6432 unsigned Opc;
6433 if (Ty == LLT::fixed_vector(8, S8))
6434 Opc = AArch64::ST2Twov8b;
6435 else if (Ty == LLT::fixed_vector(16, S8))
6436 Opc = AArch64::ST2Twov16b;
6437 else if (Ty == LLT::fixed_vector(4, S16))
6438 Opc = AArch64::ST2Twov4h;
6439 else if (Ty == LLT::fixed_vector(8, S16))
6440 Opc = AArch64::ST2Twov8h;
6441 else if (Ty == LLT::fixed_vector(2, S32))
6442 Opc = AArch64::ST2Twov2s;
6443 else if (Ty == LLT::fixed_vector(4, S32))
6444 Opc = AArch64::ST2Twov4s;
6445 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6446 Opc = AArch64::ST2Twov2d;
6447 else if (Ty == S64 || Ty == P0)
6448 Opc = AArch64::ST1Twov1d;
6449 else
6450 llvm_unreachable("Unexpected type for st2!");
6451 selectVectorStoreIntrinsic(I, 2, Opc);
6452 break;
6453 }
6454 case Intrinsic::aarch64_neon_st3: {
6455 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6456 unsigned Opc;
6457 if (Ty == LLT::fixed_vector(8, S8))
6458 Opc = AArch64::ST3Threev8b;
6459 else if (Ty == LLT::fixed_vector(16, S8))
6460 Opc = AArch64::ST3Threev16b;
6461 else if (Ty == LLT::fixed_vector(4, S16))
6462 Opc = AArch64::ST3Threev4h;
6463 else if (Ty == LLT::fixed_vector(8, S16))
6464 Opc = AArch64::ST3Threev8h;
6465 else if (Ty == LLT::fixed_vector(2, S32))
6466 Opc = AArch64::ST3Threev2s;
6467 else if (Ty == LLT::fixed_vector(4, S32))
6468 Opc = AArch64::ST3Threev4s;
6469 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6470 Opc = AArch64::ST3Threev2d;
6471 else if (Ty == S64 || Ty == P0)
6472 Opc = AArch64::ST1Threev1d;
6473 else
6474 llvm_unreachable("Unexpected type for st3!");
6475 selectVectorStoreIntrinsic(I, 3, Opc);
6476 break;
6477 }
6478 case Intrinsic::aarch64_neon_st4: {
6479 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6480 unsigned Opc;
6481 if (Ty == LLT::fixed_vector(8, S8))
6482 Opc = AArch64::ST4Fourv8b;
6483 else if (Ty == LLT::fixed_vector(16, S8))
6484 Opc = AArch64::ST4Fourv16b;
6485 else if (Ty == LLT::fixed_vector(4, S16))
6486 Opc = AArch64::ST4Fourv4h;
6487 else if (Ty == LLT::fixed_vector(8, S16))
6488 Opc = AArch64::ST4Fourv8h;
6489 else if (Ty == LLT::fixed_vector(2, S32))
6490 Opc = AArch64::ST4Fourv2s;
6491 else if (Ty == LLT::fixed_vector(4, S32))
6492 Opc = AArch64::ST4Fourv4s;
6493 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6494 Opc = AArch64::ST4Fourv2d;
6495 else if (Ty == S64 || Ty == P0)
6496 Opc = AArch64::ST1Fourv1d;
6497 else
6498 llvm_unreachable("Unexpected type for st4!");
6499 selectVectorStoreIntrinsic(I, 4, Opc);
6500 break;
6501 }
6502 case Intrinsic::aarch64_neon_st2lane: {
6503 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6504 unsigned Opc;
6505 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6506 Opc = AArch64::ST2i8;
6507 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6508 Opc = AArch64::ST2i16;
6509 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6510 Opc = AArch64::ST2i32;
6511 else if (Ty == LLT::fixed_vector(2, S64) ||
6512 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6513 Opc = AArch64::ST2i64;
6514 else
6515 llvm_unreachable("Unexpected type for st2lane!");
6516 if (!selectVectorStoreLaneIntrinsic(I, 2, Opc))
6517 return false;
6518 break;
6519 }
6520 case Intrinsic::aarch64_neon_st3lane: {
6521 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6522 unsigned Opc;
6523 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6524 Opc = AArch64::ST3i8;
6525 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6526 Opc = AArch64::ST3i16;
6527 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6528 Opc = AArch64::ST3i32;
6529 else if (Ty == LLT::fixed_vector(2, S64) ||
6530 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6531 Opc = AArch64::ST3i64;
6532 else
6533 llvm_unreachable("Unexpected type for st3lane!");
6534 if (!selectVectorStoreLaneIntrinsic(I, 3, Opc))
6535 return false;
6536 break;
6537 }
6538 case Intrinsic::aarch64_neon_st4lane: {
6539 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6540 unsigned Opc;
6541 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6542 Opc = AArch64::ST4i8;
6543 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6544 Opc = AArch64::ST4i16;
6545 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6546 Opc = AArch64::ST4i32;
6547 else if (Ty == LLT::fixed_vector(2, S64) ||
6548 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6549 Opc = AArch64::ST4i64;
6550 else
6551 llvm_unreachable("Unexpected type for st4lane!");
6552 if (!selectVectorStoreLaneIntrinsic(I, 4, Opc))
6553 return false;
6554 break;
6555 }
6556 case Intrinsic::aarch64_mops_memset_tag: {
6557 // Transform
6558 // %dst:gpr(p0) = \
6559 // G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.mops.memset.tag),
6560 // \ %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64)
6561 // where %dst is updated, into
6562 // %Rd:GPR64common, %Rn:GPR64) = \
6563 // MOPSMemorySetTaggingPseudo \
6564 // %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64
6565 // where Rd and Rn are tied.
6566 // It is expected that %val has been extended to s64 in legalization.
6567 // Note that the order of the size/value operands is swapped.
6568
6569 Register DstDef = I.getOperand(0).getReg();
6570 // I.getOperand(1) is the intrinsic function
6571 Register DstUse = I.getOperand(2).getReg();
6572 Register ValUse = I.getOperand(3).getReg();
6573 Register SizeUse = I.getOperand(4).getReg();
6574
6575 // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one.
6576 // Therefore an additional virtual register is required for the updated size
6577 // operand. This value is not accessible via the semantics of the intrinsic.
6578 Register SizeDef = MRI.createGenericVirtualRegister(LLT::scalar(64));
6579
6580 auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo,
6581 {DstDef, SizeDef}, {DstUse, SizeUse, ValUse});
6582 Memset.cloneMemRefs(I);
6583 constrainSelectedInstRegOperands(*Memset, TII, TRI, RBI);
6584 break;
6585 }
6586 }
6587
6588 I.eraseFromParent();
6589 return true;
6590}
6591
6592bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
6593 MachineRegisterInfo &MRI) {
6594 unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
6595
6596 switch (IntrinID) {
6597 default:
6598 break;
6599 case Intrinsic::ptrauth_resign: {
6600 Register DstReg = I.getOperand(0).getReg();
6601 Register ValReg = I.getOperand(2).getReg();
6602 uint64_t AUTKey = I.getOperand(3).getImm();
6603 Register AUTDisc = I.getOperand(4).getReg();
6604 uint64_t PACKey = I.getOperand(5).getImm();
6605 Register PACDisc = I.getOperand(6).getReg();
6606
6607 Register AUTAddrDisc = AUTDisc;
6608 uint16_t AUTConstDiscC = 0;
6609 std::tie(AUTConstDiscC, AUTAddrDisc) =
6610 extractPtrauthBlendDiscriminators(AUTDisc, MRI);
6611
6612 Register PACAddrDisc = PACDisc;
6613 uint16_t PACConstDiscC = 0;
6614 std::tie(PACConstDiscC, PACAddrDisc) =
6615 extractPtrauthBlendDiscriminators(PACDisc, MRI);
6616
6617 MIB.buildCopy({AArch64::X16}, {ValReg});
6618 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6619 MIB.buildInstr(AArch64::AUTPAC)
6620 .addImm(AUTKey)
6621 .addImm(AUTConstDiscC)
6622 .addUse(AUTAddrDisc)
6623 .addImm(PACKey)
6624 .addImm(PACConstDiscC)
6625 .addUse(PACAddrDisc)
6626 .constrainAllUses(TII, TRI, RBI);
6627 MIB.buildCopy({DstReg}, Register(AArch64::X16));
6628
6629 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6630 I.eraseFromParent();
6631 return true;
6632 }
6633 case Intrinsic::ptrauth_auth: {
6634 Register DstReg = I.getOperand(0).getReg();
6635 Register ValReg = I.getOperand(2).getReg();
6636 uint64_t AUTKey = I.getOperand(3).getImm();
6637 Register AUTDisc = I.getOperand(4).getReg();
6638
6639 Register AUTAddrDisc = AUTDisc;
6640 uint16_t AUTConstDiscC = 0;
6641 std::tie(AUTConstDiscC, AUTAddrDisc) =
6642 extractPtrauthBlendDiscriminators(AUTDisc, MRI);
6643
6644 if (STI.isX16X17Safer()) {
6645 MIB.buildCopy({AArch64::X16}, {ValReg});
6646 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6647 MIB.buildInstr(AArch64::AUTx16x17)
6648 .addImm(AUTKey)
6649 .addImm(AUTConstDiscC)
6650 .addUse(AUTAddrDisc)
6651 .constrainAllUses(TII, TRI, RBI);
6652 MIB.buildCopy({DstReg}, Register(AArch64::X16));
6653 } else {
6654 Register ScratchReg =
6655 MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
6656 MIB.buildInstr(AArch64::AUTxMxN)
6657 .addDef(DstReg)
6658 .addDef(ScratchReg)
6659 .addUse(ValReg)
6660 .addImm(AUTKey)
6661 .addImm(AUTConstDiscC)
6662 .addUse(AUTAddrDisc)
6663 .constrainAllUses(TII, TRI, RBI);
6664 }
6665
6666 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6667 I.eraseFromParent();
6668 return true;
6669 }
6670 case Intrinsic::frameaddress:
6671 case Intrinsic::returnaddress: {
6672 MachineFunction &MF = *I.getParent()->getParent();
6673 MachineFrameInfo &MFI = MF.getFrameInfo();
6674
6675 unsigned Depth = I.getOperand(2).getImm();
6676 Register DstReg = I.getOperand(0).getReg();
6677 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6678
6679 if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
6680 if (!MFReturnAddr) {
6681 // Insert the copy from LR/X30 into the entry block, before it can be
6682 // clobbered by anything.
6683 MFI.setReturnAddressIsTaken(true);
6684 MFReturnAddr = getFunctionLiveInPhysReg(
6685 MF, TII, AArch64::LR, AArch64::GPR64RegClass, I.getDebugLoc());
6686 }
6687
6688 if (STI.hasPAuth()) {
6689 MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
6690 } else {
6691 MIB.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
6692 MIB.buildInstr(AArch64::XPACLRI);
6693 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
6694 }
6695
6696 I.eraseFromParent();
6697 return true;
6698 }
6699
6700 MFI.setFrameAddressIsTaken(true);
6701 Register FrameAddr(AArch64::FP);
6702 while (Depth--) {
6703 Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
6704 auto Ldr =
6705 MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
6706 constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI);
6707 FrameAddr = NextFrame;
6708 }
6709
6710 if (IntrinID == Intrinsic::frameaddress)
6711 MIB.buildCopy({DstReg}, {FrameAddr});
6712 else {
6713 MFI.setReturnAddressIsTaken(true);
6714
6715 if (STI.hasPAuth()) {
6716 Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
6717 MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
6718 MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
6719 } else {
6720 MIB.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr})
6721 .addImm(1);
6722 MIB.buildInstr(AArch64::XPACLRI);
6723 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
6724 }
6725 }
6726
6727 I.eraseFromParent();
6728 return true;
6729 }
6730 case Intrinsic::aarch64_neon_tbl2:
6731 SelectTable(I, MRI, 2, AArch64::TBLv8i8Two, AArch64::TBLv16i8Two, false);
6732 return true;
6733 case Intrinsic::aarch64_neon_tbl3:
6734 SelectTable(I, MRI, 3, AArch64::TBLv8i8Three, AArch64::TBLv16i8Three,
6735 false);
6736 return true;
6737 case Intrinsic::aarch64_neon_tbl4:
6738 SelectTable(I, MRI, 4, AArch64::TBLv8i8Four, AArch64::TBLv16i8Four, false);
6739 return true;
6740 case Intrinsic::aarch64_neon_tbx2:
6741 SelectTable(I, MRI, 2, AArch64::TBXv8i8Two, AArch64::TBXv16i8Two, true);
6742 return true;
6743 case Intrinsic::aarch64_neon_tbx3:
6744 SelectTable(I, MRI, 3, AArch64::TBXv8i8Three, AArch64::TBXv16i8Three, true);
6745 return true;
6746 case Intrinsic::aarch64_neon_tbx4:
6747 SelectTable(I, MRI, 4, AArch64::TBXv8i8Four, AArch64::TBXv16i8Four, true);
6748 return true;
6749 case Intrinsic::swift_async_context_addr:
6750 auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()},
6751 {Register(AArch64::FP)})
6752 .addImm(8)
6753 .addImm(0);
6754 constrainSelectedInstRegOperands(*Sub, TII, TRI, RBI);
6755
6756 MachineFunction *MF = I.getParent()->getParent();
6757 MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
6758 I.eraseFromParent();
6759 return true;
6760 }
6761 return false;
6762}
6763
6764// G_PTRAUTH_GLOBAL_VALUE lowering
6765//
6766// We have 3 lowering alternatives to choose from:
6767// - MOVaddrPAC: similar to MOVaddr, with added PAC.
6768// If the GV doesn't need a GOT load (i.e., is locally defined)
6769// materialize the pointer using adrp+add+pac. See LowerMOVaddrPAC.
6770//
6771// - LOADgotPAC: similar to LOADgot, with added PAC.
6772// If the GV needs a GOT load, materialize the pointer using the usual
6773// GOT adrp+ldr, +pac. Pointers in GOT are assumed to be not signed, the GOT
6774// section is assumed to be read-only (for example, via relro mechanism). See
6775// LowerMOVaddrPAC.
6776//
6777// - LOADauthptrstatic: similar to LOADgot, but use a
6778// special stub slot instead of a GOT slot.
6779// Load a signed pointer for symbol 'sym' from a stub slot named
6780// 'sym$auth_ptr$key$disc' filled by dynamic linker during relocation
6781// resolving. This usually lowers to adrp+ldr, but also emits an entry into
6782// .data with an
6783// @AUTH relocation. See LowerLOADauthptrstatic.
6784//
6785 // All 3 are pseudos that are expanded late to longer sequences: this lets us
6786// provide integrity guarantees on the to-be-signed intermediate values.
6787//
6788// LOADauthptrstatic is undesirable because it requires a large section filled
6789// with often similarly-signed pointers, making it a good harvesting target.
6790// Thus, it's only used for ptrauth references to extern_weak to avoid null
6791// checks.
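//
// (Editorial summary, not part of the original source:) the selection code
// below chooses MOVaddrPAC for locally-defined globals, LOADgotPAC when a
// GOT load is required, and LOADauthptrstatic only for extern_weak
// references.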
6792
6793bool AArch64InstructionSelector::selectPtrAuthGlobalValue(
6794 MachineInstr &I, MachineRegisterInfo &MRI) const {
6795 Register DefReg = I.getOperand(0).getReg();
6796 Register Addr = I.getOperand(1).getReg();
6797 uint64_t Key = I.getOperand(2).getImm();
6798 Register AddrDisc = I.getOperand(3).getReg();
6799 uint64_t Disc = I.getOperand(4).getImm();
6800 int64_t Offset = 0;
6801
6802 if (Key > AArch64PACKey::LAST)
6803 report_fatal_error("key in ptrauth global out of range [0, " +
6804 Twine((int)AArch64PACKey::LAST) + "]");
6805
6806 // Blend only works if the integer discriminator is 16-bit wide.
6807 if (!isUInt<16>(Disc))
6809 "constant discriminator in ptrauth global out of range [0, 0xffff]");
6810
6811 // Choosing between 3 lowering alternatives is target-specific.
6812 if (!STI.isTargetELF() && !STI.isTargetMachO())
6813 report_fatal_error("ptrauth global lowering only supported on MachO/ELF");
6814
6815 if (!MRI.hasOneDef(Addr))
6816 return false;
6817
6818 // First match any offset we take from the real global.
6819 const MachineInstr *DefMI = &*MRI.def_instr_begin(Addr);
6820 if (DefMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
6821 Register OffsetReg = DefMI->getOperand(2).getReg();
6822 if (!MRI.hasOneDef(OffsetReg))
6823 return false;
6824 const MachineInstr &OffsetMI = *MRI.def_instr_begin(OffsetReg);
6825 if (OffsetMI.getOpcode() != TargetOpcode::G_CONSTANT)
6826 return false;
6827
6828 Addr = DefMI->getOperand(1).getReg();
6829 if (!MRI.hasOneDef(Addr))
6830 return false;
6831
6832 DefMI = &*MRI.def_instr_begin(Addr);
6833 Offset = OffsetMI.getOperand(1).getCImm()->getSExtValue();
6834 }
6835
6836 // We should be left with a genuine unauthenticated GlobalValue.
6837 const GlobalValue *GV;
6838 if (DefMI->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) {
6839 GV = DefMI->getOperand(1).getGlobal();
6840 Offset += DefMI->getOperand(1).getOffset();
6841 } else if (DefMI->getOpcode() == AArch64::G_ADD_LOW) {
6842 GV = DefMI->getOperand(2).getGlobal();
6843 Offset += DefMI->getOperand(2).getOffset();
6844 } else {
6845 return false;
6846 }
6847
6848 MachineIRBuilder MIB(I);
6849
6850 // Classify the reference to determine whether it needs a GOT load.
6851 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
6852 const bool NeedsGOTLoad = ((OpFlags & AArch64II::MO_GOT) != 0);
6853 assert(((OpFlags & (~AArch64II::MO_GOT)) == 0) &&
6854 "unsupported non-GOT op flags on ptrauth global reference");
6855 assert((!GV->hasExternalWeakLinkage() || NeedsGOTLoad) &&
6856 "unsupported non-GOT reference to weak ptrauth global");
6857
6858 std::optional<APInt> AddrDiscVal = getIConstantVRegVal(AddrDisc, MRI);
6859 bool HasAddrDisc = !AddrDiscVal || *AddrDiscVal != 0;
6860
6861 // Non-extern_weak:
6862 // - No GOT load needed -> MOVaddrPAC
6863 // - GOT load for non-extern_weak -> LOADgotPAC
6864 // Note that we disallow extern_weak refs to avoid null checks later.
6865 if (!GV->hasExternalWeakLinkage()) {
6866 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X16}, {});
6867 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6868 MIB.buildInstr(NeedsGOTLoad ? AArch64::LOADgotPAC : AArch64::MOVaddrPAC)
6869 .addGlobalAddress(GV, Offset)
6870 .addImm(Key)
6871 .addReg(HasAddrDisc ? AddrDisc : AArch64::XZR)
6872 .addImm(Disc)
6873 .constrainAllUses(TII, TRI, RBI);
6874 MIB.buildCopy(DefReg, Register(AArch64::X16));
6875 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
6876 I.eraseFromParent();
6877 return true;
6878 }
6879
6880 // extern_weak -> LOADauthptrstatic
6881
6882 // Offsets and extern_weak don't mix well: ptrauth aside, you'd get the
6883 // offset alone as a pointer if the symbol wasn't available, which would
6884 // probably break null checks in users. Ptrauth complicates things further:
6885 // error out.
6886 if (Offset != 0)
6888 "unsupported non-zero offset in weak ptrauth global reference");
6889
6890 if (HasAddrDisc)
6891 report_fatal_error("unsupported weak addr-div ptrauth global");
6892
6893 MIB.buildInstr(AArch64::LOADauthptrstatic, {DefReg}, {})
6894 .addGlobalAddress(GV, Offset)
6895 .addImm(Key)
6896 .addImm(Disc);
6897 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
6898
6899 I.eraseFromParent();
6900 return true;
6901}
6902
6903void AArch64InstructionSelector::SelectTable(MachineInstr &I,
6904 MachineRegisterInfo &MRI,
6905 unsigned NumVec, unsigned Opc1,
6906 unsigned Opc2, bool isExt) {
6907 Register DstReg = I.getOperand(0).getReg();
6908 unsigned Opc = MRI.getType(DstReg) == LLT::fixed_vector(8, 8) ? Opc1 : Opc2;
6909
6910 // Create the REG_SEQUENCE
6911 SmallVector<Register, 4> Regs;
6912 for (unsigned i = 0; i < NumVec; i++)
6913 Regs.push_back(I.getOperand(i + 2 + isExt).getReg());
6914 Register RegSeq = createQTuple(Regs, MIB);
6915
6916 Register IdxReg = I.getOperand(2 + NumVec + isExt).getReg();
6917 MachineInstrBuilder Instr;
6918 if (isExt) {
6919 Register Reg = I.getOperand(2).getReg();
6920 Instr = MIB.buildInstr(Opc, {DstReg}, {Reg, RegSeq, IdxReg});
6921 } else
6922 Instr = MIB.buildInstr(Opc, {DstReg}, {RegSeq, IdxReg});
6923 constrainSelectedInstRegOperands(*Instr, TII, TRI, RBI);
6924 I.eraseFromParent();
6925}
6926
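// (Editorial note; the encoding interpretation here is an assumption, not
// part of the original source.) The ShiftA/ShiftB renderers below compute
// the two immediates used when a constant shift is emitted as a bitfield
// move: for a 32-bit shift by imm, A = (32 - imm) & 31 and B = 31 - imm,
// so imm = 3 yields A = 29 and B = 28; the 64-bit variants use 64/63
// instead.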
6927InstructionSelector::ComplexRendererFns
6928AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
6929 auto MaybeImmed = getImmedFromMO(Root);
6930 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6931 return std::nullopt;
6932 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
6933 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6934}
6935
6936InstructionSelector::ComplexRendererFns
6937AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
6938 auto MaybeImmed = getImmedFromMO(Root);
6939 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6940 return std::nullopt;
6941 uint64_t Enc = 31 - *MaybeImmed;
6942 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6943}
6944
6945InstructionSelector::ComplexRendererFns
6946AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
6947 auto MaybeImmed = getImmedFromMO(Root);
6948 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
6949 return std::nullopt;
6950 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
6951 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6952}
6953
6954InstructionSelector::ComplexRendererFns
6955AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
6956 auto MaybeImmed = getImmedFromMO(Root);
6957 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
6958 return std::nullopt;
6959 uint64_t Enc = 63 - *MaybeImmed;
6960 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6961}
6962
6963/// Helper to select an immediate value that can be represented as a 12-bit
6964/// value shifted left by either 0 or 12. If it is possible to do so, return
6965/// the immediate and shift value. If not, return std::nullopt.
6966///
6967/// Used by selectArithImmed and selectNegArithImmed.
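/// For example (illustrative, not in the original source): 0xabc encodes as
/// (imm = 0xabc, shift = 0) and 0xabc000 as (imm = 0xabc, shift = 12), while
/// 0xabc001 and 0x1abc000 fit neither form and yield std::nullopt.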
6968InstructionSelector::ComplexRendererFns
6969AArch64InstructionSelector::select12BitValueWithLeftShift(
6970 uint64_t Immed) const {
6971 unsigned ShiftAmt;
6972 if (Immed >> 12 == 0) {
6973 ShiftAmt = 0;
6974 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
6975 ShiftAmt = 12;
6976 Immed = Immed >> 12;
6977 } else
6978 return std::nullopt;
6979
6980 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
6981 return {{
6982 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
6983 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
6984 }};
6985}
6986
6987/// SelectArithImmed - Select an immediate value that can be represented as
6988/// a 12-bit value shifted left by either 0 or 12. If so, return true with
6989/// Val set to the 12-bit value and Shift set to the shifter operand.
6990InstructionSelector::ComplexRendererFns
6991AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
6992 // This function is called from the addsub_shifted_imm ComplexPattern,
6993 // which lists [imm] as the list of opcodes it's interested in, however
6994 // we still need to check whether the operand is actually an immediate
6995 // here because the ComplexPattern opcode list is only used in
6996 // root-level opcode matching.
6997 auto MaybeImmed = getImmedFromMO(Root);
6998 if (MaybeImmed == std::nullopt)
6999 return std::nullopt;
7000 return select12BitValueWithLeftShift(*MaybeImmed);
7001}
7002
7003/// SelectNegArithImmed - As above, but negates the value before trying to
7004/// select it.
7005InstructionSelector::ComplexRendererFns
7006AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
7007 // We need a register here, because we need to know if we have a 64 or 32
7008 // bit immediate.
7009 if (!Root.isReg())
7010 return std::nullopt;
7011 auto MaybeImmed = getImmedFromMO(Root);
7012 if (MaybeImmed == std::nullopt)
7013 return std::nullopt;
7014 uint64_t Immed = *MaybeImmed;
7015
7016 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
7017 // have the opposite effect on the C flag, so this pattern mustn't match under
7018 // those circumstances.
7019 if (Immed == 0)
7020 return std::nullopt;
7021
7022 // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
7023 // the root.
7024 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7025 if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
7026 Immed = ~((uint32_t)Immed) + 1;
7027 else
7028 Immed = ~Immed + 1ULL;
7029
7030 if (Immed & 0xFFFFFFFFFF000000ULL)
7031 return std::nullopt;
7032
7033 Immed &= 0xFFFFFFULL;
7034 return select12BitValueWithLeftShift(Immed);
7035}
7036
7037/// Checks if we are sure that folding MI into load/store addressing mode is
7038/// beneficial or not.
7039///
7040/// Returns:
7041/// - true if folding MI would be beneficial.
7042/// - false if folding MI would be bad.
7043/// - std::nullopt if it is not sure whether folding MI is beneficial.
7044///
7045/// \p MI can be the offset operand of G_PTR_ADD, e.g. G_SHL in the example:
7046///
7047/// %13:gpr(s64) = G_CONSTANT i64 1
7048/// %8:gpr(s64) = G_SHL %6, %13(s64)
7049/// %9:gpr(p0) = G_PTR_ADD %0, %8(s64)
7050/// %12:gpr(s32) = G_LOAD %9(p0) :: (load (s16))
7051std::optional<bool> AArch64InstructionSelector::isWorthFoldingIntoAddrMode(
7052 MachineInstr &MI, const MachineRegisterInfo &MRI) const {
7053 if (MI.getOpcode() == AArch64::G_SHL) {
7054 // Address operands with shifts are free, except for running on subtargets
7055 // with AddrLSLSlow14.
7056 if (const auto ValAndVReg = getIConstantVRegValWithLookThrough(
7057 MI.getOperand(2).getReg(), MRI)) {
7058 const APInt ShiftVal = ValAndVReg->Value;
7059
7060 // Don't fold if we know this will be slow.
7061 return !(STI.hasAddrLSLSlow14() && (ShiftVal == 1 || ShiftVal == 4));
7062 }
7063 }
7064 return std::nullopt;
7065}
7066
7067/// Return true if it is worth folding MI into an extended register. That is,
7068/// if it's safe to pull it into the addressing mode of a load or store as a
7069/// shift.
7070/// \p IsAddrOperand whether the def of MI is used as an address operand
7071/// (e.g. feeding into an LDR/STR).
7072bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
7073 MachineInstr &MI, const MachineRegisterInfo &MRI,
7074 bool IsAddrOperand) const {
7075
7076 // Always fold if there is one use, or if we're optimizing for size.
7077 Register DefReg = MI.getOperand(0).getReg();
7078 if (MRI.hasOneNonDBGUse(DefReg) ||
7079 MI.getParent()->getParent()->getFunction().hasOptSize())
7080 return true;
7081
7082 if (IsAddrOperand) {
7083 // If we are already sure that folding MI is good or bad, return the result.
7084 if (const auto Worth = isWorthFoldingIntoAddrMode(MI, MRI))
7085 return *Worth;
7086
7087 // Fold G_PTR_ADD if its offset operand can be folded
7088 if (MI.getOpcode() == AArch64::G_PTR_ADD) {
7089 MachineInstr *OffsetInst =
7090 getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
7091
7092 // Note, we already know G_PTR_ADD is used by at least two instructions.
7093 // If we are also sure about whether folding is beneficial or not,
7094 // return the result.
7095 if (const auto Worth = isWorthFoldingIntoAddrMode(*OffsetInst, MRI))
7096 return *Worth;
7097 }
7098 }
7099
7100 // FIXME: Consider checking HasALULSLFast as appropriate.
7101
7102 // We have a fastpath, so folding a shift in and potentially computing it
7103 // many times may be beneficial. Check if this is only used in memory ops.
7104 // If it is, then we should fold.
7105 return all_of(MRI.use_nodbg_instructions(DefReg),
7106 [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
7107}
7108
7109 static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) {
7110 switch (Type) {
7111 case AArch64_AM::SXTB:
7112 case AArch64_AM::SXTH:
7113 case AArch64_AM::SXTW:
7114 return true;
7115 default:
7116 return false;
7117 }
7118}
7119
7120InstructionSelector::ComplexRendererFns
7121AArch64InstructionSelector::selectExtendedSHL(
7122 MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
7123 unsigned SizeInBytes, bool WantsExt) const {
7124 assert(Base.isReg() && "Expected base to be a register operand");
7125 assert(Offset.isReg() && "Expected offset to be a register operand");
7126
7127 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7128 MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
7129
7130 unsigned OffsetOpc = OffsetInst->getOpcode();
7131 bool LookedThroughZExt = false;
7132 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
7133 // Try to look through a ZEXT.
7134 if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
7135 return std::nullopt;
7136
7137 OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg());
7138 OffsetOpc = OffsetInst->getOpcode();
7139 LookedThroughZExt = true;
7140
7141 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
7142 return std::nullopt;
7143 }
7144 // Make sure that the memory op is a valid size.
7145 int64_t LegalShiftVal = Log2_32(SizeInBytes);
7146 if (LegalShiftVal == 0)
7147 return std::nullopt;
7148 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))
7149 return std::nullopt;
7150
7151 // Now, try to find the specific G_CONSTANT. Start by assuming that the
7152 // register we will offset is the LHS, and the register containing the
7153 // constant is the RHS.
7154 Register OffsetReg = OffsetInst->getOperand(1).getReg();
7155 Register ConstantReg = OffsetInst->getOperand(2).getReg();
7156 auto ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
7157 if (!ValAndVReg) {
7158 // We didn't get a constant on the RHS. If the opcode is a shift, then
7159 // we're done.
7160 if (OffsetOpc == TargetOpcode::G_SHL)
7161 return std::nullopt;
7162
7163 // If we have a G_MUL, we can use either register. Try looking at the RHS.
7164 std::swap(OffsetReg, ConstantReg);
7165 ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
7166 if (!ValAndVReg)
7167 return std::nullopt;
7168 }
7169
7170 // The value must fit into 3 bits, and must be positive. Make sure that is
7171 // true.
7172 int64_t ImmVal = ValAndVReg->Value.getSExtValue();
7173
7174 // Since we're going to pull this into a shift, the constant value must be
7175 // a power of 2. If we got a multiply, then we need to check this.
7176 if (OffsetOpc == TargetOpcode::G_MUL) {
7177 if (!llvm::has_single_bit<uint32_t>(ImmVal))
7178 return std::nullopt;
7179
7180 // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
7181 ImmVal = Log2_32(ImmVal);
7182 }
7183
7184 if ((ImmVal & 0x7) != ImmVal)
7185 return std::nullopt;
7186
7187 // We are only allowed to shift by LegalShiftVal. This shift value is built
7188 // into the instruction, so we can't just use whatever we want.
7189 if (ImmVal != LegalShiftVal)
7190 return std::nullopt;
7191
7192 unsigned SignExtend = 0;
7193 if (WantsExt) {
7194 // Check if the offset is defined by an extend, unless we looked through a
7195 // G_ZEXT earlier.
7196 if (!LookedThroughZExt) {
7197 MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
7198 auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
7199 if (Ext == AArch64_AM::InvalidShiftExtend)
7200 return std::nullopt;
7201
7202 SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
7203 // We only support SXTW for signed extension here.
7204 if (SignExtend && Ext != AArch64_AM::SXTW)
7205 return std::nullopt;
7206 OffsetReg = ExtInst->getOperand(1).getReg();
7207 }
7208
7209 // Need a 32-bit wide register here.
7210 MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
7211 OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
7212 }
7213
7214 // We can use the LHS of the GEP as the base, and the LHS of the shift as an
7215 // offset. Signify that we are shifting by setting the shift flag to 1.
7216 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
7217 [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
7218 [=](MachineInstrBuilder &MIB) {
7219 // Need to add both immediates here to make sure that they are both
7220 // added to the instruction.
7221 MIB.addImm(SignExtend);
7222 MIB.addImm(1);
7223 }}};
7224}
7225
7226/// This is used for computing addresses like this:
7227///
7228/// ldr x1, [x2, x3, lsl #3]
7229///
7230/// Where x2 is the base register, and x3 is an offset register. The shift-left
7231/// is a constant value specific to this load instruction. That is, we'll never
7232/// see anything other than a 3 here (which corresponds to the size of the
7233/// element being loaded.)
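/// For instance (illustrative, not in the original source): for an 8-byte
/// access SizeInBytes == 8, so only an offset built as "G_SHL off, 3" (or a
/// G_MUL by 8) folds into "lsl #3"; any other shift amount is rejected below.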
7234InstructionSelector::ComplexRendererFns
7235AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
7236 MachineOperand &Root, unsigned SizeInBytes) const {
7237 if (!Root.isReg())
7238 return std::nullopt;
7239 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7240
7241 // We want to find something like this:
7242 //
7243 // val = G_CONSTANT LegalShiftVal
7244 // shift = G_SHL off_reg val
7245 // ptr = G_PTR_ADD base_reg shift
7246 // x = G_LOAD ptr
7247 //
7248 // And fold it into this addressing mode:
7249 //
7250 // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
7251
7252 // Check if we can find the G_PTR_ADD.
7253 MachineInstr *PtrAdd =
7254 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7255 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))
7256 return std::nullopt;
7257
7258 // Now, try to match an opcode which will match our specific offset.
7259 // We want a G_SHL or a G_MUL.
7260 MachineInstr *OffsetInst =
7261 getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
7262 return selectExtendedSHL(Root, PtrAdd->getOperand(1),
7263 OffsetInst->getOperand(0), SizeInBytes,
7264 /*WantsExt=*/false);
7265}
7266
7267/// This is used for computing addresses like this:
7268///
7269/// ldr x1, [x2, x3]
7270///
7271/// Where x2 is the base register, and x3 is an offset register.
7272///
7273/// When possible (or profitable) to fold a G_PTR_ADD into the address
7274/// calculation, this will do so. Otherwise, it will return std::nullopt.
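/// Illustrative note (not in the original source): the two trailing zero
/// immediates rendered below fill the "extend" and "shift" fields of the
/// [base, xN] register-offset addressing mode, i.e. no extend and no shift.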
7275InstructionSelector::ComplexRendererFns
7276AArch64InstructionSelector::selectAddrModeRegisterOffset(
7277 MachineOperand &Root) const {
7278 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7279
7280 // We need a GEP.
7281 MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
7282 if (Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
7283 return std::nullopt;
7284
7285 // If this is used more than once, let's not bother folding.
7286 // TODO: Check if they are memory ops. If they are, then we can still fold
7287 // without having to recompute anything.
7288 if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
7289 return std::nullopt;
7290
7291 // Base is the GEP's LHS, offset is its RHS.
7292 return {{[=](MachineInstrBuilder &MIB) {
7293 MIB.addUse(Gep->getOperand(1).getReg());
7294 },
7295 [=](MachineInstrBuilder &MIB) {
7296 MIB.addUse(Gep->getOperand(2).getReg());
7297 },
7298 [=](MachineInstrBuilder &MIB) {
7299 // Need to add both immediates here to make sure that they are both
7300 // added to the instruction.
7301 MIB.addImm(0);
7302 MIB.addImm(0);
7303 }}};
7304}
7305
7306/// This is intended to be equivalent to selectAddrModeXRO in
7307/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
7308InstructionSelector::ComplexRendererFns
7309AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
7310 unsigned SizeInBytes) const {
7311 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7312 if (!Root.isReg())
7313 return std::nullopt;
7314 MachineInstr *PtrAdd =
7315 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7316 if (!PtrAdd)
7317 return std::nullopt;
7318
7319 // Check for immediates which cannot be encoded in the [base + imm]
7320 // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
7321 // end up with code like:
7322 //
7323 // mov x0, wide
7324 // add x1 base, x0
7325 // ldr x2, [x1, x0]
7326 //
7327 // In this situation, we can use the [base, xreg] addressing mode to save an
7328 // add/sub:
7329 //
7330 // mov x0, wide
7331 // ldr x2, [base, x0]
7332 auto ValAndVReg =
7333 getIConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
7334 if (ValAndVReg) {
7335 unsigned Scale = Log2_32(SizeInBytes);
7336 int64_t ImmOff = ValAndVReg->Value.getSExtValue();
7337
7338 // Skip immediates that can be selected in the load/store addressing
7339 // mode.
7340 if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
7341 ImmOff < (0x1000 << Scale))
7342 return std::nullopt;
7343
7344 // Helper lambda to decide whether or not it is preferable to emit an add.
7345 auto isPreferredADD = [](int64_t ImmOff) {
7346 // Constants in [0x0, 0xfff] can be encoded in an add.
7347 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
7348 return true;
7349
7350 // Can it be encoded in an add lsl #12?
7351 if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
7352 return false;
7353
7354 // It can be encoded in an add lsl #12, but we may not want to. If it is
7355 // possible to select this as a single movz, then prefer that. A single
7356 // movz is faster than an add with a shift.
7357 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
7358 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
7359 };
7360
7361 // If the immediate can be encoded in a single add/sub, then bail out.
7362 if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
7363 return std::nullopt;
7364 }
7365
7366 // Try to fold shifts into the addressing mode.
7367 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
7368 if (AddrModeFns)
7369 return AddrModeFns;
7370
7371 // If that doesn't work, see if it's possible to fold in registers from
7372 // a GEP.
7373 return selectAddrModeRegisterOffset(Root);
7374}
7375
7376/// This is used for computing addresses like this:
7377///
7378/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
7379///
7380/// Where we have a 64-bit base register, a 32-bit offset register, and an
7381/// extend (which may or may not be signed).
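/// Illustrative example (not in the original source): for a 4-byte load, a
/// G_SEXT from s32 feeding "G_SHL off, 2" folds to
/// "ldr w0, [xBase, wOff, sxtw #2]"; a looked-through G_ZEXT gives the uxtw
/// form instead.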
7382InstructionSelector::ComplexRendererFns
7383AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
7384 unsigned SizeInBytes) const {
7385 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7386
7387 MachineInstr *PtrAdd =
7388 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7389 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))
7390 return std::nullopt;
7391
7392 MachineOperand &LHS = PtrAdd->getOperand(1);
7393 MachineOperand &RHS = PtrAdd->getOperand(2);
7394 MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
7395
7396 // The first case is the same as selectAddrModeXRO, except we need an extend.
7397 // In this case, we try to find a shift and extend, and fold them into the
7398 // addressing mode.
7399 //
7400 // E.g.
7401 //
7402 // off_reg = G_Z/S/ANYEXT ext_reg
7403 // val = G_CONSTANT LegalShiftVal
7404 // shift = G_SHL off_reg val
7405 // ptr = G_PTR_ADD base_reg shift
7406 // x = G_LOAD ptr
7407 //
7408 // In this case we can get a load like this:
7409 //
7410 // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
7411 auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
7412 SizeInBytes, /*WantsExt=*/true);
7413 if (ExtendedShl)
7414 return ExtendedShl;
7415
7416 // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
7417 //
7418 // e.g.
7419 // ldr something, [base_reg, ext_reg, sxtw]
7420 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))
7421 return std::nullopt;
7422
7423 // Check if this is an extend. We'll get an extend type if it is.
7424 AArch64_AM::ShiftExtendType Ext =
7425 getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
7426 if (Ext == AArch64_AM::InvalidShiftExtend)
7427 return std::nullopt;
7428
7429 // Need a 32-bit wide register.
7430 MachineIRBuilder MIB(*PtrAdd);
7431 Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
7432 AArch64::GPR32RegClass, MIB);
7433 unsigned SignExtend = Ext == AArch64_AM::SXTW;
7434
7435 // Base is LHS, offset is ExtReg.
7436 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
7437 [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
7438 [=](MachineInstrBuilder &MIB) {
7439 MIB.addImm(SignExtend);
7440 MIB.addImm(0);
7441 }}};
7442}
7443
7444/// Select a "register plus unscaled signed 9-bit immediate" address. This
7445/// should only match when there is an offset that is not valid for a scaled
7446/// immediate addressing mode. The "Size" argument is the size in bytes of the
7447/// memory reference, which is needed here to know what is valid for a scaled
7448/// immediate.
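/// Example (illustrative, not in the original source): a -8 byte offset on an
/// 8-byte access cannot use the scaled unsigned form but fits the signed
/// 9-bit range [-256, 255] checked below, e.g. ldur x0, [x1, #-8].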
7449InstructionSelector::ComplexRendererFns
7450AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
7451 unsigned Size) const {
7452 MachineRegisterInfo &MRI =
7453 Root.getParent()->getParent()->getParent()->getRegInfo();
7454
7455 if (!Root.isReg())
7456 return std::nullopt;
7457
7458 if (!isBaseWithConstantOffset(Root, MRI))
7459 return std::nullopt;
7460
7461 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
7462
7463 MachineOperand &OffImm = RootDef->getOperand(2);
7464 if (!OffImm.isReg())
7465 return std::nullopt;
7466 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
7467 if (RHS->getOpcode() != TargetOpcode::G_CONSTANT)
7468 return std::nullopt;
7469 int64_t RHSC;
7470 MachineOperand &RHSOp1 = RHS->getOperand(1);
7471 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
7472 return std::nullopt;
7473 RHSC = RHSOp1.getCImm()->getSExtValue();
7474
7475 if (RHSC >= -256 && RHSC < 256) {
7476 MachineOperand &Base = RootDef->getOperand(1);
7477 return {{
7478 [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
7479 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
7480 }};
7481 }
7482 return std::nullopt;
7483}
7484
7485InstructionSelector::ComplexRendererFns
7486AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
7487 unsigned Size,
7488 MachineRegisterInfo &MRI) const {
7489 if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
7490 return std::nullopt;
7491 MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
7492 if (Adrp.getOpcode() != AArch64::ADRP)
7493 return std::nullopt;
7494
7495 // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
7496 auto Offset = Adrp.getOperand(1).getOffset();
7497 if (Offset % Size != 0)
7498 return std::nullopt;
7499
7500 auto GV = Adrp.getOperand(1).getGlobal();
7501 if (GV->isThreadLocal())
7502 return std::nullopt;
7503
7504 auto &MF = *RootDef.getParent()->getParent();
7505 if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
7506 return std::nullopt;
7507
7508 unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
7509 MachineIRBuilder MIRBuilder(RootDef);
7510 Register AdrpReg = Adrp.getOperand(0).getReg();
7511 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
7512 [=](MachineInstrBuilder &MIB) {
7513 MIB.addGlobalAddress(GV, Offset,
7514 OpFlags | AArch64II::MO_PAGEOFF |
7515 AArch64II::MO_NC);
7516 }}};
7517}
7518
7519/// Select a "register plus scaled unsigned 12-bit immediate" address. The
7520/// "Size" argument is the size in bytes of the memory reference, which
7521/// determines the scale.
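/// Example (illustrative, not in the original source): with Size == 8, a byte
/// offset of 16 is rendered as the scaled immediate 2 (16 >> 3); only offsets
/// that are multiples of 8 in [0, 32760] can use this form.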
7522InstructionSelector::ComplexRendererFns
7523AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
7524 unsigned Size) const {
7525 MachineFunction &MF = *Root.getParent()->getParent()->getParent();
7526 MachineRegisterInfo &MRI = MF.getRegInfo();
7527
7528 if (!Root.isReg())
7529 return std::nullopt;
7530
7531 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
7532 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
7533 return {{
7534 [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
7535 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
7536 }};
7537 }
7538
7539 CodeModel::Model CM = MF.getTarget().getCodeModel();
7540 // Check if we can fold in the ADD of small code model ADRP + ADD address.
7541 // HACK: ld64 on Darwin doesn't support relocations on PRFM, so we can't fold
7542 // globals into the offset.
7543 MachineInstr *RootParent = Root.getParent();
7544 if (CM == CodeModel::Small &&
7545 !(RootParent->getOpcode() == AArch64::G_AARCH64_PREFETCH &&
7546 STI.isTargetDarwin())) {
7547 auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
7548 if (OpFns)
7549 return OpFns;
7550 }
7551
7552 if (isBaseWithConstantOffset(Root, MRI)) {
7553 MachineOperand &LHS = RootDef->getOperand(1);
7554 MachineOperand &RHS = RootDef->getOperand(2);
7555 MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
7556 MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
7557
7558 int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
7559 unsigned Scale = Log2_32(Size);
7560 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
7561 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
7562 return {{
7563 [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
7564 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
7565 }};
7566
7567 return {{
7568 [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
7569 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
7570 }};
7571 }
7572 }
7573
7574 // Before falling back to our general case, check if the unscaled
7575 // instructions can handle this. If so, that's preferable.
7576 if (selectAddrModeUnscaled(Root, Size))
7577 return std::nullopt;
7578
7579 return {{
7580 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
7581 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
7582 }};
7583}
7584
7585/// Given a shift instruction, return the correct shift type for that
7586/// instruction.
7587 static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
7588 switch (MI.getOpcode()) {
7589 default:
7590 return AArch64_AM::InvalidShiftExtend;
7591 case TargetOpcode::G_SHL:
7592 return AArch64_AM::LSL;
7593 case TargetOpcode::G_LSHR:
7594 return AArch64_AM::LSR;
7595 case TargetOpcode::G_ASHR:
7596 return AArch64_AM::ASR;
7597 case TargetOpcode::G_ROTR:
7598 return AArch64_AM::ROR;
7599 }
7600}
7601
7602/// Select a "shifted register" operand. If the value is not shifted, set the
7603/// shift operand to a default value of "lsl 0".
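/// Example (illustrative, not in the original source): a G_SHL of a 64-bit
/// value by 3 folds into the user as "lsl #3" (e.g. add x0, x1, x2, lsl #3);
/// the shift amount is masked to the register width below.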
7604InstructionSelector::ComplexRendererFns
7605AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
7606 bool AllowROR) const {
7607 if (!Root.isReg())
7608 return std::nullopt;
7609 MachineRegisterInfo &MRI =
7610 Root.getParent()->getParent()->getParent()->getRegInfo();
7611
7612 // Check if the operand is defined by an instruction which corresponds to
7613 // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
7614 MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
7615 AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
7616 if (ShType == AArch64_AM::InvalidShiftExtend)
7617 return std::nullopt;
7618 if (ShType == AArch64_AM::ROR && !AllowROR)
7619 return std::nullopt;
7620 if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI, false))
7621 return std::nullopt;
7622
7623 // Need an immediate on the RHS.
7624 MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
7625 auto Immed = getImmedFromMO(ShiftRHS);
7626 if (!Immed)
7627 return std::nullopt;
7628
7629 // We have something that we can fold. Fold in the shift's LHS and RHS into
7630 // the instruction.
7631 MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
7632 Register ShiftReg = ShiftLHS.getReg();
7633
7634 unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
7635 unsigned Val = *Immed & (NumBits - 1);
7636 unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);
7637
7638 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
7639 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
7640}
7641
7642AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
7643 MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
7644 unsigned Opc = MI.getOpcode();
7645
7646 // Handle explicit extend instructions first.
7647 if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
7648 unsigned Size;
7649 if (Opc == TargetOpcode::G_SEXT)
7650 Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7651 else
7652 Size = MI.getOperand(2).getImm();
7653 assert(Size != 64 && "Extend from 64 bits?");
7654 switch (Size) {
7655 case 8:
7656 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB;
7657 case 16:
7658 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH;
7659 case 32:
7660 return AArch64_AM::SXTW;
7661 default:
7662 return AArch64_AM::InvalidShiftExtend;
7663 }
7664 }
7665
7666 if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
7667 unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7668 assert(Size != 64 && "Extend from 64 bits?");
7669 switch (Size) {
7670 case 8:
7671 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB;
7672 case 16:
7673 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH;
7674 case 32:
7675 return AArch64_AM::UXTW;
7676 default:
7677 return AArch64_AM::InvalidShiftExtend;
7678 }
7679 }
7680
7681 // Don't have an explicit extend. Try to handle a G_AND with a constant mask
7682 // on the RHS.
7683 if (Opc != TargetOpcode::G_AND)
7684 return AArch64_AM::InvalidShiftExtend;
7685
7686 std::optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
7687 if (!MaybeAndMask)
7688 return AArch64_AM::InvalidShiftExtend;
7689 uint64_t AndMask = *MaybeAndMask;
7690 switch (AndMask) {
7691 default:
7692 return AArch64_AM::InvalidShiftExtend;
7693 case 0xFF:
7694 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
7695 case 0xFFFF:
7696 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
7697 case 0xFFFFFFFF:
7698 return AArch64_AM::UXTW;
7699 }
7700}
7701
7702Register AArch64InstructionSelector::moveScalarRegClass(
7703 Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
7704 MachineRegisterInfo &MRI = *MIB.getMRI();
7705 auto Ty = MRI.getType(Reg);
7706 assert(!Ty.isVector() && "Expected scalars only!");
7707 if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
7708 return Reg;
7709
7710 // Create a copy and immediately select it.
7711 // FIXME: We should have an emitCopy function?
7712 auto Copy = MIB.buildCopy({&RC}, {Reg});
7713 selectCopy(*Copy, TII, MRI, TRI, RBI);
7714 return Copy.getReg(0);
7715}
7716
7717/// Select an "extended register" operand. This operand folds in an extend
7718/// followed by an optional left shift.
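/// Example (illustrative, not in the original source): G_SHL(G_SEXT s32->s64,
/// 2) folds as "sxtw #2" (e.g. add x0, x1, w2, sxtw #2); shift amounts larger
/// than 4 are rejected below.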
7719InstructionSelector::ComplexRendererFns
7720AArch64InstructionSelector::selectArithExtendedRegister(
7721 MachineOperand &Root) const {
7722 if (!Root.isReg())
7723 return std::nullopt;
7724 MachineRegisterInfo &MRI =
7725 Root.getParent()->getParent()->getParent()->getRegInfo();
7726
7727 uint64_t ShiftVal = 0;
7728 Register ExtReg;
7729 AArch64_AM::ShiftExtendType Ext;
7730 MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
7731 if (!RootDef)
7732 return std::nullopt;
7733
7734 if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI, false))
7735 return std::nullopt;
7736
7737 // Check if we can fold a shift and an extend.
7738 if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
7739 // Look for a constant on the RHS of the shift.
7740 MachineOperand &RHS = RootDef->getOperand(2);
7741 std::optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
7742 if (!MaybeShiftVal)
7743 return std::nullopt;
7744 ShiftVal = *MaybeShiftVal;
7745 if (ShiftVal > 4)
7746 return std::nullopt;
7747 // Look for a valid extend instruction on the LHS of the shift.
7748 MachineOperand &LHS = RootDef->getOperand(1);
7749 MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
7750 if (!ExtDef)
7751 return std::nullopt;
7752 Ext = getExtendTypeForInst(*ExtDef, MRI);
7753 if (Ext == AArch64_AM::InvalidShiftExtend)
7754 return std::nullopt;
7755 ExtReg = ExtDef->getOperand(1).getReg();
7756 } else {
7757 // Didn't get a shift. Try just folding an extend.
7758 Ext = getExtendTypeForInst(*RootDef, MRI);
7759 if (Ext == AArch64_AM::InvalidShiftExtend)
7760 return std::nullopt;
7761 ExtReg = RootDef->getOperand(1).getReg();
7762
7763 // If we have a 32 bit instruction which zeroes out the high half of a
7764 // register, we get an implicit zero extend for free. Check if we have one.
7765 // FIXME: We actually emit the extend right now even though we don't have
7766 // to.
7767 if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
7768 MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
7769 if (isDef32(*ExtInst))
7770 return std::nullopt;
7771 }
7772 }
7773
7774 // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
7775 // copy.
7776 MachineIRBuilder MIB(*RootDef);
7777 ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);
7778
7779 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
7780 [=](MachineInstrBuilder &MIB) {
7781 MIB.addImm(getArithExtendImm(Ext, ShiftVal));
7782 }}};
7783}
7784
7785InstructionSelector::ComplexRendererFns
7786AArch64InstructionSelector::selectExtractHigh(MachineOperand &Root) const {
7787 if (!Root.isReg())
7788 return std::nullopt;
7789 MachineRegisterInfo &MRI =
7790 Root.getParent()->getParent()->getParent()->getRegInfo();
7791
7792 auto Extract = getDefSrcRegIgnoringCopies(Root.getReg(), MRI);
7793 while (Extract && Extract->MI->getOpcode() == TargetOpcode::G_BITCAST &&
7794 STI.isLittleEndian())
7795 Extract =
7796 getDefSrcRegIgnoringCopies(Extract->MI->getOperand(1).getReg(), MRI);
7797 if (!Extract)
7798 return std::nullopt;
7799
7800 if (Extract->MI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
7801 if (Extract->Reg == Extract->MI->getOperand(1).getReg()) {
7802 Register ExtReg = Extract->MI->getOperand(2).getReg();
7803 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
7804 }
7805 }
7806 if (Extract->MI->getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) {
7807 LLT SrcTy = MRI.getType(Extract->MI->getOperand(1).getReg());
7808 auto LaneIdx = getIConstantVRegValWithLookThrough(
7809 Extract->MI->getOperand(2).getReg(), MRI);
7810 if (LaneIdx && SrcTy == LLT::fixed_vector(2, 64) &&
7811 LaneIdx->Value.getSExtValue() == 1) {
7812 Register ExtReg = Extract->MI->getOperand(1).getReg();
7813 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
7814 }
7815 }
7816
7817 return std::nullopt;
7818}
7819
7820void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
7821 const MachineInstr &MI,
7822 int OpIdx) const {
7823 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
7824 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7825 "Expected G_CONSTANT");
7826 std::optional<int64_t> CstVal =
7827 getIConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
7828 assert(CstVal && "Expected constant value");
7829 MIB.addImm(*CstVal);
7830}
7831
7832void AArch64InstructionSelector::renderLogicalImm32(
7833 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7834 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7835 "Expected G_CONSTANT");
7836 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
7837 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
7838 MIB.addImm(Enc);
7839}
7840
7841void AArch64InstructionSelector::renderLogicalImm64(
7842 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7843 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7844 "Expected G_CONSTANT");
7845 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
7846 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
7847 MIB.addImm(Enc);
7848}
7849
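// Custom renderer for G_UBSANTRAP: the trap immediate is formed from the
// UBSan check kind in the low byte with an ASCII 'U' marker in the high byte
// (the convention used for UBSan trap reasons on AArch64).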
7850void AArch64InstructionSelector::renderUbsanTrap(MachineInstrBuilder &MIB,
7851 const MachineInstr &MI,
7852 int OpIdx) const {
7853 assert(MI.getOpcode() == TargetOpcode::G_UBSANTRAP && OpIdx == 0 &&
7854 "Expected G_UBSANTRAP");
7855 MIB.addImm(MI.getOperand(0).getImm() | ('U' << 8));
7856}
7857
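// Custom renderers for G_FCONSTANT: renderFPImm16/32/64 below emit the 8-bit
// encoded form of the floating-point immediate (the encoding used by
// FMOV-immediate style instructions), derived from the constant's APFloat.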
7858void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB,
7859 const MachineInstr &MI,
7860 int OpIdx) const {
7861 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7862 "Expected G_FCONSTANT");
7863 MIB.addImm(
7864 AArch64_AM::getFP16Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7865}
7866
7867void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB,
7868 const MachineInstr &MI,
7869 int OpIdx) const {
7870 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7871 "Expected G_FCONSTANT");
7872 MIB.addImm(
7873 AArch64_AM::getFP32Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7874}
7875
7876void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
7877 const MachineInstr &MI,
7878 int OpIdx) const {
7879 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7880 "Expected G_FCONSTANT");
7881 MIB.addImm(
7882 AArch64_AM::getFP64Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7883}
7884
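// Custom renderer for a 32-bit G_FCONSTANT whose bit pattern is representable
// as an AdvSIMD "modified immediate" of type 4: the raw bits are re-encoded
// into that compact form for the vector-immediate patterns that request it.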
7885void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
7886 MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
7887 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7888 "Expected G_FCONSTANT");
7890 .getFPImm()
7891 ->getValueAPF()
7892 .bitcastToAPInt()
7893 .getZExtValue()));
7894}
7895
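// Returns true if MI is a load or store whose single memory operand accesses
// exactly NumBytes bytes.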
7896bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
7897 const MachineInstr &MI, unsigned NumBytes) const {
7898 if (!MI.mayLoadOrStore())
7899 return false;
7900 assert(MI.hasOneMemOperand() &&
7901 "Expected load/store to have only one mem op!");
7902 return (*MI.memoperands_begin())->getSize() == NumBytes;
7903}
7904
7905bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
7906 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
7907 if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
7908 return false;
7909
7910 // Only return true if we know the operation will zero-out the high half of
7911 // the 64-bit register. Truncates can be subregister copies, which don't
7912 // zero out the high bits. Copies and other copy-like instructions can be
7913 // fed by truncates, or could be lowered as subregister copies.
7914 switch (MI.getOpcode()) {
7915 default:
7916 return true;
7917 case TargetOpcode::COPY:
7918 case TargetOpcode::G_BITCAST:
7919 case TargetOpcode::G_TRUNC:
7920 case TargetOpcode::G_PHI:
7921 return false;
7922 }
7923}
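// Illustrative sketch (not from this file) of why G_TRUNC is excluded above:
//   %w:gpr(s32) = G_TRUNC %x:gpr(s64)
// can be selected as a plain subregister copy, which leaves bits [63:32] of
// the containing 64-bit register unchanged, so it must not be treated as an
// implicit zero-extend.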
7924
7925
7926// Perform fixups on the given PHI instruction's operands to force them all
7927// to be the same as the destination regbank.
7928static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
7929 const AArch64RegisterBankInfo &RBI) {
7930 assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
7931 Register DstReg = MI.getOperand(0).getReg();
7932 const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
7933 assert(DstRB && "Expected PHI dst to have regbank assigned");
7934 MachineIRBuilder MIB(MI);
7935
7936 // Go through each operand and ensure it has the same regbank.
7937 for (MachineOperand &MO : llvm::drop_begin(MI.operands())) {
7938 if (!MO.isReg())
7939 continue;
7940 Register OpReg = MO.getReg();
7941 const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
7942 if (RB != DstRB) {
7943 // Insert a cross-bank copy.
7944 auto *OpDef = MRI.getVRegDef(OpReg);
7945 const LLT &Ty = MRI.getType(OpReg);
7946 MachineBasicBlock &OpDefBB = *OpDef->getParent();
7947
7948 // Any instruction we insert must appear after all PHIs in the block
7949 // for the block to be valid MIR.
7950 MachineBasicBlock::iterator InsertPt = std::next(OpDef->getIterator());
7951 if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
7952 InsertPt = OpDefBB.getFirstNonPHI();
7953 MIB.setInsertPt(*OpDef->getParent(), InsertPt);
7954 auto Copy = MIB.buildCopy(Ty, OpReg);
7955 MRI.setRegBank(Copy.getReg(0), *DstRB);
7956 MO.setReg(Copy.getReg(0));
7957 }
7958 }
7959}
7960
7961void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
7962 // We're looking for PHIs; build a list so we don't invalidate iterators.
7963 MachineRegisterInfo &MRI = MF.getRegInfo();
7964 SmallVector<MachineInstr *, 32> Phis;
7965 for (auto &BB : MF) {
7966 for (auto &MI : BB) {
7967 if (MI.getOpcode() == TargetOpcode::G_PHI)
7968 Phis.emplace_back(&MI);
7969 }
7970 }
7971
7972 for (auto *MI : Phis) {
7973 // We need to do some work here if the operand types are < 32 bits and
7974 // they are split across fpr/gpr banks. Since all types <32b on gpr
7975 // end up being assigned gpr32 regclasses, we can end up with PHIs here
7976 // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
7977 // be selecting heterogeneous regbanks for operands if possible, but we
7978 // still need to be able to deal with it here.
7979 //
7980 // To fix this, if we have a gpr-bank operand < 32b in size and at least
7981 // one other operand is on the fpr bank, then we add cross-bank copies
7982 // to homogenize the operand banks. For simplicity the bank that we choose
7983 // to settle on is whatever bank the def operand has. For example:
7984 //
7985 // %endbb:
7986 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
7987 // =>
7988 // %bb2:
7989 // ...
7990 // %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
7991 // ...
7992 // %endbb:
7993 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
7994 bool HasGPROp = false, HasFPROp = false;
7995 for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
7996 if (!MO.isReg())
7997 continue;
7998 const LLT &Ty = MRI.getType(MO.getReg());
7999 if (!Ty.isValid() || !Ty.isScalar())
8000 break;
8001 if (Ty.getSizeInBits() >= 32)
8002 break;
8003 const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
8004 // If for some reason we don't have a regbank yet, don't try anything.
8005 if (!RB)
8006 break;
8007
8008 if (RB->getID() == AArch64::GPRRegBankID)
8009 HasGPROp = true;
8010 else
8011 HasFPROp = true;
8012 }
8013 // We have heterogeneous regbanks; we need to fix them up.
8014 if (HasGPROp && HasFPROp)
8015 fixupPHIOpBanks(*MI, MRI, RBI);
8016 }
8017}
8018
8019namespace llvm {
8020InstructionSelector *
8021createAArch64InstructionSelector(const AArch64TargetMachine &TM,
8022 const AArch64Subtarget &Subtarget,
8023 const AArch64RegisterBankInfo &RBI) {
8024 return new AArch64InstructionSelector(TM, Subtarget, RBI);
8025}
8026}