1 //===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file implements the targeting of the InstructionSelector class for
10 /// AArch64.
11 /// \todo This should be generated by TableGen.
12 //===----------------------------------------------------------------------===//
13 
14 #include "AArch64GlobalISelUtils.h"
15 #include "AArch64InstrInfo.h"
18 #include "AArch64RegisterInfo.h"
19 #include "AArch64Subtarget.h"
20 #include "AArch64TargetMachine.h"
21 #include "AArch64GlobalISelUtils.h"
24 #include "llvm/ADT/Optional.h"
39 #include "llvm/IR/Constants.h"
40 #include "llvm/IR/DerivedTypes.h"
41 #include "llvm/IR/Instructions.h"
42 #include "llvm/IR/PatternMatch.h"
43 #include "llvm/IR/Type.h"
44 #include "llvm/IR/IntrinsicsAArch64.h"
45 #include "llvm/Pass.h"
46 #include "llvm/Support/Debug.h"
48 
49 #define DEBUG_TYPE "aarch64-isel"
50 
51 using namespace llvm;
52 using namespace MIPatternMatch;
53 using namespace AArch64GISelUtils;
54 
55 namespace llvm {
56 class BlockFrequencyInfo;
57 class ProfileSummaryInfo;
58 }
59 
60 namespace {
61 
62 #define GET_GLOBALISEL_PREDICATE_BITSET
63 #include "AArch64GenGlobalISel.inc"
64 #undef GET_GLOBALISEL_PREDICATE_BITSET
65 
66 class AArch64InstructionSelector : public InstructionSelector {
67 public:
68  AArch64InstructionSelector(const AArch64TargetMachine &TM,
69  const AArch64Subtarget &STI,
70  const AArch64RegisterBankInfo &RBI);
71 
72  bool select(MachineInstr &I) override;
73  static const char *getName() { return DEBUG_TYPE; }
74 
75  void setupMF(MachineFunction &MF, GISelKnownBits *KB,
76  CodeGenCoverage &CoverageInfo, ProfileSummaryInfo *PSI,
77  BlockFrequencyInfo *BFI) override {
78  InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
79  MIB.setMF(MF);
80 
81  // hasFnAttribute() is expensive to call on every BRCOND selection, so
82  // cache it here for each run of the selector.
83  ProduceNonFlagSettingCondBr =
84  !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
85  MFReturnAddr = Register();
86 
87  processPHIs(MF);
88  }
89 
90 private:
91  /// tblgen-erated 'select' implementation, used as the initial selector for
92  /// the patterns that don't require complex C++.
93  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
94 
95  // A lowering phase that runs before any selection attempts.
96  // Returns true if the instruction was modified.
97  bool preISelLower(MachineInstr &I);
98 
99  // An early selection function that runs before the selectImpl() call.
100  bool earlySelect(MachineInstr &I);
101 
102  // Do some preprocessing of G_PHIs before we begin selection.
103  void processPHIs(MachineFunction &MF);
104 
105  bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
106 
107  /// Eliminate same-sized cross-bank copies into stores before selectImpl().
108  bool contractCrossBankCopyIntoStore(MachineInstr &I,
109  MachineRegisterInfo &MRI);
110 
111  bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
112 
113  bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
114  MachineRegisterInfo &MRI) const;
115  bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
116  MachineRegisterInfo &MRI) const;
117 
118  ///@{
119  /// Helper functions for selectCompareBranch.
120  bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
121  MachineIRBuilder &MIB) const;
122  bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
123  MachineIRBuilder &MIB) const;
124  bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
125  MachineIRBuilder &MIB) const;
126  bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
127  MachineBasicBlock *DstMBB,
128  MachineIRBuilder &MIB) const;
129  ///@}
130 
131  bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
132  MachineRegisterInfo &MRI);
133 
134  bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
135  bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
136 
137  // Helper to generate an equivalent of scalar_to_vector into a new register,
138  // returned via 'Dst'.
139  MachineInstr *emitScalarToVector(unsigned EltSize,
140  const TargetRegisterClass *DstRC,
141  Register Scalar,
142  MachineIRBuilder &MIRBuilder) const;
143 
144  /// Emit a lane insert into \p DstReg, or a new vector register if None is
145  /// provided.
146  ///
147  /// The lane inserted into is defined by \p LaneIdx. The vector source
148  /// register is given by \p SrcReg. The register containing the element is
149  /// given by \p EltReg.
150  MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
151  Register EltReg, unsigned LaneIdx,
152  const RegisterBank &RB,
153  MachineIRBuilder &MIRBuilder) const;
154 
155  /// Emit a sequence of instructions representing a constant \p CV for a
156  /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
157  ///
158  /// \returns the last instruction in the sequence on success, and nullptr
159  /// otherwise.
160  MachineInstr *emitConstantVector(Register Dst, Constant *CV,
161  MachineIRBuilder &MIRBuilder,
162  MachineRegisterInfo &MRI);
163 
164  bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI);
165  bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
166  MachineRegisterInfo &MRI);
167  /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
168  /// SUBREG_TO_REG.
169  bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
170  bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
173 
174  bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
175  bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
176  bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
177  bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
178 
179  /// Helper function to select vector load intrinsics like
180  /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
181  /// \p Opc is the opcode that the selected instruction should use.
182  /// \p NumVecs is the number of vector destinations for the instruction.
183  /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
184  bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
185  MachineInstr &I);
186  bool selectIntrinsicWithSideEffects(MachineInstr &I,
187  MachineRegisterInfo &MRI);
188  bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
189  bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI);
190  bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
191  bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
192  bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
193  bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
194  bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
195  bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
196  bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
197 
198  unsigned emitConstantPoolEntry(const Constant *CPVal,
199  MachineFunction &MF) const;
200  MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
201  MachineIRBuilder &MIRBuilder) const;
202 
203  // Emit a vector concat operation.
204  MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
205  Register Op2,
206  MachineIRBuilder &MIRBuilder) const;
207 
208  // Emit an integer compare between LHS and RHS, which checks for Predicate.
209  MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
210  MachineOperand &Predicate,
211  MachineIRBuilder &MIRBuilder) const;
212 
213  /// Emit a floating point comparison between \p LHS and \p RHS.
214  /// \p Pred if given is the intended predicate to use.
215  MachineInstr *emitFPCompare(Register LHS, Register RHS,
216  MachineIRBuilder &MIRBuilder,
217  Optional<CmpInst::Predicate> Pred = None) const;
218 
219  MachineInstr *emitInstr(unsigned Opcode,
220  std::initializer_list<llvm::DstOp> DstOps,
221  std::initializer_list<llvm::SrcOp> SrcOps,
222  MachineIRBuilder &MIRBuilder,
223  const ComplexRendererFns &RenderFns = None) const;
224  /// Helper function to emit an add or sub instruction.
225  ///
226  /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
227  /// in a specific order.
228  ///
229  /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
230  ///
231  /// \code
232  /// const std::array<std::array<unsigned, 2>, 4> Table {
233  /// {{AArch64::ADDXri, AArch64::ADDWri},
234  /// {AArch64::ADDXrs, AArch64::ADDWrs},
235  /// {AArch64::ADDXrr, AArch64::ADDWrr},
236  /// {AArch64::SUBXri, AArch64::SUBWri},
237  /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
238  /// \endcode
239  ///
240  /// Each row in the table corresponds to a different addressing mode. Each
241  /// column corresponds to a different register size.
242  ///
243  /// \attention Rows must be structured as follows:
244  /// - Row 0: The ri opcode variants
245  /// - Row 1: The rs opcode variants
246  /// - Row 2: The rr opcode variants
247  /// - Row 3: The ri opcode variants for negative immediates
248  /// - Row 4: The rx opcode variants
249  ///
250  /// \attention Columns must be structured as follows:
251  /// - Column 0: The 64-bit opcode variants
252  /// - Column 1: The 32-bit opcode variants
253  ///
254  /// \p Dst is the destination register of the binop to emit.
255  /// \p LHS is the left-hand operand of the binop to emit.
256  /// \p RHS is the right-hand operand of the binop to emit.
257  MachineInstr *emitAddSub(
258  const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
259  Register Dst, MachineOperand &LHS, MachineOperand &RHS,
260  MachineIRBuilder &MIRBuilder) const;
261  MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
262  MachineOperand &RHS,
263  MachineIRBuilder &MIRBuilder) const;
264  MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
265  MachineIRBuilder &MIRBuilder) const;
266  MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
267  MachineIRBuilder &MIRBuilder) const;
268  MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
269  MachineIRBuilder &MIRBuilder) const;
270  MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
271  MachineIRBuilder &MIRBuilder) const;
272  MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
273  AArch64CC::CondCode CC,
274  MachineIRBuilder &MIRBuilder) const;
275  MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
276  const RegisterBank &DstRB, LLT ScalarTy,
277  Register VecReg, unsigned LaneIdx,
278  MachineIRBuilder &MIRBuilder) const;
279 
280  /// Emit a CSet for an integer compare.
281  ///
282  /// \p DefReg and \p SrcReg are expected to be 32-bit scalar registers.
283  MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
284  MachineIRBuilder &MIRBuilder,
285  Register SrcReg = AArch64::WZR) const;
286  /// Emit a CSet for a FP compare.
287  ///
288  /// \p Dst is expected to be a 32-bit scalar register.
289  MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
290  MachineIRBuilder &MIRBuilder) const;
291 
292  /// Emit the overflow op for \p Opcode.
293  ///
294  /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
295  /// G_USUBO, etc.
296  std::pair<MachineInstr *, AArch64CC::CondCode>
297  emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
298  MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
299 
300  /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
301  /// \p IsNegative is true if the test should be "not zero".
302  /// This will also optimize the test bit instruction when possible.
303  MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
304  MachineBasicBlock *DstMBB,
305  MachineIRBuilder &MIB) const;
306 
307  /// Emit a CB(N)Z instruction which branches to \p DestMBB.
308  MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
309  MachineBasicBlock *DestMBB,
310  MachineIRBuilder &MIB) const;
311 
312  // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
313  // We use these manually instead of using the importer since it doesn't
314  // support SDNodeXForm.
315  ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
316  ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
317  ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
318  ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
319 
320  ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
321  ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
322  ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
323 
324  ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
325  unsigned Size) const;
326 
327  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
328  return selectAddrModeUnscaled(Root, 1);
329  }
330  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
331  return selectAddrModeUnscaled(Root, 2);
332  }
333  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
334  return selectAddrModeUnscaled(Root, 4);
335  }
336  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
337  return selectAddrModeUnscaled(Root, 8);
338  }
339  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
340  return selectAddrModeUnscaled(Root, 16);
341  }
342 
343  /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
344  /// from complex pattern matchers like selectAddrModeIndexed().
345  ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
346  MachineRegisterInfo &MRI) const;
347 
348  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
349  unsigned Size) const;
350  template <int Width>
351  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
352  return selectAddrModeIndexed(Root, Width / 8);
353  }
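// A brief usage note (illustrative, not part of the original source): the
// Width template argument is the access size in bits, so the renderer divides
// by 8 to get bytes. For example, a pattern using selectAddrModeIndexed<64>
// asks for an 8-byte scaled unsigned-immediate addressing mode, the form used
// by LDRXui/STRXui-style instructions.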
354 
355  bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
356  const MachineRegisterInfo &MRI) const;
357  ComplexRendererFns
358  selectAddrModeShiftedExtendXReg(MachineOperand &Root,
359  unsigned SizeInBytes) const;
360 
361  /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
362  /// or not a shift + extend should be folded into an addressing mode. Returns
363  /// None when this is not profitable or possible.
364  ComplexRendererFns
365  selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
366  MachineOperand &Offset, unsigned SizeInBytes,
367  bool WantsExt) const;
368  ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
369  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
370  unsigned SizeInBytes) const;
371  template <int Width>
372  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
373  return selectAddrModeXRO(Root, Width / 8);
374  }
375 
376  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
377  unsigned SizeInBytes) const;
378  template <int Width>
379  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
380  return selectAddrModeWRO(Root, Width / 8);
381  }
382 
383  ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
384  bool AllowROR = false) const;
385 
386  ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
387  return selectShiftedRegister(Root);
388  }
389 
390  ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
391  return selectShiftedRegister(Root, true);
392  }
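// A short note on the AllowROR distinction (illustrative): AArch64 logical
// instructions (AND/ORR/EOR and friends) accept a rotate-right shifted
// register operand, while arithmetic ones (ADD/SUB) do not, which is why only
// the logical variant above passes AllowROR = true.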
393 
394  /// Given an extend instruction, determine the correct shift-extend type for
395  /// that instruction.
396  ///
397  /// If the instruction is going to be used in a load or store, pass
398  /// \p IsLoadStore = true.
399  AArch64_AM::ShiftExtendType
400  getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
401  bool IsLoadStore = false) const;
402 
403  /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
404  ///
405  /// \returns Either \p Reg if no change was necessary, or the new register
406  /// created by moving \p Reg.
407  ///
408  /// Note: This uses emitCopy right now.
409  Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
410  MachineIRBuilder &MIB) const;
411 
412  ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
413 
414  void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
415  int OpIdx = -1) const;
416  void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
417  int OpIdx = -1) const;
418  void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
419  int OpIdx = -1) const;
420  void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
421  int OpIdx = -1) const;
422  void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
423  int OpIdx = -1) const;
424  void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
425  int OpIdx = -1) const;
426 
427  // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
428  void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
429 
430  // Optimization methods.
431  bool tryOptSelect(MachineInstr &MI);
432  MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
433  MachineOperand &Predicate,
434  MachineIRBuilder &MIRBuilder) const;
435 
436  /// Return true if \p MI is a load or store of \p NumBytes bytes.
437  bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
438 
439  /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
440  /// register zeroed out. In other words, the result of MI has been explicitly
441  /// zero extended.
442  bool isDef32(const MachineInstr &MI) const;
443 
444  const AArch64TargetMachine &TM;
445  const AArch64Subtarget &STI;
446  const AArch64InstrInfo &TII;
447  const AArch64RegisterInfo &TRI;
448  const AArch64RegisterBankInfo &RBI;
449 
450  bool ProduceNonFlagSettingCondBr = false;
451 
452  // Some cached values used during selection.
453  // We use LR as a live-in register, and we keep track of it here as it can be
454  // clobbered by calls.
455  Register MFReturnAddr;
456 
457  MachineIRBuilder MIB;
458 
459 #define GET_GLOBALISEL_PREDICATES_DECL
460 #include "AArch64GenGlobalISel.inc"
461 #undef GET_GLOBALISEL_PREDICATES_DECL
462 
463 // We declare the temporaries used by selectImpl() in the class to minimize the
464 // cost of constructing placeholder values.
465 #define GET_GLOBALISEL_TEMPORARIES_DECL
466 #include "AArch64GenGlobalISel.inc"
467 #undef GET_GLOBALISEL_TEMPORARIES_DECL
468 };
469 
470 } // end anonymous namespace
471 
472 #define GET_GLOBALISEL_IMPL
473 #include "AArch64GenGlobalISel.inc"
474 #undef GET_GLOBALISEL_IMPL
475 
476 AArch64InstructionSelector::AArch64InstructionSelector(
477  const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
478  const AArch64RegisterBankInfo &RBI)
479  : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
480  TRI(*STI.getRegisterInfo()), RBI(RBI),
481 #define GET_GLOBALISEL_PREDICATES_INIT
482 #include "AArch64GenGlobalISel.inc"
483 #undef GET_GLOBALISEL_PREDICATES_INIT
484 #define GET_GLOBALISEL_TEMPORARIES_INIT
485 #include "AArch64GenGlobalISel.inc"
486 #undef GET_GLOBALISEL_TEMPORARIES_INIT
487 {
488 }
489 
490 // FIXME: This should be target-independent, inferred from the types declared
491 // for each class in the bank.
492 static const TargetRegisterClass *
493 getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
494  const RegisterBankInfo &RBI,
495  bool GetAllRegSet = false) {
496  if (RB.getID() == AArch64::GPRRegBankID) {
497  if (Ty.getSizeInBits() <= 32)
498  return GetAllRegSet ? &AArch64::GPR32allRegClass
499  : &AArch64::GPR32RegClass;
500  if (Ty.getSizeInBits() == 64)
501  return GetAllRegSet ? &AArch64::GPR64allRegClass
502  : &AArch64::GPR64RegClass;
503  if (Ty.getSizeInBits() == 128)
504  return &AArch64::XSeqPairsClassRegClass;
505  return nullptr;
506  }
507 
508  if (RB.getID() == AArch64::FPRRegBankID) {
509  switch (Ty.getSizeInBits()) {
510  case 8:
511  return &AArch64::FPR8RegClass;
512  case 16:
513  return &AArch64::FPR16RegClass;
514  case 32:
515  return &AArch64::FPR32RegClass;
516  case 64:
517  return &AArch64::FPR64RegClass;
518  case 128:
519  return &AArch64::FPR128RegClass;
520  }
521  return nullptr;
522  }
523 
524  return nullptr;
525 }
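// A rough worked example (illustrative): an s64 value on the GPR bank maps to
// GPR64RegClass, while a 64-bit vector such as v2s32 on the FPR bank maps to
// FPR64RegClass, since only the total size in bits and the bank are consulted
// here.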
526 
527 /// Given a register bank, and size in bits, return the smallest register class
528 /// that can represent that combination.
529 static const TargetRegisterClass *
530 getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
531  bool GetAllRegSet = false) {
532  unsigned RegBankID = RB.getID();
533 
534  if (RegBankID == AArch64::GPRRegBankID) {
535  if (SizeInBits <= 32)
536  return GetAllRegSet ? &AArch64::GPR32allRegClass
537  : &AArch64::GPR32RegClass;
538  if (SizeInBits == 64)
539  return GetAllRegSet ? &AArch64::GPR64allRegClass
540  : &AArch64::GPR64RegClass;
541  if (SizeInBits == 128)
542  return &AArch64::XSeqPairsClassRegClass;
543  }
544 
545  if (RegBankID == AArch64::FPRRegBankID) {
546  switch (SizeInBits) {
547  default:
548  return nullptr;
549  case 8:
550  return &AArch64::FPR8RegClass;
551  case 16:
552  return &AArch64::FPR16RegClass;
553  case 32:
554  return &AArch64::FPR32RegClass;
555  case 64:
556  return &AArch64::FPR64RegClass;
557  case 128:
558  return &AArch64::FPR128RegClass;
559  }
560  }
561 
562  return nullptr;
563 }
564 
565 /// Returns the correct subregister to use for a given register class.
566 static bool getSubRegForClass(const TargetRegisterClass *RC,
567  const TargetRegisterInfo &TRI, unsigned &SubReg) {
568  switch (TRI.getRegSizeInBits(*RC)) {
569  case 8:
570  SubReg = AArch64::bsub;
571  break;
572  case 16:
573  SubReg = AArch64::hsub;
574  break;
575  case 32:
576  if (RC != &AArch64::FPR32RegClass)
577  SubReg = AArch64::sub_32;
578  else
579  SubReg = AArch64::ssub;
580  break;
581  case 64:
582  SubReg = AArch64::dsub;
583  break;
584  default:
585  LLVM_DEBUG(
586  dbgs() << "Couldn't find appropriate subregister for register class.");
587  return false;
588  }
589 
590  return true;
591 }
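// For example (illustrative): given FPR32RegClass this yields AArch64::ssub,
// while GPR32RegClass yields AArch64::sub_32; both are 32 bits wide, so the
// special case above is what tells the FPR and GPR subregister indices apart.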
592 
593 /// Returns the minimum size the given register bank can hold.
594 static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
595  switch (RB.getID()) {
596  case AArch64::GPRRegBankID:
597  return 32;
598  case AArch64::FPRRegBankID:
599  return 8;
600  default:
601  llvm_unreachable("Tried to get minimum size for unknown register bank.");
602  }
603 }
604 
605 /// Create a REG_SEQUENCE instruction using the registers in \p Regs.
606 /// Helper function for functions like createDTuple and createQTuple.
607 ///
608 /// \p RegClassIDs - The list of register class IDs available for some tuple of
609 /// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
610 /// expected to contain between 2 and 4 tuple classes.
611 ///
612 /// \p SubRegs - The list of subregister classes associated with each register
613 /// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
614 /// subregister class. The index of each subregister class is expected to
615 /// correspond with the index of each register class.
616 ///
617 /// \returns Either the destination register of REG_SEQUENCE instruction that
618 /// was created, or the 0th element of \p Regs if \p Regs contains a single
619 /// element.
620 static Register createTuple(ArrayRef<Register> Regs,
621  const unsigned RegClassIDs[],
622  const unsigned SubRegs[], MachineIRBuilder &MIB) {
623  unsigned NumRegs = Regs.size();
624  if (NumRegs == 1)
625  return Regs[0];
626  assert(NumRegs >= 2 && NumRegs <= 4 &&
627  "Only support between two and 4 registers in a tuple!");
628  const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
629  auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
630  auto RegSequence =
631  MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
632  for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
633  RegSequence.addUse(Regs[I]);
634  RegSequence.addImm(SubRegs[I]);
635  }
636  return RegSequence.getReg(0);
637 }
638 
639 /// Create a tuple of D-registers using the registers in \p Regs.
640 static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
641  static const unsigned RegClassIDs[] = {
642  AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
643  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
644  AArch64::dsub2, AArch64::dsub3};
645  return createTuple(Regs, RegClassIDs, SubRegs, MIB);
646 }
647 
648 /// Create a tuple of Q-registers using the registers in \p Regs.
649 static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
650  static const unsigned RegClassIDs[] = {
651  AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
652  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
653  AArch64::qsub2, AArch64::qsub3};
654  return createTuple(Regs, RegClassIDs, SubRegs, MIB);
655 }
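// A minimal sketch of what createQTuple produces for two registers
// (illustrative MIR, register names made up):
//
//   %qq:qq = REG_SEQUENCE %q0:fpr128, %subreg.qsub0, %q1:fpr128, %subreg.qsub1
//
// i.e. the QQ tuple class with each input placed in consecutive qsub indices,
// as built by createTuple above.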
656 
657 static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
658  auto &MI = *Root.getParent();
659  auto &MBB = *MI.getParent();
660  auto &MF = *MBB.getParent();
661  auto &MRI = MF.getRegInfo();
662  uint64_t Immed;
663  if (Root.isImm())
664  Immed = Root.getImm();
665  else if (Root.isCImm())
666  Immed = Root.getCImm()->getZExtValue();
667  else if (Root.isReg()) {
668  auto ValAndVReg =
669  getIConstantVRegValWithLookThrough(Root.getReg(), MRI);
670  if (!ValAndVReg)
671  return None;
672  Immed = ValAndVReg->Value.getSExtValue();
673  } else
674  return None;
675  return Immed;
676 }
677 
678 /// Check whether \p I is a currently unsupported binary operation:
679 /// - it has an unsized type
680 /// - an operand is not a vreg
681 /// - all operands are not in the same bank
682 /// These are checks that should someday live in the verifier, but right now,
683 /// these are mostly limitations of the aarch64 selector.
684 static bool unsupportedBinOp(const MachineInstr &I,
685  const AArch64RegisterBankInfo &RBI,
686  const MachineRegisterInfo &MRI,
687  const AArch64RegisterInfo &TRI) {
688  LLT Ty = MRI.getType(I.getOperand(0).getReg());
689  if (!Ty.isValid()) {
690  LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
691  return true;
692  }
693 
694  const RegisterBank *PrevOpBank = nullptr;
695  for (auto &MO : I.operands()) {
696  // FIXME: Support non-register operands.
697  if (!MO.isReg()) {
698  LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
699  return true;
700  }
701 
702  // FIXME: Can generic operations have physical registers operands? If
703  // so, this will need to be taught about that, and we'll need to get the
704  // bank out of the minimal class for the register.
705  // Either way, this needs to be documented (and possibly verified).
706  if (!Register::isVirtualRegister(MO.getReg())) {
707  LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
708  return true;
709  }
710 
711  const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
712  if (!OpBank) {
713  LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
714  return true;
715  }
716 
717  if (PrevOpBank && OpBank != PrevOpBank) {
718  LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
719  return true;
720  }
721  PrevOpBank = OpBank;
722  }
723  return false;
724 }
725 
726 /// Select the AArch64 opcode for the basic binary operation \p GenericOpc
727 /// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
728 /// and of size \p OpSize.
729 /// \returns \p GenericOpc if the combination is unsupported.
730 static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
731  unsigned OpSize) {
732  switch (RegBankID) {
733  case AArch64::GPRRegBankID:
734  if (OpSize == 32) {
735  switch (GenericOpc) {
736  case TargetOpcode::G_SHL:
737  return AArch64::LSLVWr;
738  case TargetOpcode::G_LSHR:
739  return AArch64::LSRVWr;
740  case TargetOpcode::G_ASHR:
741  return AArch64::ASRVWr;
742  default:
743  return GenericOpc;
744  }
745  } else if (OpSize == 64) {
746  switch (GenericOpc) {
747  case TargetOpcode::G_PTR_ADD:
748  return AArch64::ADDXrr;
749  case TargetOpcode::G_SHL:
750  return AArch64::LSLVXr;
751  case TargetOpcode::G_LSHR:
752  return AArch64::LSRVXr;
753  case TargetOpcode::G_ASHR:
754  return AArch64::ASRVXr;
755  default:
756  return GenericOpc;
757  }
758  }
759  break;
760  case AArch64::FPRRegBankID:
761  switch (OpSize) {
762  case 32:
763  switch (GenericOpc) {
764  case TargetOpcode::G_FADD:
765  return AArch64::FADDSrr;
766  case TargetOpcode::G_FSUB:
767  return AArch64::FSUBSrr;
768  case TargetOpcode::G_FMUL:
769  return AArch64::FMULSrr;
770  case TargetOpcode::G_FDIV:
771  return AArch64::FDIVSrr;
772  default:
773  return GenericOpc;
774  }
775  case 64:
776  switch (GenericOpc) {
777  case TargetOpcode::G_FADD:
778  return AArch64::FADDDrr;
779  case TargetOpcode::G_FSUB:
780  return AArch64::FSUBDrr;
781  case TargetOpcode::G_FMUL:
782  return AArch64::FMULDrr;
783  case TargetOpcode::G_FDIV:
784  return AArch64::FDIVDrr;
785  case TargetOpcode::G_OR:
786  return AArch64::ORRv8i8;
787  default:
788  return GenericOpc;
789  }
790  }
791  break;
792  }
793  return GenericOpc;
794 }
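// A couple of worked examples (illustrative): G_SHL of a 32-bit value on the
// GPR bank selects to LSLVWr, and G_FADD of a 64-bit value on the FPR bank
// selects to FADDDrr; any combination not listed above simply returns
// GenericOpc so the caller can tell that selection failed.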
795 
796 /// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
797 /// appropriate for the (value) register bank \p RegBankID and of memory access
798 /// size \p OpSize. This returns the variant with the base+unsigned-immediate
799 /// addressing mode (e.g., LDRXui).
800 /// \returns \p GenericOpc if the combination is unsupported.
801 static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
802  unsigned OpSize) {
803  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
804  switch (RegBankID) {
805  case AArch64::GPRRegBankID:
806  switch (OpSize) {
807  case 8:
808  return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
809  case 16:
810  return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
811  case 32:
812  return isStore ? AArch64::STRWui : AArch64::LDRWui;
813  case 64:
814  return isStore ? AArch64::STRXui : AArch64::LDRXui;
815  }
816  break;
817  case AArch64::FPRRegBankID:
818  switch (OpSize) {
819  case 8:
820  return isStore ? AArch64::STRBui : AArch64::LDRBui;
821  case 16:
822  return isStore ? AArch64::STRHui : AArch64::LDRHui;
823  case 32:
824  return isStore ? AArch64::STRSui : AArch64::LDRSui;
825  case 64:
826  return isStore ? AArch64::STRDui : AArch64::LDRDui;
827  case 128:
828  return isStore ? AArch64::STRQui : AArch64::LDRQui;
829  }
830  break;
831  }
832  return GenericOpc;
833 }
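// For example (illustrative): a 64-bit G_LOAD whose value lives on the GPR
// bank becomes LDRXui, while a 128-bit G_STORE on the FPR bank becomes
// STRQui. The GPR bank has no 128-bit variant here, so that case falls
// through and returns GenericOpc.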
834 
835 #ifndef NDEBUG
836 /// Helper function that verifies that we have a valid copy at the end of
837 /// selectCopy. Verifies that the source and dest have the expected sizes and
838 /// then returns true.
839 static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
840  const MachineRegisterInfo &MRI,
841  const TargetRegisterInfo &TRI,
842  const RegisterBankInfo &RBI) {
843  const Register DstReg = I.getOperand(0).getReg();
844  const Register SrcReg = I.getOperand(1).getReg();
845  const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
846  const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
847 
848  // Make sure the size of the source and dest line up.
849  assert(
850  (DstSize == SrcSize ||
851  // Copies are a means to set up initial types; the number of
852  // bits may not exactly match.
853  (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
854  // Copies are a means to copy bits around; as long as we are
855  // on the same register class, that's fine. Otherwise, that
856  // means we need some SUBREG_TO_REG or AND & co.
857  (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
858  "Copy with different width?!");
859 
860  // Check the size of the destination.
861  assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
862  "GPRs cannot get more than 64-bit width values");
863 
864  return true;
865 }
866 #endif
867 
868 /// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
869 /// to \p *To.
870 ///
871 /// E.g "To = COPY SrcReg:SubReg"
873  const RegisterBankInfo &RBI, Register SrcReg,
874  const TargetRegisterClass *To, unsigned SubReg) {
875  assert(SrcReg.isValid() && "Expected a valid source register?");
876  assert(To && "Destination register class cannot be null");
877  assert(SubReg && "Expected a valid subregister");
878 
879  MachineIRBuilder MIB(I);
880  auto SubRegCopy =
881  MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
882  MachineOperand &RegOp = I.getOperand(1);
883  RegOp.setReg(SubRegCopy.getReg(0));
884 
885  // It's possible that the destination register won't be constrained. Make
886  // sure that happens.
887  if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
888  RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
889 
890  return true;
891 }
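// A sketch of the rewrite performed above (illustrative MIR, names made up),
// for SubReg = AArch64::ssub and To = &AArch64::FPR32RegClass:
//
//   %tmp:fpr32 = COPY %src.ssub
//   %dst = COPY %tmp        ; the original I, with operand 1 redirected
//
// so the original copy now reads the freshly inserted subregister copy.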
892 
893 /// Helper function to get the source and destination register classes for a
894 /// copy. Returns a std::pair containing the source register class for the
895 /// copy, and the destination register class for the copy. If a register class
896 /// cannot be determined, then it will be nullptr.
897 static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
898 getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
899  MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
900  const RegisterBankInfo &RBI) {
901  Register DstReg = I.getOperand(0).getReg();
902  Register SrcReg = I.getOperand(1).getReg();
903  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
904  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
905  unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
906  unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
907 
908  // Special casing for cross-bank copies of s1s. We can technically represent
909  // a 1-bit value with any size of register. The minimum size for a GPR is 32
910  // bits. So, we need to put the FPR on 32 bits as well.
911  //
912  // FIXME: I'm not sure if this case holds true outside of copies. If it does,
913  // then we can pull it into the helpers that get the appropriate class for a
914  // register bank. Or make a new helper that carries along some constraint
915  // information.
916  if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
917  SrcSize = DstSize = 32;
918 
919  return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
920  getMinClassForRegBank(DstRegBank, DstSize, true)};
921 }
922 
923 static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
924  MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
925  const RegisterBankInfo &RBI) {
926  Register DstReg = I.getOperand(0).getReg();
927  Register SrcReg = I.getOperand(1).getReg();
928  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
929  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
930 
931  // Find the correct register classes for the source and destination registers.
932  const TargetRegisterClass *SrcRC;
933  const TargetRegisterClass *DstRC;
934  std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
935 
936  if (!DstRC) {
937  LLVM_DEBUG(dbgs() << "Unexpected dest size "
938  << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
939  return false;
940  }
941 
942  // A couple helpers below, for making sure that the copy we produce is valid.
943 
944  // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
945  // to verify that the src and dst are the same size, since that's handled by
946  // the SUBREG_TO_REG.
947  bool KnownValid = false;
948 
949  // Returns true, or asserts if something we don't expect happens. Instead of
950  // returning true, we return isValidCopy() to ensure that we verify the
951  // result.
952  auto CheckCopy = [&]() {
953  // If we have a bitcast or something, we can't have physical registers.
954  assert((I.isCopy() ||
955  (!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&
956  !Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&
957  "No phys reg on generic operator!");
958  bool ValidCopy = true;
959 #ifndef NDEBUG
960  ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI);
961  assert(ValidCopy && "Invalid copy.");
962 #endif
963  (void)KnownValid;
964  return ValidCopy;
965  };
966 
967  // Is this a copy? If so, then we may need to insert a subregister copy.
968  if (I.isCopy()) {
969  // Yes. Check if there's anything to fix up.
970  if (!SrcRC) {
971  LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
972  return false;
973  }
974 
975  unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
976  unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
977  unsigned SubReg;
978 
979  // If the source bank doesn't support a subregister copy small enough,
980  // then we first need to copy to the destination bank.
981  if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
982  const TargetRegisterClass *DstTempRC =
983  getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
984  getSubRegForClass(DstRC, TRI, SubReg);
985 
986  MachineIRBuilder MIB(I);
987  auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
988  copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
989  } else if (SrcSize > DstSize) {
990  // If the source register is bigger than the destination we need to
991  // perform a subregister copy.
992  const TargetRegisterClass *SubRegRC =
993  getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
994  getSubRegForClass(SubRegRC, TRI, SubReg);
995  copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
996  } else if (DstSize > SrcSize) {
997  // If the destination register is bigger than the source we need to do
998  // a promotion using SUBREG_TO_REG.
999  const TargetRegisterClass *PromotionRC =
1000  getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1001  getSubRegForClass(SrcRC, TRI, SubReg);
1002 
1003  Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
1004  BuildMI(*I.getParent(), I, I.getDebugLoc(),
1005  TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
1006  .addImm(0)
1007  .addUse(SrcReg)
1008  .addImm(SubReg);
1009  MachineOperand &RegOp = I.getOperand(1);
1010  RegOp.setReg(PromoteReg);
1011 
1012  // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
1013  KnownValid = true;
1014  }
1015 
1016  // If the destination is a physical register, then there's nothing to
1017  // change, so we're done.
1018  if (Register::isPhysicalRegister(DstReg))
1019  return CheckCopy();
1020  }
1021 
1022  // No need to constrain SrcReg. It will get constrained when we hit another
1023  // of its uses or defs. Copies do not have constraints.
1024  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1025  LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
1026  << " operand\n");
1027  return false;
1028  }
1029 
1030  // If this is a GPR ZEXT that we want to just reduce down into a copy.
1031  // The sizes will be mismatched with the source < 32b but that's ok.
1032  if (I.getOpcode() == TargetOpcode::G_ZEXT) {
1033  I.setDesc(TII.get(AArch64::COPY));
1034  assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
1035  return selectCopy(I, TII, MRI, TRI, RBI);
1036  }
1037 
1038  I.setDesc(TII.get(AArch64::COPY));
1039  return CheckCopy();
1040 }
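// A worked example of the SrcSize > DstSize path (illustrative): copying a
// value on a 64-bit GPR into a 32-bit GPR destination rewrites the source to
// a sub_32 subregister copy via copySubReg, and the copy itself is then just
// constrained and re-emitted as a plain COPY.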
1041 
1042 static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
1043  if (!DstTy.isScalar() || !SrcTy.isScalar())
1044  return GenericOpc;
1045 
1046  const unsigned DstSize = DstTy.getSizeInBits();
1047  const unsigned SrcSize = SrcTy.getSizeInBits();
1048 
1049  switch (DstSize) {
1050  case 32:
1051  switch (SrcSize) {
1052  case 32:
1053  switch (GenericOpc) {
1054  case TargetOpcode::G_SITOFP:
1055  return AArch64::SCVTFUWSri;
1056  case TargetOpcode::G_UITOFP:
1057  return AArch64::UCVTFUWSri;
1058  case TargetOpcode::G_FPTOSI:
1059  return AArch64::FCVTZSUWSr;
1060  case TargetOpcode::G_FPTOUI:
1061  return AArch64::FCVTZUUWSr;
1062  default:
1063  return GenericOpc;
1064  }
1065  case 64:
1066  switch (GenericOpc) {
1067  case TargetOpcode::G_SITOFP:
1068  return AArch64::SCVTFUXSri;
1069  case TargetOpcode::G_UITOFP:
1070  return AArch64::UCVTFUXSri;
1071  case TargetOpcode::G_FPTOSI:
1072  return AArch64::FCVTZSUWDr;
1073  case TargetOpcode::G_FPTOUI:
1074  return AArch64::FCVTZUUWDr;
1075  default:
1076  return GenericOpc;
1077  }
1078  default:
1079  return GenericOpc;
1080  }
1081  case 64:
1082  switch (SrcSize) {
1083  case 32:
1084  switch (GenericOpc) {
1085  case TargetOpcode::G_SITOFP:
1086  return AArch64::SCVTFUWDri;
1087  case TargetOpcode::G_UITOFP:
1088  return AArch64::UCVTFUWDri;
1089  case TargetOpcode::G_FPTOSI:
1090  return AArch64::FCVTZSUXSr;
1091  case TargetOpcode::G_FPTOUI:
1092  return AArch64::FCVTZUUXSr;
1093  default:
1094  return GenericOpc;
1095  }
1096  case 64:
1097  switch (GenericOpc) {
1098  case TargetOpcode::G_SITOFP:
1099  return AArch64::SCVTFUXDri;
1100  case TargetOpcode::G_UITOFP:
1101  return AArch64::UCVTFUXDri;
1102  case TargetOpcode::G_FPTOSI:
1103  return AArch64::FCVTZSUXDr;
1104  case TargetOpcode::G_FPTOUI:
1105  return AArch64::FCVTZUUXDr;
1106  default:
1107  return GenericOpc;
1108  }
1109  default:
1110  return GenericOpc;
1111  }
1112  default:
1113  return GenericOpc;
1114  };
1115  return GenericOpc;
1116 }
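// Two worked rows from the table above (illustrative): G_SITOFP from s32 to
// s32 selects SCVTFUWSri, and G_FPTOSI from an s64 source to an s32 result
// selects FCVTZSUWDr; anything vector-typed or outside the 32/64-bit grid
// falls back to returning GenericOpc.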
1117 
1118 MachineInstr *
1119 AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1120  Register False, AArch64CC::CondCode CC,
1121  MachineIRBuilder &MIB) const {
1122  MachineRegisterInfo &MRI = *MIB.getMRI();
1123  assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
1124  RBI.getRegBank(True, MRI, TRI)->getID() &&
1125  "Expected both select operands to have the same regbank?");
1126  LLT Ty = MRI.getType(True);
1127  if (Ty.isVector())
1128  return nullptr;
1129  const unsigned Size = Ty.getSizeInBits();
1130  assert((Size == 32 || Size == 64) &&
1131  "Expected 32 bit or 64 bit select only?");
1132  const bool Is32Bit = Size == 32;
1133  if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1134  unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1135  auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1136  constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1137  return &*FCSel;
1138  }
1139 
1140  // By default, we'll try and emit a CSEL.
1141  unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1142  bool Optimized = false;
1143  auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1144  &Optimized](Register &Reg, Register &OtherReg,
1145  bool Invert) {
1146  if (Optimized)
1147  return false;
1148 
1149  // Attempt to fold:
1150  //
1151  // %sub = G_SUB 0, %x
1152  // %select = G_SELECT cc, %reg, %sub
1153  //
1154  // Into:
1155  // %select = CSNEG %reg, %x, cc
1156  Register MatchReg;
1157  if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
1158  Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1159  Reg = MatchReg;
1160  if (Invert) {
1161  CC = AArch64CC::getInvertedCondCode(CC);
1162  std::swap(Reg, OtherReg);
1163  }
1164  return true;
1165  }
1166 
1167  // Attempt to fold:
1168  //
1169  // %xor = G_XOR %x, -1
1170  // %select = G_SELECT cc, %reg, %xor
1171  //
1172  // Into:
1173  // %select = CSINV %reg, %x, cc
1174  if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
1175  Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1176  Reg = MatchReg;
1177  if (Invert) {
1178  CC = AArch64CC::getInvertedCondCode(CC);
1179  std::swap(Reg, OtherReg);
1180  }
1181  return true;
1182  }
1183 
1184  // Attempt to fold:
1185  //
1186  // %add = G_ADD %x, 1
1187  // %select = G_SELECT cc, %reg, %add
1188  //
1189  // Into:
1190  // %select = CSINC %reg, %x, cc
1191  if (mi_match(Reg, MRI,
1192  m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
1193  m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
1194  Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1195  Reg = MatchReg;
1196  if (Invert) {
1197  CC = AArch64CC::getInvertedCondCode(CC);
1198  std::swap(Reg, OtherReg);
1199  }
1200  return true;
1201  }
1202 
1203  return false;
1204  };
1205 
1206  // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1207  // true/false values are constants.
1208  // FIXME: All of these patterns already exist in tablegen. We should be
1209  // able to import these.
1210  auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1211  &Optimized]() {
1212  if (Optimized)
1213  return false;
1214  auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
1215  auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
1216  if (!TrueCst && !FalseCst)
1217  return false;
1218 
1219  Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1220  if (TrueCst && FalseCst) {
1221  int64_t T = TrueCst->Value.getSExtValue();
1222  int64_t F = FalseCst->Value.getSExtValue();
1223 
1224  if (T == 0 && F == 1) {
1225  // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1226  Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1227  True = ZReg;
1228  False = ZReg;
1229  return true;
1230  }
1231 
1232  if (T == 0 && F == -1) {
1233  // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
1234  Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1235  True = ZReg;
1236  False = ZReg;
1237  return true;
1238  }
1239  }
1240 
1241  if (TrueCst) {
1242  int64_t T = TrueCst->Value.getSExtValue();
1243  if (T == 1) {
1244  // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1245  Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1246  True = False;
1247  False = ZReg;
1248  CC = AArch64CC::getInvertedCondCode(CC);
1249  return true;
1250  }
1251 
1252  if (T == -1) {
1253  // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1254  Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1255  True = False;
1256  False = ZReg;
1257  CC = AArch64CC::getInvertedCondCode(CC);
1258  return true;
1259  }
1260  }
1261 
1262  if (FalseCst) {
1263  int64_t F = FalseCst->Value.getSExtValue();
1264  if (F == 1) {
1265  // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1266  Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1267  False = ZReg;
1268  return true;
1269  }
1270 
1271  if (F == -1) {
1272  // G_SELECT cc, t, -1 -> CSINV t, zreg, cc
1273  Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1274  False = ZReg;
1275  return true;
1276  }
1277  }
1278  return false;
1279  };
1280 
1281  Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1282  Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1283  Optimized |= TryOptSelectCst();
1284  auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1285  constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1286  return &*SelectInst;
1287 }
1288 
1289 static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
1290  switch (P) {
1291  default:
1292  llvm_unreachable("Unknown condition code!");
1293  case CmpInst::ICMP_NE:
1294  return AArch64CC::NE;
1295  case CmpInst::ICMP_EQ:
1296  return AArch64CC::EQ;
1297  case CmpInst::ICMP_SGT:
1298  return AArch64CC::GT;
1299  case CmpInst::ICMP_SGE:
1300  return AArch64CC::GE;
1301  case CmpInst::ICMP_SLT:
1302  return AArch64CC::LT;
1303  case CmpInst::ICMP_SLE:
1304  return AArch64CC::LE;
1305  case CmpInst::ICMP_UGT:
1306  return AArch64CC::HI;
1307  case CmpInst::ICMP_UGE:
1308  return AArch64CC::HS;
1309  case CmpInst::ICMP_ULT:
1310  return AArch64CC::LO;
1311  case CmpInst::ICMP_ULE:
1312  return AArch64CC::LS;
1313  }
1314 }
1315 
1316 /// Return a register which can be used as a bit to test in a TB(N)Z.
1317 static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1318  MachineRegisterInfo &MRI) {
1319  assert(Reg.isValid() && "Expected valid register!");
1320  bool HasZext = false;
1321  while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1322  unsigned Opc = MI->getOpcode();
1323 
1324  if (!MI->getOperand(0).isReg() ||
1325  !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1326  break;
1327 
1328  // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1329  //
1330  // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1331  // on the truncated x is the same as the bit number on x.
1332  if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1333  Opc == TargetOpcode::G_TRUNC) {
1334  if (Opc == TargetOpcode::G_ZEXT)
1335  HasZext = true;
1336 
1337  Register NextReg = MI->getOperand(1).getReg();
1338  // Did we find something worth folding?
1339  if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1340  break;
1341 
1342  // NextReg is worth folding. Keep looking.
1343  Reg = NextReg;
1344  continue;
1345  }
1346 
1347  // Attempt to find a suitable operation with a constant on one side.
1348  Optional<uint64_t> C;
1349  Register TestReg;
1350  switch (Opc) {
1351  default:
1352  break;
1353  case TargetOpcode::G_AND:
1354  case TargetOpcode::G_XOR: {
1355  TestReg = MI->getOperand(1).getReg();
1356  Register ConstantReg = MI->getOperand(2).getReg();
1357  auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1358  if (!VRegAndVal) {
1359  // AND commutes, check the other side for a constant.
1360  // FIXME: Can we canonicalize the constant so that it's always on the
1361  // same side at some point earlier?
1362  std::swap(ConstantReg, TestReg);
1363  VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1364  }
1365  if (VRegAndVal) {
1366  if (HasZext)
1367  C = VRegAndVal->Value.getZExtValue();
1368  else
1369  C = VRegAndVal->Value.getSExtValue();
1370  }
1371  break;
1372  }
1373  case TargetOpcode::G_ASHR:
1374  case TargetOpcode::G_LSHR:
1375  case TargetOpcode::G_SHL: {
1376  TestReg = MI->getOperand(1).getReg();
1377  auto VRegAndVal =
1378  getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1379  if (VRegAndVal)
1380  C = VRegAndVal->Value.getSExtValue();
1381  break;
1382  }
1383  }
1384 
1385  // Didn't find a constant or viable register. Bail out of the loop.
1386  if (!C || !TestReg.isValid())
1387  break;
1388 
1389  // We found a suitable instruction with a constant. Check to see if we can
1390  // walk through the instruction.
1391  Register NextReg;
1392  unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1393  switch (Opc) {
1394  default:
1395  break;
1396  case TargetOpcode::G_AND:
1397  // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1398  if ((*C >> Bit) & 1)
1399  NextReg = TestReg;
1400  break;
1401  case TargetOpcode::G_SHL:
1402  // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1403  // the type of the register.
1404  if (*C <= Bit && (Bit - *C) < TestRegSize) {
1405  NextReg = TestReg;
1406  Bit = Bit - *C;
1407  }
1408  break;
1409  case TargetOpcode::G_ASHR:
1410  // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1411  // in x
1412  NextReg = TestReg;
1413  Bit = Bit + *C;
1414  if (Bit >= TestRegSize)
1415  Bit = TestRegSize - 1;
1416  break;
1417  case TargetOpcode::G_LSHR:
1418  // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1419  if ((Bit + *C) < TestRegSize) {
1420  NextReg = TestReg;
1421  Bit = Bit + *C;
1422  }
1423  break;
1424  case TargetOpcode::G_XOR:
1425  // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1426  // appropriate.
1427  //
1428  // e.g. If x' = xor x, c, and the b-th bit is set in c then
1429  //
1430  // tbz x', b -> tbnz x, b
1431  //
1432  // Because x' only has the b-th bit set if x does not.
1433  if ((*C >> Bit) & 1)
1434  Invert = !Invert;
1435  NextReg = TestReg;
1436  break;
1437  }
1438 
1439  // Check if we found anything worth folding.
1440  if (!NextReg.isValid())
1441  return Reg;
1442  Reg = NextReg;
1443  }
1444 
1445  return Reg;
1446 }
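// A small worked example of the walk above (illustrative):
//
//   %y = G_SHL %x, 2
//   ... tbz %y, 3 ...
//
// the G_SHL case rewrites the test to bit 1 of %x (3 - 2), so the shift never
// needs to be materialized just to feed the TB(N)Z.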
1447 
1448 MachineInstr *AArch64InstructionSelector::emitTestBit(
1449  Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1450  MachineIRBuilder &MIB) const {
1451  assert(TestReg.isValid());
1452  assert(ProduceNonFlagSettingCondBr &&
1453  "Cannot emit TB(N)Z with speculation tracking!");
1454  MachineRegisterInfo &MRI = *MIB.getMRI();
1455 
1456  // Attempt to optimize the test bit by walking over instructions.
1457  TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1458  LLT Ty = MRI.getType(TestReg);
1459  unsigned Size = Ty.getSizeInBits();
1460  assert(!Ty.isVector() && "Expected a scalar!");
1461  assert(Bit < 64 && "Bit is too large!");
1462 
1463  // When the test register is a 64-bit register, we have to narrow to make
1464  // TBNZW work.
1465  bool UseWReg = Bit < 32;
1466  unsigned NecessarySize = UseWReg ? 32 : 64;
1467  if (Size != NecessarySize)
1468  TestReg = moveScalarRegClass(
1469  TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1470  MIB);
1471 
1472  static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1473  {AArch64::TBZW, AArch64::TBNZW}};
1474  unsigned Opc = OpcTable[UseWReg][IsNegative];
1475  auto TestBitMI =
1476  MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1477  constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1478  return &*TestBitMI;
1479 }
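// A usage note (illustrative): a test of bit 3 is narrowed onto a W register
// and emitted as TBZW/TBNZW, while a test of bit 40 must stay on an X
// register and uses TBZX/TBNZX, matching the UseWReg = Bit < 32 choice above.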
1480 
1481 bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1482  MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1483  MachineIRBuilder &MIB) const {
1484  assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1485  // Given something like this:
1486  //
1487  // %x = ...Something...
1488  // %one = G_CONSTANT i64 1
1489  // %zero = G_CONSTANT i64 0
1490  // %and = G_AND %x, %one
1491  // %cmp = G_ICMP intpred(ne), %and, %zero
1492  // %cmp_trunc = G_TRUNC %cmp
1493  // G_BRCOND %cmp_trunc, %bb.3
1494  //
1495  // We want to try and fold the AND into the G_BRCOND and produce either a
1496  // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1497  //
1498  // In this case, we'd get
1499  //
1500  // TBNZ %x %bb.3
1501  //
1502 
1503  // Check if the AND has a constant on its RHS which we can use as a mask.
1504  // If it's a power of 2, then it's the same as checking a specific bit.
1505  // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1506  auto MaybeBit = getIConstantVRegValWithLookThrough(
1507  AndInst.getOperand(2).getReg(), *MIB.getMRI());
1508  if (!MaybeBit)
1509  return false;
1510 
1511  int32_t Bit = MaybeBit->Value.exactLogBase2();
1512  if (Bit < 0)
1513  return false;
1514 
1515  Register TestReg = AndInst.getOperand(1).getReg();
1516 
1517  // Emit a TB(N)Z.
1518  emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1519  return true;
1520 }
1521 
1522 MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1523  bool IsNegative,
1524  MachineBasicBlock *DestMBB,
1525  MachineIRBuilder &MIB) const {
1526  assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1527  MachineRegisterInfo &MRI = *MIB.getMRI();
1528  assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1529  AArch64::GPRRegBankID &&
1530  "Expected GPRs only?");
1531  auto Ty = MRI.getType(CompareReg);
1532  unsigned Width = Ty.getSizeInBits();
1533  assert(!Ty.isVector() && "Expected scalar only?");
1534  assert(Width <= 64 && "Expected width to be at most 64?");
1535  static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1536  {AArch64::CBNZW, AArch64::CBNZX}};
1537  unsigned Opc = OpcTable[IsNegative][Width == 64];
1538  auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1539  constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1540  return &*BranchMI;
1541 }
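// For example (illustrative): emitCBZ(%reg, /*IsNegative=*/true, MBB, MIB) on
// a 64-bit GPR produces "CBNZX %reg, %MBB", i.e. branch if the register is
// not zero, per the OpcTable indexing above.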
1542 
1543 bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1544  MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1545  assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1546  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1547  // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1548  // totally clean. Some of them require two branches to implement.
1549  auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1550  emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1551  Pred);
1552  AArch64CC::CondCode CC1, CC2;
1553  changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
1554  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1555  MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1556  if (CC2 != AArch64CC::AL)
1557  MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1558  I.eraseFromParent();
1559  return true;
1560 }
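// A sketch of the two-branch case mentioned above (illustrative): an
// unordered-or-equal compare (FCMP_UEQ) maps to the condition pair (EQ, VS),
// so the code emits two Bcc instructions branching to the same block, one for
// "equal" and one for "unordered".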
1561 
1562 bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1563  MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1564  assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1565  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1566  // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1567  //
1568  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1569  // instructions will not be produced, as they are conditional branch
1570  // instructions that do not set flags.
1571  if (!ProduceNonFlagSettingCondBr)
1572  return false;
1573 
1574  MachineRegisterInfo &MRI = *MIB.getMRI();
1575  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1576  auto Pred =
1577  static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1578  Register LHS = ICmp.getOperand(2).getReg();
1579  Register RHS = ICmp.getOperand(3).getReg();
1580 
1581  // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1582  auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1583  MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1584 
1585  // When we can emit a TB(N)Z, prefer that.
1586  //
1587  // Handle non-commutative condition codes first.
1588  // Note that we don't want to do this when we have a G_AND because it can
1589  // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1590  if (VRegAndVal && !AndInst) {
1591  int64_t C = VRegAndVal->Value.getSExtValue();
1592 
1593  // When we have a greater-than comparison, we can just test if the msb is
1594  // zero.
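 // For example (illustrative): "G_ICMP sgt %x, -1" is true exactly when the
 // sign bit of %x is clear, so for a 64-bit %x this becomes "TBZ %x, #63".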
1595  if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1596  uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1597  emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1598  I.eraseFromParent();
1599  return true;
1600  }
1601 
1602  // When we have a less than comparison, we can just test if the msb is not
1603  // zero.
1604  if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1605  uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1606  emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1607  I.eraseFromParent();
1608  return true;
1609  }
1610  }
1611 
1612  // Attempt to handle commutative condition codes. Right now, that's only
1613  // eq/ne.
1614  if (ICmpInst::isEquality(Pred)) {
1615  if (!VRegAndVal) {
1616  std::swap(RHS, LHS);
1617  VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1618  AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1619  }
1620 
1621  if (VRegAndVal && VRegAndVal->Value == 0) {
1622  // If there's a G_AND feeding into this branch, try to fold it away by
1623  // emitting a TB(N)Z instead.
1624  //
1625  // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1626  // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1627  // would be redundant.
1628  if (AndInst &&
1629  tryOptAndIntoCompareBranch(
1630  *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1631  I.eraseFromParent();
1632  return true;
1633  }
1634 
1635  // Otherwise, try to emit a CB(N)Z instead.
1636  auto LHSTy = MRI.getType(LHS);
1637  if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1638  emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1639  I.eraseFromParent();
1640  return true;
1641  }
1642  }
1643  }
1644 
1645  return false;
1646 }
1647 
1648 bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1649  MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1650  assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1651  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1652  if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1653  return true;
1654 
1655  // Couldn't optimize. Emit a compare + a Bcc.
1656  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1657  auto PredOp = ICmp.getOperand(1);
1658  emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1659  const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1660  static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
1661  MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1662  I.eraseFromParent();
1663  return true;
1664 }
1665 
1666 bool AArch64InstructionSelector::selectCompareBranch(
1667     MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
1668  Register CondReg = I.getOperand(0).getReg();
1669  MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1670  if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) {
1671  CondReg = CCMI->getOperand(1).getReg();
1672  CCMI = MRI.getVRegDef(CondReg);
1673  }
1674 
1675  // Try to select the G_BRCOND using whatever is feeding the condition if
1676  // possible.
1677  unsigned CCMIOpc = CCMI->getOpcode();
1678  if (CCMIOpc == TargetOpcode::G_FCMP)
1679  return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1680  if (CCMIOpc == TargetOpcode::G_ICMP)
1681  return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1682 
1683  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1684  // instructions will not be produced, as they are conditional branch
1685  // instructions that do not set flags.
1686  if (ProduceNonFlagSettingCondBr) {
1687  emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1688  I.getOperand(1).getMBB(), MIB);
1689  I.eraseFromParent();
1690  return true;
1691  }
1692 
1693  // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1694  auto TstMI =
1695  MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1696  constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
1697  auto Bcc = MIB.buildInstr(AArch64::Bcc)
1698                 .addImm(AArch64CC::NE)
1699  .addMBB(I.getOperand(1).getMBB());
1700  I.eraseFromParent();
1701  return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1702 }
1703 
1704 /// Returns the element immediate value of a vector shift operand if found.
1705 /// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1706 static Optional<int64_t> getVectorShiftImm(Register Reg,
1707                                            MachineRegisterInfo &MRI) {
1708  assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1709  MachineInstr *OpMI = MRI.getVRegDef(Reg);
1710  assert(OpMI && "Expected to find a vreg def for vector shift operand");
1711  return getAArch64VectorSplatScalar(*OpMI, MRI);
1712 }
1713 
1714 /// Matches and returns the shift immediate value for a SHL instruction given
1715 /// a shift operand.
1716 static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) {
1717   Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1718  if (!ShiftImm)
1719  return None;
1720  // Check the immediate is in range for a SHL.
1721  int64_t Imm = *ShiftImm;
1722  if (Imm < 0)
1723  return None;
1724  switch (SrcTy.getElementType().getSizeInBits()) {
1725  default:
1726  LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1727  return None;
1728  case 8:
1729  if (Imm > 7)
1730  return None;
1731  break;
1732  case 16:
1733  if (Imm > 15)
1734  return None;
1735  break;
1736  case 32:
1737  if (Imm > 31)
1738  return None;
1739  break;
1740  case 64:
1741  if (Imm > 63)
1742  return None;
1743  break;
1744  }
1745  return Imm;
1746 }
1747 
1748 bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
1749                                                  MachineRegisterInfo &MRI) {
1750  assert(I.getOpcode() == TargetOpcode::G_SHL);
1751  Register DstReg = I.getOperand(0).getReg();
1752  const LLT Ty = MRI.getType(DstReg);
1753  Register Src1Reg = I.getOperand(1).getReg();
1754  Register Src2Reg = I.getOperand(2).getReg();
1755 
1756  if (!Ty.isVector())
1757  return false;
1758 
1759  // Check if we have a vector of constants on RHS that we can select as the
1760  // immediate form.
1761  Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1762 
1763  unsigned Opc = 0;
1764  if (Ty == LLT::fixed_vector(2, 64)) {
1765  Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1766  } else if (Ty == LLT::fixed_vector(4, 32)) {
1767  Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1768  } else if (Ty == LLT::fixed_vector(2, 32)) {
1769  Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1770  } else if (Ty == LLT::fixed_vector(4, 16)) {
1771  Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1772  } else if (Ty == LLT::fixed_vector(8, 16)) {
1773  Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1774  } else if (Ty == LLT::fixed_vector(16, 8)) {
1775  Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1776  } else if (Ty == LLT::fixed_vector(8, 8)) {
1777  Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1778  } else {
1779  LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1780  return false;
1781  }
1782 
1783  auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1784  if (ImmVal)
1785  Shl.addImm(*ImmVal);
1786  else
1787  Shl.addUse(Src2Reg);
1788  constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1789  I.eraseFromParent();
1790  return true;
1791 }
1792 
1793 bool AArch64InstructionSelector::selectVectorAshrLshr(
1794     MachineInstr &I, MachineRegisterInfo &MRI) {
1795  assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1796  I.getOpcode() == TargetOpcode::G_LSHR);
1797  Register DstReg = I.getOperand(0).getReg();
1798  const LLT Ty = MRI.getType(DstReg);
1799  Register Src1Reg = I.getOperand(1).getReg();
1800  Register Src2Reg = I.getOperand(2).getReg();
1801 
1802  if (!Ty.isVector())
1803  return false;
1804 
1805  bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1806 
1807  // We expect the immediate case to be lowered in the PostLegalCombiner to
1808  // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1809 
1810  // There is no right-shift-by-register instruction, but the left-shift-by-
1811  // register instruction takes a signed shift amount, where a negative amount
1812  // specifies a right shift.
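 // For example (illustrative): a v4s32 G_ASHR by a variable amount is selected
 // as "%neg = NEGv4i32 %amt" followed by "SSHLv4i32 %src, %neg".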
1813 
1814  unsigned Opc = 0;
1815  unsigned NegOpc = 0;
1816  const TargetRegisterClass *RC =
1817  getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI);
1818  if (Ty == LLT::fixed_vector(2, 64)) {
1819  Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1820  NegOpc = AArch64::NEGv2i64;
1821  } else if (Ty == LLT::fixed_vector(4, 32)) {
1822  Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1823  NegOpc = AArch64::NEGv4i32;
1824  } else if (Ty == LLT::fixed_vector(2, 32)) {
1825  Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1826  NegOpc = AArch64::NEGv2i32;
1827  } else if (Ty == LLT::fixed_vector(4, 16)) {
1828  Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1829  NegOpc = AArch64::NEGv4i16;
1830  } else if (Ty == LLT::fixed_vector(8, 16)) {
1831  Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1832  NegOpc = AArch64::NEGv8i16;
1833  } else if (Ty == LLT::fixed_vector(16, 8)) {
1834  Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1835  NegOpc = AArch64::NEGv16i8;
1836  } else if (Ty == LLT::fixed_vector(8, 8)) {
1837  Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1838  NegOpc = AArch64::NEGv8i8;
1839  } else {
1840  LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1841  return false;
1842  }
1843 
1844  auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1845  constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1846  auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1847  constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1848  I.eraseFromParent();
1849  return true;
1850 }
1851 
1852 bool AArch64InstructionSelector::selectVaStartAAPCS(
1853     MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1854  return false;
1855 }
1856 
1857 bool AArch64InstructionSelector::selectVaStartDarwin(
1858     MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1859   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1860  Register ListReg = I.getOperand(0).getReg();
1861 
1862  Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1863 
1864  auto MIB =
1865  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
1866  .addDef(ArgsAddrReg)
1867  .addFrameIndex(FuncInfo->getVarArgsStackIndex())
1868  .addImm(0)
1869  .addImm(0);
1870 
1871  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1872 
1873  MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
1874  .addUse(ArgsAddrReg)
1875  .addUse(ListReg)
1876  .addImm(0)
1877  .addMemOperand(*I.memoperands_begin());
1878 
1879  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1880  I.eraseFromParent();
1881  return true;
1882 }
1883 
1884 void AArch64InstructionSelector::materializeLargeCMVal(
1885  MachineInstr &I, const Value *V, unsigned OpFlags) {
1886  MachineBasicBlock &MBB = *I.getParent();
1887  MachineFunction &MF = *MBB.getParent();
1888  MachineRegisterInfo &MRI = MF.getRegInfo();
1889 
1890  auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
1891  MovZ->addOperand(MF, I.getOperand(1));
1892  MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
1893                                     AArch64II::MO_NC);
1894  MovZ->addOperand(MF, MachineOperand::CreateImm(0));
1895  constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
1896 
1897  auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
1898  Register ForceDstReg) {
1899  Register DstReg = ForceDstReg
1900  ? ForceDstReg
1901  : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1902  auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
1903  if (auto *GV = dyn_cast<GlobalValue>(V)) {
1904  MovI->addOperand(MF, MachineOperand::CreateGA(
1905  GV, MovZ->getOperand(1).getOffset(), Flags));
1906  } else {
1907  MovI->addOperand(
1908  MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
1909  MovZ->getOperand(1).getOffset(), Flags));
1910  }
1911  MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
1912  constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
1913  return DstReg;
1914  };
1915  Register DstReg = BuildMovK(MovZ.getReg(0),
1916                              AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
1917  DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
1918  BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
1919 }
1920 
1921 bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
1922  MachineBasicBlock &MBB = *I.getParent();
1923  MachineFunction &MF = *MBB.getParent();
1924  MachineRegisterInfo &MRI = MF.getRegInfo();
1925 
1926  switch (I.getOpcode()) {
1927  case TargetOpcode::G_SHL:
1928  case TargetOpcode::G_ASHR:
1929  case TargetOpcode::G_LSHR: {
1930  // These shifts are legalized to have 64 bit shift amounts because we want
1931  // to take advantage of the existing imported selection patterns that assume
1932  // the immediates are s64s. However, if the shifted type is 32 bits and for
1933  // some reason we receive input GMIR that has an s64 shift amount that's not
1934  // a G_CONSTANT, insert a truncate so that we can still select the s32
1935  // register-register variant.
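 // For example (illustrative): for "%d:gpr(s32) = G_SHL %x:gpr(s32),
 // %amt:gpr(s64)" we insert "%t:gpr(s32) = COPY %amt.sub_32" and use %t as the
 // shift amount so the 32-bit register-register shift pattern can still match.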
1936  Register SrcReg = I.getOperand(1).getReg();
1937  Register ShiftReg = I.getOperand(2).getReg();
1938  const LLT ShiftTy = MRI.getType(ShiftReg);
1939  const LLT SrcTy = MRI.getType(SrcReg);
1940  if (SrcTy.isVector())
1941  return false;
1942  assert(!ShiftTy.isVector() && "unexpected vector shift ty");
1943  if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
1944  return false;
1945  auto *AmtMI = MRI.getVRegDef(ShiftReg);
1946  assert(AmtMI && "could not find a vreg definition for shift amount");
1947  if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
1948  // Insert a subregister copy to implement a 64->32 trunc
1949  auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
1950  .addReg(ShiftReg, 0, AArch64::sub_32);
1951  MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
1952  I.getOperand(2).setReg(Trunc.getReg(0));
1953  }
1954  return true;
1955  }
1956  case TargetOpcode::G_STORE: {
1957  bool Changed = contractCrossBankCopyIntoStore(I, MRI);
1958  MachineOperand &SrcOp = I.getOperand(0);
1959  if (MRI.getType(SrcOp.getReg()).isPointer()) {
1960  // Allow matching with imported patterns for stores of pointers. Unlike
1961  // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
1962  // and constrain.
1963  auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
1964  Register NewSrc = Copy.getReg(0);
1965  SrcOp.setReg(NewSrc);
1966  RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
1967  Changed = true;
1968  }
1969  return Changed;
1970  }
1971  case TargetOpcode::G_PTR_ADD:
1972  return convertPtrAddToAdd(I, MRI);
1973  case TargetOpcode::G_LOAD: {
1974  // For scalar loads of pointers, we try to convert the dest type from p0
1975  // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
1976  // conversion, this should be ok because all users should have been
1977  // selected already, so the type doesn't matter for them.
1978  Register DstReg = I.getOperand(0).getReg();
1979  const LLT DstTy = MRI.getType(DstReg);
1980  if (!DstTy.isPointer())
1981  return false;
1982  MRI.setType(DstReg, LLT::scalar(64));
1983  return true;
1984  }
1985  case AArch64::G_DUP: {
1986  // Convert the type from p0 to s64 to help selection.
1987  LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1988  if (!DstTy.getElementType().isPointer())
1989  return false;
1990  auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
1991  MRI.setType(I.getOperand(0).getReg(),
1992  DstTy.changeElementType(LLT::scalar(64)));
1993  MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
1994  I.getOperand(1).setReg(NewSrc.getReg(0));
1995  return true;
1996  }
1997  case TargetOpcode::G_UITOFP:
1998  case TargetOpcode::G_SITOFP: {
1999  // If both source and destination regbanks are FPR, then convert the opcode
2000  // to G_SITOF so that the importer can select it to an fpr variant.
2001  // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
2002  // copy.
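 // For example (illustrative): an s32 FPR -> s32 FPR G_SITOFP becomes G_SITOF,
 // which can be selected to the FPR-to-FPR "scvtf" form rather than a GPR
 // source variant plus a cross-bank copy.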
2003  Register SrcReg = I.getOperand(1).getReg();
2004  LLT SrcTy = MRI.getType(SrcReg);
2005  LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2006  if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
2007  return false;
2008 
2009  if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
2010  if (I.getOpcode() == TargetOpcode::G_SITOFP)
2011  I.setDesc(TII.get(AArch64::G_SITOF));
2012  else
2013  I.setDesc(TII.get(AArch64::G_UITOF));
2014  return true;
2015  }
2016  return false;
2017  }
2018  default:
2019  return false;
2020  }
2021 }
2022 
2023 /// This lowering tries to look for G_PTR_ADD instructions and then converts
2024 /// them to a standard G_ADD with a COPY on the source.
2025 ///
2026 /// The motivation behind this is to expose the add semantics to the imported
2027 /// tablegen patterns. We shouldn't need to check for uses being loads/stores,
2028 /// because the selector works bottom up, uses before defs. By the time we
2029 /// end up trying to select a G_PTR_ADD, we should have already attempted to
2030 /// fold this into addressing modes and were therefore unsuccessful.
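/// For example (illustrative MIR), in address space 0:
///
///   %dst:gpr(p0) = G_PTR_ADD %base(p0), %off(s64)
///
/// is rewritten to
///
///   %intbase:gpr(s64) = G_PTRTOINT %base(p0)
///   %dst:gpr(s64) = G_ADD %intbase, %off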
2031 bool AArch64InstructionSelector::convertPtrAddToAdd(
2032     MachineInstr &I, MachineRegisterInfo &MRI) {
2033  assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
2034  Register DstReg = I.getOperand(0).getReg();
2035  Register AddOp1Reg = I.getOperand(1).getReg();
2036  const LLT PtrTy = MRI.getType(DstReg);
2037  if (PtrTy.getAddressSpace() != 0)
2038  return false;
2039 
2040  const LLT CastPtrTy =
2041  PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
2042  auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
2043  // Set regbanks on the registers.
2044  if (PtrTy.isVector())
2045  MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2046  else
2047  MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2048 
2049  // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2050  // %dst(intty) = G_ADD %intbase, off
2051  I.setDesc(TII.get(TargetOpcode::G_ADD));
2052  MRI.setType(DstReg, CastPtrTy);
2053  I.getOperand(1).setReg(PtrToInt.getReg(0));
2054  if (!select(*PtrToInt)) {
2055  LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
2056  return false;
2057  }
2058 
2059  // Also take the opportunity here to try to do some optimization.
2060  // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
2061  Register NegatedReg;
2062  if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
2063  return true;
2064  I.getOperand(2).setReg(NegatedReg);
2065  I.setDesc(TII.get(TargetOpcode::G_SUB));
2066  return true;
2067 }
2068 
2069 bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
2070                                                 MachineRegisterInfo &MRI) {
2071  // We try to match the immediate variant of LSL, which is actually an alias
2072  // for a special case of UBFM. Otherwise, we fall back to the imported
2073  // selector which will match the register variant.
2074  assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2075  const auto &MO = I.getOperand(2);
2076  auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
2077  if (!VRegAndVal)
2078  return false;
2079 
2080  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2081  if (DstTy.isVector())
2082  return false;
2083  bool Is64Bit = DstTy.getSizeInBits() == 64;
2084  auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2085  auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2086 
2087  if (!Imm1Fn || !Imm2Fn)
2088  return false;
2089 
2090  auto NewI =
2091  MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2092  {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2093 
2094  for (auto &RenderFn : *Imm1Fn)
2095  RenderFn(NewI);
2096  for (auto &RenderFn : *Imm2Fn)
2097  RenderFn(NewI);
2098 
2099  I.eraseFromParent();
2100  return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
2101 }
2102 
2103 bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2104     MachineInstr &I, MachineRegisterInfo &MRI) {
2105  assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2106  // If we're storing a scalar, it doesn't matter what register bank that
2107  // scalar is on. All that matters is the size.
2108  //
2109  // So, if we see something like this (with a 32-bit scalar as an example):
2110  //
2111  // %x:gpr(s32) = ... something ...
2112  // %y:fpr(s32) = COPY %x:gpr(s32)
2113  // G_STORE %y:fpr(s32)
2114  //
2115  // We can fix this up into something like this:
2116  //
2117  // G_STORE %x:gpr(s32)
2118  //
2119  // And then continue the selection process normally.
2120  Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2121  if (!DefDstReg.isValid())
2122  return false;
2123  LLT DefDstTy = MRI.getType(DefDstReg);
2124  Register StoreSrcReg = I.getOperand(0).getReg();
2125  LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2126 
2127  // If we get something strange like a physical register, then we shouldn't
2128  // go any further.
2129  if (!DefDstTy.isValid())
2130  return false;
2131 
2132  // Are the source and dst types the same size?
2133  if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2134  return false;
2135 
2136  if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2137  RBI.getRegBank(DefDstReg, MRI, TRI))
2138  return false;
2139 
2140  // We have a cross-bank copy, which is entering a store. Let's fold it.
2141  I.getOperand(0).setReg(DefDstReg);
2142  return true;
2143 }
2144 
2145 bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2146  assert(I.getParent() && "Instruction should be in a basic block!");
2147  assert(I.getParent()->getParent() && "Instruction should be in a function!");
2148 
2149  MachineBasicBlock &MBB = *I.getParent();
2150  MachineFunction &MF = *MBB.getParent();
2151  MachineRegisterInfo &MRI = MF.getRegInfo();
2152 
2153  switch (I.getOpcode()) {
2154  case AArch64::G_DUP: {
2155  // Before selecting a DUP instruction, check if it is better selected as a
2156  // MOV or load from a constant pool.
2157  Register Src = I.getOperand(1).getReg();
2158  auto ValAndVReg = getIConstantVRegValWithLookThrough(Src, MRI);
2159  if (!ValAndVReg)
2160  return false;
2161  LLVMContext &Ctx = MF.getFunction().getContext();
2162  Register Dst = I.getOperand(0).getReg();
2163  auto *CV = ConstantDataVector::getSplat(
2164  MRI.getType(Dst).getNumElements(),
2165  ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()),
2166  ValAndVReg->Value));
2167  if (!emitConstantVector(Dst, CV, MIB, MRI))
2168  return false;
2169  I.eraseFromParent();
2170  return true;
2171  }
2172  case TargetOpcode::G_SEXT:
2173  // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
2174  // over a normal extend.
2175  if (selectUSMovFromExtend(I, MRI))
2176  return true;
2177  return false;
2178  case TargetOpcode::G_BR:
2179  return false;
2180  case TargetOpcode::G_SHL:
2181  return earlySelectSHL(I, MRI);
2182  case TargetOpcode::G_CONSTANT: {
2183  bool IsZero = false;
2184  if (I.getOperand(1).isCImm())
2185  IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
2186  else if (I.getOperand(1).isImm())
2187  IsZero = I.getOperand(1).getImm() == 0;
2188 
2189  if (!IsZero)
2190  return false;
2191 
2192  Register DefReg = I.getOperand(0).getReg();
2193  LLT Ty = MRI.getType(DefReg);
2194  if (Ty.getSizeInBits() == 64) {
2195  I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2196  RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2197  } else if (Ty.getSizeInBits() == 32) {
2198  I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2199  RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2200  } else
2201  return false;
2202 
2203  I.setDesc(TII.get(TargetOpcode::COPY));
2204  return true;
2205  }
2206 
2207  case TargetOpcode::G_ADD: {
2208  // Check if this is being fed by a G_ICMP on either side.
2209  //
2210  // (cmp pred, x, y) + z
2211  //
2212  // In the above case, when the cmp is true, we increment z by 1. So, we can
2213  // fold the add into the cset for the cmp by using cinc.
2214  //
2215  // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
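 // For example (illustrative): "%dst = G_ADD (G_ICMP eq %a, %b), %z" can be
 // emitted roughly as "cmp w_a, w_b" followed by "cinc w_dst, w_z, eq".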
2216  Register X = I.getOperand(1).getReg();
2217 
2218  // Only handle scalars. Scalar G_ICMP is only legal for s32, so bail out
2219  // early if we see anything else.
2220  LLT Ty = MRI.getType(X);
2221  if (Ty.isVector() || Ty.getSizeInBits() != 32)
2222  return false;
2223 
2224  Register CmpReg = I.getOperand(2).getReg();
2225  MachineInstr *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, CmpReg, MRI);
2226  if (!Cmp) {
2227  std::swap(X, CmpReg);
2228  Cmp = getOpcodeDef(TargetOpcode::G_ICMP, CmpReg, MRI);
2229  if (!Cmp)
2230  return false;
2231  }
2232  auto Pred =
2233  static_cast<CmpInst::Predicate>(Cmp->getOperand(1).getPredicate());
2234  emitIntegerCompare(Cmp->getOperand(2), Cmp->getOperand(3),
2235  Cmp->getOperand(1), MIB);
2236  emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIB, X);
2237  I.eraseFromParent();
2238  return true;
2239  }
2240  case TargetOpcode::G_OR: {
2241  // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2242  // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2243  // shifting and masking that we can replace with a BFI (encoded as a BFM).
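 // For example (illustrative), with Size = 32 and ShiftImm = 16:
 //   %dst = G_OR (G_SHL %a, 16), (G_AND %b, 0xffff)
 // becomes a BFM with immr = 16 and imms = 15, i.e. "bfi w_dst, w_a, #16, #16"
 // where w_dst starts out holding %b.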
2244  Register Dst = I.getOperand(0).getReg();
2245  LLT Ty = MRI.getType(Dst);
2246 
2247  if (!Ty.isScalar())
2248  return false;
2249 
2250  unsigned Size = Ty.getSizeInBits();
2251  if (Size != 32 && Size != 64)
2252  return false;
2253 
2254  Register ShiftSrc;
2255  int64_t ShiftImm;
2256  Register MaskSrc;
2257  int64_t MaskImm;
2258  if (!mi_match(
2259  Dst, MRI,
2260  m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
2261  m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
2262  return false;
2263 
2264  if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2265  return false;
2266 
2267  int64_t Immr = Size - ShiftImm;
2268  int64_t Imms = Size - ShiftImm - 1;
2269  unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2270  emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2271  I.eraseFromParent();
2272  return true;
2273  }
2274  default:
2275  return false;
2276  }
2277 }
2278 
2279 bool AArch64InstructionSelector::select(MachineInstr &I) {
2280  assert(I.getParent() && "Instruction should be in a basic block!");
2281  assert(I.getParent()->getParent() && "Instruction should be in a function!");
2282 
2283  MachineBasicBlock &MBB = *I.getParent();
2284  MachineFunction &MF = *MBB.getParent();
2285  MachineRegisterInfo &MRI = MF.getRegInfo();
2286 
2287  const AArch64Subtarget *Subtarget =
2288  &static_cast<const AArch64Subtarget &>(MF.getSubtarget());
2289  if (Subtarget->requiresStrictAlign()) {
2290  // We don't support this feature yet.
2291  LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2292  return false;
2293  }
2294 
2295  MIB.setInstrAndDebugLoc(I);
2296 
2297  unsigned Opcode = I.getOpcode();
2298  // G_PHI requires the same handling as PHI.
2299  if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2300  // Certain non-generic instructions also need some special handling.
2301 
2302  if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2303  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2304 
2305  if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2306  const Register DefReg = I.getOperand(0).getReg();
2307  const LLT DefTy = MRI.getType(DefReg);
2308 
2309  const RegClassOrRegBank &RegClassOrBank =
2310  MRI.getRegClassOrRegBank(DefReg);
2311 
2312  const TargetRegisterClass *DefRC
2313  = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
2314  if (!DefRC) {
2315  if (!DefTy.isValid()) {
2316  LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2317  return false;
2318  }
2319  const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
2320  DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
2321  if (!DefRC) {
2322  LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2323  return false;
2324  }
2325  }
2326 
2327  I.setDesc(TII.get(TargetOpcode::PHI));
2328 
2329  return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2330  }
2331 
2332  if (I.isCopy())
2333  return selectCopy(I, TII, MRI, TRI, RBI);
2334 
2335  return true;
2336  }
2337 
2338 
2339  if (I.getNumOperands() != I.getNumExplicitOperands()) {
2340  LLVM_DEBUG(
2341  dbgs() << "Generic instruction has unexpected implicit operands\n");
2342  return false;
2343  }
2344 
2345  // Try to do some lowering before we start instruction selecting. These
2346  // lowerings are purely transformations on the input G_MIR and so selection
2347  // must continue after any modification of the instruction.
2348  if (preISelLower(I)) {
2349  Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2350  }
2351 
2352  // There may be patterns where the importer can't deal with them optimally,
2353  // but does select it to a suboptimal sequence so our custom C++ selection
2354  // code later never has a chance to work on it. Therefore, we have an early
2355  // selection attempt here to give priority to certain selection routines
2356  // over the imported ones.
2357  if (earlySelect(I))
2358  return true;
2359 
2360  if (selectImpl(I, *CoverageInfo))
2361  return true;
2362 
2363  LLT Ty =
2364  I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2365 
2366  switch (Opcode) {
2367  case TargetOpcode::G_SBFX:
2368  case TargetOpcode::G_UBFX: {
2369  static const unsigned OpcTable[2][2] = {
2370  {AArch64::UBFMWri, AArch64::UBFMXri},
2371  {AArch64::SBFMWri, AArch64::SBFMXri}};
2372  bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2373  unsigned Size = Ty.getSizeInBits();
2374  unsigned Opc = OpcTable[IsSigned][Size == 64];
2375  auto Cst1 =
2376  getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
2377  assert(Cst1 && "Should have gotten a constant for src 1?");
2378  auto Cst2 =
2379  getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
2380  assert(Cst2 && "Should have gotten a constant for src 2?");
2381  auto LSB = Cst1->Value.getZExtValue();
2382  auto Width = Cst2->Value.getZExtValue();
2383  auto BitfieldInst =
2384  MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
2385  .addImm(LSB)
2386  .addImm(LSB + Width - 1);
2387  I.eraseFromParent();
2388  return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
2389  }
2390  case TargetOpcode::G_BRCOND:
2391  return selectCompareBranch(I, MF, MRI);
2392 
2393  case TargetOpcode::G_BRINDIRECT: {
2394  I.setDesc(TII.get(AArch64::BR));
2395  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2396  }
2397 
2398  case TargetOpcode::G_BRJT:
2399  return selectBrJT(I, MRI);
2400 
2401  case AArch64::G_ADD_LOW: {
2402  // This op may have been separated from its ADRP companion by the localizer
2403  // or some other code motion pass. Given that many CPUs will try to
2404  // macro fuse these operations anyway, select this into a MOVaddr pseudo
2405  // which will later be expanded into an ADRP+ADD pair after scheduling.
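 // For example (illustrative): a MOVaddr of a global "sym" is eventually
 // expanded to "adrp x0, sym" followed by "add x0, x0, :lo12:sym".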
2406  MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2407  if (BaseMI->getOpcode() != AArch64::ADRP) {
2408  I.setDesc(TII.get(AArch64::ADDXri));
2409  I.addOperand(MachineOperand::CreateImm(0));
2410  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2411  }
2412  assert(TM.getCodeModel() == CodeModel::Small &&
2413  "Expected small code model");
2414  auto Op1 = BaseMI->getOperand(1);
2415  auto Op2 = I.getOperand(2);
2416  auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2417  .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2418  Op1.getTargetFlags())
2419  .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2420  Op2.getTargetFlags());
2421  I.eraseFromParent();
2422  return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2423  }
2424 
2425  case TargetOpcode::G_BSWAP: {
2426  // Handle vector types for G_BSWAP directly.
2427  Register DstReg = I.getOperand(0).getReg();
2428  LLT DstTy = MRI.getType(DstReg);
2429 
2430  // We should only get vector types here; everything else is handled by the
2431  // importer right now.
2432  if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
2433  LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
2434  return false;
2435  }
2436 
2437  // Only handle 4 and 2 element vectors for now.
2438  // TODO: 16-bit elements.
2439  unsigned NumElts = DstTy.getNumElements();
2440  if (NumElts != 4 && NumElts != 2) {
2441  LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
2442  return false;
2443  }
2444 
2445  // Choose the correct opcode for the supported types. Right now, that's
2446  // v2s32, v4s32, and v2s64.
2447  unsigned Opc = 0;
2448  unsigned EltSize = DstTy.getElementType().getSizeInBits();
2449  if (EltSize == 32)
2450  Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
2451  : AArch64::REV32v16i8;
2452  else if (EltSize == 64)
2453  Opc = AArch64::REV64v16i8;
2454 
2455  // We should always get something by the time we get here...
2456  assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
2457 
2458  I.setDesc(TII.get(Opc));
2459  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2460  }
2461 
2462  case TargetOpcode::G_FCONSTANT:
2463  case TargetOpcode::G_CONSTANT: {
2464  const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2465 
2466  const LLT s8 = LLT::scalar(8);
2467  const LLT s16 = LLT::scalar(16);
2468  const LLT s32 = LLT::scalar(32);
2469  const LLT s64 = LLT::scalar(64);
2470  const LLT s128 = LLT::scalar(128);
2471  const LLT p0 = LLT::pointer(0, 64);
2472 
2473  const Register DefReg = I.getOperand(0).getReg();
2474  const LLT DefTy = MRI.getType(DefReg);
2475  const unsigned DefSize = DefTy.getSizeInBits();
2476  const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2477 
2478  // FIXME: Redundant check, but even less readable when factored out.
2479  if (isFP) {
2480  if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2481  LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2482  << " constant, expected: " << s16 << " or " << s32
2483  << " or " << s64 << " or " << s128 << '\n');
2484  return false;
2485  }
2486 
2487  if (RB.getID() != AArch64::FPRRegBankID) {
2488  LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2489  << " constant on bank: " << RB
2490  << ", expected: FPR\n");
2491  return false;
2492  }
2493 
2494  // The 0.0 case is covered by tablegen. Reject it here so we can be sure
2495  // tablegen works correctly and isn't rescued by this code.
2496  // 0.0 is not covered by tablegen for FP128, so that case is handled by the
2497  // code here.
2498  if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2499  return false;
2500  } else {
2501  // s32 and s64 are covered by tablegen.
2502  if (Ty != p0 && Ty != s8 && Ty != s16) {
2503  LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2504  << " constant, expected: " << s32 << ", " << s64
2505  << ", or " << p0 << '\n');
2506  return false;
2507  }
2508 
2509  if (RB.getID() != AArch64::GPRRegBankID) {
2510  LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2511  << " constant on bank: " << RB
2512  << ", expected: GPR\n");
2513  return false;
2514  }
2515  }
2516 
2517  if (isFP) {
2518  const TargetRegisterClass &FPRRC = *getMinClassForRegBank(RB, DefSize);
2519  // For 16, 64, and 128b values, emit a constant pool load.
2520  switch (DefSize) {
2521  default:
2522  llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
2523  case 32:
2524  // For s32, use a cp load if we have optsize/minsize.
2525  if (!shouldOptForSize(&MF))
2526  break;
2527  LLVM_FALLTHROUGH;
2528  case 16:
2529  case 64:
2530  case 128: {
2531  auto *FPImm = I.getOperand(1).getFPImm();
2532  auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2533  if (!LoadMI) {
2534  LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2535  return false;
2536  }
2537  MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2538  I.eraseFromParent();
2539  return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2540  }
2541  }
2542 
2543  // Either emit a FMOV, or emit a copy to emit a normal mov.
2544  assert(DefSize == 32 &&
2545  "Expected constant pool loads for all sizes other than 32!");
2546  const Register DefGPRReg =
2547  MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2548  MachineOperand &RegOp = I.getOperand(0);
2549  RegOp.setReg(DefGPRReg);
2550  MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2551  MIB.buildCopy({DefReg}, {DefGPRReg});
2552 
2553  if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2554  LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2555  return false;
2556  }
2557 
2558  MachineOperand &ImmOp = I.getOperand(1);
2559  // FIXME: Is going through int64_t always correct?
2560  ImmOp.ChangeToImmediate(
2561      ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2562  } else if (I.getOperand(1).isCImm()) {
2563  uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2564  I.getOperand(1).ChangeToImmediate(Val);
2565  } else if (I.getOperand(1).isImm()) {
2566  uint64_t Val = I.getOperand(1).getImm();
2567  I.getOperand(1).ChangeToImmediate(Val);
2568  }
2569 
2570  const unsigned MovOpc =
2571  DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2572  I.setDesc(TII.get(MovOpc));
2573  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2574  return true;
2575  }
2576  case TargetOpcode::G_EXTRACT: {
2577  Register DstReg = I.getOperand(0).getReg();
2578  Register SrcReg = I.getOperand(1).getReg();
2579  LLT SrcTy = MRI.getType(SrcReg);
2580  LLT DstTy = MRI.getType(DstReg);
2581  (void)DstTy;
2582  unsigned SrcSize = SrcTy.getSizeInBits();
2583 
2584  if (SrcTy.getSizeInBits() > 64) {
2585  // This should be an extract of an s128, which is like a vector extract.
2586  if (SrcTy.getSizeInBits() != 128)
2587  return false;
2588  // Only support extracting 64 bits from an s128 at the moment.
2589  if (DstTy.getSizeInBits() != 64)
2590  return false;
2591 
2592  unsigned Offset = I.getOperand(2).getImm();
2593  if (Offset % 64 != 0)
2594  return false;
2595 
2596  // Check we have the right regbank always.
2597  const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2598  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2599  assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2600 
2601  if (SrcRB.getID() == AArch64::GPRRegBankID) {
2602  MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2603  .addUse(SrcReg, 0, Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2604  I.eraseFromParent();
2605  return true;
2606  }
2607 
2608  // Emit the same code as a vector extract.
2609  // Offset must be a multiple of 64.
2610  unsigned LaneIdx = Offset / 64;
2611  MachineInstr *Extract = emitExtractVectorElt(
2612  DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2613  if (!Extract)
2614  return false;
2615  I.eraseFromParent();
2616  return true;
2617  }
2618 
2619  I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2620  MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2621  Ty.getSizeInBits() - 1);
2622 
2623  if (SrcSize < 64) {
2624  assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2625  "unexpected G_EXTRACT types");
2626  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2627  }
2628 
2629  DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2630  MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2631  MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2632  .addReg(DstReg, 0, AArch64::sub_32);
2633  RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2634  AArch64::GPR32RegClass, MRI);
2635  I.getOperand(0).setReg(DstReg);
2636 
2637  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2638  }
2639 
2640  case TargetOpcode::G_INSERT: {
2641  LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2642  LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2643  unsigned DstSize = DstTy.getSizeInBits();
2644  // Larger inserts are vectors, same-size ones should be something else by
2645  // now (split up or turned into COPYs).
2646  if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2647  return false;
2648 
2649  I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2650  unsigned LSB = I.getOperand(3).getImm();
2651  unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2652  I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2653  MachineInstrBuilder(MF, I).addImm(Width - 1);
2654 
2655  if (DstSize < 64) {
2656  assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2657  "unexpected G_INSERT types");
2658  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2659  }
2660 
2661  Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2662  BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2663  TII.get(AArch64::SUBREG_TO_REG))
2664  .addDef(SrcReg)
2665  .addImm(0)
2666  .addUse(I.getOperand(2).getReg())
2667  .addImm(AArch64::sub_32);
2668  RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2669  AArch64::GPR32RegClass, MRI);
2670  I.getOperand(2).setReg(SrcReg);
2671 
2672  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2673  }
2674  case TargetOpcode::G_FRAME_INDEX: {
2675  // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2676  if (Ty != LLT::pointer(0, 64)) {
2677  LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2678  << ", expected: " << LLT::pointer(0, 64) << '\n');
2679  return false;
2680  }
2681  I.setDesc(TII.get(AArch64::ADDXri));
2682 
2683  // MOs for a #0 shifted immediate.
2684  I.addOperand(MachineOperand::CreateImm(0));
2685  I.addOperand(MachineOperand::CreateImm(0));
2686 
2687  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2688  }
2689 
2690  case TargetOpcode::G_GLOBAL_VALUE: {
2691  auto GV = I.getOperand(1).getGlobal();
2692  if (GV->isThreadLocal())
2693  return selectTLSGlobalValue(I, MRI);
2694 
2695  unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
2696  if (OpFlags & AArch64II::MO_GOT) {
2697  I.setDesc(TII.get(AArch64::LOADgot));
2698  I.getOperand(1).setTargetFlags(OpFlags);
2699  } else if (TM.getCodeModel() == CodeModel::Large) {
2700  // Materialize the global using movz/movk instructions.
2701  materializeLargeCMVal(I, GV, OpFlags);
2702  I.eraseFromParent();
2703  return true;
2704  } else if (TM.getCodeModel() == CodeModel::Tiny) {
2705  I.setDesc(TII.get(AArch64::ADR));
2706  I.getOperand(1).setTargetFlags(OpFlags);
2707  } else {
2708  I.setDesc(TII.get(AArch64::MOVaddr));
2709  I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2710  MachineInstrBuilder MIB(MF, I);
2711  MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2712                       OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2713  }
2714  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2715  }
2716 
2717  case TargetOpcode::G_ZEXTLOAD:
2718  case TargetOpcode::G_LOAD:
2719  case TargetOpcode::G_STORE: {
2720  GLoadStore &LdSt = cast<GLoadStore>(I);
2721  bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2722  LLT PtrTy = MRI.getType(LdSt.getPointerReg());
2723 
2724  if (PtrTy != LLT::pointer(0, 64)) {
2725  LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
2726  << ", expected: " << LLT::pointer(0, 64) << '\n');
2727  return false;
2728  }
2729 
2730  uint64_t MemSizeInBytes = LdSt.getMemSize();
2731  unsigned MemSizeInBits = LdSt.getMemSizeInBits();
2732  AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
2733 
2734  // Need special instructions for atomics that affect ordering.
2735  if (Order != AtomicOrdering::NotAtomic &&
2736  Order != AtomicOrdering::Unordered &&
2737  Order != AtomicOrdering::Monotonic) {
2738  assert(!isa<GZExtLoad>(LdSt));
2739  if (MemSizeInBytes > 64)
2740  return false;
2741 
2742  if (isa<GLoad>(LdSt)) {
2743  static unsigned Opcodes[] = {AArch64::LDARB, AArch64::LDARH,
2744  AArch64::LDARW, AArch64::LDARX};
2745  I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2746  } else {
2747  static unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2748  AArch64::STLRW, AArch64::STLRX};
2749  I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2750  }
2751  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2752  return true;
2753  }
2754 
2755 #ifndef NDEBUG
2756  const Register PtrReg = LdSt.getPointerReg();
2757  const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2758  // Sanity-check the pointer register.
2759  assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2760  "Load/Store pointer operand isn't a GPR");
2761  assert(MRI.getType(PtrReg).isPointer() &&
2762  "Load/Store pointer operand isn't a pointer");
2763 #endif
2764 
2765  const Register ValReg = LdSt.getReg(0);
2766  const LLT ValTy = MRI.getType(ValReg);
2767  const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2768 
2769  // The code below doesn't support truncating stores, so we need to split it
2770  // again.
2771  if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2772  unsigned SubReg;
2773  LLT MemTy = LdSt.getMMO().getMemoryType();
2774  auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI);
2775  if (!getSubRegForClass(RC, TRI, SubReg))
2776  return false;
2777 
2778  // Generate a subreg copy.
2779  auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
2780  .addReg(ValReg, 0, SubReg)
2781  .getReg(0);
2782  RBI.constrainGenericRegister(Copy, *RC, MRI);
2783  LdSt.getOperand(0).setReg(Copy);
2784  } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2785  // If this is an any-extending load from the FPR bank, split it into a regular
2786  // load + extend.
2787  if (RB.getID() == AArch64::FPRRegBankID) {
2788  unsigned SubReg;
2789  LLT MemTy = LdSt.getMMO().getMemoryType();
2790  auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI);
2791  if (!getSubRegForClass(RC, TRI, SubReg))
2792  return false;
2793  Register OldDst = LdSt.getReg(0);
2794  Register NewDst =
2795      MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
2796  LdSt.getOperand(0).setReg(NewDst);
2797  MRI.setRegBank(NewDst, RB);
2798  // Generate a SUBREG_TO_REG to extend it.
2799  MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
2800  MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
2801  .addImm(0)
2802  .addUse(NewDst)
2803  .addImm(SubReg);
2804  auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB, RBI);
2805  RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
2806  MIB.setInstr(LdSt);
2807  }
2808  }
2809 
2810  // Helper lambda for partially selecting I. Either returns the original
2811  // instruction with an updated opcode, or a new instruction.
2812  auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
2813  bool IsStore = isa<GStore>(I);
2814  const unsigned NewOpc =
2815  selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
2816  if (NewOpc == I.getOpcode())
2817  return nullptr;
2818  // Check if we can fold anything into the addressing mode.
2819  auto AddrModeFns =
2820  selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
2821  if (!AddrModeFns) {
2822  // Can't fold anything. Use the original instruction.
2823  I.setDesc(TII.get(NewOpc));
2824  I.addOperand(MachineOperand::CreateImm(0));
2825  return &I;
2826  }
2827 
2828  // Folded something. Create a new instruction and return it.
2829  auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
2830  Register CurValReg = I.getOperand(0).getReg();
2831  IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
2832  NewInst.cloneMemRefs(I);
2833  for (auto &Fn : *AddrModeFns)
2834  Fn(NewInst);
2835  I.eraseFromParent();
2836  return &*NewInst;
2837  };
2838 
2839  MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
2840  if (!LoadStore)
2841  return false;
2842 
2843  // If we're storing a 0, use WZR/XZR.
2844  if (Opcode == TargetOpcode::G_STORE) {
2845      auto CVal = getIConstantVRegValWithLookThrough(
2846  LoadStore->getOperand(0).getReg(), MRI);
2847  if (CVal && CVal->Value == 0) {
2848  switch (LoadStore->getOpcode()) {
2849  case AArch64::STRWui:
2850  case AArch64::STRHHui:
2851  case AArch64::STRBBui:
2852  LoadStore->getOperand(0).setReg(AArch64::WZR);
2853  break;
2854  case AArch64::STRXui:
2855  LoadStore->getOperand(0).setReg(AArch64::XZR);
2856  break;
2857  }
2858  }
2859  }
2860 
2861  if (IsZExtLoad) {
2862  // The zextload from a smaller type to i32 should be handled by the
2863  // importer.
2864  if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
2865  return false;
2866  // If we have a ZEXTLOAD then change the load's type to be a narrower reg
2867  // and zero_extend with SUBREG_TO_REG.
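 // For example (illustrative): an s64 G_ZEXTLOAD becomes a load into a fresh
 // GPR32 (e.g. an LDRWui for a 4-byte access), followed by a SUBREG_TO_REG
 // into the 64-bit result; W-register loads already zero the upper 32 bits.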
2868  Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2869  Register DstReg = LoadStore->getOperand(0).getReg();
2870  LoadStore->getOperand(0).setReg(LdReg);
2871 
2872  MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
2873  MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
2874  .addImm(0)
2875  .addUse(LdReg)
2876  .addImm(AArch64::sub_32);
2877  constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2878  return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
2879  MRI);
2880  }
2881  return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2882  }
2883 
2884  case TargetOpcode::G_SMULH:
2885  case TargetOpcode::G_UMULH: {
2886  // Reject the various things we don't support yet.
2887  if (unsupportedBinOp(I, RBI, MRI, TRI))
2888  return false;
2889 
2890  const Register DefReg = I.getOperand(0).getReg();
2891  const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2892 
2893  if (RB.getID() != AArch64::GPRRegBankID) {
2894  LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
2895  return false;
2896  }
2897 
2898  if (Ty != LLT::scalar(64)) {
2899  LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
2900  << ", expected: " << LLT::scalar(64) << '\n');
2901  return false;
2902  }
2903 
2904  unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
2905  : AArch64::UMULHrr;
2906  I.setDesc(TII.get(NewOpc));
2907 
2908  // Now that we selected an opcode, we need to constrain the register
2909  // operands to use appropriate classes.
2910  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2911  }
2912  case TargetOpcode::G_LSHR:
2913  case TargetOpcode::G_ASHR:
2914  if (MRI.getType(I.getOperand(0).getReg()).isVector())
2915  return selectVectorAshrLshr(I, MRI);
2916  LLVM_FALLTHROUGH;
2917  case TargetOpcode::G_SHL:
2918  if (Opcode == TargetOpcode::G_SHL &&
2919  MRI.getType(I.getOperand(0).getReg()).isVector())
2920  return selectVectorSHL(I, MRI);
2921  LLVM_FALLTHROUGH;
2922  case TargetOpcode::G_FADD:
2923  case TargetOpcode::G_FSUB:
2924  case TargetOpcode::G_FMUL:
2925  case TargetOpcode::G_FDIV:
2926  case TargetOpcode::G_OR: {
2927  // Reject the various things we don't support yet.
2928  if (unsupportedBinOp(I, RBI, MRI, TRI))
2929  return false;
2930 
2931  const unsigned OpSize = Ty.getSizeInBits();
2932 
2933  const Register DefReg = I.getOperand(0).getReg();
2934  const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2935 
2936  const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
2937  if (NewOpc == I.getOpcode())
2938  return false;
2939 
2940  I.setDesc(TII.get(NewOpc));
2941  // FIXME: Should the type be always reset in setDesc?
2942 
2943  // Now that we selected an opcode, we need to constrain the register
2944  // operands to use appropriate classes.
2945  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2946  }
2947 
2948  case TargetOpcode::G_PTR_ADD: {
2949  emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
2950  I.eraseFromParent();
2951  return true;
2952  }
2953  case TargetOpcode::G_SADDO:
2954  case TargetOpcode::G_UADDO:
2955  case TargetOpcode::G_SSUBO:
2956  case TargetOpcode::G_USUBO: {
2957  // Emit the operation and get the correct condition code.
2958  auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
2959  I.getOperand(2), I.getOperand(3), MIB);
2960 
2961  // Now, put the overflow result in the register given by the first operand
2962  // to the overflow op. CSINC increments the result when the predicate is
2963  // false, so to get the increment when it's true, we need to use the
2964  // inverse. In this case, we want to increment when carry is set.
2965  Register ZReg = AArch64::WZR;
2966  auto CsetMI = MIB.buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
2967  {ZReg, ZReg})
2968  .addImm(getInvertedCondCode(OpAndCC.second));
2969  constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
2970  I.eraseFromParent();
2971  return true;
2972  }
2973 
2974  case TargetOpcode::G_PTRMASK: {
2975  Register MaskReg = I.getOperand(2).getReg();
2976  Optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
2977  // TODO: Implement arbitrary cases
2978  if (!MaskVal || !isShiftedMask_64(*MaskVal))
2979  return false;
2980 
2981  uint64_t Mask = *MaskVal;
2982  I.setDesc(TII.get(AArch64::ANDXri));
2983  I.getOperand(2).ChangeToImmediate(
2984      AArch64_AM::encodeLogicalImmediate(Mask, 64));
2985 
2986  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2987  }
2988  case TargetOpcode::G_PTRTOINT:
2989  case TargetOpcode::G_TRUNC: {
2990  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2991  const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
2992 
2993  const Register DstReg = I.getOperand(0).getReg();
2994  const Register SrcReg = I.getOperand(1).getReg();
2995 
2996  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2997  const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2998 
2999  if (DstRB.getID() != SrcRB.getID()) {
3000  LLVM_DEBUG(
3001  dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
3002  return false;
3003  }
3004 
3005  if (DstRB.getID() == AArch64::GPRRegBankID) {
3006  const TargetRegisterClass *DstRC =
3007  getRegClassForTypeOnBank(DstTy, DstRB, RBI);
3008  if (!DstRC)
3009  return false;
3010 
3011  const TargetRegisterClass *SrcRC =
3012  getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
3013  if (!SrcRC)
3014  return false;
3015 
3016  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
3017  !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
3018  LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
3019  return false;
3020  }
3021 
3022  if (DstRC == SrcRC) {
3023  // Nothing to be done
3024  } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
3025  SrcTy == LLT::scalar(64)) {
3026  llvm_unreachable("TableGen can import this case");
3027  return false;
3028  } else if (DstRC == &AArch64::GPR32RegClass &&
3029  SrcRC == &AArch64::GPR64RegClass) {
3030  I.getOperand(1).setSubReg(AArch64::sub_32);
3031  } else {
3032  LLVM_DEBUG(
3033  dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3034  return false;
3035  }
3036 
3037  I.setDesc(TII.get(TargetOpcode::COPY));
3038  return true;
3039  } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3040  if (DstTy == LLT::fixed_vector(4, 16) &&
3041  SrcTy == LLT::fixed_vector(4, 32)) {
3042  I.setDesc(TII.get(AArch64::XTNv4i16));
3043  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3044  return true;
3045  }
3046 
3047  if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3048  MachineInstr *Extract = emitExtractVectorElt(
3049  DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
3050  if (!Extract)
3051  return false;
3052  I.eraseFromParent();
3053  return true;
3054  }
3055 
3056  // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3057  if (Opcode == TargetOpcode::G_PTRTOINT) {
3058  assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
3059  I.setDesc(TII.get(TargetOpcode::COPY));
3060  return selectCopy(I, TII, MRI, TRI, RBI);
3061  }
3062  }
3063 
3064  return false;
3065  }
3066 
3067  case TargetOpcode::G_ANYEXT: {
3068  if (selectUSMovFromExtend(I, MRI))
3069  return true;
3070 
3071  const Register DstReg = I.getOperand(0).getReg();
3072  const Register SrcReg = I.getOperand(1).getReg();
3073 
3074  const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
3075  if (RBDst.getID() != AArch64::GPRRegBankID) {
3076  LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
3077  << ", expected: GPR\n");
3078  return false;
3079  }
3080 
3081  const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
3082  if (RBSrc.getID() != AArch64::GPRRegBankID) {
3083  LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
3084  << ", expected: GPR\n");
3085  return false;
3086  }
3087 
3088  const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
3089 
3090  if (DstSize == 0) {
3091  LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
3092  return false;
3093  }
3094 
3095  if (DstSize != 64 && DstSize > 32) {
3096  LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
3097  << ", expected: 32 or 64\n");
3098  return false;
3099  }
3100  // At this point G_ANYEXT is just like a plain COPY, but we need
3101  // to explicitly form the 64-bit value if any.
3102  if (DstSize > 32) {
3103  Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3104  BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
3105  .addDef(ExtSrc)
3106  .addImm(0)
3107  .addUse(SrcReg)
3108  .addImm(AArch64::sub_32);
3109  I.getOperand(1).setReg(ExtSrc);
3110  }
3111  return selectCopy(I, TII, MRI, TRI, RBI);
3112  }
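  // A rough sketch of the 64-bit case above (illustrative vregs): the anyext
  //   %dst:gpr(s64) = G_ANYEXT %src:gpr(s32)
  // becomes a widening SUBREG_TO_REG feeding a plain COPY, roughly
  //   %ext:gpr64all = SUBREG_TO_REG 0, %src, %subreg.sub_32
  //   %dst = COPY %ext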
3113 
3114  case TargetOpcode::G_ZEXT:
3115  case TargetOpcode::G_SEXT_INREG:
3116  case TargetOpcode::G_SEXT: {
3117  if (selectUSMovFromExtend(I, MRI))
3118  return true;
3119 
3120  unsigned Opcode = I.getOpcode();
3121  const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3122  const Register DefReg = I.getOperand(0).getReg();
3123  Register SrcReg = I.getOperand(1).getReg();
3124  const LLT DstTy = MRI.getType(DefReg);
3125  const LLT SrcTy = MRI.getType(SrcReg);
3126  unsigned DstSize = DstTy.getSizeInBits();
3127  unsigned SrcSize = SrcTy.getSizeInBits();
3128 
3129  // SEXT_INREG has the same src reg size as dst, the size of the value to be
3130  // extended is encoded in the imm.
3131  if (Opcode == TargetOpcode::G_SEXT_INREG)
3132  SrcSize = I.getOperand(2).getImm();
3133 
3134  if (DstTy.isVector())
3135  return false; // Should be handled by imported patterns.
3136 
3137  assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3138  AArch64::GPRRegBankID &&
3139  "Unexpected ext regbank");
3140 
3141  MachineInstr *ExtI;
3142 
3143  // First, check if we're extending the result of a load whose dest type is
3144  // smaller than 32 bits; if so, this zext is redundant. GPR32 is the smallest
3145  // GPR register on AArch64 and all loads which are smaller automatically
3146  // zero-extend the upper bits. E.g.
3147  // %v(s8) = G_LOAD %p, :: (load 1)
3148  // %v2(s32) = G_ZEXT %v(s8)
3149  if (!IsSigned) {
3150  auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
3151  bool IsGPR =
3152  RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3153  if (LoadMI && IsGPR) {
3154  const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3155  unsigned BytesLoaded = MemOp->getSize();
3156  if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3157  return selectCopy(I, TII, MRI, TRI, RBI);
3158  }
3159 
3160  // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3161  // + SUBREG_TO_REG.
3162  //
3163  // If we are zero extending from 32 bits to 64 bits, it's possible that
3164  // the instruction implicitly does the zero extend for us. In that case,
3165  // we only need the SUBREG_TO_REG.
3166  if (IsGPR && SrcSize == 32 && DstSize == 64) {
3167  // Unlike with the G_LOAD case, we don't want to look through copies
3168  // here. (See isDef32.)
3169  MachineInstr *Def = MRI.getVRegDef(SrcReg);
3170  Register SubregToRegSrc = SrcReg;
3171 
3172  // Does the instruction implicitly zero extend?
3173  if (!Def || !isDef32(*Def)) {
3174  // No. Zero out using an OR.
3175  Register OrDst = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3176  const Register ZReg = AArch64::WZR;
3177  MIB.buildInstr(AArch64::ORRWrs, {OrDst}, {ZReg, SrcReg}).addImm(0);
3178  SubregToRegSrc = OrDst;
3179  }
3180 
3181  MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3182  .addImm(0)
3183  .addUse(SubregToRegSrc)
3184  .addImm(AArch64::sub_32);
3185 
3186  if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3187  MRI)) {
3188  LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3189  return false;
3190  }
3191 
3192  if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3193  MRI)) {
3194  LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3195  return false;
3196  }
3197 
3198  I.eraseFromParent();
3199  return true;
3200  }
3201  }
3202 
3203  if (DstSize == 64) {
3204  if (Opcode != TargetOpcode::G_SEXT_INREG) {
3205  // FIXME: Can we avoid manually doing this?
3206  if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3207  MRI)) {
3208  LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3209  << " operand\n");
3210  return false;
3211  }
3212  SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
3213  {&AArch64::GPR64RegClass}, {})
3214  .addImm(0)
3215  .addUse(SrcReg)
3216  .addImm(AArch64::sub_32)
3217  .getReg(0);
3218  }
3219 
3220  ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3221  {DefReg}, {SrcReg})
3222  .addImm(0)
3223  .addImm(SrcSize - 1);
3224  } else if (DstSize <= 32) {
3225  ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3226  {DefReg}, {SrcReg})
3227  .addImm(0)
3228  .addImm(SrcSize - 1);
3229  } else {
3230  return false;
3231  }
3232 
3233  constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
3234  I.eraseFromParent();
3235  return true;
3236  }
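  // Rough sketches of the scalar cases above (illustrative vregs):
  //   %dst(s64) = G_ZEXT %src(s32)  ->  SUBREG_TO_REG 0, %src, %subreg.sub_32
  //                                     (plus an ORRWrs zero-mov when %src is
  //                                      not already known to be a def32)
  //   %dst(s32) = G_ZEXT %src(s8)   ->  %dst = UBFMWri %src, 0, 7
  //   %dst(s32) = G_SEXT %src(s16)  ->  %dst = SBFMWri %src, 0, 15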
3237 
3238  case TargetOpcode::G_SITOFP:
3239  case TargetOpcode::G_UITOFP:
3240  case TargetOpcode::G_FPTOSI:
3241  case TargetOpcode::G_FPTOUI: {
3242  const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
3243  SrcTy = MRI.getType(I.getOperand(1).getReg());
3244  const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
3245  if (NewOpc == Opcode)
3246  return false;
3247 
3248  I.setDesc(TII.get(NewOpc));
3249  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3250 
3251  return true;
3252  }
3253 
3254  case TargetOpcode::G_FREEZE:
3255  return selectCopy(I, TII, MRI, TRI, RBI);
3256 
3257  case TargetOpcode::G_INTTOPTR:
3258  // The importer is currently unable to import pointer types since they
3259  // didn't exist in SelectionDAG.
3260  return selectCopy(I, TII, MRI, TRI, RBI);
3261 
3262  case TargetOpcode::G_BITCAST:
3263  // Imported SelectionDAG rules can handle every bitcast except those that
3264  // bitcast from a type to the same type. Ideally, these shouldn't occur
3265  // but we might not run an optimizer that deletes them. The other exception
3266  // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3267  // of them.
3268  return selectCopy(I, TII, MRI, TRI, RBI);
3269 
3270  case TargetOpcode::G_SELECT: {
3271  if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
3272  LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
3273  << ", expected: " << LLT::scalar(1) << '\n');
3274  return false;
3275  }
3276 
3277  const Register CondReg = I.getOperand(1).getReg();
3278  const Register TReg = I.getOperand(2).getReg();
3279  const Register FReg = I.getOperand(3).getReg();
3280 
3281  if (tryOptSelect(I))
3282  return true;
3283 
3284  // Make sure to use an unused vreg instead of wzr, so that the peephole
3285  // optimizations will be able to optimize these.
3286  Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3287  auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3288  .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3289  constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3290  if (!emitSelect(I.getOperand(0).getReg(), TReg, FReg, AArch64CC::NE, MIB))
3291  return false;
3292  I.eraseFromParent();
3293  return true;
3294  }
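  // A rough sketch of the fallback path above (illustrative vregs): the i1
  // condition is tested with a flag-setting AND and the select is emitted off
  // of NZCV, roughly
  //   %dead:gpr32 = ANDSWri %cond, <encoded imm for 0x1>, implicit-def $nzcv
  //   %dst = CSEL/FCSEL %t, %f, NE, implicit $nzcv      ; via emitSelect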
3295  case TargetOpcode::G_ICMP: {
3296  if (Ty.isVector())
3297  return selectVectorICmp(I, MRI);
3298 
3299  if (Ty != LLT::scalar(32)) {
3300  LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3301  << ", expected: " << LLT::scalar(32) << '\n');
3302  return false;
3303  }
3304 
3305  auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3306  emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
3307  MIB);
3308  emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIB);
3309  I.eraseFromParent();
3310  return true;
3311  }
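  // A rough sketch of the scalar path above (illustrative vregs): the compare
  // sets NZCV and the boolean result is materialized with a CSINC, roughly
  //   SUBSWrr/SUBSXrr %lhs, %rhs, implicit-def $nzcv    ; via emitIntegerCompare
  //   %dst:gpr32 = CSINCWr $wzr, $wzr, <inverted cc>    ; via emitCSetForICMP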
3312 
3313  case TargetOpcode::G_FCMP: {
3314  CmpInst::Predicate Pred =
3315  static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3316  if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
3317  Pred) ||
3318  !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
3319  return false;
3320  I.eraseFromParent();
3321  return true;
3322  }
3323  case TargetOpcode::G_VASTART:
3324  return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3325  : selectVaStartAAPCS(I, MF, MRI);
3326  case TargetOpcode::G_INTRINSIC:
3327  return selectIntrinsic(I, MRI);
3328  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3329  return selectIntrinsicWithSideEffects(I, MRI);
3330  case TargetOpcode::G_IMPLICIT_DEF: {
3331  I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3332  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3333  const Register DstReg = I.getOperand(0).getReg();
3334  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3335  const TargetRegisterClass *DstRC =
3336  getRegClassForTypeOnBank(DstTy, DstRB, RBI);
3337  RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3338  return true;
3339  }
3340  case TargetOpcode::G_BLOCK_ADDR: {
3341  if (TM.getCodeModel() == CodeModel::Large) {
3342  materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3343  I.eraseFromParent();
3344  return true;
3345  } else {
3346  I.setDesc(TII.get(AArch64::MOVaddrBA));
3347  auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3348  I.getOperand(0).getReg())
3349  .addBlockAddress(I.getOperand(1).getBlockAddress(),
3350  /* Offset */ 0, AArch64II::MO_PAGE)
3351  .addBlockAddress(
3352  I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3353  AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3354  I.eraseFromParent();
3355  return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3356  }
3357  }
3358  case AArch64::G_DUP: {
3359  // When the scalar of G_DUP is an s8/s16 GPR, it can't be selected by the
3360  // imported patterns, so do it manually here. Avoiding generating an s16 GPR
3361  // is difficult because at RBS we may end up pessimizing the FPR case if we
3362  // decide to add an anyextend to fix this. Manual selection is the most
3363  // robust solution for now.
3364  if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3365  AArch64::GPRRegBankID)
3366  return false; // We expect the fpr regbank case to be imported.
3367  LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3368  if (VecTy == LLT::fixed_vector(8, 8))
3369  I.setDesc(TII.get(AArch64::DUPv8i8gpr));
3370  else if (VecTy == LLT::fixed_vector(16, 8))
3371  I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3372  else if (VecTy == LLT::fixed_vector(4, 16))
3373  I.setDesc(TII.get(AArch64::DUPv4i16gpr));
3374  else if (VecTy == LLT::fixed_vector(8, 16))
3375  I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3376  else
3377  return false;
3378  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3379  }
3380  case TargetOpcode::G_INTRINSIC_TRUNC:
3381  return selectIntrinsicTrunc(I, MRI);
3382  case TargetOpcode::G_INTRINSIC_ROUND:
3383  return selectIntrinsicRound(I, MRI);
3384  case TargetOpcode::G_BUILD_VECTOR:
3385  return selectBuildVector(I, MRI);
3386  case TargetOpcode::G_MERGE_VALUES:
3387  return selectMergeValues(I, MRI);
3388  case TargetOpcode::G_UNMERGE_VALUES:
3389  return selectUnmergeValues(I, MRI);
3390  case TargetOpcode::G_SHUFFLE_VECTOR:
3391  return selectShuffleVector(I, MRI);
3392  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3393  return selectExtractElt(I, MRI);
3394  case TargetOpcode::G_INSERT_VECTOR_ELT:
3395  return selectInsertElt(I, MRI);
3396  case TargetOpcode::G_CONCAT_VECTORS:
3397  return selectConcatVectors(I, MRI);
3398  case TargetOpcode::G_JUMP_TABLE:
3399  return selectJumpTable(I, MRI);
3400  case TargetOpcode::G_VECREDUCE_FADD:
3401  case TargetOpcode::G_VECREDUCE_ADD:
3402  return selectReduction(I, MRI);
3403  }
3404 
3405  return false;
3406 }
3407 
3408 bool AArch64InstructionSelector::selectReduction(MachineInstr &I,
3409  MachineRegisterInfo &MRI) {
3410  Register VecReg = I.getOperand(1).getReg();
3411  LLT VecTy = MRI.getType(VecReg);
3412  if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
3413  // For <2 x i32> ADDPv2i32 generates an FPR64 value, so we need to emit
3414  // a subregister copy afterwards.
3415  if (VecTy == LLT::fixed_vector(2, 32)) {
3416  Register DstReg = I.getOperand(0).getReg();
3417  auto AddP = MIB.buildInstr(AArch64::ADDPv2i32, {&AArch64::FPR64RegClass},
3418  {VecReg, VecReg});
3419  auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3420  .addReg(AddP.getReg(0), 0, AArch64::ssub)
3421  .getReg(0);
3422  RBI.constrainGenericRegister(Copy, AArch64::FPR32RegClass, MRI);
3423  I.eraseFromParent();
3424  return constrainSelectedInstRegOperands(*AddP, TII, TRI, RBI);
3425  }
3426 
3427  unsigned Opc = 0;
3428  if (VecTy == LLT::fixed_vector(16, 8))
3429  Opc = AArch64::ADDVv16i8v;
3430  else if (VecTy == LLT::fixed_vector(8, 16))
3431  Opc = AArch64::ADDVv8i16v;
3432  else if (VecTy == LLT::fixed_vector(4, 32))
3433  Opc = AArch64::ADDVv4i32v;
3434  else if (VecTy == LLT::fixed_vector(2, 64))
3435  Opc = AArch64::ADDPv2i64p;
3436  else {
3437  LLVM_DEBUG(dbgs() << "Unhandled type for add reduction");
3438  return false;
3439  }
3440  I.setDesc(TII.get(Opc));
3441  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3442  }
3443 
3444  if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) {
3445  unsigned Opc = 0;
3446  if (VecTy == LLT::fixed_vector(2, 32))
3447  Opc = AArch64::FADDPv2i32p;
3448  else if (VecTy == LLT::fixed_vector(2, 64))
3449  Opc = AArch64::FADDPv2i64p;
3450  else {
3451  LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction");
3452  return false;
3453  }
3454  I.setDesc(TII.get(Opc));
3455  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3456  }
3457  return false;
3458 }
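// A rough sketch of the <2 x s32> add-reduction case above (illustrative
// vregs): the pairwise add produces a 64-bit FPR value and the scalar result
// is then peeled off with a subregister copy:
//   %sum:fpr64 = ADDPv2i32 %vec, %vec
//   %dst:fpr32 = COPY %sum.ssub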
3459 
3460 bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3461  MachineRegisterInfo &MRI) {
3462  assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3463  Register JTAddr = I.getOperand(0).getReg();
3464  unsigned JTI = I.getOperand(1).getIndex();
3465  Register Index = I.getOperand(2).getReg();
3466 
3467  Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3468  Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3469 
3470  MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3471  auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3472  {TargetReg, ScratchReg}, {JTAddr, Index})
3473  .addJumpTableIndex(JTI);
3474  // Build the indirect branch.
3475  MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3476  I.eraseFromParent();
3477  return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3478 }
3479 
3480 bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3481  MachineRegisterInfo &MRI) {
3482  assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3483  assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3484 
3485  Register DstReg = I.getOperand(0).getReg();
3486  unsigned JTI = I.getOperand(1).getIndex();
3487  // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
3488  auto MovMI =
3489  MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3490  .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3491  .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3492  I.eraseFromParent();
3493  return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3494 }
3495 
3496 bool AArch64InstructionSelector::selectTLSGlobalValue(
3497  MachineInstr &I, MachineRegisterInfo &MRI) {
3498  if (!STI.isTargetMachO())
3499  return false;
3500  MachineFunction &MF = *I.getParent()->getParent();
3501  MF.getFrameInfo().setAdjustsStack(true);
3502 
3503  const auto &GlobalOp = I.getOperand(1);
3504  assert(GlobalOp.getOffset() == 0 &&
3505  "Shouldn't have an offset on TLS globals!");
3506  const GlobalValue &GV = *GlobalOp.getGlobal();
3507 
3508  auto LoadGOT =
3509  MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3510  .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3511 
3512  auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3513  {LoadGOT.getReg(0)})
3514  .addImm(0);
3515 
3516  MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3517  // TLS calls preserve all registers except those that absolutely must be
3518  // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3519  // silly).
3520  MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
3521  .addUse(AArch64::X0, RegState::Implicit)
3522  .addDef(AArch64::X0, RegState::Implicit)
3523  .addRegMask(TRI.getTLSCallPreservedMask());
3524 
3525  MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3526  RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3527  MRI);
3528  I.eraseFromParent();
3529  return true;
3530 }
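// A rough sketch of the MachO TLS sequence built above (illustrative vregs):
//   %desc:gpr64common = LOADgot target-flags(aarch64-tls) @var
//   %fn:gpr64common   = LDRXui %desc, 0       ; the descriptor's resolver
//   $x0 = COPY %desc
//   BLR %fn, implicit $x0, implicit-def $x0   ; returns the address in $x0
//   %dst = COPY $x0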
3531 
3532 bool AArch64InstructionSelector::selectIntrinsicTrunc(
3533  MachineInstr &I, MachineRegisterInfo &MRI) const {
3534  const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3535 
3536  // Select the correct opcode.
3537  unsigned Opc = 0;
3538  if (!SrcTy.isVector()) {
3539  switch (SrcTy.getSizeInBits()) {
3540  default:
3541  case 16:
3542  Opc = AArch64::FRINTZHr;
3543  break;
3544  case 32:
3545  Opc = AArch64::FRINTZSr;
3546  break;
3547  case 64:
3548  Opc = AArch64::FRINTZDr;
3549  break;
3550  }
3551  } else {
3552  unsigned NumElts = SrcTy.getNumElements();
3553  switch (SrcTy.getElementType().getSizeInBits()) {
3554  default:
3555  break;
3556  case 16:
3557  if (NumElts == 4)
3558  Opc = AArch64::FRINTZv4f16;
3559  else if (NumElts == 8)
3560  Opc = AArch64::FRINTZv8f16;
3561  break;
3562  case 32:
3563  if (NumElts == 2)
3564  Opc = AArch64::FRINTZv2f32;
3565  else if (NumElts == 4)
3566  Opc = AArch64::FRINTZv4f32;
3567  break;
3568  case 64:
3569  if (NumElts == 2)
3570  Opc = AArch64::FRINTZv2f64;
3571  break;
3572  }
3573  }
3574 
3575  if (!Opc) {
3576  // Didn't get an opcode above, bail.
3577  LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
3578  return false;
3579  }
3580 
3581  // Legalization would have set us up perfectly for this; we just need to
3582  // set the opcode and move on.
3583  I.setDesc(TII.get(Opc));
3584  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3585 }
3586 
3587 bool AArch64InstructionSelector::selectIntrinsicRound(
3588  MachineInstr &I, MachineRegisterInfo &MRI) const {
3589  const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3590 
3591  // Select the correct opcode.
3592  unsigned Opc = 0;
3593  if (!SrcTy.isVector()) {
3594  switch (SrcTy.getSizeInBits()) {
3595  default:
3596  case 16:
3597  Opc = AArch64::FRINTAHr;
3598  break;
3599  case 32:
3600  Opc = AArch64::FRINTASr;
3601  break;
3602  case 64:
3603  Opc = AArch64::FRINTADr;
3604  break;
3605  }
3606  } else {
3607  unsigned NumElts = SrcTy.getNumElements();
3608  switch (SrcTy.getElementType().getSizeInBits()) {
3609  default:
3610  break;
3611  case 16:
3612  if (NumElts == 4)
3613  Opc = AArch64::FRINTAv4f16;
3614  else if (NumElts == 8)
3615  Opc = AArch64::FRINTAv8f16;
3616  break;
3617  case 32:
3618  if (NumElts == 2)
3619  Opc = AArch64::FRINTAv2f32;
3620  else if (NumElts == 4)
3621  Opc = AArch64::FRINTAv4f32;
3622  break;
3623  case 64:
3624  if (NumElts == 2)
3625  Opc = AArch64::FRINTAv2f64;
3626  break;
3627  }
3628  }
3629 
3630  if (!Opc) {
3631  // Didn't get an opcode above, bail.
3632  LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
3633  return false;
3634  }
3635 
3636  // Legalization would have set us up perfectly for this; we just need to
3637  // set the opcode and move on.
3638  I.setDesc(TII.get(Opc));
3639  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3640 }
3641 
3642 bool AArch64InstructionSelector::selectVectorICmp(
3643  MachineInstr &I, MachineRegisterInfo &MRI) {
3644  Register DstReg = I.getOperand(0).getReg();
3645  LLT DstTy = MRI.getType(DstReg);
3646  Register SrcReg = I.getOperand(2).getReg();
3647  Register Src2Reg = I.getOperand(3).getReg();
3648  LLT SrcTy = MRI.getType(SrcReg);
3649 
3650  unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
3651  unsigned NumElts = DstTy.getNumElements();
3652 
3653  // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
3654  // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
3655  // Third index is cc opcode:
3656  // 0 == eq
3657  // 1 == ugt
3658  // 2 == uge
3659  // 3 == ult
3660  // 4 == ule
3661  // 5 == sgt
3662  // 6 == sge
3663  // 7 == slt
3664  // 8 == sle
3665  // ne is done by negating 'eq' result.
3666 
3667  // The table below assumes that for some comparisons the operands will be
3668  // commuted.
3669  // ult op == commute + ugt op
3670  // ule op == commute + uge op
3671  // slt op == commute + sgt op
3672  // sle op == commute + sge op
3673  unsigned PredIdx = 0;
3674  bool SwapOperands = false;
3675  CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
3676  switch (Pred) {
3677  case CmpInst::ICMP_NE:
3678  case CmpInst::ICMP_EQ:
3679  PredIdx = 0;
3680  break;
3681  case CmpInst::ICMP_UGT:
3682  PredIdx = 1;
3683  break;
3684  case CmpInst::ICMP_UGE:
3685  PredIdx = 2;
3686  break;
3687  case CmpInst::ICMP_ULT:
3688  PredIdx = 3;
3689  SwapOperands = true;
3690  break;
3691  case CmpInst::ICMP_ULE:
3692  PredIdx = 4;
3693  SwapOperands = true;
3694  break;
3695  case CmpInst::ICMP_SGT:
3696  PredIdx = 5;
3697  break;
3698  case CmpInst::ICMP_SGE:
3699  PredIdx = 6;
3700  break;
3701  case CmpInst::ICMP_SLT:
3702  PredIdx = 7;
3703  SwapOperands = true;
3704  break;
3705  case CmpInst::ICMP_SLE:
3706  PredIdx = 8;
3707  SwapOperands = true;
3708  break;
3709  default:
3710  llvm_unreachable("Unhandled icmp predicate");
3711  return false;
3712  }
3713 
3714  // This table obviously should be tablegen'd when we have our GISel native
3715  // tablegen selector.
3716 
3717  static const unsigned OpcTable[4][4][9] = {
3718  {
3719  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3720  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3721  0 /* invalid */},
3722  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3723  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3724  0 /* invalid */},
3725  {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
3726  AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
3727  AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
3728  {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
3729  AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
3730  AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
3731  },
3732  {
3733  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3734  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3735  0 /* invalid */},
3736  {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
3737  AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
3738  AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
3739  {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
3740  AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
3741  AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
3742  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3743  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3744  0 /* invalid */}
3745  },
3746  {
3747  {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
3748  AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
3749  AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
3750  {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
3751  AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
3752  AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
3753  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3754  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3755  0 /* invalid */},
3756  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3757  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3758  0 /* invalid */}
3759  },
3760  {
3761  {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
3762  AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
3763  AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
3764  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3765  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3766  0 /* invalid */},
3767  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3768  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3769  0 /* invalid */},
3770  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3771  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3772  0 /* invalid */}
3773  },
3774  };
3775  unsigned EltIdx = Log2_32(SrcEltSize / 8);
3776  unsigned NumEltsIdx = Log2_32(NumElts / 2);
3777  unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
3778  if (!Opc) {
3779  LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
3780  return false;
3781  }
3782 
3783  const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3784  const TargetRegisterClass *SrcRC =
3785  getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
3786  if (!SrcRC) {
3787  LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3788  return false;
3789  }
3790 
3791  unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
3792  if (SrcTy.getSizeInBits() == 128)
3793  NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
3794 
3795  if (SwapOperands)
3796  std::swap(SrcReg, Src2Reg);
3797 
3798  auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
3799  constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3800 
3801  // Invert if we had a 'ne' cc.
3802  if (NotOpc) {
3803  Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
3804  constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3805  } else {
3806  MIB.buildCopy(DstReg, Cmp.getReg(0));
3807  }
3808  RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
3809  I.eraseFromParent();
3810  return true;
3811 }
3812 
3813 MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3814  unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3815  MachineIRBuilder &MIRBuilder) const {
3816  auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3817 
3818  auto BuildFn = [&](unsigned SubregIndex) {
3819  auto Ins =
3820  MIRBuilder
3821  .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3822  .addImm(SubregIndex);
3823  constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3824  constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3825  return &*Ins;
3826  };
3827 
3828  switch (EltSize) {
3829  case 16:
3830  return BuildFn(AArch64::hsub);
3831  case 32:
3832  return BuildFn(AArch64::ssub);
3833  case 64:
3834  return BuildFn(AArch64::dsub);
3835  default:
3836  return nullptr;
3837  }
3838 }
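// A rough sketch of what emitScalarToVector builds for a 32-bit element
// (illustrative vregs and an FPR128 destination class):
//   %undef:fpr128 = IMPLICIT_DEF
//   %vec:fpr128   = INSERT_SUBREG %undef, %scalar, %subreg.ssub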
3839 
3840 bool AArch64InstructionSelector::selectMergeValues(
3841  MachineInstr &I, MachineRegisterInfo &MRI) {
3842  assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3843  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3844  const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3845  assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3846  const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3847 
3848  if (I.getNumOperands() != 3)
3849  return false;
3850 
3851  // Merging 2 s64s into an s128.
3852  if (DstTy == LLT::scalar(128)) {
3853  if (SrcTy.getSizeInBits() != 64)
3854  return false;
3855  Register DstReg = I.getOperand(0).getReg();
3856  Register Src1Reg = I.getOperand(1).getReg();
3857  Register Src2Reg = I.getOperand(2).getReg();
3858  auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3859  MachineInstr *InsMI =
3860  emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
3861  if (!InsMI)
3862  return false;
3863  MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
3864  Src2Reg, /* LaneIdx */ 1, RB, MIB);
3865  if (!Ins2MI)
3866  return false;
3867  constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3868  constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
3869  I.eraseFromParent();
3870  return true;
3871  }
3872 
3873  if (RB.getID() != AArch64::GPRRegBankID)
3874  return false;
3875 
3876  if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3877  return false;
3878 
3879  auto *DstRC = &AArch64::GPR64RegClass;
3880  Register SubToRegDef = MRI.createVirtualRegister(DstRC);
3881  MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3882  TII.get(TargetOpcode::SUBREG_TO_REG))
3883  .addDef(SubToRegDef)
3884  .addImm(0)
3885  .addUse(I.getOperand(1).getReg())
3886  .addImm(AArch64::sub_32);
3887  Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
3888  // Need to anyext the second scalar before we can use bfm
3889  MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3890  TII.get(TargetOpcode::SUBREG_TO_REG))
3891  .addDef(SubToRegDef2)
3892  .addImm(0)
3893  .addUse(I.getOperand(2).getReg())
3894  .addImm(AArch64::sub_32);
3895  MachineInstr &BFM =
3896  *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
3897  .addDef(I.getOperand(0).getReg())
3898  .addUse(SubToRegDef)
3899  .addUse(SubToRegDef2)
3900  .addImm(32)
3901  .addImm(31);
3902  constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
3903  constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
3904  constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
3905  I.eraseFromParent();
3906  return true;
3907 }
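// A rough sketch of the GPR s32+s32 -> s64 merge above (illustrative vregs):
//   %lo64:gpr64 = SUBREG_TO_REG 0, %lo, %subreg.sub_32
//   %hi64:gpr64 = SUBREG_TO_REG 0, %hi, %subreg.sub_32
//   %dst:gpr64  = BFMXri %lo64, %hi64, 32, 31   ; places %hi in bits [63:32]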
3908 
3909 static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3910  const unsigned EltSize) {
3911  // Choose a lane copy opcode and subregister based off of the size of the
3912  // vector's elements.
3913  switch (EltSize) {
3914  case 8:
3915  CopyOpc = AArch64::CPYi8;
3916  ExtractSubReg = AArch64::bsub;
3917  break;
3918  case 16:
3919  CopyOpc = AArch64::CPYi16;
3920  ExtractSubReg = AArch64::hsub;
3921  break;
3922  case 32:
3923  CopyOpc = AArch64::CPYi32;
3924  ExtractSubReg = AArch64::ssub;
3925  break;
3926  case 64:
3927  CopyOpc = AArch64::CPYi64;
3928  ExtractSubReg = AArch64::dsub;
3929  break;
3930  default:
3931  // Unknown size, bail out.
3932  LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
3933  return false;
3934  }
3935  return true;
3936 }
3937 
3938 MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3939  Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
3940  Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
3941  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3942  unsigned CopyOpc = 0;
3943  unsigned ExtractSubReg = 0;
3944  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
3945  LLVM_DEBUG(
3946  dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
3947  return nullptr;
3948  }
3949 
3950  const TargetRegisterClass *DstRC =
3951  getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
3952  if (!DstRC) {
3953  LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
3954  return nullptr;
3955  }
3956 
3957  const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
3958  const LLT &VecTy = MRI.getType(VecReg);
3959  const TargetRegisterClass *VecRC =
3960  getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
3961  if (!VecRC) {
3962  LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3963  return nullptr;
3964  }
3965 
3966  // The register that we're going to copy into.
3967  Register InsertReg = VecReg;
3968  if (!DstReg)
3969  DstReg = MRI.createVirtualRegister(DstRC);
3970  // If the lane index is 0, we just use a subregister COPY.
3971  if (LaneIdx == 0) {
3972  auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
3973  .addReg(VecReg, 0, ExtractSubReg);
3974  RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3975  return &*Copy;
3976  }
3977 
3978  // Lane copies require 128-bit wide registers. If we're dealing with an
3979  // unpacked vector, then we need to move up to that width. Insert an implicit
3980  // def and a subregister insert to get us there.
3981  if (VecTy.getSizeInBits() != 128) {
3982  MachineInstr *ScalarToVector = emitScalarToVector(
3983  VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
3984  if (!ScalarToVector)
3985  return nullptr;
3986  InsertReg = ScalarToVector->getOperand(0).getReg();
3987  }
3988 
3989  MachineInstr *LaneCopyMI =
3990  MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
3991  constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
3992 
3993  // Make sure that we actually constrain the initial copy.
3994  RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3995  return LaneCopyMI;
3996 }
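// A rough sketch of what emitExtractVectorElt produces for a <4 x s32> FPR
// source (illustrative vregs):
//   lane 0:  %dst:fpr32 = COPY %vec.ssub
//   lane 1:  %dst:fpr32 = CPYi32 %vec:fpr128, 1
// Unpacked (64-bit) vectors are first widened to 128 bits via
// emitScalarToVector before the lane copy.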
3997 
3998 bool AArch64InstructionSelector::selectExtractElt(
3999  MachineInstr &I, MachineRegisterInfo &MRI) {
4000  assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
4001  "unexpected opcode!");
4002  Register DstReg = I.getOperand(0).getReg();
4003  const LLT NarrowTy = MRI.getType(DstReg);
4004  const Register SrcReg = I.getOperand(1).getReg();
4005  const LLT WideTy = MRI.getType(SrcReg);
4006  (void)WideTy;
4007  assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
4008  "source register size too small!");
4009  assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
4010 
4011  // Need the lane index to determine the correct copy opcode.
4012  MachineOperand &LaneIdxOp = I.getOperand(2);
4013  assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
4014 
4015  if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4016  LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
4017  return false;
4018  }
4019 
4020  // Find the index to extract from.
4021  auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
4022  if (!VRegAndVal)
4023  return false;
4024  unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4025 
4026 
4027  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
4028  MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
4029  LaneIdx, MIB);
4030  if (!Extract)
4031  return false;
4032 
4033  I.eraseFromParent();
4034  return true;
4035 }
4036 
4037 bool AArch64InstructionSelector::selectSplitVectorUnmerge(
4038  MachineInstr &I, MachineRegisterInfo &MRI) {
4039  unsigned NumElts = I.getNumOperands() - 1;
4040  Register SrcReg = I.getOperand(NumElts).getReg();
4041  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4042  const LLT SrcTy = MRI.getType(SrcReg);
4043 
4044  assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
4045  if (SrcTy.getSizeInBits() > 128) {
4046  LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
4047  return false;
4048  }
4049 
4050  // We implement a split vector operation by treating the sub-vectors as
4051  // scalars and extracting them.
4052  const RegisterBank &DstRB =
4053  *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
4054  for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4055  Register Dst = I.getOperand(OpIdx).getReg();
4056  MachineInstr *Extract =
4057  emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
4058  if (!Extract)
4059  return false;
4060  }
4061  I.eraseFromParent();
4062  return true;
4063 }
4064 
4065 bool AArch64InstructionSelector::selectUnmergeValues(
4066  MachineInstr &I, MachineRegisterInfo &MRI) {
4067  assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4068  "unexpected opcode");
4069 
4070  // TODO: Handle unmerging into GPRs and from scalars to scalars.
4071  if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
4072  AArch64::FPRRegBankID ||
4073  RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
4074  AArch64::FPRRegBankID) {
4075  LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
4076  "currently unsupported.\n");
4077  return false;
4078  }
4079 
4080  // The last operand is the vector source register, and every other operand is
4081  // a register to unpack into.
4082  unsigned NumElts = I.getNumOperands() - 1;
4083  Register SrcReg = I.getOperand(NumElts).getReg();
4084  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4085  const LLT WideTy = MRI.getType(SrcReg);
4086  (void)WideTy;
4087  assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
4088  "can only unmerge from vector or s128 types!");
4089  assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
4090  "source register size too small!");
4091 
4092  if (!NarrowTy.isScalar())
4093  return selectSplitVectorUnmerge(I, MRI);
4094 
4095  // Choose a lane copy opcode and subregister based off of the size of the
4096  // vector's elements.
4097  unsigned CopyOpc = 0;
4098  unsigned ExtractSubReg = 0;
4099  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
4100  return false;
4101 
4102  // Set up for the lane copies.
4103  MachineBasicBlock &MBB = *I.getParent();
4104 
4105  // Stores the registers we'll be copying from.
4106  SmallVector<Register, 4> InsertRegs;
4107 
4108  // We'll use the first register twice, so we only need NumElts-1 registers.
4109  unsigned NumInsertRegs = NumElts - 1;
4110 
4111  // If our elements fit into exactly 128 bits, then we can copy from the source
4112  // directly. Otherwise, we need to do a bit of setup with some subregister
4113  // inserts.
4114  if (NarrowTy.getSizeInBits() * NumElts == 128) {
4115  InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
4116  } else {
4117  // No. We have to perform subregister inserts. For each insert, create an
4118  // implicit def and a subregister insert, and save the register we create.
4119  const TargetRegisterClass *RC =
4120  getMinClassForRegBank(*RBI.getRegBank(SrcReg, MRI, TRI),
4121  WideTy.getScalarSizeInBits() * NumElts);
4122  unsigned SubReg = 0;
4123  bool Found = getSubRegForClass(RC, TRI, SubReg);
4124  (void)Found;
4125  assert(Found && "expected to find last operand's subreg idx");
4126  for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4127  Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4128  MachineInstr &ImpDefMI =
4129  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
4130  ImpDefReg);
4131 
4132  // Now, create the subregister insert from SrcReg.
4133  Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4134  MachineInstr &InsMI =
4135  *BuildMI(MBB, I, I.getDebugLoc(),
4136  TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4137  .addUse(ImpDefReg)
4138  .addUse(SrcReg)
4139  .addImm(SubReg);
4140 
4141  constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
4142  constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
4143 
4144  // Save the register so that we can copy from it after.
4145  InsertRegs.push_back(InsertReg);
4146  }
4147  }
4148 
4149  // Now that we've created any necessary subregister inserts, we can
4150  // create the copies.
4151  //
4152  // Perform the first copy separately as a subregister copy.
4153  Register CopyTo = I.getOperand(0).getReg();
4154  auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4155  .addReg(InsertRegs[0], 0, ExtractSubReg);
4156  constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
4157 
4158  // Now, perform the remaining copies as vector lane copies.
4159  unsigned LaneIdx = 1;
4160  for (Register InsReg : InsertRegs) {
4161  Register CopyTo = I.getOperand(LaneIdx).getReg();
4162  MachineInstr &CopyInst =
4163  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
4164  .addUse(InsReg)
4165  .addImm(LaneIdx);
4166  constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
4167  ++LaneIdx;
4168  }
4169 
4170  // Separately constrain the first copy's destination. Because of the
4171  // limitation in constrainOperandRegClass, we can't guarantee that this will
4172  // actually be constrained. So, do it ourselves using the second operand.
4173  const TargetRegisterClass *RC =
4174  MRI.getRegClassOrNull(I.getOperand(1).getReg());
4175  if (!RC) {
4176  LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
4177  return false;
4178  }
4179 
4180  RBI.constrainGenericRegister(CopyTo, *RC, MRI);
4181  I.eraseFromParent();
4182  return true;
4183 }
4184 
4185 bool AArch64InstructionSelector::selectConcatVectors(
4186  MachineInstr &I, MachineRegisterInfo &MRI) {
4187  assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4188  "Unexpected opcode");
4189  Register Dst = I.getOperand(0).getReg();
4190  Register Op1 = I.getOperand(1).getReg();
4191  Register Op2 = I.getOperand(2).getReg();
4192  MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4193  if (!ConcatMI)
4194  return false;
4195  I.eraseFromParent();
4196  return true;
4197 }
4198 
4199 unsigned
4200 AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4201  MachineFunction &MF) const {
4202  Type *CPTy = CPVal->getType();
4203  Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
4204 
4206  return MCP->getConstantPoolIndex(CPVal, Alignment);
4207 }
4208 
4209 MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4210  const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4211  auto &MF = MIRBuilder.getMF();
4212  unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4213 
4214  auto Adrp =
4215  MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4216  .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
4217 
4218  MachineInstr *LoadMI = nullptr;
4219  MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4220  unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
4221  switch (Size) {
4222  case 16:
4223  LoadMI =
4224  &*MIRBuilder
4225  .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
4226  .addConstantPoolIndex(CPIdx, 0,
4227  AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4228  break;
4229  case 8:
4230  LoadMI =
4231  &*MIRBuilder
4232  .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
4233  .addConstantPoolIndex(CPIdx, 0,
4234  AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4235  break;
4236  case 4:
4237  LoadMI =
4238  &*MIRBuilder
4239  .buildInstr(AArch64::LDRSui, {&AArch64::FPR32RegClass}, {Adrp})
4240  .addConstantPoolIndex(CPIdx, 0,
4241  AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4242  break;
4243  case 2:
4244  LoadMI =
4245  &*MIRBuilder
4246  .buildInstr(AArch64::LDRHui, {&AArch64::FPR16RegClass}, {Adrp})
4247  .addConstantPoolIndex(CPIdx, 0,
4248  AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4249  break;
4250  default:
4251  LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4252  << *CPVal->getType());
4253  return nullptr;
4254  }
4255  LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
4256  MachineMemOperand::MOLoad,
4257  Size, Align(Size)));
4259  constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
4260  return LoadMI;
4261 }
4262 
4263 /// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
4264 /// size and RB.
4265 static std::pair<unsigned, unsigned>
4266 getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4267  unsigned Opc, SubregIdx;
4268  if (RB.getID() == AArch64::GPRRegBankID) {
4269  if (EltSize == 16) {
4270  Opc = AArch64::INSvi16gpr;
4271  SubregIdx = AArch64::ssub;
4272  } else if (EltSize == 32) {
4273  Opc = AArch64::INSvi32gpr;
4274  SubregIdx = AArch64::ssub;
4275  } else if (EltSize == 64) {
4276  Opc = AArch64::INSvi64gpr;
4277  SubregIdx = AArch64::dsub;
4278  } else {
4279  llvm_unreachable("invalid elt size!");
4280  }
4281  } else {
4282  if (EltSize == 8) {
4283  Opc = AArch64::INSvi8lane;
4284  SubregIdx = AArch64::bsub;
4285  } else if (EltSize == 16) {
4286  Opc = AArch64::INSvi16lane;
4287  SubregIdx = AArch64::hsub;
4288  } else if (EltSize == 32) {
4289  Opc = AArch64::INSvi32lane;
4290  SubregIdx = AArch64::ssub;
4291  } else if (EltSize == 64) {
4292  Opc = AArch64::INSvi64lane;
4293  SubregIdx = AArch64::dsub;
4294  } else {
4295  llvm_unreachable("invalid elt size!");
4296  }
4297  }
4298  return std::make_pair(Opc, SubregIdx);
4299 }
4300 
4301 MachineInstr *AArch64InstructionSelector::emitInstr(
4302  unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4303  std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4304  const ComplexRendererFns &RenderFns) const {
4305  assert(Opcode && "Expected an opcode?");
4306  assert(!isPreISelGenericOpcode(Opcode) &&
4307  "Function should only be used to produce selected instructions!");
4308  auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4309  if (RenderFns)
4310  for (auto &Fn : *RenderFns)
4311  Fn(MI);
4312  constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4313  return &*MI;
4314 }
4315 
4316 MachineInstr *AArch64InstructionSelector::emitAddSub(
4317  const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4318  Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4319  MachineIRBuilder &MIRBuilder) const {
4320  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4321  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4322  auto Ty = MRI.getType(LHS.getReg());
4323  assert(!Ty.isVector() && "Expected a scalar or pointer?");
4324  unsigned Size = Ty.getSizeInBits();
4325  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4326  bool Is32Bit = Size == 32;
4327 
4328  // INSTRri form with positive arithmetic immediate.
4329  if (auto Fns = selectArithImmed(RHS))
4330  return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4331  MIRBuilder, Fns);
4332 
4333  // INSTRri form with negative arithmetic immediate.
4334  if (auto Fns = selectNegArithImmed(RHS))
4335  return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4336  MIRBuilder, Fns);
4337 
4338  // INSTRrx form.
4339  if (auto Fns = selectArithExtendedRegister(RHS))
4340  return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4341  MIRBuilder, Fns);
4342 
4343  // INSTRrs form.
4344  if (auto Fns = selectShiftedRegister(RHS))
4345  return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4346  MIRBuilder, Fns);
4347  return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4348  MIRBuilder);
4349 }
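// A rough sketch of the preference order above: immediate forms first, then
// the extended- and shifted-register forms, then plain register-register.
// For example (illustrative vregs), an add of a small constant becomes
//   %dst:gpr64 = ADDXri %lhs, 16, 0
// and an add of a negatable constant is flipped into the SUB-immediate form.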
4350 
4351 MachineInstr *
4352 AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4353  MachineOperand &RHS,
4354  MachineIRBuilder &MIRBuilder) const {
4355  const std::array<std::array<unsigned, 2>, 5> OpcTable{
4356  {{AArch64::ADDXri, AArch64::ADDWri},
4357  {AArch64::ADDXrs, AArch64::ADDWrs},
4358  {AArch64::ADDXrr, AArch64::ADDWrr},
4359  {AArch64::SUBXri, AArch64::SUBWri},
4360  {AArch64::ADDXrx, AArch64::ADDWrx}}};
4361  return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4362 }
4363 
4364 MachineInstr *
4365 AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4366  MachineOperand &RHS,
4367  MachineIRBuilder &MIRBuilder) const {
4368  const std::array<std::array<unsigned, 2>, 5> OpcTable{
4369  {{AArch64::ADDSXri, AArch64::ADDSWri},
4370  {AArch64::ADDSXrs, AArch64::ADDSWrs},
4371  {AArch64::ADDSXrr, AArch64::ADDSWrr},
4372  {AArch64::SUBSXri, AArch64::SUBSWri},
4373  {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4374  return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4375 }
4376 
4377 MachineInstr *
4378 AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4379  MachineOperand &RHS,
4380  MachineIRBuilder &MIRBuilder) const {
4381  const std::array<std::array<unsigned, 2>, 5> OpcTable{
4382  {{AArch64::SUBSXri, AArch64::SUBSWri},
4383  {AArch64::SUBSXrs, AArch64::SUBSWrs},
4384  {AArch64::SUBSXrr, AArch64::SUBSWrr},
4385  {AArch64::ADDSXri, AArch64::ADDSWri},
4386  {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4387  return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4388 }
4389 
4390 MachineInstr *
4391 AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4392  MachineIRBuilder &MIRBuilder) const {
4393  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4394  bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4395  auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4396  return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4397 }
4398 
4399 MachineInstr *
4400 AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4401  MachineIRBuilder &MIRBuilder) const {
4402  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4403  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4404  LLT Ty = MRI.getType(LHS.getReg());
4405  unsigned RegSize = Ty.getSizeInBits();
4406  bool Is32Bit = (RegSize == 32);
4407  const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4408  {AArch64::ANDSXrs, AArch64::ANDSWrs},
4409  {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4410  // ANDS needs a logical immediate for its immediate form. Check if we can
4411  // fold one in.
4412  if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4413  int64_t Imm = ValAndVReg->Value.getSExtValue();
4414 
4415  if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
4416  auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4417  TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
4418  constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4419  return &*TstMI;
4420  }
4421  }
4422 
4423  if (auto Fns = selectLogicalShiftedRegister(RHS))
4424  return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4425  return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4426 }
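// A rough sketch of emitTST (illustrative vregs): a test against a valid
// logical immediate folds the constant, e.g. testing bit 0 becomes
//   %dead:gpr32 = ANDSWri %lhs, <encoded imm for 0x1>, implicit-def $nzcv
// otherwise the shifted- or plain register form (ANDSWrs/ANDSWrr) is used.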
4427 
4428 MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4429  MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4430  MachineIRBuilder &MIRBuilder) const {
4431  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4432  assert(Predicate.isPredicate() && "Expected predicate?");
4433  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4434  LLT CmpTy = MRI.getType(LHS.getReg());
4435  assert(!CmpTy.isVector() && "Expected scalar or pointer");
4436  unsigned Size = CmpTy.getSizeInBits();
4437  (void)Size;
4438  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4439  // Fold the compare into a cmn or tst if possible.
4440  if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4441  return FoldCmp;
4442  auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
4443  return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4444 }
4445 
4446 MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4447  Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4448  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4449 #ifndef NDEBUG
4450  LLT Ty = MRI.getType(Dst);
4451  assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4452  "Expected a 32-bit scalar register?");
4453 #endif
4454  const Register ZeroReg = AArch64::WZR;
4455  auto EmitCSet = [&](Register CsetDst, AArch64CC::CondCode CC) {
4456  auto CSet =
4457  MIRBuilder.buildInstr(AArch64::CSINCWr, {CsetDst}, {ZeroReg, ZeroReg})
4458  .addImm(getInvertedCondCode(CC));
4459  constrainSelectedInstRegOperands(*CSet, TII, TRI, RBI);
4460  return &*CSet;
4461  };
4462 
4463  AArch64CC::CondCode CC1, CC2;
4464  changeFCMPPredToAArch64CC(Pred, CC1, CC2);
4465  if (CC2 == AArch64CC::AL)
4466  return EmitCSet(Dst, CC1);
4467 
4468  const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4469  Register Def1Reg = MRI.createVirtualRegister(RC);
4470  Register Def2Reg = MRI.createVirtualRegister(RC);
4471  EmitCSet(Def1Reg, CC1);
4472  EmitCSet(Def2Reg, CC2);
4473  auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4474  constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4475  return &*OrMI;
4476 }
4477 
4478 MachineInstr *
4479 AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS,
4480  MachineIRBuilder &MIRBuilder,
4481  Optional<CmpInst::Predicate> Pred) const {
4482  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4483  LLT Ty = MRI.getType(LHS);
4484  if (Ty.isVector())
4485  return nullptr;
4486  unsigned OpSize = Ty.getSizeInBits();
4487  if (OpSize != 32 && OpSize != 64)
4488  return nullptr;
4489 
4490  // If this is a compare against +0.0, then we don't have
4491  // to explicitly materialize a constant.
4492  const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4493  bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4494 
4495  auto IsEqualityPred = [](CmpInst::Predicate P) {
4496  return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4497  P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4498  };
4499  if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4500  // Try commutating the operands.
4501  const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4502  if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4503  ShouldUseImm = true;
4504  std::swap(LHS, RHS);
4505  }
4506  }
4507  unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
4508  {AArch64::FCMPSri, AArch64::FCMPDri}};
4509  unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64];
4510 
4511  // Partially build the compare. Decide if we need to add a use for the
4512  // third operand based off whether or not we're comparing against 0.0.
4513  auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4514  if (!ShouldUseImm)
4515  CmpMI.addUse(RHS);
4516  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4517  return &*CmpMI;
4518 }
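// A rough sketch of emitFPCompare (illustrative vregs): a compare against
// +0.0 uses the single-operand immediate form,
//   FCMPDri %lhs, implicit-def $nzcv
// while the general 64-bit case is
//   FCMPDrr %lhs, %rhs, implicit-def $nzcv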
4519 
4520 MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4521  Optional<Register> Dst, Register Op1, Register Op2,
4522  MachineIRBuilder &MIRBuilder) const {
4523  // We implement a vector concat by:
4524  // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4525  // 2. Insert the upper vector into the destination's upper element
4526  // TODO: some of this code is common with G_BUILD_VECTOR handling.
4527  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4528 
4529  const LLT Op1Ty = MRI.getType(Op1);
4530  const LLT Op2Ty = MRI.getType(Op2);
4531 
4532  if (Op1Ty != Op2Ty) {
4533  LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4534  return nullptr;
4535  }
4536  assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4537 
4538  if (Op1Ty.getSizeInBits() >= 128) {
4539  LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4540  return nullptr;
4541  }
4542 
4543  // At the moment we just support 64 bit vector concats.
4544  if (Op1Ty.getSizeInBits() != 64) {
4545  LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
4546  return nullptr;
4547  }
4548 
4549  const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4550  const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4551  const TargetRegisterClass *DstRC =
4552  getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
4553 
4554  MachineInstr *WidenedOp1 =
4555  emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4556  MachineInstr *WidenedOp2 =
4557  emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4558  if (!WidenedOp1 || !WidenedOp2) {
4559  LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4560  return nullptr;
4561  }
4562 
4563  // Now do the insert of the upper element.
4564  unsigned InsertOpc, InsSubRegIdx;
4565  std::tie(InsertOpc, InsSubRegIdx) =
4566  getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4567 
4568  if (!Dst)
4569  Dst = MRI.createVirtualRegister(DstRC);
4570  auto InsElt =
4571  MIRBuilder
4572  .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4573  .addImm(1) /* Lane index */
4574  .addUse(WidenedOp2->getOperand(0).getReg())
4575  .addImm(0);
4576  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4577  return &*InsElt;
4578 }
4579 
4580 MachineInstr *
4581 AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
4582  MachineIRBuilder &MIRBuilder,
4583  Register SrcReg) const {
4584  // CSINC increments the result when the predicate is false. Invert it.
4585  const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
4586  CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
4587  auto I = MIRBuilder.buildInstr(AArch64::CSINCWr, {DefReg}, {SrcReg, SrcReg})
4588  .addImm(InvCC);
4589  constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
4590  return &*I;
4591 }
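// A rough sketch of emitCSetForICMP: since CSINC yields src + 1 when the
// condition is *false*, the predicate is inverted, so "cset eq" comes out as
//   %dst:gpr32 = CSINCWr $wzr, $wzr, ne, implicit $nzcv
// (with $wzr as the default SrcReg).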
4592 
4593 std::pair<MachineInstr *, AArch64CC::CondCode>
4594 AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4595  MachineOperand &LHS,
4596  MachineOperand &RHS,
4597  MachineIRBuilder &MIRBuilder) const {
4598  switch (Opcode) {
4599  default:
4600  llvm_unreachable("Unexpected opcode!");
4601  case TargetOpcode::G_SADDO:
4602  return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4603  case TargetOpcode::G_UADDO:
4604  return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4605  case TargetOpcode::G_SSUBO:
4606  return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4607  case TargetOpcode::G_USUBO:
4608  return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4609  }
4610 }
4611 
4612 bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) {
4613  MachineRegisterInfo &MRI = *MIB.getMRI();
4614  // We want to recognize this pattern:
4615  //
4616  // $z = G_FCMP pred, $x, $y
4617  // ...
4618  // $w = G_SELECT $z, $a, $b
4619  //
4620  // Where the value of $z is *only* ever used by the G_SELECT (possibly with
4621  // some copies/truncs in between.)
4622  //
4623  // If we see this, then we can emit something like this:
4624  //
4625  // fcmp $x, $y
4626  // fcsel $w, $a, $b, pred
4627  //
4628  // Rather than emitting both of the rather long sequences in the standard
4629  // G_FCMP/G_SELECT select methods.
4630 
4631  // First, check if the condition is defined by a compare.
4632  MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
4633  while (CondDef) {
4634  // We can only fold if all of the defs have one use.
4635  Register CondDefReg = CondDef->getOperand(0).getReg();
4636  if (!MRI.hasOneNonDBGUse(CondDefReg)) {
4637  // Unless it's another select.
4638  for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
4639  if (CondDef == &UI)
4640  continue;
4641  if (UI.getOpcode() != TargetOpcode::G_SELECT)
4642  return false;
4643  }
4644  }
4645 
4646  // We can skip over G_TRUNC since the condition is 1-bit.
4647  // Truncating/extending can have no impact on the value.
4648  unsigned Opc = CondDef->getOpcode();
4649  if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
4650  break;
4651 
4652  // Can't see past copies from physregs.
4653  if (Opc == TargetOpcode::COPY &&
4654  Register::isPhysicalRegister(CondDef->getOperand(1).getReg()))
4655  return false;
4656 
4657  CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
4658  }
4659 
4660  // Is the condition defined by a compare?
4661  if (!CondDef)
4662  return false;
4663 
4664  unsigned CondOpc = CondDef->getOpcode();
4665  if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
4666  return false;
4667 
4668  AArch64CC::CondCode CondCode;
4669  if (CondOpc == TargetOpcode::G_ICMP) {
4670  auto Pred =
4671  static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
4672  CondCode = changeICMPPredToAArch64CC(Pred);
4673  emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
4674  CondDef->getOperand(1), MIB);
4675  } else {
4676  // Get the condition code for the select.
4677  auto Pred =
4678  static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
4679  AArch64CC::CondCode CondCode2;
4680  changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
4681 
4682  // changeFCMPPredToAArch64CC only sets CondCode2 to something other than
4683  // AL when we require two instructions to emit the comparison.
4684  // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
4685  // unnecessary.
4686  if (CondCode2 != AArch64CC::AL)
4687  return false;
4688 
4689  if (!emitFPCompare(CondDef->getOperand(2).getReg(),
4690  CondDef->getOperand(3).getReg(), MIB)) {
4691  LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
4692  return false;
4693  }
4694  }
4695 
4696  // Emit the select.
4697  emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
4698  I.getOperand(3).getReg(), CondCode, MIB);
4699  I.eraseFromParent();
4700  return true;
4701 }
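// A rough before/after picture of the fold above (the generic MIR is
// paraphrased; the selected opcodes FCMPDrr/FCSELDrrr are the expected 64-bit
// forms and should be read as an assumption, not as captured output):
//
//   %z:gpr(s1)  = G_FCMP floatpred(olt), %x(s64), %y(s64)
//   %w:fpr(s64) = G_SELECT %z(s1), %a(s64), %b(s64)
//
// becomes, once the compare is folded into the select:
//
//   FCMPDrr %x, %y, implicit-def $nzcv
//   %w = FCSELDrrr %a, %b, <cc for olt>, implicit $nzcv
//
// instead of materializing %z with a CSET and re-testing it for the select.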
4702 
4703 MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
4704  MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4705  MachineIRBuilder &MIRBuilder) const {
4706  assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
4707  "Unexpected MachineOperand");
4708  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4709  // We want to find this sort of thing:
4710  // x = G_SUB 0, y
4711  // G_ICMP z, x
4712  //
4713  // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
4714  // e.g:
4715  //
4716  // cmn z, y
4717 
4718  // Check if the RHS or LHS of the G_ICMP is defined by a SUB
4719  MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
4720  MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
4721  auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
4722  // Given this:
4723  //
4724  // x = G_SUB 0, y
4725  // G_ICMP x, z
4726  //
4727  // Produce this:
4728  //
4729  // cmn y, z
4730  if (isCMN(LHSDef, P, MRI))
4731  return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
4732 
4733  // Same idea here, but with the RHS of the compare instead:
4734  //
4735  // Given this:
4736  //
4737  // x = G_SUB 0, y
4738  // G_ICMP z, x
4739  //
4740  // Produce this:
4741  //
4742  // cmn z, y
4743  if (isCMN(RHSDef, P, MRI))
4744  return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
4745 
4746  // Given this:
4747  //
4748  // z = G_AND x, y
4749  // G_ICMP z, 0
4750  //
4751  // Produce this if the compare is not unsigned:
4752  //
4753  // tst x, y
4754  if (!CmpInst::isUnsigned(P) && LHSDef &&
4755  LHSDef->getOpcode() == TargetOpcode::G_AND) {
4756  // Make sure that the RHS is 0.
4757  auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
4758  if (!ValAndVReg || ValAndVReg->Value != 0)
4759  return nullptr;
4760 
4761  return emitTST(LHSDef->getOperand(1),
4762  LHSDef->getOperand(2), MIRBuilder);
4763  }
4764 
4765  return nullptr;
4766 }
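// In AArch64 assembly terms, the two folds above look roughly like this
// (register names are illustrative):
//
//   neg w8, w1        ; x = G_SUB 0, y
//   cmp w0, w8        ; G_ICMP z, x        becomes:   cmn w0, w1
//
//   and w8, w0, w1    ; z = G_AND x, y
//   cmp w8, #0        ; G_ICMP z, 0        becomes:   tst w0, w1
//
// CMN sets flags for w0 + w1 (i.e. w0 - (-w1)) and TST sets flags for
// w0 & w1, so the separate NEG/AND and the compare collapse into a single
// flag-setting instruction.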
4767 
4768 bool AArch64InstructionSelector::selectShuffleVector(
4769  MachineInstr &I, MachineRegisterInfo &MRI) {
4770  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
4771  Register Src1Reg = I.getOperand(1).getReg();
4772  const LLT Src1Ty = MRI.getType(Src1Reg);
4773  Register Src2Reg = I.getOperand(2).getReg();
4774  const LLT Src2Ty = MRI.getType(Src2Reg);
4775  ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
4776 
4777  MachineBasicBlock &MBB = *I.getParent();
4778  MachineFunction &MF = *MBB.getParent();
4779  LLVMContext &Ctx = MF.getFunction().getContext();
4780 
4781  // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars if
4782  // it originated from a <1 x T> type. Those should have been lowered into
4783  // G_BUILD_VECTOR earlier.
4784  if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
4785  LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
4786  return false;
4787  }
4788 
4789  unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
4790 
4791  SmallVector<Constant *, 64> CstIdxs;
4792  for (int Val : Mask) {
4793  // For now, we'll just treat any undef indexes as 0. This should be
4794  // optimized in the future, e.g. to select DUP etc.
4795  Val = Val < 0 ? 0 : Val;
4796  for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
4797  unsigned Offset = Byte + Val * BytesPerElt;
4798  CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
4799  }
4800  }
4801 
4802  // Use a constant pool to load the index vector for TBL.
4803  Constant *CPVal = ConstantVector::get(CstIdxs);
4804  MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB);
4805  if (!IndexLoad) {
4806  LLVM_DEBUG(dbgs() << "Could not load from a constant pool\n");
4807  return false;
4808  }
4809 
4810  if (DstTy.getSizeInBits() != 128) {
4811  assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
4812  // This case can be done with TBL1.
4813  MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIB);
4814  if (!Concat) {
4815  LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1\n");
4816  return false;
4817  }
4818 
4819  // The constant pool load will be 64 bits, so we need to convert to an FPR128 reg.
4820  IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
4821  IndexLoad->getOperand(0).getReg(), MIB);
4822 
4823  auto TBL1 = MIB.buildInstr(
4824  AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
4825  {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
4826  constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
4827 
4828  auto Copy =
4829  MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
4830  .addReg(TBL1.getReg(0), 0, AArch64::dsub);
4831  RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
4832  I.eraseFromParent();
4833  return true;
4834  }
4835 
4836  // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
4837  // Q registers for regalloc.
4838  SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
4839  auto RegSeq = createQTuple(Regs, MIB);
4840  auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
4841  {RegSeq, IndexLoad->getOperand(0)});
4842  constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
4843  I.eraseFromParent();
4844  return true;
4845 }
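// The per-byte index expansion in the loop above can be checked in isolation.
// This is a standalone sketch (not LLVM code) that mirrors it for a <4 x s32>
// shuffle with mask <1, 0, 3, 2>; the resulting byte indices are what the TBL
// instruction consumes (here, swapping adjacent 32-bit lanes), and the
// selector stores them as i8 constants in a constant pool.
#include <cstdio>
#include <vector>

int main() {
  const int Mask[] = {1, 0, 3, 2};  // G_SHUFFLE_VECTOR mask for <4 x s32>
  const unsigned BytesPerElt = 4;   // 32-bit elements
  std::vector<unsigned> ByteIdxs;
  for (int Val : Mask) {
    Val = Val < 0 ? 0 : Val;        // undef lanes are treated as lane 0
    for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte)
      ByteIdxs.push_back(Byte + Val * BytesPerElt);
  }
  for (unsigned Idx : ByteIdxs)
    printf("%u ", Idx);             // 4 5 6 7 0 1 2 3 12 13 14 15 8 9 10 11
  printf("\n");
  return 0;
}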
4846 
4847 MachineInstr *AArch64InstructionSelector::emitLaneInsert(
4848  Optional<Register> DstReg, Register SrcReg, Register EltReg,
4849  unsigned LaneIdx, const RegisterBank &RB,
4850  MachineIRBuilder &MIRBuilder) const {
4851  MachineInstr *InsElt = nullptr;
4852  const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
4853  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4854 
4855  // Create a register to define with the insert if one wasn't passed in.
4856  if (!DstReg)
4857  DstReg = MRI.createVirtualRegister(DstRC);
4858 
4859  unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
4860  unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
4861 
4862  if (RB.getID() == AArch64::FPRRegBankID) {
4863  auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
4864  InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
4865  .addImm(LaneIdx)
4866  .addUse(InsSub->getOperand(0).getReg())
4867  .addImm(0);
4868  } else {
4869  InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
4870  .addImm(LaneIdx)
4871  .addUse(EltReg);
4872  }
4873 
4874  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4875  return InsElt;
4876 }
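// For example, inserting a 32-bit element at lane 2 is expected to produce one
// of the following, depending on the element's register bank (the INSvi32*
// opcode names come from getInsertVecEltOpInfo and are stated here as an
// assumption):
//
//   FPR element:  %wide:fpr128 = INSERT_SUBREG (IMPLICIT_DEF), %elt:fpr32, %subreg.ssub
//                 %dst:fpr128  = INSvi32lane %src:fpr128, 2, %wide, 0
//
//   GPR element:  %dst:fpr128  = INSvi32gpr %src:fpr128, 2, %elt:gpr32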
4877 
4878 bool AArch64InstructionSelector::selectUSMovFromExtend(
4879  MachineInstr &MI, MachineRegisterInfo &MRI) {
4880  if (MI.getOpcode() != TargetOpcode::G_SEXT &&
4881  MI.getOpcode() != TargetOpcode::G_ZEXT &&
4882  MI.getOpcode() != TargetOpcode::G_ANYEXT)
4883  return false;
4884  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
4885  const Register DefReg = MI.getOperand(0).getReg();
4886  const LLT DstTy = MRI.getType(DefReg);
4887  unsigned DstSize = DstTy.getSizeInBits();
4888 
4889  MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,
4890  MI.getOperand(1).getReg(), MRI);
4891  int64_t Lane;
4892  if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane)))
4893  return false;
4894  Register Src0 = Extract->getOperand(1).getReg();
4895 
4896  const LLT &VecTy = MRI.getType(Src0);
4897 
4898  if (VecTy.getSizeInBits() != 128) {
4899  const MachineInstr *ScalarToVector = emitScalarToVector(
4900  VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
4901  assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
4902  Src0 = ScalarToVector->getOperand(0).getReg();
4903  }
4904 
4905  unsigned Opcode;
4906  if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
4907  Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
4908  else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
4909  Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
4910  else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
4911  Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
4912  else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
4913  Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
4914  else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
4915  Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
4916  else
4917  llvm_unreachable("Unexpected type combo for S/UMov!");
4918 
4919  // We may need to generate one of these, depending on the type and sign of the
4920  // input:
4921  // DstReg = SMOV Src0, Lane;
4922  // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
4923  MachineInstr *ExtI = nullptr;
4924  if (DstSize == 64 && !IsSigned) {
4925  Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
4926  MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
4927  ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
4928  .addImm(0)
4929  .addUse(NewReg)
4930  .addImm(AArch64::sub_32);
4931  RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
4932  } else
4933  ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
4934 
4935  constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
4936  MI.eraseFromParent();
4937  return true;
4938 }
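// Concretely, the kind of assembly this aims for (lane 1 as an example;
// register and vector names are illustrative):
//
//   s32 = G_SEXT (G_EXTRACT_VECTOR_ELT <8 x s16> %v, 1)   ->   smov w0, v0.h[1]
//   s64 = G_SEXT (G_EXTRACT_VECTOR_ELT <4 x s32> %v, 1)   ->   smov x0, v0.s[1]
//   s64 = G_ZEXT (G_EXTRACT_VECTOR_ELT <4 x s32> %v, 1)   ->   umov w0, v0.s[1]
//
// UMOV into a W register already zeroes the upper 32 bits, which is why the
// unsigned 64-bit case above only needs the extra SUBREG_TO_REG rather than a
// separate zero-extension.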
4939 
4940 bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I,
4941  MachineRegisterInfo &MRI) {
4942  assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
4943 
4944  // Get information on the destination.
4945  Register DstReg = I.getOperand(0).getReg();
4946  const LLT DstTy = MRI.getType(DstReg);
4947  unsigned VecSize = DstTy.getSizeInBits();
4948 
4949  // Get information on the element we want to insert into the destination.
4950  Register EltReg = I.getOperand(2).getReg();
4951  const LLT EltTy = MRI.getType(EltReg);
4952  unsigned EltSize = EltTy.getSizeInBits();
4953  if (EltSize < 16 || EltSize > 64)
4954  return false; // Don't support all element types yet.
4955 
4956  // Find the definition of the index. Bail out if it's not defined by a
4957  // G_CONSTANT.
4958  Register IdxReg = I.getOperand(3).getReg();
4959  auto VRegAndVal = getIConstantVRegValWithLookThrough(IdxReg, MRI);
4960  if (!VRegAndVal)
4961  return false;
4962  unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4963 
4964  // Perform the lane insert.
4965  Register SrcReg = I.getOperand(1).getReg();
4966  const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
4967 
4968  if (VecSize < 128) {
4969  // If the vector we're inserting into is smaller than 128 bits, widen it
4970  // to 128 to do the insert.
4971  MachineInstr *ScalarToVec =
4972  emitScalarToVector(VecSize, &AArch64::FPR128RegClass, SrcReg, MIB);
4973  if (!ScalarToVec)
4974  return false;
4975  SrcReg = ScalarToVec->getOperand(0).getReg();
4976  }
4977 
4978  // Create an insert into a new FPR128 register.
4979  // Note that if our vector is already 128 bits, we end up emitting an extra
4980  // register.
4981  MachineInstr *InsMI =
4982  emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIB);
4983 
4984  if (VecSize < 128) {
4985  // If we had to widen to perform the insert, then we have to demote back to
4986  // the original size to get the result we want.
4987  Register DemoteVec = InsMI->getOperand(0).getReg();
4988  const TargetRegisterClass *RC =
4989  getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
4990  if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
4991  LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
4992  return false;
4993  }
4994  unsigned SubReg = 0;
4995  if (!getSubRegForClass(RC, TRI, SubReg))
4996  return false;
4997  if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
4998  LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
4999  << ")\n");
5000  return false;
5001  }
5002  MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
5003  .addReg(DemoteVec, 0, SubReg);
5004  RBI.constrainGenericRegister(DstReg, *RC, MRI);
5005  } else {
5006  // No widening needed.
5007  InsMI->getOperand(0).setReg(DstReg);
5008  constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
5009  }
5010 
5011  I.eraseFromParent();
5012  return true;
5013 }
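// Putting the pieces together for a sub-128-bit destination, e.g. inserting a
// GPR s32 element into a <2 x s32> vector at lane 1 (a sketch; the INSvi32gpr
// name and the subregister indices follow the helpers above and are
// assumptions):
//
//   %wide:fpr128 = INSERT_SUBREG (IMPLICIT_DEF), %src:fpr64, %subreg.dsub  ; widen
//   %ins:fpr128  = INSvi32gpr %wide, 1, %elt:gpr32                         ; lane insert
//   %dst:fpr64   = COPY %ins.dsub                                          ; demote back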
5014 
5015 MachineInstr *
5016 AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
5017