1 //===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file implements the targeting of the InstructionSelector class for
10 /// AArch64.
11 /// \todo This should be generated by TableGen.
12 //===----------------------------------------------------------------------===//
13 
14 #include "AArch64GlobalISelUtils.h"
15 #include "AArch64InstrInfo.h"
18 #include "AArch64RegisterInfo.h"
19 #include "AArch64Subtarget.h"
20 #include "AArch64TargetMachine.h"
21 #include "AArch64GlobalISelUtils.h"
24 #include "llvm/ADT/Optional.h"
37 #include "llvm/IR/Constants.h"
38 #include "llvm/IR/DerivedTypes.h"
39 #include "llvm/IR/Instructions.h"
40 #include "llvm/IR/PatternMatch.h"
41 #include "llvm/IR/Type.h"
42 #include "llvm/IR/IntrinsicsAArch64.h"
43 #include "llvm/Pass.h"
44 #include "llvm/Support/Debug.h"
45 #include "llvm/Support/raw_ostream.h"
46 
47 #define DEBUG_TYPE "aarch64-isel"
48 
49 using namespace llvm;
50 using namespace MIPatternMatch;
51 using namespace AArch64GISelUtils;
52 
53 namespace llvm {
54 class BlockFrequencyInfo;
55 class ProfileSummaryInfo;
56 }
57 
58 namespace {
59 
60 #define GET_GLOBALISEL_PREDICATE_BITSET
61 #include "AArch64GenGlobalISel.inc"
62 #undef GET_GLOBALISEL_PREDICATE_BITSET
63 
64 class AArch64InstructionSelector : public InstructionSelector {
65 public:
66  AArch64InstructionSelector(const AArch64TargetMachine &TM,
67  const AArch64Subtarget &STI,
68  const AArch64RegisterBankInfo &RBI);
69 
70  bool select(MachineInstr &I) override;
71  static const char *getName() { return DEBUG_TYPE; }
72 
73  void setupMF(MachineFunction &MF, GISelKnownBits *KB,
74  CodeGenCoverage &CoverageInfo, ProfileSummaryInfo *PSI,
75  BlockFrequencyInfo *BFI) override {
76  InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
77 
78  // hasFnAttribute() is expensive to call on every BRCOND selection, so
79  // cache it here for each run of the selector.
80  ProduceNonFlagSettingCondBr =
81  !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
82  MFReturnAddr = Register();
83 
84  processPHIs(MF);
85  }
86 
87 private:
88  /// tblgen-erated 'select' implementation, used as the initial selector for
89  /// the patterns that don't require complex C++.
90  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
91 
92  // A lowering phase that runs before any selection attempts.
93  // Returns true if the instruction was modified.
94  bool preISelLower(MachineInstr &I);
95 
96  // An early selection function that runs before the selectImpl() call.
97  bool earlySelect(MachineInstr &I) const;
98 
99  // Do some preprocessing of G_PHIs before we begin selection.
100  void processPHIs(MachineFunction &MF);
101 
102  bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
103 
104  /// Eliminate same-sized cross-bank copies into stores before selectImpl().
105  bool contractCrossBankCopyIntoStore(MachineInstr &I,
106  MachineRegisterInfo &MRI);
107 
108  bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
109 
110  bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
111  MachineRegisterInfo &MRI) const;
112  bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
113  MachineRegisterInfo &MRI) const;
114 
115  ///@{
116  /// Helper functions for selectCompareBranch.
117  bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
118  MachineIRBuilder &MIB) const;
119  bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
120  MachineIRBuilder &MIB) const;
121  bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
122  MachineIRBuilder &MIB) const;
123  bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
124  MachineBasicBlock *DstMBB,
125  MachineIRBuilder &MIB) const;
126  ///@}
127 
128  bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
129  MachineRegisterInfo &MRI) const;
130 
131  bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI) const;
132  bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
133 
134  // Helper to generate an equivalent of scalar_to_vector into a new register,
135  // returned via 'Dst'.
136  MachineInstr *emitScalarToVector(unsigned EltSize,
137  const TargetRegisterClass *DstRC,
138  Register Scalar,
139  MachineIRBuilder &MIRBuilder) const;
140 
141  /// Emit a lane insert into \p DstReg, or a new vector register if None is
142  /// provided.
143  ///
144  /// The lane inserted into is defined by \p LaneIdx. The vector source
145  /// register is given by \p SrcReg. The register containing the element is
146  /// given by \p EltReg.
147  MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
148  Register EltReg, unsigned LaneIdx,
149  const RegisterBank &RB,
150  MachineIRBuilder &MIRBuilder) const;
151 
152  /// Emit a sequence of instructions representing a constant \p CV for a
153  /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
154  ///
155  /// \returns the last instruction in the sequence on success, and nullptr
156  /// otherwise.
157  MachineInstr *emitConstantVector(Register Dst, Constant *CV,
158  MachineIRBuilder &MIRBuilder,
159  MachineRegisterInfo &MRI) const;
160 
161  bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
162  bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
163  MachineRegisterInfo &MRI) const;
164  bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
167 
168  bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
169  bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
170  bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
171  bool selectSplitVectorUnmerge(MachineInstr &I,
172  MachineRegisterInfo &MRI) const;
173  bool selectIntrinsicWithSideEffects(MachineInstr &I,
174  MachineRegisterInfo &MRI) const;
175  bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
176  bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
177  bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
178  bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
179  bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
180  bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;
181  bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI) const;
182  bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI) const;
183 
184  unsigned emitConstantPoolEntry(const Constant *CPVal,
185  MachineFunction &MF) const;
186  MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
187  MachineIRBuilder &MIRBuilder) const;
188 
189  // Emit a vector concat operation.
190  MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
191  Register Op2,
192  MachineIRBuilder &MIRBuilder) const;
193 
194  // Emit an integer compare between LHS and RHS, which checks for Predicate.
195  MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
196  MachineOperand &Predicate,
197  MachineIRBuilder &MIRBuilder) const;
198 
199  /// Emit a floating point comparison between \p LHS and \p RHS.
200  /// \p Pred if given is the intended predicate to use.
201  MachineInstr *emitFPCompare(Register LHS, Register RHS,
202  MachineIRBuilder &MIRBuilder,
203  Optional<CmpInst::Predicate> Pred = None) const;
204 
205  MachineInstr *emitInstr(unsigned Opcode,
206  std::initializer_list<llvm::DstOp> DstOps,
207  std::initializer_list<llvm::SrcOp> SrcOps,
208  MachineIRBuilder &MIRBuilder,
209  const ComplexRendererFns &RenderFns = None) const;
210  /// Helper function to emit an add or sub instruction.
211  ///
212  /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
213  /// in a specific order.
214  ///
215  /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
216  ///
217  /// \code
218  /// const std::array<std::array<unsigned, 2>, 5> Table {
219  /// {{AArch64::ADDXri, AArch64::ADDWri},
220  /// {AArch64::ADDXrs, AArch64::ADDWrs},
221  /// {AArch64::ADDXrr, AArch64::ADDWrr},
222  /// {AArch64::SUBXri, AArch64::SUBWri},
223  /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
224  /// \endcode
225  ///
226  /// Each row in the table corresponds to a different addressing mode. Each
227  /// column corresponds to a different register size.
228  ///
229  /// \attention Rows must be structured as follows:
230  /// - Row 0: The ri opcode variants
231  /// - Row 1: The rs opcode variants
232  /// - Row 2: The rr opcode variants
233  /// - Row 3: The ri opcode variants for negative immediates
234  /// - Row 4: The rx opcode variants
235  ///
236  /// \attention Columns must be structured as follows:
237  /// - Column 0: The 64-bit opcode variants
238  /// - Column 1: The 32-bit opcode variants
239  ///
240  /// \p Dst is the destination register of the binop to emit.
241  /// \p LHS is the left-hand operand of the binop to emit.
242  /// \p RHS is the right-hand operand of the binop to emit.
243  MachineInstr *emitAddSub(
244  const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
245  Register Dst, MachineOperand &LHS, MachineOperand &RHS,
246  MachineIRBuilder &MIRBuilder) const;
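// Illustrative sketch (the table name below is hypothetical, not a value from
// this file): given a table laid out as documented above, a call such as
//   emitAddSub(AddSubTable, Dst, LHS, RHS, MIB);
// would end up using AddSubTable[0][0] (ADDXri) for a 64-bit add whose RHS
// folds to a 12-bit immediate, or AddSubTable[2][1] (ADDWrr) for a plain
// 32-bit register-register add.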
247  MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
248  MachineOperand &RHS,
249  MachineIRBuilder &MIRBuilder) const;
250  MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
251  MachineIRBuilder &MIRBuilder) const;
252  MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
253  MachineIRBuilder &MIRBuilder) const;
254  MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
255  MachineIRBuilder &MIRBuilder) const;
256  MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
257  MachineIRBuilder &MIRBuilder) const;
258  MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
259  AArch64CC::CondCode CC,
260  MachineIRBuilder &MIRBuilder) const;
261  MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
262  const RegisterBank &DstRB, LLT ScalarTy,
263  Register VecReg, unsigned LaneIdx,
264  MachineIRBuilder &MIRBuilder) const;
265 
266  /// Emit a CSet for an integer compare.
267  ///
268  /// \p DefReg and \p SrcReg are expected to be 32-bit scalar registers.
269  MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
270  MachineIRBuilder &MIRBuilder,
271  Register SrcReg = AArch64::WZR) const;
272  /// Emit a CSet for a FP compare.
273  ///
274  /// \p Dst is expected to be a 32-bit scalar register.
275  MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
276  MachineIRBuilder &MIRBuilder) const;
277 
278  /// Emit the overflow op for \p Opcode.
279  ///
280  /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
281  /// G_USUBO, etc.
282  std::pair<MachineInstr *, AArch64CC::CondCode>
283  emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
284  MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
285 
286  /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
287  /// \p IsNegative is true if the test should be "not zero".
288  /// This will also optimize the test bit instruction when possible.
289  MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
290  MachineBasicBlock *DstMBB,
291  MachineIRBuilder &MIB) const;
292 
293  /// Emit a CB(N)Z instruction which branches to \p DestMBB.
294  MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
295  MachineBasicBlock *DestMBB,
296  MachineIRBuilder &MIB) const;
297 
298  // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
299  // We use these manually instead of using the importer since it doesn't
300  // support SDNodeXForm.
301  ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
302  ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
303  ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
304  ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
305 
306  ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
307  ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
308  ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
309 
310  ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
311  unsigned Size) const;
312 
313  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
314  return selectAddrModeUnscaled(Root, 1);
315  }
316  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
317  return selectAddrModeUnscaled(Root, 2);
318  }
319  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
320  return selectAddrModeUnscaled(Root, 4);
321  }
322  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
323  return selectAddrModeUnscaled(Root, 8);
324  }
325  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
326  return selectAddrModeUnscaled(Root, 16);
327  }
328 
329  /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
330  /// from complex pattern matchers like selectAddrModeIndexed().
331  ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
332  MachineRegisterInfo &MRI) const;
333 
334  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
335  unsigned Size) const;
336  template <int Width>
337  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
338  return selectAddrModeIndexed(Root, Width / 8);
339  }
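// For example, selectAddrModeIndexed<64>(Root) simply forwards to
// selectAddrModeIndexed(Root, 8): the template parameter is the access width
// in bits, while the helper itself works in bytes, matching the scaled
// unsigned-immediate addressing forms such as LDRXui/STRXui.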
340 
341  bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
342  const MachineRegisterInfo &MRI) const;
343  ComplexRendererFns
344  selectAddrModeShiftedExtendXReg(MachineOperand &Root,
345  unsigned SizeInBytes) const;
346 
347  /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
348  /// or not a shift + extend should be folded into an addressing mode. Returns
349  /// None when this is not profitable or possible.
350  ComplexRendererFns
351  selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
352  MachineOperand &Offset, unsigned SizeInBytes,
353  bool WantsExt) const;
354  ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
355  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
356  unsigned SizeInBytes) const;
357  template <int Width>
358  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
359  return selectAddrModeXRO(Root, Width / 8);
360  }
361 
362  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
363  unsigned SizeInBytes) const;
364  template <int Width>
365  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
366  return selectAddrModeWRO(Root, Width / 8);
367  }
368 
369  ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const;
370 
371  ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
372  return selectShiftedRegister(Root);
373  }
374 
375  ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
376  // TODO: selectShiftedRegister should allow for rotates on logical shifts.
377  // For now, make them the same. The only difference between the two is that
378  // logical shifts are allowed to fold in rotates. Otherwise, these are
379  // functionally the same.
380  return selectShiftedRegister(Root);
381  }
382 
383  /// Given an extend instruction, determine the correct shift-extend type for
384  /// that instruction.
385  ///
386  /// If the instruction is going to be used in a load or store, pass
387  /// \p IsLoadStore = true.
388  AArch64_AM::ShiftExtendType
389  getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
390  bool IsLoadStore = false) const;
391 
392  /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
393  ///
394  /// \returns Either \p Reg if no change was necessary, or the new register
395  /// created by moving \p Reg.
396  ///
397  /// Note: This uses emitCopy right now.
398  Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
399  MachineIRBuilder &MIB) const;
400 
401  ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
402 
403  void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
404  int OpIdx = -1) const;
405  void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
406  int OpIdx = -1) const;
407  void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
408  int OpIdx = -1) const;
409  void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
410  int OpIdx = -1) const;
411  void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
412  int OpIdx = -1) const;
413  void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
414  int OpIdx = -1) const;
415 
416  // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
417  void materializeLargeCMVal(MachineInstr &I, const Value *V,
418  unsigned OpFlags) const;
419 
420  // Optimization methods.
421  bool tryOptSelect(MachineInstr &MI) const;
422  MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
423  MachineOperand &Predicate,
424  MachineIRBuilder &MIRBuilder) const;
425 
426  /// Return true if \p MI is a load or store of \p NumBytes bytes.
427  bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
428 
429  /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
430  /// register zeroed out. In other words, the result of MI has been explicitly
431  /// zero extended.
432  bool isDef32(const MachineInstr &MI) const;
433 
434  const AArch64TargetMachine &TM;
435  const AArch64Subtarget &STI;
436  const AArch64InstrInfo &TII;
437  const AArch64RegisterInfo &TRI;
438  const AArch64RegisterBankInfo &RBI;
439 
440  bool ProduceNonFlagSettingCondBr = false;
441 
442  // Some cached values used during selection.
443  // We use LR as a live-in register, and we keep track of it here as it can be
444  // clobbered by calls.
445  Register MFReturnAddr;
446 
447 #define GET_GLOBALISEL_PREDICATES_DECL
448 #include "AArch64GenGlobalISel.inc"
449 #undef GET_GLOBALISEL_PREDICATES_DECL
450 
451 // We declare the temporaries used by selectImpl() in the class to minimize the
452 // cost of constructing placeholder values.
453 #define GET_GLOBALISEL_TEMPORARIES_DECL
454 #include "AArch64GenGlobalISel.inc"
455 #undef GET_GLOBALISEL_TEMPORARIES_DECL
456 };
457 
458 } // end anonymous namespace
459 
460 #define GET_GLOBALISEL_IMPL
461 #include "AArch64GenGlobalISel.inc"
462 #undef GET_GLOBALISEL_IMPL
463 
464 AArch64InstructionSelector::AArch64InstructionSelector(
465  const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
466  const AArch64RegisterBankInfo &RBI)
467  : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
468  TRI(*STI.getRegisterInfo()), RBI(RBI),
469 #define GET_GLOBALISEL_PREDICATES_INIT
470 #include "AArch64GenGlobalISel.inc"
471 #undef GET_GLOBALISEL_PREDICATES_INIT
472 #define GET_GLOBALISEL_TEMPORARIES_INIT
473 #include "AArch64GenGlobalISel.inc"
474 #undef GET_GLOBALISEL_TEMPORARIES_INIT
475 {
476 }
477 
478 // FIXME: This should be target-independent, inferred from the types declared
479 // for each class in the bank.
480 static const TargetRegisterClass *
481 getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
482  const RegisterBankInfo &RBI,
483  bool GetAllRegSet = false) {
484  if (RB.getID() == AArch64::GPRRegBankID) {
485  if (Ty.getSizeInBits() <= 32)
486  return GetAllRegSet ? &AArch64::GPR32allRegClass
487  : &AArch64::GPR32RegClass;
488  if (Ty.getSizeInBits() == 64)
489  return GetAllRegSet ? &AArch64::GPR64allRegClass
490  : &AArch64::GPR64RegClass;
491  return nullptr;
492  }
493 
494  if (RB.getID() == AArch64::FPRRegBankID) {
495  if (Ty.getSizeInBits() <= 16)
496  return &AArch64::FPR16RegClass;
497  if (Ty.getSizeInBits() == 32)
498  return &AArch64::FPR32RegClass;
499  if (Ty.getSizeInBits() == 64)
500  return &AArch64::FPR64RegClass;
501  if (Ty.getSizeInBits() == 128)
502  return &AArch64::FPR128RegClass;
503  return nullptr;
504  }
505 
506  return nullptr;
507 }
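// For example, an s32 value on the GPR bank maps to GPR32 (or GPR32all when
// GetAllRegSet is true), a 64-bit scalar or vector on the FPR bank maps to
// FPR64, and a 128-bit FPR value maps to FPR128; unhandled sizes return
// nullptr so callers can bail out.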
508 
509 /// Given a register bank, and size in bits, return the smallest register class
510 /// that can represent that combination.
511 static const TargetRegisterClass *
512 getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
513  bool GetAllRegSet = false) {
514  unsigned RegBankID = RB.getID();
515 
516  if (RegBankID == AArch64::GPRRegBankID) {
517  if (SizeInBits <= 32)
518  return GetAllRegSet ? &AArch64::GPR32allRegClass
519  : &AArch64::GPR32RegClass;
520  if (SizeInBits == 64)
521  return GetAllRegSet ? &AArch64::GPR64allRegClass
522  : &AArch64::GPR64RegClass;
523  }
524 
525  if (RegBankID == AArch64::FPRRegBankID) {
526  switch (SizeInBits) {
527  default:
528  return nullptr;
529  case 8:
530  return &AArch64::FPR8RegClass;
531  case 16:
532  return &AArch64::FPR16RegClass;
533  case 32:
534  return &AArch64::FPR32RegClass;
535  case 64:
536  return &AArch64::FPR64RegClass;
537  case 128:
538  return &AArch64::FPR128RegClass;
539  }
540  }
541 
542  return nullptr;
543 }
544 
545 /// Returns the correct subregister to use for a given register class.
546 static bool getSubRegForClass(const TargetRegisterClass *RC,
547  const TargetRegisterInfo &TRI, unsigned &SubReg) {
548  switch (TRI.getRegSizeInBits(*RC)) {
549  case 8:
550  SubReg = AArch64::bsub;
551  break;
552  case 16:
553  SubReg = AArch64::hsub;
554  break;
555  case 32:
556  if (RC != &AArch64::FPR32RegClass)
557  SubReg = AArch64::sub_32;
558  else
559  SubReg = AArch64::ssub;
560  break;
561  case 64:
562  SubReg = AArch64::dsub;
563  break;
564  default:
565  LLVM_DEBUG(
566  dbgs() << "Couldn't find appropriate subregister for register class.");
567  return false;
568  }
569 
570  return true;
571 }
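// For example, GPR32 maps to sub_32 (the low word of an X register), FPR32
// maps to ssub, and any 64-bit class maps to dsub; unexpected sizes make the
// function return false so the caller can detect the failure.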
572 
573 /// Returns the minimum size the given register bank can hold.
574 static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
575  switch (RB.getID()) {
576  case AArch64::GPRRegBankID:
577  return 32;
578  case AArch64::FPRRegBankID:
579  return 8;
580  default:
581  llvm_unreachable("Tried to get minimum size for unknown register bank.");
582  }
583 }
584 
585 static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
586  auto &MI = *Root.getParent();
587  auto &MBB = *MI.getParent();
588  auto &MF = *MBB.getParent();
589  auto &MRI = MF.getRegInfo();
590  uint64_t Immed;
591  if (Root.isImm())
592  Immed = Root.getImm();
593  else if (Root.isCImm())
594  Immed = Root.getCImm()->getZExtValue();
595  else if (Root.isReg()) {
596  auto ValAndVReg =
597  getConstantVRegValWithLookThrough(Root.getReg(), MRI);
598  if (!ValAndVReg)
599  return None;
600  Immed = ValAndVReg->Value.getSExtValue();
601  } else
602  return None;
603  return Immed;
604 }
605 
606 /// Check whether \p I is a currently unsupported binary operation:
607 /// - it has an unsized type
608 /// - an operand is not a vreg
609 /// - the operands are not all in the same bank
610 /// These are checks that should someday live in the verifier, but right now,
611 /// these are mostly limitations of the aarch64 selector.
612 static bool unsupportedBinOp(const MachineInstr &I,
613  const AArch64RegisterBankInfo &RBI,
614  const MachineRegisterInfo &MRI,
615  const AArch64RegisterInfo &TRI) {
616  LLT Ty = MRI.getType(I.getOperand(0).getReg());
617  if (!Ty.isValid()) {
618  LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
619  return true;
620  }
621 
622  const RegisterBank *PrevOpBank = nullptr;
623  for (auto &MO : I.operands()) {
624  // FIXME: Support non-register operands.
625  if (!MO.isReg()) {
626  LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
627  return true;
628  }
629 
630  // FIXME: Can generic operations have physical registers operands? If
631  // so, this will need to be taught about that, and we'll need to get the
632  // bank out of the minimal class for the register.
633  // Either way, this needs to be documented (and possibly verified).
634  if (!Register::isVirtualRegister(MO.getReg())) {
635  LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
636  return true;
637  }
638 
639  const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
640  if (!OpBank) {
641  LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
642  return true;
643  }
644 
645  if (PrevOpBank && OpBank != PrevOpBank) {
646  LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
647  return true;
648  }
649  PrevOpBank = OpBank;
650  }
651  return false;
652 }
653 
654 /// Select the AArch64 opcode for the basic binary operation \p GenericOpc
655 /// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
656 /// and of size \p OpSize.
657 /// \returns \p GenericOpc if the combination is unsupported.
658 static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
659  unsigned OpSize) {
660  switch (RegBankID) {
661  case AArch64::GPRRegBankID:
662  if (OpSize == 32) {
663  switch (GenericOpc) {
664  case TargetOpcode::G_SHL:
665  return AArch64::LSLVWr;
666  case TargetOpcode::G_LSHR:
667  return AArch64::LSRVWr;
668  case TargetOpcode::G_ASHR:
669  return AArch64::ASRVWr;
670  default:
671  return GenericOpc;
672  }
673  } else if (OpSize == 64) {
674  switch (GenericOpc) {
675  case TargetOpcode::G_PTR_ADD:
676  return AArch64::ADDXrr;
677  case TargetOpcode::G_SHL:
678  return AArch64::LSLVXr;
679  case TargetOpcode::G_LSHR:
680  return AArch64::LSRVXr;
681  case TargetOpcode::G_ASHR:
682  return AArch64::ASRVXr;
683  default:
684  return GenericOpc;
685  }
686  }
687  break;
688  case AArch64::FPRRegBankID:
689  switch (OpSize) {
690  case 32:
691  switch (GenericOpc) {
692  case TargetOpcode::G_FADD:
693  return AArch64::FADDSrr;
694  case TargetOpcode::G_FSUB:
695  return AArch64::FSUBSrr;
696  case TargetOpcode::G_FMUL:
697  return AArch64::FMULSrr;
698  case TargetOpcode::G_FDIV:
699  return AArch64::FDIVSrr;
700  default:
701  return GenericOpc;
702  }
703  case 64:
704  switch (GenericOpc) {
705  case TargetOpcode::G_FADD:
706  return AArch64::FADDDrr;
707  case TargetOpcode::G_FSUB:
708  return AArch64::FSUBDrr;
709  case TargetOpcode::G_FMUL:
710  return AArch64::FMULDrr;
711  case TargetOpcode::G_FDIV:
712  return AArch64::FDIVDrr;
713  case TargetOpcode::G_OR:
714  return AArch64::ORRv8i8;
715  default:
716  return GenericOpc;
717  }
718  }
719  break;
720  }
721  return GenericOpc;
722 }
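// For example, selectBinaryOp(TargetOpcode::G_SHL, AArch64::GPRRegBankID, 64)
// returns AArch64::LSLVXr, while a combination the table does not cover (say
// G_FADD on the GPR bank) just returns the generic opcode so the caller knows
// no fixed-opcode mapping exists.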
723 
724 /// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
725 /// appropriate for the (value) register bank \p RegBankID and of memory access
726 /// size \p OpSize. This returns the variant with the base+unsigned-immediate
727 /// addressing mode (e.g., LDRXui).
728 /// \returns \p GenericOpc if the combination is unsupported.
729 static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
730  unsigned OpSize) {
731  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
732  switch (RegBankID) {
733  case AArch64::GPRRegBankID:
734  switch (OpSize) {
735  case 8:
736  return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
737  case 16:
738  return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
739  case 32:
740  return isStore ? AArch64::STRWui : AArch64::LDRWui;
741  case 64:
742  return isStore ? AArch64::STRXui : AArch64::LDRXui;
743  }
744  break;
745  case AArch64::FPRRegBankID:
746  switch (OpSize) {
747  case 8:
748  return isStore ? AArch64::STRBui : AArch64::LDRBui;
749  case 16:
750  return isStore ? AArch64::STRHui : AArch64::LDRHui;
751  case 32:
752  return isStore ? AArch64::STRSui : AArch64::LDRSui;
753  case 64:
754  return isStore ? AArch64::STRDui : AArch64::LDRDui;
755  }
756  break;
757  }
758  return GenericOpc;
759 }
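// For example, a 32-bit G_LOAD whose value lives on the GPR bank selects to
// AArch64::LDRWui, while the same-sized load on the FPR bank selects to
// AArch64::LDRSui; the corresponding stores map to STRWui and STRSui.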
760 
761 #ifndef NDEBUG
762 /// Helper function that verifies that we have a valid copy at the end of
763 /// selectCopy. Verifies that the source and dest have the expected sizes and
764 /// then returns true.
765 static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
766  const MachineRegisterInfo &MRI,
767  const TargetRegisterInfo &TRI,
768  const RegisterBankInfo &RBI) {
769  const Register DstReg = I.getOperand(0).getReg();
770  const Register SrcReg = I.getOperand(1).getReg();
771  const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
772  const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
773 
774  // Make sure the size of the source and dest line up.
775  assert(
776  (DstSize == SrcSize ||
777  // Copies are a means to set up initial types; the number of
778  // bits may not exactly match.
779  (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
780  // Copies are a means to copy bits around, as long as we are
781  // on the same register class, that's fine. Otherwise, that
782  // means we need some SUBREG_TO_REG or AND & co.
783  (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
784  "Copy with different width?!");
785 
786  // Check the size of the destination.
787  assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
788  "GPRs cannot get more than 64-bit width values");
789 
790  return true;
791 }
792 #endif
793 
794 /// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
795 /// to \p *To.
796 ///
797 /// E.g "To = COPY SrcReg:SubReg"
798 static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
799  const RegisterBankInfo &RBI, Register SrcReg,
800  const TargetRegisterClass *To, unsigned SubReg) {
801  assert(SrcReg.isValid() && "Expected a valid source register?");
802  assert(To && "Destination register class cannot be null");
803  assert(SubReg && "Expected a valid subregister");
804 
805  MachineIRBuilder MIB(I);
806  auto SubRegCopy =
807  MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
808  MachineOperand &RegOp = I.getOperand(1);
809  RegOp.setReg(SubRegCopy.getReg(0));
810 
811  // It's possible that the destination register won't be constrained. Make
812  // sure that happens.
813  if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
814  RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
815 
816  return true;
817 }
818 
819 /// Helper function to get the source and destination register classes for a
820 /// copy. Returns a std::pair containing the source register class for the
821 /// copy, and the destination register class for the copy. If a register class
822 /// cannot be determined, then it will be nullptr.
823 static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
824 getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
825  MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
826  const RegisterBankInfo &RBI) {
827  Register DstReg = I.getOperand(0).getReg();
828  Register SrcReg = I.getOperand(1).getReg();
829  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
830  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
831  unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
832  unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
833 
834  // Special casing for cross-bank copies of s1s. We can technically represent
835  // a 1-bit value with any size of register. The minimum size for a GPR is 32
836  // bits. So, we need to put the FPR on 32 bits as well.
837  //
838  // FIXME: I'm not sure if this case holds true outside of copies. If it does,
839  // then we can pull it into the helpers that get the appropriate class for a
840  // register bank. Or make a new helper that carries along some constraint
841  // information.
842  if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
843  SrcSize = DstSize = 32;
844 
845  return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
846  getMinClassForRegBank(DstRegBank, DstSize, true)};
847 }
848 
849 static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
850  MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
851  const RegisterBankInfo &RBI) {
852  Register DstReg = I.getOperand(0).getReg();
853  Register SrcReg = I.getOperand(1).getReg();
854  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
855  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
856 
857  // Find the correct register classes for the source and destination registers.
858  const TargetRegisterClass *SrcRC;
859  const TargetRegisterClass *DstRC;
860  std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
861 
862  if (!DstRC) {
863  LLVM_DEBUG(dbgs() << "Unexpected dest size "
864  << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
865  return false;
866  }
867 
868  // A couple helpers below, for making sure that the copy we produce is valid.
869 
870  // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
871  // to verify that the src and dst are the same size, since that's handled by
872  // the SUBREG_TO_REG.
873  bool KnownValid = false;
874 
875  // Returns true, or asserts if something we don't expect happens. Instead of
876  // returning true, we return isValidCopy() to ensure that we verify the
877  // result.
878  auto CheckCopy = [&]() {
879  // If we have a bitcast or something, we can't have physical registers.
880  assert((I.isCopy() ||
881  (!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&
882  !Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&
883  "No phys reg on generic operator!");
884  bool ValidCopy = true;
885 #ifndef NDEBUG
886  ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI);
887  assert(ValidCopy && "Invalid copy.");
888 #endif
889  (void)KnownValid;
890  return ValidCopy;
891  };
892 
893  // Is this a copy? If so, then we may need to insert a subregister copy.
894  if (I.isCopy()) {
895  // Yes. Check if there's anything to fix up.
896  if (!SrcRC) {
897  LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
898  return false;
899  }
900 
901  unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
902  unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
903  unsigned SubReg;
904 
905  // If the source bank doesn't support a subregister copy small enough,
906  // then we first need to copy to the destination bank.
907  if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
908  const TargetRegisterClass *DstTempRC =
909  getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
910  getSubRegForClass(DstRC, TRI, SubReg);
911 
912  MachineIRBuilder MIB(I);
913  auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
914  copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
915  } else if (SrcSize > DstSize) {
916  // If the source register is bigger than the destination we need to
917  // perform a subregister copy.
918  const TargetRegisterClass *SubRegRC =
919  getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
920  getSubRegForClass(SubRegRC, TRI, SubReg);
921  copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
922  } else if (DstSize > SrcSize) {
923  // If the destination register is bigger than the source we need to do
924  // a promotion using SUBREG_TO_REG.
925  const TargetRegisterClass *PromotionRC =
926  getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
927  getSubRegForClass(SrcRC, TRI, SubReg);
928 
929  Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
930  BuildMI(*I.getParent(), I, I.getDebugLoc(),
931  TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
932  .addImm(0)
933  .addUse(SrcReg)
934  .addImm(SubReg);
935  MachineOperand &RegOp = I.getOperand(1);
936  RegOp.setReg(PromoteReg);
937 
938  // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
939  KnownValid = true;
940  }
941 
942  // If the destination is a physical register, then there's nothing to
943  // change, so we're done.
944  if (Register::isPhysicalRegister(DstReg))
945  return CheckCopy();
946  }
947 
948  // No need to constrain SrcReg. It will get constrained when we hit another
949  // of its uses or defs. Copies do not have constraints.
950  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
951  LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
952  << " operand\n");
953  return false;
954  }
955  I.setDesc(TII.get(AArch64::COPY));
956  return CheckCopy();
957 }
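// Rough sketch of the common selectCopy cases handled above: a COPY from a
// 64-bit GPR source into a 32-bit GPR destination is rewritten to read the
// source's sub_32 subregister through an intermediate copy, while a widening
// copy (32-bit source, 64-bit destination) is implemented with a
// SUBREG_TO_REG instead of a plain COPY.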
958 
959 static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
960  if (!DstTy.isScalar() || !SrcTy.isScalar())
961  return GenericOpc;
962 
963  const unsigned DstSize = DstTy.getSizeInBits();
964  const unsigned SrcSize = SrcTy.getSizeInBits();
965 
966  switch (DstSize) {
967  case 32:
968  switch (SrcSize) {
969  case 32:
970  switch (GenericOpc) {
971  case TargetOpcode::G_SITOFP:
972  return AArch64::SCVTFUWSri;
973  case TargetOpcode::G_UITOFP:
974  return AArch64::UCVTFUWSri;
975  case TargetOpcode::G_FPTOSI:
976  return AArch64::FCVTZSUWSr;
977  case TargetOpcode::G_FPTOUI:
978  return AArch64::FCVTZUUWSr;
979  default:
980  return GenericOpc;
981  }
982  case 64:
983  switch (GenericOpc) {
984  case TargetOpcode::G_SITOFP:
985  return AArch64::SCVTFUXSri;
986  case TargetOpcode::G_UITOFP:
987  return AArch64::UCVTFUXSri;
988  case TargetOpcode::G_FPTOSI:
989  return AArch64::FCVTZSUWDr;
990  case TargetOpcode::G_FPTOUI:
991  return AArch64::FCVTZUUWDr;
992  default:
993  return GenericOpc;
994  }
995  default:
996  return GenericOpc;
997  }
998  case 64:
999  switch (SrcSize) {
1000  case 32:
1001  switch (GenericOpc) {
1002  case TargetOpcode::G_SITOFP:
1003  return AArch64::SCVTFUWDri;
1004  case TargetOpcode::G_UITOFP:
1005  return AArch64::UCVTFUWDri;
1006  case TargetOpcode::G_FPTOSI:
1007  return AArch64::FCVTZSUXSr;
1008  case TargetOpcode::G_FPTOUI:
1009  return AArch64::FCVTZUUXSr;
1010  default:
1011  return GenericOpc;
1012  }
1013  case 64:
1014  switch (GenericOpc) {
1015  case TargetOpcode::G_SITOFP:
1016  return AArch64::SCVTFUXDri;
1017  case TargetOpcode::G_UITOFP:
1018  return AArch64::UCVTFUXDri;
1019  case TargetOpcode::G_FPTOSI:
1020  return AArch64::FCVTZSUXDr;
1021  case TargetOpcode::G_FPTOUI:
1022  return AArch64::FCVTZUUXDr;
1023  default:
1024  return GenericOpc;
1025  }
1026  default:
1027  return GenericOpc;
1028  }
1029  default:
1030  return GenericOpc;
1031  };
1032  return GenericOpc;
1033 }
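// For example, selectFPConvOpc(G_SITOFP, /*DstTy=*/s32, /*SrcTy=*/s64) yields
// AArch64::SCVTFUXSri (signed 64-bit integer to single precision), and
// selectFPConvOpc(G_FPTOUI, s64, s64) yields AArch64::FCVTZUUXDr; vector or
// unhandled scalar types fall through and return the generic opcode.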
1034 
1035 MachineInstr *
1036 AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1037  Register False, AArch64CC::CondCode CC,
1038  MachineIRBuilder &MIB) const {
1039  MachineRegisterInfo &MRI = *MIB.getMRI();
1040  assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
1041  RBI.getRegBank(True, MRI, TRI)->getID() &&
1042  "Expected both select operands to have the same regbank?");
1043  LLT Ty = MRI.getType(True);
1044  if (Ty.isVector())
1045  return nullptr;
1046  const unsigned Size = Ty.getSizeInBits();
1047  assert((Size == 32 || Size == 64) &&
1048  "Expected 32 bit or 64 bit select only?");
1049  const bool Is32Bit = Size == 32;
1050  if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1051  unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1052  auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1053  constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1054  return &*FCSel;
1055  }
1056 
1057  // By default, we'll try and emit a CSEL.
1058  unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1059  bool Optimized = false;
1060  auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1061  &Optimized](Register &Reg, Register &OtherReg,
1062  bool Invert) {
1063  if (Optimized)
1064  return false;
1065 
1066  // Attempt to fold:
1067  //
1068  // %sub = G_SUB 0, %x
1069  // %select = G_SELECT cc, %reg, %sub
1070  //
1071  // Into:
1072  // %select = CSNEG %reg, %x, cc
1073  Register MatchReg;
1074  if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
1075  Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1076  Reg = MatchReg;
1077  if (Invert) {
1078  CC = AArch64CC::getInvertedCondCode(CC);
1079  std::swap(Reg, OtherReg);
1080  }
1081  return true;
1082  }
1083 
1084  // Attempt to fold:
1085  //
1086  // %xor = G_XOR %x, -1
1087  // %select = G_SELECT cc, %reg, %xor
1088  //
1089  // Into:
1090  // %select = CSINV %reg, %x, cc
1091  if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
1092  Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1093  Reg = MatchReg;
1094  if (Invert) {
1095  CC = AArch64CC::getInvertedCondCode(CC);
1096  std::swap(Reg, OtherReg);
1097  }
1098  return true;
1099  }
1100 
1101  // Attempt to fold:
1102  //
1103  // %add = G_ADD %x, 1
1104  // %select = G_SELECT cc, %reg, %add
1105  //
1106  // Into:
1107  // %select = CSINC %reg, %x, cc
1108  if (mi_match(Reg, MRI,
1109  m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
1110  m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
1111  Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1112  Reg = MatchReg;
1113  if (Invert) {
1114  CC = AArch64CC::getInvertedCondCode(CC);
1115  std::swap(Reg, OtherReg);
1116  }
1117  return true;
1118  }
1119 
1120  return false;
1121  };
1122 
1123  // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1124  // true/false values are constants.
1125  // FIXME: All of these patterns already exist in tablegen. We should be
1126  // able to import these.
1127  auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1128  &Optimized]() {
1129  if (Optimized)
1130  return false;
1131  auto TrueCst = getConstantVRegValWithLookThrough(True, MRI);
1132  auto FalseCst = getConstantVRegValWithLookThrough(False, MRI);
1133  if (!TrueCst && !FalseCst)
1134  return false;
1135 
1136  Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1137  if (TrueCst && FalseCst) {
1138  int64_t T = TrueCst->Value.getSExtValue();
1139  int64_t F = FalseCst->Value.getSExtValue();
1140 
1141  if (T == 0 && F == 1) {
1142  // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1143  Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1144  True = ZReg;
1145  False = ZReg;
1146  return true;
1147  }
1148 
1149  if (T == 0 && F == -1) {
1150  // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
1151  Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1152  True = ZReg;
1153  False = ZReg;
1154  return true;
1155  }
1156  }
1157 
1158  if (TrueCst) {
1159  int64_t T = TrueCst->Value.getSExtValue();
1160  if (T == 1) {
1161  // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1162  Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1163  True = False;
1164  False = ZReg;
1165  CC = AArch64CC::getInvertedCondCode(CC);
1166  return true;
1167  }
1168 
1169  if (T == -1) {
1170  // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1171  Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1172  True = False;
1173  False = ZReg;
1174  CC = AArch64CC::getInvertedCondCode(CC);
1175  return true;
1176  }
1177  }
1178 
1179  if (FalseCst) {
1180  int64_t F = FalseCst->Value.getSExtValue();
1181  if (F == 1) {
1182  // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1183  Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1184  False = ZReg;
1185  return true;
1186  }
1187 
1188  if (F == -1) {
1189  // G_SELECT cc, t, -1 -> CSINV t, zreg, cc
1190  Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1191  False = ZReg;
1192  return true;
1193  }
1194  }
1195  return false;
1196  };
1197 
1198  Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1199  Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1200  Optimized |= TryOptSelectCst();
1201  auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1202  constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1203  return &*SelectInst;
1204 }
1205 
1206 static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
1207  switch (P) {
1208  default:
1209  llvm_unreachable("Unknown condition code!");
1210  case CmpInst::ICMP_NE:
1211  return AArch64CC::NE;
1212  case CmpInst::ICMP_EQ:
1213  return AArch64CC::EQ;
1214  case CmpInst::ICMP_SGT:
1215  return AArch64CC::GT;
1216  case CmpInst::ICMP_SGE:
1217  return AArch64CC::GE;
1218  case CmpInst::ICMP_SLT:
1219  return AArch64CC::LT;
1220  case CmpInst::ICMP_SLE:
1221  return AArch64CC::LE;
1222  case CmpInst::ICMP_UGT:
1223  return AArch64CC::HI;
1224  case CmpInst::ICMP_UGE:
1225  return AArch64CC::HS;
1226  case CmpInst::ICMP_ULT:
1227  return AArch64CC::LO;
1228  case CmpInst::ICMP_ULE:
1229  return AArch64CC::LS;
1230  }
1231 }
1232 
1233 static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
1234  AArch64CC::CondCode &CondCode,
1235  AArch64CC::CondCode &CondCode2) {
1236  CondCode2 = AArch64CC::AL;
1237  switch (P) {
1238  default:
1239  llvm_unreachable("Unknown FP condition!");
1240  case CmpInst::FCMP_OEQ:
1241  CondCode = AArch64CC::EQ;
1242  break;
1243  case CmpInst::FCMP_OGT:
1244  CondCode = AArch64CC::GT;
1245  break;
1246  case CmpInst::FCMP_OGE:
1247  CondCode = AArch64CC::GE;
1248  break;
1249  case CmpInst::FCMP_OLT:
1250  CondCode = AArch64CC::MI;
1251  break;
1252  case CmpInst::FCMP_OLE:
1253  CondCode = AArch64CC::LS;
1254  break;
1255  case CmpInst::FCMP_ONE:
1256  CondCode = AArch64CC::MI;
1257  CondCode2 = AArch64CC::GT;
1258  break;
1259  case CmpInst::FCMP_ORD:
1260  CondCode = AArch64CC::VC;
1261  break;
1262  case CmpInst::FCMP_UNO:
1263  CondCode = AArch64CC::VS;
1264  break;
1265  case CmpInst::FCMP_UEQ:
1266  CondCode = AArch64CC::EQ;
1267  CondCode2 = AArch64CC::VS;
1268  break;
1269  case CmpInst::FCMP_UGT:
1270  CondCode = AArch64CC::HI;
1271  break;
1272  case CmpInst::FCMP_UGE:
1273  CondCode = AArch64CC::PL;
1274  break;
1275  case CmpInst::FCMP_ULT:
1276  CondCode = AArch64CC::LT;
1277  break;
1278  case CmpInst::FCMP_ULE:
1279  CondCode = AArch64CC::LE;
1280  break;
1281  case CmpInst::FCMP_UNE:
1282  CondCode = AArch64CC::NE;
1283  break;
1284  }
1285 }
1286 
1287 /// Return a register which can be used as a bit to test in a TB(N)Z.
1288 static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1290  assert(Reg.isValid() && "Expected valid register!");
1291  while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1292  unsigned Opc = MI->getOpcode();
1293 
1294  if (!MI->getOperand(0).isReg() ||
1295  !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1296  break;
1297 
1298  // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1299  //
1300  // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1301  // on the truncated x is the same as the bit number on x.
1302  if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1303  Opc == TargetOpcode::G_TRUNC) {
1304  Register NextReg = MI->getOperand(1).getReg();
1305  // Did we find something worth folding?
1306  if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1307  break;
1308 
1309  // NextReg is worth folding. Keep looking.
1310  Reg = NextReg;
1311  continue;
1312  }
1313 
1314  // Attempt to find a suitable operation with a constant on one side.
1315  Optional<int64_t> C;
1316  Register TestReg;
1317  switch (Opc) {
1318  default:
1319  break;
1320  case TargetOpcode::G_AND:
1321  case TargetOpcode::G_XOR: {
1322  TestReg = MI->getOperand(1).getReg();
1323  Register ConstantReg = MI->getOperand(2).getReg();
1324  auto VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
1325  if (!VRegAndVal) {
1326  // AND commutes, check the other side for a constant.
1327  // FIXME: Can we canonicalize the constant so that it's always on the
1328  // same side at some point earlier?
1329  std::swap(ConstantReg, TestReg);
1330  VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
1331  }
1332  if (VRegAndVal)
1333  C = VRegAndVal->Value.getSExtValue();
1334  break;
1335  }
1336  case TargetOpcode::G_ASHR:
1337  case TargetOpcode::G_LSHR:
1338  case TargetOpcode::G_SHL: {
1339  TestReg = MI->getOperand(1).getReg();
1340  auto VRegAndVal =
1341  getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1342  if (VRegAndVal)
1343  C = VRegAndVal->Value.getSExtValue();
1344  break;
1345  }
1346  }
1347 
1348  // Didn't find a constant or viable register. Bail out of the loop.
1349  if (!C || !TestReg.isValid())
1350  break;
1351 
1352  // We found a suitable instruction with a constant. Check to see if we can
1353  // walk through the instruction.
1354  Register NextReg;
1355  unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1356  switch (Opc) {
1357  default:
1358  break;
1359  case TargetOpcode::G_AND:
1360  // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1361  if ((*C >> Bit) & 1)
1362  NextReg = TestReg;
1363  break;
1364  case TargetOpcode::G_SHL:
1365  // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1366  // the type of the register.
1367  if (*C <= Bit && (Bit - *C) < TestRegSize) {
1368  NextReg = TestReg;
1369  Bit = Bit - *C;
1370  }
1371  break;
1372  case TargetOpcode::G_ASHR:
1373  // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1374  // in x
1375  NextReg = TestReg;
1376  Bit = Bit + *C;
1377  if (Bit >= TestRegSize)
1378  Bit = TestRegSize - 1;
1379  break;
1380  case TargetOpcode::G_LSHR:
1381  // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1382  if ((Bit + *C) < TestRegSize) {
1383  NextReg = TestReg;
1384  Bit = Bit + *C;
1385  }
1386  break;
1387  case TargetOpcode::G_XOR:
1388  // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1389  // appropriate.
1390  //
1391  // e.g. If x' = xor x, c, and the b-th bit is set in c then
1392  //
1393  // tbz x', b -> tbnz x, b
1394  //
1395  // Because x' only has the b-th bit set if x does not.
1396  if ((*C >> Bit) & 1)
1397  Invert = !Invert;
1398  NextReg = TestReg;
1399  break;
1400  }
1401 
1402  // Check if we found anything worth folding.
1403  if (!NextReg.isValid())
1404  return Reg;
1405  Reg = NextReg;
1406  }
1407 
1408  return Reg;
1409 }
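// For example (values chosen for illustration only): starting from
// "tbz (shl %x, 2), 3", the walk above returns %x with Bit adjusted to 1, and
// walking through "%x' = G_XOR %x, mask" flips Invert whenever the tested bit
// is set in the mask, turning a TBZ into a TBNZ.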
1410 
1411 MachineInstr *AArch64InstructionSelector::emitTestBit(
1412  Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1413  MachineIRBuilder &MIB) const {
1414  assert(TestReg.isValid());
1415  assert(ProduceNonFlagSettingCondBr &&
1416  "Cannot emit TB(N)Z with speculation tracking!");
1417  MachineRegisterInfo &MRI = *MIB.getMRI();
1418 
1419  // Attempt to optimize the test bit by walking over instructions.
1420  TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1421  LLT Ty = MRI.getType(TestReg);
1422  unsigned Size = Ty.getSizeInBits();
1423  assert(!Ty.isVector() && "Expected a scalar!");
1424  assert(Bit < 64 && "Bit is too large!");
1425 
1426  // When the test register is a 64-bit register, we have to narrow to make
1427  // TBNZW work.
1428  bool UseWReg = Bit < 32;
1429  unsigned NecessarySize = UseWReg ? 32 : 64;
1430  if (Size != NecessarySize)
1431  TestReg = moveScalarRegClass(
1432  TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1433  MIB);
1434 
1435  static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1436  {AArch64::TBZW, AArch64::TBNZW}};
1437  unsigned Opc = OpcTable[UseWReg][IsNegative];
1438  auto TestBitMI =
1439  MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1440  constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1441  return &*TestBitMI;
1442 }
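// For example, testing bit 5 of an s64 value first moves it into the GPR32
// class and emits TBZW/TBNZW, whereas testing bit 40 keeps the TBZX/TBNZX
// form, since bits 32 and above can only be addressed on an X register.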
1443 
1444 bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1445  MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1446  MachineIRBuilder &MIB) const {
1447  assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1448  // Given something like this:
1449  //
1450  // %x = ...Something...
1451  // %one = G_CONSTANT i64 1
1452  // %zero = G_CONSTANT i64 0
1453  // %and = G_AND %x, %one
1454  // %cmp = G_ICMP intpred(ne), %and, %zero
1455  // %cmp_trunc = G_TRUNC %cmp
1456  // G_BRCOND %cmp_trunc, %bb.3
1457  //
1458  // We want to try and fold the AND into the G_BRCOND and produce either a
1459  // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1460  //
1461  // In this case, we'd get
1462  //
1463  // TBNZ %x %bb.3
1464  //
1465 
1466  // Check if the AND has a constant on its RHS which we can use as a mask.
1467  // If it's a power of 2, then it's the same as checking a specific bit.
1468  // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1469  auto MaybeBit = getConstantVRegValWithLookThrough(
1470  AndInst.getOperand(2).getReg(), *MIB.getMRI());
1471  if (!MaybeBit)
1472  return false;
1473 
1474  int32_t Bit = MaybeBit->Value.exactLogBase2();
1475  if (Bit < 0)
1476  return false;
1477 
1478  Register TestReg = AndInst.getOperand(1).getReg();
1479 
1480  // Emit a TB(N)Z.
1481  emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1482  return true;
1483 }
1484 
1485 MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1486  bool IsNegative,
1487  MachineBasicBlock *DestMBB,
1488  MachineIRBuilder &MIB) const {
1489  assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1490  MachineRegisterInfo &MRI = *MIB.getMRI();
1491  assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1492  AArch64::GPRRegBankID &&
1493  "Expected GPRs only?");
1494  auto Ty = MRI.getType(CompareReg);
1495  unsigned Width = Ty.getSizeInBits();
1496  assert(!Ty.isVector() && "Expected scalar only?");
1497  assert(Width <= 64 && "Expected width to be at most 64?");
1498  static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1499  {AArch64::CBNZW, AArch64::CBNZX}};
1500  unsigned Opc = OpcTable[IsNegative][Width == 64];
1501  auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1502  constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1503  return &*BranchMI;
1504 }
1505 
1506 bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1507  MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1508  assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1509  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1510  // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1511  // totally clean. Some of them require two branches to implement.
1512  auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1513  emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1514  Pred);
1515  AArch64CC::CondCode CC1, CC2;
1516  changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
1517  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1518  MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1519  if (CC2 != AArch64CC::AL)
1520  MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1521  I.eraseFromParent();
1522  return true;
1523 }
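// For example, FCMP_ONE maps to two AArch64 conditions (the second one being
// GT, as set up in changeFCMPPredToAArch64CC), so the code above emits two Bcc
// instructions to the same destination; predicates that map cleanly to a
// single condition produce just one Bcc.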
1524 
1525 bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1526  MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1527  assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1528  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1529  // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1530  //
1531  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1532  // instructions will not be produced, as they are conditional branch
1533  // instructions that do not set flags.
1534  if (!ProduceNonFlagSettingCondBr)
1535  return false;
1536 
1537  MachineRegisterInfo &MRI = *MIB.getMRI();
1538  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1539  auto Pred =
1540  static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1541  Register LHS = ICmp.getOperand(2).getReg();
1542  Register RHS = ICmp.getOperand(3).getReg();
1543 
1544  // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1545  auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
1546  MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1547 
1548  // When we can emit a TB(N)Z, prefer that.
1549  //
1550  // Handle non-commutative condition codes first.
1551  // Note that we don't want to do this when we have a G_AND because it can
1552  // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1553  if (VRegAndVal && !AndInst) {
1554  int64_t C = VRegAndVal->Value.getSExtValue();
1555 
1556  // When we have a greater-than comparison, we can just test if the msb is
1557  // zero.
1558  if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1559  uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1560  emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1561  I.eraseFromParent();
1562  return true;
1563  }
1564 
1565  // When we have a less than comparison, we can just test if the msb is not
1566  // zero.
1567  if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1568  uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1569  emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1570  I.eraseFromParent();
1571  return true;
1572  }
1573  }
1574 
1575  // Attempt to handle commutative condition codes. Right now, that's only
1576  // eq/ne.
1577  if (ICmpInst::isEquality(Pred)) {
1578  if (!VRegAndVal) {
1579  std::swap(RHS, LHS);
1580  VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
1581  AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1582  }
1583 
1584  if (VRegAndVal && VRegAndVal->Value == 0) {
1585  // If there's a G_AND feeding into this branch, try to fold it away by
1586  // emitting a TB(N)Z instead.
1587  //
1588  // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1589  // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1590  // would be redundant.
1591  if (AndInst &&
1592  tryOptAndIntoCompareBranch(
1593  *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1594  I.eraseFromParent();
1595  return true;
1596  }
1597 
1598  // Otherwise, try to emit a CB(N)Z instead.
1599  auto LHSTy = MRI.getType(LHS);
1600  if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1601  emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1602  I.eraseFromParent();
1603  return true;
1604  }
1605  }
1606  }
1607 
1608  return false;
1609 }
1610 
1611 bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1612  MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1613  assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1614  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1615  if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1616  return true;
1617 
1618  // Couldn't optimize. Emit a compare + a Bcc.
1619  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1620  auto PredOp = ICmp.getOperand(1);
1621  emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1622  const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1623  static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
1624  MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1625  I.eraseFromParent();
1626  return true;
1627 }
1628 
1629 bool AArch64InstructionSelector::selectCompareBranch(
1630  MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1631  Register CondReg = I.getOperand(0).getReg();
1632  MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1633  if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) {
1634  CondReg = CCMI->getOperand(1).getReg();
1635  CCMI = MRI.getVRegDef(CondReg);
1636  }
1637 
1638  // Try to select the G_BRCOND using whatever is feeding the condition if
1639  // possible.
1640  MachineIRBuilder MIB(I);
1641  unsigned CCMIOpc = CCMI->getOpcode();
1642  if (CCMIOpc == TargetOpcode::G_FCMP)
1643  return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1644  if (CCMIOpc == TargetOpcode::G_ICMP)
1645  return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1646 
1647  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1648  // instructions will not be produced, as they are conditional branch
1649  // instructions that do not set flags.
1650  if (ProduceNonFlagSettingCondBr) {
1651  emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1652  I.getOperand(1).getMBB(), MIB);
1653  I.eraseFromParent();
1654  return true;
1655  }
1656 
1657  // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1658  auto TstMI =
1659  MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1660  constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
1661  auto Bcc = MIB.buildInstr(AArch64::Bcc)
1662  .addImm(AArch64CC::NE)
1663  .addMBB(I.getOperand(1).getMBB());
1664  I.eraseFromParent();
1665  return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1666 }
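 // As a rough example of the fallback paths above, a G_BRCOND on a plain i1
 // value tests bit 0 of the (truncated) condition register:
 //   tbnz w0, #0, %bb.dest
 // or, when non-flag-setting branches are disallowed (SLH), something like:
 //   tst w0, #0x1
 //   b.ne %bb.dest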
1667 
1668 /// Returns the element immediate value of a vector shift operand if found.
1669 /// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1670 static Optional<int64_t> getVectorShiftImm(Register Reg,
1671  MachineRegisterInfo &MRI) {
1672  assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1673  MachineInstr *OpMI = MRI.getVRegDef(Reg);
1674  assert(OpMI && "Expected to find a vreg def for vector shift operand");
1675  return getAArch64VectorSplatScalar(*OpMI, MRI);
1676 }
1677 
1678 /// Matches and returns the shift immediate value for a SHL instruction given
1679 /// a shift operand.
1680 static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) {
1681  Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1682  if (!ShiftImm)
1683  return None;
1684  // Check the immediate is in range for a SHL.
1685  int64_t Imm = *ShiftImm;
1686  if (Imm < 0)
1687  return None;
1688  switch (SrcTy.getElementType().getSizeInBits()) {
1689  default:
1690  LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1691  return None;
1692  case 8:
1693  if (Imm > 7)
1694  return None;
1695  break;
1696  case 16:
1697  if (Imm > 15)
1698  return None;
1699  break;
1700  case 32:
1701  if (Imm > 31)
1702  return None;
1703  break;
1704  case 64:
1705  if (Imm > 63)
1706  return None;
1707  break;
1708  }
1709  return Imm;
1710 }
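 // For example, for a <4 x s32> G_SHL whose shift operand is a G_BUILD_VECTOR
 // splat of the constant 3, this returns 3 (later selected as an immediate
 // SHL); a splat of 40 is out of range for 32-bit elements and yields None,
 // so the register form (USHL) is used instead.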
1711 
1712 bool AArch64InstructionSelector::selectVectorSHL(
1713  MachineInstr &I, MachineRegisterInfo &MRI) const {
1714  assert(I.getOpcode() == TargetOpcode::G_SHL);
1715  Register DstReg = I.getOperand(0).getReg();
1716  const LLT Ty = MRI.getType(DstReg);
1717  Register Src1Reg = I.getOperand(1).getReg();
1718  Register Src2Reg = I.getOperand(2).getReg();
1719 
1720  if (!Ty.isVector())
1721  return false;
1722 
1723  // Check if we have a vector of constants on RHS that we can select as the
1724  // immediate form.
1725  Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1726 
1727  unsigned Opc = 0;
1728  if (Ty == LLT::vector(2, 64)) {
1729  Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1730  } else if (Ty == LLT::vector(4, 32)) {
1731  Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1732  } else if (Ty == LLT::vector(2, 32)) {
1733  Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1734  } else if (Ty == LLT::vector(4, 16)) {
1735  Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1736  } else if (Ty == LLT::vector(8, 16)) {
1737  Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1738  } else if (Ty == LLT::vector(16, 8)) {
1739  Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1740  } else if (Ty == LLT::vector(8, 8)) {
1741  Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1742  } else {
1743  LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1744  return false;
1745  }
1746 
1747  MachineIRBuilder MIB(I);
1748  auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1749  if (ImmVal)
1750  Shl.addImm(*ImmVal);
1751  else
1752  Shl.addUse(Src2Reg);
1753  constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1754  I.eraseFromParent();
1755  return true;
1756 }
1757 
1758 bool AArch64InstructionSelector::selectVectorAshrLshr(
1759  MachineInstr &I, MachineRegisterInfo &MRI) const {
1760  assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1761  I.getOpcode() == TargetOpcode::G_LSHR);
1762  Register DstReg = I.getOperand(0).getReg();
1763  const LLT Ty = MRI.getType(DstReg);
1764  Register Src1Reg = I.getOperand(1).getReg();
1765  Register Src2Reg = I.getOperand(2).getReg();
1766 
1767  if (!Ty.isVector())
1768  return false;
1769 
1770  bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1771 
1772  // We expect the immediate case to be lowered in the PostLegalCombiner to
1773  // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1774 
1775  // There is no shift-right-by-register instruction, but the shift-left-by-
1776  // register instruction takes a signed shift amount, where negative amounts
1777  // specify a right shift.
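 // So a vector G_LSHR/G_ASHR is emitted below as a negate of the shift amount
 // followed by a shift-left-by-register, e.g. for <4 x s32> roughly:
 //   neg  v1.4s, v1.4s
 //   sshl v0.4s, v0.4s, v1.4s   (arithmetic right shift; ushl for logical)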
1778 
1779  unsigned Opc = 0;
1780  unsigned NegOpc = 0;
1781  const TargetRegisterClass *RC =
1782  getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI);
1783  if (Ty == LLT::vector(2, 64)) {
1784  Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1785  NegOpc = AArch64::NEGv2i64;
1786  } else if (Ty == LLT::vector(4, 32)) {
1787  Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1788  NegOpc = AArch64::NEGv4i32;
1789  } else if (Ty == LLT::vector(2, 32)) {
1790  Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1791  NegOpc = AArch64::NEGv2i32;
1792  } else if (Ty == LLT::vector(4, 16)) {
1793  Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1794  NegOpc = AArch64::NEGv4i16;
1795  } else if (Ty == LLT::vector(8, 16)) {
1796  Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1797  NegOpc = AArch64::NEGv8i16;
1798  } else if (Ty == LLT::vector(16, 8)) {
1799  Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1800  NegOpc = AArch64::NEGv16i8;
1801  } else if (Ty == LLT::vector(8, 8)) {
1802  Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1803  NegOpc = AArch64::NEGv8i8;
1804  } else {
1805  LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1806  return false;
1807  }
1808 
1809  MachineIRBuilder MIB(I);
1810  auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1811  constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1812  auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1813  constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1814  I.eraseFromParent();
1815  return true;
1816 }
1817 
1818 bool AArch64InstructionSelector::selectVaStartAAPCS(
1819  MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1820  return false;
1821 }
1822 
1823 bool AArch64InstructionSelector::selectVaStartDarwin(
1824  MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1825  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1826  Register ListReg = I.getOperand(0).getReg();
1827 
1828  Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1829 
1830  auto MIB =
1831  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
1832  .addDef(ArgsAddrReg)
1833  .addFrameIndex(FuncInfo->getVarArgsStackIndex())
1834  .addImm(0)
1835  .addImm(0);
1836 
1837  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1838 
1839  MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
1840  .addUse(ArgsAddrReg)
1841  .addUse(ListReg)
1842  .addImm(0)
1843  .addMemOperand(*I.memoperands_begin());
1844 
1845  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1846  I.eraseFromParent();
1847  return true;
1848 }
1849 
1850 void AArch64InstructionSelector::materializeLargeCMVal(
1851  MachineInstr &I, const Value *V, unsigned OpFlags) const {
1852  MachineBasicBlock &MBB = *I.getParent();
1853  MachineFunction &MF = *MBB.getParent();
1854  MachineRegisterInfo &MRI = MF.getRegInfo();
1855  MachineIRBuilder MIB(I);
1856 
1857  auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
1858  MovZ->addOperand(MF, I.getOperand(1));
1859  MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
1860  AArch64II::MO_NC);
1861  MovZ->addOperand(MF, MachineOperand::CreateImm(0));
1862  constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
1863 
1864  auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
1865  Register ForceDstReg) {
1866  Register DstReg = ForceDstReg
1867  ? ForceDstReg
1868  : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1869  auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
1870  if (auto *GV = dyn_cast<GlobalValue>(V)) {
1871  MovI->addOperand(MF, MachineOperand::CreateGA(
1872  GV, MovZ->getOperand(1).getOffset(), Flags));
1873  } else {
1874  MovI->addOperand(
1875  MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
1876  MovZ->getOperand(1).getOffset(), Flags));
1877  }
1878  MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
1879  constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
1880  return DstReg;
1881  };
1882  Register DstReg = BuildMovK(MovZ.getReg(0),
1883  AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
1884  DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
1885  BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
1886 }
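 // Schematically (the symbol name is illustrative), this materializes a
 // 64-bit address for the large code model as one MOVZ plus three MOVKs,
 // 16 bits at a time:
 //   movz x0, #:abs_g0_nc:sym
 //   movk x0, #:abs_g1_nc:sym
 //   movk x0, #:abs_g2_nc:sym
 //   movk x0, #:abs_g3:sym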
1887 
1888 bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
1889  MachineBasicBlock &MBB = *I.getParent();
1890  MachineFunction &MF = *MBB.getParent();
1891  MachineRegisterInfo &MRI = MF.getRegInfo();
1892 
1893  switch (I.getOpcode()) {
1894  case TargetOpcode::G_SHL:
1895  case TargetOpcode::G_ASHR:
1896  case TargetOpcode::G_LSHR: {
1897  // These shifts are legalized to have 64 bit shift amounts because we want
1898  // to take advantage of the existing imported selection patterns that assume
1899  // the immediates are s64s. However, if the shifted type is 32 bits and for
1900  // some reason we receive input GMIR that has an s64 shift amount that's not
1901  // a G_CONSTANT, insert a truncate so that we can still select the s32
1902  // register-register variant.
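 // For example (names are illustrative), an s64 shift amount %amt feeding a
 // 32-bit G_SHL gets a subregister copy inserted:
 //   %trunc:gpr(s32) = COPY %amt.sub_32
 // so that the imported s32 register-register shift pattern can match.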
1903  Register SrcReg = I.getOperand(1).getReg();
1904  Register ShiftReg = I.getOperand(2).getReg();
1905  const LLT ShiftTy = MRI.getType(ShiftReg);
1906  const LLT SrcTy = MRI.getType(SrcReg);
1907  if (SrcTy.isVector())
1908  return false;
1909  assert(!ShiftTy.isVector() && "unexpected vector shift ty");
1910  if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
1911  return false;
1912  auto *AmtMI = MRI.getVRegDef(ShiftReg);
1913  assert(AmtMI && "could not find a vreg definition for shift amount");
1914  if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
1915  // Insert a subregister copy to implement a 64->32 trunc
1916  MachineIRBuilder MIB(I);
1917  auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
1918  .addReg(ShiftReg, 0, AArch64::sub_32);
1919  MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
1920  I.getOperand(2).setReg(Trunc.getReg(0));
1921  }
1922  return true;
1923  }
1924  case TargetOpcode::G_STORE: {
1925  bool Changed = contractCrossBankCopyIntoStore(I, MRI);
1926  MachineOperand &SrcOp = I.getOperand(0);
1927  if (MRI.getType(SrcOp.getReg()).isPointer()) {
1928  // Allow matching with imported patterns for stores of pointers. Unlike
1929  // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
1930  // and constrain.
1931  MachineIRBuilder MIB(I);
1932  auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
1933  Register NewSrc = Copy.getReg(0);
1934  SrcOp.setReg(NewSrc);
1935  RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
1936  Changed = true;
1937  }
1938  return Changed;
1939  }
1940  case TargetOpcode::G_PTR_ADD:
1941  return convertPtrAddToAdd(I, MRI);
1942  case TargetOpcode::G_LOAD: {
1943  // For scalar loads of pointers, we try to convert the dest type from p0
1944  // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
1945  // conversion, this should be ok because all users should have been
1946  // selected already, so the type doesn't matter for them.
1947  Register DstReg = I.getOperand(0).getReg();
1948  const LLT DstTy = MRI.getType(DstReg);
1949  if (!DstTy.isPointer())
1950  return false;
1951  MRI.setType(DstReg, LLT::scalar(64));
1952  return true;
1953  }
1954  case AArch64::G_DUP: {
1955  // Convert the type from p0 to s64 to help selection.
1956  LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1957  if (!DstTy.getElementType().isPointer())
1958  return false;
1959  MachineIRBuilder MIB(I);
1960  auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
1961  MRI.setType(I.getOperand(0).getReg(),
1962  DstTy.changeElementType(LLT::scalar(64)));
1963  MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
1964  I.getOperand(1).setReg(NewSrc.getReg(0));
1965  return true;
1966  }
1967  case TargetOpcode::G_UITOFP:
1968  case TargetOpcode::G_SITOFP: {
1969  // If both source and destination regbanks are FPR, then convert the opcode
1970  // to G_SITOF so that the importer can select it to an fpr variant.
1971  // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
1972  // copy.
1973  Register SrcReg = I.getOperand(1).getReg();
1974  LLT SrcTy = MRI.getType(SrcReg);
1975  LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1976  if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
1977  return false;
1978 
1979  if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
1980  if (I.getOpcode() == TargetOpcode::G_SITOFP)
1981  I.setDesc(TII.get(AArch64::G_SITOF));
1982  else
1983  I.setDesc(TII.get(AArch64::G_UITOF));
1984  return true;
1985  }
1986  return false;
1987  }
1988  default:
1989  return false;
1990  }
1991 }
1992 
1993 /// This lowering tries to look for G_PTR_ADD instructions and then converts
1994 /// them to a standard G_ADD with a COPY on the source.
1995 ///
1996 /// The motivation behind this is to expose the add semantics to the imported
1997 /// tablegen patterns. We shouldn't need to check for uses being loads/stores,
1998 /// because the selector works bottom up, uses before defs. By the time we
1999  /// end up trying to select a G_PTR_ADD, we should have already attempted to
2000  /// fold it into an addressing mode and been unsuccessful.
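/// As a sketch of the rewrite:
///   %dst:_(p0) = G_PTR_ADD %base(p0), %off(s64)
/// becomes
///   %intbase:_(s64) = G_PTRTOINT %base(p0)
///   %dst:_(s64) = G_ADD %intbase, %off
/// and, when %off is a (0 - %x) negate idiom, the G_ADD is further converted
/// into a G_SUB.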
2001 bool AArch64InstructionSelector::convertPtrAddToAdd(
2002  MachineInstr &I, MachineRegisterInfo &MRI) {
2003  assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
2004  Register DstReg = I.getOperand(0).getReg();
2005  Register AddOp1Reg = I.getOperand(1).getReg();
2006  const LLT PtrTy = MRI.getType(DstReg);
2007  if (PtrTy.getAddressSpace() != 0)
2008  return false;
2009 
2010  MachineIRBuilder MIB(I);
2011  const LLT CastPtrTy = PtrTy.isVector() ? LLT::vector(2, 64) : LLT::scalar(64);
2012  auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
2013  // Set regbanks on the registers.
2014  if (PtrTy.isVector())
2015  MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2016  else
2017  MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2018 
2019  // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2020  // %dst(intty) = G_ADD %intbase, off
2021  I.setDesc(TII.get(TargetOpcode::G_ADD));
2022  MRI.setType(DstReg, CastPtrTy);
2023  I.getOperand(1).setReg(PtrToInt.getReg(0));
2024  if (!select(*PtrToInt)) {
2025  LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
2026  return false;
2027  }
2028 
2029  // Also take the opportunity here to try to do some optimization.
2030  // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
2031  Register NegatedReg;
2032  if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
2033  return true;
2034  I.getOperand(2).setReg(NegatedReg);
2035  I.setDesc(TII.get(TargetOpcode::G_SUB));
2036  return true;
2037 }
2038 
2039 bool AArch64InstructionSelector::earlySelectSHL(
2040  MachineInstr &I, MachineRegisterInfo &MRI) const {
2041  // We try to match the immediate variant of LSL, which is actually an alias
2042  // for a special case of UBFM. Otherwise, we fall back to the imported
2043  // selector which will match the register variant.
2044  assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2045  const auto &MO = I.getOperand(2);
2046  auto VRegAndVal = getConstantVRegVal(MO.getReg(), MRI);
2047  if (!VRegAndVal)
2048  return false;
2049 
2050  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2051  if (DstTy.isVector())
2052  return false;
2053  bool Is64Bit = DstTy.getSizeInBits() == 64;
2054  auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2055  auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2056  MachineIRBuilder MIB(I);
2057 
2058  if (!Imm1Fn || !Imm2Fn)
2059  return false;
2060 
2061  auto NewI =
2062  MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2063  {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2064 
2065  for (auto &RenderFn : *Imm1Fn)
2066  RenderFn(NewI);
2067  for (auto &RenderFn : *Imm2Fn)
2068  RenderFn(NewI);
2069 
2070  I.eraseFromParent();
2071  return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
2072 }
2073 
2074 bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2075  MachineInstr &I, MachineRegisterInfo &MRI) {
2076  assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2077  // If we're storing a scalar, it doesn't matter what register bank that
2078  // scalar is on. All that matters is the size.
2079  //
2080  // So, if we see something like this (with a 32-bit scalar as an example):
2081  //
2082  // %x:gpr(s32) = ... something ...
2083  // %y:fpr(s32) = COPY %x:gpr(s32)
2084  // G_STORE %y:fpr(s32)
2085  //
2086  // We can fix this up into something like this:
2087  //
2088  // G_STORE %x:gpr(s32)
2089  //
2090  // And then continue the selection process normally.
2091  Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2092  if (!DefDstReg.isValid())
2093  return false;
2094  LLT DefDstTy = MRI.getType(DefDstReg);
2095  Register StoreSrcReg = I.getOperand(0).getReg();
2096  LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2097 
2098  // If we get something strange like a physical register, then we shouldn't
2099  // go any further.
2100  if (!DefDstTy.isValid())
2101  return false;
2102 
2103  // Are the source and dst types the same size?
2104  if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2105  return false;
2106 
2107  if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2108  RBI.getRegBank(DefDstReg, MRI, TRI))
2109  return false;
2110 
2111  // We have a cross-bank copy, which is entering a store. Let's fold it.
2112  I.getOperand(0).setReg(DefDstReg);
2113  return true;
2114 }
2115 
2116 bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
2117  assert(I.getParent() && "Instruction should be in a basic block!");
2118  assert(I.getParent()->getParent() && "Instruction should be in a function!");
2119 
2120  MachineBasicBlock &MBB = *I.getParent();
2121  MachineFunction &MF = *MBB.getParent();
2122  MachineRegisterInfo &MRI = MF.getRegInfo();
2123 
2124  switch (I.getOpcode()) {
2125  case AArch64::G_DUP: {
2126  // Before selecting a DUP instruction, check if it is better selected as a
2127  // MOV or load from a constant pool.
2128  Register Src = I.getOperand(1).getReg();
2129  auto ValAndVReg = getConstantVRegValWithLookThrough(Src, MRI);
2130  if (!ValAndVReg)
2131  return false;
2132  LLVMContext &Ctx = MF.getFunction().getContext();
2133  Register Dst = I.getOperand(0).getReg();
2134  auto *CV = ConstantDataVector::getSplat(
2135  MRI.getType(Dst).getNumElements(),
2136  ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()),
2137  ValAndVReg->Value));
2138  MachineIRBuilder MIRBuilder(I);
2139  if (!emitConstantVector(Dst, CV, MIRBuilder, MRI))
2140  return false;
2141  I.eraseFromParent();
2142  return true;
2143  }
2144  case TargetOpcode::G_BR: {
2145  // If the branch jumps to the fallthrough block, don't bother emitting it.
2146  // Only do this at -O0, where it gives a good code size improvement, because
2147  // when optimizations are enabled we want to leave this choice to
2148  // MachineBlockPlacement.
2149  bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOpt::None;
2150  if (EnableOpt || !MBB.isLayoutSuccessor(I.getOperand(0).getMBB()))
2151  return false;
2152  I.eraseFromParent();
2153  return true;
2154  }
2155  case TargetOpcode::G_SHL:
2156  return earlySelectSHL(I, MRI);
2157  case TargetOpcode::G_CONSTANT: {
2158  bool IsZero = false;
2159  if (I.getOperand(1).isCImm())
2160  IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
2161  else if (I.getOperand(1).isImm())
2162  IsZero = I.getOperand(1).getImm() == 0;
2163 
2164  if (!IsZero)
2165  return false;
2166 
2167  Register DefReg = I.getOperand(0).getReg();
2168  LLT Ty = MRI.getType(DefReg);
2169  if (Ty.getSizeInBits() == 64) {
2170  I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2171  RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2172  } else if (Ty.getSizeInBits() == 32) {
2173  I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2174  RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2175  } else
2176  return false;
2177 
2178  I.setDesc(TII.get(TargetOpcode::COPY));
2179  return true;
2180  }
2181 
2182  case TargetOpcode::G_ADD: {
2183  // Check if this is being fed by a G_ICMP on either side.
2184  //
2185  // (cmp pred, x, y) + z
2186  //
2187  // In the above case, when the cmp is true, we increment z by 1. So, we can
2188  // fold the add into the cset for the cmp by using cinc.
2189  //
2190  // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
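 // Sketch (virtual registers are illustrative):
 //   %c:_(s32) = G_ICMP intpred(eq), %a(s32), %b
 //   %sum:_(s32) = G_ADD %z, %c
 // becomes, roughly:
 //   cmp  w0, w1
 //   cinc w2, w3, eq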
2191  Register X = I.getOperand(1).getReg();
2192 
2193  // Only handle scalars. Scalar G_ICMP is only legal for s32, so bail out
2194  // early if we see it.
2195  LLT Ty = MRI.getType(X);
2196  if (Ty.isVector() || Ty.getSizeInBits() != 32)
2197  return false;
2198 
2199  Register CmpReg = I.getOperand(2).getReg();
2200  MachineInstr *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, CmpReg, MRI);
2201  if (!Cmp) {
2202  std::swap(X, CmpReg);
2203  Cmp = getOpcodeDef(TargetOpcode::G_ICMP, CmpReg, MRI);
2204  if (!Cmp)
2205  return false;
2206  }
2207  MachineIRBuilder MIRBuilder(I);
2208  auto Pred =
2209  static_cast<CmpInst::Predicate>(Cmp->getOperand(1).getPredicate());
2210  emitIntegerCompare(Cmp->getOperand(2), Cmp->getOperand(3),
2211  Cmp->getOperand(1), MIRBuilder);
2212  emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIRBuilder, X);
2213  I.eraseFromParent();
2214  return true;
2215  }
2216  default:
2217  return false;
2218  }
2219 }
2220 
2221 bool AArch64InstructionSelector::select(MachineInstr &I) {
2222  assert(I.getParent() && "Instruction should be in a basic block!");
2223  assert(I.getParent()->getParent() && "Instruction should be in a function!");
2224 
2225  MachineBasicBlock &MBB = *I.getParent();
2226  MachineFunction &MF = *MBB.getParent();
2227  MachineRegisterInfo &MRI = MF.getRegInfo();
2228 
2229  const AArch64Subtarget *Subtarget =
2230  &static_cast<const AArch64Subtarget &>(MF.getSubtarget());
2231  if (Subtarget->requiresStrictAlign()) {
2232  // We don't support this feature yet.
2233  LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2234  return false;
2235  }
2236 
2237  unsigned Opcode = I.getOpcode();
2238  // G_PHI requires the same handling as PHI.
2239  if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2240  // Certain non-generic instructions also need some special handling.
2241 
2242  if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2243  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2244 
2245  if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2246  const Register DefReg = I.getOperand(0).getReg();
2247  const LLT DefTy = MRI.getType(DefReg);
2248 
2249  const RegClassOrRegBank &RegClassOrBank =
2250  MRI.getRegClassOrRegBank(DefReg);
2251 
2252  const TargetRegisterClass *DefRC
2253  = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
2254  if (!DefRC) {
2255  if (!DefTy.isValid()) {
2256  LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2257  return false;
2258  }
2259  const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
2260  DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
2261  if (!DefRC) {
2262  LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2263  return false;
2264  }
2265  }
2266 
2267  I.setDesc(TII.get(TargetOpcode::PHI));
2268 
2269  return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2270  }
2271 
2272  if (I.isCopy())
2273  return selectCopy(I, TII, MRI, TRI, RBI);
2274 
2275  return true;
2276  }
2277 
2278 
2279  if (I.getNumOperands() != I.getNumExplicitOperands()) {
2280  LLVM_DEBUG(
2281  dbgs() << "Generic instruction has unexpected implicit operands\n");
2282  return false;
2283  }
2284 
2285  // Try to do some lowering before we start instruction selecting. These
2286  // lowerings are purely transformations on the input G_MIR and so selection
2287  // must continue after any modification of the instruction.
2288  if (preISelLower(I)) {
2289  Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2290  }
2291 
2292  // There may be patterns that the importer can't handle optimally but still
2293  // selects into a suboptimal sequence, so our custom C++ selection code
2294  // later never has a chance to work on them. Therefore, we have an early
2295  // selection attempt here to give priority to certain selection routines
2296  // over the imported ones.
2297  if (earlySelect(I))
2298  return true;
2299 
2300  if (selectImpl(I, *CoverageInfo))
2301  return true;
2302 
2303  LLT Ty =
2304  I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2305 
2306  MachineIRBuilder MIB(I);
2307 
2308  switch (Opcode) {
2309  case TargetOpcode::G_SBFX:
2310  case TargetOpcode::G_UBFX: {
2311  static const unsigned OpcTable[2][2] = {
2312  {AArch64::UBFMWri, AArch64::UBFMXri},
2313  {AArch64::SBFMWri, AArch64::SBFMXri}};
2314  bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2315  unsigned Size = Ty.getSizeInBits();
2316  unsigned Opc = OpcTable[IsSigned][Size == 64];
2317  auto Cst1 =
2318  getConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
2319  assert(Cst1 && "Should have gotten a constant for src 1?");
2320  auto Cst2 =
2321  getConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
2322  assert(Cst2 && "Should have gotten a constant for src 2?");
2323  auto LSB = Cst1->Value.getZExtValue();
2324  auto Width = Cst2->Value.getZExtValue();
2325  MachineIRBuilder MIB(I);
2326  auto BitfieldInst =
2327  MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
2328  .addImm(LSB)
2329  .addImm(Width);
2330  I.eraseFromParent();
2331  return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
2332  }
2333  case TargetOpcode::G_BRCOND:
2334  return selectCompareBranch(I, MF, MRI);
2335 
2336  case TargetOpcode::G_BRINDIRECT: {
2337  I.setDesc(TII.get(AArch64::BR));
2338  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2339  }
2340 
2341  case TargetOpcode::G_BRJT:
2342  return selectBrJT(I, MRI);
2343 
2344  case AArch64::G_ADD_LOW: {
2345  // This op may have been separated from its ADRP companion by the localizer
2346  // or some other code motion pass. Given that many CPUs will try to
2347  // macro fuse these operations anyway, select this into a MOVaddr pseudo
2348  // which will later be expanded into an ADRP+ADD pair after scheduling.
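 // For instance, when the G_ADD_LOW's first operand is still the ADRP of the
 // same symbol, the pair is re-fused into one MOVaddr pseudo here, which is
 // expanded again after scheduling into:
 //   adrp x0, var
 //   add  x0, x0, :lo12:var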
2349  MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2350  if (BaseMI->getOpcode() != AArch64::ADRP) {
2351  I.setDesc(TII.get(AArch64::ADDXri));
2352  I.addOperand(MachineOperand::CreateImm(0));
2353  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2354  }
2355  assert(TM.getCodeModel() == CodeModel::Small &&
2356  "Expected small code model");
2357  MachineIRBuilder MIB(I);
2358  auto Op1 = BaseMI->getOperand(1);
2359  auto Op2 = I.getOperand(2);
2360  auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2361  .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2362  Op1.getTargetFlags())
2363  .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2364  Op2.getTargetFlags());
2365  I.eraseFromParent();
2366  return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2367  }
2368 
2369  case TargetOpcode::G_BSWAP: {
2370  // Handle vector types for G_BSWAP directly.
2371  Register DstReg = I.getOperand(0).getReg();
2372  LLT DstTy = MRI.getType(DstReg);
2373 
2374  // We should only get vector types here; everything else is handled by the
2375  // importer right now.
2376  if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
2377  LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
2378  return false;
2379  }
2380 
2381  // Only handle 4 and 2 element vectors for now.
2382  // TODO: 16-bit elements.
2383  unsigned NumElts = DstTy.getNumElements();
2384  if (NumElts != 4 && NumElts != 2) {
2385  LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
2386  return false;
2387  }
2388 
2389  // Choose the correct opcode for the supported types. Right now, that's
2390  // v2s32, v4s32, and v2s64.
2391  unsigned Opc = 0;
2392  unsigned EltSize = DstTy.getElementType().getSizeInBits();
2393  if (EltSize == 32)
2394  Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
2395  : AArch64::REV32v16i8;
2396  else if (EltSize == 64)
2397  Opc = AArch64::REV64v16i8;
2398 
2399  // We should always get something by the time we get here...
2400  assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
2401 
2402  I.setDesc(TII.get(Opc));
2403  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2404  }
2405 
2406  case TargetOpcode::G_FCONSTANT:
2407  case TargetOpcode::G_CONSTANT: {
2408  const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2409 
2410  const LLT s8 = LLT::scalar(8);
2411  const LLT s16 = LLT::scalar(16);
2412  const LLT s32 = LLT::scalar(32);
2413  const LLT s64 = LLT::scalar(64);
2414  const LLT s128 = LLT::scalar(128);
2415  const LLT p0 = LLT::pointer(0, 64);
2416 
2417  const Register DefReg = I.getOperand(0).getReg();
2418  const LLT DefTy = MRI.getType(DefReg);
2419  const unsigned DefSize = DefTy.getSizeInBits();
2420  const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2421 
2422  // FIXME: Redundant check, but even less readable when factored out.
2423  if (isFP) {
2424  if (Ty != s32 && Ty != s64 && Ty != s128) {
2425  LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2426  << " constant, expected: " << s32 << " or " << s64
2427  << " or " << s128 << '\n');
2428  return false;
2429  }
2430 
2431  if (RB.getID() != AArch64::FPRRegBankID) {
2432  LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2433  << " constant on bank: " << RB
2434  << ", expected: FPR\n");
2435  return false;
2436  }
2437 
2438  // The case when we have 0.0 is covered by tablegen. Reject it here so we
2439  // can be sure tablegen works correctly and isn't rescued by this code.
2440  // 0.0 is not covered by tablegen for FP128. So we will handle this
2441  // scenario in the code here.
2442  if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2443  return false;
2444  } else {
2445  // s32 and s64 are covered by tablegen.
2446  if (Ty != p0 && Ty != s8 && Ty != s16) {
2447  LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2448  << " constant, expected: " << s32 << ", " << s64
2449  << ", or " << p0 << '\n');
2450  return false;
2451  }
2452 
2453  if (RB.getID() != AArch64::GPRRegBankID) {
2454  LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2455  << " constant on bank: " << RB
2456  << ", expected: GPR\n");
2457  return false;
2458  }
2459  }
2460 
2461  // We allow G_CONSTANT of types < 32b.
2462  const unsigned MovOpc =
2463  DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2464 
2465  if (isFP) {
2466  // Either emit a FMOV, or emit a copy to emit a normal mov.
2467  const TargetRegisterClass &GPRRC =
2468  DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
2469  const TargetRegisterClass &FPRRC =
2470  DefSize == 32 ? AArch64::FPR32RegClass
2471  : (DefSize == 64 ? AArch64::FPR64RegClass
2472  : AArch64::FPR128RegClass);
2473 
2474  // For 64b values, emit a constant pool load instead.
2475  // For s32, use a cp load if we have optsize/minsize.
2476  if (DefSize == 64 || DefSize == 128 ||
2477  (DefSize == 32 && shouldOptForSize(&MF))) {
2478  auto *FPImm = I.getOperand(1).getFPImm();
2479  MachineIRBuilder MIB(I);
2480  auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2481  if (!LoadMI) {
2482  LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2483  return false;
2484  }
2485  MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2486  I.eraseFromParent();
2487  return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2488  }
2489 
2490  // Nope. Emit a copy and use a normal mov instead.
2491  const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC);
2492  MachineOperand &RegOp = I.getOperand(0);
2493  RegOp.setReg(DefGPRReg);
2494  MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2495  MIB.buildCopy({DefReg}, {DefGPRReg});
2496 
2497  if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2498  LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2499  return false;
2500  }
2501 
2502  MachineOperand &ImmOp = I.getOperand(1);
2503  // FIXME: Is going through int64_t always correct?
2504  ImmOp.ChangeToImmediate(
2505  ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2506  } else if (I.getOperand(1).isCImm()) {
2507  uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2508  I.getOperand(1).ChangeToImmediate(Val);
2509  } else if (I.getOperand(1).isImm()) {
2510  uint64_t Val = I.getOperand(1).getImm();
2511  I.getOperand(1).ChangeToImmediate(Val);
2512  }
2513 
2514  I.setDesc(TII.get(MovOpc));
2515  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2516  return true;
2517  }
2518  case TargetOpcode::G_EXTRACT: {
2519  Register DstReg = I.getOperand(0).getReg();
2520  Register SrcReg = I.getOperand(1).getReg();
2521  LLT SrcTy = MRI.getType(SrcReg);
2522  LLT DstTy = MRI.getType(DstReg);
2523  (void)DstTy;
2524  unsigned SrcSize = SrcTy.getSizeInBits();
2525 
2526  if (SrcTy.getSizeInBits() > 64) {
2527  // This should be an extract of an s128, which is like a vector extract.
2528  if (SrcTy.getSizeInBits() != 128)
2529  return false;
2530  // Only support extracting 64 bits from an s128 at the moment.
2531  if (DstTy.getSizeInBits() != 64)
2532  return false;
2533 
2534  const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2535  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2536  // Check we have the right regbank always.
2537  assert(SrcRB.getID() == AArch64::FPRRegBankID &&
2538  DstRB.getID() == AArch64::FPRRegBankID &&
2539  "Wrong extract regbank!");
2540  (void)SrcRB;
2541 
2542  // Emit the same code as a vector extract.
2543  // Offset must be a multiple of 64.
2544  unsigned Offset = I.getOperand(2).getImm();
2545  if (Offset % 64 != 0)
2546  return false;
2547  unsigned LaneIdx = Offset / 64;
2548  MachineIRBuilder MIB(I);
2549  MachineInstr *Extract = emitExtractVectorElt(
2550  DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2551  if (!Extract)
2552  return false;
2553  I.eraseFromParent();
2554  return true;
2555  }
2556 
2557  I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2558  MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2559  Ty.getSizeInBits() - 1);
2560 
2561  if (SrcSize < 64) {
2562  assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2563  "unexpected G_EXTRACT types");
2564  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2565  }
2566 
2567  DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2568  MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2569  MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2570  .addReg(DstReg, 0, AArch64::sub_32);
2571  RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2572  AArch64::GPR32RegClass, MRI);
2573  I.getOperand(0).setReg(DstReg);
2574 
2575  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2576  }
2577 
2578  case TargetOpcode::G_INSERT: {
2579  LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2580  LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2581  unsigned DstSize = DstTy.getSizeInBits();
2582  // Larger inserts are vectors, same-size ones should be something else by
2583  // now (split up or turned into COPYs).
2584  if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2585  return false;
2586 
2587  I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2588  unsigned LSB = I.getOperand(3).getImm();
2589  unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2590  I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2591  MachineInstrBuilder(MF, I).addImm(Width - 1);
2592 
2593  if (DstSize < 64) {
2594  assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2595  "unexpected G_INSERT types");
2596  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2597  }
2598 
2599  Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2600  BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2601  TII.get(AArch64::SUBREG_TO_REG))
2602  .addDef(SrcReg)
2603  .addImm(0)
2604  .addUse(I.getOperand(2).getReg())
2605  .addImm(AArch64::sub_32);
2606  RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2607  AArch64::GPR32RegClass, MRI);
2608  I.getOperand(2).setReg(SrcReg);
2609 
2610  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2611  }
2612  case TargetOpcode::G_FRAME_INDEX: {
2613  // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2614  if (Ty != LLT::pointer(0, 64)) {
2615  LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2616  << ", expected: " << LLT::pointer(0, 64) << '\n');
2617  return false;
2618  }
2619  I.setDesc(TII.get(AArch64::ADDXri));
2620 
2621  // MOs for a #0 shifted immediate.
2622  I.addOperand(MachineOperand::CreateImm(0));
2623  I.addOperand(MachineOperand::CreateImm(0));
2624 
2625  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2626  }
2627 
2628  case TargetOpcode::G_GLOBAL_VALUE: {
2629  auto GV = I.getOperand(1).getGlobal();
2630  if (GV->isThreadLocal())
2631  return selectTLSGlobalValue(I, MRI);
2632 
2633  unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
2634  if (OpFlags & AArch64II::MO_GOT) {
2635  I.setDesc(TII.get(AArch64::LOADgot));
2636  I.getOperand(1).setTargetFlags(OpFlags);
2637  } else if (TM.getCodeModel() == CodeModel::Large) {
2638  // Materialize the global using movz/movk instructions.
2639  materializeLargeCMVal(I, GV, OpFlags);
2640  I.eraseFromParent();
2641  return true;
2642  } else if (TM.getCodeModel() == CodeModel::Tiny) {
2643  I.setDesc(TII.get(AArch64::ADR));
2644  I.getOperand(1).setTargetFlags(OpFlags);
2645  } else {
2646  I.setDesc(TII.get(AArch64::MOVaddr));
2647  I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2648  MachineInstrBuilder MIB(MF, I);
2649  MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2650  OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2651  }
2652  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2653  }
2654 
2655  case TargetOpcode::G_ZEXTLOAD:
2656  case TargetOpcode::G_LOAD:
2657  case TargetOpcode::G_STORE: {
2658  bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2659  MachineIRBuilder MIB(I);
2660 
2661  LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
2662 
2663  if (PtrTy != LLT::pointer(0, 64)) {
2664  LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
2665  << ", expected: " << LLT::pointer(0, 64) << '\n');
2666  return false;
2667  }
2668 
2669  auto &MemOp = **I.memoperands_begin();
2670  uint64_t MemSizeInBytes = MemOp.getSize();
2671  if (MemOp.isAtomic()) {
2672  // For now we just support s8 acquire loads to be able to compile stack
2673  // protector code.
2674  if (MemOp.getOrdering() == AtomicOrdering::Acquire &&
2675  MemSizeInBytes == 1) {
2676  I.setDesc(TII.get(AArch64::LDARB));
2677  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2678  }
2679  LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n");
2680  return false;
2681  }
2682  unsigned MemSizeInBits = MemSizeInBytes * 8;
2683 
2684 #ifndef NDEBUG
2685  const Register PtrReg = I.getOperand(1).getReg();
2686  const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2687  // Sanity-check the pointer register.
2688  assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2689  "Load/Store pointer operand isn't a GPR");
2690  assert(MRI.getType(PtrReg).isPointer() &&
2691  "Load/Store pointer operand isn't a pointer");
2692 #endif
2693 
2694  const Register ValReg = I.getOperand(0).getReg();
2695  const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2696 
2697  // Helper lambda for partially selecting I. Either returns the original
2698  // instruction with an updated opcode, or a new instruction.
2699  auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
2700  bool IsStore = I.getOpcode() == TargetOpcode::G_STORE;
2701  const unsigned NewOpc =
2702  selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
2703  if (NewOpc == I.getOpcode())
2704  return nullptr;
2705  // Check if we can fold anything into the addressing mode.
2706  auto AddrModeFns =
2707  selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
2708  if (!AddrModeFns) {
2709  // Can't fold anything. Use the original instruction.
2710  I.setDesc(TII.get(NewOpc));
2711  I.addOperand(MachineOperand::CreateImm(0));
2712  return &I;
2713  }
2714 
2715  // Folded something. Create a new instruction and return it.
2716  auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
2717  IsStore ? NewInst.addUse(ValReg) : NewInst.addDef(ValReg);
2718  NewInst.cloneMemRefs(I);
2719  for (auto &Fn : *AddrModeFns)
2720  Fn(NewInst);
2721  I.eraseFromParent();
2722  return &*NewInst;
2723  };
2724 
2725  MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
2726  if (!LoadStore)
2727  return false;
2728 
2729  // If we're storing a 0, use WZR/XZR.
2730  if (Opcode == TargetOpcode::G_STORE) {
2731  auto CVal = getConstantVRegValWithLookThrough(
2732  LoadStore->getOperand(0).getReg(), MRI, /*LookThroughInstrs = */ true,
2733  /*HandleFConstants = */ false);
2734  if (CVal && CVal->Value == 0) {
2735  switch (LoadStore->getOpcode()) {
2736  case AArch64::STRWui:
2737  case AArch64::STRHHui:
2738  case AArch64::STRBBui:
2739  LoadStore->getOperand(0).setReg(AArch64::WZR);
2740  break;
2741  case AArch64::STRXui:
2742  LoadStore->getOperand(0).setReg(AArch64::XZR);
2743  break;
2744  }
2745  }
2746  }
2747 
2748  if (IsZExtLoad) {
2749  // The zextload from a smaller type to i32 should be handled by the
2750  // importer.
2751  if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
2752  return false;
2753  // If we have a ZEXTLOAD then change the load's type to be a narrower reg
2754  // and zero_extend with SUBREG_TO_REG.
2755  Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2756  Register DstReg = LoadStore->getOperand(0).getReg();
2757  LoadStore->getOperand(0).setReg(LdReg);
2758 
2759  MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
2760  MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
2761  .addImm(0)
2762  .addUse(LdReg)
2763  .addImm(AArch64::sub_32);
2765  return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
2766  MRI);
2767  }
2768  return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2769  }
2770 
2771  case TargetOpcode::G_SMULH:
2772  case TargetOpcode::G_UMULH: {
2773  // Reject the various things we don't support yet.
2774  if (unsupportedBinOp(I, RBI, MRI, TRI))
2775  return false;
2776 
2777  const Register DefReg = I.getOperand(0).getReg();
2778  const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2779 
2780  if (RB.getID() != AArch64::GPRRegBankID) {
2781  LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
2782  return false;
2783  }
2784 
2785  if (Ty != LLT::scalar(64)) {
2786  LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
2787  << ", expected: " << LLT::scalar(64) << '\n');
2788  return false;
2789  }
2790 
2791  unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
2792  : AArch64::UMULHrr;
2793  I.setDesc(TII.get(NewOpc));
2794 
2795  // Now that we selected an opcode, we need to constrain the register
2796  // operands to use appropriate classes.
2797  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2798  }
2799  case TargetOpcode::G_LSHR:
2800  case TargetOpcode::G_ASHR:
2801  if (MRI.getType(I.getOperand(0).getReg()).isVector())
2802  return selectVectorAshrLshr(I, MRI);
2803  LLVM_FALLTHROUGH;
2804  case TargetOpcode::G_SHL:
2805  if (Opcode == TargetOpcode::G_SHL &&
2806  MRI.getType(I.getOperand(0).getReg()).isVector())
2807  return selectVectorSHL(I, MRI);
2808  LLVM_FALLTHROUGH;
2809  case TargetOpcode::G_FADD:
2810  case TargetOpcode::G_FSUB:
2811  case TargetOpcode::G_FMUL:
2812  case TargetOpcode::G_FDIV:
2813  case TargetOpcode::G_OR: {
2814  // Reject the various things we don't support yet.
2815  if (unsupportedBinOp(I, RBI, MRI, TRI))
2816  return false;
2817 
2818  const unsigned OpSize = Ty.getSizeInBits();
2819 
2820  const Register DefReg = I.getOperand(0).getReg();
2821  const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2822 
2823  const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
2824  if (NewOpc == I.getOpcode())
2825  return false;
2826 
2827  I.setDesc(TII.get(NewOpc));
2828  // FIXME: Should the type be always reset in setDesc?
2829 
2830  // Now that we selected an opcode, we need to constrain the register
2831  // operands to use appropriate classes.
2832  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2833  }
2834 
2835  case TargetOpcode::G_PTR_ADD: {
2836  MachineIRBuilder MIRBuilder(I);
2837  emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2),
2838  MIRBuilder);
2839  I.eraseFromParent();
2840  return true;
2841  }
2842  case TargetOpcode::G_SADDO:
2843  case TargetOpcode::G_UADDO:
2844  case TargetOpcode::G_SSUBO:
2845  case TargetOpcode::G_USUBO: {
2846  // Emit the operation and get the correct condition code.
2847  MachineIRBuilder MIRBuilder(I);
2848  auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
2849  I.getOperand(2), I.getOperand(3), MIRBuilder);
2850 
2851  // Now, put the overflow result in the register given by the first operand
2852  // to the overflow op. CSINC increments the result when the predicate is
2853  // false, so to get the increment when it's true, we need to use the
2854  // inverse. In this case, we want to increment when carry is set.
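 // For example, a 32-bit G_UADDO is emitted roughly as:
 //   adds w0, w1, w2     (value result; sets the carry flag)
 //   cset w3, hs         (overflow result; encoded as csinc w3, wzr, wzr, lo)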
2855  Register ZReg = AArch64::WZR;
2856  auto CsetMI = MIRBuilder
2857  .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
2858  {ZReg, ZReg})
2859  .addImm(getInvertedCondCode(OpAndCC.second));
2860  constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
2861  I.eraseFromParent();
2862  return true;
2863  }
2864 
2865  case TargetOpcode::G_PTRMASK: {
2866  Register MaskReg = I.getOperand(2).getReg();
2867  Optional<int64_t> MaskVal = getConstantVRegSExtVal(MaskReg, MRI);
2868  // TODO: Implement arbitrary cases
2869  if (!MaskVal || !isShiftedMask_64(*MaskVal))
2870  return false;
2871 
2872  uint64_t Mask = *MaskVal;
2873  I.setDesc(TII.get(AArch64::ANDXri));
2874  I.getOperand(2).ChangeToImmediate(
2875  AArch64_AM::encodeLogicalImmediate(Mask, 64));
2876 
2877  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2878  }
2879  case TargetOpcode::G_PTRTOINT:
2880  case TargetOpcode::G_TRUNC: {
2881  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2882  const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
2883 
2884  const Register DstReg = I.getOperand(0).getReg();
2885  const Register SrcReg = I.getOperand(1).getReg();
2886 
2887  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2888  const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2889 
2890  if (DstRB.getID() != SrcRB.getID()) {
2891  LLVM_DEBUG(
2892  dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
2893  return false;
2894  }
2895 
2896  if (DstRB.getID() == AArch64::GPRRegBankID) {
2897  const TargetRegisterClass *DstRC =
2898  getRegClassForTypeOnBank(DstTy, DstRB, RBI);
2899  if (!DstRC)
2900  return false;
2901 
2902  const TargetRegisterClass *SrcRC =
2903  getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
2904  if (!SrcRC)
2905  return false;
2906 
2907  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
2908  !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
2909  LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
2910  return false;
2911  }
2912 
2913  if (DstRC == SrcRC) {
2914  // Nothing to be done
2915  } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
2916  SrcTy == LLT::scalar(64)) {
2917  llvm_unreachable("TableGen can import this case");
2918  return false;
2919  } else if (DstRC == &AArch64::GPR32RegClass &&
2920  SrcRC == &AArch64::GPR64RegClass) {
2921  I.getOperand(1).setSubReg(AArch64::sub_32);
2922  } else {
2923  LLVM_DEBUG(
2924  dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
2925  return false;
2926  }
2927 
2928  I.setDesc(TII.get(TargetOpcode::COPY));
2929  return true;
2930  } else if (DstRB.getID() == AArch64::FPRRegBankID) {
2931  if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) {
2932  I.setDesc(TII.get(AArch64::XTNv4i16));
2933  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2934  return true;
2935  }
2936 
2937  if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
2938  MachineIRBuilder MIB(I);
2939  MachineInstr *Extract = emitExtractVectorElt(
2940  DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
2941  if (!Extract)
2942  return false;
2943  I.eraseFromParent();
2944  return true;
2945  }
2946 
2947  // We might have a vector G_PTRTOINT, in which case just emit a COPY.
2948  if (Opcode == TargetOpcode::G_PTRTOINT) {
2949  assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
2950  I.setDesc(TII.get(TargetOpcode::COPY));
2951  return true;
2952  }
2953  }
2954 
2955  return false;
2956  }
2957 
2958  case TargetOpcode::G_ANYEXT: {
2959  const Register DstReg = I.getOperand(0).getReg();
2960  const Register SrcReg = I.getOperand(1).getReg();
2961 
2962  const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
2963  if (RBDst.getID() != AArch64::GPRRegBankID) {
2964  LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
2965  << ", expected: GPR\n");
2966  return false;
2967  }
2968 
2969  const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
2970  if (RBSrc.getID() != AArch64::GPRRegBankID) {
2971  LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
2972  << ", expected: GPR\n");
2973  return false;
2974  }
2975 
2976  const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
2977 
2978  if (DstSize == 0) {
2979  LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
2980  return false;
2981  }
2982 
2983  if (DstSize != 64 && DstSize > 32) {
2984  LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
2985  << ", expected: 32 or 64\n");
2986  return false;
2987  }
2988  // At this point G_ANYEXT is just like a plain COPY, but we need
2989  // to explicitly form the 64-bit value if any.
2990  if (DstSize > 32) {
2991  Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
2992  BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
2993  .addDef(ExtSrc)
2994  .addImm(0)
2995  .addUse(SrcReg)
2996  .addImm(AArch64::sub_32);
2997  I.getOperand(1).setReg(ExtSrc);
2998  }
2999  return selectCopy(I, TII, MRI, TRI, RBI);
3000  }
3001 
3002  case TargetOpcode::G_ZEXT:
3003  case TargetOpcode::G_SEXT_INREG:
3004  case TargetOpcode::G_SEXT: {
3005  unsigned Opcode = I.getOpcode();
3006  const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3007  const Register DefReg = I.getOperand(0).getReg();
3008  Register SrcReg = I.getOperand(1).getReg();
3009  const LLT DstTy = MRI.getType(DefReg);
3010  const LLT SrcTy = MRI.getType(SrcReg);
3011  unsigned DstSize = DstTy.getSizeInBits();
3012  unsigned SrcSize = SrcTy.getSizeInBits();
3013 
3014  // SEXT_INREG has the same src reg size as dst, the size of the value to be
3015  // extended is encoded in the imm.
3016  if (Opcode == TargetOpcode::G_SEXT_INREG)
3017  SrcSize = I.getOperand(2).getImm();
3018 
3019  if (DstTy.isVector())
3020  return false; // Should be handled by imported patterns.
3021 
3022  assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3023  AArch64::GPRRegBankID &&
3024  "Unexpected ext regbank");
3025 
3026  MachineIRBuilder MIB(I);
3027  MachineInstr *ExtI;
3028 
3029  // First check if we're extending the result of a load with a dest type
3030  // smaller than 32 bits; if so, this zext is redundant. GPR32 is the smallest
3031  // GPR register on AArch64 and all loads which are smaller automatically
3032  // zero-extend the upper bits. E.g.
3033  // %v(s8) = G_LOAD %p, :: (load 1)
3034  // %v2(s32) = G_ZEXT %v(s8)
3035  if (!IsSigned) {
3036  auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
3037  bool IsGPR =
3038  RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3039  if (LoadMI && IsGPR) {
3040  const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3041  unsigned BytesLoaded = MemOp->getSize();
3042  if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3043  return selectCopy(I, TII, MRI, TRI, RBI);
3044  }
3045 
3046  // If we are zero extending from 32 bits to 64 bits, it's possible that
3047  // the instruction implicitly does the zero extend for us. In that case,
3048  // we can just emit a SUBREG_TO_REG.
3049  if (IsGPR && SrcSize == 32 && DstSize == 64) {
3050  // Unlike with the G_LOAD case, we don't want to look through copies
3051  // here.
3052  MachineInstr *Def = MRI.getVRegDef(SrcReg);
3053  if (Def && isDef32(*Def)) {
3054  MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3055  .addImm(0)
3056  .addUse(SrcReg)
3057  .addImm(AArch64::sub_32);
3058 
3059  if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3060  MRI)) {
3061  LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3062  return false;
3063  }
3064 
3065  if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3066  MRI)) {
3067  LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3068  return false;
3069  }
3070 
3071  I.eraseFromParent();
3072  return true;
3073  }
3074  }
3075  }
3076 
3077  if (DstSize == 64) {
3078  if (Opcode != TargetOpcode::G_SEXT_INREG) {
3079  // FIXME: Can we avoid manually doing this?
3080  if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3081  MRI)) {
3082  LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3083  << " operand\n");
3084  return false;
3085  }
3086  SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
3087  {&AArch64::GPR64RegClass}, {})
3088  .addImm(0)
3089  .addUse(SrcReg)
3090  .addImm(AArch64::sub_32)
3091  .getReg(0);
3092  }
3093 
3094  ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3095  {DefReg}, {SrcReg})
3096  .addImm(0)
3097  .addImm(SrcSize - 1);
3098  } else if (DstSize <= 32) {
3099  ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3100  {DefReg}, {SrcReg})
3101  .addImm(0)
3102  .addImm(SrcSize - 1);
3103  } else {
3104  return false;
3105  }
3106 
3107  constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
3108  I.eraseFromParent();
3109  return true;
3110  }
3111 
3112  case TargetOpcode::G_SITOFP:
3113  case TargetOpcode::G_UITOFP:
3114  case TargetOpcode::G_FPTOSI:
3115  case TargetOpcode::G_FPTOUI: {
3116  const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
3117  SrcTy = MRI.getType(I.getOperand(1).getReg());
3118  const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
3119  if (NewOpc == Opcode)
3120  return false;
3121 
3122  I.setDesc(TII.get(NewOpc));
3123  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3124 
3125  return true;
3126  }
3127 
3128  case TargetOpcode::G_FREEZE:
3129  return selectCopy(I, TII, MRI, TRI, RBI);
3130 
3131  case TargetOpcode::G_INTTOPTR:
3132  // The importer is currently unable to import pointer types since they
3133  // didn't exist in SelectionDAG.
3134  return selectCopy(I, TII, MRI, TRI, RBI);
3135 
3136  case TargetOpcode::G_BITCAST:
3137  // Imported SelectionDAG rules can handle every bitcast except those that
3138  // bitcast from a type to the same type. Ideally, these shouldn't occur
3139  // but we might not run an optimizer that deletes them. The other exception
3140  // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3141  // of them.
3142  return selectCopy(I, TII, MRI, TRI, RBI);
3143 
3144  case TargetOpcode::G_SELECT: {
3145  if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
3146  LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
3147  << ", expected: " << LLT::scalar(1) << '\n');
3148  return false;
3149  }
3150 
3151  const Register CondReg = I.getOperand(1).getReg();
3152  const Register TReg = I.getOperand(2).getReg();
3153  const Register FReg = I.getOperand(3).getReg();
3154 
3155  if (tryOptSelect(I))
3156  return true;
3157 
3158  // Make sure to use an unused vreg instead of wzr, so that the peephole
3159  // optimizations will be able to optimize these.
3160  MachineIRBuilder MIB(I);
3161  Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3162  auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3163  .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
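  // ANDS of the condition with the logical immediate 0x1 sets Z based on bit
  // 0 of the condition, so the select emitted below can key off of NE.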
3164  constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3165  if (!emitSelect(I.getOperand(0).getReg(), TReg, FReg, AArch64CC::NE, MIB))
3166  return false;
3167  I.eraseFromParent();
3168  return true;
3169  }
3170  case TargetOpcode::G_ICMP: {
3171  if (Ty.isVector())
3172  return selectVectorICmp(I, MRI);
3173 
3174  if (Ty != LLT::scalar(32)) {
3175  LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3176  << ", expected: " << LLT::scalar(32) << '\n');
3177  return false;
3178  }
3179 
3180  MachineIRBuilder MIRBuilder(I);
3181  auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3182  emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
3183  MIRBuilder);
3184  emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIRBuilder);
3185  I.eraseFromParent();
3186  return true;
3187  }
3188 
3189  case TargetOpcode::G_FCMP: {
3190  MachineIRBuilder MIRBuilder(I);
3191  CmpInst::Predicate Pred =
3192  static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3193  if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(),
3194  MIRBuilder, Pred) ||
3195  !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIRBuilder))
3196  return false;
3197  I.eraseFromParent();
3198  return true;
3199  }
3200  case TargetOpcode::G_VASTART:
3201  return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3202  : selectVaStartAAPCS(I, MF, MRI);
3203  case TargetOpcode::G_INTRINSIC:
3204  return selectIntrinsic(I, MRI);
3205  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3206  return selectIntrinsicWithSideEffects(I, MRI);
3207  case TargetOpcode::G_IMPLICIT_DEF: {
3208  I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3209  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3210  const Register DstReg = I.getOperand(0).getReg();
3211  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3212  const TargetRegisterClass *DstRC =
3213  getRegClassForTypeOnBank(DstTy, DstRB, RBI);
3214  RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3215  return true;
3216  }
3217  case TargetOpcode::G_BLOCK_ADDR: {
3218  if (TM.getCodeModel() == CodeModel::Large) {
3219  materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3220  I.eraseFromParent();
3221  return true;
3222  } else {
3223  I.setDesc(TII.get(AArch64::MOVaddrBA));
3224  auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3225  I.getOperand(0).getReg())
3226  .addBlockAddress(I.getOperand(1).getBlockAddress(),
3227  /* Offset */ 0, AArch64II::MO_PAGE)
3228  .addBlockAddress(
3229  I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3230  AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3231  I.eraseFromParent();
3232  return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3233  }
3234  }
3235  case AArch64::G_DUP: {
3236  // When the scalar of G_DUP is an s8/s16 GPR, it can't be selected by the
3237  // imported patterns, so do it manually here. Avoiding generating an s16 GPR
3238  // is difficult because at RegBankSelect we may end up pessimizing the FPR
3239  // case if we decide to add an anyext to fix this. Manual selection is the
3240  // most robust solution for now.
3241  if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3242  AArch64::GPRRegBankID)
3243  return false; // We expect the fpr regbank case to be imported.
3244  LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3245  if (VecTy == LLT::vector(8, 8))
3246  I.setDesc(TII.get(AArch64::DUPv8i8gpr));
3247  else if (VecTy == LLT::vector(16, 8))
3248  I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3249  else if (VecTy == LLT::vector(4, 16))
3250  I.setDesc(TII.get(AArch64::DUPv4i16gpr));
3251  else if (VecTy == LLT::vector(8, 16))
3252  I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3253  else
3254  return false;
3255  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3256  }
3257  case TargetOpcode::G_INTRINSIC_TRUNC:
3258  return selectIntrinsicTrunc(I, MRI);
3259  case TargetOpcode::G_INTRINSIC_ROUND:
3260  return selectIntrinsicRound(I, MRI);
3261  case TargetOpcode::G_BUILD_VECTOR:
3262  return selectBuildVector(I, MRI);
3263  case TargetOpcode::G_MERGE_VALUES:
3264  return selectMergeValues(I, MRI);
3265  case TargetOpcode::G_UNMERGE_VALUES:
3266  return selectUnmergeValues(I, MRI);
3267  case TargetOpcode::G_SHUFFLE_VECTOR:
3268  return selectShuffleVector(I, MRI);
3269  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3270  return selectExtractElt(I, MRI);
3271  case TargetOpcode::G_INSERT_VECTOR_ELT:
3272  return selectInsertElt(I, MRI);
3273  case TargetOpcode::G_CONCAT_VECTORS:
3274  return selectConcatVectors(I, MRI);
3275  case TargetOpcode::G_JUMP_TABLE:
3276  return selectJumpTable(I, MRI);
3277  case TargetOpcode::G_VECREDUCE_FADD:
3278  case TargetOpcode::G_VECREDUCE_ADD:
3279  return selectReduction(I, MRI);
3280  }
3281 
3282  return false;
3283 }
3284 
3285 bool AArch64InstructionSelector::selectReduction(
3286  MachineInstr &I, MachineRegisterInfo &MRI) const {
3287  Register VecReg = I.getOperand(1).getReg();
3288  LLT VecTy = MRI.getType(VecReg);
3289  if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
3290  // For <2 x i32> ADDPv2i32 generates an FPR64 value, so we need to emit
3291  // a subregister copy afterwards.
3292  if (VecTy == LLT::vector(2, 32)) {
3293  MachineIRBuilder MIB(I);
3294  Register DstReg = I.getOperand(0).getReg();
3295  auto AddP = MIB.buildInstr(AArch64::ADDPv2i32, {&AArch64::FPR64RegClass},
3296  {VecReg, VecReg});
3297  auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3298  .addReg(AddP.getReg(0), 0, AArch64::ssub)
3299  .getReg(0);
3300  RBI.constrainGenericRegister(Copy, AArch64::FPR32RegClass, MRI);
3301  I.eraseFromParent();
3302  return constrainSelectedInstRegOperands(*AddP, TII, TRI, RBI);
3303  }
3304 
3305  unsigned Opc = 0;
3306  if (VecTy == LLT::vector(16, 8))
3307  Opc = AArch64::ADDVv16i8v;
3308  else if (VecTy == LLT::vector(8, 16))
3309  Opc = AArch64::ADDVv8i16v;
3310  else if (VecTy == LLT::vector(4, 32))
3311  Opc = AArch64::ADDVv4i32v;
3312  else if (VecTy == LLT::vector(2, 64))
3313  Opc = AArch64::ADDPv2i64p;
3314  else {
3315  LLVM_DEBUG(dbgs() << "Unhandled type for add reduction");
3316  return false;
3317  }
3318  I.setDesc(TII.get(Opc));
3319  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3320  }
3321 
3322  if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) {
3323  unsigned Opc = 0;
3324  if (VecTy == LLT::vector(2, 32))
3325  Opc = AArch64::FADDPv2i32p;
3326  else if (VecTy == LLT::vector(2, 64))
3327  Opc = AArch64::FADDPv2i64p;
3328  else {
3329  LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction");
3330  return false;
3331  }
3332  I.setDesc(TII.get(Opc));
3333  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3334  }
3335  return false;
3336 }
3337 
3338 bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3339  MachineRegisterInfo &MRI) const {
3340  assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3341  Register JTAddr = I.getOperand(0).getReg();
3342  unsigned JTI = I.getOperand(1).getIndex();
3343  Register Index = I.getOperand(2).getReg();
3344  MachineIRBuilder MIB(I);
3345 
3346  Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3347  Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3348 
3349  MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3350  auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3351  {TargetReg, ScratchReg}, {JTAddr, Index})
3352  .addJumpTableIndex(JTI);
3353  // Build the indirect branch.
3354  MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3355  I.eraseFromParent();
3356  return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3357 }
3358 
3359 bool AArch64InstructionSelector::selectJumpTable(
3360  MachineInstr &I, MachineRegisterInfo &MRI) const {
3361  assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3362  assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3363 
3364  Register DstReg = I.getOperand(0).getReg();
3365  unsigned JTI = I.getOperand(1).getIndex();
3366  // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
3367  MachineIRBuilder MIB(I);
3368  auto MovMI =
3369  MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3370  .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3371  .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3372  I.eraseFromParent();
3373  return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3374 }
3375 
3376 bool AArch64InstructionSelector::selectTLSGlobalValue(
3377  MachineInstr &I, MachineRegisterInfo &MRI) const {
3378  if (!STI.isTargetMachO())
3379  return false;
3380  MachineFunction &MF = *I.getParent()->getParent();
3381  MF.getFrameInfo().setAdjustsStack(true);
3382 
3383  const GlobalValue &GV = *I.getOperand(1).getGlobal();
3384  MachineIRBuilder MIB(I);
3385 
3386  auto LoadGOT =
3387  MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3388  .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3389 
3390  auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3391  {LoadGOT.getReg(0)})
3392  .addImm(0);
3393 
3394  MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3395  // TLS calls preserve all registers except those that absolutely must be
3396  // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3397  // silly).
3398  MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
3399  .addUse(AArch64::X0, RegState::Implicit)
3400  .addDef(AArch64::X0, RegState::Implicit)
3401  .addRegMask(TRI.getTLSCallPreservedMask());
3402 
3403  MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3404  RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3405  MRI);
3406  I.eraseFromParent();
3407  return true;
3408 }
3409 
3410 bool AArch64InstructionSelector::selectIntrinsicTrunc(
3411  MachineInstr &I, MachineRegisterInfo &MRI) const {
3412  const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3413 
3414  // Select the correct opcode.
3415  unsigned Opc = 0;
3416  if (!SrcTy.isVector()) {
3417  switch (SrcTy.getSizeInBits()) {
3418  default:
3419  case 16:
3420  Opc = AArch64::FRINTZHr;
3421  break;
3422  case 32:
3423  Opc = AArch64::FRINTZSr;
3424  break;
3425  case 64:
3426  Opc = AArch64::FRINTZDr;
3427  break;
3428  }
3429  } else {
3430  unsigned NumElts = SrcTy.getNumElements();
3431  switch (SrcTy.getElementType().getSizeInBits()) {
3432  default:
3433  break;
3434  case 16:
3435  if (NumElts == 4)
3436  Opc = AArch64::FRINTZv4f16;
3437  else if (NumElts == 8)
3438  Opc = AArch64::FRINTZv8f16;
3439  break;
3440  case 32:
3441  if (NumElts == 2)
3442  Opc = AArch64::FRINTZv2f32;
3443  else if (NumElts == 4)
3444  Opc = AArch64::FRINTZv4f32;
3445  break;
3446  case 64:
3447  if (NumElts == 2)
3448  Opc = AArch64::FRINTZv2f64;
3449  break;
3450  }
3451  }
3452 
3453  if (!Opc) {
3454  // Didn't get an opcode above, bail.
3455  LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
3456  return false;
3457  }
3458 
3459  // Legalization would have set us up perfectly for this; we just need to
3460  // set the opcode and move on.
3461  I.setDesc(TII.get(Opc));
3462  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3463 }
3464 
3465 bool AArch64InstructionSelector::selectIntrinsicRound(
3466  MachineInstr &I, MachineRegisterInfo &MRI) const {
3467  const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3468 
3469  // Select the correct opcode.
3470  unsigned Opc = 0;
3471  if (!SrcTy.isVector()) {
3472  switch (SrcTy.getSizeInBits()) {
3473  default:
3474  case 16:
3475  Opc = AArch64::FRINTAHr;
3476  break;
3477  case 32:
3478  Opc = AArch64::FRINTASr;
3479  break;
3480  case 64:
3481  Opc = AArch64::FRINTADr;
3482  break;
3483  }
3484  } else {
3485  unsigned NumElts = SrcTy.getNumElements();
3486  switch (SrcTy.getElementType().getSizeInBits()) {
3487  default:
3488  break;
3489  case 16:
3490  if (NumElts == 4)
3491  Opc = AArch64::FRINTAv4f16;
3492  else if (NumElts == 8)
3493  Opc = AArch64::FRINTAv8f16;
3494  break;
3495  case 32:
3496  if (NumElts == 2)
3497  Opc = AArch64::FRINTAv2f32;
3498  else if (NumElts == 4)
3499  Opc = AArch64::FRINTAv4f32;
3500  break;
3501  case 64:
3502  if (NumElts == 2)
3503  Opc = AArch64::FRINTAv2f64;
3504  break;
3505  }
3506  }
3507 
3508  if (!Opc) {
3509  // Didn't get an opcode above, bail.
3510  LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
3511  return false;
3512  }
3513 
3514  // Legalization would have set us up perfectly for this; we just need to
3515  // set the opcode and move on.
3516  I.setDesc(TII.get(Opc));
3517  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3518 }
3519 
3520 bool AArch64InstructionSelector::selectVectorICmp(
3521  MachineInstr &I, MachineRegisterInfo &MRI) const {
3522  Register DstReg = I.getOperand(0).getReg();
3523  LLT DstTy = MRI.getType(DstReg);
3524  Register SrcReg = I.getOperand(2).getReg();
3525  Register Src2Reg = I.getOperand(3).getReg();
3526  LLT SrcTy = MRI.getType(SrcReg);
3527 
3528  unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
3529  unsigned NumElts = DstTy.getNumElements();
3530 
3531  // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
3532  // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
3533  // Third index is cc opcode:
3534  // 0 == eq
3535  // 1 == ugt
3536  // 2 == uge
3537  // 3 == ult
3538  // 4 == ule
3539  // 5 == sgt
3540  // 6 == sge
3541  // 7 == slt
3542  // 8 == sle
3543  // ne is done by negating 'eq' result.
3544 
3545  // This table below assumes that for some comparisons the operands will be
3546  // commuted.
3547  // ult op == commute + ugt op
3548  // ule op == commute + uge op
3549  // slt op == commute + sgt op
3550  // sle op == commute + sge op
3551  unsigned PredIdx = 0;
3552  bool SwapOperands = false;
3553  CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
3554  switch (Pred) {
3555  case CmpInst::ICMP_NE:
3556  case CmpInst::ICMP_EQ:
3557  PredIdx = 0;
3558  break;
3559  case CmpInst::ICMP_UGT:
3560  PredIdx = 1;
3561  break;
3562  case CmpInst::ICMP_UGE:
3563  PredIdx = 2;
3564  break;
3565  case CmpInst::ICMP_ULT:
3566  PredIdx = 3;
3567  SwapOperands = true;
3568  break;
3569  case CmpInst::ICMP_ULE:
3570  PredIdx = 4;
3571  SwapOperands = true;
3572  break;
3573  case CmpInst::ICMP_SGT:
3574  PredIdx = 5;
3575  break;
3576  case CmpInst::ICMP_SGE:
3577  PredIdx = 6;
3578  break;
3579  case CmpInst::ICMP_SLT:
3580  PredIdx = 7;
3581  SwapOperands = true;
3582  break;
3583  case CmpInst::ICMP_SLE:
3584  PredIdx = 8;
3585  SwapOperands = true;
3586  break;
3587  default:
3588  llvm_unreachable("Unhandled icmp predicate");
3589  return false;
3590  }
3591 
3592  // This table obviously should be tablegen'd when we have our GISel native
3593  // tablegen selector.
3594 
3595  static const unsigned OpcTable[4][4][9] = {
3596  {
3597  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3598  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3599  0 /* invalid */},
3600  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3601  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3602  0 /* invalid */},
3603  {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
3604  AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
3605  AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
3606  {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
3607  AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
3608  AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
3609  },
3610  {
3611  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3612  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3613  0 /* invalid */},
3614  {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
3615  AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
3616  AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
3617  {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
3618  AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
3619  AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
3620  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3621  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3622  0 /* invalid */}
3623  },
3624  {
3625  {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
3626  AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
3627  AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
3628  {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
3629  AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
3630  AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
3631  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3632  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3633  0 /* invalid */},
3634  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3635  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3636  0 /* invalid */}
3637  },
3638  {
3639  {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
3640  AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
3641  AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
3642  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3643  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3644  0 /* invalid */},
3645  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3646  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3647  0 /* invalid */},
3648  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3649  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3650  0 /* invalid */}
3651  },
3652  };
3653  unsigned EltIdx = Log2_32(SrcEltSize / 8);
3654  unsigned NumEltsIdx = Log2_32(NumElts / 2);
3655  unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
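  // For example, a <4 x s32> ult compare gives EltIdx = Log2_32(32 / 8) = 2,
  // NumEltsIdx = Log2_32(4 / 2) = 1 and PredIdx = 3, selecting CMHIv4i32 with
  // the operands swapped (ult is implemented as a commuted ugt).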
3656  if (!Opc) {
3657  LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
3658  return false;
3659  }
3660 
3661  const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3662  const TargetRegisterClass *SrcRC =
3663  getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
3664  if (!SrcRC) {
3665  LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3666  return false;
3667  }
3668 
3669  unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
3670  if (SrcTy.getSizeInBits() == 128)
3671  NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
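  // NotOpc is only nonzero for the ne predicate; the 16-byte NOT is used for
  // 128-bit vectors, the 8-byte one otherwise.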
3672 
3673  if (SwapOperands)
3674  std::swap(SrcReg, Src2Reg);
3675 
3676  MachineIRBuilder MIB(I);
3677  auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
3678  constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3679 
3680  // Invert if we had a 'ne' cc.
3681  if (NotOpc) {
3682  Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
3683  constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3684  } else {
3685  MIB.buildCopy(DstReg, Cmp.getReg(0));
3686  }
3687  RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
3688  I.eraseFromParent();
3689  return true;
3690 }
3691 
3692 MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3693  unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3694  MachineIRBuilder &MIRBuilder) const {
3695  auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3696 
3697  auto BuildFn = [&](unsigned SubregIndex) {
3698  auto Ins =
3699  MIRBuilder
3700  .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3701  .addImm(SubregIndex);
3702  constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3703  constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3704  return &*Ins;
3705  };
3706 
3707  switch (EltSize) {
3708  case 16:
3709  return BuildFn(AArch64::hsub);
3710  case 32:
3711  return BuildFn(AArch64::ssub);
3712  case 64:
3713  return BuildFn(AArch64::dsub);
3714  default:
3715  return nullptr;
3716  }
3717 }
3718 
3719 bool AArch64InstructionSelector::selectMergeValues(
3720  MachineInstr &I, MachineRegisterInfo &MRI) const {
3721  assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3722  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3723  const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3724  assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3725  const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3726 
3727  if (I.getNumOperands() != 3)
3728  return false;
3729 
3730  // Merging 2 s64s into an s128.
3731  if (DstTy == LLT::scalar(128)) {
3732  if (SrcTy.getSizeInBits() != 64)
3733  return false;
3734  MachineIRBuilder MIB(I);
3735  Register DstReg = I.getOperand(0).getReg();
3736  Register Src1Reg = I.getOperand(1).getReg();
3737  Register Src2Reg = I.getOperand(2).getReg();
3738  auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3739  MachineInstr *InsMI =
3740  emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
3741  if (!InsMI)
3742  return false;
3743  MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
3744  Src2Reg, /* LaneIdx */ 1, RB, MIB);
3745  if (!Ins2MI)
3746  return false;
3747  constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3748  constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
3749  I.eraseFromParent();
3750  return true;
3751  }
3752 
3753  if (RB.getID() != AArch64::GPRRegBankID)
3754  return false;
3755 
3756  if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3757  return false;
3758 
3759  auto *DstRC = &AArch64::GPR64RegClass;
3760  Register SubToRegDef = MRI.createVirtualRegister(DstRC);
3761  MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3762  TII.get(TargetOpcode::SUBREG_TO_REG))
3763  .addDef(SubToRegDef)
3764  .addImm(0)
3765  .addUse(I.getOperand(1).getReg())
3766  .addImm(AArch64::sub_32);
3767  Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
3768  // Need to anyext the second scalar before we can use bfm
3769  MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3770  TII.get(TargetOpcode::SUBREG_TO_REG))
3771  .addDef(SubToRegDef2)
3772  .addImm(0)
3773  .addUse(I.getOperand(2).getReg())
3774  .addImm(AArch64::sub_32);
3775  MachineInstr &BFM =
3776  *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
3777  .addDef(I.getOperand(0).getReg())
3778  .addUse(SubToRegDef)
3779  .addUse(SubToRegDef2)
3780  .addImm(32)
3781  .addImm(31);
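  // BFM Xd, Xn, #32, #31 is the BFI form: it inserts the low 32 bits of the
  // second (anyext'd) scalar into bits [63:32] of the destination, whose low
  // half already holds the first scalar.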
3782  constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
3783  constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
3784  constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
3785  I.eraseFromParent();
3786  return true;
3787 }
3788 
3789 static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3790  const unsigned EltSize) {
3791  // Choose a lane copy opcode and subregister based off of the size of the
3792  // vector's elements.
3793  switch (EltSize) {
3794  case 16:
3795  CopyOpc = AArch64::CPYi16;
3796  ExtractSubReg = AArch64::hsub;
3797  break;
3798  case 32:
3799  CopyOpc = AArch64::CPYi32;
3800  ExtractSubReg = AArch64::ssub;
3801  break;
3802  case 64:
3803  CopyOpc = AArch64::CPYi64;
3804  ExtractSubReg = AArch64::dsub;
3805  break;
3806  default:
3807  // Unknown size, bail out.
3808  LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
3809  return false;
3810  }
3811  return true;
3812 }
3813 
3814 MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3815  Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
3816  Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
3817  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3818  unsigned CopyOpc = 0;
3819  unsigned ExtractSubReg = 0;
3820  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
3821  LLVM_DEBUG(
3822  dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
3823  return nullptr;
3824  }
3825 
3826  const TargetRegisterClass *DstRC =
3827  getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
3828  if (!DstRC) {
3829  LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
3830  return nullptr;
3831  }
3832 
3833  const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
3834  const LLT &VecTy = MRI.getType(VecReg);
3835  const TargetRegisterClass *VecRC =
3836  getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
3837  if (!VecRC) {
3838  LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3839  return nullptr;
3840  }
3841 
3842  // The register that we're going to copy into.
3843  Register InsertReg = VecReg;
3844  if (!DstReg)
3845  DstReg = MRI.createVirtualRegister(DstRC);
3846  // If the lane index is 0, we just use a subregister COPY.
3847  if (LaneIdx == 0) {
3848  auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
3849  .addReg(VecReg, 0, ExtractSubReg);
3850  RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3851  return &*Copy;
3852  }
3853 
3854  // Lane copies require 128-bit wide registers. If we're dealing with an
3855  // unpacked vector, then we need to move up to that width. Insert an implicit
3856  // def and a subregister insert to get us there.
3857  if (VecTy.getSizeInBits() != 128) {
3858  MachineInstr *ScalarToVector = emitScalarToVector(
3859  VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
3860  if (!ScalarToVector)
3861  return nullptr;
3862  InsertReg = ScalarToVector->getOperand(0).getReg();
3863  }
3864 
3865  MachineInstr *LaneCopyMI =
3866  MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
3867  constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
3868 
3869  // Make sure that we actually constrain the initial copy.
3870  RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3871  return LaneCopyMI;
3872 }
3873 
3874 bool AArch64InstructionSelector::selectExtractElt(
3875  MachineInstr &I, MachineRegisterInfo &MRI) const {
3876  assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
3877  "unexpected opcode!");
3878  Register DstReg = I.getOperand(0).getReg();
3879  const LLT NarrowTy = MRI.getType(DstReg);
3880  const Register SrcReg = I.getOperand(1).getReg();
3881  const LLT WideTy = MRI.getType(SrcReg);
3882  (void)WideTy;
3883  assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
3884  "source register size too small!");
3885  assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
3886 
3887  // Need the lane index to determine the correct copy opcode.
3888  MachineOperand &LaneIdxOp = I.getOperand(2);
3889  assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
3890 
3891  if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
3892  LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
3893  return false;
3894  }
3895 
3896  // Find the index to extract from.
3897  auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
3898  if (!VRegAndVal)
3899  return false;
3900  unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
3901 
3902  MachineIRBuilder MIRBuilder(I);
3903 
3904  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3905  MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
3906  LaneIdx, MIRBuilder);
3907  if (!Extract)
3908  return false;
3909 
3910  I.eraseFromParent();
3911  return true;
3912 }
3913 
3914 bool AArch64InstructionSelector::selectSplitVectorUnmerge(
3915  MachineInstr &I, MachineRegisterInfo &MRI) const {
3916  unsigned NumElts = I.getNumOperands() - 1;
3917  Register SrcReg = I.getOperand(NumElts).getReg();
3918  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
3919  const LLT SrcTy = MRI.getType(SrcReg);
3920 
3921  assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
3922  if (SrcTy.getSizeInBits() > 128) {
3923  LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
3924  return false;
3925  }
3926 
3927  MachineIRBuilder MIB(I);
3928 
3929  // We implement a split vector operation by treating the sub-vectors as
3930  // scalars and extracting them.
3931  const RegisterBank &DstRB =
3932  *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
3933  for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
3934  Register Dst = I.getOperand(OpIdx).getReg();
3935  MachineInstr *Extract =
3936  emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
3937  if (!Extract)
3938  return false;
3939  }
3940  I.eraseFromParent();
3941  return true;
3942 }
3943 
3944 bool AArch64InstructionSelector::selectUnmergeValues(
3945  MachineInstr &I, MachineRegisterInfo &MRI) const {
3946  assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
3947  "unexpected opcode");
3948 
3949  // TODO: Handle unmerging into GPRs and from scalars to scalars.
3950  if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
3951  AArch64::FPRRegBankID ||
3952  RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3953  AArch64::FPRRegBankID) {
3954  LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
3955  "currently unsupported.\n");
3956  return false;
3957  }
3958 
3959  // The last operand is the vector source register, and every other operand is
3960  // a register to unpack into.
3961  unsigned NumElts = I.getNumOperands() - 1;
3962  Register SrcReg = I.getOperand(NumElts).getReg();
3963  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
3964  const LLT WideTy = MRI.getType(SrcReg);
3965  (void)WideTy;
3966  assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
3967  "can only unmerge from vector or s128 types!");
3968  assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
3969  "source register size too small!");
3970 
3971  if (!NarrowTy.isScalar())
3972  return selectSplitVectorUnmerge(I, MRI);
3973 
3974  MachineIRBuilder MIB(I);
3975 
3976  // Choose a lane copy opcode and subregister based off of the size of the
3977  // vector's elements.
3978  unsigned CopyOpc = 0;
3979  unsigned ExtractSubReg = 0;
3980  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
3981  return false;
3982 
3983  // Set up for the lane copies.
3984  MachineBasicBlock &MBB = *I.getParent();
3985 
3986  // Stores the registers we'll be copying from.
3987  SmallVector<Register, 4> InsertRegs;
3988 
3989  // We'll use the first register twice, so we only need NumElts-1 registers.
3990  unsigned NumInsertRegs = NumElts - 1;
3991 
3992  // If our elements fit into exactly 128 bits, then we can copy from the source
3993  // directly. Otherwise, we need to do a bit of setup with some subregister
3994  // inserts.
3995  if (NarrowTy.getSizeInBits() * NumElts == 128) {
3996  InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
3997  } else {
3998  // No. We have to perform subregister inserts. For each insert, create an
3999  // implicit def and a subregister insert, and save the register we create.
4000  for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4001  Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4002  MachineInstr &ImpDefMI =
4003  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
4004  ImpDefReg);
4005 
4006  // Now, create the subregister insert from SrcReg.
4007  Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4008  MachineInstr &InsMI =
4009  *BuildMI(MBB, I, I.getDebugLoc(),
4010  TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4011  .addUse(ImpDefReg)
4012  .addUse(SrcReg)
4013  .addImm(AArch64::dsub);
4014 
4015  constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
4016  constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
4017 
4018  // Save the register so that we can copy from it after.
4019  InsertRegs.push_back(InsertReg);
4020  }
4021  }
4022 
4023  // Now that we've created any necessary subregister inserts, we can
4024  // create the copies.
4025  //
4026  // Perform the first copy separately as a subregister copy.
4027  Register CopyTo = I.getOperand(0).getReg();
4028  auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4029  .addReg(InsertRegs[0], 0, ExtractSubReg);
4030  constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
4031 
4032  // Now, perform the remaining copies as vector lane copies.
4033  unsigned LaneIdx = 1;
4034  for (Register InsReg : InsertRegs) {
4035  Register CopyTo = I.getOperand(LaneIdx).getReg();
4036  MachineInstr &CopyInst =
4037  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
4038  .addUse(InsReg)
4039  .addImm(LaneIdx);
4040  constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
4041  ++LaneIdx;
4042  }
4043 
4044  // Separately constrain the first copy's destination. Because of the
4045  // limitation in constrainOperandRegClass, we can't guarantee that this will
4046  // actually be constrained. So, do it ourselves using the second operand.
4047  const TargetRegisterClass *RC =
4048  MRI.getRegClassOrNull(I.getOperand(1).getReg());
4049  if (!RC) {
4050  LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
4051  return false;
4052  }
4053 
4054  RBI.constrainGenericRegister(CopyTo, *RC, MRI);
4055  I.eraseFromParent();
4056  return true;
4057 }
4058 
4059 bool AArch64InstructionSelector::selectConcatVectors(
4060  MachineInstr &I, MachineRegisterInfo &MRI) const {
4061  assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4062  "Unexpected opcode");
4063  Register Dst = I.getOperand(0).getReg();
4064  Register Op1 = I.getOperand(1).getReg();
4065  Register Op2 = I.getOperand(2).getReg();
4066  MachineIRBuilder MIRBuilder(I);
4067  MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
4068  if (!ConcatMI)
4069  return false;
4070  I.eraseFromParent();
4071  return true;
4072 }
4073 
4074 unsigned
4075 AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4076  MachineFunction &MF) const {
4077  Type *CPTy = CPVal->getType();
4078  Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
4079 
4080  MachineConstantPool *MCP = MF.getConstantPool();
4081  return MCP->getConstantPoolIndex(CPVal, Alignment);
4082 }
4083 
4084 MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4085  const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4086  unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());
4087 
4088  auto Adrp =
4089  MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4090  .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
4091 
4092  MachineInstr *LoadMI = nullptr;
4093  switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
4094  case 16:
4095  LoadMI =
4096  &*MIRBuilder
4097  .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
4098  .addConstantPoolIndex(CPIdx, 0,
4099  AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4100  break;
4101  case 8:
4102  LoadMI =
4103  &*MIRBuilder
4104  .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
4105  .addConstantPoolIndex(CPIdx, 0,
4106  AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4107  break;
4108  case 4:
4109  LoadMI =
4110  &*MIRBuilder
4111  .buildInstr(AArch64::LDRSui, {&AArch64::FPR32RegClass}, {Adrp})
4112  .addConstantPoolIndex(CPIdx, 0,
4113  AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4114  break;
4115  default:
4116  LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4117  << *CPVal->getType());
4118  return nullptr;
4119  }
4120  constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
4121  constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
4122  return LoadMI;
4123 }
4124 
4125 /// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
4126 /// size and RB.
4127 static std::pair<unsigned, unsigned>
4128 getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4129  unsigned Opc, SubregIdx;
4130  if (RB.getID() == AArch64::GPRRegBankID) {
4131  if (EltSize == 16) {
4132  Opc = AArch64::INSvi16gpr;
4133  SubregIdx = AArch64::ssub;
4134  } else if (EltSize == 32) {
4135  Opc = AArch64::INSvi32gpr;
4136  SubregIdx = AArch64::ssub;
4137  } else if (EltSize == 64) {
4138  Opc = AArch64::INSvi64gpr;
4139  SubregIdx = AArch64::dsub;
4140  } else {
4141  llvm_unreachable("invalid elt size!");
4142  }
4143  } else {
4144  if (EltSize == 8) {
4145  Opc = AArch64::INSvi8lane;
4146  SubregIdx = AArch64::bsub;
4147  } else if (EltSize == 16) {
4148  Opc = AArch64::INSvi16lane;
4149  SubregIdx = AArch64::hsub;
4150  } else if (EltSize == 32) {
4151  Opc = AArch64::INSvi32lane;
4152  SubregIdx = AArch64::ssub;
4153  } else if (EltSize == 64) {
4154  Opc = AArch64::INSvi64lane;
4155  SubregIdx = AArch64::dsub;
4156  } else {
4157  llvm_unreachable("invalid elt size!");
4158  }
4159  }
4160  return std::make_pair(Opc, SubregIdx);
4161 }
4162 
4163 MachineInstr *AArch64InstructionSelector::emitInstr(
4164  unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4165  std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4166  const ComplexRendererFns &RenderFns) const {
4167  assert(Opcode && "Expected an opcode?");
4168  assert(!isPreISelGenericOpcode(Opcode) &&
4169  "Function should only be used to produce selected instructions!");
4170  auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4171  if (RenderFns)
4172  for (auto &Fn : *RenderFns)
4173  Fn(MI);
4174  constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4175  return &*MI;
4176 }
4177 
4178 MachineInstr *AArch64InstructionSelector::emitAddSub(
4179  const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4180  Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4181  MachineIRBuilder &MIRBuilder) const {
4182  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4183  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4184  auto Ty = MRI.getType(LHS.getReg());
4185  assert(!Ty.isVector() && "Expected a scalar or pointer?");
4186  unsigned Size = Ty.getSizeInBits();
4187  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4188  bool Is32Bit = Size == 32;
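  // The rows of AddrModeAndSizeToOpcode are: [0] ri (positive immediate),
  // [1] rs (shifted register), [2] rr, [3] ri with the negated immediate
  // (i.e. the inverse instruction), [4] rx (extended register); each entry
  // is {64-bit opcode, 32-bit opcode}.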
4189 
4190  // INSTRri form with positive arithmetic immediate.
4191  if (auto Fns = selectArithImmed(RHS))
4192  return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4193  MIRBuilder, Fns);
4194 
4195  // INSTRri form with negative arithmetic immediate.
4196  if (auto Fns = selectNegArithImmed(RHS))
4197  return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4198  MIRBuilder, Fns);
4199 
4200  // INSTRrx form.
4201  if (auto Fns = selectArithExtendedRegister(RHS))
4202  return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4203  MIRBuilder, Fns);
4204 
4205  // INSTRrs form.
4206  if (auto Fns = selectShiftedRegister(RHS))
4207  return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4208  MIRBuilder, Fns);
4209  return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4210  MIRBuilder);
4211 }
4212 
4213 MachineInstr *
4214 AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4215  MachineOperand &RHS,
4216  MachineIRBuilder &MIRBuilder) const {
4217  const std::array<std::array<unsigned, 2>, 5> OpcTable{
4218  {{AArch64::ADDXri, AArch64::ADDWri},
4219  {AArch64::ADDXrs, AArch64::ADDWrs},
4220  {AArch64::ADDXrr, AArch64::ADDWrr},
4221  {AArch64::SUBXri, AArch64::SUBWri},
4222  {AArch64::ADDXrx, AArch64::ADDWrx}}};
4223  return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4224 }
4225 
4226 MachineInstr *
4227 AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4228  MachineOperand &RHS,
4229  MachineIRBuilder &MIRBuilder) const {
4230  const std::array<std::array<unsigned, 2>, 5> OpcTable{
4231  {{AArch64::ADDSXri, AArch64::ADDSWri},
4232  {AArch64::ADDSXrs, AArch64::ADDSWrs},
4233  {AArch64::ADDSXrr, AArch64::ADDSWrr},
4234  {AArch64::SUBSXri, AArch64::SUBSWri},
4235  {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4236  return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4237 }
4238 
4239 MachineInstr *
4240 AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4241  MachineOperand &RHS,
4242  MachineIRBuilder &MIRBuilder) const {
4243  const std::array<std::array<unsigned, 2>, 5> OpcTable{
4244  {{AArch64::SUBSXri, AArch64::SUBSWri},
4245  {AArch64::SUBSXrs, AArch64::SUBSWrs},
4246  {AArch64::SUBSXrr, AArch64::SUBSWrr},
4247  {AArch64::ADDSXri, AArch64::ADDSWri},
4248  {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4249  return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4250 }
4251 
4252 MachineInstr *
4253 AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4254  MachineIRBuilder &MIRBuilder) const {
4255  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4256  bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4257  auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4258  return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4259 }
4260 
4261 MachineInstr *
4262 AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4263  MachineIRBuilder &MIRBuilder) const {
4264  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4265  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4266  LLT Ty = MRI.getType(LHS.getReg());
4267  unsigned RegSize = Ty.getSizeInBits();
4268  bool Is32Bit = (RegSize == 32);
4269  const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4270  {AArch64::ANDSXrs, AArch64::ANDSWrs},
4271  {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4272  // ANDS needs a logical immediate for its immediate form. Check if we can
4273  // fold one in.
4274  if (auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4275  int64_t Imm = ValAndVReg->Value.getSExtValue();
4276 
4277  if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
4278  auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4279  TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
4280  constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4281  return &*TstMI;
4282  }
4283  }
4284 
4285  if (auto Fns = selectLogicalShiftedRegister(RHS))
4286  return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4287  return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4288 }
4289 
4290 MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4291  MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4292  MachineIRBuilder &MIRBuilder) const {
4293  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4294  assert(Predicate.isPredicate() && "Expected predicate?");
4295  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4296  LLT CmpTy = MRI.getType(LHS.getReg());
4297  assert(!CmpTy.isVector() && "Expected scalar or pointer");
4298  unsigned Size = CmpTy.getSizeInBits();
4299  (void)Size;
4300  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4301  // Fold the compare into a cmn or tst if possible.
4302  if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4303  return FoldCmp;
4304  auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
4305  return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4306 }
4307 
4308 MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4309  Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4310  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4311 #ifndef NDEBUG
4312  LLT Ty = MRI.getType(Dst);
4313  assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4314  "Expected a 32-bit scalar register?");
4315 #endif
4316  const Register ZeroReg = AArch64::WZR;
4317  auto EmitCSet = [&](Register CsetDst, AArch64CC::CondCode CC) {
4318  auto CSet =
4319  MIRBuilder.buildInstr(AArch64::CSINCWr, {CsetDst}, {ZeroReg, ZeroReg})
4320  .addImm(getInvertedCondCode(CC));
4321  constrainSelectedInstRegOperands(*CSet, TII, TRI, RBI);
4322  return &*CSet;
4323  };
4324 
4325  AArch64CC::CondCode CC1, CC2;
4326  changeFCMPPredToAArch64CC(Pred, CC1, CC2);
4327  if (CC2 == AArch64CC::AL)
4328  return EmitCSet(Dst, CC1);
4329 
4330  const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4331  Register Def1Reg = MRI.createVirtualRegister(RC);
4332  Register Def2Reg = MRI.createVirtualRegister(RC);
4333  EmitCSet(Def1Reg, CC1);
4334  EmitCSet(Def2Reg, CC2);
4335  auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4336  constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4337  return &*OrMI;
4338 }
4339 
4340 MachineInstr *
4341 AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS,
4342  MachineIRBuilder &MIRBuilder,
4343  Optional<CmpInst::Predicate> Pred) const {
4344  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4345  LLT Ty = MRI.getType(LHS);
4346  if (Ty.isVector())
4347  return nullptr;
4348  unsigned OpSize = Ty.getSizeInBits();
4349  if (OpSize != 32 && OpSize != 64)
4350  return nullptr;
4351 
4352  // If this is a compare against +0.0, then we don't have
4353  // to explicitly materialize a constant.
4354  const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4355  bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4356 
4357  auto IsEqualityPred = [](CmpInst::Predicate P) {
4358  return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4359  P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4360  };
4361  if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4362  // Try commutating the operands.
4363  const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4364  if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4365  ShouldUseImm = true;
4366  std::swap(LHS, RHS);
4367  }
4368  }
4369  unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
4370  {AArch64::FCMPSri, AArch64::FCMPDri}};
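  // Row 0 holds the register-register compares, row 1 the compare-against-+0.0
  // forms; the column picks the 32-bit (S) or 64-bit (D) variant.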
4371  unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64];
4372 
4373  // Partially build the compare. Decide if we need to add a use for the
4374  // third operand based off whether or not we're comparing against 0.0.
4375  auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4376  if (!ShouldUseImm)
4377  CmpMI.addUse(RHS);
4378  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4379  return &*CmpMI;
4380 }
4381 
4382 MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4383  Optional<Register> Dst, Register Op1, Register Op2,
4384  MachineIRBuilder &MIRBuilder) const {
4385  // We implement a vector concat by:
4386  // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4387  // 2. Insert the upper vector into the destination's upper element
4388  // TODO: some of this code is common with G_BUILD_VECTOR handling.
4389  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4390 
4391  const LLT Op1Ty = MRI.getType(Op1);
4392  const LLT Op2Ty = MRI.getType(Op2);
4393 
4394  if (Op1Ty != Op2Ty) {
4395  LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4396  return nullptr;
4397  }
4398  assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4399 
4400  if (Op1Ty.getSizeInBits() >= 128) {
4401  LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4402  return nullptr;
4403  }
4404 
4405  // At the moment we just support 64 bit vector concats.
4406  if (Op1Ty.getSizeInBits() != 64) {
4407  LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
4408  return nullptr;
4409  }
4410 
4411  const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4412  const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4413  const TargetRegisterClass *DstRC =
4414  getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
4415 
4416  MachineInstr *WidenedOp1 =
4417  emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4418  MachineInstr *WidenedOp2 =
4419  emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4420  if (!WidenedOp1 || !WidenedOp2) {
4421  LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4422  return nullptr;
4423  }
4424 
4425  // Now do the insert of the upper element.
4426  unsigned InsertOpc, InsSubRegIdx;
4427  std::tie(InsertOpc, InsSubRegIdx) =
4428  getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4429 
4430  if (!Dst)
4431  Dst = MRI.createVirtualRegister(DstRC);
4432  auto InsElt =
4433  MIRBuilder
4434  .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4435  .addImm(1) /* Lane index */
4436  .addUse(WidenedOp2->getOperand(0).getReg())
4437  .addImm(0);
4438  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4439  return &*InsElt;
4440 }
4441 
4442 MachineInstr *
4443 AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
4444  MachineIRBuilder &MIRBuilder,
4445  Register SrcReg) const {
4446  // CSINC increments the result when the predicate is false. Invert it.
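  // Using the same register for both CSINC sources is the cset idiom: the
  // result is SrcReg when the inverted condition holds and SrcReg + 1 (i.e.
  // 1 when SrcReg is WZR) otherwise.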
4447  const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
4448  CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
4449  auto I = MIRBuilder.buildInstr(AArch64::CSINCWr, {DefReg}, {SrcReg, SrcReg})
4450  .addImm(InvCC);
4451  constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
4452  return &*I;
4453 }
4454 
4455 std::pair<MachineInstr *, AArch64CC::CondCode>
4456 AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4457  MachineOperand &LHS,
4458  MachineOperand &RHS,
4459  MachineIRBuilder &MIRBuilder) const {
4460  switch (Opcode) {
4461  default:
4462  llvm_unreachable("Unexpected opcode!");
4463  case TargetOpcode::G_SADDO:
4464  return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4465  case TargetOpcode::G_UADDO:
4466  return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4467  case TargetOpcode::G_SSUBO:
4468  return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4469  case TargetOpcode::G_USUBO:
4470  return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4471  }
4472 }
4473 
4474 bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
4475  MachineIRBuilder MIB(I);
4476  MachineRegisterInfo &MRI = *MIB.getMRI();
4477  // We want to recognize this pattern:
4478  //
4479  // $z = G_FCMP pred, $x, $y
4480  // ...
4481  // $w = G_SELECT $z, $a, $b
4482  //
4483  // Where the value of $z is *only* ever used by the G_SELECT (possibly with
4484  // some copies/truncs in between.)
4485  //
4486  // If we see this, then we can emit something like this:
4487  //
4488  // fcmp $x, $y
4489  // fcsel $w, $a, $b, pred
4490  //
4491  // Rather than emitting both of the rather long sequences in the standard
4492  // G_FCMP/G_SELECT select methods.
4493 
4494  // First, check if the condition is defined by a compare.
4495  MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
4496  while (CondDef) {
4497  // We can only fold if all of the defs have one use.
4498  Register CondDefReg = CondDef->getOperand(0).getReg();
4499  if (!MRI.hasOneNonDBGUse(CondDefReg)) {
4500  // Unless it's another select.
4501  for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
4502  if (CondDef == &UI)
4503  continue;
4504  if (UI.getOpcode() != TargetOpcode::G_SELECT)
4505  return false;
4506  }
4507  }
4508 
4509  // We can skip over G_TRUNC since the condition is 1-bit.
4510  // Truncating/extending can have no impact on the value.
4511  unsigned Opc = CondDef->getOpcode();
4512  if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
4513  break;
4514 
4515  // Can't see past copies from physregs.
4516  if (Opc == TargetOpcode::COPY &&
4517  Register::isPhysicalRegister(CondDef->getOperand(1).getReg()))
4518  return false;
4519 
4520  CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
4521  }
4522 
4523  // Is the condition defined by a compare?
4524  if (!CondDef)
4525  return false;
4526 
4527  unsigned CondOpc = CondDef->getOpcode();
4528  if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
4529  return false;
4530 
4531  AArch64CC::CondCode CondCode;
4532  if (CondOpc == TargetOpcode::G_ICMP) {
4533  auto Pred =
4534  static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
4535  CondCode = changeICMPPredToAArch64CC(Pred);
4536  emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
4537  CondDef->getOperand(1), MIB);
4538  } else {
4539  // Get the condition code for the select.
4540  auto Pred =
4541  static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
4542  AArch64CC::CondCode CondCode2;
4543  changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
4544 
4545  // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
4546  // instructions to emit the comparison.
4547  // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
4548  // unnecessary.
4549  if (CondCode2 != AArch64CC::AL)
4550  return false;
4551 
4552  if (!emitFPCompare(CondDef->getOperand(2).getReg(),
4553  CondDef->getOperand(3).getReg(), MIB)) {
4554  LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
4555  return false;
4556  }
4557  }
4558 
4559  // Emit the select.
4560  emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
4561  I.getOperand(3).getReg(), CondCode, MIB);
4562  I.eraseFromParent();
4563  return true;
4564 }
4565 
4566 MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
4567  MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4568  MachineIRBuilder &MIRBuilder) const {
4569  assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
4570  "Unexpected MachineOperand");
4571  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4572  // We want to find this sort of thing:
4573  // x = G_SUB 0, y
4574  // G_ICMP z, x
4575  //
4576  // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
4577  // e.g:
4578  //
4579  // cmn z, y
4580 
4581  // Check if the RHS or LHS of the G_ICMP is defined by a SUB
4582  MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
4583  MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
4584  auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
4585  // Given this:
4586  //
4587  // x = G_SUB 0, y
4588  // G_ICMP x, z
4589  //
4590  // Produce this:
4591  //
4592  // cmn y, z
4593  if (isCMN(LHSDef, P, MRI))
4594  return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
4595 
4596  // Same idea here, but with the RHS of the compare instead:
4597  //
4598  // Given this:
4599  //
4600  // x = G_SUB 0, y
4601  // G_ICMP z, x
4602  //
4603  // Produce this:
4604  //
4605  // cmn z, y
4606  if (isCMN(RHSDef, P, MRI))
4607  return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
4608 
4609  // Given this:
4610  //
4611  // z = G_AND x, y
4612  // G_ICMP z, 0
4613  //
4614  // Produce this if the compare is signed:
4615  //
4616  // tst x, y
4617  if (!CmpInst::isUnsigned(P) && LHSDef &&
4618  LHSDef->getOpcode() == TargetOpcode::G_AND) {
4619  // Make sure that the RHS is 0.
4620  auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
4621  if (!ValAndVReg || ValAndVReg->Value != 0)
4622  return nullptr;
4623 
4624  return emitTST(LHSDef->getOperand(1),
4625  LHSDef->getOperand(2), MIRBuilder);
4626  }
4627 
4628  return nullptr;
4629 }
4630 
4631 bool AArch64InstructionSelector::selectShuffleVector(
4632  MachineInstr &I, MachineRegisterInfo &MRI) const {
4633  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
4634  Register Src1Reg = I.getOperand(1).getReg();
4635  const LLT Src1Ty = MRI.getType(Src1Reg);
4636  Register Src2Reg = I.getOperand(2).getReg();
4637  const LLT Src2Ty = MRI.getType(Src2Reg);
4638  ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
4639 
4640  MachineBasicBlock &MBB = *I.getParent();
4641  MachineFunction &MF = *MBB.getParent();
4642  LLVMContext &Ctx = MF.getFunction().getContext();
4643 
4644  // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
4645  // it's originated from a <1 x T> type. Those should have been lowered into
4646  // G_BUILD_VECTOR earlier.
4647  if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
4648  LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
4649  return false;
4650  }
4651 
4652  unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
4653 
4654  SmallVector<Constant *, 64> CstIdxs;
4655  for (int Val : Mask) {
4656  // For now, any undef indexes we'll just assume to be 0. This should be
4657  // optimized in future, e.g. to select DUP etc.
4658  Val = Val < 0 ? 0 : Val;
4659  for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
4660  unsigned Offset = Byte + Val * BytesPerElt;
4661  CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
4662  }
4663  }
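  // For instance, with <4 x s32> sources and a mask of {0, 4, 1, 5}, each mask
  // entry expands to four byte indices (0..3, 16..19, 4..7, 20..23) into the
  // concatenated 32-byte table that TBL reads.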
4664 
4665  MachineIRBuilder MIRBuilder(I);
4666 
4667  // Use a constant pool to load the index vector for TBL.
4668  Constant *CPVal = ConstantVector::get(CstIdxs);
4669  MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
4670  if (!IndexLoad) {
4671  LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
4672  return false;
4673  }
4674 
4675  if (DstTy.getSizeInBits() != 128) {
4676  assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
4677  // This case can be done with TBL1.
4678  MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
4679  if (!Concat) {
4680  LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
4681  return false;
4682  }
4683 
4684  // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
4685  IndexLoad =
4686  emitScalarToVector(64, &AArch64::FPR128RegClass,
4687  IndexLoad->getOperand(0).getReg(), MIRBuilder);
4688 
4689  auto TBL1 = MIRBuilder.buildInstr(
4690  AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
4691  {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
4692  constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
4693 
4694  auto Copy =
4695  MIRBuilder
4696  .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
4697  .addReg(TBL1.getReg(0), 0, AArch64::dsub);
4698  RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
4699  I.eraseFromParent();
4700  return true;
4701  }
4702 
4703  // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
4704  // Q registers for regalloc.
4705  auto RegSeq = MIRBuilder
4706  .buildInstr(TargetOpcode::REG_SEQUENCE,
4707  {&AArch64::QQRegClass}, {Src1Reg})
4708  .addImm(AArch64::qsub0)
4709  .addUse(Src2Reg)
4710  .addImm(AArch64::qsub1);
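  // Putting Src1 in qsub0 and Src2 in qsub1 of a QQ register makes the
  // allocator assign an adjacent Q-register pair, which the two-register table
  // form of TBL requires.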
4711 
4712  auto TBL2 = MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
4713  {RegSeq, IndexLoad->getOperand(0)});
4714  constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
4715  constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
4716  I.eraseFromParent();
4717  return true;
4718 }
4719 
4720 MachineInstr *AArch64InstructionSelector::emitLaneInsert(
4721  Optional<Register> DstReg, Register SrcReg, Register EltReg,
4722  unsigned LaneIdx, const RegisterBank &RB,
4723  MachineIRBuilder &MIRBuilder) const {
4724  MachineInstr *InsElt = nullptr;
4725  const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
4726  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4727 
4728  // Create a register to define with the insert if one wasn't passed in.
4729  if (!DstReg)
4730  DstReg = MRI.createVirtualRegister(DstRC);
4731 
4732  unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
4733  unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
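  // For a 32-bit element, for example, this is expected to resolve to an
  // INS-style instruction: an element insert from a GPR when RB is the GPR
  // bank, or an element-to-element insert (which also takes a source lane
  // index, hence the extra immediate below) when RB is the FPR bank.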
4734 
4735  if (RB.getID() == AArch64::FPRRegBankID) {
4736  auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
4737  InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
4738  .addImm(LaneIdx)
4739  .addUse(InsSub->getOperand(0).getReg())
4740  .addImm(0);
4741  } else {
4742  InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
4743  .addImm(LaneIdx)
4744  .addUse(EltReg);
4745  }
4746 
4747  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4748  return InsElt;
4749 }
4750 
4751 bool AArch64InstructionSelector::selectInsertElt(
4752  MachineInstr &I, MachineRegisterInfo &MRI) const {
4753  assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
4754 
4755  // Get information on the destination.
4756  Register DstReg = I.getOperand(0).getReg();
4757  const LLT DstTy = MRI.getType(DstReg);
4758  unsigned VecSize = DstTy.getSizeInBits();
4759 
4760  // Get information on the element we want to insert into the destination.
4761  Register EltReg = I.getOperand(2).getReg();
4762  const LLT EltTy = MRI.getType(EltReg);
4763  unsigned EltSize = EltTy.getSizeInBits();
4764  if (EltSize < 16 || EltSize > 64)
4765  return false; // Don't support all element types yet.
4766 
4767  // Find the definition of the index. Bail out if it's not defined by a
4768  // G_CONSTANT.
4769  Register IdxReg = I.getOperand(3).getReg();
4770  auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
4771  if (!VRegAndVal)
4772  return false;
4773  unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4774 
4775  // Perform the lane insert.
4776  Register SrcReg = I.getOperand(1).getReg();
4777  const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
4778  MachineIRBuilder MIRBuilder(I);
4779 
4780  if (VecSize < 128) {
4781  // If the vector we're inserting into is smaller than 128 bits, widen it
4782  // to 128 to do the insert.
4783  MachineInstr *ScalarToVec = emitScalarToVector(
4784  VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
4785  if (!ScalarToVec)
4786  return false;
4787  SrcReg = ScalarToVec->getOperand(0).getReg();
4788  }
4789 
4790  // Create an insert into a new FPR128 register.
4791  // Note that if our vector is already 128 bits, we end up emitting an extra
4792  // register.
4793  MachineInstr *InsMI =
4794  emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);
4795 
4796  if (VecSize < 128) {
4797  // If we had to widen to perform the insert, then we have to demote back to
4798  // the original size to get the result we want.
4799  Register DemoteVec = InsMI->getOperand(0).getReg();
4800  const TargetRegisterClass *RC =
4801  getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
4802  if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
4803  LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
4804  return false;
4805  }
4806  unsigned SubReg = 0;
4807  if (!getSubRegForClass(RC, TRI, SubReg))
4808  return false;
4809  if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
4810  LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
4811  << ")\n");
4812  return false;
4813  }
4814  MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
4815  .addReg(DemoteVec, 0, SubReg);
4816  RBI.constrainGenericRegister(DstReg, *RC, MRI);
4817  } else {
4818  // No widening needed.
4819  InsMI->getOperand(0).setReg(DstReg);
4820  constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
4821  }
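  // For example (hypothetical types), inserting into a <2 x s32> vector widens
  // the 64-bit source to an FPR128, performs the lane insert there, and then
  // copies the result back into DstReg through the dsub subregister.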
4822 
4823  I.eraseFromParent();
4824  return true;
4825 }
4826 
4827 MachineInstr *
4828 AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
4829  MachineIRBuilder &MIRBuilder,
4830  MachineRegisterInfo &MRI) const {
4831  LLT DstTy = MRI.getType(Dst);
4832  unsigned DstSize = DstTy.getSizeInBits();
4833  if (CV->isNullValue()) {
4834  if (DstSize == 128) {
4835  auto Mov =
4836  MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
4837  constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
4838  return &*Mov;
4839  }
4840 
4841  if (DstSize == 64) {
4842  auto Mov =
4843  MIRBuilder
4844  .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
4845  .addImm(0);
4846  auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
4847  .addReg(Mov.getReg(0), 0, AArch64::dsub);
4848  RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
4849  return &*Copy;
4850  }
4851  }
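  // To illustrate the zero cases above: a 128-bit zero vector is materialized
  // directly into Dst with MOVIv2d_ns #0, while a 64-bit zero goes through a
  // scratch FPR128 MOVI followed by a dsub subregister copy into Dst.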
4852 
4853  auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
4854  if (!CPLoad) {
4855  LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
4856  return nullptr;
4857  }
4858 
4859  auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
4860  RBI.constrainGenericRegister(
4861  Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
4862  return &*Copy;
4863 }
4864 
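 // Illustrative example (hypothetical MIR): a G_BUILD_VECTOR whose four
 // operands are G_CONSTANTs 1, 2, 3 and 4, with a <4 x s32> result, is turned
 // below into a single constant-pool load of <i32 1, i32 2, i32 3, i32 4>
 // rather than a chain of lane inserts.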
4865 bool AArch64InstructionSelector::tryOptConstantBuildVec(
4866  MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) const {
4867  assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4868  unsigned DstSize = DstTy.getSizeInBits();
4869  assert(DstSize <= 128 && "Unexpected build_vec type!");
4870  if (DstSize < 32)
4871  return false;
4872  // Check if we're building a constant vector, in which case we want to
4873  // generate a constant pool load instead of a vector insert sequence.
4874  SmallVector<Constant *, 16> Csts;
4875  for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
4876  // Try to find G_CONSTANT or G_FCONSTANT
4877  auto *OpMI =
4878  getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
4879  if (OpMI)
4880  Csts.emplace_back(
4881  const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
4882  else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
4883  I.getOperand(Idx).getReg(), MRI)))
4884  Csts.emplace_back(
4885  const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
4886  else
4887  return false;
4888  }
4889  Constant *CV = ConstantVector::get(Csts);
4890  MachineIRBuilder MIB(I);
4891  if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
4892  return false;
4893  I.eraseFromParent();
4894  return true;
4895 }
4896 
4897 bool AArch64InstructionSelector::selectBuildVector(
4898  MachineInstr &I, MachineRegisterInfo &MRI) const {
4899  assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4900  // Until we port more of the optimized selections, just use a vector insert
4901  // sequence for now.
4902  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
4903  const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
4904  unsigned EltSize = EltTy.getSizeInBits();
4905 
4906  if (tryOptConstantBuildVec(I, DstTy, MRI))
4907  return true;
4908  if (EltSize < 16 || EltSize > 64)
4909  return false; // Don't support all element types yet.
4910  const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
4911  MachineIRBuilder MIRBuilder(I);
4912 
4913  const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
4914  MachineInstr *ScalarToVec =
4915  emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
4916  I.getOperand(1).getReg(), MIRBuilder);
4917  if (!ScalarToVec)
4918  return false;
4919 
4920  Register DstVec = ScalarToVec->getOperand(0).getReg();
4921  unsigned DstSize = DstTy.getSizeInBits();
4922 
4923  // Keep track of the last MI we inserted. Later on, we might be able to save
4924  // a copy using it.
4925  MachineInstr *PrevMI = nullptr;
4926  for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
4927  // Note that if we don't do a subregister copy, we can end up making an
4928  // extra register.
4929  PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
4930  MIRBuilder);
4931  DstVec = PrevMI->getOperand(0).getReg();
4932  }
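  // For example (hypothetical MIR), building a <4 x s32> from %a, %b, %c, %d
  // starts from emitScalarToVector(%a) and then lane-inserts %b, %c and %d at
  // lanes 1, 2 and 3; the def of the final insert is what feeds the
  // destination handling below.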
4933 
4934  // If DstTy's size in bits is less than 128, then emit a subregister copy
4935  // from DstVec to the last register we've defined.
4936  if (DstSize < 128) {
4937  // Force this to be FPR using the destination vector.
4938  const TargetRegisterClass *RC =
4939  getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
4940  if (!RC)
4941  return false;
4942  if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
4943  LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
4944  return false;
4945  }
4946 
4947  unsigned SubReg = 0;
4948  if (!getSubRegForClass(RC, TRI, SubReg))
4949  return false;
4950  if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
4951  LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
4952  << ")\n");
4953  return false;
4954  }
4955 
4956  Register Reg = MRI.createVirtualRegister(RC);
4957  Register DstReg = I.getOperand(0).getReg();
4958 
4959  MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
4960  .addReg(DstVec, 0, SubReg);
4961  MachineOperand &RegOp = I.getOperand(1);
4962  RegOp.setReg(Reg);
4963  RBI.constrainGenericRegister(DstReg, *RC, MRI);
4964  } else {
4965  // We don't need a subregister copy. Save a copy by re-using the
4966  // destination register on the final insert.
4967  assert(PrevMI && "PrevMI was null?");
4968  PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
4969  constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
4970  }
4971 
4972  I.eraseFromParent();
4973  return true;
4974 }
4975 
4976 /// Helper function to find an intrinsic ID on a MachineInstr. Returns the
4977 /// ID if it exists, and 0 otherwise.
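 /// For G_INTRINSIC and G_INTRINSIC_W_SIDE_EFFECTS the ID is carried as an
 /// explicit intrinsic-ID operand (e.g. Intrinsic::aarch64_crypto_sha1h),
 /// which is what this scan looks for.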
4978 static unsigned findIntrinsicID(MachineInstr &I) {
4979  auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
4980  return Op.isIntrinsicID();
4981  });
4982  if (IntrinOp == I.operands_end())
4983  return 0;
4984  return IntrinOp->getIntrinsicID();
4985 }
4986 
4987 bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
4988  MachineInstr &I, MachineRegisterInfo &MRI) const {
4989  // Find the intrinsic ID.
4990  unsigned IntrinID = findIntrinsicID(I);
4991  if (!IntrinID)
4992  return false;
4993  MachineIRBuilder MIRBuilder(I);
4994 
4995  // Select the instruction.
4996  switch (IntrinID) {
4997  default:
4998  return false;
4999  case Intrinsic::trap:
5000  MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
5001  break;
5002  case Intrinsic::debugtrap:
5003  MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
5004  break;
5005  case Intrinsic::ubsantrap:
5006  MIRBuilder.buildInstr(AArch64::BRK, {}, {})
5007  .addImm(I.getOperand(1).getImm() | ('U' << 8));
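  // For example, llvm.ubsantrap(i8 1) becomes BRK #0x5501: 'U' is 0x55, so the
  // check kind occupies the low byte underneath a fixed 0x55 tag.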
5008  break;
5009  }
5010 
5011  I.eraseFromParent();
5012  return true;
5013 }
5014 
5015 bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
5016  MachineRegisterInfo &MRI) const {
5017  unsigned IntrinID = findIntrinsicID(I);
5018  if (!IntrinID)
5019  return false;
5020  MachineIRBuilder MIRBuilder(I);
5021 
5022  switch (IntrinID) {
5023  default:
5024  break;
5025  case Intrinsic::aarch64_crypto_sha1h: {
5026  Register DstReg = I.getOperand(0).getReg();
5027  Register SrcReg = I.getOperand(2).getReg();