1 //===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file implements the targeting of the InstructionSelector class for
10 /// AArch64.
11 /// \todo This should be generated by TableGen.
12 //===----------------------------------------------------------------------===//
13 
14 #include "AArch64GlobalISelUtils.h"
15 #include "AArch64InstrInfo.h"
18 #include "AArch64RegisterInfo.h"
19 #include "AArch64Subtarget.h"
20 #include "AArch64TargetMachine.h"
23 #include "llvm/ADT/Optional.h"
41 #include "llvm/IR/Constants.h"
42 #include "llvm/IR/DerivedTypes.h"
43 #include "llvm/IR/Instructions.h"
44 #include "llvm/IR/IntrinsicsAArch64.h"
45 #include "llvm/IR/PatternMatch.h"
46 #include "llvm/IR/Type.h"
47 #include "llvm/Pass.h"
48 #include "llvm/Support/Debug.h"
50 
51 #define DEBUG_TYPE "aarch64-isel"
52 
53 using namespace llvm;
54 using namespace MIPatternMatch;
55 using namespace AArch64GISelUtils;
56 
57 namespace llvm {
58 class BlockFrequencyInfo;
59 class ProfileSummaryInfo;
60 }
61 
62 namespace {
63 
64 #define GET_GLOBALISEL_PREDICATE_BITSET
65 #include "AArch64GenGlobalISel.inc"
66 #undef GET_GLOBALISEL_PREDICATE_BITSET
67 
68 
69 class AArch64InstructionSelector : public InstructionSelector {
70 public:
71  AArch64InstructionSelector(const AArch64TargetMachine &TM,
72  const AArch64Subtarget &STI,
73  const AArch64RegisterBankInfo &RBI);
74 
75  bool select(MachineInstr &I) override;
76  static const char *getName() { return DEBUG_TYPE; }
77 
78  void setupMF(MachineFunction &MF, GISelKnownBits *KB,
79  CodeGenCoverage &CoverageInfo, ProfileSummaryInfo *PSI,
80  BlockFrequencyInfo *BFI) override {
81  InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
82  MIB.setMF(MF);
83 
84  // hasFnAttribute() is expensive to call on every BRCOND selection, so
85  // cache it here for each run of the selector.
86  ProduceNonFlagSettingCondBr =
87  !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
88  MFReturnAddr = Register();
89 
90  processPHIs(MF);
91  }
92 
93 private:
94  /// tblgen-erated 'select' implementation, used as the initial selector for
95  /// the patterns that don't require complex C++.
96  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
97 
98  // A lowering phase that runs before any selection attempts.
99  // Returns true if the instruction was modified.
100  bool preISelLower(MachineInstr &I);
101 
102  // An early selection function that runs before the selectImpl() call.
103  bool earlySelect(MachineInstr &I);
104 
105  // Do some preprocessing of G_PHIs before we begin selection.
106  void processPHIs(MachineFunction &MF);
107 
108  bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
109 
110  /// Eliminate same-sized cross-bank copies into stores before selectImpl().
111  bool contractCrossBankCopyIntoStore(MachineInstr &I,
112  MachineRegisterInfo &MRI);
113 
114  bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
115 
116  bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
117  MachineRegisterInfo &MRI) const;
118  bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
119  MachineRegisterInfo &MRI) const;
120 
121  ///@{
122  /// Helper functions for selectCompareBranch.
123  bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
124  MachineIRBuilder &MIB) const;
125  bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
126  MachineIRBuilder &MIB) const;
127  bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
128  MachineIRBuilder &MIB) const;
129  bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
130  MachineBasicBlock *DstMBB,
131  MachineIRBuilder &MIB) const;
132  ///@}
133 
134  bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
135  MachineRegisterInfo &MRI);
136 
137  bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
138  bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
139 
140  // Helper to generate an equivalent of scalar_to_vector into a new register,
141  // returned via 'Dst'.
142  MachineInstr *emitScalarToVector(unsigned EltSize,
143  const TargetRegisterClass *DstRC,
144  Register Scalar,
145  MachineIRBuilder &MIRBuilder) const;
146 
147  /// Emit a lane insert into \p DstReg, or a new vector register if None is
148  /// provided.
149  ///
150  /// The lane inserted into is defined by \p LaneIdx. The vector source
151  /// register is given by \p SrcReg. The register containing the element is
152  /// given by \p EltReg.
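 /// Illustrative usage sketch (editorial addition, not part of the original
 /// source; the register and bank names are placeholders):
 /// \code
 /// // Insert the scalar in EltReg into lane 1 of the vector in SrcReg,
 /// // writing the result to a fresh vector register on the given bank.
 /// MachineInstr *Ins =
 ///     emitLaneInsert(None, SrcReg, EltReg, /*LaneIdx=*/1, FPRBank, MIB);
 /// Register NewVec = Ins->getOperand(0).getReg();
 /// \endcode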
153  MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
154  Register EltReg, unsigned LaneIdx,
155  const RegisterBank &RB,
156  MachineIRBuilder &MIRBuilder) const;
157 
158  /// Emit a sequence of instructions representing a constant \p CV for a
159  /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
160  ///
161  /// \returns the last instruction in the sequence on success, and nullptr
162  /// otherwise.
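 /// Illustrative usage sketch (editorial addition, not part of the original
 /// source):
 /// \code
 /// // Materialize the vector constant CV into Dst; this may be a MOV-style
 /// // immediate or a constant-pool load depending on the value.
 /// if (!emitConstantVector(Dst, CV, MIRBuilder, MRI))
 ///   return false; // fall back to the generic selection path
 /// \endcode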
163  MachineInstr *emitConstantVector(Register Dst, Constant *CV,
164  MachineIRBuilder &MIRBuilder,
165  MachineRegisterInfo &MRI);
166 
167  bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI);
168  bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
169  MachineRegisterInfo &MRI);
170  /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
171  /// SUBREG_TO_REG.
172  bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
173  bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
176 
177  bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
178  bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
179  bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
180  bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
181 
182  /// Helper function to select vector load intrinsics like
183  /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
184  /// \p Opc is the opcode that the selected instruction should use.
185  /// \p NumVecs is the number of vector destinations for the instruction.
186  /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
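 /// For example (editorial addition; the instruction opcode name is an
 /// assumption), an @llvm.aarch64.neon.ld2 of four 32-bit lanes would be
 /// selected roughly as
 /// \code
 /// selectVectorLoadIntrinsic(AArch64::LD2Twov4s, /*NumVecs=*/2, I);
 /// \endcode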
187  bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
188  MachineInstr &I);
189  bool selectIntrinsicWithSideEffects(MachineInstr &I,
190  MachineRegisterInfo &MRI);
191  bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
192  bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI);
193  bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
194  bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
195  bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
196  bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
197  bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
198  bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
199  bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
200  bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
201 
202  unsigned emitConstantPoolEntry(const Constant *CPVal,
203  MachineFunction &MF) const;
204  MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
205  MachineIRBuilder &MIRBuilder) const;
206 
207  // Emit a vector concat operation.
208  MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
209  Register Op2,
210  MachineIRBuilder &MIRBuilder) const;
211 
212  // Emit an integer compare between LHS and RHS, which checks for Predicate.
213  MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
214  MachineOperand &Predicate,
215  MachineIRBuilder &MIRBuilder) const;
216 
217  /// Emit a floating point comparison between \p LHS and \p RHS.
218  /// \p Pred if given is the intended predicate to use.
219  MachineInstr *emitFPCompare(Register LHS, Register RHS,
220  MachineIRBuilder &MIRBuilder,
221  Optional<CmpInst::Predicate> Pred = None) const;
222 
223  MachineInstr *emitInstr(unsigned Opcode,
224  std::initializer_list<llvm::DstOp> DstOps,
225  std::initializer_list<llvm::SrcOp> SrcOps,
226  MachineIRBuilder &MIRBuilder,
227  const ComplexRendererFns &RenderFns = None) const;
228  /// Helper function to emit an add or sub instruction.
229  ///
230  /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
231  /// in a specific order.
232  ///
233  /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
234  ///
235  /// \code
236  /// const std::array<std::array<unsigned, 2>, 4> Table {
237  /// {{AArch64::ADDXri, AArch64::ADDWri},
238  /// {AArch64::ADDXrs, AArch64::ADDWrs},
239  /// {AArch64::ADDXrr, AArch64::ADDWrr},
240  /// {AArch64::SUBXri, AArch64::SUBWri},
241  /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
242  /// \endcode
243  ///
244  /// Each row in the table corresponds to a different addressing mode. Each
245  /// column corresponds to a different register size.
246  ///
247  /// \attention Rows must be structured as follows:
248  /// - Row 0: The ri opcode variants
249  /// - Row 1: The rs opcode variants
250  /// - Row 2: The rr opcode variants
251  /// - Row 3: The ri opcode variants for negative immediates
252  /// - Row 4: The rx opcode variants
253  ///
254  /// \attention Columns must be structured as follows:
255  /// - Column 0: The 64-bit opcode variants
256  /// - Column 1: The 32-bit opcode variants
257  ///
258  /// \p Dst is the destination register of the binop to emit.
259  /// \p LHS is the left-hand operand of the binop to emit.
260  /// \p RHS is the right-hand operand of the binop to emit.
261  MachineInstr *emitAddSub(
262  const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
263  Register Dst, MachineOperand &LHS, MachineOperand &RHS,
264  MachineIRBuilder &MIRBuilder) const;
265  MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
266  MachineOperand &RHS,
267  MachineIRBuilder &MIRBuilder) const;
268  MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
269  MachineIRBuilder &MIRBuilder) const;
270  MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
271  MachineIRBuilder &MIRBuilder) const;
272  MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
273  MachineIRBuilder &MIRBuilder) const;
274  MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
275  MachineIRBuilder &MIRBuilder) const;
276  MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
277  AArch64CC::CondCode CC,
278  MachineIRBuilder &MIRBuilder) const;
279  MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
280  const RegisterBank &DstRB, LLT ScalarTy,
281  Register VecReg, unsigned LaneIdx,
282  MachineIRBuilder &MIRBuilder) const;
283  MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
284  AArch64CC::CondCode Pred,
285  MachineIRBuilder &MIRBuilder) const;
286  /// Emit a CSet for a FP compare.
287  ///
288  /// \p Dst is expected to be a 32-bit scalar register.
289  MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
290  MachineIRBuilder &MIRBuilder) const;
291 
292  /// Emit the overflow op for \p Opcode.
293  ///
294  /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
295  /// G_USUBO, etc.
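 ///
 /// Illustrative sketch (editorial addition, not part of the original
 /// source): the result pairs the flag-setting instruction with the condition
 /// code under which the overflow bit is set.
 /// \code
 /// auto OverflowRes = emitOverflowOp(TargetOpcode::G_UADDO, Dst, LHS, RHS, MIB);
 /// AArch64CC::CondCode OverflowCC = OverflowRes.second; // e.g. HS for G_UADDO
 /// \endcode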
296  std::pair<MachineInstr *, AArch64CC::CondCode>
297  emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
298  MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
299 
300  /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
301  /// In some cases this is even possible with OR operations in the expression.
302  MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC,
303  MachineIRBuilder &MIB) const;
304  MachineInstr *emitConditionalComparison(Register LHS, Register RHS,
305  CmpInst::Predicate CC,
306  AArch64CC::CondCode Predicate,
307  AArch64CC::CondCode OutCC,
308  MachineIRBuilder &MIB) const;
309  MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC,
310  bool Negate, Register CCOp,
311  AArch64CC::CondCode Predicate,
312  MachineIRBuilder &MIB) const;
313 
314  /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
315  /// \p IsNegative is true if the test should be "not zero".
316  /// This will also optimize the test bit instruction when possible.
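 ///
 /// Illustrative sketch (editorial addition; TestReg and DstMBB are
 /// placeholders):
 /// \code
 /// // Branch to DstMBB if bit 3 of TestReg is non-zero (a TBNZ), after any
 /// // folding of shifts/ands feeding TestReg.
 /// emitTestBit(TestReg, /*Bit=*/3, /*IsNegative=*/true, DstMBB, MIB);
 /// \endcode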
317  MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
318  MachineBasicBlock *DstMBB,
319  MachineIRBuilder &MIB) const;
320 
321  /// Emit a CB(N)Z instruction which branches to \p DestMBB.
322  MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
323  MachineBasicBlock *DestMBB,
324  MachineIRBuilder &MIB) const;
325 
326  // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
327  // We use these manually instead of using the importer since it doesn't
328  // support SDNodeXForm.
329  ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
330  ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
331  ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
332  ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
333 
334  ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
335  ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
336  ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
337 
338  ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
339  unsigned Size) const;
340 
341  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
342  return selectAddrModeUnscaled(Root, 1);
343  }
344  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
345  return selectAddrModeUnscaled(Root, 2);
346  }
347  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
348  return selectAddrModeUnscaled(Root, 4);
349  }
350  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
351  return selectAddrModeUnscaled(Root, 8);
352  }
353  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
354  return selectAddrModeUnscaled(Root, 16);
355  }
356 
357  /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
358  /// from complex pattern matchers like selectAddrModeIndexed().
359  ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
360  MachineRegisterInfo &MRI) const;
361 
362  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
363  unsigned Size) const;
364  template <int Width>
365  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
366  return selectAddrModeIndexed(Root, Width / 8);
367  }
368 
369  bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
370  const MachineRegisterInfo &MRI) const;
371  ComplexRendererFns
372  selectAddrModeShiftedExtendXReg(MachineOperand &Root,
373  unsigned SizeInBytes) const;
374 
375  /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
376  /// or not a shift + extend should be folded into an addressing mode. Returns
377  /// None when this is not profitable or possible.
378  ComplexRendererFns
379  selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
380  MachineOperand &Offset, unsigned SizeInBytes,
381  bool WantsExt) const;
382  ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
383  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
384  unsigned SizeInBytes) const;
385  template <int Width>
386  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
387  return selectAddrModeXRO(Root, Width / 8);
388  }
389 
390  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
391  unsigned SizeInBytes) const;
392  template <int Width>
393  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
394  return selectAddrModeWRO(Root, Width / 8);
395  }
396 
397  ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
398  bool AllowROR = false) const;
399 
400  ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
401  return selectShiftedRegister(Root);
402  }
403 
404  ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
405  return selectShiftedRegister(Root, true);
406  }
407 
408  /// Given an extend instruction, determine the correct shift-extend type for
409  /// that instruction.
410  ///
411  /// If the instruction is going to be used in a load or store, pass
412  /// \p IsLoadStore = true.
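 ///
 /// For example (editorial addition, illustrative only):
 /// \code
 /// // %ext:gpr(s64) = G_SEXT %x:gpr(s8)   -> AArch64_AM::SXTB
 /// // %ext:gpr(s64) = G_ZEXT %x:gpr(s16)  -> AArch64_AM::UXTH
 /// \endcode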
413  AArch64_AM::ShiftExtendType
414  getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
415  bool IsLoadStore = false) const;
416 
417  /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
418  ///
419  /// \returns Either \p Reg if no change was necessary, or the new register
420  /// created by moving \p Reg.
421  ///
422  /// Note: This uses emitCopy right now.
423  Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
424  MachineIRBuilder &MIB) const;
425 
426  ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
427 
428  void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
429  int OpIdx = -1) const;
430  void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
431  int OpIdx = -1) const;
432  void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
433  int OpIdx = -1) const;
434  void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
435  int OpIdx = -1) const;
436  void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
437  int OpIdx = -1) const;
438  void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
439  int OpIdx = -1) const;
440  void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
441  const MachineInstr &MI,
442  int OpIdx = -1) const;
443 
444  // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
445  void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
446 
447  // Optimization methods.
448  bool tryOptSelect(GSelect &Sel);
449  bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
450  MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
451  MachineOperand &Predicate,
452  MachineIRBuilder &MIRBuilder) const;
453 
454  /// Return true if \p MI is a load or store of \p NumBytes bytes.
455  bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
456 
457  /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
458  /// register zeroed out. In other words, the result of MI has been explicitly
459  /// zero extended.
460  bool isDef32(const MachineInstr &MI) const;
461 
462  const AArch64TargetMachine &TM;
463  const AArch64Subtarget &STI;
464  const AArch64InstrInfo &TII;
465  const AArch64RegisterInfo &TRI;
466  const AArch64RegisterBankInfo &RBI;
467 
468  bool ProduceNonFlagSettingCondBr = false;
469 
470  // Some cached values used during selection.
471  // We use LR as a live-in register, and we keep track of it here as it can be
472  // clobbered by calls.
473  Register MFReturnAddr;
474 
475  MachineIRBuilder MIB;
476 
477 #define GET_GLOBALISEL_PREDICATES_DECL
478 #include "AArch64GenGlobalISel.inc"
479 #undef GET_GLOBALISEL_PREDICATES_DECL
480 
481 // We declare the temporaries used by selectImpl() in the class to minimize the
482 // cost of constructing placeholder values.
483 #define GET_GLOBALISEL_TEMPORARIES_DECL
484 #include "AArch64GenGlobalISel.inc"
485 #undef GET_GLOBALISEL_TEMPORARIES_DECL
486 };
487 
488 } // end anonymous namespace
489 
490 #define GET_GLOBALISEL_IMPL
491 #include "AArch64GenGlobalISel.inc"
492 #undef GET_GLOBALISEL_IMPL
493 
494 AArch64InstructionSelector::AArch64InstructionSelector(
495  const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
496  const AArch64RegisterBankInfo &RBI)
497  : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
498  RBI(RBI),
499 #define GET_GLOBALISEL_PREDICATES_INIT
500 #include "AArch64GenGlobalISel.inc"
501 #undef GET_GLOBALISEL_PREDICATES_INIT
502 #define GET_GLOBALISEL_TEMPORARIES_INIT
503 #include "AArch64GenGlobalISel.inc"
504 #undef GET_GLOBALISEL_TEMPORARIES_INIT
505 {
506 }
507 
508 // FIXME: This should be target-independent, inferred from the types declared
509 // for each class in the bank.
510 //
511 /// Given a register bank, and a type, return the smallest register class that
512 /// can represent that combination.
513 static const TargetRegisterClass *
514 getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
515  bool GetAllRegSet = false) {
516  if (RB.getID() == AArch64::GPRRegBankID) {
517  if (Ty.getSizeInBits() <= 32)
518  return GetAllRegSet ? &AArch64::GPR32allRegClass
519  : &AArch64::GPR32RegClass;
520  if (Ty.getSizeInBits() == 64)
521  return GetAllRegSet ? &AArch64::GPR64allRegClass
522  : &AArch64::GPR64RegClass;
523  if (Ty.getSizeInBits() == 128)
524  return &AArch64::XSeqPairsClassRegClass;
525  return nullptr;
526  }
527 
528  if (RB.getID() == AArch64::FPRRegBankID) {
529  switch (Ty.getSizeInBits()) {
530  case 8:
531  return &AArch64::FPR8RegClass;
532  case 16:
533  return &AArch64::FPR16RegClass;
534  case 32:
535  return &AArch64::FPR32RegClass;
536  case 64:
537  return &AArch64::FPR64RegClass;
538  case 128:
539  return &AArch64::FPR128RegClass;
540  }
541  return nullptr;
542  }
543 
544  return nullptr;
545 }
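// Illustrative examples of the mapping above (editorial addition, not part of
// the original source; the bank references are placeholders):
//
//   getRegClassForTypeOnBank(LLT::scalar(64), GPRBank);          // &AArch64::GPR64RegClass
//   getRegClassForTypeOnBank(LLT::scalar(64), FPRBank);          // &AArch64::FPR64RegClass
//   getRegClassForTypeOnBank(LLT::fixed_vector(4, 32), FPRBank); // &AArch64::FPR128RegClass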
546 
547 /// Given a register bank, and size in bits, return the smallest register class
548 /// that can represent that combination.
549 static const TargetRegisterClass *
550 getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
551  bool GetAllRegSet = false) {
552  unsigned RegBankID = RB.getID();
553 
554  if (RegBankID == AArch64::GPRRegBankID) {
555  if (SizeInBits <= 32)
556  return GetAllRegSet ? &AArch64::GPR32allRegClass
557  : &AArch64::GPR32RegClass;
558  if (SizeInBits == 64)
559  return GetAllRegSet ? &AArch64::GPR64allRegClass
560  : &AArch64::GPR64RegClass;
561  if (SizeInBits == 128)
562  return &AArch64::XSeqPairsClassRegClass;
563  }
564 
565  if (RegBankID == AArch64::FPRRegBankID) {
566  switch (SizeInBits) {
567  default:
568  return nullptr;
569  case 8:
570  return &AArch64::FPR8RegClass;
571  case 16:
572  return &AArch64::FPR16RegClass;
573  case 32:
574  return &AArch64::FPR32RegClass;
575  case 64:
576  return &AArch64::FPR64RegClass;
577  case 128:
578  return &AArch64::FPR128RegClass;
579  }
580  }
581 
582  return nullptr;
583 }
584 
585 /// Returns the correct subregister to use for a given register class.
586 static bool getSubRegForClass(const TargetRegisterClass *RC,
587  const TargetRegisterInfo &TRI, unsigned &SubReg) {
588  switch (TRI.getRegSizeInBits(*RC)) {
589  case 8:
590  SubReg = AArch64::bsub;
591  break;
592  case 16:
593  SubReg = AArch64::hsub;
594  break;
595  case 32:
596  if (RC != &AArch64::FPR32RegClass)
597  SubReg = AArch64::sub_32;
598  else
599  SubReg = AArch64::ssub;
600  break;
601  case 64:
602  SubReg = AArch64::dsub;
603  break;
604  default:
605  LLVM_DEBUG(
606  dbgs() << "Couldn't find appropriate subregister for register class.");
607  return false;
608  }
609 
610  return true;
611 }
612 
613 /// Returns the minimum size the given register bank can hold.
614 static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
615  switch (RB.getID()) {
616  case AArch64::GPRRegBankID:
617  return 32;
618  case AArch64::FPRRegBankID:
619  return 8;
620  default:
621  llvm_unreachable("Tried to get minimum size for unknown register bank.");
622  }
623 }
624 
625 /// Create a REG_SEQUENCE instruction using the registers in \p Regs.
626 /// Helper function for functions like createDTuple and createQTuple.
627 ///
628 /// \p RegClassIDs - The list of register class IDs available for some tuple of
629 /// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
630 /// expected to contain between 2 and 4 tuple classes.
631 ///
632 /// \p SubRegs - The list of subregister classes associated with each register
633 /// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
634 /// subregister class. The index of each subregister class is expected to
635 /// correspond with the index of each register class.
636 ///
637 /// \returns Either the destination register of REG_SEQUENCE instruction that
638 /// was created, or the 0th element of \p Regs if \p Regs contains a single
639 /// element.
640 static Register createTuple(ArrayRef<Register> Regs,
641  const unsigned RegClassIDs[],
642  const unsigned SubRegs[], MachineIRBuilder &MIB) {
643  unsigned NumRegs = Regs.size();
644  if (NumRegs == 1)
645  return Regs[0];
646  assert(NumRegs >= 2 && NumRegs <= 4 &&
647  "Only support between two and 4 registers in a tuple!");
648  const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
649  auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
650  auto RegSequence =
651  MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
652  for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
653  RegSequence.addUse(Regs[I]);
654  RegSequence.addImm(SubRegs[I]);
655  }
656  return RegSequence.getReg(0);
657 }
658 
659 /// Create a tuple of D-registers using the registers in \p Regs.
660 static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
661  static const unsigned RegClassIDs[] = {
662  AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
663  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
664  AArch64::dsub2, AArch64::dsub3};
665  return createTuple(Regs, RegClassIDs, SubRegs, MIB);
666 }
667 
668 /// Create a tuple of Q-registers using the registers in \p Regs.
669 static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
670  static const unsigned RegClassIDs[] = {
671  AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
672  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
673  AArch64::qsub2, AArch64::qsub3};
674  return createTuple(Regs, RegClassIDs, SubRegs, MIB);
675 }
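// Illustrative usage (editorial addition, not part of the original source;
// the vreg names are placeholders): tying two Q registers into a QQ tuple for
// a structured load/store.
//
//   Register Tuple = createQTuple({Src0, Src1}, MIB);
//   // Emits: %Tuple:qq = REG_SEQUENCE %Src0, qsub0, %Src1, qsub1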
676 
677 static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
678  auto &MI = *Root.getParent();
679  auto &MBB = *MI.getParent();
680  auto &MF = *MBB.getParent();
681  auto &MRI = MF.getRegInfo();
682  uint64_t Immed;
683  if (Root.isImm())
684  Immed = Root.getImm();
685  else if (Root.isCImm())
686  Immed = Root.getCImm()->getZExtValue();
687  else if (Root.isReg()) {
688  auto ValAndVReg =
689  getIConstantVRegValWithLookThrough(Root.getReg(), MRI);
690  if (!ValAndVReg)
691  return None;
692  Immed = ValAndVReg->Value.getSExtValue();
693  } else
694  return None;
695  return Immed;
696 }
697 
698 /// Check whether \p I is a currently unsupported binary operation:
699 /// - it has an unsized type
700 /// - an operand is not a vreg
701 /// - all operands are not in the same bank
702 /// These are checks that should someday live in the verifier, but right now,
703 /// these are mostly limitations of the aarch64 selector.
704 static bool unsupportedBinOp(const MachineInstr &I,
705  const AArch64RegisterBankInfo &RBI,
706  const MachineRegisterInfo &MRI,
707  const AArch64RegisterInfo &TRI) {
708  LLT Ty = MRI.getType(I.getOperand(0).getReg());
709  if (!Ty.isValid()) {
710  LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
711  return true;
712  }
713 
714  const RegisterBank *PrevOpBank = nullptr;
715  for (auto &MO : I.operands()) {
716  // FIXME: Support non-register operands.
717  if (!MO.isReg()) {
718  LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
719  return true;
720  }
721 
722  // FIXME: Can generic operations have physical registers operands? If
723  // so, this will need to be taught about that, and we'll need to get the
724  // bank out of the minimal class for the register.
725  // Either way, this needs to be documented (and possibly verified).
726  if (!Register::isVirtualRegister(MO.getReg())) {
727  LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
728  return true;
729  }
730 
731  const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
732  if (!OpBank) {
733  LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
734  return true;
735  }
736 
737  if (PrevOpBank && OpBank != PrevOpBank) {
738  LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
739  return true;
740  }
741  PrevOpBank = OpBank;
742  }
743  return false;
744 }
745 
746 /// Select the AArch64 opcode for the basic binary operation \p GenericOpc
747 /// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
748 /// and of size \p OpSize.
749 /// \returns \p GenericOpc if the combination is unsupported.
750 static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
751  unsigned OpSize) {
752  switch (RegBankID) {
753  case AArch64::GPRRegBankID:
754  if (OpSize == 32) {
755  switch (GenericOpc) {
756  case TargetOpcode::G_SHL:
757  return AArch64::LSLVWr;
758  case TargetOpcode::G_LSHR:
759  return AArch64::LSRVWr;
760  case TargetOpcode::G_ASHR:
761  return AArch64::ASRVWr;
762  default:
763  return GenericOpc;
764  }
765  } else if (OpSize == 64) {
766  switch (GenericOpc) {
767  case TargetOpcode::G_PTR_ADD:
768  return AArch64::ADDXrr;
769  case TargetOpcode::G_SHL:
770  return AArch64::LSLVXr;
771  case TargetOpcode::G_LSHR:
772  return AArch64::LSRVXr;
773  case TargetOpcode::G_ASHR:
774  return AArch64::ASRVXr;
775  default:
776  return GenericOpc;
777  }
778  }
779  break;
780  case AArch64::FPRRegBankID:
781  switch (OpSize) {
782  case 32:
783  switch (GenericOpc) {
784  case TargetOpcode::G_FADD:
785  return AArch64::FADDSrr;
786  case TargetOpcode::G_FSUB:
787  return AArch64::FSUBSrr;
788  case TargetOpcode::G_FMUL:
789  return AArch64::FMULSrr;
790  case TargetOpcode::G_FDIV:
791  return AArch64::FDIVSrr;
792  default:
793  return GenericOpc;
794  }
795  case 64:
796  switch (GenericOpc) {
797  case TargetOpcode::G_FADD:
798  return AArch64::FADDDrr;
799  case TargetOpcode::G_FSUB:
800  return AArch64::FSUBDrr;
801  case TargetOpcode::G_FMUL:
802  return AArch64::FMULDrr;
803  case TargetOpcode::G_FDIV:
804  return AArch64::FDIVDrr;
805  case TargetOpcode::G_OR:
806  return AArch64::ORRv8i8;
807  default:
808  return GenericOpc;
809  }
810  }
811  break;
812  }
813  return GenericOpc;
814 }
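// Illustrative examples of the mapping above (editorial addition):
//
//   selectBinaryOp(TargetOpcode::G_SHL, AArch64::GPRRegBankID, 32);
//     // -> AArch64::LSLVWr
//   selectBinaryOp(TargetOpcode::G_FADD, AArch64::FPRRegBankID, 64);
//     // -> AArch64::FADDDrr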
815 
816 /// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
817 /// appropriate for the (value) register bank \p RegBankID and of memory access
818 /// size \p OpSize. This returns the variant with the base+unsigned-immediate
819 /// addressing mode (e.g., LDRXui).
820 /// \returns \p GenericOpc if the combination is unsupported.
821 static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
822  unsigned OpSize) {
823  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
824  switch (RegBankID) {
825  case AArch64::GPRRegBankID:
826  switch (OpSize) {
827  case 8:
828  return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
829  case 16:
830  return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
831  case 32:
832  return isStore ? AArch64::STRWui : AArch64::LDRWui;
833  case 64:
834  return isStore ? AArch64::STRXui : AArch64::LDRXui;
835  }
836  break;
837  case AArch64::FPRRegBankID:
838  switch (OpSize) {
839  case 8:
840  return isStore ? AArch64::STRBui : AArch64::LDRBui;
841  case 16:
842  return isStore ? AArch64::STRHui : AArch64::LDRHui;
843  case 32:
844  return isStore ? AArch64::STRSui : AArch64::LDRSui;
845  case 64:
846  return isStore ? AArch64::STRDui : AArch64::LDRDui;
847  case 128:
848  return isStore ? AArch64::STRQui : AArch64::LDRQui;
849  }
850  break;
851  }
852  return GenericOpc;
853 }
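// Illustrative example (editorial addition): a 32-bit G_LOAD whose value
// lives on the GPR bank selects to the base + unsigned-immediate form:
//
//   selectLoadStoreUIOp(TargetOpcode::G_LOAD, AArch64::GPRRegBankID, 32);
//     // -> AArch64::LDRWui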
854 
855 /// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
856 /// to \p *To.
857 ///
858 /// E.g "To = COPY SrcReg:SubReg"
859 static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
860  const RegisterBankInfo &RBI, Register SrcReg,
861  const TargetRegisterClass *To, unsigned SubReg) {
862  assert(SrcReg.isValid() && "Expected a valid source register?");
863  assert(To && "Destination register class cannot be null");
864  assert(SubReg && "Expected a valid subregister");
865 
866  MachineIRBuilder MIB(I);
867  auto SubRegCopy =
868  MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
869  MachineOperand &RegOp = I.getOperand(1);
870  RegOp.setReg(SubRegCopy.getReg(0));
871 
872  // It's possible that the destination register won't be constrained. Make
873  // sure that happens.
874  if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
875  RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
876 
877  return true;
878 }
879 
880 /// Helper function to get the source and destination register classes for a
881 /// copy. Returns a std::pair containing the source register class for the
882 /// copy, and the destination register class for the copy. If a register class
883 /// cannot be determined, then it will be nullptr.
884 static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
885 getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
886  MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
887  const RegisterBankInfo &RBI) {
888  Register DstReg = I.getOperand(0).getReg();
889  Register SrcReg = I.getOperand(1).getReg();
890  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
891  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
892  unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
893  unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
894 
895  // Special casing for cross-bank copies of s1s. We can technically represent
896  // a 1-bit value with any size of register. The minimum size for a GPR is 32
897  // bits. So, we need to put the FPR on 32 bits as well.
898  //
899  // FIXME: I'm not sure if this case holds true outside of copies. If it does,
900  // then we can pull it into the helpers that get the appropriate class for a
901  // register bank. Or make a new helper that carries along some constraint
902  // information.
903  if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
904  SrcSize = DstSize = 32;
905 
906  return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
907  getMinClassForRegBank(DstRegBank, DstSize, true)};
908 }
909 
910 static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
911  MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
912  const RegisterBankInfo &RBI) {
913  Register DstReg = I.getOperand(0).getReg();
914  Register SrcReg = I.getOperand(1).getReg();
915  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
916  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
917 
918  // Find the correct register classes for the source and destination registers.
919  const TargetRegisterClass *SrcRC;
920  const TargetRegisterClass *DstRC;
921  std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
922 
923  if (!DstRC) {
924  LLVM_DEBUG(dbgs() << "Unexpected dest size "
925  << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
926  return false;
927  }
928 
929  // Is this a copy? If so, then we may need to insert a subregister copy.
930  if (I.isCopy()) {
931  // Yes. Check if there's anything to fix up.
932  if (!SrcRC) {
933  LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
934  return false;
935  }
936 
937  unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
938  unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
939  unsigned SubReg;
940 
941  // If the source bank doesn't support a subregister copy small enough,
942  // then we first need to copy to the destination bank.
943  if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
944  const TargetRegisterClass *DstTempRC =
945  getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
946  getSubRegForClass(DstRC, TRI, SubReg);
947 
948  MachineIRBuilder MIB(I);
949  auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
950  copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
951  } else if (SrcSize > DstSize) {
952  // If the source register is bigger than the destination we need to
953  // perform a subregister copy.
954  const TargetRegisterClass *SubRegRC =
955  getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
956  getSubRegForClass(SubRegRC, TRI, SubReg);
957  copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
958  } else if (DstSize > SrcSize) {
959  // If the destination register is bigger than the source we need to do
960  // a promotion using SUBREG_TO_REG.
961  const TargetRegisterClass *PromotionRC =
962  getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
963  getSubRegForClass(SrcRC, TRI, SubReg);
964 
965  Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
966  BuildMI(*I.getParent(), I, I.getDebugLoc(),
967  TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
968  .addImm(0)
969  .addUse(SrcReg)
970  .addImm(SubReg);
971  MachineOperand &RegOp = I.getOperand(1);
972  RegOp.setReg(PromoteReg);
973  }
974 
975  // If the destination is a physical register, then there's nothing to
976  // change, so we're done.
977  if (Register::isPhysicalRegister(DstReg))
978  return true;
979  }
980 
981  // No need to constrain SrcReg. It will get constrained when we hit another
982  // of its use or its defs. Copies do not have constraints.
983  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
984  LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
985  << " operand\n");
986  return false;
987  }
988 
989  // If this is a GPR ZEXT that we want to just reduce down into a copy.
990  // The sizes will be mismatched with the source < 32b but that's ok.
991  if (I.getOpcode() == TargetOpcode::G_ZEXT) {
992  I.setDesc(TII.get(AArch64::COPY));
993  assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
994  return selectCopy(I, TII, MRI, TRI, RBI);
995  }
996 
997  I.setDesc(TII.get(AArch64::COPY));
998  return true;
999 }
1000 
1001 static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
1002  if (!DstTy.isScalar() || !SrcTy.isScalar())
1003  return GenericOpc;
1004 
1005  const unsigned DstSize = DstTy.getSizeInBits();
1006  const unsigned SrcSize = SrcTy.getSizeInBits();
1007 
1008  switch (DstSize) {
1009  case 32:
1010  switch (SrcSize) {
1011  case 32:
1012  switch (GenericOpc) {
1013  case TargetOpcode::G_SITOFP:
1014  return AArch64::SCVTFUWSri;
1015  case TargetOpcode::G_UITOFP:
1016  return AArch64::UCVTFUWSri;
1017  case TargetOpcode::G_FPTOSI:
1018  return AArch64::FCVTZSUWSr;
1019  case TargetOpcode::G_FPTOUI:
1020  return AArch64::FCVTZUUWSr;
1021  default:
1022  return GenericOpc;
1023  }
1024  case 64:
1025  switch (GenericOpc) {
1026  case TargetOpcode::G_SITOFP:
1027  return AArch64::SCVTFUXSri;
1028  case TargetOpcode::G_UITOFP:
1029  return AArch64::UCVTFUXSri;
1030  case TargetOpcode::G_FPTOSI:
1031  return AArch64::FCVTZSUWDr;
1032  case TargetOpcode::G_FPTOUI:
1033  return AArch64::FCVTZUUWDr;
1034  default:
1035  return GenericOpc;
1036  }
1037  default:
1038  return GenericOpc;
1039  }
1040  case 64:
1041  switch (SrcSize) {
1042  case 32:
1043  switch (GenericOpc) {
1044  case TargetOpcode::G_SITOFP:
1045  return AArch64::SCVTFUWDri;
1046  case TargetOpcode::G_UITOFP:
1047  return AArch64::UCVTFUWDri;
1048  case TargetOpcode::G_FPTOSI:
1049  return AArch64::FCVTZSUXSr;
1050  case TargetOpcode::G_FPTOUI:
1051  return AArch64::FCVTZUUXSr;
1052  default:
1053  return GenericOpc;
1054  }
1055  case 64:
1056  switch (GenericOpc) {
1057  case TargetOpcode::G_SITOFP:
1058  return AArch64::SCVTFUXDri;
1059  case TargetOpcode::G_UITOFP:
1060  return AArch64::UCVTFUXDri;
1061  case TargetOpcode::G_FPTOSI:
1062  return AArch64::FCVTZSUXDr;
1063  case TargetOpcode::G_FPTOUI:
1064  return AArch64::FCVTZUUXDr;
1065  default:
1066  return GenericOpc;
1067  }
1068  default:
1069  return GenericOpc;
1070  }
1071  default:
1072  return GenericOpc;
1073  };
1074  return GenericOpc;
1075 }
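// Illustrative example (editorial addition): converting a 64-bit integer to a
// 32-bit float picks the "64-bit source, 32-bit destination" variant:
//
//   selectFPConvOpc(TargetOpcode::G_SITOFP, LLT::scalar(32), LLT::scalar(64));
//     // -> AArch64::SCVTFUXSri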
1076 
1077 MachineInstr *
1078 AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1079  Register False, AArch64CC::CondCode CC,
1080  MachineIRBuilder &MIB) const {
1081  MachineRegisterInfo &MRI = *MIB.getMRI();
1082  assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
1083  RBI.getRegBank(True, MRI, TRI)->getID() &&
1084  "Expected both select operands to have the same regbank?");
1085  LLT Ty = MRI.getType(True);
1086  if (Ty.isVector())
1087  return nullptr;
1088  const unsigned Size = Ty.getSizeInBits();
1089  assert((Size == 32 || Size == 64) &&
1090  "Expected 32 bit or 64 bit select only?");
1091  const bool Is32Bit = Size == 32;
1092  if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1093  unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1094  auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1095  constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1096  return &*FCSel;
1097  }
1098 
1099  // By default, we'll try and emit a CSEL.
1100  unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1101  bool Optimized = false;
1102  auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1103  &Optimized](Register &Reg, Register &OtherReg,
1104  bool Invert) {
1105  if (Optimized)
1106  return false;
1107 
1108  // Attempt to fold:
1109  //
1110  // %sub = G_SUB 0, %x
1111  // %select = G_SELECT cc, %reg, %sub
1112  //
1113  // Into:
1114  // %select = CSNEG %reg, %x, cc
1115  Register MatchReg;
1116  if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
1117  Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1118  Reg = MatchReg;
1119  if (Invert) {
1120  CC = AArch64CC::getInvertedCondCode(CC);
1121  std::swap(Reg, OtherReg);
1122  }
1123  return true;
1124  }
1125 
1126  // Attempt to fold:
1127  //
1128  // %xor = G_XOR %x, -1
1129  // %select = G_SELECT cc, %reg, %xor
1130  //
1131  // Into:
1132  // %select = CSINV %reg, %x, cc
1133  if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
1134  Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1135  Reg = MatchReg;
1136  if (Invert) {
1137  CC = AArch64CC::getInvertedCondCode(CC);
1138  std::swap(Reg, OtherReg);
1139  }
1140  return true;
1141  }
1142 
1143  // Attempt to fold:
1144  //
1145  // %add = G_ADD %x, 1
1146  // %select = G_SELECT cc, %reg, %add
1147  //
1148  // Into:
1149  // %select = CSINC %reg, %x, cc
1150  if (mi_match(Reg, MRI,
1151  m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
1152  m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
1153  Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1154  Reg = MatchReg;
1155  if (Invert) {
1156  CC = AArch64CC::getInvertedCondCode(CC);
1157  std::swap(Reg, OtherReg);
1158  }
1159  return true;
1160  }
1161 
1162  return false;
1163  };
1164 
1165  // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1166  // true/false values are constants.
1167  // FIXME: All of these patterns already exist in tablegen. We should be
1168  // able to import these.
1169  auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1170  &Optimized]() {
1171  if (Optimized)
1172  return false;
1173  auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
1174  auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
1175  if (!TrueCst && !FalseCst)
1176  return false;
1177 
1178  Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1179  if (TrueCst && FalseCst) {
1180  int64_t T = TrueCst->Value.getSExtValue();
1181  int64_t F = FalseCst->Value.getSExtValue();
1182 
1183  if (T == 0 && F == 1) {
1184  // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1185  Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1186  True = ZReg;
1187  False = ZReg;
1188  return true;
1189  }
1190 
1191  if (T == 0 && F == -1) {
1192  // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
1193  Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1194  True = ZReg;
1195  False = ZReg;
1196  return true;
1197  }
1198  }
1199 
1200  if (TrueCst) {
1201  int64_t T = TrueCst->Value.getSExtValue();
1202  if (T == 1) {
1203  // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1204  Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1205  True = False;
1206  False = ZReg;
1207  CC = AArch64CC::getInvertedCondCode(CC);
1208  return true;
1209  }
1210 
1211  if (T == -1) {
1212  // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1213  Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1214  True = False;
1215  False = ZReg;
1216  CC = AArch64CC::getInvertedCondCode(CC);
1217  return true;
1218  }
1219  }
1220 
1221  if (FalseCst) {
1222  int64_t F = FalseCst->Value.getSExtValue();
1223  if (F == 1) {
1224  // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1225  Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1226  False = ZReg;
1227  return true;
1228  }
1229 
1230  if (F == -1) {
1231  // G_SELECT cc, t, -1 -> CSINV t, zreg, cc
1232  Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1233  False = ZReg;
1234  return true;
1235  }
1236  }
1237  return false;
1238  };
1239 
1240  Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1241  Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1242  Optimized |= TryOptSelectCst();
1243  auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1244  constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1245  return &*SelectInst;
1246 }
1247 
1248 static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
1249  switch (P) {
1250  default:
1251  llvm_unreachable("Unknown condition code!");
1252  case CmpInst::ICMP_NE:
1253  return AArch64CC::NE;
1254  case CmpInst::ICMP_EQ:
1255  return AArch64CC::EQ;
1256  case CmpInst::ICMP_SGT:
1257  return AArch64CC::GT;
1258  case CmpInst::ICMP_SGE:
1259  return AArch64CC::GE;
1260  case CmpInst::ICMP_SLT:
1261  return AArch64CC::LT;
1262  case CmpInst::ICMP_SLE:
1263  return AArch64CC::LE;
1264  case CmpInst::ICMP_UGT:
1265  return AArch64CC::HI;
1266  case CmpInst::ICMP_UGE:
1267  return AArch64CC::HS;
1268  case CmpInst::ICMP_ULT:
1269  return AArch64CC::LO;
1270  case CmpInst::ICMP_ULE:
1271  return AArch64CC::LS;
1272  }
1273 }
1274 
1275 /// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
1276 static void changeFPCCToORAArch64CC(CmpInst::Predicate CC,
1277  AArch64CC::CondCode &CondCode,
1278  AArch64CC::CondCode &CondCode2) {
1279  CondCode2 = AArch64CC::AL;
1280  switch (CC) {
1281  default:
1282  llvm_unreachable("Unknown FP condition!");
1283  case CmpInst::FCMP_OEQ:
1284  CondCode = AArch64CC::EQ;
1285  break;
1286  case CmpInst::FCMP_OGT:
1287  CondCode = AArch64CC::GT;
1288  break;
1289  case CmpInst::FCMP_OGE:
1290  CondCode = AArch64CC::GE;
1291  break;
1292  case CmpInst::FCMP_OLT:
1293  CondCode = AArch64CC::MI;
1294  break;
1295  case CmpInst::FCMP_OLE:
1296  CondCode = AArch64CC::LS;
1297  break;
1298  case CmpInst::FCMP_ONE:
1299  CondCode = AArch64CC::MI;
1300  CondCode2 = AArch64CC::GT;
1301  break;
1302  case CmpInst::FCMP_ORD:
1303  CondCode = AArch64CC::VC;
1304  break;
1305  case CmpInst::FCMP_UNO:
1306  CondCode = AArch64CC::VS;
1307  break;
1308  case CmpInst::FCMP_UEQ:
1309  CondCode = AArch64CC::EQ;
1310  CondCode2 = AArch64CC::VS;
1311  break;
1312  case CmpInst::FCMP_UGT:
1313  CondCode = AArch64CC::HI;
1314  break;
1315  case CmpInst::FCMP_UGE:
1316  CondCode = AArch64CC::PL;
1317  break;
1318  case CmpInst::FCMP_ULT:
1319  CondCode = AArch64CC::LT;
1320  break;
1321  case CmpInst::FCMP_ULE:
1322  CondCode = AArch64CC::LE;
1323  break;
1324  case CmpInst::FCMP_UNE:
1325  CondCode = AArch64CC::NE;
1326  break;
1327  }
1328 }
1329 
1330 /// Convert an IR fp condition code to an AArch64 CC.
1331 /// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1332 /// should be AND'ed instead of OR'ed.
1333 static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC,
1334  AArch64CC::CondCode &CondCode,
1335  AArch64CC::CondCode &CondCode2) {
1336  CondCode2 = AArch64CC::AL;
1337  switch (CC) {
1338  default:
1339  changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
1340  assert(CondCode2 == AArch64CC::AL);
1341  break;
1342  case CmpInst::FCMP_ONE:
1343  // (a one b)
1344  // == ((a olt b) || (a ogt b))
1345  // == ((a ord b) && (a une b))
1346  CondCode = AArch64CC::VC;
1347  CondCode2 = AArch64CC::NE;
1348  break;
1349  case CmpInst::FCMP_UEQ:
1350  // (a ueq b)
1351  // == ((a uno b) || (a oeq b))
1352  // == ((a ule b) && (a uge b))
1353  CondCode = AArch64CC::PL;
1354  CondCode2 = AArch64CC::LE;
1355  break;
1356  }
1357 }
1358 
1359 /// Return a register which can be used as a bit to test in a TB(N)Z.
1360 static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1361  MachineRegisterInfo &MRI) {
1362  assert(Reg.isValid() && "Expected valid register!");
1363  bool HasZext = false;
1364  while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1365  unsigned Opc = MI->getOpcode();
1366 
1367  if (!MI->getOperand(0).isReg() ||
1368  !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1369  break;
1370 
1371  // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1372  //
1373  // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1374  // on the truncated x is the same as the bit number on x.
1375  if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1376  Opc == TargetOpcode::G_TRUNC) {
1377  if (Opc == TargetOpcode::G_ZEXT)
1378  HasZext = true;
1379 
1380  Register NextReg = MI->getOperand(1).getReg();
1381  // Did we find something worth folding?
1382  if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1383  break;
1384 
1385  // NextReg is worth folding. Keep looking.
1386  Reg = NextReg;
1387  continue;
1388  }
1389 
1390  // Attempt to find a suitable operation with a constant on one side.
1391  Optional<int64_t> C;
1392  Register TestReg;
1393  switch (Opc) {
1394  default:
1395  break;
1396  case TargetOpcode::G_AND:
1397  case TargetOpcode::G_XOR: {
1398  TestReg = MI->getOperand(1).getReg();
1399  Register ConstantReg = MI->getOperand(2).getReg();
1400  auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1401  if (!VRegAndVal) {
1402  // AND commutes, check the other side for a constant.
1403  // FIXME: Can we canonicalize the constant so that it's always on the
1404  // same side at some point earlier?
1405  std::swap(ConstantReg, TestReg);
1406  VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1407  }
1408  if (VRegAndVal) {
1409  if (HasZext)
1410  C = VRegAndVal->Value.getZExtValue();
1411  else
1412  C = VRegAndVal->Value.getSExtValue();
1413  }
1414  break;
1415  }
1416  case TargetOpcode::G_ASHR:
1417  case TargetOpcode::G_LSHR:
1418  case TargetOpcode::G_SHL: {
1419  TestReg = MI->getOperand(1).getReg();
1420  auto VRegAndVal =
1421  getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1422  if (VRegAndVal)
1423  C = VRegAndVal->Value.getSExtValue();
1424  break;
1425  }
1426  }
1427 
1428  // Didn't find a constant or viable register. Bail out of the loop.
1429  if (!C || !TestReg.isValid())
1430  break;
1431 
1432  // We found a suitable instruction with a constant. Check to see if we can
1433  // walk through the instruction.
1434  Register NextReg;
1435  unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1436  switch (Opc) {
1437  default:
1438  break;
1439  case TargetOpcode::G_AND:
1440  // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1441  if ((*C >> Bit) & 1)
1442  NextReg = TestReg;
1443  break;
1444  case TargetOpcode::G_SHL:
1445  // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1446  // the type of the register.
1447  if (*C <= Bit && (Bit - *C) < TestRegSize) {
1448  NextReg = TestReg;
1449  Bit = Bit - *C;
1450  }
1451  break;
1452  case TargetOpcode::G_ASHR:
1453  // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1454  // in x
1455  NextReg = TestReg;
1456  Bit = Bit + *C;
1457  if (Bit >= TestRegSize)
1458  Bit = TestRegSize - 1;
1459  break;
1460  case TargetOpcode::G_LSHR:
1461  // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1462  if ((Bit + *C) < TestRegSize) {
1463  NextReg = TestReg;
1464  Bit = Bit + *C;
1465  }
1466  break;
1467  case TargetOpcode::G_XOR:
1468  // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1469  // appropriate.
1470  //
1471  // e.g. If x' = xor x, c, and the b-th bit is set in c then
1472  //
1473  // tbz x', b -> tbnz x, b
1474  //
1475  // Because x' only has the b-th bit set if x does not.
1476  if ((*C >> Bit) & 1)
1477  Invert = !Invert;
1478  NextReg = TestReg;
1479  break;
1480  }
1481 
1482  // Check if we found anything worth folding.
1483  if (!NextReg.isValid())
1484  return Reg;
1485  Reg = NextReg;
1486  }
1487 
1488  return Reg;
1489 }
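// Illustrative walk-through of the folding above (editorial addition; %x is a
// placeholder vreg):
//
//   %shl = G_SHL %x, 2
//   tbz %shl, 5    --->   tbz %x, 3     (G_SHL case: bit 5 minus shift 2)
//
//   %xor = G_XOR %x, 0b1000
//   tbz %xor, 3    --->   tbnz %x, 3    (G_XOR case: bit 3 of the constant is
//                                        set, so the branch polarity inverts)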
1490 
1491 MachineInstr *AArch64InstructionSelector::emitTestBit(
1492  Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1493  MachineIRBuilder &MIB) const {
1494  assert(TestReg.isValid());
1495  assert(ProduceNonFlagSettingCondBr &&
1496  "Cannot emit TB(N)Z with speculation tracking!");
1497  MachineRegisterInfo &MRI = *MIB.getMRI();
1498 
1499  // Attempt to optimize the test bit by walking over instructions.
1500  TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1501  LLT Ty = MRI.getType(TestReg);
1502  unsigned Size = Ty.getSizeInBits();
1503  assert(!Ty.isVector() && "Expected a scalar!");
1504  assert(Bit < 64 && "Bit is too large!");
1505 
1506  // When the test register is a 64-bit register, we have to narrow to make
1507  // TBNZW work.
1508  bool UseWReg = Bit < 32;
1509  unsigned NecessarySize = UseWReg ? 32 : 64;
1510  if (Size != NecessarySize)
1511  TestReg = moveScalarRegClass(
1512  TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1513  MIB);
1514 
1515  static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1516  {AArch64::TBZW, AArch64::TBNZW}};
1517  unsigned Opc = OpcTable[UseWReg][IsNegative];
1518  auto TestBitMI =
1519  MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1520  constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1521  return &*TestBitMI;
1522 }
1523 
1524 bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1525  MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1526  MachineIRBuilder &MIB) const {
1527  assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1528  // Given something like this:
1529  //
1530  // %x = ...Something...
1531  // %one = G_CONSTANT i64 1
1532  // %zero = G_CONSTANT i64 0
1533  // %and = G_AND %x, %one
1534  // %cmp = G_ICMP intpred(ne), %and, %zero
1535  // %cmp_trunc = G_TRUNC %cmp
1536  // G_BRCOND %cmp_trunc, %bb.3
1537  //
1538  // We want to try and fold the AND into the G_BRCOND and produce either a
1539  // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1540  //
1541  // In this case, we'd get
1542  //
1543  // TBNZ %x %bb.3
1544  //
1545 
1546  // Check if the AND has a constant on its RHS which we can use as a mask.
1547  // If it's a power of 2, then it's the same as checking a specific bit.
1548  // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1549  auto MaybeBit = getIConstantVRegValWithLookThrough(
1550  AndInst.getOperand(2).getReg(), *MIB.getMRI());
1551  if (!MaybeBit)
1552  return false;
1553 
1554  int32_t Bit = MaybeBit->Value.exactLogBase2();
1555  if (Bit < 0)
1556  return false;
1557 
1558  Register TestReg = AndInst.getOperand(1).getReg();
1559 
1560  // Emit a TB(N)Z.
1561  emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1562  return true;
1563 }
1564 
1565 MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1566  bool IsNegative,
1567  MachineBasicBlock *DestMBB,
1568  MachineIRBuilder &MIB) const {
1569  assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1570  MachineRegisterInfo &MRI = *MIB.getMRI();
1571  assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1572  AArch64::GPRRegBankID &&
1573  "Expected GPRs only?");
1574  auto Ty = MRI.getType(CompareReg);
1575  unsigned Width = Ty.getSizeInBits();
1576  assert(!Ty.isVector() && "Expected scalar only?");
1577  assert(Width <= 64 && "Expected width to be at most 64?");
1578  static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1579  {AArch64::CBNZW, AArch64::CBNZX}};
1580  unsigned Opc = OpcTable[IsNegative][Width == 64];
1581  auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1582  constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1583  return &*BranchMI;
1584 }
1585 
1586 bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1587  MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1588  assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1589  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1590  // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1591  // totally clean. Some of them require two branches to implement.
1592  auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1593  emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1594  Pred);
1595  AArch64CC::CondCode CC1, CC2;
1596  changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
1597  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1598  MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1599  if (CC2 != AArch64CC::AL)
1600  MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1601  I.eraseFromParent();
1602  return true;
1603 }
1604 
1605 bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1606  MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1607  assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1608  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1609  // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1610  //
1611  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1612  // instructions will not be produced, as they are conditional branch
1613  // instructions that do not set flags.
1614  if (!ProduceNonFlagSettingCondBr)
1615  return false;
1616 
1617  MachineRegisterInfo &MRI = *MIB.getMRI();
1618  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1619  auto Pred =
1620  static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1621  Register LHS = ICmp.getOperand(2).getReg();
1622  Register RHS = ICmp.getOperand(3).getReg();
1623 
1624  // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1625  auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1626  MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1627 
1628  // When we can emit a TB(N)Z, prefer that.
1629  //
1630  // Handle non-commutative condition codes first.
1631  // Note that we don't want to do this when we have a G_AND because it can
1632  // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1633  if (VRegAndVal && !AndInst) {
1634  int64_t C = VRegAndVal->Value.getSExtValue();
1635 
1636  // When we have a greater-than comparison, we can just test if the msb is
1637  // zero.
1638  if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1639  uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1640  emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1641  I.eraseFromParent();
1642  return true;
1643  }
1644 
1645  // When we have a less than comparison, we can just test if the msb is not
1646  // zero.
1647  if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1648  uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1649  emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1650  I.eraseFromParent();
1651  return true;
1652  }
1653  }
1654 
1655  // Attempt to handle commutative condition codes. Right now, that's only
1656  // eq/ne.
1657  if (ICmpInst::isEquality(Pred)) {
1658  if (!VRegAndVal) {
1659  std::swap(RHS, LHS);
1660  VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1661  AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1662  }
1663 
1664  if (VRegAndVal && VRegAndVal->Value == 0) {
1665  // If there's a G_AND feeding into this branch, try to fold it away by
1666  // emitting a TB(N)Z instead.
1667  //
1668  // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1669  // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1670  // would be redundant.
1671  if (AndInst &&
1672  tryOptAndIntoCompareBranch(
1673  *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1674  I.eraseFromParent();
1675  return true;
1676  }
1677 
1678  // Otherwise, try to emit a CB(N)Z instead.
1679  auto LHSTy = MRI.getType(LHS);
1680  if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1681  emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1682  I.eraseFromParent();
1683  return true;
1684  }
1685  }
1686  }
1687 
1688  return false;
1689 }
1690 
1691 bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1692  MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1693  assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1694  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1695  if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1696  return true;
1697 
1698  // Couldn't optimize. Emit a compare + a Bcc.
1699  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1700  auto PredOp = ICmp.getOperand(1);
1701  emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1702  const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1703  static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
1704  MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1705  I.eraseFromParent();
1706  return true;
1707 }
1708 
1709 bool AArch64InstructionSelector::selectCompareBranch(
1710  MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
1711  Register CondReg = I.getOperand(0).getReg();
1712  MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1713  if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) {
1714  CondReg = CCMI->getOperand(1).getReg();
1715  CCMI = MRI.getVRegDef(CondReg);
1716  }
1717 
1718  // Try to select the G_BRCOND using whatever is feeding the condition if
1719  // possible.
1720  unsigned CCMIOpc = CCMI->getOpcode();
1721  if (CCMIOpc == TargetOpcode::G_FCMP)
1722  return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1723  if (CCMIOpc == TargetOpcode::G_ICMP)
1724  return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1725 
1726  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1727  // instructions will not be produced, as they are conditional branch
1728  // instructions that do not set flags.
1729  if (ProduceNonFlagSettingCondBr) {
1730  emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1731  I.getOperand(1).getMBB(), MIB);
1732  I.eraseFromParent();
1733  return true;
1734  }
1735 
1736  // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
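  // Test the condition with ANDS and branch on NE, so the branch is taken
  // exactly when the G_BRCOND condition is true.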
1737  auto TstMI =
1738  MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1739  constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
1740  auto Bcc = MIB.buildInstr(AArch64::Bcc)
1741  .addImm(AArch64CC::NE)
1742  .addMBB(I.getOperand(1).getMBB());
1743  I.eraseFromParent();
1744  return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1745 }
1746 
1747 /// Returns the element immediate value of a vector shift operand if found.
1748 /// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1749 static Optional<int64_t> getVectorShiftImm(Register Reg,
1750  MachineRegisterInfo &MRI) {
1751  assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1752  MachineInstr *OpMI = MRI.getVRegDef(Reg);
1753  return getAArch64VectorSplatScalar(*OpMI, MRI);
1754 }
1755 
1756 /// Matches and returns the shift immediate value for a SHL instruction given
1757 /// a shift operand.
1758 static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) {
1759  Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1760  if (!ShiftImm)
1761  return None;
1762  // Check the immediate is in range for a SHL.
1763  int64_t Imm = *ShiftImm;
1764  if (Imm < 0)
1765  return None;
1766  switch (SrcTy.getElementType().getSizeInBits()) {
1767  default:
1768  LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1769  return None;
1770  case 8:
1771  if (Imm > 7)
1772  return None;
1773  break;
1774  case 16:
1775  if (Imm > 15)
1776  return None;
1777  break;
1778  case 32:
1779  if (Imm > 31)
1780  return None;
1781  break;
1782  case 64:
1783  if (Imm > 63)
1784  return None;
1785  break;
1786  }
1787  return Imm;
1788 }
1789 
1790 bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
1791  MachineRegisterInfo &MRI) {
1792  assert(I.getOpcode() == TargetOpcode::G_SHL);
1793  Register DstReg = I.getOperand(0).getReg();
1794  const LLT Ty = MRI.getType(DstReg);
1795  Register Src1Reg = I.getOperand(1).getReg();
1796  Register Src2Reg = I.getOperand(2).getReg();
1797 
1798  if (!Ty.isVector())
1799  return false;
1800 
1801  // Check if we have a vector of constants on RHS that we can select as the
1802  // immediate form.
1803  Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1804 
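  // A constant splat shift amount lets us use the immediate SHL form;
  // otherwise fall back to USHL with the shift amounts in a vector register.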
1805  unsigned Opc = 0;
1806  if (Ty == LLT::fixed_vector(2, 64)) {
1807  Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1808  } else if (Ty == LLT::fixed_vector(4, 32)) {
1809  Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1810  } else if (Ty == LLT::fixed_vector(2, 32)) {
1811  Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1812  } else if (Ty == LLT::fixed_vector(4, 16)) {
1813  Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1814  } else if (Ty == LLT::fixed_vector(8, 16)) {
1815  Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1816  } else if (Ty == LLT::fixed_vector(16, 8)) {
1817  Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1818  } else if (Ty == LLT::fixed_vector(8, 8)) {
1819  Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1820  } else {
1821  LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1822  return false;
1823  }
1824 
1825  auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1826  if (ImmVal)
1827  Shl.addImm(*ImmVal);
1828  else
1829  Shl.addUse(Src2Reg);
1830  constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1831  I.eraseFromParent();
1832  return true;
1833 }
1834 
1835 bool AArch64InstructionSelector::selectVectorAshrLshr(
1836  MachineInstr &I, MachineRegisterInfo &MRI) {
1837  assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1838  I.getOpcode() == TargetOpcode::G_LSHR);
1839  Register DstReg = I.getOperand(0).getReg();
1840  const LLT Ty = MRI.getType(DstReg);
1841  Register Src1Reg = I.getOperand(1).getReg();
1842  Register Src2Reg = I.getOperand(2).getReg();
1843 
1844  if (!Ty.isVector())
1845  return false;
1846 
1847  bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1848 
1849  // We expect the immediate case to be lowered in the PostLegalCombiner to
1850  // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1851 
1852  // There is not a shift right register instruction, but the shift left
1853  // There is no vector shift-right-by-register instruction; instead, the
1854  // shift-left-by-register instruction takes a signed amount, where negative
1855  // amounts specify a right shift.
1856  unsigned Opc = 0;
1857  unsigned NegOpc = 0;
1858  const TargetRegisterClass *RC =
1859  getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
1860  if (Ty == LLT::fixed_vector(2, 64)) {
1861  Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1862  NegOpc = AArch64::NEGv2i64;
1863  } else if (Ty == LLT::fixed_vector(4, 32)) {
1864  Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1865  NegOpc = AArch64::NEGv4i32;
1866  } else if (Ty == LLT::fixed_vector(2, 32)) {
1867  Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1868  NegOpc = AArch64::NEGv2i32;
1869  } else if (Ty == LLT::fixed_vector(4, 16)) {
1870  Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1871  NegOpc = AArch64::NEGv4i16;
1872  } else if (Ty == LLT::fixed_vector(8, 16)) {
1873  Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1874  NegOpc = AArch64::NEGv8i16;
1875  } else if (Ty == LLT::fixed_vector(16, 8)) {
1876  Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1877  NegOpc = AArch64::NEGv16i8;
1878  } else if (Ty == LLT::fixed_vector(8, 8)) {
1879  Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1880  NegOpc = AArch64::NEGv8i8;
1881  } else {
1882  LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1883  return false;
1884  }
1885 
1886  auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1887  constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1888  auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1889  constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1890  I.eraseFromParent();
1891  return true;
1892 }
1893 
1894 bool AArch64InstructionSelector::selectVaStartAAPCS(
1895  MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1896  return false;
1897 }
1898 
1899 bool AArch64InstructionSelector::selectVaStartDarwin(
1900  MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1901  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1902  Register ListReg = I.getOperand(0).getReg();
1903 
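  // On Darwin, va_list is a single pointer; compute the address of the first
  // vararg stack slot and store it through the va_list operand.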
1904  Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1905 
1906  auto MIB =
1907  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
1908  .addDef(ArgsAddrReg)
1909  .addFrameIndex(FuncInfo->getVarArgsStackIndex())
1910  .addImm(0)
1911  .addImm(0);
1912 
1914 
1915  MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
1916  .addUse(ArgsAddrReg)
1917  .addUse(ListReg)
1918  .addImm(0)
1919  .addMemOperand(*I.memoperands_begin());
1920 
1921  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1922  I.eraseFromParent();
1923  return true;
1924 }
1925 
1926 void AArch64InstructionSelector::materializeLargeCMVal(
1927  MachineInstr &I, const Value *V, unsigned OpFlags) {
1928  MachineBasicBlock &MBB = *I.getParent();
1929  MachineFunction &MF = *MBB.getParent();
1930  MachineRegisterInfo &MRI = MF.getRegInfo();
1931 
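  // Build the 64-bit value 16 bits at a time: MOVZ materializes the G0 chunk
  // and three MOVKs insert the G1/G2/G3 chunks at shifts 16, 32, and 48.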
1932  auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
1933  MovZ->addOperand(MF, I.getOperand(1));
1934  MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
1935  AArch64II::MO_NC);
1936  MovZ->addOperand(MF, MachineOperand::CreateImm(0));
1937  constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
1938 
1939  auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
1940  Register ForceDstReg) {
1941  Register DstReg = ForceDstReg
1942  ? ForceDstReg
1943  : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1944  auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
1945  if (auto *GV = dyn_cast<GlobalValue>(V)) {
1946  MovI->addOperand(MF, MachineOperand::CreateGA(
1947  GV, MovZ->getOperand(1).getOffset(), Flags));
1948  } else {
1949  MovI->addOperand(
1950  MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
1951  MovZ->getOperand(1).getOffset(), Flags));
1952  }
1953  MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
1954  constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
1955  return DstReg;
1956  };
1957  Register DstReg = BuildMovK(MovZ.getReg(0),
1958  AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
1959  DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
1960  BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
1961 }
1962 
1963 bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
1964  MachineBasicBlock &MBB = *I.getParent();
1965  MachineFunction &MF = *MBB.getParent();
1966  MachineRegisterInfo &MRI = MF.getRegInfo();
1967 
1968  switch (I.getOpcode()) {
1969  case TargetOpcode::G_STORE: {
1970  bool Changed = contractCrossBankCopyIntoStore(I, MRI);
1971  MachineOperand &SrcOp = I.getOperand(0);
1972  if (MRI.getType(SrcOp.getReg()).isPointer()) {
1973  // Allow matching with imported patterns for stores of pointers. Unlike
1974  // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
1975  // and constrain.
1976  auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
1977  Register NewSrc = Copy.getReg(0);
1978  SrcOp.setReg(NewSrc);
1979  RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
1980  Changed = true;
1981  }
1982  return Changed;
1983  }
1984  case TargetOpcode::G_PTR_ADD:
1985  return convertPtrAddToAdd(I, MRI);
1986  case TargetOpcode::G_LOAD: {
1987  // For scalar loads of pointers, we try to convert the dest type from p0
1988  // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
1989  // conversion, this should be ok because all users should have been
1990  // selected already, so the type doesn't matter for them.
1991  Register DstReg = I.getOperand(0).getReg();
1992  const LLT DstTy = MRI.getType(DstReg);
1993  if (!DstTy.isPointer())
1994  return false;
1995  MRI.setType(DstReg, LLT::scalar(64));
1996  return true;
1997  }
1998  case AArch64::G_DUP: {
1999  // Convert the type from p0 to s64 to help selection.
2000  LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2001  if (!DstTy.getElementType().isPointer())
2002  return false;
2003  auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
2004  MRI.setType(I.getOperand(0).getReg(),
2005  DstTy.changeElementType(LLT::scalar(64)));
2006  MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2007  I.getOperand(1).setReg(NewSrc.getReg(0));
2008  return true;
2009  }
2010  case TargetOpcode::G_UITOFP:
2011  case TargetOpcode::G_SITOFP: {
2012  // If both source and destination regbanks are FPR, then convert the opcode
2013  // to G_SITOF so that the importer can select it to an fpr variant.
2014  // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
2015  // copy.
2016  Register SrcReg = I.getOperand(1).getReg();
2017  LLT SrcTy = MRI.getType(SrcReg);
2018  LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2019  if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
2020  return false;
2021 
2022  if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
2023  if (I.getOpcode() == TargetOpcode::G_SITOFP)
2024  I.setDesc(TII.get(AArch64::G_SITOF));
2025  else
2026  I.setDesc(TII.get(AArch64::G_UITOF));
2027  return true;
2028  }
2029  return false;
2030  }
2031  default:
2032  return false;
2033  }
2034 }
2035 
2036 /// This lowering tries to look for G_PTR_ADD instructions and then converts
2037 /// them to a standard G_ADD with a COPY on the source.
2038 ///
2039 /// The motivation behind this is to expose the add semantics to the imported
2040 /// tablegen patterns. We shouldn't need to check for uses being loads/stores,
2041 /// because the selector works bottom up, uses before defs. By the time we
2042  /// end up trying to select a G_PTR_ADD, we will already have attempted to
2043  /// fold it into the users' addressing modes and failed.
2044 bool AArch64InstructionSelector::convertPtrAddToAdd(
2045  MachineInstr &I, MachineRegisterInfo &MRI) {
2046  assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
2047  Register DstReg = I.getOperand(0).getReg();
2048  Register AddOp1Reg = I.getOperand(1).getReg();
2049  const LLT PtrTy = MRI.getType(DstReg);
2050  if (PtrTy.getAddressSpace() != 0)
2051  return false;
2052 
2053  const LLT CastPtrTy =
2054  PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
2055  auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
2056  // Set regbanks on the registers.
2057  if (PtrTy.isVector())
2058  MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2059  else
2060  MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2061 
2062  // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2063  // %dst(intty) = G_ADD %intbase, off
2064  I.setDesc(TII.get(TargetOpcode::G_ADD));
2065  MRI.setType(DstReg, CastPtrTy);
2066  I.getOperand(1).setReg(PtrToInt.getReg(0));
2067  if (!select(*PtrToInt)) {
2068  LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
2069  return false;
2070  }
2071 
2072  // Also take the opportunity here to try to do some optimization.
2073  // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
2074  Register NegatedReg;
2075  if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
2076  return true;
2077  I.getOperand(2).setReg(NegatedReg);
2078  I.setDesc(TII.get(TargetOpcode::G_SUB));
2079  return true;
2080 }
2081 
2082 bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
2083  MachineRegisterInfo &MRI) {
2084  // We try to match the immediate variant of LSL, which is actually an alias
2085  // for a special case of UBFM. Otherwise, we fall back to the imported
2086  // selector which will match the register variant.
2087  assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2088  const auto &MO = I.getOperand(2);
2089  auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
2090  if (!VRegAndVal)
2091  return false;
2092 
2093  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2094  if (DstTy.isVector())
2095  return false;
2096  bool Is64Bit = DstTy.getSizeInBits() == 64;
2097  auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2098  auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2099 
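  // selectShiftA_*/selectShiftB_* render the two UBFM immediates (immr/imms)
  // that encode LSL #imm; if either is unavailable, the shift amount can't be
  // expressed in the immediate form.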
2100  if (!Imm1Fn || !Imm2Fn)
2101  return false;
2102 
2103  auto NewI =
2104  MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2105  {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2106 
2107  for (auto &RenderFn : *Imm1Fn)
2108  RenderFn(NewI);
2109  for (auto &RenderFn : *Imm2Fn)
2110  RenderFn(NewI);
2111 
2112  I.eraseFromParent();
2113  return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
2114 }
2115 
2116 bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2117  MachineInstr &I, MachineRegisterInfo &MRI) {
2118  assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2119  // If we're storing a scalar, it doesn't matter what register bank that
2120  // scalar is on. All that matters is the size.
2121  //
2122  // So, if we see something like this (with a 32-bit scalar as an example):
2123  //
2124  // %x:gpr(s32) = ... something ...
2125  // %y:fpr(s32) = COPY %x:gpr(s32)
2126  // G_STORE %y:fpr(s32)
2127  //
2128  // We can fix this up into something like this:
2129  //
2130  // G_STORE %x:gpr(s32)
2131  //
2132  // And then continue the selection process normally.
2133  Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2134  if (!DefDstReg.isValid())
2135  return false;
2136  LLT DefDstTy = MRI.getType(DefDstReg);
2137  Register StoreSrcReg = I.getOperand(0).getReg();
2138  LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2139 
2140  // If we get something strange like a physical register, then we shouldn't
2141  // go any further.
2142  if (!DefDstTy.isValid())
2143  return false;
2144 
2145  // Are the source and dst types the same size?
2146  if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2147  return false;
2148 
2149  if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2150  RBI.getRegBank(DefDstReg, MRI, TRI))
2151  return false;
2152 
2153  // We have a cross-bank copy, which is entering a store. Let's fold it.
2154  I.getOperand(0).setReg(DefDstReg);
2155  return true;
2156 }
2157 
2158 bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2159  assert(I.getParent() && "Instruction should be in a basic block!");
2160  assert(I.getParent()->getParent() && "Instruction should be in a function!");
2161 
2162  MachineBasicBlock &MBB = *I.getParent();
2163  MachineFunction &MF = *MBB.getParent();
2164  MachineRegisterInfo &MRI = MF.getRegInfo();
2165 
2166  switch (I.getOpcode()) {
2167  case AArch64::G_DUP: {
2168  // Before selecting a DUP instruction, check if it is better selected as a
2169  // MOV or load from a constant pool.
2170  Register Src = I.getOperand(1).getReg();
2171  auto ValAndVReg = getIConstantVRegValWithLookThrough(Src, MRI);
2172  if (!ValAndVReg)
2173  return false;
2174  LLVMContext &Ctx = MF.getFunction().getContext();
2175  Register Dst = I.getOperand(0).getReg();
2176  auto *CV = ConstantDataVector::getSplat(
2177  MRI.getType(Dst).getNumElements(),
2178  ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()),
2179  ValAndVReg->Value));
2180  if (!emitConstantVector(Dst, CV, MIB, MRI))
2181  return false;
2182  I.eraseFromParent();
2183  return true;
2184  }
2185  case TargetOpcode::G_SEXT:
2186  // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
2187  // over a normal extend.
2188  if (selectUSMovFromExtend(I, MRI))
2189  return true;
2190  return false;
2191  case TargetOpcode::G_BR:
2192  return false;
2193  case TargetOpcode::G_SHL:
2194  return earlySelectSHL(I, MRI);
2195  case TargetOpcode::G_CONSTANT: {
2196  bool IsZero = false;
2197  if (I.getOperand(1).isCImm())
2198  IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
2199  else if (I.getOperand(1).isImm())
2200  IsZero = I.getOperand(1).getImm() == 0;
2201 
2202  if (!IsZero)
2203  return false;
2204 
2205  Register DefReg = I.getOperand(0).getReg();
2206  LLT Ty = MRI.getType(DefReg);
2207  if (Ty.getSizeInBits() == 64) {
2208  I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2209  RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2210  } else if (Ty.getSizeInBits() == 32) {
2211  I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2212  RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2213  } else
2214  return false;
2215 
2216  I.setDesc(TII.get(TargetOpcode::COPY));
2217  return true;
2218  }
2219 
2220  case TargetOpcode::G_ADD: {
2221  // Check if this is being fed by a G_ICMP on either side.
2222  //
2223  // (cmp pred, x, y) + z
2224  //
2225  // In the above case, when the cmp is true, we increment z by 1. So, we can
2226  // fold the add into the cset for the cmp by using cinc.
2227  //
2228  // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
2229  Register AddDst = I.getOperand(0).getReg();
2230  Register AddLHS = I.getOperand(1).getReg();
2231  Register AddRHS = I.getOperand(2).getReg();
2232  // Only handle scalars.
2233  LLT Ty = MRI.getType(AddLHS);
2234  if (Ty.isVector())
2235  return false;
2236  // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
2237  // bits.
2238  unsigned Size = Ty.getSizeInBits();
2239  if (Size != 32 && Size != 64)
2240  return false;
2241  auto MatchCmp = [&](Register Reg) -> MachineInstr * {
2242  if (!MRI.hasOneNonDBGUse(Reg))
2243  return nullptr;
2244  // If the LHS of the add is 32 bits, then we want to fold a 32-bit
2245  // compare.
2246  if (Size == 32)
2247  return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
2248  // We model scalar compares using 32-bit destinations right now.
2249  // If it's a 64-bit compare, it'll have 64-bit sources.
2250  Register ZExt;
2251  if (!mi_match(Reg, MRI,
2252  m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt))))))
2253  return nullptr;
2254  auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
2255  if (!Cmp ||
2256  MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
2257  return nullptr;
2258  return Cmp;
2259  };
2260  // Try to match
2261  // z + (cmp pred, x, y)
2262  MachineInstr *Cmp = MatchCmp(AddRHS);
2263  if (!Cmp) {
2264  // (cmp pred, x, y) + z
2265  std::swap(AddLHS, AddRHS);
2266  Cmp = MatchCmp(AddRHS);
2267  if (!Cmp)
2268  return false;
2269  }
2270  auto &PredOp = Cmp->getOperand(1);
2271  auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
2272  const AArch64CC::CondCode InvCC =
2273  changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
2274  MIB.setInstrAndDebugLoc(I);
2275  emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
2276  /*RHS=*/Cmp->getOperand(3), PredOp, MIB);
2277  emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
2278  I.eraseFromParent();
2279  return true;
2280  }
2281  case TargetOpcode::G_OR: {
2282  // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2283  // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2284  // shifting and masking that we can replace with a BFI (encoded as a BFM).
2285  Register Dst = I.getOperand(0).getReg();
2286  LLT Ty = MRI.getType(Dst);
2287 
2288  if (!Ty.isScalar())
2289  return false;
2290 
2291  unsigned Size = Ty.getSizeInBits();
2292  if (Size != 32 && Size != 64)
2293  return false;
2294 
2295  Register ShiftSrc;
2296  int64_t ShiftImm;
2297  Register MaskSrc;
2298  int64_t MaskImm;
2299  if (!mi_match(
2300  Dst, MRI,
2301  m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
2302  m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
2303  return false;
2304 
2305  if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2306  return false;
2307 
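  // Encode the insert as a BFM: immr = Size - ShiftImm places ShiftSrc at bit
  // ShiftImm, and imms = Width - 1 = Size - ShiftImm - 1 gives the field width.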
2308  int64_t Immr = Size - ShiftImm;
2309  int64_t Imms = Size - ShiftImm - 1;
2310  unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2311  emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2312  I.eraseFromParent();
2313  return true;
2314  }
2315  case TargetOpcode::G_FENCE: {
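  // Operand 1 is the fence's sync scope; a single-thread fence only needs a
  // compiler barrier. Otherwise, operand 0 is the ordering: emit DMB ISHLD
  // (0x9) for an acquire-only fence (ordering == 4) and DMB ISH (0xb) for
  // anything stronger.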
2316  if (I.getOperand(1).getImm() == 0)
2317  BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CompilerBarrier))
2318  .addImm(I.getOperand(0).getImm());
2319  else
2320  BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::DMB))
2321  .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
2322  I.eraseFromParent();
2323  return true;
2324  }
2325  default:
2326  return false;
2327  }
2328 }
2329 
2330 bool AArch64InstructionSelector::select(MachineInstr &I) {
2331  assert(I.getParent() && "Instruction should be in a basic block!");
2332  assert(I.getParent()->getParent() && "Instruction should be in a function!");
2333 
2334  MachineBasicBlock &MBB = *I.getParent();
2335  MachineFunction &MF = *MBB.getParent();
2336  MachineRegisterInfo &MRI = MF.getRegInfo();
2337 
2338  const AArch64Subtarget *Subtarget =
2339  &static_cast<const AArch64Subtarget &>(MF.getSubtarget());
2340  if (Subtarget->requiresStrictAlign()) {
2341  // We don't support this feature yet.
2342  LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2343  return false;
2344  }
2345 
2346  MIB.setInstrAndDebugLoc(I);
2347 
2348  unsigned Opcode = I.getOpcode();
2349  // G_PHI requires the same handling as PHI
2350  if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2351  // Certain non-generic instructions also need some special handling.
2352 
2353  if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2354  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2355 
2356  if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2357  const Register DefReg = I.getOperand(0).getReg();
2358  const LLT DefTy = MRI.getType(DefReg);
2359 
2360  const RegClassOrRegBank &RegClassOrBank =
2361  MRI.getRegClassOrRegBank(DefReg);
2362 
2363  const TargetRegisterClass *DefRC
2364  = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
2365  if (!DefRC) {
2366  if (!DefTy.isValid()) {
2367  LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2368  return false;
2369  }
2370  const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
2371  DefRC = getRegClassForTypeOnBank(DefTy, RB);
2372  if (!DefRC) {
2373  LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2374  return false;
2375  }
2376  }
2377 
2378  I.setDesc(TII.get(TargetOpcode::PHI));
2379 
2380  return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2381  }
2382 
2383  if (I.isCopy())
2384  return selectCopy(I, TII, MRI, TRI, RBI);
2385 
2386  return true;
2387  }
2388 
2389 
2390  if (I.getNumOperands() != I.getNumExplicitOperands()) {
2391  LLVM_DEBUG(
2392  dbgs() << "Generic instruction has unexpected implicit operands\n");
2393  return false;
2394  }
2395 
2396  // Try to do some lowering before we start instruction selecting. These
2397  // lowerings are purely transformations on the input G_MIR and so selection
2398  // must continue after any modification of the instruction.
2399  if (preISelLower(I)) {
2400  Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2401  }
2402 
2403  // There may be patterns that the importer can't handle optimally and
2404  // instead selects into a suboptimal sequence, so our custom C++ selection
2405  // code later never gets a chance to work on them. Therefore, we make an
2406  // early selection attempt here to give priority to certain selection
2407  // routines over the imported ones.
2408  if (earlySelect(I))
2409  return true;
2410 
2411  if (selectImpl(I, *CoverageInfo))
2412  return true;
2413 
2414  LLT Ty =
2415  I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2416 
2417  switch (Opcode) {
2418  case TargetOpcode::G_SBFX:
2419  case TargetOpcode::G_UBFX: {
2420  static const unsigned OpcTable[2][2] = {
2421  {AArch64::UBFMWri, AArch64::UBFMXri},
2422  {AArch64::SBFMWri, AArch64::SBFMXri}};
2423  bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2424  unsigned Size = Ty.getSizeInBits();
2425  unsigned Opc = OpcTable[IsSigned][Size == 64];
2426  auto Cst1 =
2427  getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
2428  assert(Cst1 && "Should have gotten a constant for src 1?");
2429  auto Cst2 =
2430  getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
2431  assert(Cst2 && "Should have gotten a constant for src 2?");
2432  auto LSB = Cst1->Value.getZExtValue();
2433  auto Width = Cst2->Value.getZExtValue();
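  // SBFM/UBFM encode the extract as immr = LSB and imms = LSB + Width - 1.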
2434  auto BitfieldInst =
2435  MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
2436  .addImm(LSB)
2437  .addImm(LSB + Width - 1);
2438  I.eraseFromParent();
2439  return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
2440  }
2441  case TargetOpcode::G_BRCOND:
2442  return selectCompareBranch(I, MF, MRI);
2443 
2444  case TargetOpcode::G_BRINDIRECT: {
2445  I.setDesc(TII.get(AArch64::BR));
2446  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2447  }
2448 
2449  case TargetOpcode::G_BRJT:
2450  return selectBrJT(I, MRI);
2451 
2452  case AArch64::G_ADD_LOW: {
2453  // This op may have been separated from its ADRP companion by the localizer
2454  // or some other code motion pass. Given that many CPUs will try to
2455  // macro fuse these operations anyway, select this into a MOVaddr pseudo
2456  // which will later be expanded into an ADRP+ADD pair after scheduling.
2457  MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2458  if (BaseMI->getOpcode() != AArch64::ADRP) {
2459  I.setDesc(TII.get(AArch64::ADDXri));
2460  I.addOperand(MachineOperand::CreateImm(0));
2461  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2462  }
2463  assert(TM.getCodeModel() == CodeModel::Small &&
2464  "Expected small code model");
2465  auto Op1 = BaseMI->getOperand(1);
2466  auto Op2 = I.getOperand(2);
2467  auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2468  .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2469  Op1.getTargetFlags())
2470  .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2471  Op2.getTargetFlags());
2472  I.eraseFromParent();
2473  return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2474  }
2475 
2476  case TargetOpcode::G_BSWAP: {
2477  // Handle vector types for G_BSWAP directly.
2478  Register DstReg = I.getOperand(0).getReg();
2479  LLT DstTy = MRI.getType(DstReg);
2480 
2481  // We should only get vector types here; everything else is handled by the
2482  // importer right now.
2483  if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
2484  LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
2485  return false;
2486  }
2487 
2488  // Only handle 4 and 2 element vectors for now.
2489  // TODO: 16-bit elements.
2490  unsigned NumElts = DstTy.getNumElements();
2491  if (NumElts != 4 && NumElts != 2) {
2492  LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
2493  return false;
2494  }
2495 
2496  // Choose the correct opcode for the supported types. Right now, that's
2497  // v2s32, v4s32, and v2s64.
2498  unsigned Opc = 0;
2499  unsigned EltSize = DstTy.getElementType().getSizeInBits();
2500  if (EltSize == 32)
2501  Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
2502  : AArch64::REV32v16i8;
2503  else if (EltSize == 64)
2504  Opc = AArch64::REV64v16i8;
2505 
2506  // We should always get something by the time we get here...
2507  assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
2508 
2509  I.setDesc(TII.get(Opc));
2510  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2511  }
2512 
2513  case TargetOpcode::G_FCONSTANT:
2514  case TargetOpcode::G_CONSTANT: {
2515  const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2516 
2517  const LLT s8 = LLT::scalar(8);
2518  const LLT s16 = LLT::scalar(16);
2519  const LLT s32 = LLT::scalar(32);
2520  const LLT s64 = LLT::scalar(64);
2521  const LLT s128 = LLT::scalar(128);
2522  const LLT p0 = LLT::pointer(0, 64);
2523 
2524  const Register DefReg = I.getOperand(0).getReg();
2525  const LLT DefTy = MRI.getType(DefReg);
2526  const unsigned DefSize = DefTy.getSizeInBits();
2527  const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2528 
2529  // FIXME: Redundant check, but even less readable when factored out.
2530  if (isFP) {
2531  if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2532  LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2533  << " constant, expected: " << s16 << " or " << s32
2534  << " or " << s64 << " or " << s128 << '\n');
2535  return false;
2536  }
2537 
2538  if (RB.getID() != AArch64::FPRRegBankID) {
2539  LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2540  << " constant on bank: " << RB
2541  << ", expected: FPR\n");
2542  return false;
2543  }
2544 
2545  // The case when we have 0.0 is covered by tablegen. Reject it here so we
2546  // can be sure tablegen works correctly and isn't rescued by this code.
2547  // 0.0 is not covered by tablegen for FP128, so we handle that scenario
2548  // here instead.
2549  if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2550  return false;
2551  } else {
2552  // s32 and s64 are covered by tablegen.
2553  if (Ty != p0 && Ty != s8 && Ty != s16) {
2554  LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2555  << " constant, expected: " << s32 << ", " << s64
2556  << ", or " << p0 << '\n');
2557  return false;
2558  }
2559 
2560  if (RB.getID() != AArch64::GPRRegBankID) {
2561  LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2562  << " constant on bank: " << RB
2563  << ", expected: GPR\n");
2564  return false;
2565  }
2566  }
2567 
2568  if (isFP) {
2569  const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB);
2570  // For 16, 64, and 128b values, emit a constant pool load.
2571  switch (DefSize) {
2572  default:
2573  llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
2574  case 32:
2575  // For s32, use a cp load if we have optsize/minsize.
2576  if (!shouldOptForSize(&MF))
2577  break;
2578  LLVM_FALLTHROUGH;
2579  case 16:
2580  case 64:
2581  case 128: {
2582  auto *FPImm = I.getOperand(1).getFPImm();
2583  auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2584  if (!LoadMI) {
2585  LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2586  return false;
2587  }
2588  MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2589  I.eraseFromParent();
2590  return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2591  }
2592  }
2593 
2594  // Either emit a FMOV, or emit a copy to emit a normal mov.
2595  assert(DefSize == 32 &&
2596  "Expected constant pool loads for all sizes other than 32!");
2597  const Register DefGPRReg =
2598  MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2599  MachineOperand &RegOp = I.getOperand(0);
2600  RegOp.setReg(DefGPRReg);
2601  MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2602  MIB.buildCopy({DefReg}, {DefGPRReg});
2603 
2604  if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2605  LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2606  return false;
2607  }
2608 
2609  MachineOperand &ImmOp = I.getOperand(1);
2610  // FIXME: Is going through int64_t always correct?
2611  ImmOp.ChangeToImmediate(
2612  ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2613  } else if (I.getOperand(1).isCImm()) {
2614  uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2615  I.getOperand(1).ChangeToImmediate(Val);
2616  } else if (I.getOperand(1).isImm()) {
2617  uint64_t Val = I.getOperand(1).getImm();
2618  I.getOperand(1).ChangeToImmediate(Val);
2619  }
2620 
2621  const unsigned MovOpc =
2622  DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2623  I.setDesc(TII.get(MovOpc));
2624  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2625  return true;
2626  }
2627  case TargetOpcode::G_EXTRACT: {
2628  Register DstReg = I.getOperand(0).getReg();
2629  Register SrcReg = I.getOperand(1).getReg();
2630  LLT SrcTy = MRI.getType(SrcReg);
2631  LLT DstTy = MRI.getType(DstReg);
2632  (void)DstTy;
2633  unsigned SrcSize = SrcTy.getSizeInBits();
2634 
2635  if (SrcTy.getSizeInBits() > 64) {
2636  // This should be an extract of an s128, which is like a vector extract.
2637  if (SrcTy.getSizeInBits() != 128)
2638  return false;
2639  // Only support extracting 64 bits from an s128 at the moment.
2640  if (DstTy.getSizeInBits() != 64)
2641  return false;
2642 
2643  unsigned Offset = I.getOperand(2).getImm();
2644  if (Offset % 64 != 0)
2645  return false;
2646 
2647  // Check we have the right regbank always.
2648  const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2649  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2650  assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2651 
2652  if (SrcRB.getID() == AArch64::GPRRegBankID) {
2653  MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2654  .addUse(SrcReg, 0, Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2655  I.eraseFromParent();
2656  return true;
2657  }
2658 
2659  // Emit the same code as a vector extract.
2660  // Offset must be a multiple of 64.
2661  unsigned LaneIdx = Offset / 64;
2662  MachineInstr *Extract = emitExtractVectorElt(
2663  DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2664  if (!Extract)
2665  return false;
2666  I.eraseFromParent();
2667  return true;
2668  }
2669 
2670  I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2671  MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2672  Ty.getSizeInBits() - 1);
2673 
2674  if (SrcSize < 64) {
2675  assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2676  "unexpected G_EXTRACT types");
2677  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2678  }
2679 
2680  DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2681  MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2682  MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2683  .addReg(DstReg, 0, AArch64::sub_32);
2684  RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2685  AArch64::GPR32RegClass, MRI);
2686  I.getOperand(0).setReg(DstReg);
2687 
2688  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2689  }
2690 
2691  case TargetOpcode::G_INSERT: {
2692  LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2693  LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2694  unsigned DstSize = DstTy.getSizeInBits();
2695  // Larger inserts are vectors, same-size ones should be something else by
2696  // now (split up or turned into COPYs).
2697  if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2698  return false;
2699 
2700  I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2701  unsigned LSB = I.getOperand(3).getImm();
2702  unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2703  I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2704  MachineInstrBuilder(MF, I).addImm(Width - 1);
2705 
2706  if (DstSize < 64) {
2707  assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2708  "unexpected G_INSERT types");
2709  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2710  }
2711 
2712  Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2713  BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2714  TII.get(AArch64::SUBREG_TO_REG))
2715  .addDef(SrcReg)
2716  .addImm(0)
2717  .addUse(I.getOperand(2).getReg())
2718  .addImm(AArch64::sub_32);
2719  RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2720  AArch64::GPR32RegClass, MRI);
2721  I.getOperand(2).setReg(SrcReg);
2722 
2723  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2724  }
2725  case TargetOpcode::G_FRAME_INDEX: {
2726  // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2727  if (Ty != LLT::pointer(0, 64)) {
2728  LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2729  << ", expected: " << LLT::pointer(0, 64) << '\n');
2730  return false;
2731  }
2732  I.setDesc(TII.get(AArch64::ADDXri));
2733 
2734  // MOs for a #0 shifted immediate.
2735  I.addOperand(MachineOperand::CreateImm(0));
2736  I.addOperand(MachineOperand::CreateImm(0));
2737 
2738  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2739  }
2740 
2741  case TargetOpcode::G_GLOBAL_VALUE: {
2742  auto GV = I.getOperand(1).getGlobal();
2743  if (GV->isThreadLocal())
2744  return selectTLSGlobalValue(I, MRI);
2745 
2746  unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
2747  if (OpFlags & AArch64II::MO_GOT) {
2748  I.setDesc(TII.get(AArch64::LOADgot));
2749  I.getOperand(1).setTargetFlags(OpFlags);
2750  } else if (TM.getCodeModel() == CodeModel::Large) {
2751  // Materialize the global using movz/movk instructions.
2752  materializeLargeCMVal(I, GV, OpFlags);
2753  I.eraseFromParent();
2754  return true;
2755  } else if (TM.getCodeModel() == CodeModel::Tiny) {
2756  I.setDesc(TII.get(AArch64::ADR));
2757  I.getOperand(1).setTargetFlags(OpFlags);
2758  } else {
2759  I.setDesc(TII.get(AArch64::MOVaddr));
2760  I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2761  MachineInstrBuilder MIB(MF, I);
2762  MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2763  OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2764  }
2765  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2766  }
2767 
2768  case TargetOpcode::G_ZEXTLOAD:
2769  case TargetOpcode::G_LOAD:
2770  case TargetOpcode::G_STORE: {
2771  GLoadStore &LdSt = cast<GLoadStore>(I);
2772  bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2773  LLT PtrTy = MRI.getType(LdSt.getPointerReg());
2774 
2775  if (PtrTy != LLT::pointer(0, 64)) {
2776  LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
2777  << ", expected: " << LLT::pointer(0, 64) << '\n');
2778  return false;
2779  }
2780 
2781  uint64_t MemSizeInBytes = LdSt.getMemSize();
2782  unsigned MemSizeInBits = LdSt.getMemSizeInBits();
2783  AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
2784 
2785  // Need special instructions for atomics that affect ordering.
2786  if (Order != AtomicOrdering::NotAtomic &&
2787  Order != AtomicOrdering::Unordered &&
2788  Order != AtomicOrdering::Monotonic) {
2789  assert(!isa<GZExtLoad>(LdSt));
2790  if (MemSizeInBytes > 64)
2791  return false;
2792 
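  // The acquire/release opcode tables below are indexed by log2 of the access
  // size in bytes (LDARB/STLRB through LDARX/STLRX).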
2793  if (isa<GLoad>(LdSt)) {
2794  static unsigned Opcodes[] = {AArch64::LDARB, AArch64::LDARH,
2795  AArch64::LDARW, AArch64::LDARX};
2796  I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2797  } else {
2798  static unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2799  AArch64::STLRW, AArch64::STLRX};
2800  Register ValReg = LdSt.getReg(0);
2801  if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
2802  // Emit a subreg copy of 32 bits.
2803  Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2804  MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
2805  .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
2806  I.getOperand(0).setReg(NewVal);
2807  }
2808  I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2809  }
2810  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2811  return true;
2812  }
2813 
2814 #ifndef NDEBUG
2815  const Register PtrReg = LdSt.getPointerReg();
2816  const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2817  // Check that the pointer register is valid.
2818  assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2819  "Load/Store pointer operand isn't a GPR");
2820  assert(MRI.getType(PtrReg).isPointer() &&
2821  "Load/Store pointer operand isn't a pointer");
2822 #endif
2823 
2824  const Register ValReg = LdSt.getReg(0);
2825  const LLT ValTy = MRI.getType(ValReg);
2826  const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2827 
2828  // The code below doesn't support truncating stores, so we need to split it
2829  // again.
2830  if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2831  unsigned SubReg;
2832  LLT MemTy = LdSt.getMMO().getMemoryType();
2833  auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2834  if (!getSubRegForClass(RC, TRI, SubReg))
2835  return false;
2836 
2837  // Generate a subreg copy.
2838  auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
2839  .addReg(ValReg, 0, SubReg)
2840  .getReg(0);
2841  RBI.constrainGenericRegister(Copy, *RC, MRI);
2842  LdSt.getOperand(0).setReg(Copy);
2843  } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2844  // If this is an any-extending load from the FPR bank, split it into a regular
2845  // load + extend.
2846  if (RB.getID() == AArch64::FPRRegBankID) {
2847  unsigned SubReg;
2848  LLT MemTy = LdSt.getMMO().getMemoryType();
2849  auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2850  if (!getSubRegForClass(RC, TRI, SubReg))
2851  return false;
2852  Register OldDst = LdSt.getReg(0);
2853  Register NewDst =
2854  MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
2855  LdSt.getOperand(0).setReg(NewDst);
2856  MRI.setRegBank(NewDst, RB);
2857  // Generate a SUBREG_TO_REG to extend it.
2858  MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
2859  MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
2860  .addImm(0)
2861  .addUse(NewDst)
2862  .addImm(SubReg);
2863  auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB);
2864  RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
2865  MIB.setInstr(LdSt);
2866  }
2867  }
2868 
2869  // Helper lambda for partially selecting I. Either returns the original
2870  // instruction with an updated opcode, or a new instruction.
2871  auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
2872  bool IsStore = isa<GStore>(I);
2873  const unsigned NewOpc =
2874  selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
2875  if (NewOpc == I.getOpcode())
2876  return nullptr;
2877  // Check if we can fold anything into the addressing mode.
2878  auto AddrModeFns =
2879  selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
2880  if (!AddrModeFns) {
2881  // Can't fold anything. Use the original instruction.
2882  I.setDesc(TII.get(NewOpc));
2883  I.addOperand(MachineOperand::CreateImm(0));
2884  return &I;
2885  }
2886 
2887  // Folded something. Create a new instruction and return it.
2888  auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
2889  Register CurValReg = I.getOperand(0).getReg();
2890  IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
2891  NewInst.cloneMemRefs(I);
2892  for (auto &Fn : *AddrModeFns)
2893  Fn(NewInst);
2894  I.eraseFromParent();
2895  return &*NewInst;
2896  };
2897 
2898  MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
2899  if (!LoadStore)
2900  return false;
2901 
2902  // If we're storing a 0, use WZR/XZR.
2903  if (Opcode == TargetOpcode::G_STORE) {
2904  auto CVal = getIConstantVRegValWithLookThrough(
2905  LoadStore->getOperand(0).getReg(), MRI);
2906  if (CVal && CVal->Value == 0) {
2907  switch (LoadStore->getOpcode()) {
2908  case AArch64::STRWui:
2909  case AArch64::STRHHui:
2910  case AArch64::STRBBui:
2911  LoadStore->getOperand(0).setReg(AArch64::WZR);
2912  break;
2913  case AArch64::STRXui:
2914  LoadStore->getOperand(0).setReg(AArch64::XZR);
2915  break;
2916  }
2917  }
2918  }
2919 
2920  if (IsZExtLoad) {
2921  // The zextload from a smaller type to i32 should be handled by the
2922  // importer.
2923  if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
2924  return false;
2925  // If we have a ZEXTLOAD then change the load's type to be a narrower reg
2926  // and zero_extend with SUBREG_TO_REG.
2927  Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2928  Register DstReg = LoadStore->getOperand(0).getReg();
2929  LoadStore->getOperand(0).setReg(LdReg);
2930 
2931  MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
2932  MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
2933  .addImm(0)
2934  .addUse(LdReg)
2935  .addImm(AArch64::sub_32);
2936  MIB.setInstr(*LoadStore);
2937  return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
2938  MRI);
2939  }
2940  return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2941  }
2942 
2943  case TargetOpcode::G_SMULH:
2944  case TargetOpcode::G_UMULH: {
2945  // Reject the various things we don't support yet.
2946  if (unsupportedBinOp(I, RBI, MRI, TRI))
2947  return false;
2948 
2949  const Register DefReg = I.getOperand(0).getReg();
2950  const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2951 
2952  if (RB.getID() != AArch64::GPRRegBankID) {
2953  LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
2954  return false;
2955  }
2956 
2957  if (Ty != LLT::scalar(64)) {
2958  LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
2959  << ", expected: " << LLT::scalar(64) << '\n');
2960  return false;
2961  }
2962 
2963  unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
2964  : AArch64::UMULHrr;
2965  I.setDesc(TII.get(NewOpc));
2966 
2967  // Now that we selected an opcode, we need to constrain the register
2968  // operands to use appropriate classes.
2969  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2970  }
2971  case TargetOpcode::G_LSHR:
2972  case TargetOpcode::G_ASHR:
2973  if (MRI.getType(I.getOperand(0).getReg()).isVector())
2974  return selectVectorAshrLshr(I, MRI);
2975  LLVM_FALLTHROUGH;
2976  case TargetOpcode::G_SHL:
2977  if (Opcode == TargetOpcode::G_SHL &&
2978  MRI.getType(I.getOperand(0).getReg()).isVector())
2979  return selectVectorSHL(I, MRI);
2980 
2981  // These shifts were legalized to have 64-bit shift amounts because we
2982  // want to take advantage of the selection patterns that assume the
2983  // immediates are s64s. However, selectBinaryOp will assume both operands
2984  // have the same bit size.
2985  {
2986  Register SrcReg = I.getOperand(1).getReg();
2987  Register ShiftReg = I.getOperand(2).getReg();
2988  const LLT ShiftTy = MRI.getType(ShiftReg);
2989  const LLT SrcTy = MRI.getType(SrcReg);
2990  if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
2991  ShiftTy.getSizeInBits() == 64) {
2992  assert(!ShiftTy.isVector() && "unexpected vector shift ty");
2993  // Insert a subregister copy to implement a 64->32 trunc
2994  auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
2995  .addReg(ShiftReg, 0, AArch64::sub_32);
2996  MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2997  I.getOperand(2).setReg(Trunc.getReg(0));
2998  }
2999  }
3000  LLVM_FALLTHROUGH;
3001  case TargetOpcode::G_OR: {
3002  // Reject the various things we don't support yet.
3003  if (unsupportedBinOp(I, RBI, MRI, TRI))
3004  return false;
3005 
3006  const unsigned OpSize = Ty.getSizeInBits();
3007 
3008  const Register DefReg = I.getOperand(0).getReg();
3009  const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
3010 
3011  const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
3012  if (NewOpc == I.getOpcode())
3013  return false;
3014 
3015  I.setDesc(TII.get(NewOpc));
3016  // FIXME: Should the type be always reset in setDesc?
3017 
3018  // Now that we selected an opcode, we need to constrain the register
3019  // operands to use appropriate classes.
3020  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3021  }
3022 
3023  case TargetOpcode::G_PTR_ADD: {
3024  emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
3025  I.eraseFromParent();
3026  return true;
3027  }
3028  case TargetOpcode::G_SADDO:
3029  case TargetOpcode::G_UADDO:
3030  case TargetOpcode::G_SSUBO:
3031  case TargetOpcode::G_USUBO: {
3032  // Emit the operation and get the correct condition code.
3033  auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
3034  I.getOperand(2), I.getOperand(3), MIB);
3035 
3036  // Now, put the overflow result in the register given by the first operand
3037  // to the overflow op. CSINC increments the result when the predicate is
3038  // false, so to get the increment when it's true, we need to use the
3039  // inverse. In this case, we want to increment when carry is set.
3040  Register ZReg = AArch64::WZR;
3041  emitCSINC(/*Dst=*/I.getOperand(1).getReg(), /*Src1=*/ZReg, /*Src2=*/ZReg,
3042  getInvertedCondCode(OpAndCC.second), MIB);
3043  I.eraseFromParent();
3044  return true;
3045  }
3046 
3047  case TargetOpcode::G_PTRMASK: {
3048  Register MaskReg = I.getOperand(2).getReg();
3049  Optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
3050  // TODO: Implement arbitrary cases
3051  if (!MaskVal || !isShiftedMask_64(*MaskVal))
3052  return false;
3053 
3054  uint64_t Mask = *MaskVal;
3055  I.setDesc(TII.get(AArch64::ANDXri));
3056  I.getOperand(2).ChangeToImmediate(
3057  AArch64_AM::encodeLogicalImmediate(Mask, 64));
3058 
3059  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3060  }
3061  case TargetOpcode::G_PTRTOINT:
3062  case TargetOpcode::G_TRUNC: {
3063  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3064  const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3065 
3066  const Register DstReg = I.getOperand(0).getReg();
3067  const Register SrcReg = I.getOperand(1).getReg();
3068 
3069  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3070  const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3071 
3072  if (DstRB.getID() != SrcRB.getID()) {
3073  LLVM_DEBUG(
3074  dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
3075  return false;
3076  }
3077 
3078  if (DstRB.getID() == AArch64::GPRRegBankID) {
3079  const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3080  if (!DstRC)
3081  return false;
3082 
3083  const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB);
3084  if (!SrcRC)
3085  return false;
3086 
3087  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
3088  !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
3089  LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
3090  return false;
3091  }
3092 
3093  if (DstRC == SrcRC) {
3094  // Nothing to be done
3095  } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
3096  SrcTy == LLT::scalar(64)) {
3097  llvm_unreachable("TableGen can import this case");
3098  return false;
3099  } else if (DstRC == &AArch64::GPR32RegClass &&
3100  SrcRC == &AArch64::GPR64RegClass) {
3101  I.getOperand(1).setSubReg(AArch64::sub_32);
3102  } else {
3103  LLVM_DEBUG(
3104  dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3105  return false;
3106  }
3107 
3108  I.setDesc(TII.get(TargetOpcode::COPY));
3109  return true;
3110  } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3111  if (DstTy == LLT::fixed_vector(4, 16) &&
3112  SrcTy == LLT::fixed_vector(4, 32)) {
3113  I.setDesc(TII.get(AArch64::XTNv4i16));
3114  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3115  return true;
3116  }
3117 
3118  if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3119  MachineInstr *Extract = emitExtractVectorElt(
3120  DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
3121  if (!Extract)
3122  return false;
3123  I.eraseFromParent();
3124  return true;
3125  }
3126 
3127  // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3128  if (Opcode == TargetOpcode::G_PTRTOINT) {
3129  assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
3130  I.setDesc(TII.get(TargetOpcode::COPY));
3131  return selectCopy(I, TII, MRI, TRI, RBI);
3132  }
3133  }
3134 
3135  return false;
3136  }
3137 
3138  case TargetOpcode::G_ANYEXT: {
3139  if (selectUSMovFromExtend(I, MRI))
3140  return true;
3141 
3142  const Register DstReg = I.getOperand(0).getReg();
3143  const Register SrcReg = I.getOperand(1).getReg();
3144 
3145  const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
3146  if (RBDst.getID() != AArch64::GPRRegBankID) {
3147  LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
3148  << ", expected: GPR\n");
3149  return false;
3150  }
3151 
3152  const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
3153  if (RBSrc.getID() != AArch64::GPRRegBankID) {
3154  LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
3155  << ", expected: GPR\n");
3156  return false;
3157  }
3158 
3159  const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
3160 
3161  if (DstSize == 0) {
3162  LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
3163  return false;
3164  }
3165 
3166  if (DstSize != 64 && DstSize > 32) {
3167  LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
3168  << ", expected: 32 or 64\n");
3169  return false;
3170  }
3171  // At this point G_ANYEXT is just like a plain COPY, but we need
3172  // to explicitly form the 64-bit value if any.
3173  if (DstSize > 32) {
3174  Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3175  BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
3176  .addDef(ExtSrc)
3177  .addImm(0)
3178  .addUse(SrcReg)
3179  .addImm(AArch64::sub_32);
3180  I.getOperand(1).setReg(ExtSrc);
3181  }
3182  return selectCopy(I, TII, MRI, TRI, RBI);
3183  }
3184 
3185  case TargetOpcode::G_ZEXT:
3186  case TargetOpcode::G_SEXT_INREG:
3187  case TargetOpcode::G_SEXT: {
3188  if (selectUSMovFromExtend(I, MRI))
3189  return true;
3190 
3191  unsigned Opcode = I.getOpcode();
3192  const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3193  const Register DefReg = I.getOperand(0).getReg();
3194  Register SrcReg = I.getOperand(1).getReg();
3195  const LLT DstTy = MRI.getType(DefReg);
3196  const LLT SrcTy = MRI.getType(SrcReg);
3197  unsigned DstSize = DstTy.getSizeInBits();
3198  unsigned SrcSize = SrcTy.getSizeInBits();
3199 
3200  // SEXT_INREG has the same src reg size as dst, the size of the value to be
3201  // extended is encoded in the imm.
3202  if (Opcode == TargetOpcode::G_SEXT_INREG)
3203  SrcSize = I.getOperand(2).getImm();
3204 
3205  if (DstTy.isVector())
3206  return false; // Should be handled by imported patterns.
3207 
3208  assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3209  AArch64::GPRRegBankID &&
3210  "Unexpected ext regbank");
3211 
3212  MachineInstr *ExtI;
3213 
3214  // First check whether we're extending the result of a load which has a dest
3215  // type smaller than 32 bits; in that case this zext is redundant. GPR32 is
3216  // the smallest GPR register on AArch64, and all smaller loads automatically
3217  // zero-extend the upper bits. E.g.
3218  // %v(s8) = G_LOAD %p, :: (load 1)
3219  // %v2(s32) = G_ZEXT %v(s8)
3220  if (!IsSigned) {
3221  auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
3222  bool IsGPR =
3223  RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3224  if (LoadMI && IsGPR) {
3225  const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3226  unsigned BytesLoaded = MemOp->getSize();
3227  if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3228  return selectCopy(I, TII, MRI, TRI, RBI);
3229  }
3230 
3231  // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3232  // + SUBREG_TO_REG.
3233  //
3234  // If we are zero extending from 32 bits to 64 bits, it's possible that
3235  // the instruction implicitly does the zero extend for us. In that case,
3236  // we only need the SUBREG_TO_REG.
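  // Roughly, the sequence emitted below for a 32-to-64-bit G_ZEXT whose source
  // does not already clear its upper bits is:
  //   %or:gpr32 = ORRWrs $wzr, %src, 0       ; zero the upper 32 bits
  //   %dst:gpr64 = SUBREG_TO_REG 0, %or, %subreg.sub_32
  // When isDef32(*Def) holds, the ORRWrs is skipped and only the
  // SUBREG_TO_REG is emitted.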
3237  if (IsGPR && SrcSize == 32 && DstSize == 64) {
3238  // Unlike with the G_LOAD case, we don't want to look through copies
3239  // here. (See isDef32.)
3240  MachineInstr *Def = MRI.getVRegDef(SrcReg);
3241  Register SubregToRegSrc = SrcReg;
3242 
3243  // Does the instruction implicitly zero extend?
3244  if (!Def || !isDef32(*Def)) {
3245  // No. Zero out using an OR.
3246  Register OrDst = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3247  const Register ZReg = AArch64::WZR;
3248  MIB.buildInstr(AArch64::ORRWrs, {OrDst}, {ZReg, SrcReg}).addImm(0);
3249  SubregToRegSrc = OrDst;
3250  }
3251 
3252  MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3253  .addImm(0)
3254  .addUse(SubregToRegSrc)
3255  .addImm(AArch64::sub_32);
3256 
3257  if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3258  MRI)) {
3259  LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3260  return false;
3261  }
3262 
3263  if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3264  MRI)) {
3265  LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3266  return false;
3267  }
3268 
3269  I.eraseFromParent();
3270  return true;
3271  }
3272  }
3273 
3274  if (DstSize == 64) {
3275  if (Opcode != TargetOpcode::G_SEXT_INREG) {
3276  // FIXME: Can we avoid manually doing this?
3277  if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3278  MRI)) {
3279  LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3280  << " operand\n");
3281  return false;
3282  }
3283  SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
3284  {&AArch64::GPR64RegClass}, {})
3285  .addImm(0)
3286  .addUse(SrcReg)
3287  .addImm(AArch64::sub_32)
3288  .getReg(0);
3289  }
3290 
3291  ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3292  {DefReg}, {SrcReg})
3293  .addImm(0)
3294  .addImm(SrcSize - 1);
3295  } else if (DstSize <= 32) {
3296  ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3297  {DefReg}, {SrcReg})
3298  .addImm(0)
3299  .addImm(SrcSize - 1);
3300  } else {
3301  return false;
3302  }
3303 
3304  constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
3305  I.eraseFromParent();
3306  return true;
3307  }
3308 
3309  case TargetOpcode::G_SITOFP:
3310  case TargetOpcode::G_UITOFP:
3311  case TargetOpcode::G_FPTOSI:
3312  case TargetOpcode::G_FPTOUI: {
3313  const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
3314  SrcTy = MRI.getType(I.getOperand(1).getReg());
3315  const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
3316  if (NewOpc == Opcode)
3317  return false;
3318 
3319  I.setDesc(TII.get(NewOpc));
3320  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3321  I.setFlags(MachineInstr::NoFPExcept);
3322 
3323  return true;
3324  }
3325 
3326  case TargetOpcode::G_FREEZE:
3327  return selectCopy(I, TII, MRI, TRI, RBI);
3328 
3329  case TargetOpcode::G_INTTOPTR:
3330  // The importer is currently unable to import pointer types since they
3331  // didn't exist in SelectionDAG.
3332  return selectCopy(I, TII, MRI, TRI, RBI);
3333 
3334  case TargetOpcode::G_BITCAST:
3335  // Imported SelectionDAG rules can handle every bitcast except those that
3336  // bitcast from a type to the same type. Ideally, these shouldn't occur
3337  // but we might not run an optimizer that deletes them. The other exception
3338  // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3339  // of them.
3340  return selectCopy(I, TII, MRI, TRI, RBI);
3341 
3342  case TargetOpcode::G_SELECT: {
3343  auto &Sel = cast<GSelect>(I);
3344  if (MRI.getType(Sel.getCondReg()) != LLT::scalar(1)) {
3345  LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
3346  << ", expected: " << LLT::scalar(1) << '\n');
3347  return false;
3348  }
3349 
3350  const Register CondReg = Sel.getCondReg();
3351  const Register TReg = Sel.getTrueReg();
3352  const Register FReg = Sel.getFalseReg();
3353 
3354  if (tryOptSelect(Sel))
3355  return true;
3356 
3357  // Make sure to use an unused vreg instead of wzr, so that the peephole
3358  // optimizations will be able to optimize these.
3359  Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3360  auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3361  .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3362  constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3363  if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
3364  return false;
3365  Sel.eraseFromParent();
3366  return true;
3367  }
3368  case TargetOpcode::G_ICMP: {
3369  if (Ty.isVector())
3370  return selectVectorICmp(I, MRI);
3371 
3372  if (Ty != LLT::scalar(32)) {
3373  LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3374  << ", expected: " << LLT::scalar(32) << '\n');
3375  return false;
3376  }
3377 
3378  auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3379  const AArch64CC::CondCode InvCC =
3380  changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
3381  emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB);
3382  emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
3383  /*Src2=*/AArch64::WZR, InvCC, MIB);
3384  I.eraseFromParent();
3385  return true;
3386  }
3387 
3388  case TargetOpcode::G_FCMP: {
3389  CmpInst::Predicate Pred =
3390  static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3391  if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
3392  Pred) ||
3393  !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
3394  return false;
3395  I.eraseFromParent();
3396  return true;
3397  }
3398  case TargetOpcode::G_VASTART:
3399  return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3400  : selectVaStartAAPCS(I, MF, MRI);
3401  case TargetOpcode::G_INTRINSIC:
3402  return selectIntrinsic(I, MRI);
3403  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3404  return selectIntrinsicWithSideEffects(I, MRI);
3405  case TargetOpcode::G_IMPLICIT_DEF: {
3406  I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3407  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3408  const Register DstReg = I.getOperand(0).getReg();
3409  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3410  const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3411  RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3412  return true;
3413  }
3414  case TargetOpcode::G_BLOCK_ADDR: {
3415  if (TM.getCodeModel() == CodeModel::Large) {
3416  materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3417  I.eraseFromParent();
3418  return true;
3419  } else {
3420  I.setDesc(TII.get(AArch64::MOVaddrBA));
3421  auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3422  I.getOperand(0).getReg())
3423  .addBlockAddress(I.getOperand(1).getBlockAddress(),
3424  /* Offset */ 0, AArch64II::MO_PAGE)
3425  .addBlockAddress(
3426  I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3427  AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3428  I.eraseFromParent();
3429  return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3430  }
3431  }
3432  case AArch64::G_DUP: {
3433  // When the scalar of G_DUP is an s8/s16 gpr, it can't be selected by the
3434  // imported patterns. Do it manually here. Avoiding the generation of an s16
3435  // gpr is difficult because at RegBankSelect we may end up pessimizing the
3436  // fpr case if we add an anyextend to fix this. Manual selection is the most
3437  // robust solution for now.
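  // E.g. %d:fpr(<8 x s8>) = G_DUP %s:gpr(s8) is selected to DUPv8i8gpr below.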
3438  if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3439  AArch64::GPRRegBankID)
3440  return false; // We expect the fpr regbank case to be imported.
3441  LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3442  if (VecTy == LLT::fixed_vector(8, 8))
3443  I.setDesc(TII.get(AArch64::DUPv8i8gpr));
3444  else if (VecTy == LLT::fixed_vector(16, 8))
3445  I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3446  else if (VecTy == LLT::fixed_vector(4, 16))
3447  I.setDesc(TII.get(AArch64::DUPv4i16gpr));
3448  else if (VecTy == LLT::fixed_vector(8, 16))
3449  I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3450  else
3451  return false;
3452  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3453  }
3454  case TargetOpcode::G_INTRINSIC_TRUNC:
3455  return selectIntrinsicTrunc(I, MRI);
3456  case TargetOpcode::G_INTRINSIC_ROUND:
3457  return selectIntrinsicRound(I, MRI);
3458  case TargetOpcode::G_BUILD_VECTOR:
3459  return selectBuildVector(I, MRI);
3460  case TargetOpcode::G_MERGE_VALUES:
3461  return selectMergeValues(I, MRI);
3462  case TargetOpcode::G_UNMERGE_VALUES:
3463  return selectUnmergeValues(I, MRI);
3464  case TargetOpcode::G_SHUFFLE_VECTOR:
3465  return selectShuffleVector(I, MRI);
3466  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3467  return selectExtractElt(I, MRI);
3468  case TargetOpcode::G_INSERT_VECTOR_ELT:
3469  return selectInsertElt(I, MRI);
3470  case TargetOpcode::G_CONCAT_VECTORS:
3471  return selectConcatVectors(I, MRI);
3472  case TargetOpcode::G_JUMP_TABLE:
3473  return selectJumpTable(I, MRI);
3474  case TargetOpcode::G_VECREDUCE_FADD:
3475  case TargetOpcode::G_VECREDUCE_ADD:
3476  return selectReduction(I, MRI);
3477  case TargetOpcode::G_MEMCPY:
3478  case TargetOpcode::G_MEMCPY_INLINE:
3479  case TargetOpcode::G_MEMMOVE:
3480  case TargetOpcode::G_MEMSET:
3481  assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
3482  return selectMOPS(I, MRI);
3483  }
3484 
3485  return false;
3486 }
3487 
3488 bool AArch64InstructionSelector::selectReduction(MachineInstr &I,
3489  MachineRegisterInfo &MRI) {
3490  Register VecReg = I.getOperand(1).getReg();
3491  LLT VecTy = MRI.getType(VecReg);
3492  if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
3493  // For <2 x i32> ADDPv2i32 generates an FPR64 value, so we need to emit
3494  // a subregister copy afterwards.
3495  if (VecTy == LLT::fixed_vector(2, 32)) {
3496  Register DstReg = I.getOperand(0).getReg();
3497  auto AddP = MIB.buildInstr(AArch64::ADDPv2i32, {&AArch64::FPR64RegClass},
3498  {VecReg, VecReg});
3499  auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3500  .addReg(AddP.getReg(0), 0, AArch64::ssub)
3501  .getReg(0);
3502  RBI.constrainGenericRegister(Copy, AArch64::FPR32RegClass, MRI);
3503  I.eraseFromParent();
3504  return constrainSelectedInstRegOperands(*AddP, TII, TRI, RBI);
3505  }
3506 
3507  unsigned Opc = 0;
3508  if (VecTy == LLT::fixed_vector(16, 8))
3509  Opc = AArch64::ADDVv16i8v;
3510  else if (VecTy == LLT::fixed_vector(8, 16))
3511  Opc = AArch64::ADDVv8i16v;
3512  else if (VecTy == LLT::fixed_vector(4, 32))
3513  Opc = AArch64::ADDVv4i32v;
3514  else if (VecTy == LLT::fixed_vector(2, 64))
3515  Opc = AArch64::ADDPv2i64p;
3516  else {
3517  LLVM_DEBUG(dbgs() << "Unhandled type for add reduction");
3518  return false;
3519  }
3520  I.setDesc(TII.get(Opc));
3521  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3522  }
3523 
3524  if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) {
3525  unsigned Opc = 0;
3526  if (VecTy == LLT::fixed_vector(2, 32))
3527  Opc = AArch64::FADDPv2i32p;
3528  else if (VecTy == LLT::fixed_vector(2, 64))
3529  Opc = AArch64::FADDPv2i64p;
3530  else {
3531  LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction");
3532  return false;
3533  }
3534  I.setDesc(TII.get(Opc));
3535  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3536  }
3537  return false;
3538 }
3539 
3540 bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
3541  MachineRegisterInfo &MRI) {
3542  unsigned Mopcode;
3543  switch (GI.getOpcode()) {
3544  case TargetOpcode::G_MEMCPY:
3545  case TargetOpcode::G_MEMCPY_INLINE:
3546  Mopcode = AArch64::MOPSMemoryCopyPseudo;
3547  break;
3548  case TargetOpcode::G_MEMMOVE:
3549  Mopcode = AArch64::MOPSMemoryMovePseudo;
3550  break;
3551  case TargetOpcode::G_MEMSET:
3552  // For tagged memset see llvm.aarch64.mops.memset.tag
3553  Mopcode = AArch64::MOPSMemorySetPseudo;
3554  break;
3555  }
3556 
3557  auto &DstPtr = GI.getOperand(0);
3558  auto &SrcOrVal = GI.getOperand(1);
3559  auto &Size = GI.getOperand(2);
3560 
3561  // Create copies of the registers that can be clobbered.
3562  const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
3563  const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
3564  const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg());
3565 
3566  const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3567  const auto &SrcValRegClass =
3568  IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3569 
3570  // Constrain to specific registers
3571  RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
3572  RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
3573  RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);
3574 
3575  MIB.buildCopy(DstPtrCopy, DstPtr);
3576  MIB.buildCopy(SrcValCopy, SrcOrVal);
3577  MIB.buildCopy(SizeCopy, Size);
3578 
3579  // New instruction uses the copied registers because it must update them.
3580  // The defs are not used since they don't exist in G_MEM*. They are still
3581  // tied.
3582  // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE
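  // For example, a G_MEMSET (dst, val, size) is rewritten below into roughly:
  //   %DefDstPtr, %DefSize = MOPSMemorySetPseudo %DstPtrCopy, %SizeCopy, %SrcValCopy
  // while the copy/move pseudos keep the (dst, src, size) operand order.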
3583  Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
3584  Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3585  if (IsSet) {
3586  MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
3587  {DstPtrCopy, SizeCopy, SrcValCopy});
3588  } else {
3589  Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
3590  MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
3591  {DstPtrCopy, SrcValCopy, SizeCopy});
3592  }
3593 
3594  GI.eraseFromParent();
3595  return true;
3596 }
3597 
3598 bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3599  MachineRegisterInfo &MRI) {
3600  assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3601  Register JTAddr = I.getOperand(0).getReg();
3602  unsigned JTI = I.getOperand(1).getIndex();
3603  Register Index = I.getOperand(2).getReg();
3604 
3605  Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3606  Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3607 
3608  MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3609  auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3610  {TargetReg, ScratchReg}, {JTAddr, Index})
3611  .addJumpTableIndex(JTI);
3612  // Build the indirect branch.
3613  MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3614  I.eraseFromParent();
3615  return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3616 }
3617 
3618 bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3619  MachineRegisterInfo &MRI) {
3620  assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3621  assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3622 
3623  Register DstReg = I.getOperand(0).getReg();
3624  unsigned JTI = I.getOperand(1).getIndex();
3625  // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
3626  auto MovMI =
3627  MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3628  .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3629  .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3630  I.eraseFromParent();
3631  return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3632 }
3633 
3634 bool AArch64InstructionSelector::selectTLSGlobalValue(
3635  MachineInstr &I, MachineRegisterInfo &MRI) {
3636  if (!STI.isTargetMachO())
3637  return false;
3638  MachineFunction &MF = *I.getParent()->getParent();
3639  MF.getFrameInfo().setAdjustsStack(true);
3640 
3641  const auto &GlobalOp = I.getOperand(1);
3642  assert(GlobalOp.getOffset() == 0 &&
3643  "Shouldn't have an offset on TLS globals!");
3644  const GlobalValue &GV = *GlobalOp.getGlobal();
3645 
3646  auto LoadGOT =
3647  MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3648  .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3649 
3650  auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3651  {LoadGOT.getReg(0)})
3652  .addImm(0);
3653 
3654  MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3655  // TLS calls preserve all registers except those that absolutely must be
3656  // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3657  // silly).
3658  MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
3659  .addUse(AArch64::X0, RegState::Implicit)
3660  .addDef(AArch64::X0, RegState::Implicit)
3661  .addRegMask(TRI.getTLSCallPreservedMask());
3662 
3663  MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3664  RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3665  MRI);
3666  I.eraseFromParent();
3667  return true;
3668 }
3669 
3670 bool AArch64InstructionSelector::selectIntrinsicTrunc(
3671  MachineInstr &I, MachineRegisterInfo &MRI) const {
3672  const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3673 
3674  // Select the correct opcode.
3675  unsigned Opc = 0;
3676  if (!SrcTy.isVector()) {
3677  switch (SrcTy.getSizeInBits()) {
3678  default:
3679  case 16:
3680  Opc = AArch64::FRINTZHr;
3681  break;
3682  case 32:
3683  Opc = AArch64::FRINTZSr;
3684  break;
3685  case 64:
3686  Opc = AArch64::FRINTZDr;
3687  break;
3688  }
3689  } else {
3690  unsigned NumElts = SrcTy.getNumElements();
3691  switch (SrcTy.getElementType().getSizeInBits()) {
3692  default:
3693  break;
3694  case 16:
3695  if (NumElts == 4)
3696  Opc = AArch64::FRINTZv4f16;
3697  else if (NumElts == 8)
3698  Opc = AArch64::FRINTZv8f16;
3699  break;
3700  case 32:
3701  if (NumElts == 2)
3702  Opc = AArch64::FRINTZv2f32;
3703  else if (NumElts == 4)
3704  Opc = AArch64::FRINTZv4f32;
3705  break;
3706  case 64:
3707  if (NumElts == 2)
3708  Opc = AArch64::FRINTZv2f64;
3709  break;
3710  }
3711  }
3712 
3713  if (!Opc) {
3714  // Didn't get an opcode above, bail.
3715  LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
3716  return false;
3717  }
3718 
3719  // Legalization would have set us up perfectly for this; we just need to
3720  // set the opcode and move on.
3721  I.setDesc(TII.get(Opc));
3722  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3723 }
3724 
3725 bool AArch64InstructionSelector::selectIntrinsicRound(
3726  MachineInstr &I, MachineRegisterInfo &MRI) const {
3727  const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3728 
3729  // Select the correct opcode.
3730  unsigned Opc = 0;
3731  if (!SrcTy.isVector()) {
3732  switch (SrcTy.getSizeInBits()) {
3733  default:
3734  case 16:
3735  Opc = AArch64::FRINTAHr;
3736  break;
3737  case 32:
3738  Opc = AArch64::FRINTASr;
3739  break;
3740  case 64:
3741  Opc = AArch64::FRINTADr;
3742  break;
3743  }
3744  } else {
3745  unsigned NumElts = SrcTy.getNumElements();
3746  switch (SrcTy.getElementType().getSizeInBits()) {
3747  default:
3748  break;
3749  case 16:
3750  if (NumElts == 4)
3751  Opc = AArch64::FRINTAv4f16;
3752  else if (NumElts == 8)
3753  Opc = AArch64::FRINTAv8f16;
3754  break;
3755  case 32:
3756  if (NumElts == 2)
3757  Opc = AArch64::FRINTAv2f32;
3758  else if (NumElts == 4)
3759  Opc = AArch64::FRINTAv4f32;
3760  break;
3761  case 64:
3762  if (NumElts == 2)
3763  Opc = AArch64::FRINTAv2f64;
3764  break;
3765  }
3766  }
3767 
3768  if (!Opc) {
3769  // Didn't get an opcode above, bail.
3770  LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
3771  return false;
3772  }
3773 
3774  // Legalization would have set us up perfectly for this; we just need to
3775  // set the opcode and move on.
3776  I.setDesc(TII.get(Opc));
3777  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3778 }
3779 
3780 bool AArch64InstructionSelector::selectVectorICmp(
3781  MachineInstr &I, MachineRegisterInfo &MRI) {
3782  Register DstReg = I.getOperand(0).getReg();
3783  LLT DstTy = MRI.getType(DstReg);
3784  Register SrcReg = I.getOperand(2).getReg();
3785  Register Src2Reg = I.getOperand(3).getReg();
3786  LLT SrcTy = MRI.getType(SrcReg);
3787 
3788  unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
3789  unsigned NumElts = DstTy.getNumElements();
3790 
3791  // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
3792  // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
3793  // Third index is cc opcode:
3794  // 0 == eq
3795  // 1 == ugt
3796  // 2 == uge
3797  // 3 == ult
3798  // 4 == ule
3799  // 5 == sgt
3800  // 6 == sge
3801  // 7 == slt
3802  // 8 == sle
3803  // ne is done by negating 'eq' result.
3804 
3805  // The table below assumes that for some comparisons the operands will be
3806  // commuted.
3807  // ult op == commute + ugt op
3808  // ule op == commute + uge op
3809  // slt op == commute + sgt op
3810  // sle op == commute + sge op
3811  unsigned PredIdx = 0;
3812  bool SwapOperands = false;
3813  CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
3814  switch (Pred) {
3815  case CmpInst::ICMP_NE:
3816  case CmpInst::ICMP_EQ:
3817  PredIdx = 0;
3818  break;
3819  case CmpInst::ICMP_UGT:
3820  PredIdx = 1;
3821  break;
3822  case CmpInst::ICMP_UGE:
3823  PredIdx = 2;
3824  break;
3825  case CmpInst::ICMP_ULT:
3826  PredIdx = 3;
3827  SwapOperands = true;
3828  break;
3829  case CmpInst::ICMP_ULE:
3830  PredIdx = 4;
3831  SwapOperands = true;
3832  break;
3833  case CmpInst::ICMP_SGT:
3834  PredIdx = 5;
3835  break;
3836  case CmpInst::ICMP_SGE:
3837  PredIdx = 6;
3838  break;
3839  case CmpInst::ICMP_SLT:
3840  PredIdx = 7;
3841  SwapOperands = true;
3842  break;
3843  case CmpInst::ICMP_SLE:
3844  PredIdx = 8;
3845  SwapOperands = true;
3846  break;
3847  default:
3848  llvm_unreachable("Unhandled icmp predicate");
3849  return false;
3850  }
3851 
3852  // This table obviously should be tablegen'd when we have our GISel native
3853  // tablegen selector.
3854 
3855  static const unsigned OpcTable[4][4][9] = {
3856  {
3857  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3858  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3859  0 /* invalid */},
3860  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3861  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3862  0 /* invalid */},
3863  {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
3864  AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
3865  AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
3866  {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
3867  AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
3868  AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
3869  },
3870  {
3871  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3872  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3873  0 /* invalid */},
3874  {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
3875  AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
3876  AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
3877  {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
3878  AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
3879  AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
3880  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3881  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3882  0 /* invalid */}
3883  },
3884  {
3885  {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
3886  AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
3887  AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
3888  {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
3889  AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
3890  AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
3891  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3892  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3893  0 /* invalid */},
3894  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3895  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3896  0 /* invalid */}
3897  },
3898  {
3899  {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
3900  AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
3901  AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
3902  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3903  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3904  0 /* invalid */},
3905  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3906  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3907  0 /* invalid */},
3908  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3909  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3910  0 /* invalid */}
3911  },
3912  };
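  // Worked example of the lookup below: a <4 x s32> G_ICMP with an sgt
  // predicate has EltIdx = Log2_32(32 / 8) = 2, NumEltsIdx = Log2_32(4 / 2) = 1
  // and PredIdx = 5, which selects OpcTable[2][1][5] == AArch64::CMGTv4i32.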
3913  unsigned EltIdx = Log2_32(SrcEltSize / 8);
3914  unsigned NumEltsIdx = Log2_32(NumElts / 2);
3915  unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
3916  if (!Opc) {
3917  LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
3918  return false;
3919  }
3920 
3921  const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3922  const TargetRegisterClass *SrcRC =
3923  getRegClassForTypeOnBank(SrcTy, VecRB, true);
3924  if (!SrcRC) {
3925  LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3926  return false;
3927  }
3928 
3929  unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
3930  if (SrcTy.getSizeInBits() == 128)
3931  NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
3932 
3933  if (SwapOperands)
3934  std::swap(SrcReg, Src2Reg);
3935 
3936  auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
3937  constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3938 
3939  // Invert if we had a 'ne' cc.
3940  if (NotOpc) {
3941  Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
3942  constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3943  } else {
3944  MIB.buildCopy(DstReg, Cmp.getReg(0));
3945  }
3946  RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
3947  I.eraseFromParent();
3948  return true;
3949 }
3950 
3951 MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3952  unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3953  MachineIRBuilder &MIRBuilder) const {
3954  auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3955 
3956  auto BuildFn = [&](unsigned SubregIndex) {
3957  auto Ins =
3958  MIRBuilder
3959  .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3960  .addImm(SubregIndex);
3961  constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3962  constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3963  return &*Ins;
3964  };
3965 
3966  switch (EltSize) {
3967  case 16:
3968  return BuildFn(AArch64::hsub);
3969  case 32:
3970  return BuildFn(AArch64::ssub);
3971  case 64:
3972  return BuildFn(AArch64::dsub);
3973  default:
3974  return nullptr;
3975  }
3976 }
3977 
3978 bool AArch64InstructionSelector::selectMergeValues(
3979  MachineInstr &I, MachineRegisterInfo &MRI) {
3980  assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3981  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3982  const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3983  assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3984  const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3985 
3986  if (I.getNumOperands() != 3)
3987  return false;
3988 
3989  // Merging 2 s64s into an s128.
3990  if (DstTy == LLT::scalar(128)) {
3991  if (SrcTy.getSizeInBits() != 64)
3992  return false;
3993  Register DstReg = I.getOperand(0).getReg();
3994  Register Src1Reg = I.getOperand(1).getReg();
3995  Register Src2Reg = I.getOperand(2).getReg();
3996  auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3997  MachineInstr *InsMI =
3998  emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
3999  if (!InsMI)
4000  return false;
4001  MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
4002  Src2Reg, /* LaneIdx */ 1, RB, MIB);
4003  if (!Ins2MI)
4004  return false;
4005  constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
4006  constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
4007  I.eraseFromParent();
4008  return true;
4009  }
4010 
4011  if (RB.getID() != AArch64::GPRRegBankID)
4012  return false;
4013 
4014  if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
4015  return false;
4016 
4017  auto *DstRC = &AArch64::GPR64RegClass;
4018  Register SubToRegDef = MRI.createVirtualRegister(DstRC);
4019  MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
4020  TII.get(TargetOpcode::SUBREG_TO_REG))
4021  .addDef(SubToRegDef)
4022  .addImm(0)
4023  .addUse(I.getOperand(1).getReg())
4024  .addImm(AArch64::sub_32);
4025  Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
4026  // Need to anyext the second scalar before we can use bfm
4027  MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
4028  TII.get(TargetOpcode::SUBREG_TO_REG))
4029  .addDef(SubToRegDef2)
4030  .addImm(0)
4031  .addUse(I.getOperand(2).getReg())
4032  .addImm(AArch64::sub_32);
4033  MachineInstr &BFM =
4034  *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
4035  .addDef(I.getOperand(0).getReg())
4036  .addUse(SubToRegDef)
4037  .addUse(SubToRegDef2)
4038  .addImm(32)
4039  .addImm(31);
4040  constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
4041  constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
4042  constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
4043  I.eraseFromParent();
4044  return true;
4045 }
4046 
4047 static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
4048  const unsigned EltSize) {
4049  // Choose a lane copy opcode and subregister based off of the size of the
4050  // vector's elements.
4051  switch (EltSize) {
4052  case 8:
4053  CopyOpc = AArch64::DUPi8;
4054  ExtractSubReg = AArch64::bsub;
4055  break;
4056  case 16:
4057  CopyOpc = AArch64::DUPi16;
4058  ExtractSubReg = AArch64::hsub;
4059  break;
4060  case 32:
4061  CopyOpc = AArch64::DUPi32;
4062  ExtractSubReg = AArch64::ssub;
4063  break;
4064  case 64:
4065  CopyOpc = AArch64::DUPi64;
4066  ExtractSubReg = AArch64::dsub;
4067  break;
4068  default:
4069  // Unknown size, bail out.
4070  LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
4071  return false;
4072  }
4073  return true;
4074 }
4075 
4076 MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
4077  Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
4078  Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
4079  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4080  unsigned CopyOpc = 0;
4081  unsigned ExtractSubReg = 0;
4082  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
4083  LLVM_DEBUG(
4084  dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
4085  return nullptr;
4086  }
4087 
4088  const TargetRegisterClass *DstRC =
4089  getRegClassForTypeOnBank(ScalarTy, DstRB, true);
4090  if (!DstRC) {
4091  LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
4092  return nullptr;
4093  }
4094 
4095  const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
4096  const LLT &VecTy = MRI.getType(VecReg);
4097  const TargetRegisterClass *VecRC =
4098  getRegClassForTypeOnBank(VecTy, VecRB, true);
4099  if (!VecRC) {
4100  LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
4101  return nullptr;
4102  }
4103 
4104  // The register that we're going to copy into.
4105  Register InsertReg = VecReg;
4106  if (!DstReg)
4107  DstReg = MRI.createVirtualRegister(DstRC);
4108  // If the lane index is 0, we just use a subregister COPY.
4109  if (LaneIdx == 0) {
4110  auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
4111  .addReg(VecReg, 0, ExtractSubReg);
4112  RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
4113  return &*Copy;
4114  }
4115 
4116  // Lane copies require 128-bit wide registers. If we're dealing with an
4117  // unpacked vector, then we need to move up to that width. Insert an implicit
4118  // def and a subregister insert to get us there.
4119  if (VecTy.getSizeInBits() != 128) {
4120  MachineInstr *ScalarToVector = emitScalarToVector(
4121  VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
4122  if (!ScalarToVector)
4123  return nullptr;
4124  InsertReg = ScalarToVector->getOperand(0).getReg();
4125  }
4126 
4127  MachineInstr *LaneCopyMI =
4128  MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
4129  constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
4130 
4131  // Make sure that we actually constrain the initial copy.
4132  RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
4133  return LaneCopyMI;
4134 }
4135 
4136 bool AArch64InstructionSelector::selectExtractElt(
4137  MachineInstr &I, MachineRegisterInfo &MRI) {
4138  assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
4139  "unexpected opcode!");
4140  Register DstReg = I.getOperand(0).getReg();
4141  const LLT NarrowTy = MRI.getType(DstReg);
4142  const Register SrcReg = I.getOperand(1).getReg();
4143  const LLT WideTy = MRI.getType(SrcReg);
4144  (void)WideTy;
4145  assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
4146  "source register size too small!");
4147  assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
4148 
4149  // Need the lane index to determine the correct copy opcode.
4150  MachineOperand &LaneIdxOp = I.getOperand(2);
4151  assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
4152 
4153  if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4154  LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
4155  return false;
4156  }
4157 
4158  // Find the index to extract from.
4159  auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
4160  if (!VRegAndVal)
4161  return false;
4162  unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4163 
4164 
4165  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
4166  MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
4167  LaneIdx, MIB);
4168  if (!Extract)
4169  return false;
4170 
4171  I.eraseFromParent();
4172  return true;
4173 }
4174 
4175 bool AArch64InstructionSelector::selectSplitVectorUnmerge(
4176  MachineInstr &I, MachineRegisterInfo &MRI) {
4177  unsigned NumElts = I.getNumOperands() - 1;
4178  Register SrcReg = I.getOperand(NumElts).getReg();
4179  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4180  const LLT SrcTy = MRI.getType(SrcReg);
4181 
4182  assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
4183  if (SrcTy.getSizeInBits() > 128) {
4184  LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
4185  return false;
4186  }
4187 
4188  // We implement a split vector operation by treating the sub-vectors as
4189  // scalars and extracting them.
4190  const RegisterBank &DstRB =
4191  *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
4192  for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4193  Register Dst = I.getOperand(OpIdx).getReg();
4194  MachineInstr *Extract =
4195  emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
4196  if (!Extract)
4197  return false;
4198  }
4199  I.eraseFromParent();
4200  return true;
4201 }
4202 
4203 bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
4204  MachineRegisterInfo &MRI) {
4205  assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4206  "unexpected opcode");
4207 
4208  // TODO: Handle unmerging into GPRs and from scalars to scalars.
4209  if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
4210  AArch64::FPRRegBankID ||
4211  RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
4212  AArch64::FPRRegBankID) {
4213  LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
4214  "currently unsupported.\n");
4215  return false;
4216  }
4217 
4218  // The last operand is the vector source register, and every other operand is
4219  // a register to unpack into.
4220  unsigned NumElts = I.getNumOperands() - 1;
4221  Register SrcReg = I.getOperand(NumElts).getReg();
4222  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4223  const LLT WideTy = MRI.getType(SrcReg);
4224  (void)WideTy;
4225  assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
4226  "can only unmerge from vector or s128 types!");
4227  assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
4228  "source register size too small!");
4229 
4230  if (!NarrowTy.isScalar())
4231  return selectSplitVectorUnmerge(I, MRI);
4232 
4233  // Choose a lane copy opcode and subregister based off of the size of the
4234  // vector's elements.
4235  unsigned CopyOpc = 0;
4236  unsigned ExtractSubReg = 0;
4237  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
4238  return false;
4239 
4240  // Set up for the lane copies.
4241  MachineBasicBlock &MBB = *I.getParent();
4242 
4243  // Stores the registers we'll be copying from.
4244  SmallVector<Register, 4> InsertRegs;
4245 
4246  // We'll use the first register twice, so we only need NumElts-1 registers.
4247  unsigned NumInsertRegs = NumElts - 1;
4248 
4249  // If our elements fit into exactly 128 bits, then we can copy from the source
4250  // directly. Otherwise, we need to do a bit of setup with some subregister
4251  // inserts.
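  // For example, unmerging a <4 x s32> (4 x 32 == 128 bits) copies lanes
  // straight from SrcReg, whereas unmerging a <4 x s16> (64 bits) first widens
  // the source to an FPR128 with IMPLICIT_DEF + INSERT_SUBREG.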
4252  if (NarrowTy.getSizeInBits() * NumElts == 128) {
4253  InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
4254  } else {
4255  // No. We have to perform subregister inserts. For each insert, create an
4256  // implicit def and a subregister insert, and save the register we create.
4257  const TargetRegisterClass *RC = getRegClassForTypeOnBank(
4258  LLT::fixed_vector(NumElts, WideTy.getScalarSizeInBits()),
4259  *RBI.getRegBank(SrcReg, MRI, TRI));
4260  unsigned SubReg = 0;
4261  bool Found = getSubRegForClass(RC, TRI, SubReg);
4262  (void)Found;
4263  assert(Found && "expected to find last operand's subreg idx");
4264  for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4265  Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4266  MachineInstr &ImpDefMI =
4267  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
4268  ImpDefReg);
4269 
4270  // Now, create the subregister insert from SrcReg.
4271  Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4272  MachineInstr &InsMI =
4273  *BuildMI(MBB, I, I.getDebugLoc(),
4274  TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4275  .addUse(ImpDefReg)
4276  .addUse(SrcReg)
4277  .addImm(SubReg);
4278 
4279  constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
4280  constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
4281 
4282  // Save the register so that we can copy from it after.
4283  InsertRegs.push_back(InsertReg);
4284  }
4285  }
4286 
4287  // Now that we've created any necessary subregister inserts, we can
4288  // create the copies.
4289  //
4290  // Perform the first copy separately as a subregister copy.
4291  Register CopyTo = I.getOperand(0).getReg();
4292  auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4293  .addReg(InsertRegs[0], 0, ExtractSubReg);
4294  constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
4295 
4296  // Now, perform the remaining copies as vector lane copies.
4297  unsigned LaneIdx = 1;
4298  for (Register InsReg : InsertRegs) {
4299  Register CopyTo = I.getOperand(LaneIdx).getReg();
4300  MachineInstr &CopyInst =
4301  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
4302  .addUse(InsReg)
4303  .addImm(LaneIdx);
4304  constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
4305  ++LaneIdx;
4306  }
4307 
4308  // Separately constrain the first copy's destination. Because of the
4309  // limitation in constrainOperandRegClass, we can't guarantee that this will
4310  // actually be constrained. So, do it ourselves using the second operand.
4311  const TargetRegisterClass *RC =
4312  MRI.getRegClassOrNull(I.getOperand(1).getReg());
4313  if (!RC) {
4314  LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
4315  return false;
4316  }
4317 
4318  RBI.constrainGenericRegister(CopyTo, *RC, MRI);
4319  I.eraseFromParent();
4320  return true;
4321 }
4322 
4323 bool AArch64InstructionSelector::selectConcatVectors(
4324  MachineInstr &I, MachineRegisterInfo &MRI) {
4325  assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4326  "Unexpected opcode");
4327  Register Dst = I.getOperand(0).getReg();
4328  Register Op1 = I.getOperand(1).getReg();
4329  Register Op2 = I.getOperand(2).getReg();
4330  MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4331  if (!ConcatMI)
4332  return false;
4333  I.eraseFromParent();
4334  return true;
4335 }
4336 
4337 unsigned
4338 AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4339  MachineFunction &MF) const {
4340  Type *CPTy = CPVal->getType();
4341  Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
4342 
4343  MachineConstantPool *MCP = MF.getConstantPool();
4344  return MCP->getConstantPoolIndex(CPVal, Alignment);
4345 }
4346 
4347 MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4348  const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4349  auto &MF = MIRBuilder.getMF();
4350  unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4351 
4352  auto Adrp =
4353  MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4354  .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
4355 
4356  MachineInstr *LoadMI = nullptr;
4357  MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4358  unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
4359  switch (Size) {
4360  case 16:
4361  LoadMI =
4362  &*MIRBuilder
4363  .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
4364  .addConstantPoolIndex(CPIdx, 0,
4365  AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4366  break;
4367  case 8:
4368  LoadMI =
4369  &*MIRBuilder
4370  .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
4371  .addConstantPoolIndex(CPIdx, 0,
4372  AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4373  break;
4374  case 4:
4375  LoadMI =
4376  &*MIRBuilder
4377  .buildInstr(AArch64::LDRSui, {&AArch64::FPR32RegClass}, {Adrp})
4378  .addConstantPoolIndex(CPIdx, 0,
4379  AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4380  break;
4381  case 2:
4382  LoadMI =
4383  &*MIRBuilder
4384  .buildInstr(AArch64::LDRHui, {&AArch64::FPR16RegClass}, {Adrp})
4385  .addConstantPoolIndex(CPIdx, 0,
4386  AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4387  break;
4388  default:
4389  LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4390  << *CPVal->getType());
4391  return nullptr;
4392  }
4393  LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
4394  MachineMemOperand::MOLoad,
4395  Size, Align(Size)));
4397  constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
4398  return LoadMI;
4399 }
4400 
4401 /// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
4402 /// size and RB.
4403 static std::pair<unsigned, unsigned>
4404 getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4405  unsigned Opc, SubregIdx;
4406  if (RB.getID() == AArch64::GPRRegBankID) {
4407  if (EltSize == 16) {
4408  Opc = AArch64::INSvi16gpr;
4409  SubregIdx = AArch64::ssub;
4410  } else if (EltSize == 32) {
4411  Opc = AArch64::INSvi32gpr;
4412  SubregIdx = AArch64::ssub;
4413  } else if (EltSize == 64) {
4414  Opc = AArch64::INSvi64gpr;
4415  SubregIdx = AArch64::dsub;
4416  } else {
4417  llvm_unreachable("invalid elt size!");
4418  }
4419  } else {
4420  if (EltSize == 8) {
4421  Opc = AArch64::INSvi8lane;
4422  SubregIdx = AArch64::bsub;
4423  } else if (EltSize == 16) {
4424  Opc = AArch64::INSvi16lane;
4425  SubregIdx = AArch64::hsub;
4426  } else if (EltSize == 32) {
4427  Opc = AArch64::INSvi32lane;
4428  SubregIdx = AArch64::ssub;
4429  } else if (EltSize == 64) {
4430  Opc = AArch64::INSvi64lane;
4431  SubregIdx = AArch64::dsub;
4432  } else {
4433  llvm_unreachable("invalid elt size!");
4434  }
4435  }
4436  return std::make_pair(Opc, SubregIdx);
4437 }
4438 
4439 MachineInstr *AArch64InstructionSelector::emitInstr(
4440  unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4441  std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4442  const ComplexRendererFns &RenderFns) const {
4443  assert(Opcode && "Expected an opcode?");
4444  assert(!isPreISelGenericOpcode(Opcode) &&
4445  "Function should only be used to produce selected instructions!");
4446  auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4447  if (RenderFns)
4448  for (auto &Fn : *RenderFns)
4449  Fn(MI);
4450  constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4451  return &*MI;
4452 }
4453 
4454 MachineInstr *AArch64InstructionSelector::emitAddSub(
4455  const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4456  Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4457  MachineIRBuilder &MIRBuilder) const {
4458  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4459  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4460  auto Ty = MRI.getType(LHS.getReg());
4461  assert(!Ty.isVector() && "Expected a scalar or pointer?");
4462  unsigned Size = Ty.getSizeInBits();
4463  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4464  bool Is32Bit = Size == 32;
4465 
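  // The rows of AddrModeAndSizeToOpcode (see the tables in emitADD, emitADDS
  // and emitSUBS below) are: [0] immediate, [1] shifted register,
  // [2] register, [3] the inverse operation with a negated immediate,
  // [4] extended register; the second index picks the 32-bit (W) variant.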
4466  // INSTRri form with positive arithmetic immediate.
4467  if (auto Fns = selectArithImmed(RHS))
4468  return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4469  MIRBuilder, Fns);
4470 
4471  // INSTRri form with negative arithmetic immediate.
4472  if (auto Fns = selectNegArithImmed(RHS))
4473  return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4474  MIRBuilder, Fns);
4475 
4476  // INSTRrx form.
4477  if (auto Fns = selectArithExtendedRegister(RHS))
4478  return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4479  MIRBuilder, Fns);
4480 
4481  // INSTRrs form.
4482  if (auto Fns = selectShiftedRegister(RHS))
4483  return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4484  MIRBuilder, Fns);
4485  return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4486  MIRBuilder);
4487 }
4488 
4489 MachineInstr *
4490 AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4491  MachineOperand &RHS,
4492  MachineIRBuilder &MIRBuilder) const {
4493  const std::array<std::array<unsigned, 2>, 5> OpcTable{
4494  {{AArch64::ADDXri, AArch64::ADDWri},
4495  {AArch64::ADDXrs, AArch64::ADDWrs},
4496  {AArch64::ADDXrr, AArch64::ADDWrr},
4497  {AArch64::SUBXri, AArch64::SUBWri},
4498  {AArch64::ADDXrx, AArch64::ADDWrx}}};
4499  return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4500 }
4501 
4502 MachineInstr *
4503 AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4504  MachineOperand &RHS,
4505  MachineIRBuilder &MIRBuilder) const {
4506  const std::array<std::array<unsigned, 2>, 5> OpcTable{
4507  {{AArch64::ADDSXri, AArch64::ADDSWri},
4508  {AArch64::ADDSXrs, AArch64::ADDSWrs},
4509  {AArch64::ADDSXrr, AArch64::ADDSWrr},
4510  {AArch64::SUBSXri, AArch64::SUBSWri},
4511  {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4512  return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4513 }
4514 
4515 MachineInstr *
4516 AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4517  MachineOperand &RHS,
4518  MachineIRBuilder &MIRBuilder) const {
4519  const std::array<std::array<unsigned, 2>, 5> OpcTable{
4520  {{AArch64::SUBSXri, AArch64::SUBSWri},
4521  {AArch64::SUBSXrs, AArch64::SUBSWrs},
4522  {AArch64::SUBSXrr, AArch64::SUBSWrr},
4523  {AArch64::ADDSXri, AArch64::ADDSWri},
4524  {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4525  return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4526 }
4527 
4528 MachineInstr *
4529 AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4530  MachineIRBuilder &MIRBuilder) const {
4531  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4532  bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4533  auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4534  return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4535 }
4536 
4537 MachineInstr *
4538 AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4539  MachineIRBuilder &MIRBuilder) const {
4540  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4541  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4542  LLT Ty = MRI.getType(LHS.getReg());
4543  unsigned RegSize = Ty.getSizeInBits();
4544  bool Is32Bit = (RegSize == 32);
4545  const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4546  {AArch64::ANDSXrs, AArch64::ANDSWrs},
4547  {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4548  // ANDS needs a logical immediate for its immediate form. Check if we can
4549  // fold one in.
4550  if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4551  int64_t Imm = ValAndVReg->Value.getSExtValue();
4552 
4553  if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
4554  auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4555  TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
4556  constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4557  return &*TstMI;
4558  }
4559  }
4560 
4561  if (auto Fns = selectLogicalShiftedRegister(RHS))
4562  return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4563  return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4564 }
4565 
4566 MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4567  MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4568  MachineIRBuilder &MIRBuilder) const {
4569  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4570  assert(Predicate.isPredicate() && "Expected predicate?");
4571  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4572  LLT CmpTy = MRI.getType(LHS.getReg());
4573  assert(!CmpTy.isVector() && "Expected scalar or pointer");
4574  unsigned Size = CmpTy.getSizeInBits();
4575  (void)Size;
4576  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4577  // Fold the compare into a cmn or tst if possible.
4578  if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4579  return FoldCmp;
4580  auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
4581  return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4582 }
4583 
4584 MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4585  Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4586  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4587 #ifndef NDEBUG
4588  LLT Ty = MRI.getType(Dst);
4589  assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4590  "Expected a 32-bit scalar register?");
4591 #endif
4592  const Register ZReg = AArch64::WZR;
4593  AArch64CC::CondCode CC1, CC2;
4594  changeFCMPPredToAArch64CC(Pred, CC1, CC2);
4595  auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
4596  if (CC2 == AArch64CC::AL)
4597  return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
4598  MIRBuilder);
4599  const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4600  Register Def1Reg = MRI.createVirtualRegister(RC);
4601  Register Def2Reg = MRI.createVirtualRegister(RC);
4602  auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
4603  emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
4604  emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
4605  auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4606  constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4607  return &*OrMI;
4608 }
4609 
4610 MachineInstr *
4611 AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS,
4612  MachineIRBuilder &MIRBuilder,
4613  Optional<CmpInst::Predicate> Pred) const {
4614  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4615  LLT Ty = MRI.getType(LHS);
4616  if (Ty.isVector())
4617  return nullptr;
4618  unsigned OpSize = Ty.getSizeInBits();
4619  if (OpSize != 32 && OpSize != 64)
4620  return nullptr;
4621 
4622  // If this is a compare against +0.0, then we don't have
4623  // to explicitly materialize a constant.
4624  const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4625  bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4626 
4627  auto IsEqualityPred = [](CmpInst::Predicate P) {
4628  return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4629  P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4630  };
4631  if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4632  // Try commutating the operands.
4633  const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4634  if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4635  ShouldUseImm = true;
4636  std::swap(LHS, RHS);
4637  }
4638  }
4639  unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
4640  {AArch64::FCMPSri, AArch64::FCMPDri}};
4641  unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64];
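  // E.g. an s64 compare against +0.0 selects CmpOpcTbl[1][1] == FCMPDri, and
  // the immediate form built below gets no second register use.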
4642 
4643  // Partially build the compare. Decide if we need to add a use for the
4644  // third operand based off whether or not we're comparing against 0.0.
4645  auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4646  CmpMI.setMIFlags(MachineInstr::NoFPExcept);
4647  if (!ShouldUseImm)
4648  CmpMI.addUse(RHS);
4649  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4650  return &*CmpMI;
4651 }
4652 
4653 MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4654  Optional<Register> Dst, Register Op1, Register Op2,
4655  MachineIRBuilder &MIRBuilder) const {
4656  // We implement a vector concat by:
4657  // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4658  // 2. Insert the upper vector into the destination's upper element
4659  // TODO: some of this code is common with G_BUILD_VECTOR handling.
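  // E.g. concatenating two <2 x s32> operands: each 64-bit value is first
  // placed into an FPR128 via IMPLICIT_DEF + INSERT_SUBREG (dsub), then the
  // second one's d-register is inserted into lane 1 of the first with
  // INSvi64lane, yielding the <4 x s32> result.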
4660  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4661 
4662  const LLT Op1Ty = MRI.getType(Op1);
4663  const LLT Op2Ty = MRI.getType(Op2);
4664 
4665  if (Op1Ty != Op2Ty) {
4666  LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4667  return nullptr;
4668  }
4669  assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4670 
4671  if (Op1Ty.getSizeInBits() >= 128) {
4672  LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4673  return nullptr;
4674  }
4675 
4676  // At the moment we just support 64 bit vector concats.
4677  if (Op1Ty.getSizeInBits() != 64) {
4678  LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
4679  return nullptr;
4680  }
4681 
4682  const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4683  const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4684  const TargetRegisterClass *DstRC =
4685  getRegClassForTypeOnBank(Op1Ty.multiplyElements(2), FPRBank);
4686 
4687  MachineInstr *WidenedOp1 =
4688  emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4689  MachineInstr *WidenedOp2 =
4690  emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4691  if (!WidenedOp1 || !WidenedOp2) {
4692  LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4693  return nullptr;
4694  }
4695 
4696  // Now do the insert of the upper element.
4697  unsigned InsertOpc, InsSubRegIdx;
4698  std::tie(InsertOpc, InsSubRegIdx) =
4699  getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4700 
4701  if (!Dst)
4702  Dst = MRI.createVirtualRegister(DstRC);
4703  auto InsElt =
4704  MIRBuilder
4705  .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4706  .addImm(1) /* Lane index */
4707  .addUse(WidenedOp2->getOperand(0).getReg())
4708  .addImm(0);
4709  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4710  return &*InsElt;
4711 }
4712 
4713 MachineInstr *
4714 AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
4715  Register Src2, AArch64CC::CondCode Pred,
4716  MachineIRBuilder &MIRBuilder) const {
4717  auto &MRI = *MIRBuilder.getMRI();
4718  const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
4719  // If we used a register class, then this won't necessarily have an LLT.
4720  // Compute the size based off whether or not we have a class or bank.
4721  unsigned Size;
4722  if (const auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
4723  Size = TRI.getRegSizeInBits(*RC);
4724  else
4725  Size = MRI.getType(Dst).getSizeInBits();
4726  // Some opcodes use s1.
4727  assert(Size <= 64 && "Expected 64 bits or less only!");
4728  static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4729  unsigned Opc = OpcTable[Size == 64];
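  // CSINC Rd, Rn, Rm, cc returns Rn when cc holds and Rm + 1 otherwise; with
  // the zero register as both sources (and the condition inverted) this is
  // the classic CSET idiom.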
4730  auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
4731  constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
4732  return &*CSINC;
4733 }
4734 
4735 std::pair<MachineInstr *, AArch64CC::CondCode>
4736 AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4737  MachineOperand &LHS,
4738  MachineOperand &RHS,
4739  MachineIRBuilder &MIRBuilder) const {
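  // Each overflow op lowers to a flag-setting ADDS/SUBS plus the condition
  // code that observes the overflow: VS for signed overflow, HS (carry set)
  // for unsigned add, and LO (carry clear) for unsigned subtract borrow.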
4740  switch (Opcode) {
4741  default:
4742  llvm_unreachable("Unexpected opcode!");
4743  case TargetOpcode::G_SADDO:
4744  return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4745  case TargetOpcode::G_UADDO:
4746  return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4747  case TargetOpcode::G_SSUBO:
4748  return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4749  case TargetOpcode::G_USUBO:
4750  return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4751  }
4752 }
4753 
4754 /// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
4755 /// expressed as a conjunction.
4756 /// \param CanNegate Set to true if we can negate the whole sub-tree just by
4757 /// changing the conditions on the CMP tests.
4758 /// (this means we can call emitConjunctionRec() with
4759 /// Negate==true on this sub-tree)
4760 /// \param MustBeFirst Set to true if this subtree needs to be negated and we
4761 /// cannot do the negation naturally. We are required to
4762 /// emit the subtree first in this case.
4763 /// \param WillNegate Is true if we are called when the result of this
4764 /// subexpression must be negated. This happens when the
4765 /// outer expression is an OR. We can use this fact to know
4766 /// that we have a double negation (or (or ...) ...) that
4767 /// can be implemented for free.
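/// For example, (or (icmp ...), (icmp ...)) can be negated as a whole just by
/// inverting both compare predicates, so it reports CanNegate == true whenever
/// WillNegate is set.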
4768 static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
4769  bool WillNegate, MachineRegisterInfo &MRI,
4770  unsigned Depth = 0) {
4771  if (!MRI.hasOneNonDBGUse(Val))
4772  return false;
4773  MachineInstr *ValDef = MRI.getVRegDef(Val);
4774  unsigned Opcode = ValDef->getOpcode();
4775  if (Opcode == TargetOpcode::G_TRUNC) {
4776  // Look through a trunc.
4777  Val = ValDef->getOperand(1).getReg();
4778  ValDef = MRI.getVRegDef(Val);
4779  Opcode = ValDef->getOpcode();
4780  }
4781  if (isa<GAnyCmp>(ValDef)) {
4782  CanNegate = true;
4783  MustBeFirst = false;
4784  return true;
4785  }
4786  // Protect against exponential runtime and stack overflow.
4787  if (Depth > 6)
4788  return false;
4789  if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
4790  bool IsOR = Opcode == TargetOpcode::G_OR;
4791  Register O0 = ValDef->getOperand(1).getReg();
4792  Register O1 = ValDef->getOperand(2).getReg();
4793  bool CanNegateL;
4794  bool MustBeFirstL;
4795  if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, MRI, Depth + 1))
4796  return false;
4797  bool CanNegateR;
4798  bool MustBeFirstR;
4799  if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, MRI, Depth + 1))
4800  return false;
4801 
4802  if (MustBeFirstL && MustBeFirstR)
4803  return false;
4804 
4805  if (IsOR) {
4806  // For an OR expression we need to be able to naturally negate at least
4807  // one side or we cannot do the transformation at all.
4808  if (!CanNegateL && !CanNegateR)
4809  return false;
4810  // If the result of the OR will be negated and we can naturally negate
4811  // the leaves, then this sub-tree as a whole negates naturally.
4812  CanNegate = WillNegate && CanNegateL && CanNegateR;
4813  // If we cannot naturally negate the whole sub-tree, then this must be
4814  // emitted first.
4815  MustBeFirst = !CanNegate;
4816  } else {
4817  assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
4818  // We cannot naturally negate an AND operation.
4819  CanNegate = false;
4820  MustBeFirst = MustBeFirstL || MustBeFirstR;
4821  }
4822  return true;
4823  }
4824  return false;
4825 }
4826 
4827 MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
4828  Register LHS, Register RHS, CmpInst::Predicate CC,
4829  AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
4830  MachineIRBuilder &MIB) const {
4831  // TODO: emit CMN as an optimization.
4832  auto &MRI = *MIB.getMRI();
4833  LLT OpTy = MRI.getType(LHS);
4834  assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
4835  unsigned CCmpOpc;
4836  if (CmpInst::isIntPredicate(CC)) {
4837  CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
4838  } else {
4839  switch (OpTy.getSizeInBits()) {
4840  case 16:
4841  CCmpOpc = AArch64::FCCMPHrr;
4842  break;
4843  case 32:
4844  CCmpOpc = AArch64::FCCMPSrr;
4845  break;
4846  case 64:
4847  CCmpOpc = AArch64::FCCMPDrr;
4848  break;
4849  default:
4850  return nullptr;
4851  }
4852  }
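  // When Predicate does not hold, CCMP/FCCMP skips the compare and instead
  // sets NZCV to the immediate below, which is chosen to satisfy the inverse
  // of OutCC so the rest of the conjunction evaluates to false.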
4853  AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
4854  unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
4855  auto CCmp =
4856  MIB.buildInstr(CCmpOpc, {}, {LHS, RHS}).addImm(NZCV).addImm(Predicate);
4857  constrainSelectedInstRegOperands(*CCmp, TII, TRI, RBI);
4858  return &*CCmp;
4859 }
4860 
4861 MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
4862  Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
4863  AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
4864  // We're at a tree leaf, produce a conditional comparison operation.
4865  auto &MRI = *MIB.getMRI();
4866  MachineInstr *ValDef = MRI.getVRegDef(Val);
4867  unsigned Opcode = ValDef->getOpcode();
4868  if (Opcode == TargetOpcode::G_TRUNC) {
4869  // Look through a trunc.
4870  Val = ValDef->getOperand(1).getReg();
4871  ValDef = MRI.getVRegDef(Val);
4872  Opcode = ValDef->getOpcode();
4873  }
4874  if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
4875  Register LHS = Cmp->getLHSReg();
4876  Register RHS = Cmp->getRHSReg();
4877  CmpInst::Predicate CC = Cmp->getCond();
4878  if (Negate)
4879  CC = CmpInst::getInversePredicate(CC);
4880  if (isa<GICmp>(Cmp)) {
4881  OutCC = changeICMPPredToAArch64CC(CC);
4882  } else {
4883  // Handle special FP cases.
4884  AArch64CC::CondCode ExtraCC;
4885  changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
4886  // Some floating point conditions can't be tested with a single condition
4887  // code. Construct an additional comparison in this case.
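  // For instance, FCMP_ONE must be checked as (ordered && not-equal), so a
  // second compare against the extra condition code is chained in.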
4888  if (ExtraCC != AArch64CC::AL) {
4889  MachineInstr *ExtraCmp;
4890  if (!CCOp)
4891  ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC);
4892  else
4893  ExtraCmp =
4894  emitConditionalComparison(LHS, RHS, CC, Predicate, ExtraCC, MIB);
4895  CCOp = ExtraCmp->getOperand(0).getReg();
4896  Predicate = ExtraCC;
4897  }
4898  }
4899 
4900  // Produce a normal comparison if we are first in the chain
4901  if (!CCOp) {
4902  auto Dst = MRI.cloneVirtualRegister(LHS);
4903  if (isa<GICmp>(Cmp))
4904  return emitSUBS(Dst, Cmp->getOperand(2), Cmp->getOperand(3), MIB);
4905  return emitFPCompare(Cmp->getOperand(2).getReg(),
4906  Cmp->getOperand(3).getReg(), MIB);
4907  }
4908  // Otherwise produce a ccmp.
4909  return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB);
4910  }
4911  assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");
4912 
4913  bool IsOR = Opcode == TargetOpcode::G_OR;
4914 
4915  Register LHS = ValDef->getOperand(1).getReg();
4916  bool CanNegateL;
4917  bool MustBeFirstL;
4918  bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR, MRI);
4919  assert(ValidL && "Valid conjunction/disjunction tree");
4920  (void)ValidL;
4921 
4922  Register RHS = ValDef->getOperand(2).getReg();
4923  bool CanNegateR;
4924  bool MustBeFirstR;
4925  bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR, MRI);
4926  assert(ValidR && "Valid conjunction/disjunction tree");
4927  (void)ValidR;
4928 
4929  // Swap sub-tree that must come first to the right side.
4930  if (MustBeFirstL) {
4931  assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
4932  std::swap(LHS, RHS);
4933  std::swap(CanNegateL, CanNegateR);
4934  std::swap(MustBeFirstL, MustBeFirstR);
4935  }
4936 
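  // A ccmp chain natively expresses a conjunction (AND) of conditions, so an
  // OR is realized via De Morgan: negate both operands, AND them, and invert
  // the final condition code (unless the caller already asked for negation).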
4937  bool NegateR;
4938  bool NegateAfterR;
4939  bool NegateL;
4940  bool NegateAfterAll;
4941  if (Opcode == TargetOpcode::G_OR) {
4942  // Swap the sub-tree that we can negate naturally to the left.
4943  if (!CanNegateL) {
4944  assert(CanNegateR && "at least one side must be negatable");
4945  assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
4946  assert(!Negate);
4947  std::swap(LHS, RHS);
4948  NegateR = false;
4949  NegateAfterR = true;
4950  } else {
4951  // Negate the left sub-tree if possible, otherwise negate the result.
4952  NegateR = CanNegateR;
4953  NegateAfterR = !CanNegateR;
4954  }
4955  NegateL = true;
4956  NegateAfterAll = !Negate;
4957  } else {
4958  assert(Opcode == TargetOpcode::G_AND &&
4959  "Valid conjunction/disjunction tree");
4960  assert(!Negate && "Valid conjunction/disjunction tree");
4961 
4962  NegateL = false;
4963  NegateR = false;
4964  NegateAfterR = false;
4965  NegateAfterAll = false;
4966  }
4967 
4968  // Emit sub-trees.
4969  AArch64CC::CondCode RHSCC;
4970  MachineInstr *CmpR =
4971  emitConjunctionRec(RHS, RHSCC, NegateR, CCOp, Predicate, MIB);
4972  if (NegateAfterR)
4973  RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
4974  MachineInstr *CmpL = emitConjunctionRec(
4975  LHS, OutCC, NegateL, CmpR->getOperand(0).getReg(), RHSCC, MIB);
4976  if (NegateAfterAll)
4977  OutCC = AArch64CC::getInvertedCondCode(OutCC);
4978  return CmpL;
4979 }
4980 
4981 MachineInstr *AArch64InstructionSelector::emitConjunction(
4982  Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {
4983  bool DummyCanNegate;
4984  bool DummyMustBeFirst;
4985  if (!canEmitC