1 //===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file implements the targeting of the InstructionSelector class for
10 /// AArch64.
11 /// \todo This should be generated by TableGen.
12 //===----------------------------------------------------------------------===//
13 
14 #include "AArch64GlobalISelUtils.h"
15 #include "AArch64InstrInfo.h"
18 #include "AArch64RegisterInfo.h"
19 #include "AArch64Subtarget.h"
20 #include "AArch64TargetMachine.h"
23 #include "llvm/ADT/Optional.h"
41 #include "llvm/IR/Constants.h"
42 #include "llvm/IR/DerivedTypes.h"
43 #include "llvm/IR/Instructions.h"
44 #include "llvm/IR/IntrinsicsAArch64.h"
45 #include "llvm/IR/PatternMatch.h"
46 #include "llvm/IR/Type.h"
47 #include "llvm/Pass.h"
48 #include "llvm/Support/Debug.h"
50 #include <optional>
51 
52 #define DEBUG_TYPE "aarch64-isel"
53 
54 using namespace llvm;
55 using namespace MIPatternMatch;
56 using namespace AArch64GISelUtils;
57 
58 namespace llvm {
59 class BlockFrequencyInfo;
60 class ProfileSummaryInfo;
61 }
62 
63 namespace {
64 
65 #define GET_GLOBALISEL_PREDICATE_BITSET
66 #include "AArch64GenGlobalISel.inc"
67 #undef GET_GLOBALISEL_PREDICATE_BITSET
68 
69 
70 class AArch64InstructionSelector : public InstructionSelector {
71 public:
72  AArch64InstructionSelector(const AArch64TargetMachine &TM,
73  const AArch64Subtarget &STI,
74  const AArch64RegisterBankInfo &RBI);
75 
76  bool select(MachineInstr &I) override;
77  static const char *getName() { return DEBUG_TYPE; }
78 
79  void setupMF(MachineFunction &MF, GISelKnownBits *KB,
80  CodeGenCoverage &CoverageInfo, ProfileSummaryInfo *PSI,
81  BlockFrequencyInfo *BFI) override {
82  InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
83  MIB.setMF(MF);
84 
85  // hasFnAttribute() is expensive to call on every BRCOND selection, so
86  // cache it here for each run of the selector.
87  ProduceNonFlagSettingCondBr =
88  !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
89  MFReturnAddr = Register();
90 
91  processPHIs(MF);
92  }
93 
94 private:
95  /// tblgen-erated 'select' implementation, used as the initial selector for
96  /// the patterns that don't require complex C++.
97  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
98 
99  // A lowering phase that runs before any selection attempts.
100  // Returns true if the instruction was modified.
101  bool preISelLower(MachineInstr &I);
102 
103  // An early selection function that runs before the selectImpl() call.
104  bool earlySelect(MachineInstr &I);
105 
106  // Do some preprocessing of G_PHIs before we begin selection.
107  void processPHIs(MachineFunction &MF);
108 
109  bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
110 
111  /// Eliminate same-sized cross-bank copies into stores before selectImpl().
112  bool contractCrossBankCopyIntoStore(MachineInstr &I,
113  MachineRegisterInfo &MRI);
114 
115  bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
116 
117  bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
118  MachineRegisterInfo &MRI) const;
119  bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
120  MachineRegisterInfo &MRI) const;
121 
122  ///@{
123  /// Helper functions for selectCompareBranch.
124  bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
125  MachineIRBuilder &MIB) const;
126  bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
127  MachineIRBuilder &MIB) const;
128  bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
129  MachineIRBuilder &MIB) const;
130  bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
131  MachineBasicBlock *DstMBB,
132  MachineIRBuilder &MIB) const;
133  ///@}
134 
135  bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
136  MachineRegisterInfo &MRI);
137 
138  bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
139  bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
140 
141  // Helper to generate an equivalent of scalar_to_vector into a new register,
142  // returned via 'Dst'.
143  MachineInstr *emitScalarToVector(unsigned EltSize,
144  const TargetRegisterClass *DstRC,
145  Register Scalar,
146  MachineIRBuilder &MIRBuilder) const;
147 
148  /// Emit a lane insert into \p DstReg, or a new vector register if None is
149  /// provided.
150  ///
151  /// The lane inserted into is defined by \p LaneIdx. The vector source
152  /// register is given by \p SrcReg. The register containing the element is
153  /// given by \p EltReg.
154  MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
155  Register EltReg, unsigned LaneIdx,
156  const RegisterBank &RB,
157  MachineIRBuilder &MIRBuilder) const;
158 
159  /// Emit a sequence of instructions representing a constant \p CV for a
160  /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
161  ///
162  /// \returns the last instruction in the sequence on success, and nullptr
163  /// otherwise.
164  MachineInstr *emitConstantVector(Register Dst, Constant *CV,
165  MachineIRBuilder &MIRBuilder,
166  MachineRegisterInfo &MRI);
167 
168  bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI);
169  bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
170  MachineRegisterInfo &MRI);
171  /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
172  /// SUBREG_TO_REG.
173  bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
174  bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
177 
178  bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
179  bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
180  bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
181  bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
182 
183  /// Helper function to select vector load intrinsics like
184  /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
185  /// \p Opc is the opcode that the selected instruction should use.
186  /// \p NumVecs is the number of vector destinations for the instruction.
187  /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
188  bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
189  MachineInstr &I);
190  bool selectIntrinsicWithSideEffects(MachineInstr &I,
191  MachineRegisterInfo &MRI);
192  bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
193  bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI);
194  bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
195  bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
196  bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
197  bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
198  bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
199  bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
200  bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
201  bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
202 
203  unsigned emitConstantPoolEntry(const Constant *CPVal,
204  MachineFunction &MF) const;
205  MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
206  MachineIRBuilder &MIRBuilder) const;
207 
208  // Emit a vector concat operation.
209  MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
210  Register Op2,
211  MachineIRBuilder &MIRBuilder) const;
212 
213  // Emit an integer compare between LHS and RHS, which checks for Predicate.
214  MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
215  MachineOperand &Predicate,
216  MachineIRBuilder &MIRBuilder) const;
217 
218  /// Emit a floating point comparison between \p LHS and \p RHS.
219  /// \p Pred if given is the intended predicate to use.
220  MachineInstr *emitFPCompare(Register LHS, Register RHS,
221  MachineIRBuilder &MIRBuilder,
222  Optional<CmpInst::Predicate> = None) const;
223 
224  MachineInstr *emitInstr(unsigned Opcode,
225  std::initializer_list<llvm::DstOp> DstOps,
226  std::initializer_list<llvm::SrcOp> SrcOps,
227  MachineIRBuilder &MIRBuilder,
228  const ComplexRendererFns &RenderFns = None) const;
229  /// Helper function to emit an add or sub instruction.
230  ///
231  /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
232  /// in a specific order.
233  ///
234  /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
235  ///
236  /// \code
237  /// const std::array<std::array<unsigned, 2>, 4> Table {
238  /// {{AArch64::ADDXri, AArch64::ADDWri},
239  /// {AArch64::ADDXrs, AArch64::ADDWrs},
240  /// {AArch64::ADDXrr, AArch64::ADDWrr},
241  /// {AArch64::SUBXri, AArch64::SUBWri},
242  /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
243  /// \endcode
244  ///
245  /// Each row in the table corresponds to a different addressing mode. Each
246  /// column corresponds to a different register size.
247  ///
248  /// \attention Rows must be structured as follows:
249  /// - Row 0: The ri opcode variants
250  /// - Row 1: The rs opcode variants
251  /// - Row 2: The rr opcode variants
252  /// - Row 3: The ri opcode variants for negative immediates
253  /// - Row 4: The rx opcode variants
254  ///
255  /// \attention Columns must be structured as follows:
256  /// - Column 0: The 64-bit opcode variants
257  /// - Column 1: The 32-bit opcode variants
258  ///
259  /// \p Dst is the destination register of the binop to emit.
260  /// \p LHS is the left-hand operand of the binop to emit.
261  /// \p RHS is the right-hand operand of the binop to emit.
262  MachineInstr *emitAddSub(
263  const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
265  MachineIRBuilder &MIRBuilder) const;
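 // Illustration only (not part of the upstream interface): with the layout
 // documented above, callers index the table as Table[Row][Is32Bit]. For the
 // example table in the comment:
 //
 //   AddrModeAndSizeToOpcode[2][1] == AArch64::ADDWrr  (32-bit rr variant)
 //   AddrModeAndSizeToOpcode[0][0] == AArch64::ADDXri  (64-bit ri variant)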
266  MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
267  MachineOperand &RHS,
268  MachineIRBuilder &MIRBuilder) const;
269  MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
270  MachineIRBuilder &MIRBuilder) const;
271  MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
272  MachineIRBuilder &MIRBuilder) const;
273  MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
274  MachineIRBuilder &MIRBuilder) const;
275  MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
276  MachineIRBuilder &MIRBuilder) const;
277  MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
278  AArch64CC::CondCode CC,
279  MachineIRBuilder &MIRBuilder) const;
280  MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
281  const RegisterBank &DstRB, LLT ScalarTy,
282  Register VecReg, unsigned LaneIdx,
283  MachineIRBuilder &MIRBuilder) const;
284  MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
285  AArch64CC::CondCode Pred,
286  MachineIRBuilder &MIRBuilder) const;
287  /// Emit a CSet for a FP compare.
288  ///
289  /// \p Dst is expected to be a 32-bit scalar register.
290  MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
291  MachineIRBuilder &MIRBuilder) const;
292 
293  /// Emit the overflow op for \p Opcode.
294  ///
295  /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
296  /// G_USUBO, etc.
297  std::pair<MachineInstr *, AArch64CC::CondCode>
298  emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
299  MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
300 
301  /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
302  /// In some cases this is even possible with OR operations in the expression.
303  MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC,
304  MachineIRBuilder &MIB) const;
305  MachineInstr *emitConditionalComparison(Register LHS, Register RHS,
306  CmpInst::Predicate CC,
307  AArch64CC::CondCode Predicate,
308  AArch64CC::CondCode OutCC,
309  MachineIRBuilder &MIB) const;
310  MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC,
311  bool Negate, Register CCOp,
312  AArch64CC::CondCode Predicate,
313  MachineIRBuilder &MIB) const;
314 
315  /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
316  /// \p IsNegative is true if the test should be "not zero".
317  /// This will also optimize the test bit instruction when possible.
318  MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
319  MachineBasicBlock *DstMBB,
320  MachineIRBuilder &MIB) const;
321 
322  /// Emit a CB(N)Z instruction which branches to \p DestMBB.
323  MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
324  MachineBasicBlock *DestMBB,
325  MachineIRBuilder &MIB) const;
326 
327  // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
328  // We use these manually instead of using the importer since it doesn't
329  // support SDNodeXForm.
330  ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
331  ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
332  ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
333  ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
334 
335  ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
336  ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
337  ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
338 
339  ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
340  unsigned Size) const;
341 
342  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
343  return selectAddrModeUnscaled(Root, 1);
344  }
345  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
346  return selectAddrModeUnscaled(Root, 2);
347  }
348  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
349  return selectAddrModeUnscaled(Root, 4);
350  }
351  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
352  return selectAddrModeUnscaled(Root, 8);
353  }
354  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
355  return selectAddrModeUnscaled(Root, 16);
356  }
357 
358  /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
359  /// from complex pattern matchers like selectAddrModeIndexed().
360  ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
361  MachineRegisterInfo &MRI) const;
362 
363  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
364  unsigned Size) const;
365  template <int Width>
366  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
367  return selectAddrModeIndexed(Root, Width / 8);
368  }
369 
370  bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
371  const MachineRegisterInfo &MRI) const;
372  ComplexRendererFns
373  selectAddrModeShiftedExtendXReg(MachineOperand &Root,
374  unsigned SizeInBytes) const;
375 
376  /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
377  /// or not a shift + extend should be folded into an addressing mode. Returns
378  /// None when this is not profitable or possible.
379  ComplexRendererFns
380  selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
381  MachineOperand &Offset, unsigned SizeInBytes,
382  bool WantsExt) const;
383  ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
384  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
385  unsigned SizeInBytes) const;
386  template <int Width>
387  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
388  return selectAddrModeXRO(Root, Width / 8);
389  }
390 
391  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
392  unsigned SizeInBytes) const;
393  template <int Width>
394  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
395  return selectAddrModeWRO(Root, Width / 8);
396  }
397 
398  ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
399  bool AllowROR = false) const;
400 
401  ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
402  return selectShiftedRegister(Root);
403  }
404 
405  ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
406  return selectShiftedRegister(Root, true);
407  }
408 
409  /// Given an extend instruction, determine the correct shift-extend type for
410  /// that instruction.
411  ///
412  /// If the instruction is going to be used in a load or store, pass
413  /// \p IsLoadStore = true.
414  AArch64_AM::ShiftExtendType
415  getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
416  bool IsLoadStore = false) const;
417 
418  /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
419  ///
420  /// \returns Either \p Reg if no change was necessary, or the new register
421  /// created by moving \p Reg.
422  ///
423  /// Note: This uses emitCopy right now.
424  Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
425  MachineIRBuilder &MIB) const;
426 
427  ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
428 
429  void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
430  int OpIdx = -1) const;
431  void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
432  int OpIdx = -1) const;
433  void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
434  int OpIdx = -1) const;
435  void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
436  int OpIdx = -1) const;
437  void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
438  int OpIdx = -1) const;
439  void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
440  int OpIdx = -1) const;
441  void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
442  const MachineInstr &MI,
443  int OpIdx = -1) const;
444 
445  // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
446  void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
447 
448  // Optimization methods.
449  bool tryOptSelect(GSelect &Sel);
450  bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
451  MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
452  MachineOperand &Predicate,
453  MachineIRBuilder &MIRBuilder) const;
454 
455  /// Return true if \p MI is a load or store of \p NumBytes bytes.
456  bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
457 
458  /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
459  /// register zeroed out. In other words, the result of MI has been explicitly
460  /// zero extended.
461  bool isDef32(const MachineInstr &MI) const;
462 
463  const AArch64TargetMachine &TM;
464  const AArch64Subtarget &STI;
465  const AArch64InstrInfo &TII;
466  const AArch64RegisterInfo &TRI;
467  const AArch64RegisterBankInfo &RBI;
468 
469  bool ProduceNonFlagSettingCondBr = false;
470 
471  // Some cached values used during selection.
472  // We use LR as a live-in register, and we keep track of it here as it can be
473  // clobbered by calls.
474  Register MFReturnAddr;
475 
476  MachineIRBuilder MIB;
477 
478 #define GET_GLOBALISEL_PREDICATES_DECL
479 #include "AArch64GenGlobalISel.inc"
480 #undef GET_GLOBALISEL_PREDICATES_DECL
481 
482 // We declare the temporaries used by selectImpl() in the class to minimize the
483 // cost of constructing placeholder values.
484 #define GET_GLOBALISEL_TEMPORARIES_DECL
485 #include "AArch64GenGlobalISel.inc"
486 #undef GET_GLOBALISEL_TEMPORARIES_DECL
487 };
488 
489 } // end anonymous namespace
490 
491 #define GET_GLOBALISEL_IMPL
492 #include "AArch64GenGlobalISel.inc"
493 #undef GET_GLOBALISEL_IMPL
494 
495 AArch64InstructionSelector::AArch64InstructionSelector(
496  const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
497  const AArch64RegisterBankInfo &RBI)
498  : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
499  RBI(RBI),
501 #include "AArch64GenGlobalISel.inc"
504 #include "AArch64GenGlobalISel.inc"
506 {
507 }
508 
509 // FIXME: This should be target-independent, inferred from the types declared
510 // for each class in the bank.
511 //
512 /// Given a register bank, and a type, return the smallest register class that
513 /// can represent that combination.
514 static const TargetRegisterClass *
515 getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
516  bool GetAllRegSet = false) {
517  if (RB.getID() == AArch64::GPRRegBankID) {
518  if (Ty.getSizeInBits() <= 32)
519  return GetAllRegSet ? &AArch64::GPR32allRegClass
520  : &AArch64::GPR32RegClass;
521  if (Ty.getSizeInBits() == 64)
522  return GetAllRegSet ? &AArch64::GPR64allRegClass
523  : &AArch64::GPR64RegClass;
524  if (Ty.getSizeInBits() == 128)
525  return &AArch64::XSeqPairsClassRegClass;
526  return nullptr;
527  }
528 
529  if (RB.getID() == AArch64::FPRRegBankID) {
530  switch (Ty.getSizeInBits()) {
531  case 8:
532  return &AArch64::FPR8RegClass;
533  case 16:
534  return &AArch64::FPR16RegClass;
535  case 32:
536  return &AArch64::FPR32RegClass;
537  case 64:
538  return &AArch64::FPR64RegClass;
539  case 128:
540  return &AArch64::FPR128RegClass;
541  }
542  return nullptr;
543  }
544 
545  return nullptr;
546 }
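// Illustrative only (hypothetical types, derived from the cases above):
//
//   getRegClassForTypeOnBank(LLT::scalar(32), GPRBank)          -> GPR32RegClass
//   getRegClassForTypeOnBank(LLT::scalar(64), FPRBank)          -> FPR64RegClass
//   getRegClassForTypeOnBank(LLT::fixed_vector(2, 32), FPRBank) -> FPR64RegClass
//
// Only the total size in bits is considered, so a v2s32 vector and an s64
// scalar on the FPR bank map to the same class. GPRBank/FPRBank here stand in
// for the corresponding RegisterBank objects.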
547 
548 /// Given a register bank, and size in bits, return the smallest register class
549 /// that can represent that combination.
550 static const TargetRegisterClass *
551 getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
552  bool GetAllRegSet = false) {
553  unsigned RegBankID = RB.getID();
554 
555  if (RegBankID == AArch64::GPRRegBankID) {
556  if (SizeInBits <= 32)
557  return GetAllRegSet ? &AArch64::GPR32allRegClass
558  : &AArch64::GPR32RegClass;
559  if (SizeInBits == 64)
560  return GetAllRegSet ? &AArch64::GPR64allRegClass
561  : &AArch64::GPR64RegClass;
562  if (SizeInBits == 128)
563  return &AArch64::XSeqPairsClassRegClass;
564  }
565 
566  if (RegBankID == AArch64::FPRRegBankID) {
567  switch (SizeInBits) {
568  default:
569  return nullptr;
570  case 8:
571  return &AArch64::FPR8RegClass;
572  case 16:
573  return &AArch64::FPR16RegClass;
574  case 32:
575  return &AArch64::FPR32RegClass;
576  case 64:
577  return &AArch64::FPR64RegClass;
578  case 128:
579  return &AArch64::FPR128RegClass;
580  }
581  }
582 
583  return nullptr;
584 }
585 
586 /// Returns the correct subregister to use for a given register class.
587 static bool getSubRegForClass(const TargetRegisterClass *RC,
588  const TargetRegisterInfo &TRI, unsigned &SubReg) {
589  switch (TRI.getRegSizeInBits(*RC)) {
590  case 8:
591  SubReg = AArch64::bsub;
592  break;
593  case 16:
594  SubReg = AArch64::hsub;
595  break;
596  case 32:
597  if (RC != &AArch64::FPR32RegClass)
598  SubReg = AArch64::sub_32;
599  else
600  SubReg = AArch64::ssub;
601  break;
602  case 64:
603  SubReg = AArch64::dsub;
604  break;
605  default:
606  LLVM_DEBUG(
607  dbgs() << "Couldn't find appropriate subregister for register class.");
608  return false;
609  }
610 
611  return true;
612 }
613 
614 /// Returns the minimum size the given register bank can hold.
615 static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
616  switch (RB.getID()) {
617  case AArch64::GPRRegBankID:
618  return 32;
619  case AArch64::FPRRegBankID:
620  return 8;
621  default:
622  llvm_unreachable("Tried to get minimum size for unknown register bank.");
623  }
624 }
625 
626 /// Create a REG_SEQUENCE instruction using the registers in \p Regs.
627 /// Helper function for functions like createDTuple and createQTuple.
628 ///
629 /// \p RegClassIDs - The list of register class IDs available for some tuple of
630 /// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
631 /// expected to contain between 2 and 4 tuple classes.
632 ///
633 /// \p SubRegs - The list of subregister classes associated with each register
634 /// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
635 /// subregister class. The index of each subregister class is expected to
636 /// correspond with the index of each register class.
637 ///
638 /// \returns Either the destination register of REG_SEQUENCE instruction that
639 /// was created, or the 0th element of \p Regs if \p Regs contains a single
640 /// element.
641 static Register createTuple(ArrayRef<Register> Regs,
642  const unsigned RegClassIDs[],
643  const unsigned SubRegs[], MachineIRBuilder &MIB) {
644  unsigned NumRegs = Regs.size();
645  if (NumRegs == 1)
646  return Regs[0];
647  assert(NumRegs >= 2 && NumRegs <= 4 &&
648  "Only support between two and 4 registers in a tuple!");
649  const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
650  auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
651  auto RegSequence =
652  MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
653  for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
654  RegSequence.addUse(Regs[I]);
655  RegSequence.addImm(SubRegs[I]);
656  }
657  return RegSequence.getReg(0);
658 }
659 
660 /// Create a tuple of D-registers using the registers in \p Regs.
661 static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
662  static const unsigned RegClassIDs[] = {
663  AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
664  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
665  AArch64::dsub2, AArch64::dsub3};
666  return createTuple(Regs, RegClassIDs, SubRegs, MIB);
667 }
668 
669 /// Create a tuple of Q-registers using the registers in \p Regs.
670 static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
671  static const unsigned RegClassIDs[] = {
672  AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
673  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
674  AArch64::qsub2, AArch64::qsub3};
675  return createTuple(Regs, RegClassIDs, SubRegs, MIB);
676 }
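// Illustration only: for two hypothetical virtual registers %a and %b holding
// 128-bit values, createQTuple emits the equivalent of
//
//   %t:qq = REG_SEQUENCE %a, %subreg.qsub0, %b, %subreg.qsub1
//
// since RegClassIDs[NumRegs - 2] with NumRegs == 2 picks AArch64::QQRegClassID.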
677 
678 static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
679  auto &MI = *Root.getParent();
680  auto &MBB = *MI.getParent();
681  auto &MF = *MBB.getParent();
682  auto &MRI = MF.getRegInfo();
683  uint64_t Immed;
684  if (Root.isImm())
685  Immed = Root.getImm();
686  else if (Root.isCImm())
687  Immed = Root.getCImm()->getZExtValue();
688  else if (Root.isReg()) {
689  auto ValAndVReg =
690  getIConstantVRegValWithLookThrough(Root.getReg(), MRI);
691  if (!ValAndVReg)
692  return None;
693  Immed = ValAndVReg->Value.getSExtValue();
694  } else
695  return None;
696  return Immed;
697 }
698 
699 /// Check whether \p I is a currently unsupported binary operation:
700 /// - it has an unsized type
701 /// - an operand is not a vreg
702 /// - not all operands are in the same bank
703 /// These are checks that should someday live in the verifier, but right now,
704 /// these are mostly limitations of the aarch64 selector.
705 static bool unsupportedBinOp(const MachineInstr &I,
706  const AArch64RegisterBankInfo &RBI,
707  const MachineRegisterInfo &MRI,
708  const AArch64RegisterInfo &TRI) {
709  LLT Ty = MRI.getType(I.getOperand(0).getReg());
710  if (!Ty.isValid()) {
711  LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
712  return true;
713  }
714 
715  const RegisterBank *PrevOpBank = nullptr;
716  for (auto &MO : I.operands()) {
717  // FIXME: Support non-register operands.
718  if (!MO.isReg()) {
719  LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
720  return true;
721  }
722 
723  // FIXME: Can generic operations have physical registers operands? If
724  // so, this will need to be taught about that, and we'll need to get the
725  // bank out of the minimal class for the register.
726  // Either way, this needs to be documented (and possibly verified).
727  if (!Register::isVirtualRegister(MO.getReg())) {
728  LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
729  return true;
730  }
731 
732  const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
733  if (!OpBank) {
734  LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
735  return true;
736  }
737 
738  if (PrevOpBank && OpBank != PrevOpBank) {
739  LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
740  return true;
741  }
742  PrevOpBank = OpBank;
743  }
744  return false;
745 }
746 
747 /// Select the AArch64 opcode for the basic binary operation \p GenericOpc
748 /// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
749 /// and of size \p OpSize.
750 /// \returns \p GenericOpc if the combination is unsupported.
751 static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
752  unsigned OpSize) {
753  switch (RegBankID) {
754  case AArch64::GPRRegBankID:
755  if (OpSize == 32) {
756  switch (GenericOpc) {
757  case TargetOpcode::G_SHL:
758  return AArch64::LSLVWr;
759  case TargetOpcode::G_LSHR:
760  return AArch64::LSRVWr;
761  case TargetOpcode::G_ASHR:
762  return AArch64::ASRVWr;
763  default:
764  return GenericOpc;
765  }
766  } else if (OpSize == 64) {
767  switch (GenericOpc) {
768  case TargetOpcode::G_PTR_ADD:
769  return AArch64::ADDXrr;
770  case TargetOpcode::G_SHL:
771  return AArch64::LSLVXr;
772  case TargetOpcode::G_LSHR:
773  return AArch64::LSRVXr;
774  case TargetOpcode::G_ASHR:
775  return AArch64::ASRVXr;
776  default:
777  return GenericOpc;
778  }
779  }
780  break;
781  case AArch64::FPRRegBankID:
782  switch (OpSize) {
783  case 32:
784  switch (GenericOpc) {
785  case TargetOpcode::G_FADD:
786  return AArch64::FADDSrr;
787  case TargetOpcode::G_FSUB:
788  return AArch64::FSUBSrr;
789  case TargetOpcode::G_FMUL:
790  return AArch64::FMULSrr;
791  case TargetOpcode::G_FDIV:
792  return AArch64::FDIVSrr;
793  default:
794  return GenericOpc;
795  }
796  case 64:
797  switch (GenericOpc) {
798  case TargetOpcode::G_FADD:
799  return AArch64::FADDDrr;
800  case TargetOpcode::G_FSUB:
801  return AArch64::FSUBDrr;
802  case TargetOpcode::G_FMUL:
803  return AArch64::FMULDrr;
804  case TargetOpcode::G_FDIV:
805  return AArch64::FDIVDrr;
806  case TargetOpcode::G_OR:
807  return AArch64::ORRv8i8;
808  default:
809  return GenericOpc;
810  }
811  }
812  break;
813  }
814  return GenericOpc;
815 }
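// A few mappings implied by the switch above (illustrative, not exhaustive):
//
//   selectBinaryOp(TargetOpcode::G_SHL,  AArch64::GPRRegBankID, 64) -> AArch64::LSLVXr
//   selectBinaryOp(TargetOpcode::G_FADD, AArch64::FPRRegBankID, 32) -> AArch64::FADDSrr
//   selectBinaryOp(TargetOpcode::G_MUL,  AArch64::GPRRegBankID, 32) -> G_MUL (unsupported here)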
816 
817 /// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
818 /// appropriate for the (value) register bank \p RegBankID and of memory access
819 /// size \p OpSize. This returns the variant with the base+unsigned-immediate
820 /// addressing mode (e.g., LDRXui).
821 /// \returns \p GenericOpc if the combination is unsupported.
822 static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
823  unsigned OpSize) {
824  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
825  switch (RegBankID) {
826  case AArch64::GPRRegBankID:
827  switch (OpSize) {
828  case 8:
829  return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
830  case 16:
831  return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
832  case 32:
833  return isStore ? AArch64::STRWui : AArch64::LDRWui;
834  case 64:
835  return isStore ? AArch64::STRXui : AArch64::LDRXui;
836  }
837  break;
838  case AArch64::FPRRegBankID:
839  switch (OpSize) {
840  case 8:
841  return isStore ? AArch64::STRBui : AArch64::LDRBui;
842  case 16:
843  return isStore ? AArch64::STRHui : AArch64::LDRHui;
844  case 32:
845  return isStore ? AArch64::STRSui : AArch64::LDRSui;
846  case 64:
847  return isStore ? AArch64::STRDui : AArch64::LDRDui;
848  case 128:
849  return isStore ? AArch64::STRQui : AArch64::LDRQui;
850  }
851  break;
852  }
853  return GenericOpc;
854 }
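// Illustrative mappings from the table above:
//
//   selectLoadStoreUIOp(TargetOpcode::G_LOAD,  AArch64::GPRRegBankID, 32)  -> AArch64::LDRWui
//   selectLoadStoreUIOp(TargetOpcode::G_STORE, AArch64::FPRRegBankID, 128) -> AArch64::STRQui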
855 
856 /// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
857 /// to \p *To.
858 ///
859 /// E.g "To = COPY SrcReg:SubReg"
860 static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
861  const RegisterBankInfo &RBI, Register SrcReg,
862  const TargetRegisterClass *To, unsigned SubReg) {
863  assert(SrcReg.isValid() && "Expected a valid source register?");
864  assert(To && "Destination register class cannot be null");
865  assert(SubReg && "Expected a valid subregister");
866 
867  MachineIRBuilder MIB(I);
868  auto SubRegCopy =
869  MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
870  MachineOperand &RegOp = I.getOperand(1);
871  RegOp.setReg(SubRegCopy.getReg(0));
872 
873  // It's possible that the destination register won't be constrained. Make
874  // sure that happens.
875  if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
876  RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
877 
878  return true;
879 }
880 
881 /// Helper function to get the source and destination register classes for a
882 /// copy. Returns a std::pair containing the source register class for the
883 /// copy, and the destination register class for the copy. If a register class
884 /// cannot be determined, then it will be nullptr.
885 static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
886 getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
887  MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
888  const RegisterBankInfo &RBI) {
889  Register DstReg = I.getOperand(0).getReg();
890  Register SrcReg = I.getOperand(1).getReg();
891  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
892  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
893  unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
894  unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
895 
896  // Special casing for cross-bank copies of s1s. We can technically represent
897  // a 1-bit value with any size of register. The minimum size for a GPR is 32
898  // bits. So, we need to put the FPR on 32 bits as well.
899  //
900  // FIXME: I'm not sure if this case holds true outside of copies. If it does,
901  // then we can pull it into the helpers that get the appropriate class for a
902  // register bank. Or make a new helper that carries along some constraint
903  // information.
904  if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
905  SrcSize = DstSize = 32;
906 
907  return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
908  getMinClassForRegBank(DstRegBank, DstSize, true)};
909 }
910 
911 // FIXME: We need some sort of API in RBI/TRI to allow generic code to
912 // constrain operands of simple instructions given a TargetRegisterClass
913 // and LLT
914 static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI,
915  const RegisterBankInfo &RBI) {
916  for (MachineOperand &MO : I.operands()) {
917  if (!MO.isReg())
918  continue;
919  Register Reg = MO.getReg();
920  if (!Reg)
921  continue;
922  if (Reg.isPhysical())
923  continue;
924  LLT Ty = MRI.getType(Reg);
925  const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
926  const TargetRegisterClass *RC =
927  RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
928  if (!RC) {
929  const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
930  RC = getRegClassForTypeOnBank(Ty, RB);
931  if (!RC) {
932  LLVM_DEBUG(
933  dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
934  break;
935  }
936  }
937  RBI.constrainGenericRegister(Reg, *RC, MRI);
938  }
939 
940  return true;
941 }
942 
943 static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
944  MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
945  const RegisterBankInfo &RBI) {
946  Register DstReg = I.getOperand(0).getReg();
947  Register SrcReg = I.getOperand(1).getReg();
948  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
949  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
950 
951  // Find the correct register classes for the source and destination registers.
952  const TargetRegisterClass *SrcRC;
953  const TargetRegisterClass *DstRC;
954  std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
955 
956  if (!DstRC) {
957  LLVM_DEBUG(dbgs() << "Unexpected dest size "
958  << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
959  return false;
960  }
961 
962  // Is this a copy? If so, then we may need to insert a subregister copy.
963  if (I.isCopy()) {
964  // Yes. Check if there's anything to fix up.
965  if (!SrcRC) {
966  LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
967  return false;
968  }
969 
970  unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
971  unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
972  unsigned SubReg;
973 
974  // If the source bank doesn't support a subregister copy small enough,
975  // then we first need to copy to the destination bank.
976  if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
977  const TargetRegisterClass *DstTempRC =
978  getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
979  getSubRegForClass(DstRC, TRI, SubReg);
980 
981  MachineIRBuilder MIB(I);
982  auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
983  copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
984  } else if (SrcSize > DstSize) {
985  // If the source register is bigger than the destination we need to
986  // perform a subregister copy.
987  const TargetRegisterClass *SubRegRC =
988  getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
989  getSubRegForClass(SubRegRC, TRI, SubReg);
990  copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
991  } else if (DstSize > SrcSize) {
992  // If the destination register is bigger than the source we need to do
993  // a promotion using SUBREG_TO_REG.
994  const TargetRegisterClass *PromotionRC =
995  getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
996  getSubRegForClass(SrcRC, TRI, SubReg);
997 
998  Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
999  BuildMI(*I.getParent(), I, I.getDebugLoc(),
1000  TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
1001  .addImm(0)
1002  .addUse(SrcReg)
1003  .addImm(SubReg);
1004  MachineOperand &RegOp = I.getOperand(1);
1005  RegOp.setReg(PromoteReg);
1006  }
1007 
1008  // If the destination is a physical register, then there's nothing to
1009  // change, so we're done.
1010  if (Register::isPhysicalRegister(DstReg))
1011  return true;
1012  }
1013 
1014  // No need to constrain SrcReg. It will get constrained when we hit another
1015  // of its use or its defs. Copies do not have constraints.
1016  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1017  LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
1018  << " operand\n");
1019  return false;
1020  }
1021 
1022  // If this is a GPR ZEXT that we want to reduce down into a copy.
1023  // The sizes will be mismatched with the source < 32b, but that's OK.
1024  if (I.getOpcode() == TargetOpcode::G_ZEXT) {
1025  I.setDesc(TII.get(AArch64::COPY));
1026  assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
1027  return selectCopy(I, TII, MRI, TRI, RBI);
1028  }
1029 
1030  I.setDesc(TII.get(AArch64::COPY));
1031  return true;
1032 }
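// Illustration only (hypothetical vregs): a copy that narrows within the GPR
// bank, e.g.
//
//   %dst:gpr(s32) = COPY %src:gpr(s64)
//
// takes the SrcSize > DstSize path above: copySubReg inserts
// "%tmp = COPY %src.sub_32" and rewrites the original COPY to read %tmp, after
// which only the destination register still needs to be constrained.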
1033 
1034 static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
1035  if (!DstTy.isScalar() || !SrcTy.isScalar())
1036  return GenericOpc;
1037 
1038  const unsigned DstSize = DstTy.getSizeInBits();
1039  const unsigned SrcSize = SrcTy.getSizeInBits();
1040 
1041  switch (DstSize) {
1042  case 32:
1043  switch (SrcSize) {
1044  case 32:
1045  switch (GenericOpc) {
1046  case TargetOpcode::G_SITOFP:
1047  return AArch64::SCVTFUWSri;
1048  case TargetOpcode::G_UITOFP:
1049  return AArch64::UCVTFUWSri;
1050  case TargetOpcode::G_FPTOSI:
1051  return AArch64::FCVTZSUWSr;
1052  case TargetOpcode::G_FPTOUI:
1053  return AArch64::FCVTZUUWSr;
1054  default:
1055  return GenericOpc;
1056  }
1057  case 64:
1058  switch (GenericOpc) {
1059  case TargetOpcode::G_SITOFP:
1060  return AArch64::SCVTFUXSri;
1061  case TargetOpcode::G_UITOFP:
1062  return AArch64::UCVTFUXSri;
1063  case TargetOpcode::G_FPTOSI:
1064  return AArch64::FCVTZSUWDr;
1065  case TargetOpcode::G_FPTOUI:
1066  return AArch64::FCVTZUUWDr;
1067  default:
1068  return GenericOpc;
1069  }
1070  default:
1071  return GenericOpc;
1072  }
1073  case 64:
1074  switch (SrcSize) {
1075  case 32:
1076  switch (GenericOpc) {
1077  case TargetOpcode::G_SITOFP:
1078  return AArch64::SCVTFUWDri;
1079  case TargetOpcode::G_UITOFP:
1080  return AArch64::UCVTFUWDri;
1081  case TargetOpcode::G_FPTOSI:
1082  return AArch64::FCVTZSUXSr;
1083  case TargetOpcode::G_FPTOUI:
1084  return AArch64::FCVTZUUXSr;
1085  default:
1086  return GenericOpc;
1087  }
1088  case 64:
1089  switch (GenericOpc) {
1090  case TargetOpcode::G_SITOFP:
1091  return AArch64::SCVTFUXDri;
1092  case TargetOpcode::G_UITOFP:
1093  return AArch64::UCVTFUXDri;
1094  case TargetOpcode::G_FPTOSI:
1095  return AArch64::FCVTZSUXDr;
1096  case TargetOpcode::G_FPTOUI:
1097  return AArch64::FCVTZUUXDr;
1098  default:
1099  return GenericOpc;
1100  }
1101  default:
1102  return GenericOpc;
1103  }
1104  default:
1105  return GenericOpc;
1106  };
1107  return GenericOpc;
1108 }
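// Examples implied by the nested switch (outer = destination size, inner =
// source size); illustrative only:
//
//   selectFPConvOpc(TargetOpcode::G_SITOFP, LLT::scalar(64), LLT::scalar(32)) -> AArch64::SCVTFUWDri
//   selectFPConvOpc(TargetOpcode::G_FPTOUI, LLT::scalar(32), LLT::scalar(64)) -> AArch64::FCVTZUUWDr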
1109 
1110 MachineInstr *
1111 AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1112  Register False, AArch64CC::CondCode CC,
1113  MachineIRBuilder &MIB) const {
1114  MachineRegisterInfo &MRI = *MIB.getMRI();
1115  assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
1116  RBI.getRegBank(True, MRI, TRI)->getID() &&
1117  "Expected both select operands to have the same regbank?");
1118  LLT Ty = MRI.getType(True);
1119  if (Ty.isVector())
1120  return nullptr;
1121  const unsigned Size = Ty.getSizeInBits();
1122  assert((Size == 32 || Size == 64) &&
1123  "Expected 32 bit or 64 bit select only?");
1124  const bool Is32Bit = Size == 32;
1125  if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1126  unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1127  auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1128  constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1129  return &*FCSel;
1130  }
1131 
1132  // By default, we'll try and emit a CSEL.
1133  unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1134  bool Optimized = false;
1135  auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1136  &Optimized](Register &Reg, Register &OtherReg,
1137  bool Invert) {
1138  if (Optimized)
1139  return false;
1140 
1141  // Attempt to fold:
1142  //
1143  // %sub = G_SUB 0, %x
1144  // %select = G_SELECT cc, %reg, %sub
1145  //
1146  // Into:
1147  // %select = CSNEG %reg, %x, cc
1148  Register MatchReg;
1149  if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
1150  Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1151  Reg = MatchReg;
1152  if (Invert) {
1153  CC = AArch64CC::getInvertedCondCode(CC);
1154  std::swap(Reg, OtherReg);
1155  }
1156  return true;
1157  }
1158 
1159  // Attempt to fold:
1160  //
1161  // %xor = G_XOR %x, -1
1162  // %select = G_SELECT cc, %reg, %xor
1163  //
1164  // Into:
1165  // %select = CSINV %reg, %x, cc
1166  if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
1167  Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1168  Reg = MatchReg;
1169  if (Invert) {
1170  CC = AArch64CC::getInvertedCondCode(CC);
1171  std::swap(Reg, OtherReg);
1172  }
1173  return true;
1174  }
1175 
1176  // Attempt to fold:
1177  //
1178  // %add = G_ADD %x, 1
1179  // %select = G_SELECT cc, %reg, %add
1180  //
1181  // Into:
1182  // %select = CSINC %reg, %x, cc
1183  if (mi_match(Reg, MRI,
1184  m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
1185  m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
1186  Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1187  Reg = MatchReg;
1188  if (Invert) {
1189  CC = AArch64CC::getInvertedCondCode(CC);
1190  std::swap(Reg, OtherReg);
1191  }
1192  return true;
1193  }
1194 
1195  return false;
1196  };
1197 
1198  // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1199  // true/false values are constants.
1200  // FIXME: All of these patterns already exist in tablegen. We should be
1201  // able to import these.
1202  auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1203  &Optimized]() {
1204  if (Optimized)
1205  return false;
1206  auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
1207  auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
1208  if (!TrueCst && !FalseCst)
1209  return false;
1210 
1211  Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1212  if (TrueCst && FalseCst) {
1213  int64_t T = TrueCst->Value.getSExtValue();
1214  int64_t F = FalseCst->Value.getSExtValue();
1215 
1216  if (T == 0 && F == 1) {
1217  // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1218  Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1219  True = ZReg;
1220  False = ZReg;
1221  return true;
1222  }
1223 
1224  if (T == 0 && F == -1) {
1225  // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
1226  Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1227  True = ZReg;
1228  False = ZReg;
1229  return true;
1230  }
1231  }
1232 
1233  if (TrueCst) {
1234  int64_t T = TrueCst->Value.getSExtValue();
1235  if (T == 1) {
1236  // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1237  Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1238  True = False;
1239  False = ZReg;
1240  CC = AArch64CC::getInvertedCondCode(CC);
1241  return true;
1242  }
1243 
1244  if (T == -1) {
1245  // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1246  Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1247  True = False;
1248  False = ZReg;
1249  CC = AArch64CC::getInvertedCondCode(CC);
1250  return true;
1251  }
1252  }
1253 
1254  if (FalseCst) {
1255  int64_t F = FalseCst->Value.getSExtValue();
1256  if (F == 1) {
1257  // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1258  Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1259  False = ZReg;
1260  return true;
1261  }
1262 
1263  if (F == -1) {
1264  // G_SELECT cc, t, -1 -> CSINC t, zreg, cc
1265  Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1266  False = ZReg;
1267  return true;
1268  }
1269  }
1270  return false;
1271  };
1272 
1273  Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1274  Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1275  Optimized |= TryOptSelectCst();
1276  auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1277  constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1278  return &*SelectInst;
1279 }
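// Summarizing the folds above (illustrative generic MIR; %t, %f, %x and the
// condition %c are hypothetical):
//
//   %f = G_SUB 0, %x;  %d = G_SELECT %c, %t, %f  -->  %d = CSNEG %t, %x, cc
//   %f = G_XOR %x, -1; %d = G_SELECT %c, %t, %f  -->  %d = CSINV %t, %x, cc
//   %f = G_ADD %x, 1;  %d = G_SELECT %c, %t, %f  -->  %d = CSINC %t, %x, cc
//   %d = G_SELECT %c, 0, 1                       -->  %d = CSINC zr, zr, cc
//                                                     (WZR or XZR by size)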
1280 
1281 static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
1282  switch (P) {
1283  default:
1284  llvm_unreachable("Unknown condition code!");
1285  case CmpInst::ICMP_NE:
1286  return AArch64CC::NE;
1287  case CmpInst::ICMP_EQ:
1288  return AArch64CC::EQ;
1289  case CmpInst::ICMP_SGT:
1290  return AArch64CC::GT;
1291  case CmpInst::ICMP_SGE:
1292  return AArch64CC::GE;
1293  case CmpInst::ICMP_SLT:
1294  return AArch64CC::LT;
1295  case CmpInst::ICMP_SLE:
1296  return AArch64CC::LE;
1297  case CmpInst::ICMP_UGT:
1298  return AArch64CC::HI;
1299  case CmpInst::ICMP_UGE:
1300  return AArch64CC::HS;
1301  case CmpInst::ICMP_ULT:
1302  return AArch64CC::LO;
1303  case CmpInst::ICMP_ULE:
1304  return AArch64CC::LS;
1305  }
1306 }
1307 
1308 /// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
1309 static void changeFPCCToORAArch64CC(CmpInst::Predicate CC,
1310  AArch64CC::CondCode &CondCode,
1311  AArch64CC::CondCode &CondCode2) {
1312  CondCode2 = AArch64CC::AL;
1313  switch (CC) {
1314  default:
1315  llvm_unreachable("Unknown FP condition!");
1316  case CmpInst::FCMP_OEQ:
1317  CondCode = AArch64CC::EQ;
1318  break;
1319  case CmpInst::FCMP_OGT:
1320  CondCode = AArch64CC::GT;
1321  break;
1322  case CmpInst::FCMP_OGE:
1323  CondCode = AArch64CC::GE;
1324  break;
1325  case CmpInst::FCMP_OLT:
1326  CondCode = AArch64CC::MI;
1327  break;
1328  case CmpInst::FCMP_OLE:
1329  CondCode = AArch64CC::LS;
1330  break;
1331  case CmpInst::FCMP_ONE:
1332  CondCode = AArch64CC::MI;
1333  CondCode2 = AArch64CC::GT;
1334  break;
1335  case CmpInst::FCMP_ORD:
1336  CondCode = AArch64CC::VC;
1337  break;
1338  case CmpInst::FCMP_UNO:
1339  CondCode = AArch64CC::VS;
1340  break;
1341  case CmpInst::FCMP_UEQ:
1342  CondCode = AArch64CC::EQ;
1343  CondCode2 = AArch64CC::VS;
1344  break;
1345  case CmpInst::FCMP_UGT:
1346  CondCode = AArch64CC::HI;
1347  break;
1348  case CmpInst::FCMP_UGE:
1349  CondCode = AArch64CC::PL;
1350  break;
1351  case CmpInst::FCMP_ULT:
1352  CondCode = AArch64CC::LT;
1353  break;
1354  case CmpInst::FCMP_ULE:
1355  CondCode = AArch64CC::LE;
1356  break;
1357  case CmpInst::FCMP_UNE:
1358  CondCode = AArch64CC::NE;
1359  break;
1360  }
1361 }
1362 
1363 /// Convert an IR fp condition code to an AArch64 CC.
1364 /// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1365 /// should be AND'ed instead of OR'ed.
1366 static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC,
1367  AArch64CC::CondCode &CondCode,
1368  AArch64CC::CondCode &CondCode2) {
1369  CondCode2 = AArch64CC::AL;
1370  switch (CC) {
1371  default:
1372  changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
1373  assert(CondCode2 == AArch64CC::AL);
1374  break;
1375  case CmpInst::FCMP_ONE:
1376  // (a one b)
1377  // == ((a olt b) || (a ogt b))
1378  // == ((a ord b) && (a une b))
1379  CondCode = AArch64CC::VC;
1380  CondCode2 = AArch64CC::NE;
1381  break;
1382  case CmpInst::FCMP_UEQ:
1383  // (a ueq b)
1384  // == ((a uno b) || (a oeq b))
1385  // == ((a ule b) && (a uge b))
1386  CondCode = AArch64CC::PL;
1387  CondCode2 = AArch64CC::LE;
1388  break;
1389  }
1390 }
1391 
1392 /// Return a register which can be used as a bit to test in a TB(N)Z.
1393 static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1394  const MachineRegisterInfo &MRI) {
1395  assert(Reg.isValid() && "Expected valid register!");
1396  bool HasZext = false;
1397  while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1398  unsigned Opc = MI->getOpcode();
1399 
1400  if (!MI->getOperand(0).isReg() ||
1401  !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1402  break;
1403 
1404  // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1405  //
1406  // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1407  // on the truncated x is the same as the bit number on x.
1408  if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1409  Opc == TargetOpcode::G_TRUNC) {
1410  if (Opc == TargetOpcode::G_ZEXT)
1411  HasZext = true;
1412 
1413  Register NextReg = MI->getOperand(1).getReg();
1414  // Did we find something worth folding?
1415  if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1416  break;
1417 
1418  // NextReg is worth folding. Keep looking.
1419  Reg = NextReg;
1420  continue;
1421  }
1422 
1423  // Attempt to find a suitable operation with a constant on one side.
1424  std::optional<uint64_t> C;
1425  Register TestReg;
1426  switch (Opc) {
1427  default:
1428  break;
1429  case TargetOpcode::G_AND:
1430  case TargetOpcode::G_XOR: {
1431  TestReg = MI->getOperand(1).getReg();
1432  Register ConstantReg = MI->getOperand(2).getReg();
1433  auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1434  if (!VRegAndVal) {
1435  // AND commutes, check the other side for a constant.
1436  // FIXME: Can we canonicalize the constant so that it's always on the
1437  // same side at some point earlier?
1438  std::swap(ConstantReg, TestReg);
1439  VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1440  }
1441  if (VRegAndVal) {
1442  if (HasZext)
1443  C = VRegAndVal->Value.getZExtValue();
1444  else
1445  C = VRegAndVal->Value.getSExtValue();
1446  }
1447  break;
1448  }
1449  case TargetOpcode::G_ASHR:
1450  case TargetOpcode::G_LSHR:
1451  case TargetOpcode::G_SHL: {
1452  TestReg = MI->getOperand(1).getReg();
1453  auto VRegAndVal =
1454  getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1455  if (VRegAndVal)
1456  C = VRegAndVal->Value.getSExtValue();
1457  break;
1458  }
1459  }
1460 
1461  // Didn't find a constant or viable register. Bail out of the loop.
1462  if (!C || !TestReg.isValid())
1463  break;
1464 
1465  // We found a suitable instruction with a constant. Check to see if we can
1466  // walk through the instruction.
1467  Register NextReg;
1468  unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1469  switch (Opc) {
1470  default:
1471  break;
1472  case TargetOpcode::G_AND:
1473  // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1474  if ((*C >> Bit) & 1)
1475  NextReg = TestReg;
1476  break;
1477  case TargetOpcode::G_SHL:
1478  // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1479  // the type of the register.
1480  if (*C <= Bit && (Bit - *C) < TestRegSize) {
1481  NextReg = TestReg;
1482  Bit = Bit - *C;
1483  }
1484  break;
1485  case TargetOpcode::G_ASHR:
1486  // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1487  // in x
1488  NextReg = TestReg;
1489  Bit = Bit + *C;
1490  if (Bit >= TestRegSize)
1491  Bit = TestRegSize - 1;
1492  break;
1493  case TargetOpcode::G_LSHR:
1494  // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1495  if ((Bit + *C) < TestRegSize) {
1496  NextReg = TestReg;
1497  Bit = Bit + *C;
1498  }
1499  break;
1500  case TargetOpcode::G_XOR:
1501  // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1502  // appropriate.
1503  //
1504  // e.g. If x' = xor x, c, and the b-th bit is set in c then
1505  //
1506  // tbz x', b -> tbnz x, b
1507  //
1508  // Because x' only has the b-th bit set if x does not.
1509  if ((*C >> Bit) & 1)
1510  Invert = !Invert;
1511  NextReg = TestReg;
1512  break;
1513  }
1514 
1515  // Check if we found anything worth folding.
1516  if (!NextReg.isValid())
1517  return Reg;
1518  Reg = NextReg;
1519  }
1520 
1521  return Reg;
1522 }
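// Illustration only (hypothetical vregs): when asked for bit 3 of %y where
//
//   %y = G_SHL %x, 2
//
// the G_SHL case above walks to %x and adjusts the bit to 1 (Bit - C); a
// G_XOR with a constant whose bit 3 is set would instead keep the bit number
// but flip Invert, turning a TBZ into a TBNZ.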
1523 
1524 MachineInstr *AArch64InstructionSelector::emitTestBit(
1525  Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1526  MachineIRBuilder &MIB) const {
1527  assert(TestReg.isValid());
1528  assert(ProduceNonFlagSettingCondBr &&
1529  "Cannot emit TB(N)Z with speculation tracking!");
1530  MachineRegisterInfo &MRI = *MIB.getMRI();
1531 
1532  // Attempt to optimize the test bit by walking over instructions.
1533  TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1534  LLT Ty = MRI.getType(TestReg);
1535  unsigned Size = Ty.getSizeInBits();
1536  assert(!Ty.isVector() && "Expected a scalar!");
1537  assert(Bit < 64 && "Bit is too large!");
1538 
1539  // When the test register is a 64-bit register, we have to narrow to make
1540  // TBNZW work.
1541  bool UseWReg = Bit < 32;
1542  unsigned NecessarySize = UseWReg ? 32 : 64;
1543  if (Size != NecessarySize)
1544  TestReg = moveScalarRegClass(
1545  TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1546  MIB);
1547 
1548  static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1549  {AArch64::TBZW, AArch64::TBNZW}};
1550  unsigned Opc = OpcTable[UseWReg][IsNegative];
1551  auto TestBitMI =
1552  MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1553  constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1554  return &*TestBitMI;
1555 }
1556 
1557 bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1558  MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1559  MachineIRBuilder &MIB) const {
1560  assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1561  // Given something like this:
1562  //
1563  // %x = ...Something...
1564  // %one = G_CONSTANT i64 1
1565  // %zero = G_CONSTANT i64 0
1566  // %and = G_AND %x, %one
1567  // %cmp = G_ICMP intpred(ne), %and, %zero
1568  // %cmp_trunc = G_TRUNC %cmp
1569  // G_BRCOND %cmp_trunc, %bb.3
1570  //
1571  // We want to try and fold the AND into the G_BRCOND and produce either a
1572  // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1573  //
1574  // In this case, we'd get
1575  //
1576  // TBNZ %x %bb.3
1577  //
1578 
1579  // Check if the AND has a constant on its RHS which we can use as a mask.
1580  // If it's a power of 2, then it's the same as checking a specific bit.
1581  // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1582  auto MaybeBit = getIConstantVRegValWithLookThrough(
1583  AndInst.getOperand(2).getReg(), *MIB.getMRI());
1584  if (!MaybeBit)
1585  return false;
1586 
1587  int32_t Bit = MaybeBit->Value.exactLogBase2();
1588  if (Bit < 0)
1589  return false;
1590 
1591  Register TestReg = AndInst.getOperand(1).getReg();
1592 
1593  // Emit a TB(N)Z.
1594  emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1595  return true;
1596 }
1597 
1598 MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1599  bool IsNegative,
1600  MachineBasicBlock *DestMBB,
1601  MachineIRBuilder &MIB) const {
1602  assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1603  MachineRegisterInfo &MRI = *MIB.getMRI();
1604  assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1605  AArch64::GPRRegBankID &&
1606  "Expected GPRs only?");
1607  auto Ty = MRI.getType(CompareReg);
1608  unsigned Width = Ty.getSizeInBits();
1609  assert(!Ty.isVector() && "Expected scalar only?");
1610  assert(Width <= 64 && "Expected width to be at most 64?");
1611  static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1612  {AArch64::CBNZW, AArch64::CBNZX}};
1613  unsigned Opc = OpcTable[IsNegative][Width == 64];
1614  auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1615  constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1616  return &*BranchMI;
1617 }
1618 
1619 bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1620  MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1621  assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1622  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1623  // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1624  // totally clean. Some of them require two branches to implement.
1625  auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1626  emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1627  Pred);
1628  AArch64CC::CondCode CC1, CC2;
1629  changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
1630  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1631  MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1632  if (CC2 != AArch64CC::AL)
1633  MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1634  I.eraseFromParent();
1635  return true;
1636 }
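// For example (illustrative): an ordered not-equal compare (FCMP_ONE) has no
// single AArch64 condition code, so CC2 != AArch64CC::AL and two Bcc
// instructions to the same block are emitted; the branch is taken when either
// the "less than" or the "greater than" condition holds.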
1637 
1638 bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1639  MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1640  assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1641  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1642  // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1643  //
1644  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1645  // instructions will not be produced, as they are conditional branch
1646  // instructions that do not set flags.
1647  if (!ProduceNonFlagSettingCondBr)
1648  return false;
1649 
1650  MachineRegisterInfo &MRI = *MIB.getMRI();
1651  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1652  auto Pred =
1653  static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1654  Register LHS = ICmp.getOperand(2).getReg();
1655  Register RHS = ICmp.getOperand(3).getReg();
1656 
1657  // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1658  auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1659  MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1660 
1661  // When we can emit a TB(N)Z, prefer that.
1662  //
1663  // Handle non-commutative condition codes first.
1664  // Note that we don't want to do this when we have a G_AND because it can
1665  // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1666  if (VRegAndVal && !AndInst) {
1667  int64_t C = VRegAndVal->Value.getSExtValue();
1668 
1669  // When we have a greater-than comparison, we can just test if the msb is
1670  // zero.
1671  if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1672  uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1673  emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1674  I.eraseFromParent();
1675  return true;
1676  }
1677 
1678  // When we have a less than comparison, we can just test if the msb is not
1679  // zero.
1680  if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1681  uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1682  emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1683  I.eraseFromParent();
1684  return true;
1685  }
1686 
1687  // Inversely, if we have a signed greater-than-or-equal comparison to zero,
1688  // we can test if the msb is zero.
1689  if (C == 0 && Pred == CmpInst::ICMP_SGE) {
1690  uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1691  emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1692  I.eraseFromParent();
1693  return true;
1694  }
1695  }
1696 
1697  // Attempt to handle commutative condition codes. Right now, that's only
1698  // eq/ne.
1699  if (ICmpInst::isEquality(Pred)) {
1700  if (!VRegAndVal) {
1701  std::swap(RHS, LHS);
1702  VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1703  AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1704  }
1705 
1706  if (VRegAndVal && VRegAndVal->Value == 0) {
1707  // If there's a G_AND feeding into this branch, try to fold it away by
1708  // emitting a TB(N)Z instead.
1709  //
1710  // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1711  // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1712  // would be redundant.
1713  if (AndInst &&
1714  tryOptAndIntoCompareBranch(
1715  *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1716  I.eraseFromParent();
1717  return true;
1718  }
1719 
1720  // Otherwise, try to emit a CB(N)Z instead.
1721  auto LHSTy = MRI.getType(LHS);
1722  if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1723  emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1724  I.eraseFromParent();
1725  return true;
1726  }
1727  }
1728  }
1729 
1730  return false;
1731 }
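// Added commentary (not part of the original source): a minimal sketch of the
// CB(N)Z path, assuming speculative load hardening is disabled. Given
//   %c:gpr(s32) = G_ICMP intpred(eq), %x:gpr(s64), 0
//   G_BRCOND %c, %bb.2
// the compare against zero is folded into
//   CBZX %x, %bb.2
// while intpred(ne) would produce CBNZX instead.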
1732 
1733 bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1734  MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1735  assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1736  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1737  if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1738  return true;
1739 
1740  // Couldn't optimize. Emit a compare + a Bcc.
1741  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1742  auto PredOp = ICmp.getOperand(1);
1743  emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1744  const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1745  static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
1746  MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1747  I.eraseFromParent();
1748  return true;
1749 }
1750 
1751 bool AArch64InstructionSelector::selectCompareBranch(
1752  MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
1753  Register CondReg = I.getOperand(0).getReg();
1754  MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1755  // Try to select the G_BRCOND using whatever is feeding the condition if
1756  // possible.
1757  unsigned CCMIOpc = CCMI->getOpcode();
1758  if (CCMIOpc == TargetOpcode::G_FCMP)
1759  return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1760  if (CCMIOpc == TargetOpcode::G_ICMP)
1761  return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1762 
1763  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1764  // instructions will not be produced, as they are conditional branch
1765  // instructions that do not set flags.
1766  if (ProduceNonFlagSettingCondBr) {
1767  emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1768  I.getOperand(1).getMBB(), MIB);
1769  I.eraseFromParent();
1770  return true;
1771  }
1772 
1773  // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1774  auto TstMI =
1775  MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1776  constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
1777  auto Bcc = MIB.buildInstr(AArch64::Bcc)
1778  .addImm(AArch64CC::NE)
1779  .addMBB(I.getOperand(1).getMBB());
1780  I.eraseFromParent();
1781  return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1782 }
1783 
1784 /// Returns the element immediate value of a vector shift operand if found.
1785 /// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1786 static Optional<int64_t> getVectorShiftImm(Register Reg,
1787  MachineRegisterInfo &MRI) {
1788  assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1789  MachineInstr *OpMI = MRI.getVRegDef(Reg);
1790  return getAArch64VectorSplatScalar(*OpMI, MRI);
1791 }
1792 
1793 /// Matches and returns the shift immediate value for a SHL instruction given
1794 /// a shift operand.
1795 static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) {
1796  Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1797  if (!ShiftImm)
1798  return None;
1799  // Check the immediate is in range for a SHL.
1800  int64_t Imm = *ShiftImm;
1801  if (Imm < 0)
1802  return None;
1803  switch (SrcTy.getElementType().getSizeInBits()) {
1804  default:
1805  LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1806  return None;
1807  case 8:
1808  if (Imm > 7)
1809  return None;
1810  break;
1811  case 16:
1812  if (Imm > 15)
1813  return None;
1814  break;
1815  case 32:
1816  if (Imm > 31)
1817  return None;
1818  break;
1819  case 64:
1820  if (Imm > 63)
1821  return None;
1822  break;
1823  }
1824  return Imm;
1825 }
1826 
1827 bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
1828  MachineRegisterInfo &MRI) {
1829  assert(I.getOpcode() == TargetOpcode::G_SHL);
1830  Register DstReg = I.getOperand(0).getReg();
1831  const LLT Ty = MRI.getType(DstReg);
1832  Register Src1Reg = I.getOperand(1).getReg();
1833  Register Src2Reg = I.getOperand(2).getReg();
1834 
1835  if (!Ty.isVector())
1836  return false;
1837 
1838  // Check if we have a vector of constants on RHS that we can select as the
1839  // immediate form.
1840  Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1841 
1842  unsigned Opc = 0;
1843  if (Ty == LLT::fixed_vector(2, 64)) {
1844  Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1845  } else if (Ty == LLT::fixed_vector(4, 32)) {
1846  Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1847  } else if (Ty == LLT::fixed_vector(2, 32)) {
1848  Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1849  } else if (Ty == LLT::fixed_vector(4, 16)) {
1850  Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1851  } else if (Ty == LLT::fixed_vector(8, 16)) {
1852  Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1853  } else if (Ty == LLT::fixed_vector(16, 8)) {
1854  Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1855  } else if (Ty == LLT::fixed_vector(8, 8)) {
1856  Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1857  } else {
1858  LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1859  return false;
1860  }
1861 
1862  auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1863  if (ImmVal)
1864  Shl.addImm(*ImmVal);
1865  else
1866  Shl.addUse(Src2Reg);
1867  constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1868  I.eraseFromParent();
1869  return true;
1870 }
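// Added commentary (not part of the original source): for example, a G_SHL of
// a <4 x s32> value by a G_BUILD_VECTOR splat of 3 is selected above to
// SHLv4i32_shift with immediate 3, while a non-constant shift amount falls
// back to the register form USHLv4i32.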
1871 
1872 bool AArch64InstructionSelector::selectVectorAshrLshr(
1873  MachineInstr &I, MachineRegisterInfo &MRI) {
1874  assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1875  I.getOpcode() == TargetOpcode::G_LSHR);
1876  Register DstReg = I.getOperand(0).getReg();
1877  const LLT Ty = MRI.getType(DstReg);
1878  Register Src1Reg = I.getOperand(1).getReg();
1879  Register Src2Reg = I.getOperand(2).getReg();
1880 
1881  if (!Ty.isVector())
1882  return false;
1883 
1884  bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1885 
1886  // We expect the immediate case to be lowered in the PostLegalCombiner to
1887  // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1888 
1889  // There is no shift-right-by-register instruction, but the shift-left
1890  // register instruction takes a signed value, where negative amounts specify
1891  // a right shift.
1892 
1893  unsigned Opc = 0;
1894  unsigned NegOpc = 0;
1895  const TargetRegisterClass *RC =
1896  getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
1897  if (Ty == LLT::fixed_vector(2, 64)) {
1898  Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1899  NegOpc = AArch64::NEGv2i64;
1900  } else if (Ty == LLT::fixed_vector(4, 32)) {
1901  Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1902  NegOpc = AArch64::NEGv4i32;
1903  } else if (Ty == LLT::fixed_vector(2, 32)) {
1904  Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1905  NegOpc = AArch64::NEGv2i32;
1906  } else if (Ty == LLT::fixed_vector(4, 16)) {
1907  Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1908  NegOpc = AArch64::NEGv4i16;
1909  } else if (Ty == LLT::fixed_vector(8, 16)) {
1910  Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1911  NegOpc = AArch64::NEGv8i16;
1912  } else if (Ty == LLT::fixed_vector(16, 8)) {
1913  Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1914  NegOpc = AArch64::NEGv16i8;
1915  } else if (Ty == LLT::fixed_vector(8, 8)) {
1916  Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1917  NegOpc = AArch64::NEGv8i8;
1918  } else {
1919  LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1920  return false;
1921  }
1922 
1923  auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1924  constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1925  auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1926  constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1927  I.eraseFromParent();
1928  return true;
1929 }
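// Added commentary (not part of the original source): since only a
// shift-left-by-register instruction exists, something like
//   %d:fpr(<2 x s64>) = G_ASHR %v, %amt
// is selected above as a negate followed by a signed shift left:
//   NEGv2i64 %neg, %amt
//   SSHLv2i64 %d, %v, %neg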
1930 
1931 bool AArch64InstructionSelector::selectVaStartAAPCS(
1932  MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1933  return false;
1934 }
1935 
1936 bool AArch64InstructionSelector::selectVaStartDarwin(
1937  MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1938  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1939  Register ListReg = I.getOperand(0).getReg();
1940 
1941  Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1942 
1943  auto MIB =
1944  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
1945  .addDef(ArgsAddrReg)
1946  .addFrameIndex(FuncInfo->getVarArgsStackIndex())
1947  .addImm(0)
1948  .addImm(0);
1949 
1951 
1952  MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
1953  .addUse(ArgsAddrReg)
1954  .addUse(ListReg)
1955  .addImm(0)
1956  .addMemOperand(*I.memoperands_begin());
1957 
1959  I.eraseFromParent();
1960  return true;
1961 }
1962 
1963 void AArch64InstructionSelector::materializeLargeCMVal(
1964  MachineInstr &I, const Value *V, unsigned OpFlags) {
1965  MachineBasicBlock &MBB = *I.getParent();
1966  MachineFunction &MF = *MBB.getParent();
1967  MachineRegisterInfo &MRI = MF.getRegInfo();
1968 
1969  auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
1970  MovZ->addOperand(MF, I.getOperand(1));
1971  MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
1972  AArch64II::MO_NC);
1973  MovZ->addOperand(MF, MachineOperand::CreateImm(0));
1974  constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
1975 
1976  auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
1977  Register ForceDstReg) {
1978  Register DstReg = ForceDstReg
1979  ? ForceDstReg
1980  : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1981  auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
1982  if (auto *GV = dyn_cast<GlobalValue>(V)) {
1983  MovI->addOperand(MF, MachineOperand::CreateGA(
1984  GV, MovZ->getOperand(1).getOffset(), Flags));
1985  } else {
1986  MovI->addOperand(
1987  MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
1988  MovZ->getOperand(1).getOffset(), Flags));
1989  }
1990  MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
1991  constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
1992  return DstReg;
1993  };
1994  Register DstReg = BuildMovK(MovZ.getReg(0),
1995  AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
1996  DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
1997  BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
1998 }
1999 
2000 bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
2001  MachineBasicBlock &MBB = *I.getParent();
2002  MachineFunction &MF = *MBB.getParent();
2003  MachineRegisterInfo &MRI = MF.getRegInfo();
2004 
2005  switch (I.getOpcode()) {
2006  case TargetOpcode::G_STORE: {
2007  bool Changed = contractCrossBankCopyIntoStore(I, MRI);
2008  MachineOperand &SrcOp = I.getOperand(0);
2009  if (MRI.getType(SrcOp.getReg()).isPointer()) {
2010  // Allow matching with imported patterns for stores of pointers. Unlike
2011  // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
2012  // and constrain.
2013  auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
2014  Register NewSrc = Copy.getReg(0);
2015  SrcOp.setReg(NewSrc);
2016  RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
2017  Changed = true;
2018  }
2019  return Changed;
2020  }
2021  case TargetOpcode::G_PTR_ADD:
2022  return convertPtrAddToAdd(I, MRI);
2023  case TargetOpcode::G_LOAD: {
2024  // For scalar loads of pointers, we try to convert the dest type from p0
2025  // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
2026  // conversion, this should be ok because all users should have been
2027  // selected already, so the type doesn't matter for them.
2028  Register DstReg = I.getOperand(0).getReg();
2029  const LLT DstTy = MRI.getType(DstReg);
2030  if (!DstTy.isPointer())
2031  return false;
2032  MRI.setType(DstReg, LLT::scalar(64));
2033  return true;
2034  }
2035  case AArch64::G_DUP: {
2036  // Convert the type from p0 to s64 to help selection.
2037  LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2038  if (!DstTy.getElementType().isPointer())
2039  return false;
2040  auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
2041  MRI.setType(I.getOperand(0).getReg(),
2042  DstTy.changeElementType(LLT::scalar(64)));
2043  MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2044  I.getOperand(1).setReg(NewSrc.getReg(0));
2045  return true;
2046  }
2047  case TargetOpcode::G_UITOFP:
2048  case TargetOpcode::G_SITOFP: {
2049  // If both source and destination regbanks are FPR, then convert the opcode
2050  // to G_SITOF so that the importer can select it to an fpr variant.
2051  // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
2052  // copy.
2053  Register SrcReg = I.getOperand(1).getReg();
2054  LLT SrcTy = MRI.getType(SrcReg);
2055  LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2056  if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
2057  return false;
2058 
2059  if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
2060  if (I.getOpcode() == TargetOpcode::G_SITOFP)
2061  I.setDesc(TII.get(AArch64::G_SITOF));
2062  else
2063  I.setDesc(TII.get(AArch64::G_UITOF));
2064  return true;
2065  }
2066  return false;
2067  }
2068  default:
2069  return false;
2070  }
2071 }
2072 
2073 /// This lowering tries to look for G_PTR_ADD instructions and then converts
2074 /// them to a standard G_ADD with a COPY on the source.
2075 ///
2076 /// The motivation behind this is to expose the add semantics to the imported
2077 /// tablegen patterns. We shouldn't need to check for uses being loads/stores,
2078 /// because the selector works bottom up, uses before defs. By the time we
2079 /// end up trying to select a G_PTR_ADD, we should have already attempted to
2080 /// fold this into addressing modes and were therefore unsuccessful.
2081 bool AArch64InstructionSelector::convertPtrAddToAdd(
2082  MachineInstr &I, MachineRegisterInfo &MRI) {
2083  assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
2084  Register DstReg = I.getOperand(0).getReg();
2085  Register AddOp1Reg = I.getOperand(1).getReg();
2086  const LLT PtrTy = MRI.getType(DstReg);
2087  if (PtrTy.getAddressSpace() != 0)
2088  return false;
2089 
2090  const LLT CastPtrTy =
2091  PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
2092  auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
2093  // Set regbanks on the registers.
2094  if (PtrTy.isVector())
2095  MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2096  else
2097  MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2098 
2099  // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2100  // %dst(intty) = G_ADD %intbase, off
2101  I.setDesc(TII.get(TargetOpcode::G_ADD));
2102  MRI.setType(DstReg, CastPtrTy);
2103  I.getOperand(1).setReg(PtrToInt.getReg(0));
2104  if (!select(*PtrToInt)) {
2105  LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
2106  return false;
2107  }
2108 
2109  // Also take the opportunity here to try to do some optimization.
2110  // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
2111  Register NegatedReg;
2112  if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
2113  return true;
2114  I.getOperand(2).setReg(NegatedReg);
2115  I.setDesc(TII.get(TargetOpcode::G_SUB));
2116  return true;
2117 }
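// Added commentary (not part of the original source): a sketch of the
// conversion above, with made-up virtual registers. Before:
//   %dst:gpr(p0) = G_PTR_ADD %base:gpr(p0), %off:gpr(s64)
// After, so that imported G_ADD patterns can match:
//   %int:gpr(s64) = G_PTRTOINT %base:gpr(p0)
//   %dst:gpr(s64) = G_ADD %int, %off
// If %off is itself a 0 - %x negate idiom, the G_ADD is further rewritten
// into %dst:gpr(s64) = G_SUB %int, %x.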
2118 
2119 bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
2120  MachineRegisterInfo &MRI) {
2121  // We try to match the immediate variant of LSL, which is actually an alias
2122  // for a special case of UBFM. Otherwise, we fall back to the imported
2123  // selector which will match the register variant.
2124  assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2125  const auto &MO = I.getOperand(2);
2126  auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
2127  if (!VRegAndVal)
2128  return false;
2129 
2130  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2131  if (DstTy.isVector())
2132  return false;
2133  bool Is64Bit = DstTy.getSizeInBits() == 64;
2134  auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2135  auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2136 
2137  if (!Imm1Fn || !Imm2Fn)
2138  return false;
2139 
2140  auto NewI =
2141  MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2142  {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2143 
2144  for (auto &RenderFn : *Imm1Fn)
2145  RenderFn(NewI);
2146  for (auto &RenderFn : *Imm2Fn)
2147  RenderFn(NewI);
2148 
2149  I.eraseFromParent();
2150  return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
2151 }
2152 
2153 bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2154  MachineInstr &I, MachineRegisterInfo &MRI) {
2155  assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2156  // If we're storing a scalar, it doesn't matter what register bank that
2157  // scalar is on. All that matters is the size.
2158  //
2159  // So, if we see something like this (with a 32-bit scalar as an example):
2160  //
2161  // %x:gpr(s32) = ... something ...
2162  // %y:fpr(s32) = COPY %x:gpr(s32)
2163  // G_STORE %y:fpr(s32)
2164  //
2165  // We can fix this up into something like this:
2166  //
2167  // G_STORE %x:gpr(s32)
2168  //
2169  // And then continue the selection process normally.
2170  Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2171  if (!DefDstReg.isValid())
2172  return false;
2173  LLT DefDstTy = MRI.getType(DefDstReg);
2174  Register StoreSrcReg = I.getOperand(0).getReg();
2175  LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2176 
2177  // If we get something strange like a physical register, then we shouldn't
2178  // go any further.
2179  if (!DefDstTy.isValid())
2180  return false;
2181 
2182  // Are the source and dst types the same size?
2183  if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2184  return false;
2185 
2186  if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2187  RBI.getRegBank(DefDstReg, MRI, TRI))
2188  return false;
2189 
2190  // We have a cross-bank copy, which is entering a store. Let's fold it.
2191  I.getOperand(0).setReg(DefDstReg);
2192  return true;
2193 }
2194 
2195 bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2196  assert(I.getParent() && "Instruction should be in a basic block!");
2197  assert(I.getParent()->getParent() && "Instruction should be in a function!");
2198 
2199  MachineBasicBlock &MBB = *I.getParent();
2200  MachineFunction &MF = *MBB.getParent();
2201  MachineRegisterInfo &MRI = MF.getRegInfo();
2202 
2203  switch (I.getOpcode()) {
2204  case AArch64::G_DUP: {
2205  // Before selecting a DUP instruction, check if it is better selected as a
2206  // MOV or load from a constant pool.
2207  Register Src = I.getOperand(1).getReg();
2208  auto ValAndVReg = getIConstantVRegValWithLookThrough(Src, MRI);
2209  if (!ValAndVReg)
2210  return false;
2211  LLVMContext &Ctx = MF.getFunction().getContext();
2212  Register Dst = I.getOperand(0).getReg();
2213  auto *CV = ConstantDataVector::getSplat(
2214  MRI.getType(Dst).getNumElements(),
2215  ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()),
2216  ValAndVReg->Value));
2217  if (!emitConstantVector(Dst, CV, MIB, MRI))
2218  return false;
2219  I.eraseFromParent();
2220  return true;
2221  }
2222  case TargetOpcode::G_SEXT:
2223  // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
2224  // over a normal extend.
2225  if (selectUSMovFromExtend(I, MRI))
2226  return true;
2227  return false;
2228  case TargetOpcode::G_BR:
2229  return false;
2230  case TargetOpcode::G_SHL:
2231  return earlySelectSHL(I, MRI);
2232  case TargetOpcode::G_CONSTANT: {
2233  bool IsZero = false;
2234  if (I.getOperand(1).isCImm())
2235  IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
2236  else if (I.getOperand(1).isImm())
2237  IsZero = I.getOperand(1).getImm() == 0;
2238 
2239  if (!IsZero)
2240  return false;
2241 
2242  Register DefReg = I.getOperand(0).getReg();
2243  LLT Ty = MRI.getType(DefReg);
2244  if (Ty.getSizeInBits() == 64) {
2245  I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2246  RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2247  } else if (Ty.getSizeInBits() == 32) {
2248  I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2249  RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2250  } else
2251  return false;
2252 
2253  I.setDesc(TII.get(TargetOpcode::COPY));
2254  return true;
2255  }
2256 
2257  case TargetOpcode::G_ADD: {
2258  // Check if this is being fed by a G_ICMP on either side.
2259  //
2260  // (cmp pred, x, y) + z
2261  //
2262  // In the above case, when the cmp is true, we increment z by 1. So, we can
2263  // fold the add into the cset for the cmp by using cinc.
2264  //
2265  // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
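  // Added commentary (not part of the original source): concretely, with
  // made-up virtual registers,
  //   %c:gpr(s32) = G_ICMP intpred(eq), %x, %y
  //   %a:gpr(s32) = G_ADD %z, %c
  // is selected below as a flag-setting compare plus a conditional increment:
  //   SUBSWrr %x, %y              ; sets NZCV
  //   CSINCWr %a, %z, %z, ne      ; %z + 1 when eq held, %z otherwise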
2266  Register AddDst = I.getOperand(0).getReg();
2267  Register AddLHS = I.getOperand(1).getReg();
2268  Register AddRHS = I.getOperand(2).getReg();
2269  // Only handle scalars.
2270  LLT Ty = MRI.getType(AddLHS);
2271  if (Ty.isVector())
2272  return false;
2273  // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
2274  // bits.
2275  unsigned Size = Ty.getSizeInBits();
2276  if (Size != 32 && Size != 64)
2277  return false;
2278  auto MatchCmp = [&](Register Reg) -> MachineInstr * {
2279  if (!MRI.hasOneNonDBGUse(Reg))
2280  return nullptr;
2281  // If the LHS of the add is 32 bits, then we want to fold a 32-bit
2282  // compare.
2283  if (Size == 32)
2284  return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
2285  // We model scalar compares using 32-bit destinations right now.
2286  // If it's a 64-bit compare, it'll have 64-bit sources.
2287  Register ZExt;
2288  if (!mi_match(Reg, MRI,
2289  m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt))))))
2290  return nullptr;
2291  auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
2292  if (!Cmp ||
2293  MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
2294  return nullptr;
2295  return Cmp;
2296  };
2297  // Try to match
2298  // z + (cmp pred, x, y)
2299  MachineInstr *Cmp = MatchCmp(AddRHS);
2300  if (!Cmp) {
2301  // (cmp pred, x, y) + z
2302  std::swap(AddLHS, AddRHS);
2303  Cmp = MatchCmp(AddRHS);
2304  if (!Cmp)
2305  return false;
2306  }
2307  auto &PredOp = Cmp->getOperand(1);
2308  auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
2309  const AArch64CC::CondCode InvCC =
2310  changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
2311  MIB.setInstrAndDebugLoc(I);
2312  emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
2313  /*RHS=*/Cmp->getOperand(3), PredOp, MIB);
2314  emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
2315  I.eraseFromParent();
2316  return true;
2317  }
2318  case TargetOpcode::G_OR: {
2319  // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2320  // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2321  // shifting and masking that we can replace with a BFI (encoded as a BFM).
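  // Added commentary (not part of the original source): for instance, with
  // Size = 32 and ShiftImm = 8 (so MaskImm must be 0xff),
  //   %dst = G_OR (G_SHL %hi, 8), (G_AND %lo, 0xff)
  // is selected to BFMWri with immr = 24 and imms = 23, i.e. the BFI alias
  // with lsb = 8 and width = 24: the low 8 bits of %lo are kept and the low
  // 24 bits of %hi are inserted above them.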
2322  Register Dst = I.getOperand(0).getReg();
2323  LLT Ty = MRI.getType(Dst);
2324 
2325  if (!Ty.isScalar())
2326  return false;
2327 
2328  unsigned Size = Ty.getSizeInBits();
2329  if (Size != 32 && Size != 64)
2330  return false;
2331 
2332  Register ShiftSrc;
2333  int64_t ShiftImm;
2334  Register MaskSrc;
2335  int64_t MaskImm;
2336  if (!mi_match(
2337  Dst, MRI,
2338  m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
2339  m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
2340  return false;
2341 
2342  if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2343  return false;
2344 
2345  int64_t Immr = Size - ShiftImm;
2346  int64_t Imms = Size - ShiftImm - 1;
2347  unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2348  emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2349  I.eraseFromParent();
2350  return true;
2351  }
2352  case TargetOpcode::G_FENCE: {
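  // Added commentary (not part of the original source): operand 0 of G_FENCE
  // is the atomic ordering and operand 1 the sync-scope ID. A scope of 0
  // (singlethread) only needs a compiler barrier; otherwise an acquire-only
  // fence (ordering value 4) is lowered to DMB ISHLD (option 0x9) and anything
  // stronger to DMB ISH (option 0xb).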
2353  if (I.getOperand(1).getImm() == 0)
2354  BuildMI(MBB, I, MIMetadata(I), TII.get(AArch64::CompilerBarrier))
2355  .addImm(I.getOperand(0).getImm());
2356  else
2357  BuildMI(MBB, I, MIMetadata(I), TII.get(AArch64::DMB))
2358  .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
2359  I.eraseFromParent();
2360  return true;
2361  }
2362  default:
2363  return false;
2364  }
2365 }
2366 
2367 bool AArch64InstructionSelector::select(MachineInstr &I) {
2368  assert(I.getParent() && "Instruction should be in a basic block!");
2369  assert(I.getParent()->getParent() && "Instruction should be in a function!");
2370 
2371  MachineBasicBlock &MBB = *I.getParent();
2372  MachineFunction &MF = *MBB.getParent();
2373  MachineRegisterInfo &MRI = MF.getRegInfo();
2374 
2375  const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
2376  if (Subtarget->requiresStrictAlign()) {
2377  // We don't support this feature yet.
2378  LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2379  return false;
2380  }
2381 
2382  MIB.setInstrAndDebugLoc(I);
2383 
2384  unsigned Opcode = I.getOpcode();
2385  // G_PHI requires same handling as PHI
2386  if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2387  // Certain non-generic instructions also need some special handling.
2388 
2389  if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2390  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2391 
2392  if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2393  const Register DefReg = I.getOperand(0).getReg();
2394  const LLT DefTy = MRI.getType(DefReg);
2395 
2396  const RegClassOrRegBank &RegClassOrBank =
2397  MRI.getRegClassOrRegBank(DefReg);
2398 
2399  const TargetRegisterClass *DefRC
2400  = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
2401  if (!DefRC) {
2402  if (!DefTy.isValid()) {
2403  LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2404  return false;
2405  }
2406  const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
2407  DefRC = getRegClassForTypeOnBank(DefTy, RB);
2408  if (!DefRC) {
2409  LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2410  return false;
2411  }
2412  }
2413 
2414  I.setDesc(TII.get(TargetOpcode::PHI));
2415 
2416  return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2417  }
2418 
2419  if (I.isCopy())
2420  return selectCopy(I, TII, MRI, TRI, RBI);
2421 
2422  if (I.isDebugInstr())
2423  return selectDebugInstr(I, MRI, RBI);
2424 
2425  return true;
2426  }
2427 
2428 
2429  if (I.getNumOperands() != I.getNumExplicitOperands()) {
2430  LLVM_DEBUG(
2431  dbgs() << "Generic instruction has unexpected implicit operands\n");
2432  return false;
2433  }
2434 
2435  // Try to do some lowering before we start instruction selecting. These
2436  // lowerings are purely transformations on the input G_MIR and so selection
2437  // must continue after any modification of the instruction.
2438  if (preISelLower(I)) {
2439  Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2440  }
2441 
2442  // There may be patterns where the importer can't deal with them optimally,
2443  // but does select it to a suboptimal sequence so our custom C++ selection
2444  // code later never has a chance to work on it. Therefore, we have an early
2445  // selection attempt here to give priority to certain selection routines
2446  // over the imported ones.
2447  if (earlySelect(I))
2448  return true;
2449 
2450  if (selectImpl(I, *CoverageInfo))
2451  return true;
2452 
2453  LLT Ty =
2454  I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2455 
2456  switch (Opcode) {
2457  case TargetOpcode::G_SBFX:
2458  case TargetOpcode::G_UBFX: {
2459  static const unsigned OpcTable[2][2] = {
2460  {AArch64::UBFMWri, AArch64::UBFMXri},
2461  {AArch64::SBFMWri, AArch64::SBFMXri}};
2462  bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2463  unsigned Size = Ty.getSizeInBits();
2464  unsigned Opc = OpcTable[IsSigned][Size == 64];
2465  auto Cst1 =
2466  getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
2467  assert(Cst1 && "Should have gotten a constant for src 1?");
2468  auto Cst2 =
2469  getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
2470  assert(Cst2 && "Should have gotten a constant for src 2?");
2471  auto LSB = Cst1->Value.getZExtValue();
2472  auto Width = Cst2->Value.getZExtValue();
2473  auto BitfieldInst =
2474  MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
2475  .addImm(LSB)
2476  .addImm(LSB + Width - 1);
2477  I.eraseFromParent();
2478  return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
2479  }
2480  case TargetOpcode::G_BRCOND:
2481  return selectCompareBranch(I, MF, MRI);
2482 
2483  case TargetOpcode::G_BRINDIRECT: {
2484  I.setDesc(TII.get(AArch64::BR));
2485  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2486  }
2487 
2488  case TargetOpcode::G_BRJT:
2489  return selectBrJT(I, MRI);
2490 
2491  case AArch64::G_ADD_LOW: {
2492  // This op may have been separated from its ADRP companion by the localizer
2493  // or some other code motion pass. Given that many CPUs will try to
2494  // macro fuse these operations anyway, select this into a MOVaddr pseudo
2495  // which will later be expanded into an ADRP+ADD pair after scheduling.
2496  MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2497  if (BaseMI->getOpcode() != AArch64::ADRP) {
2498  I.setDesc(TII.get(AArch64::ADDXri));
2499  I.addOperand(MachineOperand::CreateImm(0));
2500  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2501  }
2502  assert(TM.getCodeModel() == CodeModel::Small &&
2503  "Expected small code model");
2504  auto Op1 = BaseMI->getOperand(1);
2505  auto Op2 = I.getOperand(2);
2506  auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2507  .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2508  Op1.getTargetFlags())
2509  .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2510  Op2.getTargetFlags());
2511  I.eraseFromParent();
2512  return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2513  }
2514 
2515  case TargetOpcode::G_BSWAP: {
2516  // Handle vector types for G_BSWAP directly.
2517  Register DstReg = I.getOperand(0).getReg();
2518  LLT DstTy = MRI.getType(DstReg);
2519 
2520  // We should only get vector types here; everything else is handled by the
2521  // importer right now.
2522  if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
2523  LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
2524  return false;
2525  }
2526 
2527  // Only handle 4 and 2 element vectors for now.
2528  // TODO: 16-bit elements.
2529  unsigned NumElts = DstTy.getNumElements();
2530  if (NumElts != 4 && NumElts != 2) {
2531  LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
2532  return false;
2533  }
2534 
2535  // Choose the correct opcode for the supported types. Right now, that's
2536  // v2s32, v4s32, and v2s64.
2537  unsigned Opc = 0;
2538  unsigned EltSize = DstTy.getElementType().getSizeInBits();
2539  if (EltSize == 32)
2540  Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
2541  : AArch64::REV32v16i8;
2542  else if (EltSize == 64)
2543  Opc = AArch64::REV64v16i8;
2544 
2545  // We should always get something by the time we get here...
2546  assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
2547 
2548  I.setDesc(TII.get(Opc));
2549  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2550  }
2551 
2552  case TargetOpcode::G_FCONSTANT:
2553  case TargetOpcode::G_CONSTANT: {
2554  const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2555 
2556  const LLT s8 = LLT::scalar(8);
2557  const LLT s16 = LLT::scalar(16);
2558  const LLT s32 = LLT::scalar(32);
2559  const LLT s64 = LLT::scalar(64);
2560  const LLT s128 = LLT::scalar(128);
2561  const LLT p0 = LLT::pointer(0, 64);
2562 
2563  const Register DefReg = I.getOperand(0).getReg();
2564  const LLT DefTy = MRI.getType(DefReg);
2565  const unsigned DefSize = DefTy.getSizeInBits();
2566  const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2567 
2568  // FIXME: Redundant check, but even less readable when factored out.
2569  if (isFP) {
2570  if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2571  LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2572  << " constant, expected: " << s16 << " or " << s32
2573  << " or " << s64 << " or " << s128 << '\n');
2574  return false;
2575  }
2576 
2577  if (RB.getID() != AArch64::FPRRegBankID) {
2578  LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2579  << " constant on bank: " << RB
2580  << ", expected: FPR\n");
2581  return false;
2582  }
2583 
2584  // The case when we have 0.0 is covered by tablegen. Reject it here so we
2585  // can be sure tablegen works correctly and isn't rescued by this code.
2586  // 0.0 is not covered by tablegen for FP128. So we will handle this
2587  // scenario in the code here.
2588  if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2589  return false;
2590  } else {
2591  // s32 and s64 are covered by tablegen.
2592  if (Ty != p0 && Ty != s8 && Ty != s16) {
2593  LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2594  << " constant, expected: " << s32 << ", " << s64
2595  << ", or " << p0 << '\n');
2596  return false;
2597  }
2598 
2599  if (RB.getID() != AArch64::GPRRegBankID) {
2600  LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2601  << " constant on bank: " << RB
2602  << ", expected: GPR\n");
2603  return false;
2604  }
2605  }
2606 
2607  if (isFP) {
2608  const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB);
2609  // For 16, 64, and 128b values, emit a constant pool load.
2610  switch (DefSize) {
2611  default:
2612  llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
2613  case 32:
2614  // For s32, use a cp load if we have optsize/minsize.
2615  if (!shouldOptForSize(&MF))
2616  break;
2617  [[fallthrough]];
2618  case 16:
2619  case 64:
2620  case 128: {
2621  auto *FPImm = I.getOperand(1).getFPImm();
2622  auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2623  if (!LoadMI) {
2624  LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2625  return false;
2626  }
2627  MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2628  I.eraseFromParent();
2629  return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2630  }
2631  }
2632 
2633  // Either emit a FMOV, or emit a copy to emit a normal mov.
2634  assert(DefSize == 32 &&
2635  "Expected constant pool loads for all sizes other than 32!");
2636  const Register DefGPRReg =
2637  MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2638  MachineOperand &RegOp = I.getOperand(0);
2639  RegOp.setReg(DefGPRReg);
2640  MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2641  MIB.buildCopy({DefReg}, {DefGPRReg});
2642 
2643  if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2644  LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2645  return false;
2646  }
2647 
2648  MachineOperand &ImmOp = I.getOperand(1);
2649  // FIXME: Is going through int64_t always correct?
2650  ImmOp.ChangeToImmediate(
2651  ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2652  } else if (I.getOperand(1).isCImm()) {
2653  uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2654  I.getOperand(1).ChangeToImmediate(Val);
2655  } else if (I.getOperand(1).isImm()) {
2656  uint64_t Val = I.getOperand(1).getImm();
2657  I.getOperand(1).ChangeToImmediate(Val);
2658  }
2659 
2660  const unsigned MovOpc =
2661  DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2662  I.setDesc(TII.get(MovOpc));
2663  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2664  return true;
2665  }
2666  case TargetOpcode::G_EXTRACT: {
2667  Register DstReg = I.getOperand(0).getReg();
2668  Register SrcReg = I.getOperand(1).getReg();
2669  LLT SrcTy = MRI.getType(SrcReg);
2670  LLT DstTy = MRI.getType(DstReg);
2671  (void)DstTy;
2672  unsigned SrcSize = SrcTy.getSizeInBits();
2673 
2674  if (SrcTy.getSizeInBits() > 64) {
2675  // This should be an extract of an s128, which is like a vector extract.
2676  if (SrcTy.getSizeInBits() != 128)
2677  return false;
2678  // Only support extracting 64 bits from an s128 at the moment.
2679  if (DstTy.getSizeInBits() != 64)
2680  return false;
2681 
2682  unsigned Offset = I.getOperand(2).getImm();
2683  if (Offset % 64 != 0)
2684  return false;
2685 
2686  // Check we have the right regbank always.
2687  const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2688  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2689  assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2690 
2691  if (SrcRB.getID() == AArch64::GPRRegBankID) {
2692  auto NewI =
2693  MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2694  .addUse(SrcReg, 0,
2695  Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2696  constrainOperandRegClass(MF, TRI, MRI, TII, RBI, *NewI,
2697  AArch64::GPR64RegClass, NewI->getOperand(0));
2698  I.eraseFromParent();
2699  return true;
2700  }
2701 
2702  // Emit the same code as a vector extract.
2703  // Offset must be a multiple of 64.
2704  unsigned LaneIdx = Offset / 64;
2705  MachineInstr *Extract = emitExtractVectorElt(
2706  DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2707  if (!Extract)
2708  return false;
2709  I.eraseFromParent();
2710  return true;
2711  }
2712 
2713  I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2714  MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2715  Ty.getSizeInBits() - 1);
2716 
2717  if (SrcSize < 64) {
2718  assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2719  "unexpected G_EXTRACT types");
2720  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2721  }
2722 
2723  DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2724  MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2725  MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2726  .addReg(DstReg, 0, AArch64::sub_32);
2727  RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2728  AArch64::GPR32RegClass, MRI);
2729  I.getOperand(0).setReg(DstReg);
2730 
2731  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2732  }
2733 
2734  case TargetOpcode::G_INSERT: {
2735  LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2736  LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2737  unsigned DstSize = DstTy.getSizeInBits();
2738  // Larger inserts are vectors; same-size ones should be something else by
2739  // now (split up or turned into COPYs).
2740  if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2741  return false;
2742 
2743  I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2744  unsigned LSB = I.getOperand(3).getImm();
2745  unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2746  I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2747  MachineInstrBuilder(MF, I).addImm(Width - 1);
2748 
2749  if (DstSize < 64) {
2750  assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2751  "unexpected G_INSERT types");
2752  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2753  }
2754 
2755  Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2756  BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2757  TII.get(AArch64::SUBREG_TO_REG))
2758  .addDef(SrcReg)
2759  .addImm(0)
2760  .addUse(I.getOperand(2).getReg())
2761  .addImm(AArch64::sub_32);
2762  RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2763  AArch64::GPR32RegClass, MRI);
2764  I.getOperand(2).setReg(SrcReg);
2765 
2766  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2767  }
2768  case TargetOpcode::G_FRAME_INDEX: {
2769  // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2770  if (Ty != LLT::pointer(0, 64)) {
2771  LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2772  << ", expected: " << LLT::pointer(0, 64) << '\n');
2773  return false;
2774  }
2775  I.setDesc(TII.get(AArch64::ADDXri));
2776 
2777  // MOs for a #0 shifted immediate.
2778  I.addOperand(MachineOperand::CreateImm(0));
2779  I.addOperand(MachineOperand::CreateImm(0));
2780 
2781  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2782  }
2783 
2784  case TargetOpcode::G_GLOBAL_VALUE: {
2785  auto GV = I.getOperand(1).getGlobal();
2786  if (GV->isThreadLocal())
2787  return selectTLSGlobalValue(I, MRI);
2788 
2789  unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
2790  if (OpFlags & AArch64II::MO_GOT) {
2791  I.setDesc(TII.get(AArch64::LOADgot));
2792  I.getOperand(1).setTargetFlags(OpFlags);
2793  } else if (TM.getCodeModel() == CodeModel::Large) {
2794  // Materialize the global using movz/movk instructions.
2795  materializeLargeCMVal(I, GV, OpFlags);
2796  I.eraseFromParent();
2797  return true;
2798  } else if (TM.getCodeModel() == CodeModel::Tiny) {
2799  I.setDesc(TII.get(AArch64::ADR));
2800  I.getOperand(1).setTargetFlags(OpFlags);
2801  } else {
2802  I.setDesc(TII.get(AArch64::MOVaddr));
2803  I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2804  MachineInstrBuilder MIB(MF, I);
2805  MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2806  OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2807  }
2808  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2809  }
2810 
2811  case TargetOpcode::G_ZEXTLOAD:
2812  case TargetOpcode::G_LOAD:
2813  case TargetOpcode::G_STORE: {
2814  GLoadStore &LdSt = cast<GLoadStore>(I);
2815  bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2816  LLT PtrTy = MRI.getType(LdSt.getPointerReg());
2817 
2818  if (PtrTy != LLT::pointer(0, 64)) {
2819  LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
2820  << ", expected: " << LLT::pointer(0, 64) << '\n');
2821  return false;
2822  }
2823 
2824  uint64_t MemSizeInBytes = LdSt.getMemSize();
2825  unsigned MemSizeInBits = LdSt.getMemSizeInBits();
2826  AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
2827 
2828  // Need special instructions for atomics that affect ordering.
2829  if (Order != AtomicOrdering::NotAtomic &&
2830  Order != AtomicOrdering::Unordered &&
2831  Order != AtomicOrdering::Monotonic) {
2832  assert(!isa<GZExtLoad>(LdSt));
2833  if (MemSizeInBytes > 64)
2834  return false;
2835 
2836  if (isa<GLoad>(LdSt)) {
2837  static constexpr unsigned LDAPROpcodes[] = {
2838  AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
2839  static constexpr unsigned LDAROpcodes[] = {
2840  AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
2841  ArrayRef<unsigned> Opcodes =
2842  STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
2843  ? LDAPROpcodes
2844  : LDAROpcodes;
2845  I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2846  } else {
2847  static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2848  AArch64::STLRW, AArch64::STLRX};
2849  Register ValReg = LdSt.getReg(0);
2850  if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
2851  // Emit a subreg copy of 32 bits.
2852  Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2853  MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
2854  .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
2855  I.getOperand(0).setReg(NewVal);
2856  }
2857  I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2858  }
2859  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2860  return true;
2861  }
2862 
2863 #ifndef NDEBUG
2864  const Register PtrReg = LdSt.getPointerReg();
2865  const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2866  // Check that the pointer register is valid.
2867  assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2868  "Load/Store pointer operand isn't a GPR");
2869  assert(MRI.getType(PtrReg).isPointer() &&
2870  "Load/Store pointer operand isn't a pointer");
2871 #endif
2872 
2873  const Register ValReg = LdSt.getReg(0);
2874  const LLT ValTy = MRI.getType(ValReg);
2875  const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2876 
2877  // The code below doesn't support truncating stores, so we need to split it
2878  // again.
2879  if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2880  unsigned SubReg;
2881  LLT MemTy = LdSt.getMMO().getMemoryType();
2882  auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2883  if (!getSubRegForClass(RC, TRI, SubReg))
2884  return false;
2885 
2886  // Generate a subreg copy.
2887  auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
2888  .addReg(ValReg, 0, SubReg)
2889  .getReg(0);
2890  RBI.constrainGenericRegister(Copy, *RC, MRI);
2891  LdSt.getOperand(0).setReg(Copy);
2892  } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2893  // If this is an any-extending load from the FPR bank, split it into a regular
2894  // load + extend.
2895  if (RB.getID() == AArch64::FPRRegBankID) {
2896  unsigned SubReg;
2897  LLT MemTy = LdSt.getMMO().getMemoryType();
2898  auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2899  if (!getSubRegForClass(RC, TRI, SubReg))
2900  return false;
2901  Register OldDst = LdSt.getReg(0);
2902  Register NewDst =
2903  MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
2904  LdSt.getOperand(0).setReg(NewDst);
2905  MRI.setRegBank(NewDst, RB);
2906  // Generate a SUBREG_TO_REG to extend it.
2907  MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
2908  MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
2909  .addImm(0)
2910  .addUse(NewDst)
2911  .addImm(SubReg);
2912  auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB);
2913  RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
2914  MIB.setInstr(LdSt);
2915  }
2916  }
2917 
2918  // Helper lambda for partially selecting I. Either returns the original
2919  // instruction with an updated opcode, or a new instruction.
2920  auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
2921  bool IsStore = isa<GStore>(I);
2922  const unsigned NewOpc =
2923  selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
2924  if (NewOpc == I.getOpcode())
2925  return nullptr;
2926  // Check if we can fold anything into the addressing mode.
2927  auto AddrModeFns =
2928  selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
2929  if (!AddrModeFns) {
2930  // Can't fold anything. Use the original instruction.
2931  I.setDesc(TII.get(NewOpc));
2932  I.addOperand(MachineOperand::CreateImm(0));
2933  return &I;
2934  }
2935 
2936  // Folded something. Create a new instruction and return it.
2937  auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
2938  Register CurValReg = I.getOperand(0).getReg();
2939  IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
2940  NewInst.cloneMemRefs(I);
2941  for (auto &Fn : *AddrModeFns)
2942  Fn(NewInst);
2943  I.eraseFromParent();
2944  return &*NewInst;
2945  };
2946 
2947  MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
2948  if (!LoadStore)
2949  return false;
2950 
2951  // If we're storing a 0, use WZR/XZR.
2952  if (Opcode == TargetOpcode::G_STORE) {
2953  auto CVal = getIConstantVRegValWithLookThrough(
2954  LoadStore->getOperand(0).getReg(), MRI);
2955  if (CVal && CVal->Value == 0) {
2956  switch (LoadStore->getOpcode()) {
2957  case AArch64::STRWui:
2958  case AArch64::STRHHui:
2959  case AArch64::STRBBui:
2960  LoadStore->getOperand(0).setReg(AArch64::WZR);
2961  break;
2962  case AArch64::STRXui:
2963  LoadStore->getOperand(0).setReg(AArch64::XZR);
2964  break;
2965  }
2966  }
2967  }
2968 
2969  if (IsZExtLoad) {
2970  // The zextload from a smaller type to i32 should be handled by the
2971  // importer.
2972  if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
2973  return false;
2974  // If we have a ZEXTLOAD then change the load's type to be a narrower reg
2975  // and zero_extend with SUBREG_TO_REG.
2976  Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2977  Register DstReg = LoadStore->getOperand(0).getReg();
2978  LoadStore->getOperand(0).setReg(LdReg);
2979 
2980  MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
2981  MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
2982  .addImm(0)
2983  .addUse(LdReg)
2984  .addImm(AArch64::sub_32);
2985  constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2986  return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
2987  MRI);
2988  }
2989  return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2990  }
2991 
2992  case TargetOpcode::G_SMULH:
2993  case TargetOpcode::G_UMULH: {
2994  // Reject the various things we don't support yet.
2995  if (unsupportedBinOp(I, RBI, MRI, TRI))
2996  return false;
2997 
2998  const Register DefReg = I.getOperand(0).getReg();
2999  const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
3000 
3001  if (RB.getID() != AArch64::GPRRegBankID) {
3002  LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
3003  return false;
3004  }
3005 
3006  if (Ty != LLT::scalar(64)) {
3007  LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
3008  << ", expected: " << LLT::scalar(64) << '\n');
3009  return false;
3010  }
3011 
3012  unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
3013  : AArch64::UMULHrr;
3014  I.setDesc(TII.get(NewOpc));
3015 
3016  // Now that we selected an opcode, we need to constrain the register
3017  // operands to use appropriate classes.
3018  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3019  }
3020  case TargetOpcode::G_LSHR:
3021  case TargetOpcode::G_ASHR:
3022  if (MRI.getType(I.getOperand(0).getReg()).isVector())
3023  return selectVectorAshrLshr(I, MRI);
3024  [[fallthrough]];
3025  case TargetOpcode::G_SHL:
3026  if (Opcode == TargetOpcode::G_SHL &&
3027  MRI.getType(I.getOperand(0).getReg()).isVector())
3028  return selectVectorSHL(I, MRI);
3029 
3030  // These shifts were legalized to have 64 bit shift amounts because we
3031  // want to take advantage of the selection patterns that assume the
3032  // immediates are s64s; however, selectBinaryOp will assume both operands
3033  // will have the same bit size.
3034  {
3035  Register SrcReg = I.getOperand(1).getReg();
3036  Register ShiftReg = I.getOperand(2).getReg();
3037  const LLT ShiftTy = MRI.getType(ShiftReg);
3038  const LLT SrcTy = MRI.getType(SrcReg);
3039  if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
3040  ShiftTy.getSizeInBits() == 64) {
3041  assert(!ShiftTy.isVector() && "unexpected vector shift ty");
3042  // Insert a subregister copy to implement a 64->32 trunc
3043  auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
3044  .addReg(ShiftReg, 0, AArch64::sub_32);
3045  MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
3046  I.getOperand(2).setReg(Trunc.getReg(0));
3047  }
3048  }
3049  [[fallthrough]];
3050  case TargetOpcode::G_OR: {
3051  // Reject the various things we don't support yet.
3052  if (unsupportedBinOp(I, RBI, MRI, TRI))
3053  return false;
3054 
3055  const unsigned OpSize = Ty.getSizeInBits();
3056 
3057  const Register DefReg = I.getOperand(0).getReg();
3058  const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
3059 
3060  const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
3061  if (NewOpc == I.getOpcode())
3062  return false;
3063 
3064  I.setDesc(TII.get(NewOpc));
3065  // FIXME: Should the type be always reset in setDesc?
3066 
3067  // Now that we selected an opcode, we need to constrain the register
3068  // operands to use appropriate classes.
3069  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3070  }
3071 
3072  case TargetOpcode::G_PTR_ADD: {
3073  emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
3074  I.eraseFromParent();
3075  return true;
3076  }
3077  case TargetOpcode::G_SADDO:
3078  case TargetOpcode::G_UADDO:
3079  case TargetOpcode::G_SSUBO:
3080  case TargetOpcode::G_USUBO: {
3081  // Emit the operation and get the correct condition code.
3082  auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
3083  I.getOperand(2), I.getOperand(3), MIB);
3084 
3085  // Now, put the overflow result in the register given by the first operand
3086  // to the overflow op. CSINC increments the result when the predicate is
3087  // false, so to get the increment when it's true, we need to use the
3088  // inverse. In this case, we want to increment when carry is set.
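  // Added commentary (not part of the original source): for example, G_UADDO
  // reports overflow on HS (carry set); the CSINC below is emitted with the
  // inverted code LO, so the overflow register becomes WZR + 1 == 1 exactly
  // when the addition carried, and WZR == 0 otherwise.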
3089  Register ZReg = AArch64::WZR;
3090  emitCSINC(/*Dst=*/I.getOperand(1).getReg(), /*Src1=*/ZReg, /*Src2=*/ZReg,
3091  getInvertedCondCode(OpAndCC.second), MIB);
3092  I.eraseFromParent();
3093  return true;
3094  }
3095 
3096  case TargetOpcode::G_PTRMASK: {
3097  Register MaskReg = I.getOperand(2).getReg();
3098  Optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
3099  // TODO: Implement arbitrary cases
3100  if (!MaskVal || !isShiftedMask_64(*MaskVal))
3101  return false;
3102 
3103  uint64_t Mask = *MaskVal;
3104  I.setDesc(TII.get(AArch64::ANDXri));
3105  I.getOperand(2).ChangeToImmediate(
3106  AArch64_AM::encodeLogicalImmediate(Mask, 64));
3107 
3108  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3109  }
3110  case TargetOpcode::G_PTRTOINT:
3111  case TargetOpcode::G_TRUNC: {
3112  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3113  const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3114 
3115  const Register DstReg = I.getOperand(0).getReg();
3116  const Register SrcReg = I.getOperand(1).getReg();
3117 
3118  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3119  const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3120 
3121  if (DstRB.getID() != SrcRB.getID()) {
3122  LLVM_DEBUG(
3123  dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
3124  return false;
3125  }
3126 
3127  if (DstRB.getID() == AArch64::GPRRegBankID) {
3128  const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3129  if (!DstRC)
3130  return false;
3131 
3132  const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB);
3133  if (!SrcRC)
3134  return false;
3135 
3136  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
3137  !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
3138  LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
3139  return false;
3140  }
3141 
3142  if (DstRC == SrcRC) {
3143  // Nothing to be done
3144  } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
3145  SrcTy == LLT::scalar(64)) {
3146  llvm_unreachable("TableGen can import this case");
3147  return false;
3148  } else if (DstRC == &AArch64::GPR32RegClass &&
3149  SrcRC == &AArch64::GPR64RegClass) {
3150  I.getOperand(1).setSubReg(AArch64::sub_32);
3151  } else {
3152  LLVM_DEBUG(
3153  dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3154  return false;
3155  }
3156 
3157  I.setDesc(TII.get(TargetOpcode::COPY));
3158  return true;
3159  } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3160  if (DstTy == LLT::fixed_vector(4, 16) &&
3161  SrcTy == LLT::fixed_vector(4, 32)) {
3162  I.setDesc(TII.get(AArch64::XTNv4i16));
3163  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3164  return true;
3165  }
3166 
3167  if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3168  MachineInstr *Extract = emitExtractVectorElt(
3169  DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
3170  if (!Extract)
3171  return false;
3172  I.eraseFromParent();
3173  return true;
3174  }
3175 
3176  // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3177  if (Opcode == TargetOpcode::G_PTRTOINT) {
3178  assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
3179  I.setDesc(TII.get(TargetOpcode::COPY));
3180  return selectCopy(I, TII, MRI, TRI, RBI);
3181  }
3182  }
3183 
3184  return false;
3185  }
3186 
3187  case TargetOpcode::G_ANYEXT: {
3188  if (selectUSMovFromExtend(I, MRI))
3189  return true;
3190 
3191  const Register DstReg = I.getOperand(0).getReg();
3192  const Register SrcReg = I.getOperand(1).getReg();
3193 
3194  const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
3195  if (RBDst.getID() != AArch64::GPRRegBankID) {
3196  LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
3197  << ", expected: GPR\n");
3198  return false;
3199  }
3200 
3201  const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
3202  if (RBSrc.getID() != AArch64::GPRRegBankID) {
3203  LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
3204  << ", expected: GPR\n");
3205  return false;
3206  }
3207 
3208  const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
3209 
3210  if (DstSize == 0) {
3211  LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
3212  return false;
3213  }
3214 
3215  if (DstSize != 64 && DstSize > 32) {
3216  LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
3217  << ", expected: 32 or 64\n");
3218  return false;
3219  }
3220  // At this point G_ANYEXT is just like a plain COPY, but if the destination
3221  // is 64 bits we need to explicitly form the 64-bit value first.
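// For a 64-bit destination, the code below wraps the 32-bit source in a
// SUBREG_TO_REG so the COPY sees a 64-bit operand; the upper 32 bits are left
// undefined, which is fine for an anyext.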
3222  if (DstSize > 32) {
3223  Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3224  BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
3225  .addDef(ExtSrc)
3226  .addImm(0)
3227  .addUse(SrcReg)
3228  .addImm(AArch64::sub_32);
3229  I.getOperand(1).setReg(ExtSrc);
3230  }
3231  return selectCopy(I, TII, MRI, TRI, RBI);
3232  }
3233 
3234  case TargetOpcode::G_ZEXT:
3235  case TargetOpcode::G_SEXT_INREG:
3236  case TargetOpcode::G_SEXT: {
3237  if (selectUSMovFromExtend(I, MRI))
3238  return true;
3239 
3240  unsigned Opcode = I.getOpcode();
3241  const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3242  const Register DefReg = I.getOperand(0).getReg();
3243  Register SrcReg = I.getOperand(1).getReg();
3244  const LLT DstTy = MRI.getType(DefReg);
3245  const LLT SrcTy = MRI.getType(SrcReg);
3246  unsigned DstSize = DstTy.getSizeInBits();
3247  unsigned SrcSize = SrcTy.getSizeInBits();
3248 
3249  // G_SEXT_INREG has the same source register size as its destination; the
3250  // width of the value being extended is encoded in the immediate.
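// E.g. %d(s64) = G_SEXT_INREG %s(s64), 8 sign-extends the low 8 bits of %s,
// so SrcSize becomes 8 here even though both registers are s64.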
3251  if (Opcode == TargetOpcode::G_SEXT_INREG)
3252  SrcSize = I.getOperand(2).getImm();
3253 
3254  if (DstTy.isVector())
3255  return false; // Should be handled by imported patterns.
3256 
3257  assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3258  AArch64::GPRRegBankID &&
3259  "Unexpected ext regbank");
3260 
3261  MachineInstr *ExtI;
3262 
3263  // First, check whether we're extending the result of a load whose destination
3264  // type is smaller than 32 bits; if so, this zext is redundant. GPR32 is the
3265  // smallest GPR register on AArch64, and all narrower loads automatically
3266  // zero-extend the upper bits. E.g.
3267  // %v(s8) = G_LOAD %p, :: (load 1)
3268  // %v2(s32) = G_ZEXT %v(s8)
3269  if (!IsSigned) {
3270  auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
3271  bool IsGPR =
3272  RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3273  if (LoadMI && IsGPR) {
3274  const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3275  unsigned BytesLoaded = MemOp->getSize();
3276  if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3277  return selectCopy(I, TII, MRI, TRI, RBI);
3278  }
3279 
3280  // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3281  // + SUBREG_TO_REG.
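// Roughly: %tmp:gpr32 = ORRWrs $wzr, %src, 0 (a 32-bit mov; writing a W
// register zeroes the upper 32 bits of the X register), followed by
// %dst:gpr64 = SUBREG_TO_REG 0, %tmp, %subreg.sub_32.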
3282  if (IsGPR && SrcSize == 32 && DstSize == 64) {
3283  Register SubregToRegSrc =
3284  MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3285  const Register ZReg = AArch64::WZR;
3286  MIB.buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg})
3287  .addImm(0);
3288 
3289  MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3290  .addImm(0)
3291  .addUse(SubregToRegSrc)
3292  .addImm(AArch64::sub_32);
3293 
3294  if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3295  MRI)) {
3296  LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3297  return false;
3298  }
3299 
3300  if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3301  MRI)) {
3302  LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3303  return false;
3304  }
3305 
3306  I.eraseFromParent();
3307  return true;
3308  }
3309  }
3310 
3311  if (DstSize == 64) {
3312  if (Opcode != TargetOpcode::G_SEXT_INREG) {
3313  // FIXME: Can we avoid manually doing this?
3314  if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3315  MRI)) {
3316  LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3317  << " operand\n");
3318  return false;
3319  }
3320  SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
3321  {&AArch64::GPR64RegClass}, {})
3322  .addImm(0)
3323  .addUse(SrcReg)
3324  .addImm(AArch64::sub_32)
3325  .getReg(0);
3326  }
3327 
3328  ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3329  {DefReg}, {SrcReg})
3330  .addImm(0)
3331  .addImm(SrcSize - 1);
3332  } else if (DstSize <= 32) {
3333  ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3334  {DefReg}, {SrcReg})
3335  .addImm(0)
3336  .addImm(SrcSize - 1);
3337  } else {
3338  return false;
3339  }
3340 
3341  constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
3342  I.eraseFromParent();
3343  return true;
3344  }
3345 
3346  case TargetOpcode::G_SITOFP:
3347  case TargetOpcode::G_UITOFP:
3348  case TargetOpcode::G_FPTOSI:
3349  case TargetOpcode::G_FPTOUI: {
3350  const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
3351  SrcTy = MRI.getType(I.getOperand(1).getReg());
3352  const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
3353  if (NewOpc == Opcode)
3354  return false;
3355 
3356  I.setDesc(TII.get(NewOpc));
3357  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3358  I.setFlags(MachineInstr::NoFPExcept);
3359 
3360  return true;
3361  }
3362 
3363  case TargetOpcode::G_FREEZE:
3364  return selectCopy(I, TII, MRI, TRI, RBI);
3365 
3366  case TargetOpcode::G_INTTOPTR:
3367  // The importer is currently unable to import pointer types since they
3368  // didn't exist in SelectionDAG.
3369  return selectCopy(I, TII, MRI, TRI, RBI);
3370 
3371  case TargetOpcode::G_BITCAST:
3372  // Imported SelectionDAG rules can handle every bitcast except those that
3373  // bitcast from a type to the same type. Ideally, these shouldn't occur
3374  // but we might not run an optimizer that deletes them. The other exception
3375  // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3376  // of them.
3377  return selectCopy(I, TII, MRI, TRI, RBI);
3378 
3379  case TargetOpcode::G_SELECT: {
3380  auto &Sel = cast<GSelect>(I);
3381  const Register CondReg = Sel.getCondReg();
3382  const Register TReg = Sel.getTrueReg();
3383  const Register FReg = Sel.getFalseReg();
3384 
3385  if (tryOptSelect(Sel))
3386  return true;
3387 
3388  // Make sure to use an unused vreg instead of wzr, so that the peephole
3389  // optimizations will be able to optimize these.
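// The ANDSWri below tests bit 0 of the condition register and sets NZCV;
// emitSelect then uses NE, i.e. it picks TReg whenever that bit is set.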
3390  Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3391  auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3392  .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3393  constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3394  if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
3395  return false;
3396  Sel.eraseFromParent();
3397  return true;
3398  }
3399  case TargetOpcode::G_ICMP: {
3400  if (Ty.isVector())
3401  return selectVectorICmp(I, MRI);
3402 
3403  if (Ty != LLT::scalar(32)) {
3404  LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3405  << ", expected: " << LLT::scalar(32) << '\n');
3406  return false;
3407  }
3408 
3409  auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3410  const AArch64CC::CondCode InvCC =
3411  changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
3412  emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB);
3413  emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
3414  /*Src2=*/AArch64::WZR, InvCC, MIB);
3415  I.eraseFromParent();
3416  return true;
3417  }
3418 
3419  case TargetOpcode::G_FCMP: {
3420  CmpInst::Predicate Pred =
3421  static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3422  if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
3423  Pred) ||
3424  !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
3425  return false;
3426  I.eraseFromParent();
3427  return true;
3428  }
3429  case TargetOpcode::G_VASTART:
3430  return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3431  : selectVaStartAAPCS(I, MF, MRI);
3432  case TargetOpcode::G_INTRINSIC:
3433  return selectIntrinsic(I, MRI);
3434  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3435  return selectIntrinsicWithSideEffects(I, MRI);
3436  case TargetOpcode::G_IMPLICIT_DEF: {
3437  I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3438  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3439  const Register DstReg = I.getOperand(0).getReg();
3440  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3441  const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3442  RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3443  return true;
3444  }
3445  case TargetOpcode::G_BLOCK_ADDR: {
3446  if (TM.getCodeModel() == CodeModel::Large) {
3447  materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3448  I.eraseFromParent();
3449  return true;
3450  } else {
3451  I.setDesc(TII.get(AArch64::MOVaddrBA));
3452  auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3453  I.getOperand(0).getReg())
3454  .addBlockAddress(I.getOperand(1).getBlockAddress(),
3455  /* Offset */ 0, AArch64II::MO_PAGE)
3456  .addBlockAddress(
3457  I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3458  AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3459  I.eraseFromParent();
3460  return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3461  }
3462  }
3463  case AArch64::G_DUP: {
3464  // When the scalar operand of G_DUP is an s8/s16 GPR, it can't be selected by
3465  // the imported patterns, so do it manually here. Avoiding the s16 GPR at
3466  // register bank selection (RBS) is difficult, because we may end up pessimizing
3467  // the FPR case if we decide to add an anyext to fix this. Manual selection is
3468  // the most robust solution for now.
3469  if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3470  AArch64::GPRRegBankID)
3471  return false; // We expect the fpr regbank case to be imported.
3472  LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3473  if (VecTy == LLT::fixed_vector(8, 8))
3474  I.setDesc(TII.get(AArch64::DUPv8i8gpr));
3475  else if (VecTy == LLT::fixed_vector(16, 8))
3476  I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3477  else if (VecTy == LLT::fixed_vector(4, 16))
3478  I.setDesc(TII.get(AArch64::DUPv4i16gpr));
3479  else if (VecTy == LLT::fixed_vector(8, 16))
3480  I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3481  else
3482  return false;
3483  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3484  }
3485  case TargetOpcode::G_INTRINSIC_TRUNC:
3486  return selectIntrinsicTrunc(I, MRI);
3487  case TargetOpcode::G_INTRINSIC_ROUND:
3488  return selectIntrinsicRound(I, MRI);
3489  case TargetOpcode::G_BUILD_VECTOR:
3490  return selectBuildVector(I, MRI);
3491  case TargetOpcode::G_MERGE_VALUES:
3492  return selectMergeValues(I, MRI);
3493  case TargetOpcode::G_UNMERGE_VALUES:
3494  return selectUnmergeValues(I, MRI);
3495  case TargetOpcode::G_SHUFFLE_VECTOR:
3496  return selectShuffleVector(I, MRI);
3497  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3498  return selectExtractElt(I, MRI);
3499  case TargetOpcode::G_INSERT_VECTOR_ELT:
3500  return selectInsertElt(I, MRI);
3501  case TargetOpcode::G_CONCAT_VECTORS:
3502  return selectConcatVectors(I, MRI);
3503  case TargetOpcode::G_JUMP_TABLE:
3504  return selectJumpTable(I, MRI);
3505  case TargetOpcode::G_VECREDUCE_FADD:
3506  case TargetOpcode::G_VECREDUCE_ADD:
3507  return selectReduction(I, MRI);
3508  case TargetOpcode::G_MEMCPY:
3509  case TargetOpcode::G_MEMCPY_INLINE:
3510  case TargetOpcode::G_MEMMOVE:
3511  case TargetOpcode::G_MEMSET:
3512  assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
3513  return selectMOPS(I, MRI);
3514  }
3515 
3516  return false;
3517 }
3518 
3519 bool AArch64InstructionSelector::selectReduction(MachineInstr &I,
3520  MachineRegisterInfo &MRI) {
3521  Register VecReg = I.getOperand(1).getReg();
3522  LLT VecTy = MRI.getType(VecReg);
3523  if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
3524  // For <2 x i32> ADDPv2i32 generates an FPR64 value, so we need to emit
3525  // a subregister copy afterwards.
3526  if (VecTy == LLT::fixed_vector(2, 32)) {
3527  Register DstReg = I.getOperand(0).getReg();
3528  auto AddP = MIB.buildInstr(AArch64::ADDPv2i32, {&AArch64::FPR64RegClass},
3529  {VecReg, VecReg});
3530  auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3531  .addReg(AddP.getReg(0), 0, AArch64::ssub)
3532  .getReg(0);
3533  RBI.constrainGenericRegister(Copy, AArch64::FPR32RegClass, MRI);
3534  I.eraseFromParent();
3535  return constrainSelectedInstRegOperands(*AddP, TII, TRI, RBI);
3536  }
3537 
3538  unsigned Opc = 0;
3539  if (VecTy == LLT::fixed_vector(16, 8))
3540  Opc = AArch64::ADDVv16i8v;
3541  else if (VecTy == LLT::fixed_vector(8, 16))
3542  Opc = AArch64::ADDVv8i16v;
3543  else if (VecTy == LLT::fixed_vector(4, 32))
3544  Opc = AArch64::ADDVv4i32v;
3545  else if (VecTy == LLT::fixed_vector(2, 64))
3546  Opc = AArch64::ADDPv2i64p;
3547  else {
3548  LLVM_DEBUG(dbgs() << "Unhandled type for add reduction");
3549  return false;
3550  }
3551  I.setDesc(TII.get(Opc));
3552  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3553  }
3554 
3555  if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) {
3556  unsigned Opc = 0;
3557  if (VecTy == LLT::fixed_vector(2, 32))
3558  Opc = AArch64::FADDPv2i32p;
3559  else if (VecTy == LLT::fixed_vector(2, 64))
3560  Opc = AArch64::FADDPv2i64p;
3561  else {
3562  LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction");
3563  return false;
3564  }
3565  I.setDesc(TII.get(Opc));
3566  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3567  }
3568  return false;
3569 }
3570 
3571 bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
3572  MachineRegisterInfo &MRI) {
3573  unsigned Mopcode;
3574  switch (GI.getOpcode()) {
3575  case TargetOpcode::G_MEMCPY:
3576  case TargetOpcode::G_MEMCPY_INLINE:
3577  Mopcode = AArch64::MOPSMemoryCopyPseudo;
3578  break;
3579  case TargetOpcode::G_MEMMOVE:
3580  Mopcode = AArch64::MOPSMemoryMovePseudo;
3581  break;
3582  case TargetOpcode::G_MEMSET:
3583  // For tagged memset see llvm.aarch64.mops.memset.tag
3584  Mopcode = AArch64::MOPSMemorySetPseudo;
3585  break;
3586  }
3587 
3588  auto &DstPtr = GI.getOperand(0);
3589  auto &SrcOrVal = GI.getOperand(1);
3590  auto &Size = GI.getOperand(2);
3591 
3592  // Create copies of the registers that can be clobbered.
3593  const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
3594  const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
3595  const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg());
3596 
3597  const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3598  const auto &SrcValRegClass =
3599  IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3600 
3601  // Constrain to specific registers
3602  RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
3603  RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
3604  RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);
3605 
3606  MIB.buildCopy(DstPtrCopy, DstPtr);
3607  MIB.buildCopy(SrcValCopy, SrcOrVal);
3608  MIB.buildCopy(SizeCopy, Size);
3609 
3610  // The new instruction uses the copied registers because it must update them.
3611  // Its defs are unused, since they have no counterpart in G_MEM*, but they are
3612  // still tied.
3613  // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE
3614  Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
3615  Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3616  if (IsSet) {
3617  MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
3618  {DstPtrCopy, SizeCopy, SrcValCopy});
3619  } else {
3620  Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
3621  MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
3622  {DstPtrCopy, SrcValCopy, SizeCopy});
3623  }
3624 
3625  GI.eraseFromParent();
3626  return true;
3627 }
3628 
3629 bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3630  MachineRegisterInfo &MRI) {
3631  assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3632  Register JTAddr = I.getOperand(0).getReg();
3633  unsigned JTI = I.getOperand(1).getIndex();
3634  Register Index = I.getOperand(2).getReg();
3635 
3636  Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3637  Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3638 
3639  MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3640  auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3641  {TargetReg, ScratchReg}, {JTAddr, Index})
3642  .addJumpTableIndex(JTI);
3643  // Build the indirect branch.
3644  MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3645  I.eraseFromParent();
3646  return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3647 }
3648 
3649 bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3650  MachineRegisterInfo &MRI) {
3651  assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3652  assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3653 
3654  Register DstReg = I.getOperand(0).getReg();
3655  unsigned JTI = I.getOperand(1).getIndex();
3656  // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
3657  auto MovMI =
3658  MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3659  .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3660  .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3661  I.eraseFromParent();
3662  return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3663 }
3664 
3665 bool AArch64InstructionSelector::selectTLSGlobalValue(
3666  MachineInstr &I, MachineRegisterInfo &MRI) {
3667  if (!STI.isTargetMachO())
3668  return false;
3669  MachineFunction &MF = *I.getParent()->getParent();
3670  MF.getFrameInfo().setAdjustsStack(true);
3671 
3672  const auto &GlobalOp = I.getOperand(1);
3673  assert(GlobalOp.getOffset() == 0 &&
3674  "Shouldn't have an offset on TLS globals!");
3675  const GlobalValue &GV = *GlobalOp.getGlobal();
3676 
3677  auto LoadGOT =
3678  MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3679  .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3680 
3681  auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3682  {LoadGOT.getReg(0)})
3683  .addImm(0);
3684 
3685  MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3686  // TLS calls preserve all registers except those that absolutely must be
3687  // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3688  // silly).
3689  MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
3690  .addUse(AArch64::X0, RegState::Implicit)
3691  .addDef(AArch64::X0, RegState::Implicit)
3692  .addRegMask(TRI.getTLSCallPreservedMask());
3693 
3694  MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3695  RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3696  MRI);
3697  I.eraseFromParent();
3698  return true;
3699 }
3700 
3701 bool AArch64InstructionSelector::selectIntrinsicTrunc(
3702  MachineInstr &I, MachineRegisterInfo &MRI) const {
3703  const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3704 
3705  // Select the correct opcode.
3706  unsigned Opc = 0;
3707  if (!SrcTy.isVector()) {
3708  switch (SrcTy.getSizeInBits()) {
3709  default:
3710  case 16:
3711  Opc = AArch64::FRINTZHr;
3712  break;
3713  case 32:
3714  Opc = AArch64::FRINTZSr;
3715  break;
3716  case 64:
3717  Opc = AArch64::FRINTZDr;
3718  break;
3719  }
3720  } else {
3721  unsigned NumElts = SrcTy.getNumElements();
3722  switch (SrcTy.getElementType().getSizeInBits()) {
3723  default:
3724  break;
3725  case 16:
3726  if (NumElts == 4)
3727  Opc = AArch64::FRINTZv4f16;
3728  else if (NumElts == 8)
3729  Opc = AArch64::FRINTZv8f16;
3730  break;
3731  case 32:
3732  if (NumElts == 2)
3733  Opc = AArch64::FRINTZv2f32;
3734  else if (NumElts == 4)
3735  Opc = AArch64::FRINTZv4f32;
3736  break;
3737  case 64:
3738  if (NumElts == 2)
3739  Opc = AArch64::FRINTZv2f64;
3740  break;
3741  }
3742  }
3743 
3744  if (!Opc) {
3745  // Didn't get an opcode above, bail.
3746  LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
3747  return false;
3748  }
3749 
3750  // Legalization would have set us up perfectly for this; we just need to
3751  // set the opcode and move on.
3752  I.setDesc(TII.get(Opc));
3753  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3754 }
3755 
3756 bool AArch64InstructionSelector::selectIntrinsicRound(
3757  MachineInstr &I, MachineRegisterInfo &MRI) const {
3758  const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3759 
3760  // Select the correct opcode.
3761  unsigned Opc = 0;
3762  if (!SrcTy.isVector()) {
3763  switch (SrcTy.getSizeInBits()) {
3764  default:
3765  case 16:
3766  Opc = AArch64::FRINTAHr;
3767  break;
3768  case 32:
3769  Opc = AArch64::FRINTASr;
3770  break;
3771  case 64:
3772  Opc = AArch64::FRINTADr;
3773  break;
3774  }
3775  } else {
3776  unsigned NumElts = SrcTy.getNumElements();
3777  switch (SrcTy.getElementType().getSizeInBits()) {
3778  default:
3779  break;
3780  case 16:
3781  if (NumElts == 4)
3782  Opc = AArch64::FRINTAv4f16;
3783  else if (NumElts == 8)
3784  Opc = AArch64::FRINTAv8f16;
3785  break;
3786  case 32:
3787  if (NumElts == 2)
3788  Opc = AArch64::FRINTAv2f32;
3789  else if (NumElts == 4)
3790  Opc = AArch64::FRINTAv4f32;
3791  break;
3792  case 64:
3793  if (NumElts == 2)
3794  Opc = AArch64::FRINTAv2f64;
3795  break;
3796  }
3797  }
3798 
3799  if (!Opc) {
3800  // Didn't get an opcode above, bail.
3801  LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
3802  return false;
3803  }
3804 
3805  // Legalization would have set us up perfectly for this; we just need to
3806  // set the opcode and move on.
3807  I.setDesc(TII.get(Opc));
3808  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3809 }
3810 
3811 bool AArch64InstructionSelector::selectVectorICmp(
3812  MachineInstr &I, MachineRegisterInfo &MRI) {
3813  Register DstReg = I.getOperand(0).getReg();
3814  LLT DstTy = MRI.getType(DstReg);
3815  Register SrcReg = I.getOperand(2).getReg();
3816  Register Src2Reg = I.getOperand(3).getReg();
3817  LLT SrcTy = MRI.getType(SrcReg);
3818 
3819  unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
3820  unsigned NumElts = DstTy.getNumElements();
3821 
3822  // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
3823  // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
3824  // Third index is cc opcode:
3825  // 0 == eq
3826  // 1 == ugt
3827  // 2 == uge
3828  // 3 == ult
3829  // 4 == ule
3830  // 5 == sgt
3831  // 6 == sge
3832  // 7 == slt
3833  // 8 == sle
3834  // ne is done by negating 'eq' result.
3835 
3836  // The table below assumes that for some comparisons the operands will be
3837  // commuted.
3838  // ult op == commute + ugt op
3839  // ule op == commute + uge op
3840  // slt op == commute + sgt op
3841  // sle op == commute + sge op
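// For example, ICMP_ULT maps to the ugt column (CMHI) in the table below,
// and SwapOperands is set so the operands are commuted when the compare is
// emitted.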
3842  unsigned PredIdx = 0;
3843  bool SwapOperands = false;
3844  CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
3845  switch (Pred) {
3846  case CmpInst::ICMP_NE:
3847  case CmpInst::ICMP_EQ:
3848  PredIdx = 0;
3849  break;
3850  case CmpInst::ICMP_UGT:
3851  PredIdx = 1;
3852  break;
3853  case CmpInst::ICMP_UGE:
3854  PredIdx = 2;
3855  break;
3856  case CmpInst::ICMP_ULT:
3857  PredIdx = 3;
3858  SwapOperands = true;
3859  break;
3860  case CmpInst::ICMP_ULE:
3861  PredIdx = 4;
3862  SwapOperands = true;
3863  break;
3864  case CmpInst::ICMP_SGT:
3865  PredIdx = 5;
3866  break;
3867  case CmpInst::ICMP_SGE:
3868  PredIdx = 6;
3869  break;
3870  case CmpInst::ICMP_SLT:
3871  PredIdx = 7;
3872  SwapOperands = true;
3873  break;
3874  case CmpInst::ICMP_SLE:
3875  PredIdx = 8;
3876  SwapOperands = true;
3877  break;
3878  default:
3879  llvm_unreachable("Unhandled icmp predicate");
3880  return false;
3881  }
3882 
3883  // This table obviously should be tablegen'd when we have our GISel native
3884  // tablegen selector.
3885 
3886  static const unsigned OpcTable[4][4][9] = {
3887  {
3888  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3889  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3890  0 /* invalid */},
3891  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3892  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3893  0 /* invalid */},
3894  {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
3895  AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
3896  AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
3897  {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
3898  AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
3899  AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
3900  },
3901  {
3902  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3903  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3904  0 /* invalid */},
3905  {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
3906  AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
3907  AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
3908  {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
3909  AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
3910  AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
3911  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3912  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3913  0 /* invalid */}
3914  },
3915  {
3916  {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
3917  AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
3918  AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
3919  {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
3920  AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
3921  AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
3922  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3923  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3924  0 /* invalid */},
3925  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3926  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3927  0 /* invalid */}
3928  },
3929  {
3930  {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
3931  AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
3932  AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
3933  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3934  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3935  0 /* invalid */},
3936  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3937  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3938  0 /* invalid */},
3939  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3940  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3941  0 /* invalid */}
3942  },
3943  };
3944  unsigned EltIdx = Log2_32(SrcEltSize / 8);
3945  unsigned NumEltsIdx = Log2_32(NumElts / 2);
3946  unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
3947  if (!Opc) {
3948  LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
3949  return false;
3950  }
3951 
3952  const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3953  const TargetRegisterClass *SrcRC =
3954  getRegClassForTypeOnBank(SrcTy, VecRB, true);
3955  if (!SrcRC) {
3956  LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3957  return false;
3958  }
3959 
3960  unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
3961  if (SrcTy.getSizeInBits() == 128)
3962  NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
3963 
3964  if (SwapOperands)
3965  std::swap(SrcReg, Src2Reg);
3966 
3967  auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
3968  constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3969 
3970  // Invert if we had a 'ne' cc.
3971  if (NotOpc) {
3972  Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
3973  constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3974  } else {
3975  MIB.buildCopy(DstReg, Cmp.getReg(0));
3976  }
3977  RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
3978  I.eraseFromParent();
3979  return true;
3980 }
3981 
3982 MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3983  unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3984  MachineIRBuilder &MIRBuilder) const {
3985  auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3986 
3987  auto BuildFn = [&](unsigned SubregIndex) {
3988  auto Ins =
3989  MIRBuilder
3990  .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3991  .addImm(SubregIndex);
3992  constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3993  constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3994  return &*Ins;
3995  };
3996 
3997  switch (EltSize) {
3998  case 16:
3999  return BuildFn(AArch64::hsub);
4000  case 32:
4001  return BuildFn(AArch64::ssub);
4002  case 64:
4003  return BuildFn(AArch64::dsub);
4004  default:
4005  return nullptr;
4006  }
4007 }
4008 
4009 bool AArch64InstructionSelector::selectMergeValues(
4010  MachineInstr &I, MachineRegisterInfo &MRI) {
4011  assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
4012  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
4013  const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
4014  assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
4015  const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
4016 
4017  if (I.getNumOperands() != 3)
4018  return false;
4019 
4020  // Merging 2 s64s into an s128.
4021  if (DstTy == LLT::scalar(128)) {
4022  if (SrcTy.getSizeInBits() != 64)
4023  return false;
4024  Register DstReg = I.getOperand(0).getReg();
4025  Register Src1Reg = I.getOperand(1).getReg();
4026  Register Src2Reg = I.getOperand(2).getReg();
4027  auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
4028  MachineInstr *InsMI =
4029  emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
4030  if (!InsMI)
4031  return false;
4032  MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
4033  Src2Reg, /* LaneIdx */ 1, RB, MIB);
4034  if (!Ins2MI)
4035  return false;
4036  constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
4037  constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
4038  I.eraseFromParent();
4039  return true;
4040  }
4041 
4042  if (RB.getID() != AArch64::GPRRegBankID)
4043  return false;
4044 
4045  if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
4046  return false;
4047 
4048  auto *DstRC = &AArch64::GPR64RegClass;
4049  Register SubToRegDef = MRI.createVirtualRegister(DstRC);
4050  MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
4051  TII.get(TargetOpcode::SUBREG_TO_REG))
4052  .addDef(SubToRegDef)
4053  .addImm(0)
4054  .addUse(I.getOperand(1).getReg())
4055  .addImm(AArch64::sub_32);
4056  Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
4057  // Need to anyext the second scalar before we can use bfm
4058  MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
4059  TII.get(TargetOpcode::SUBREG_TO_REG))
4060  .addDef(SubToRegDef2)
4061  .addImm(0)
4062  .addUse(I.getOperand(2).getReg())
4063  .addImm(AArch64::sub_32);
4064  MachineInstr &BFM =
4065  *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
4066  .addDef(I.getOperand(0).getReg())
4067  .addUse(SubToRegDef)
4068  .addUse(SubToRegDef2)
4069  .addImm(32)
4070  .addImm(31);
4071  constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
4072  constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
4073  constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
4074  I.eraseFromParent();
4075  return true;
4076 }
4077 
4078 static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
4079  const unsigned EltSize) {
4080  // Choose a lane copy opcode and subregister based off of the size of the
4081  // vector's elements.
4082  switch (EltSize) {
4083  case 8:
4084  CopyOpc = AArch64::DUPi8;
4085  ExtractSubReg = AArch64::bsub;
4086  break;
4087  case 16:
4088  CopyOpc = AArch64::DUPi16;
4089  ExtractSubReg = AArch64::hsub;
4090  break;
4091  case 32:
4092  CopyOpc = AArch64::DUPi32;
4093  ExtractSubReg = AArch64::ssub;
4094  break;
4095  case 64:
4096  CopyOpc = AArch64::DUPi64;
4097  ExtractSubReg = AArch64::dsub;
4098  break;
4099  default:
4100  // Unknown size, bail out.
4101  LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
4102  return false;
4103  }
4104  return true;
4105 }
4106 
4107 MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
4108  Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
4109  Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
4110  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4111  unsigned CopyOpc = 0;
4112  unsigned ExtractSubReg = 0;
4113  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
4114  LLVM_DEBUG(
4115  dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
4116  return nullptr;
4117  }
4118 
4119  const TargetRegisterClass *DstRC =
4120  getRegClassForTypeOnBank(ScalarTy, DstRB, true);
4121  if (!DstRC) {
4122  LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
4123  return nullptr;
4124  }
4125 
4126  const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
4127  const LLT &VecTy = MRI.getType(VecReg);
4128  const TargetRegisterClass *VecRC =
4129  getRegClassForTypeOnBank(VecTy, VecRB, true);
4130  if (!VecRC) {
4131  LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
4132  return nullptr;
4133  }
4134 
4135  // The register that we're going to copy into.
4136  Register InsertReg = VecReg;
4137  if (!DstReg)
4138  DstReg = MRI.createVirtualRegister(DstRC);
4139  // If the lane index is 0, we just use a subregister COPY.
4140  if (LaneIdx == 0) {
4141  auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
4142  .addReg(VecReg, 0, ExtractSubReg);
4143  RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
4144  return &*Copy;
4145  }
4146 
4147  // Lane copies require 128-bit wide registers. If we're dealing with an
4148  // unpacked vector, then we need to move up to that width. Insert an implicit
4149  // def and a subregister insert to get us there.
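// For example, extracting lane 1 from a 64-bit v2s32 source roughly becomes
// an IMPLICIT_DEF plus INSERT_SUBREG to widen the value into an FPR128
// register, followed by DUPi32 with lane index 1 to copy the element out.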
4150  if (VecTy.getSizeInBits() != 128) {
4151  MachineInstr *ScalarToVector = emitScalarToVector(
4152  VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
4153  if (!ScalarToVector)
4154  return nullptr;
4155  InsertReg = ScalarToVector->getOperand(0).getReg();
4156  }
4157 
4158  MachineInstr *LaneCopyMI =
4159  MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
4160  constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
4161 
4162  // Make sure that we actually constrain the initial copy.
4163  RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
4164  return LaneCopyMI;
4165 }
4166 
4167 bool AArch64InstructionSelector::selectExtractElt(
4168  MachineInstr &I, MachineRegisterInfo &MRI) {
4169  assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
4170  "unexpected opcode!");
4171  Register DstReg = I.getOperand(0).getReg();
4172  const LLT NarrowTy = MRI.getType(DstReg);
4173  const Register SrcReg = I.getOperand(1).getReg();
4174  const LLT WideTy = MRI.getType(SrcReg);
4175  (void)WideTy;
4176  assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
4177  "source register size too small!");
4178  assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
4179 
4180  // Need the lane index to determine the correct copy opcode.
4181  MachineOperand &LaneIdxOp = I.getOperand(2);
4182  assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
4183 
4184  if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4185  LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
4186  return false;
4187  }
4188 
4189  // Find the index to extract from.
4190  auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
4191  if (!VRegAndVal)
4192  return false;
4193  unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4194 
4195 
4196  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
4197  MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
4198  LaneIdx, MIB);
4199  if (!Extract)
4200  return false;
4201 
4202  I.eraseFromParent();
4203  return true;
4204 }
4205 
4206 bool AArch64InstructionSelector::selectSplitVectorUnmerge(
4207  MachineInstr &I, MachineRegisterInfo &MRI) {
4208  unsigned NumElts = I.getNumOperands() - 1;
4209  Register SrcReg = I.getOperand(NumElts).getReg();
4210  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4211  const LLT SrcTy = MRI.getType(SrcReg);
4212 
4213  assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
4214  if (SrcTy.getSizeInBits() > 128) {
4215  LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
4216  return false;
4217  }
4218 
4219  // We implement a split vector operation by treating the sub-vectors as
4220  // scalars and extracting them.
4221  const RegisterBank &DstRB =
4222  *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
4223  for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4224  Register Dst = I.getOperand(OpIdx).getReg();
4225  MachineInstr *Extract =
4226  emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
4227  if (!Extract)
4228  return false;
4229  }
4230  I.eraseFromParent();
4231  return true;
4232 }
4233 
4234 bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
4235  MachineRegisterInfo &MRI) {
4236  assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4237  "unexpected opcode");
4238 
4239  // TODO: Handle unmerging into GPRs and from scalars to scalars.
4240  if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
4241  AArch64::FPRRegBankID ||
4242  RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
4243  AArch64::FPRRegBankID) {
4244  LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
4245  "currently unsupported.\n");
4246  return false;
4247  }
4248 
4249  // The last operand is the vector source register, and every other operand is
4250  // a register to unpack into.
4251  unsigned NumElts = I.getNumOperands() - 1;
4252  Register SrcReg = I.getOperand(NumElts).getReg();
4253  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4254  const LLT WideTy = MRI.getType(SrcReg);
4255  (void)WideTy;
4256  assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
4257  "can only unmerge from vector or s128 types!");
4258  assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
4259  "source register size too small!");
4260 
4261  if (!NarrowTy.isScalar())
4262  return selectSplitVectorUnmerge(I, MRI);
4263 
4264  // Choose a lane copy opcode and subregister based off of the size of the
4265  // vector's elements.
4266  unsigned CopyOpc = 0;
4267  unsigned ExtractSubReg = 0;
4268  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
4269  return false;
4270 
4271  // Set up for the lane copies.
4272  MachineBasicBlock &MBB = *I.getParent();
4273 
4274  // Stores the registers we'll be copying from.
4275  SmallVector<Register, 4> InsertRegs;
4276 
4277  // We'll use the first register twice, so we only need NumElts-1 registers.
4278  unsigned NumInsertRegs = NumElts - 1;
4279 
4280  // If our elements fit into exactly 128 bits, then we can copy from the source
4281  // directly. Otherwise, we need to do a bit of setup with some subregister
4282  // inserts.
4283  if (NarrowTy.getSizeInBits() * NumElts == 128) {
4284  InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
4285  } else {
4286  // No. We have to perform subregister inserts. For each insert, create an
4287  // implicit def and a subregister insert, and save the register we create.
4288  const TargetRegisterClass *RC = getRegClassForTypeOnBank(
4289  LLT::fixed_vector(NumElts, WideTy.getScalarSizeInBits()),
4290  *RBI.getRegBank(SrcReg, MRI, TRI));
4291  unsigned SubReg = 0;
4292  bool Found = getSubRegForClass(RC, TRI, SubReg);
4293  (void)Found;
4294  assert(Found && "expected to find last operand's subreg idx");
4295  for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4296  Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4297  MachineInstr &ImpDefMI =
4298  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
4299  ImpDefReg);
4300 
4301  // Now, create the subregister insert from SrcReg.
4302  Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4303  MachineInstr &InsMI =
4304  *BuildMI(MBB, I, I.getDebugLoc(),
4305  TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4306  .addUse(ImpDefReg)
4307  .addUse(SrcReg)
4308  .addImm(SubReg);
4309 
4310  constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
4311  constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
4312 
4313  // Save the register so that we can copy from it after.
4314  InsertRegs.push_back(InsertReg);
4315  }
4316  }
4317 
4318  // Now that we've created any necessary subregister inserts, we can
4319  // create the copies.
4320  //
4321  // Perform the first copy separately as a subregister copy.
4322  Register CopyTo = I.getOperand(0).getReg();
4323  auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4324  .addReg(InsertRegs[0], 0, ExtractSubReg);
4325  constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
4326 
4327  // Now, perform the remaining copies as vector lane copies.
4328  unsigned LaneIdx = 1;
4329  for (Register InsReg : InsertRegs) {
4330  Register CopyTo = I.getOperand(LaneIdx).getReg();
4331  MachineInstr &CopyInst =
4332  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
4333  .addUse(InsReg)
4334  .addImm(LaneIdx);
4335  constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
4336  ++LaneIdx;
4337  }
4338 
4339  // Separately constrain the first copy's destination. Because of the
4340  // limitation in constrainOperandRegClass, we can't guarantee that this will
4341  // actually be constrained. So, do it ourselves using the second operand.
4342  const TargetRegisterClass *RC =
4343  MRI.getRegClassOrNull(I.getOperand(1).getReg());
4344  if (!RC) {
4345  LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
4346  return false;
4347  }
4348 
4349  RBI.constrainGenericRegister(CopyTo, *RC, MRI);
4350  I.eraseFromParent();
4351  return true;
4352 }
4353 
4354 bool AArch64InstructionSelector::selectConcatVectors(
4355  MachineInstr &I, MachineRegisterInfo &MRI) {
4356  assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4357  "Unexpected opcode");
4358  Register Dst = I.getOperand(0).getReg();
4359  Register Op1 = I.getOperand(1).getReg();
4360  Register Op2 = I.getOperand(2).getReg();
4361  MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4362  if (!ConcatMI)
4363  return false;
4364  I.eraseFromParent();
4365  return true;
4366 }
4367 
4368 unsigned
4369 AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4370  MachineFunction &MF) const {
4371  Type *CPTy = CPVal->getType();
4372  Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
4373 
4374  MachineConstantPool *MCP = MF.getConstantPool();
4375  return MCP->getConstantPoolIndex(CPVal, Alignment);
4376 }
4377 
4378 MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4379  const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4380  auto &MF = MIRBuilder.getMF();
4381  unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4382 
4383  auto Adrp =
4384  MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4385  .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
4386 
4387  MachineInstr *LoadMI = nullptr;
4388  MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4389  unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
4390  switch (Size) {
4391  case 16:
4392  LoadMI =
4393  &*MIRBuilder
4394  .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
4395  .addConstantPoolIndex(CPIdx, 0,
4396  AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4397  break;
4398  case 8:
4399  LoadMI =
4400  &*MIRBuilder
4401  .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
4402  .addConstantPoolIndex(CPIdx, 0,
4403  AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4404  break;
4405  case 4:
4406  LoadMI =
4407  &*MIRBuilder
4408  .buildInstr(AArch64::LDRSui, {&AArch64::FPR32RegClass}, {Adrp})
4409  .addConstantPoolIndex(CPIdx, 0,
4410  AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4411  break;
4412  case 2:
4413  LoadMI =
4414  &*MIRBuilder
4415  .buildInstr(AArch64::LDRHui, {&AArch64::FPR16RegClass}, {Adrp})
4416  .addConstantPoolIndex(CPIdx, 0,
4417  AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4418  break;
4419  default:
4420  LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4421  << *CPVal->getType());
4422  return nullptr;
4423  }
4424  LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
4425  MachineMemOperand::MOLoad,
4426  Size, Align(Size)));
4428  constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
4429  return LoadMI;
4430 }
4431 
4432 /// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
4433 /// size and RB.
4434 static std::pair<unsigned, unsigned>
4435 getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4436  unsigned Opc, SubregIdx;
4437  if (RB.getID() == AArch64::GPRRegBankID) {
4438  if (EltSize == 16) {
4439  Opc = AArch64::INSvi16gpr;
4440  SubregIdx = AArch64::ssub;
4441  } else if (EltSize == 32) {
4442  Opc = AArch64::INSvi32gpr;
4443  SubregIdx = AArch64::ssub;
4444  } else if (EltSize == 64) {
4445  Opc = AArch64::INSvi64gpr;
4446  SubregIdx = AArch64::dsub;
4447  } else {
4448  llvm_unreachable("invalid elt size!");
4449  }
4450  } else {
4451  if (EltSize == 8) {
4452  Opc = AArch64::INSvi8lane;
4453  SubregIdx = AArch64::bsub;
4454  } else if (EltSize == 16) {
4455  Opc = AArch64::INSvi16lane;
4456  SubregIdx = AArch64::hsub;
4457  } else if (EltSize == 32) {
4458  Opc = AArch64::INSvi32lane;
4459  SubregIdx = AArch64::ssub;
4460  } else if (EltSize == 64) {
4461  Opc = AArch64::INSvi64lane;
4462  SubregIdx = AArch64::dsub;
4463  } else {
4464  llvm_unreachable("invalid elt size!");
4465  }
4466  }
4467  return std::make_pair(Opc, SubregIdx);
4468 }
4469 
4470 MachineInstr *AArch64InstructionSelector::emitInstr(
4471  unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4472  std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4473  const ComplexRendererFns &RenderFns) const {
4474  assert(Opcode && "Expected an opcode?");
4475  assert(!isPreISelGenericOpcode(Opcode) &&
4476  "Function should only be used to produce selected instructions!");
4477  auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4478  if (RenderFns)
4479  for (auto &Fn : *RenderFns)
4480  Fn(MI);
4481  constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4482  return &*MI;
4483 }
4484 
4485 MachineInstr *AArch64InstructionSelector::emitAddSub(
4486  const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4487  Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4488  MachineIRBuilder &MIRBuilder) const {
4489  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4490  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4491  auto Ty = MRI.getType(LHS.getReg());
4492  assert(!Ty.isVector() && "Expected a scalar or pointer?");
4493  unsigned Size = Ty.getSizeInBits();
4494  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4495  bool Is32Bit = Size == 32;
4496 
4497  // INSTRri form with positive arithmetic immediate.
4498  if (auto Fns = selectArithImmed(RHS))
4499  return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4500  MIRBuilder, Fns);
4501 
4502  // INSTRri form with negative arithmetic immediate.
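// (For example, an ADD with an immediate of -16 can be emitted as the
// corresponding SUB with +16; slot [3] of AddrModeAndSizeToOpcode holds the
// inverse-operation opcodes used for this.)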
4503  if (auto Fns = selectNegArithImmed(RHS))
4504  return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4505  MIRBuilder, Fns);
4506 
4507  // INSTRrx form.
4508  if (auto Fns = selectArithExtendedRegister(RHS))
4509  return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4510  MIRBuilder, Fns);
4511 
4512  // INSTRrs form.
4513  if (auto Fns = selectShiftedRegister(RHS))
4514  return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4515  MIRBuilder, Fns);
4516  return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4517  MIRBuilder);
4518 }
4519 
4520 MachineInstr *
4521 AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4522  MachineOperand &RHS,
4523  MachineIRBuilder &MIRBuilder) const {
4524  const std::array<std::array<unsigned, 2>, 5> OpcTable{
4525  {{AArch64::ADDXri, AArch64::ADDWri},
4526  {AArch64::ADDXrs, AArch64::ADDWrs},
4527  {AArch64::ADDXrr, AArch64::ADDWrr},
4528  {AArch64::SUBXri, AArch64::SUBWri},
4529  {AArch64::ADDXrx, AArch64::ADDWrx}}};
4530  return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4531 }
4532 
4533 MachineInstr *
4534 AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4535  MachineOperand &RHS,
4536  MachineIRBuilder &MIRBuilder) const {
4537  const std::array<std::array<unsigned, 2>, 5> OpcTable{
4538  {{AArch64::ADDSXri, AArch64::ADDSWri},
4539  {AArch64::ADDSXrs, AArch64::ADDSWrs},
4540  {AArch64::ADDSXrr, AArch64::ADDSWrr},
4541  {AArch64::SUBSXri, AArch64::SUBSWri},
4542  {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4543  return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4544 }
4545 
4546 MachineInstr *
4547 AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4548  MachineOperand &RHS,
4549  MachineIRBuilder &MIRBuilder) const {
4550  const std::array<std::array<unsigned, 2>, 5> OpcTable{
4551  {{AArch64::SUBSXri, AArch64::SUBSWri},
4552  {AArch64::SUBSXrs, AArch64::SUBSWrs},
4553  {AArch64::SUBSXrr, AArch64::SUBSWrr},
4554  {AArch64::ADDSXri, AArch64::ADDSWri},
4555  {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4556  return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4557 }
4558 
4559 MachineInstr *
4560 AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4561  MachineIRBuilder &MIRBuilder) const {
4562  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4563  bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4564  auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4565  return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4566 }
4567 
4568 MachineInstr *
4569 AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4570  MachineIRBuilder &MIRBuilder) const {
4571  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4572  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4573  LLT Ty = MRI.getType(LHS.getReg());
4574  unsigned RegSize = Ty.getSizeInBits();
4575  bool Is32Bit = (RegSize == 32);
4576  const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4577  {AArch64::ANDSXrs, AArch64::ANDSWrs},
4578  {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4579  // ANDS needs a logical immediate for its immediate form. Check if we can
4580  // fold one in.
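// For example, a test against 0xff is a valid logical immediate and can use
// ANDSWri directly; masks that cannot be encoded fall through to the
// shifted-register and register-register forms below.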
4581  if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4582  int64_t Imm = ValAndVReg->Value.getSExtValue();
4583 
4584  if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
4585  auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4586  TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
4587  constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4588  return &*TstMI;
4589  }
4590  }
4591 
4592  if (auto Fns = selectLogicalShiftedRegister(RHS))
4593  return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4594  return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4595 }
4596 
4597 MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4598  MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4599  MachineIRBuilder &MIRBuilder) const {
4600  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4601  assert(Predicate.isPredicate() && "Expected predicate?");
4602  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4603  LLT CmpTy = MRI.getType(LHS.getReg());
4604  assert(!CmpTy.isVector() && "Expected scalar or pointer");
4605  unsigned Size = CmpTy.getSizeInBits();
4606  (void)Size;
4607  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4608  // Fold the compare into a cmn or tst if possible.
4609  if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4610  return FoldCmp;
4611  auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
4612  return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4613 }
4614 
4615 MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4616  Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4617  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4618 #ifndef NDEBUG
4619  LLT Ty = MRI.getType(Dst);
4620  assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4621  "Expected a 32-bit scalar register?");
4622 #endif
4623  const Register ZReg = AArch64::WZR;
4624  AArch64CC::CondCode CC1, CC2;
4625  changeFCMPPredToAArch64CC(Pred, CC1, CC2);
4626  auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
4627  if (CC2 == AArch64CC::AL)
4628  return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
4629  MIRBuilder);
4630  const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4631  Register Def1Reg = MRI.createVirtualRegister(RC);
4632  Register Def2Reg = MRI.createVirtualRegister(RC);
4633  auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
4634  emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
4635  emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
4636  auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4637  constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4638  return &*OrMI;
4639 }
4640 
4641 MachineInstr *
4642 AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS,
4643  MachineIRBuilder &MIRBuilder,
4644  Optional<CmpInst::Predicate> Pred) const {
4645  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4646  LLT Ty = MRI.getType(LHS);
4647  if (Ty.isVector())
4648  return nullptr;
4649  unsigned OpSize = Ty.getSizeInBits();
4650  if (OpSize != 32 && OpSize != 64)
4651  return nullptr;
4652 
4653  // If this is a compare against +0.0, then we don't have
4654  // to explicitly materialize a constant.
4655  const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4656  bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4657 
4658  auto IsEqualityPred = [](CmpInst::Predicate P) {
4659  return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4660  P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4661  };
4662  if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4663  // Try commutating the operands.
4664  const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4665  if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4666  ShouldUseImm = true;
4667  std::swap(LHS, RHS);
4668  }
4669  }
4670  unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
4671  {AArch64::FCMPSri, AArch64::FCMPDri}};
4672  unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64];
4673 
4674  // Partially build the compare. Decide if we need to add a use for the
4675  // third operand based off whether or not we're comparing against 0.0.
4676  auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4677  CmpMI.setMIFlags(MachineInstr::NoFPExcept);
4678  if (!ShouldUseImm)
4679  CmpMI.addUse(RHS);
4680  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4681  return &*CmpMI;
4682 }
4683 
4684 MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4685  Optional<Register> Dst, Register Op1, Register Op2,
4686  MachineIRBuilder &MIRBuilder) const {
4687  // We implement a vector concat by:
4688  // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4689  // 2. Insert the upper vector into the destination's upper element
4690  // TODO: some of this code is common with G_BUILD_VECTOR handling.
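// For example, concatenating two v2s32 operands: Op1 is widened into the low
// 64 bits of a 128-bit register, then INSvi64lane inserts Op2 as the high 64
// bits, giving a v4s32 result.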
4691  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4692 
4693  const LLT Op1Ty = MRI.getType(Op1);
4694  const LLT Op2Ty = MRI.getType(Op2);
4695 
4696  if (Op1Ty != Op2Ty) {
4697  LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4698  return nullptr;
4699  }
4700  assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4701 
4702  if (Op1Ty.getSizeInBits() >= 128) {
4703  LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4704  return nullptr;
4705  }
4706 
4707  // At the moment we just support 64 bit vector concats.
4708  if (Op1Ty.getSizeInBits() != 64) {
4709  LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
4710  return nullptr;
4711  }
4712 
4713  const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4714  const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4715  const TargetRegisterClass *DstRC =
4716  getRegClassForTypeOnBank(Op1Ty.multiplyElements(2), FPRBank);
4717 
4718  MachineInstr *WidenedOp1 =
4719  emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4720  MachineInstr *WidenedOp2 =
4721  emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4722  if (!WidenedOp1 || !WidenedOp2) {
4723  LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4724  return nullptr;
4725  }
4726 
4727  // Now do the insert of the upper element.
4728  unsigned InsertOpc, InsSubRegIdx;
4729  std::tie(InsertOpc, InsSubRegIdx) =
4730  getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4731 
4732  if (!Dst)
4733  Dst = MRI.createVirtualRegister(DstRC);
4734  auto InsElt =
4735  MIRBuilder
4736  .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4737  .addImm(1) /* Lane index */
4738  .addUse(WidenedOp2->getOperand(0).getReg())
4739  .addImm(0);
4740  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4741  return &*InsElt;
4742 }
4743 
4744 MachineInstr *
4745 AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
4746  Register Src2, AArch64CC::CondCode Pred,
4747  MachineIRBuilder &MIRBuilder) const {
4748  auto &MRI = *MIRBuilder.getMRI();
4749  const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
4750  // If we used a register class, then this won't necessarily have an LLT.
4751  // Compute the size based off whether or not we have a class or bank.
4752  unsigned Size;
4753  if (const auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
4754  Size = TRI.getRegSizeInBits(*RC);
4755  else
4756  Size = MRI.getType(Dst).getSizeInBits();
4757  // Some opcodes use s1.
4758  assert(Size <= 64 && "Expected 64 bits or less only!");
4759  static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4760  unsigned Opc = OpcTable[Size == 64];
4761  auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
4762  constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
4763  return &*CSINC;
4764 }
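// Note the idiom this enables (sketch; register names are illustrative):
// since CSINC d, n, m, cc computes d = cc ? n : m + 1, tying both sources to
// the zero register gives the familiar CSET,
//   %d:gpr32 = CSINCWr $wzr, $wzr, <inverted cc>   ; %d = (cc holds) ? 1 : 0
// which is exactly how emitCSetForFCmp above materializes compare results.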
4765 
4766 std::pair<MachineInstr *, AArch64CC::CondCode>
4767 AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4768  MachineOperand &LHS,
4769  MachineOperand &RHS,
4770  MachineIRBuilder &MIRBuilder) const {
4771  switch (Opcode) {
4772  default:
4773  llvm_unreachable("Unexpected opcode!");
4774  case TargetOpcode::G_SADDO:
4775  return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4776  case TargetOpcode::G_UADDO:
4777  return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4778  case TargetOpcode::G_SSUBO:
4779  return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4780  case TargetOpcode::G_USUBO:
4781  return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4782  }
4783 }
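// A sketch of how callers use the pair returned here (register names are
// illustrative): for G_UADDO the ADDS sets NZCV and the reported condition is
// HS ("carry set"), so the overflow bit can be materialized with the
// CSINC/CSET pattern above, e.g.
//   %val:gpr32 = ADDSWrr %a, %b, implicit-def $nzcv
//   %ovf:gpr32 = CSINCWr $wzr, $wzr, <inverted hs>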
4784 
4785 /// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
4786 /// expressed as a conjunction.
4787 /// \param CanNegate Set to true if we can negate the whole sub-tree just by
4788 /// changing the conditions on the CMP tests.
4789 /// (this means we can call emitConjunctionRec() with
4790 /// Negate==true on this sub-tree)
4791 /// \param MustBeFirst Set to true if this subtree needs to be negated and we
4792 /// cannot do the negation naturally. We are required to
4793 /// emit the subtree first in this case.
4794  /// \param WillNegate Is true if we are called when the result of this
4795 /// subexpression must be negated. This happens when the
4796 /// outer expression is an OR. We can use this fact to know
4797 /// that we have a double negation (or (or ...) ...) that
4798 /// can be implemented for free.
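 /// For example, given
 ///   %c = G_OR (G_ICMP slt, %a, %b), (G_ICMP sgt, %x, %y)
 /// both leaves are compares and can be negated individually; if the caller
 /// will negate the OR anyway (WillNegate == true) the whole sub-tree negates
 /// for free by flipping the two predicates, otherwise the OR cannot be
 /// negated naturally and MustBeFirst is set so it is emitted first.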
4799 static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
4800  bool WillNegate, MachineRegisterInfo &MRI,
4801  unsigned Depth = 0) {
4802  if (!MRI.hasOneNonDBGUse(Val))
4803  return false;
4804  MachineInstr *ValDef = MRI.getVRegDef(Val);
4805  unsigned Opcode = ValDef->getOpcode();
4806  if (isa<GAnyCmp>(ValDef)) {
4807  CanNegate = true;
4808  MustBeFirst = false;
4809  return true;
4810  }
4811  // Protect against exponential runtime and stack overflow.
4812  if (Depth > 6)
4813  return false;
4814  if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
4815  bool IsOR = Opcode == TargetOpcode::G_OR;
4816  Register O0 = ValDef->getOperand(1).getReg();
4817  Register O1 = ValDef->getOperand(2).getReg();
4818  bool CanNegateL;
4819  bool MustBeFirstL;
4820  if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, MRI, Depth + 1))
4821  return false;
4822  bool CanNegateR;
4823  bool MustBeFirstR;
4824  if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, MRI, Depth + 1))
4825  return false;
4826 
4827  if (MustBeFirstL && MustBeFirstR)
4828  return false;
4829 
4830  if (IsOR) {
4831  // For an OR expression we need to be able to naturally negate at least
4832  // one side or we cannot do the transformation at all.
4833  if (!CanNegateL && !CanNegateR)
4834  return false;
4835  // If the result of the OR will be negated and we can naturally negate
4836  // the leaves, then this sub-tree as a whole negates naturally.
4837  CanNegate = WillNegate && CanNegateL && CanNegateR;
4838  // If we cannot naturally negate the whole sub-tree, then this must be
4839  // emitted first.
4840  MustBeFirst = !CanNegate;
4841  } else {
4842  assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
4843  // We cannot naturally negate an AND operation.
4844  CanNegate = false;
4845  MustBeFirst = MustBeFirstL || MustBeFirstR;
4846  }
4847  return true;
4848  }
4849  return false;
4850 }
4851 
4852 MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
4853  Register LHS, Register RHS, CmpInst::Predicate CC,
4854  AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
4855  MachineIRBuilder &MIB) const {
4856  // TODO: emit CMN as an optimization.
4857  auto &MRI = *MIB.getMRI();
4858  LLT OpTy = MRI.getType(LHS);
4859  assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
4860  unsigned CCmpOpc;
4861  Optional<ValueAndVReg> C;
4862  if (CmpInst::isIntPredicate(CC)) {
4863  C = getIConstantVRegValWithLookThrough(RHS, MRI);
4864  if (C && C->Value.ult(32))
4865  CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
4866  else
4867  CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
4868  } else {
4869  switch (OpTy.getSizeInBits()) {
4870  case 16:
4871  CCmpOpc = AArch64::FCCMPHrr;
4872  break;
4873  case 32:
4874  CCmpOpc = AArch64::FCCMPSrr;
4875  break;
4876  case 64:
4877  CCmpOpc = AArch64::FCCMPDrr;
4878  break;
4879  default:
4880  return nullptr;
4881  }
4882  }
4883  AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
4884  unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
4885  auto CCmp =
4886  MIB.buildInstr(CCmpOpc, {}, {LHS});
4887  if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
4888  CCmp.addImm(C->Value.getZExtValue());
4889  else
4890  CCmp.addReg(RHS);
4891  CCmp.addImm(NZCV).addImm(Predicate);
4892  constrainSelectedInstRegOperands(*CCmp, TII, TRI, RBI);
4893  return &*CCmp;
4894 }
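// Roughly, for an integer test gated on an earlier one (register names and
// the immediate are illustrative), the builder above produces
//   CCMPWi %lhs, 3, <nzcv>, <predicate>
// where <nzcv> is chosen so that if <predicate> (the condition established by
// the earlier tests) does not hold, the flags are forced into a state in which
// OutCC fails; if it does hold, a real compare of %lhs against the second
// operand is performed and sets the flags as usual.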
4895 
4896 MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
4897  Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
4898  AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
4899  // We're at a tree leaf, produce a conditional comparison operation.
4900  auto &MRI = *MIB.getMRI();
4901  MachineInstr *ValDef = MRI.getVRegDef(Val);
4902  unsigned Opcode = ValDef->getOpcode();
4903  if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
4904  Register LHS = Cmp->getLHSReg();
4905  Register RHS = Cmp->getRHSReg();
4906  CmpInst::Predicate CC = Cmp->getCond();
4907  if (Negate)
4908  CC = CmpInst::getInversePredicate(CC);
4909  if (isa<GICmp>(Cmp)) {
4910  OutCC = changeICMPPredToAArch64CC(CC);
4911  } else {
4912  // Handle special FP cases.
4913  AArch64CC::CondCode ExtraCC;
4914  changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
4915  // Some floating point conditions can't be tested with a single condition
4916  // code. Construct an additional comparison in this case.
4917  if (ExtraCC != AArch64CC::AL) {
4918  MachineInstr *ExtraCmp;
4919  if (!CCOp)
4920  ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC);
4921  else
4922  ExtraCmp =
4923  emitConditionalComparison(LHS, RHS, CC, Predicate, ExtraCC, MIB);
4924  CCOp = ExtraCmp->getOperand(0).getReg();
4925  Predicate = ExtraCC;
4926  }
4927  }
4928 
4929  // Produce a normal comparison if we are first in the chain
4930  if (!CCOp) {
4931  auto Dst = MRI.cloneVirtualRegister(LHS);
4932  if (isa<GICmp>(Cmp))
4933  return emitSUBS(Dst, Cmp->getOperand(2), Cmp->getOperand(3), MIB);
4934  return emitFPCompare(Cmp->getOperand(2).getReg(),
4935  Cmp->getOperand(3).getReg(), MIB);
4936  }
4937  // Otherwise produce a ccmp.
4938  return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB);
4939  }
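  // Sketch of the overall shape this recursion produces (register names and
  // ordering are illustrative): the leaf reached with no CCOp becomes a plain
  // compare, and every later leaf becomes a conditional compare gated on the
  // condition established so far, e.g. for a two-compare conjunction:
  //   SUBSWrr %a, %b, implicit-def $nzcv
  //   CCMPWr  %c, %d, <fail-nzcv>, <cc of the first test>
  // with the condition code of the last test handed back through OutCC.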
4940  assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");
4941 
4942  bool IsOR = Opcode == TargetOpcode::G_OR;
4943 
4944  Register LHS = ValDef->getOperand(1).getReg();
4945  bool CanNegateL;
4946  bool MustBeFirstL;
4947  bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR, MRI);
4948