//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterBankInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "aarch64-isel"

using namespace llvm;

namespace {

#define GET_GLOBALISEL_PREDICATE_BITSET
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET

class AArch64InstructionSelector : public InstructionSelector {
public:
  AArch64InstructionSelector(const AArch64TargetMachine &TM,
                             const AArch64Subtarget &STI,
                             const AArch64RegisterBankInfo &RBI);

  bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const override;
  static const char *getName() { return DEBUG_TYPE; }

private:
  /// tblgen-erated 'select' implementation, used as the initial selector for
  /// the patterns that don't require complex C++.
  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;

  bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
                          MachineRegisterInfo &MRI) const;
  bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  bool selectVectorASHR(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;

  // Helper to generate an equivalent of scalar_to_vector into a new register,
  // returned via 'Dst'.
  MachineInstr *emitScalarToVector(unsigned EltSize,
                                   const TargetRegisterClass *DstRC,
                                   unsigned Scalar,
                                   MachineIRBuilder &MIRBuilder) const;

  /// Emit a lane insert into \p DstReg, or a new vector register if None is
  /// provided.
  ///
  /// The lane inserted into is defined by \p LaneIdx. The vector source
  /// register is given by \p SrcReg. The register containing the element is
  /// given by \p EltReg.
  MachineInstr *emitLaneInsert(Optional<unsigned> DstReg, unsigned SrcReg,
                               unsigned EltReg, unsigned LaneIdx,
                               const RegisterBank &RB,
                               MachineIRBuilder &MIRBuilder) const;
  bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;

  void collectShuffleMaskIndices(MachineInstr &I, MachineRegisterInfo &MRI,
                                 SmallVectorImpl<Optional<int>> &Idxs) const;
  bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectSplitVectorUnmerge(MachineInstr &I,
                                MachineRegisterInfo &MRI) const;
  bool selectIntrinsicWithSideEffects(MachineInstr &I,
                                      MachineRegisterInfo &MRI) const;
  bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
  unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
  MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
                                         MachineIRBuilder &MIRBuilder) const;

  // Emit a vector concat operation.
  MachineInstr *emitVectorConcat(Optional<unsigned> Dst, unsigned Op1,
                                 unsigned Op2,
                                 MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitExtractVectorElt(Optional<unsigned> DstReg,
                                     const RegisterBank &DstRB, LLT ScalarTy,
                                     unsigned VecReg, unsigned LaneIdx,
                                     MachineIRBuilder &MIRBuilder) const;

  /// Helper function for selecting G_FCONSTANT. If the G_FCONSTANT can be
  /// materialized using a FMOV instruction, then update MI and return it.
  /// Otherwise, do nothing and return a nullptr.
  MachineInstr *emitFMovForFConstant(MachineInstr &MI,
                                     MachineRegisterInfo &MRI) const;

  ComplexRendererFns selectArithImmed(MachineOperand &Root) const;

  ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
                                            unsigned Size) const;

  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 1);
  }
  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 2);
  }
  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 4);
  }
  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 8);
  }
  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 16);
  }

  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
                                           unsigned Size) const;
  template <int Width>
  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
    return selectAddrModeIndexed(Root, Width / 8);
  }
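
  // Illustrative note (not from the original source): the "indexed" renderers
  // above feed the scaled unsigned-immediate addressing mode, where the
  // rendered immediate is the byte offset divided by the access size. E.g.
  // with Width == 64, a byte offset of 32 is rendered as #4 and selects a
  // form like "ldr x0, [x1, #32]".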

  void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const;

  // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
  void materializeLargeCMVal(MachineInstr &I, const Value *V,
                             unsigned char OpFlags) const;

  // Optimization methods.

  // Helper function to check if a reg def is an MI with a given opcode, and
  // return it if so.
  MachineInstr *findMIFromReg(unsigned Reg, unsigned Opc,
                              MachineIRBuilder &MIB) const {
    auto *Def = MIB.getMRI()->getVRegDef(Reg);
    if (!Def || Def->getOpcode() != Opc)
      return nullptr;
    return Def;
  }

  bool tryOptVectorShuffle(MachineInstr &I) const;
  bool tryOptVectorDup(MachineInstr &MI) const;
  bool tryOptSelect(MachineInstr &MI) const;

  const AArch64TargetMachine &TM;
  const AArch64Subtarget &STI;
  const AArch64InstrInfo &TII;
  const AArch64RegisterInfo &TRI;
  const AArch64RegisterBankInfo &RBI;

#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

// We declare the temporaries used by selectImpl() in the class to minimize the
// cost of constructing placeholder values.
#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL
};

} // end anonymous namespace

#define GET_GLOBALISEL_IMPL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

AArch64InstructionSelector::AArch64InstructionSelector(
    const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
    const AArch64RegisterBankInfo &RBI)
    : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

// FIXME: This should be target-independent, inferred from the types declared
// for each class in the bank.
static const TargetRegisterClass *
getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
                         const RegisterBankInfo &RBI,
                         bool GetAllRegSet = false) {
  if (RB.getID() == AArch64::GPRRegBankID) {
    if (Ty.getSizeInBits() <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
    return nullptr;
  }

  if (RB.getID() == AArch64::FPRRegBankID) {
    if (Ty.getSizeInBits() <= 16)
      return &AArch64::FPR16RegClass;
    if (Ty.getSizeInBits() == 32)
      return &AArch64::FPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return &AArch64::FPR64RegClass;
    if (Ty.getSizeInBits() == 128)
      return &AArch64::FPR128RegClass;
    return nullptr;
  }

  return nullptr;
}

/// Given a register bank, and size in bits, return the smallest register class
/// that can represent that combination.
static const TargetRegisterClass *
getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
                      bool GetAllRegSet = false) {
  unsigned RegBankID = RB.getID();

  if (RegBankID == AArch64::GPRRegBankID) {
    if (SizeInBits <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (SizeInBits == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
  }

  if (RegBankID == AArch64::FPRRegBankID) {
    switch (SizeInBits) {
    default:
      return nullptr;
    case 8:
      return &AArch64::FPR8RegClass;
    case 16:
      return &AArch64::FPR16RegClass;
    case 32:
      return &AArch64::FPR32RegClass;
    case 64:
      return &AArch64::FPR64RegClass;
    case 128:
      return &AArch64::FPR128RegClass;
    }
  }

  return nullptr;
}

/// Returns the correct subregister to use for a given register class.
static bool getSubRegForClass(const TargetRegisterClass *RC,
                              const TargetRegisterInfo &TRI, unsigned &SubReg) {
  switch (TRI.getRegSizeInBits(*RC)) {
  case 8:
    SubReg = AArch64::bsub;
    break;
  case 16:
    SubReg = AArch64::hsub;
    break;
  case 32:
    if (RC == &AArch64::GPR32RegClass)
      SubReg = AArch64::sub_32;
    else
      SubReg = AArch64::ssub;
    break;
  case 64:
    SubReg = AArch64::dsub;
    break;
  default:
    LLVM_DEBUG(
        dbgs() << "Couldn't find appropriate subregister for register class.");
    return false;
  }

  return true;
}

/// Check whether \p I is a currently unsupported binary operation:
/// - it has an unsized type
/// - an operand is not a vreg
/// - all operands are not in the same bank
/// These are checks that should someday live in the verifier, but right now,
/// these are mostly limitations of the AArch64 selector.
static bool unsupportedBinOp(const MachineInstr &I,
                             const AArch64RegisterBankInfo &RBI,
                             const MachineRegisterInfo &MRI,
                             const AArch64RegisterInfo &TRI) {
  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  if (!Ty.isValid()) {
    LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
    return true;
  }

  const RegisterBank *PrevOpBank = nullptr;
  for (auto &MO : I.operands()) {
    // FIXME: Support non-register operands.
    if (!MO.isReg()) {
      LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
      return true;
    }

    // FIXME: Can generic operations have physical register operands? If
    // so, this will need to be taught about that, and we'll need to get the
    // bank out of the minimal class for the register.
    // Either way, this needs to be documented (and possibly verified).
    if (!TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
      LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
      return true;
    }

    const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
    if (!OpBank) {
      LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
      return true;
    }

    if (PrevOpBank && OpBank != PrevOpBank) {
      LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
      return true;
    }
    PrevOpBank = OpBank;
  }
  return false;
}

/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
/// and of size \p OpSize.
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
                               unsigned OpSize) {
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    if (OpSize == 32) {
      switch (GenericOpc) {
      case TargetOpcode::G_SHL:
        return AArch64::LSLVWr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVWr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVWr;
      default:
        return GenericOpc;
      }
    } else if (OpSize == 64) {
      switch (GenericOpc) {
      case TargetOpcode::G_GEP:
        return AArch64::ADDXrr;
      case TargetOpcode::G_SHL:
        return AArch64::LSLVXr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVXr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVXr;
      default:
        return GenericOpc;
      }
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDSrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBSrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULSrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVSrr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDDrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBDrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULDrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVDrr;
      case TargetOpcode::G_OR:
        return AArch64::ORRv8i8;
      default:
        return GenericOpc;
      }
    }
    break;
  }
  return GenericOpc;
}
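
// Illustrative usage (not part of the original source): selectBinaryOp maps a
// generic opcode to a concrete one based on bank and size, e.g.
//   selectBinaryOp(TargetOpcode::G_SHL, AArch64::GPRRegBankID, 64)
// yields AArch64::LSLVXr, while an unsupported combination (say, G_FADD on
// the GPR bank) is returned unchanged so the caller can bail out.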

/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
/// appropriate for the (value) register bank \p RegBankID and of memory access
/// size \p OpSize. This returns the variant with the base+unsigned-immediate
/// addressing mode (e.g., LDRXui).
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
                                    unsigned OpSize) {
  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
    case 16:
      return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
    case 32:
      return isStore ? AArch64::STRWui : AArch64::LDRWui;
    case 64:
      return isStore ? AArch64::STRXui : AArch64::LDRXui;
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBui : AArch64::LDRBui;
    case 16:
      return isStore ? AArch64::STRHui : AArch64::LDRHui;
    case 32:
      return isStore ? AArch64::STRSui : AArch64::LDRSui;
    case 64:
      return isStore ? AArch64::STRDui : AArch64::LDRDui;
    }
    break;
  }
  return GenericOpc;
}
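
// Illustrative note: a 32-bit G_LOAD whose value lands on the GPR bank becomes
// LDRWui ("ldr w0, [x1, #imm]"), while the same load on the FPR bank becomes
// LDRSui ("ldr s0, [x1, #imm]"). The register bank, not the type alone,
// decides between the integer and FP/vector variants.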

#ifndef NDEBUG
/// Helper function that verifies that we have a valid copy at the end of
/// selectCopy. Verifies that the source and dest have the expected sizes and
/// then returns true.
static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
                        const MachineRegisterInfo &MRI,
                        const TargetRegisterInfo &TRI,
                        const RegisterBankInfo &RBI) {
  const unsigned DstReg = I.getOperand(0).getReg();
  const unsigned SrcReg = I.getOperand(1).getReg();
  const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);

  // Make sure the size of the source and dest line up.
  assert(
      (DstSize == SrcSize ||
       // Copies are a means to set up initial types, the number of
       // bits may not exactly match.
       (TargetRegisterInfo::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
       // Copies are a means to copy bits around, as long as we are
       // on the same register class, that's fine. Otherwise, that
       // means we need some SUBREG_TO_REG or AND & co.
       (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
      "Copy with different width?!");

  // Check the size of the destination.
  assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
         "GPRs cannot get more than 64-bit width values");

  return true;
}
#endif

/// Helper function for selectCopy. Inserts a subregister copy from
/// \p *From to \p *To, linking it up to \p I.
///
/// e.g, given I = "Dst = COPY SrcReg", we'll transform that into
///
/// CopyReg (From class) = COPY SrcReg
/// SubRegCopy (To class) = COPY CopyReg:SubReg
/// Dst = COPY SubRegCopy
static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI,
                                  const RegisterBankInfo &RBI, unsigned SrcReg,
                                  const TargetRegisterClass *From,
                                  const TargetRegisterClass *To,
                                  unsigned SubReg) {
  MachineIRBuilder MIB(I);
  auto Copy = MIB.buildCopy({From}, {SrcReg});
  auto SubRegCopy = MIB.buildInstr(TargetOpcode::COPY, {To}, {})
                        .addReg(Copy.getReg(0), 0, SubReg);
  MachineOperand &RegOp = I.getOperand(1);
  RegOp.setReg(SubRegCopy.getReg(0));

  // It's possible that the destination register won't be constrained. Make
  // sure that happens.
  if (!TargetRegisterInfo::isPhysicalRegister(I.getOperand(0).getReg()))
    RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);

  return true;
}

/// Helper function to get the source and destination register classes for a
/// copy. Returns a std::pair containing the source register class for the
/// copy, and the destination register class for the copy. If a register class
/// cannot be determined, then it will be nullptr.
static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
                     MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                     const RegisterBankInfo &RBI) {
  unsigned DstReg = I.getOperand(0).getReg();
  unsigned SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
  unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);

  // Special casing for cross-bank copies of s1s. We can technically represent
  // a 1-bit value with any size of register. The minimum size for a GPR is 32
  // bits. So, we need to put the FPR on 32 bits as well.
  //
  // FIXME: I'm not sure if this case holds true outside of copies. If it does,
  // then we can pull it into the helpers that get the appropriate class for a
  // register bank. Or make a new helper that carries along some constraint
  // information.
  if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
    SrcSize = DstSize = 32;

  return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
          getMinClassForRegBank(DstRegBank, DstSize, true)};
}

static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
                       MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                       const RegisterBankInfo &RBI) {

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);

  // Find the correct register classes for the source and destination.
  const TargetRegisterClass *SrcRC;
  const TargetRegisterClass *DstRC;
  std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);

  if (!DstRC) {
    LLVM_DEBUG(dbgs() << "Unexpected dest size "
                      << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
    return false;
  }

  // A couple helpers below, for making sure that the copy we produce is valid.

  // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
  // to verify that the src and dst are the same size, since that's handled by
  // the SUBREG_TO_REG.
  bool KnownValid = false;

  // Returns true, or asserts if something we don't expect happens. Instead of
  // returning true, we return isValidCopy() to ensure that we verify the
  // result.
  auto CheckCopy = [&]() {
    // If we have a bitcast or something, we can't have physical registers.
    assert(
        (I.isCopy() ||
         (!TargetRegisterInfo::isPhysicalRegister(I.getOperand(0).getReg()) &&
          !TargetRegisterInfo::isPhysicalRegister(I.getOperand(1).getReg()))) &&
        "No phys reg on generic operator!");
    assert(KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI));
    (void)KnownValid;
    return true;
  };

  // Is this a copy? If so, then we may need to insert a subregister copy, or
  // a SUBREG_TO_REG.
  if (I.isCopy()) {
    // Yes. Check if there's anything to fix up.
    if (!SrcRC) {
      LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
      return false;
    }

    // Is this a cross-bank copy?
    if (DstRegBank.getID() != SrcRegBank.getID()) {
      // If we're doing a cross-bank copy on different-sized registers, we need
      // to do a bit more work.
      unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
      unsigned DstSize = TRI.getRegSizeInBits(*DstRC);

      if (SrcSize > DstSize) {
        // We're doing a cross-bank copy into a smaller register. We need a
        // subregister copy. First, get a register class that's on the same
        // bank as the destination, but the same size as the source.
        const TargetRegisterClass *SubregRC =
            getMinClassForRegBank(DstRegBank, SrcSize, true);
        assert(SubregRC && "Didn't get a register class for subreg?");

        // Get the appropriate subregister for the destination.
        unsigned SubReg = 0;
        if (!getSubRegForClass(DstRC, TRI, SubReg)) {
          LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n");
          return false;
        }

        // Now, insert a subregister copy using the new register class.
        selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg);
        return CheckCopy();
      }

      else if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 &&
               SrcSize == 16) {
        // Special case for FPR16 to GPR32.
        // FIXME: This can probably be generalized like the above case.
        unsigned PromoteReg =
            MRI.createVirtualRegister(&AArch64::FPR32RegClass);
        BuildMI(*I.getParent(), I, I.getDebugLoc(),
                TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
            .addImm(0)
            .addUse(SrcReg)
            .addImm(AArch64::hsub);
        MachineOperand &RegOp = I.getOperand(1);
        RegOp.setReg(PromoteReg);

        // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
        KnownValid = true;
      }
    }

    // If the destination is a physical register, then there's nothing to
    // change, so we're done.
    if (TargetRegisterInfo::isPhysicalRegister(DstReg))
      return CheckCopy();
  }

  // No need to constrain SrcReg. It will get constrained when we hit one of
  // its uses or defs. Copies do not have constraints.
  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }
  I.setDesc(TII.get(AArch64::COPY));
  return CheckCopy();
}

static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
  if (!DstTy.isScalar() || !SrcTy.isScalar())
    return GenericOpc;

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();

  switch (DstSize) {
  case 32:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  case 64:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  default:
    return GenericOpc;
  };
  return GenericOpc;
}
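
// Illustrative note: in the opcode names above, the letter pair encodes the
// source/destination register widths, e.g. selectFPConvOpc(G_SITOFP,
// /*DstTy=*/s32, /*SrcTy=*/s64) returns SCVTFUXSri, i.e. "scvtf s0, x0"
// (64-bit GPR source, 32-bit FP destination).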

static unsigned selectSelectOpc(MachineInstr &I, MachineRegisterInfo &MRI,
                                const RegisterBankInfo &RBI) {
  // Note: this line was lost in extraction; TRI must be in scope for the
  // getRegBank() call below, so a local definition is assumed here.
  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
  bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
               AArch64::GPRRegBankID);
  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  if (Ty == LLT::scalar(32))
    return IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr;
  else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64))
    return IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr;
  return 0;
}

/// Helper function to select the opcode for a G_FCMP.
static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) {
  // If this is a compare against +0.0, then we don't have to explicitly
  // materialize a constant.
  const ConstantFP *FPImm = getConstantFPVRegVal(I.getOperand(3).getReg(), MRI);
  bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
  unsigned OpSize = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
  if (OpSize != 32 && OpSize != 64)
    return 0;
  unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
                              {AArch64::FCMPSri, AArch64::FCMPDri}};
  return CmpOpcTbl[ShouldUseImm][OpSize == 64];
}

static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return AArch64CC::NE;
  case CmpInst::ICMP_EQ:
    return AArch64CC::EQ;
  case CmpInst::ICMP_SGT:
    return AArch64CC::GT;
  case CmpInst::ICMP_SGE:
    return AArch64CC::GE;
  case CmpInst::ICMP_SLT:
    return AArch64CC::LT;
  case CmpInst::ICMP_SLE:
    return AArch64CC::LE;
  case CmpInst::ICMP_UGT:
    return AArch64CC::HI;
  case CmpInst::ICMP_UGE:
    return AArch64CC::HS;
  case CmpInst::ICMP_ULT:
    return AArch64CC::LO;
  case CmpInst::ICMP_ULE:
    return AArch64CC::LS;
  }
}

static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
                                      AArch64CC::CondCode &CondCode,
                                      AArch64CC::CondCode &CondCode2) {
  CondCode2 = AArch64CC::AL;
  switch (P) {
  default:
    llvm_unreachable("Unknown FP condition!");
  case CmpInst::FCMP_OEQ:
    CondCode = AArch64CC::EQ;
    break;
  case CmpInst::FCMP_OGT:
    CondCode = AArch64CC::GT;
    break;
  case CmpInst::FCMP_OGE:
    CondCode = AArch64CC::GE;
    break;
  case CmpInst::FCMP_OLT:
    CondCode = AArch64CC::MI;
    break;
  case CmpInst::FCMP_OLE:
    CondCode = AArch64CC::LS;
    break;
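  // Illustrative note: FCMP_ONE ("ordered and not equal") has no single
  // AArch64 condition code, so it is decomposed into two checks below: the
  // condition holds if either MI (less than) or GT (greater than) holds.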
  case CmpInst::FCMP_ONE:
    CondCode = AArch64CC::MI;
    CondCode2 = AArch64CC::GT;
    break;
  case CmpInst::FCMP_ORD:
    CondCode = AArch64CC::VC;
    break;
  case CmpInst::FCMP_UNO:
    CondCode = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UEQ:
    CondCode = AArch64CC::EQ;
    CondCode2 = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UGT:
    CondCode = AArch64CC::HI;
    break;
  case CmpInst::FCMP_UGE:
    CondCode = AArch64CC::PL;
    break;
  case CmpInst::FCMP_ULT:
    CondCode = AArch64CC::LT;
    break;
  case CmpInst::FCMP_ULE:
    CondCode = AArch64CC::LE;
    break;
  case CmpInst::FCMP_UNE:
    CondCode = AArch64CC::NE;
    break;
  }
}

bool AArch64InstructionSelector::selectCompareBranch(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {

  const unsigned CondReg = I.getOperand(0).getReg();
  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
  MachineInstr *CCMI = MRI.getVRegDef(CondReg);
  if (CCMI->getOpcode() == TargetOpcode::G_TRUNC)
    CCMI = MRI.getVRegDef(CCMI->getOperand(1).getReg());
  if (CCMI->getOpcode() != TargetOpcode::G_ICMP)
    return false;

  unsigned LHS = CCMI->getOperand(2).getReg();
  unsigned RHS = CCMI->getOperand(3).getReg();
  if (!getConstantVRegVal(RHS, MRI))
    std::swap(RHS, LHS);

  const auto RHSImm = getConstantVRegVal(RHS, MRI);
  if (!RHSImm || *RHSImm != 0)
    return false;

  const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
  if (RB.getID() != AArch64::GPRRegBankID)
    return false;

  const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
  if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
    return false;

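  // Illustrative note: a compare-against-zero feeding a conditional branch
  // folds into a single compare-and-branch instruction, e.g.
  //   %c = G_ICMP eq %x, 0 ; G_BRCOND %c, %bb   ==>   cbz w0, bb
  // with CBNZ used for the ICMP_NE form.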
  const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits();
  unsigned CBOpc = 0;
  if (CmpWidth <= 32)
    CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW);
  else if (CmpWidth == 64)
    CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX);
  else
    return false;

  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc))
      .addUse(LHS)
      .addMBB(DestMBB)
      .constrainAllUses(TII, TRI, RBI);

  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectVectorSHL(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_SHL);
  unsigned DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  unsigned Src1Reg = I.getOperand(1).getReg();
  unsigned Src2Reg = I.getOperand(2).getReg();

  if (!Ty.isVector())
    return false;

  unsigned Opc = 0;
  if (Ty == LLT::vector(4, 32)) {
    Opc = AArch64::USHLv4i32;
  } else if (Ty == LLT::vector(2, 32)) {
    Opc = AArch64::USHLv2i32;
  } else {
    LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
    return false;
  }

  MachineIRBuilder MIB(I);
  auto UShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Src2Reg});
  constrainSelectedInstRegOperands(*UShl, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectVectorASHR(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_ASHR);
  unsigned DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  unsigned Src1Reg = I.getOperand(1).getReg();
  unsigned Src2Reg = I.getOperand(2).getReg();

  if (!Ty.isVector())
    return false;

  // There is no vector shift-right-by-register instruction, but the
  // shift-left-by-register instruction takes a signed shift amount, where
  // negative values specify a right shift.
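  // Illustrative example: "sshl v0.4s, v1.4s, v2.4s" with every lane of v2
  // equal to -3 shifts each lane of v1 right (arithmetically) by 3, which is
  // why the selector negates the shift amounts and emits NEG + SSHL.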

  unsigned Opc = 0;
  unsigned NegOpc = 0;
  const TargetRegisterClass *RC = nullptr;
  if (Ty == LLT::vector(4, 32)) {
    Opc = AArch64::SSHLv4i32;
    NegOpc = AArch64::NEGv4i32;
    RC = &AArch64::FPR128RegClass;
  } else if (Ty == LLT::vector(2, 32)) {
    Opc = AArch64::SSHLv2i32;
    NegOpc = AArch64::NEGv2i32;
    RC = &AArch64::FPR64RegClass;
  } else {
    LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
    return false;
  }

  MachineIRBuilder MIB(I);
  auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
  constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
  auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
  constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectVaStartAAPCS(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  return false;
}

bool AArch64InstructionSelector::selectVaStartDarwin(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
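  // Illustrative note: on Darwin, va_list is a single pointer, so G_VASTART
  // just stores the address of the first stack vararg slot (the frame index
  // recorded during call lowering) through the va_list pointer.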
  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  unsigned ListReg = I.getOperand(0).getReg();

  unsigned ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);

  auto MIB =
      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
          .addDef(ArgsAddrReg)
          .addFrameIndex(FuncInfo->getVarArgsStackIndex())
          .addImm(0)
          .addImm(0);

  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);

  MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
            .addUse(ArgsAddrReg)
            .addUse(ListReg)
            .addImm(0)
            .addMemOperand(*I.memoperands_begin());

  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

void AArch64InstructionSelector::materializeLargeCMVal(
    MachineInstr &I, const Value *V, unsigned char OpFlags) const {
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineIRBuilder MIB(I);

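  // Illustrative note: for the large code model this emits the classic
  // four-instruction absolute-address sequence, roughly (for a global g):
  //   movz x0, #:abs_g0_nc:g
  //   movk x0, #:abs_g1_nc:g
  //   movk x0, #:abs_g2_nc:g
  //   movk x0, #:abs_g3:g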
  auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
  MovZ->addOperand(MF, I.getOperand(1));
  MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
                                     AArch64II::MO_NC);
  MovZ->addOperand(MF, MachineOperand::CreateImm(0));
  constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);

  auto BuildMovK = [&](unsigned SrcReg, unsigned char Flags, unsigned Offset,
                       unsigned ForceDstReg) {
    unsigned DstReg = ForceDstReg
                          ? ForceDstReg
                          : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
    if (auto *GV = dyn_cast<GlobalValue>(V)) {
      MovI->addOperand(MF, MachineOperand::CreateGA(
                               GV, MovZ->getOperand(1).getOffset(), Flags));
    } else {
      MovI->addOperand(
          MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
                                       MovZ->getOperand(1).getOffset(), Flags));
    }
    MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
    constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
    return DstReg;
  };
  unsigned DstReg = BuildMovK(MovZ.getReg(0),
                              AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
  DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
  BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
  return;
}

bool AArch64InstructionSelector::select(MachineInstr &I,
                                        CodeGenCoverage &CoverageInfo) const {
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");

  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  unsigned Opcode = I.getOpcode();
  // G_PHI requires same handling as PHI.
  if (!isPreISelGenericOpcode(Opcode) || Opcode == TargetOpcode::G_PHI) {
    // Certain non-generic instructions also need some special handling.

    if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);

    if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
      const unsigned DefReg = I.getOperand(0).getReg();
      const LLT DefTy = MRI.getType(DefReg);

      const TargetRegisterClass *DefRC = nullptr;
      if (TargetRegisterInfo::isPhysicalRegister(DefReg)) {
        DefRC = TRI.getRegClass(DefReg);
      } else {
        const RegClassOrRegBank &RegClassOrBank =
            MRI.getRegClassOrRegBank(DefReg);

        DefRC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
        if (!DefRC) {
          if (!DefTy.isValid()) {
            LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
            return false;
          }
          const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
          DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
          if (!DefRC) {
            LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
            return false;
          }
        }
      }
      I.setDesc(TII.get(TargetOpcode::PHI));

      return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
    }

    if (I.isCopy())
      return selectCopy(I, TII, MRI, TRI, RBI);

    return true;
  }

  if (I.getNumOperands() != I.getNumExplicitOperands()) {
    LLVM_DEBUG(
        dbgs() << "Generic instruction has unexpected implicit operands\n");
    return false;
  }

  if (selectImpl(I, CoverageInfo))
    return true;

  LLT Ty =
      I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};

  MachineIRBuilder MIB(I);

  switch (Opcode) {
  case TargetOpcode::G_BRCOND: {
    if (Ty.getSizeInBits() > 32) {
      // We shouldn't need this on AArch64, but it would be implemented as an
      // EXTRACT_SUBREG followed by a TBNZW because TBNZX has no encoding if the
      // bit being tested is < 32.
      LLVM_DEBUG(dbgs() << "G_BRCOND has type: " << Ty
                        << ", expected at most 32-bits");
      return false;
    }

    const unsigned CondReg = I.getOperand(0).getReg();
    MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();

    // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
    // instructions will not be produced, as they are conditional branch
    // instructions that do not set flags.
    bool ProduceNonFlagSettingCondBr =
        !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
    if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI))
      return true;

    if (ProduceNonFlagSettingCondBr) {
      auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
                     .addUse(CondReg)
                     .addImm(/*bit offset=*/0)
                     .addMBB(DestMBB);

      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
    } else {
      auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
                     .addDef(AArch64::WZR)
                     .addUse(CondReg)
                     .addImm(1);
      constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI);
      auto Bcc = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
                     .addImm(AArch64CC::EQ)
                     .addMBB(DestMBB);

      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI);
    }
  }

  case TargetOpcode::G_BRINDIRECT: {
    I.setDesc(TII.get(AArch64::BR));
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_BSWAP: {
    // Handle vector types for G_BSWAP directly.
    unsigned DstReg = I.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);

    // We should only get vector types here; everything else is handled by the
    // importer right now.
    if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
      LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
      return false;
    }

    // Only handle 4 and 2 element vectors for now.
    // TODO: 16-bit elements.
    unsigned NumElts = DstTy.getNumElements();
    if (NumElts != 4 && NumElts != 2) {
      LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
      return false;
    }

    // Choose the correct opcode for the supported types. Right now, that's
    // v2s32, v4s32, and v2s64.
    unsigned Opc = 0;
    unsigned EltSize = DstTy.getElementType().getSizeInBits();
    if (EltSize == 32)
      Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
                                          : AArch64::REV32v16i8;
    else if (EltSize == 64)
      Opc = AArch64::REV64v16i8;

    // We should always get something by the time we get here...
    assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");

    I.setDesc(TII.get(Opc));
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_FCONSTANT:
  case TargetOpcode::G_CONSTANT: {
    const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;

    const LLT s32 = LLT::scalar(32);
    const LLT s64 = LLT::scalar(64);
    const LLT p0 = LLT::pointer(0, 64);

    const unsigned DefReg = I.getOperand(0).getReg();
    const LLT DefTy = MRI.getType(DefReg);
    const unsigned DefSize = DefTy.getSizeInBits();
    const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);

    // FIXME: Redundant check, but even less readable when factored out.
    if (isFP) {
      if (Ty != s32 && Ty != s64) {
        LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
                          << " constant, expected: " << s32 << " or " << s64
                          << '\n');
        return false;
      }

      if (RB.getID() != AArch64::FPRRegBankID) {
        LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
                          << " constant on bank: " << RB
                          << ", expected: FPR\n");
        return false;
      }

      // The case when we have 0.0 is covered by tablegen. Reject it here so we
      // can be sure tablegen works correctly and isn't rescued by this code.
      if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0))
        return false;
    } else {
      // s32 and s64 are covered by tablegen.
      if (Ty != p0) {
        LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
                          << " constant, expected: " << s32 << ", " << s64
                          << ", or " << p0 << '\n');
        return false;
      }

      if (RB.getID() != AArch64::GPRRegBankID) {
        LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
                          << " constant on bank: " << RB
                          << ", expected: GPR\n");
        return false;
      }
    }

    const unsigned MovOpc =
        DefSize == 32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;

    if (isFP) {
      // Either emit a FMOV, or emit a copy to emit a normal mov.
      const TargetRegisterClass &GPRRC =
          DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
      const TargetRegisterClass &FPRRC =
          DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass;

      // Can we use a FMOV instruction to represent the immediate?
      if (emitFMovForFConstant(I, MRI))
        return true;

      // Nope. Emit a copy and use a normal mov instead.
      const unsigned DefGPRReg = MRI.createVirtualRegister(&GPRRC);
      MachineOperand &RegOp = I.getOperand(0);
      RegOp.setReg(DefGPRReg);
      MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
      MIB.buildCopy({DefReg}, {DefGPRReg});

      if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
        LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
        return false;
      }

      MachineOperand &ImmOp = I.getOperand(1);
      // FIXME: Is going through int64_t always correct?
      ImmOp.ChangeToImmediate(
          ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
    } else if (I.getOperand(1).isCImm()) {
      uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
      I.getOperand(1).ChangeToImmediate(Val);
    } else if (I.getOperand(1).isImm()) {
      uint64_t Val = I.getOperand(1).getImm();
      I.getOperand(1).ChangeToImmediate(Val);
    }

    I.setDesc(TII.get(MovOpc));
    constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    return true;
  }
  case TargetOpcode::G_EXTRACT: {
    LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    (void)DstTy;
    unsigned SrcSize = SrcTy.getSizeInBits();
    // Larger extracts are vectors, same-size extracts should be something else
    // by now (either split up or simplified to a COPY).
    if (SrcTy.getSizeInBits() > 64 || Ty.getSizeInBits() > 32)
      return false;

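    // Illustrative note: the UBFM below implements the extract as an unsigned
    // bitfield move, e.g. extracting 16 bits at offset 32 from a 64-bit value
    // becomes "ubfx x0, x1, #32, #16" (an alias of UBFMXri with immr = 32 and
    // imms = 32 + 16 - 1).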
    I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
    MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
                                      Ty.getSizeInBits() - 1);

    if (SrcSize < 64) {
      assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
             "unexpected G_EXTRACT types");
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    }

    unsigned DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
    MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
    MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
        .addReg(DstReg, 0, AArch64::sub_32);
    RBI.constrainGenericRegister(I.getOperand(0).getReg(),
                                 AArch64::GPR32RegClass, MRI);
    I.getOperand(0).setReg(DstReg);

    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_INSERT: {
    LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    unsigned DstSize = DstTy.getSizeInBits();
    // Larger inserts are vectors, same-size ones should be something else by
    // now (split up or turned into COPYs).
    if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
      return false;

    I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
    unsigned LSB = I.getOperand(3).getImm();
    unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
    I.getOperand(3).setImm((DstSize - LSB) % DstSize);
    MachineInstrBuilder(MF, I).addImm(Width - 1);

    if (DstSize < 64) {
      assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
             "unexpected G_INSERT types");
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    }

    unsigned SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
    BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
            TII.get(AArch64::SUBREG_TO_REG))
        .addDef(SrcReg)
        .addImm(0)
        .addUse(I.getOperand(2).getReg())
        .addImm(AArch64::sub_32);
    RBI.constrainGenericRegister(I.getOperand(2).getReg(),
                                 AArch64::GPR32RegClass, MRI);
    I.getOperand(2).setReg(SrcReg);

    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }
  case TargetOpcode::G_FRAME_INDEX: {
    // allocas and G_FRAME_INDEX are only supported in addrspace(0).
    if (Ty != LLT::pointer(0, 64)) {
      LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
                        << ", expected: " << LLT::pointer(0, 64) << '\n');
      return false;
    }
    I.setDesc(TII.get(AArch64::ADDXri));

    // MOs for a #0 shifted immediate.
    I.addOperand(MachineOperand::CreateImm(0));
    I.addOperand(MachineOperand::CreateImm(0));

    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_GLOBAL_VALUE: {
    auto GV = I.getOperand(1).getGlobal();
    if (GV->isThreadLocal()) {
      // FIXME: we don't support TLS yet.
      return false;
    }
    unsigned char OpFlags = STI.ClassifyGlobalReference(GV, TM);
    if (OpFlags & AArch64II::MO_GOT) {
      I.setDesc(TII.get(AArch64::LOADgot));
      I.getOperand(1).setTargetFlags(OpFlags);
    } else if (TM.getCodeModel() == CodeModel::Large) {
      // Materialize the global using movz/movk instructions.
      materializeLargeCMVal(I, GV, OpFlags);
      I.eraseFromParent();
      return true;
    } else if (TM.getCodeModel() == CodeModel::Tiny) {
      I.setDesc(TII.get(AArch64::ADR));
      I.getOperand(1).setTargetFlags(OpFlags);
    } else {
      I.setDesc(TII.get(AArch64::MOVaddr));
      I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
      MachineInstrBuilder MIB(MF, I);
      MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
                           OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    }
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE: {
    bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
    MachineIRBuilder MIB(I);

    LLT PtrTy = MRI.getType(I.getOperand(1).getReg());

    if (PtrTy != LLT::pointer(0, 64)) {
      LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
                        << ", expected: " << LLT::pointer(0, 64) << '\n');
      return false;
    }

    auto &MemOp = **I.memoperands_begin();
    if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) {
      LLVM_DEBUG(dbgs() << "Atomic load/store not supported yet\n");
      return false;
    }
    unsigned MemSizeInBits = MemOp.getSize() * 8;

    const unsigned PtrReg = I.getOperand(1).getReg();
#ifndef NDEBUG
    const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
    // Sanity-check the pointer register.
    assert(PtrRB.getID() == AArch64::GPRRegBankID &&
           "Load/Store pointer operand isn't a GPR");
    assert(MRI.getType(PtrReg).isPointer() &&
           "Load/Store pointer operand isn't a pointer");
#endif

    const unsigned ValReg = I.getOperand(0).getReg();
    const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);

    const unsigned NewOpc =
        selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
    if (NewOpc == I.getOpcode())
      return false;

    I.setDesc(TII.get(NewOpc));

    uint64_t Offset = 0;
    auto *PtrMI = MRI.getVRegDef(PtrReg);

    // Try to fold a GEP into our unsigned immediate addressing mode.
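    // Illustrative note: the ui variants take an unsigned, size-scaled 12-bit
    // immediate, so a byte offset folds only if it is a multiple of the access
    // size and the scaled value fits in 12 bits (0..4095). E.g. for LDRXui,
    // byte offsets 0, 8, ..., 32760 are representable.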
    if (PtrMI->getOpcode() == TargetOpcode::G_GEP) {
      if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) {
        int64_t Imm = *COff;
        const unsigned Size = MemSizeInBits / 8;
        const unsigned Scale = Log2_32(Size);
        if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) {
          unsigned Ptr2Reg = PtrMI->getOperand(1).getReg();
          I.getOperand(1).setReg(Ptr2Reg);
          PtrMI = MRI.getVRegDef(Ptr2Reg);
          Offset = Imm / Size;
        }
      }
    }

    // If we haven't folded anything into our addressing mode yet, try to fold
    // a frame index into the base+offset.
    if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX)
      I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex());

    I.addOperand(MachineOperand::CreateImm(Offset));

    // If we're storing a 0, use WZR/XZR.
    if (auto CVal = getConstantVRegVal(ValReg, MRI)) {
      if (*CVal == 0 && Opcode == TargetOpcode::G_STORE) {
        if (I.getOpcode() == AArch64::STRWui)
          I.getOperand(0).setReg(AArch64::WZR);
        else if (I.getOpcode() == AArch64::STRXui)
          I.getOperand(0).setReg(AArch64::XZR);
      }
    }

    if (IsZExtLoad) {
      // The zextload from a smaller type to i32 should be handled by the
      // importer.
      if (MRI.getType(ValReg).getSizeInBits() != 64)
        return false;
      // If we have a ZEXTLOAD then change the load's type to be a narrower
      // reg and zero_extend with SUBREG_TO_REG.
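      // Illustrative note: on AArch64 a 32-bit load ("ldr w0, [x1]")
      // implicitly zeroes the upper 32 bits of x0, so the 64-bit zext is
      // free; SUBREG_TO_REG below just re-expresses the 32-bit result as the
      // full 64-bit register.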
      unsigned LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
      unsigned DstReg = I.getOperand(0).getReg();
      I.getOperand(0).setReg(LdReg);

      MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
      MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
          .addImm(0)
          .addUse(LdReg)
          .addImm(AArch64::sub_32);
      constrainSelectedInstRegOperands(I, TII, TRI, RBI);
      return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
                                          MRI);
    }
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_SMULH:
  case TargetOpcode::G_UMULH: {
    // Reject the various things we don't support yet.
    if (unsupportedBinOp(I, RBI, MRI, TRI))
      return false;

    const unsigned DefReg = I.getOperand(0).getReg();
    const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);

    if (RB.getID() != AArch64::GPRRegBankID) {
      LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
      return false;
    }

    if (Ty != LLT::scalar(64)) {
      LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
                        << ", expected: " << LLT::scalar(64) << '\n');
      return false;
    }

    unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
                                                             : AArch64::UMULHrr;
    I.setDesc(TII.get(NewOpc));

    // Now that we selected an opcode, we need to constrain the register
    // operands to use appropriate classes.
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:

  case TargetOpcode::G_ASHR:
    if (MRI.getType(I.getOperand(0).getReg()).isVector())
      return selectVectorASHR(I, MRI);
    LLVM_FALLTHROUGH;
  case TargetOpcode::G_SHL:
    if (Opcode == TargetOpcode::G_SHL &&
        MRI.getType(I.getOperand(0).getReg()).isVector())
      return selectVectorSHL(I, MRI);
    LLVM_FALLTHROUGH;
  case TargetOpcode::G_OR:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_GEP: {
    // Reject the various things we don't support yet.
    if (unsupportedBinOp(I, RBI, MRI, TRI))
      return false;

    const unsigned OpSize = Ty.getSizeInBits();

    const unsigned DefReg = I.getOperand(0).getReg();
    const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);

    const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
    if (NewOpc == I.getOpcode())
      return false;

    I.setDesc(TII.get(NewOpc));
    // FIXME: Should the type be always reset in setDesc?

    // Now that we selected an opcode, we need to constrain the register
    // operands to use appropriate classes.
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_UADDO: {
    // TODO: Support other types.
    unsigned OpSize = Ty.getSizeInBits();
    if (OpSize != 32 && OpSize != 64) {
      LLVM_DEBUG(
          dbgs()
          << "G_UADDO currently only supported for 32 and 64 bit types.\n");
      return false;
    }

    // TODO: Support vectors.
    if (Ty.isVector()) {
      LLVM_DEBUG(dbgs() << "G_UADDO currently only supported for scalars.\n");
      return false;
    }

    // Add and set the set condition flag.
    unsigned AddsOpc = OpSize == 32 ? AArch64::ADDSWrr : AArch64::ADDSXrr;
    MachineIRBuilder MIRBuilder(I);
    auto AddsMI = MIRBuilder.buildInstr(
        AddsOpc, {I.getOperand(0).getReg()},
        {I.getOperand(2).getReg(), I.getOperand(3).getReg()});
    constrainSelectedInstRegOperands(*AddsMI, TII, TRI, RBI);

    // Now, put the overflow result in the register given by the first operand
    // to the G_UADDO. CSINC increments the result when the predicate is false,
    // so to get the increment when it's true, we need to use the inverse. In
    // this case, we want to increment when carry is set.
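    // Illustrative note: the net effect is "cset w1, hs" — which assemblers
    // expand to "csinc w1, wzr, wzr, lo" — so w1 becomes 1 exactly when the
    // unsigned add produced a carry (HS = carry set).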
    auto CsetMI = MIRBuilder
                      .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
                                  {AArch64::WZR, AArch64::WZR})
                      .addImm(getInvertedCondCode(AArch64CC::HS));
    constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
    I.eraseFromParent();
    return true;
  }

  case TargetOpcode::G_PTR_MASK: {
    uint64_t Align = I.getOperand(2).getImm();
    if (Align >= 64 || Align == 0)
      return false;

    uint64_t Mask = ~((1ULL << Align) - 1);
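    // Illustrative note: with Align == 4 this builds Mask = ~15, i.e. the
    // pointer is rounded down to a 16-byte boundary via a single ANDXri
    // (logical immediates can encode any such contiguous-ones mask).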
1602  I.setDesc(TII.get(AArch64::ANDXri));
1604 
1605  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1606  }
1607  case TargetOpcode::G_PTRTOINT:
1608  case TargetOpcode::G_TRUNC: {
1609  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1610  const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
1611 
1612  const unsigned DstReg = I.getOperand(0).getReg();
1613  const unsigned SrcReg = I.getOperand(1).getReg();
1614 
1615  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
1616  const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
1617 
1618  if (DstRB.getID() != SrcRB.getID()) {
1619  LLVM_DEBUG(
1620  dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
1621  return false;
1622  }
1623 
1624  if (DstRB.getID() == AArch64::GPRRegBankID) {
1625  const TargetRegisterClass *DstRC =
1626  getRegClassForTypeOnBank(DstTy, DstRB, RBI);
1627  if (!DstRC)
1628  return false;
1629 
1630  const TargetRegisterClass *SrcRC =
1631  getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
1632  if (!SrcRC)
1633  return false;
1634 
1635  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
1636  !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1637  LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
1638  return false;
1639  }
1640 
1641  if (DstRC == SrcRC) {
1642  // Nothing to be done
1643  } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
1644  SrcTy == LLT::scalar(64)) {
1645  llvm_unreachable("TableGen can import this case");
1646  return false;
1647  } else if (DstRC == &AArch64::GPR32RegClass &&
1648  SrcRC == &AArch64::GPR64RegClass) {
1649  I.getOperand(1).setSubReg(AArch64::sub_32);
1650  } else {
1651  LLVM_DEBUG(
1652  dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
1653  return false;
1654  }
1655 
1656  I.setDesc(TII.get(TargetOpcode::COPY));
1657  return true;
1658  } else if (DstRB.getID() == AArch64::FPRRegBankID) {
1659  if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) {
 1660  I.setDesc(TII.get(AArch64::XTNv4i16));
 1661  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1662  return true;
1663  }
1664  }
1665 
1666  return false;
1667  }
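  // E.g. %dst:gpr(s32) = G_TRUNC %src:gpr(s64) becomes a plain COPY whose
  // use operand reads the sub_32 subregister of %src; no new instruction is
  // needed because the low 32 bits are already in place.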
1668 
1669  case TargetOpcode::G_ANYEXT: {
1670  const unsigned DstReg = I.getOperand(0).getReg();
1671  const unsigned SrcReg = I.getOperand(1).getReg();
1672 
1673  const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
1674  if (RBDst.getID() != AArch64::GPRRegBankID) {
1675  LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
1676  << ", expected: GPR\n");
1677  return false;
1678  }
1679 
1680  const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
1681  if (RBSrc.getID() != AArch64::GPRRegBankID) {
1682  LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
1683  << ", expected: GPR\n");
1684  return false;
1685  }
1686 
1687  const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
1688 
1689  if (DstSize == 0) {
1690  LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
1691  return false;
1692  }
1693 
1694  if (DstSize != 64 && DstSize > 32) {
1695  LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
1696  << ", expected: 32 or 64\n");
1697  return false;
1698  }
 1699  // At this point G_ANYEXT is just like a plain COPY, but we need
 1700  // to explicitly form the 64-bit value if one is required.
1701  if (DstSize > 32) {
1702  unsigned ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
1703  BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
1704  .addDef(ExtSrc)
1705  .addImm(0)
1706  .addUse(SrcReg)
1707  .addImm(AArch64::sub_32);
1708  I.getOperand(1).setReg(ExtSrc);
1709  }
1710  return selectCopy(I, TII, MRI, TRI, RBI);
1711  }
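  // E.g. %dst:gpr(s64) = G_ANYEXT %src:gpr(s32) is selected roughly as:
  //   %ext:gpr64all = SUBREG_TO_REG 0, %src, sub_32
  //   %dst = COPY %ext
  // The high 32 bits are left unspecified, which is all an anyext requires.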
1712 
1713  case TargetOpcode::G_ZEXT:
1714  case TargetOpcode::G_SEXT: {
1715  unsigned Opcode = I.getOpcode();
1716  const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
1717  SrcTy = MRI.getType(I.getOperand(1).getReg());
1718  const bool isSigned = Opcode == TargetOpcode::G_SEXT;
1719  const unsigned DefReg = I.getOperand(0).getReg();
1720  const unsigned SrcReg = I.getOperand(1).getReg();
1721  const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1722 
1723  if (RB.getID() != AArch64::GPRRegBankID) {
1724  LLVM_DEBUG(dbgs() << TII.getName(I.getOpcode()) << " on bank: " << RB
1725  << ", expected: GPR\n");
1726  return false;
1727  }
1728 
1729  MachineInstr *ExtI;
1730  if (DstTy == LLT::scalar(64)) {
1731  // FIXME: Can we avoid manually doing this?
1732  if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, MRI)) {
1733  LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
1734  << " operand\n");
1735  return false;
1736  }
1737 
1738  const unsigned SrcXReg =
1739  MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1740  BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
1741  .addDef(SrcXReg)
1742  .addImm(0)
1743  .addUse(SrcReg)
1744  .addImm(AArch64::sub_32);
1745 
1746  const unsigned NewOpc = isSigned ? AArch64::SBFMXri : AArch64::UBFMXri;
1747  ExtI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc))
1748  .addDef(DefReg)
1749  .addUse(SrcXReg)
1750  .addImm(0)
1751  .addImm(SrcTy.getSizeInBits() - 1);
1752  } else if (DstTy.isScalar() && DstTy.getSizeInBits() <= 32) {
1753  const unsigned NewOpc = isSigned ? AArch64::SBFMWri : AArch64::UBFMWri;
1754  ExtI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc))
1755  .addDef(DefReg)
1756  .addUse(SrcReg)
1757  .addImm(0)
1758  .addImm(SrcTy.getSizeInBits() - 1);
1759  } else {
1760  return false;
1761  }
 1762 
 1763  constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
 1764 
1765  I.eraseFromParent();
1766  return true;
1767  }
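  // SBFM/UBFM with immr = 0 and imms = SrcSize - 1 extracts the low SrcSize
  // bits and sign- or zero-extends them. E.g. a G_ZEXT from s8 to s32
  // becomes UBFMWri %dst, %src, 0, 7 (the UXTB alias), and an s32 -> s64
  // G_SEXT becomes SBFMXri %dst, %srcX, 0, 31 (SXTW) after the source is
  // widened with SUBREG_TO_REG as above (names illustrative).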
1768 
1769  case TargetOpcode::G_SITOFP:
1770  case TargetOpcode::G_UITOFP:
1771  case TargetOpcode::G_FPTOSI:
1772  case TargetOpcode::G_FPTOUI: {
1773  const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
1774  SrcTy = MRI.getType(I.getOperand(1).getReg());
1775  const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
1776  if (NewOpc == Opcode)
1777  return false;
1778 
 1779  I.setDesc(TII.get(NewOpc));
 1780  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
 1781 
1782  return true;
1783  }
1784 
1785 
1786  case TargetOpcode::G_INTTOPTR:
1787  // The importer is currently unable to import pointer types since they
1788  // didn't exist in SelectionDAG.
1789  return selectCopy(I, TII, MRI, TRI, RBI);
1790 
1791  case TargetOpcode::G_BITCAST:
1792  // Imported SelectionDAG rules can handle every bitcast except those that
1793  // bitcast from a type to the same type. Ideally, these shouldn't occur
1794  // but we might not run an optimizer that deletes them. The other exception
1795  // is bitcasts involving pointer types, as SelectionDAG has no knowledge
1796  // of them.
1797  return selectCopy(I, TII, MRI, TRI, RBI);
1798 
1799  case TargetOpcode::G_SELECT: {
1800  if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
1801  LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
1802  << ", expected: " << LLT::scalar(1) << '\n');
1803  return false;
1804  }
1805 
1806  const unsigned CondReg = I.getOperand(1).getReg();
1807  const unsigned TReg = I.getOperand(2).getReg();
1808  const unsigned FReg = I.getOperand(3).getReg();
1809 
1810  // If we have a floating-point result, then we should use a floating point
1811  // select instead of an integer select.
1812  bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
1813  AArch64::GPRRegBankID);
1814 
1815  if (IsFP && tryOptSelect(I))
1816  return true;
1817 
1818  unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
1819  MachineInstr &TstMI =
1820  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
1821  .addDef(AArch64::WZR)
 1822  .addUse(CondReg)
 1823  .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
1824 
1825  MachineInstr &CSelMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CSelOpc))
1826  .addDef(I.getOperand(0).getReg())
1827  .addUse(TReg)
 1828  .addUse(FReg)
 1829  .addImm(AArch64CC::NE);
 1830 
 1831  constrainSelectedInstRegOperands(TstMI, TII, TRI, RBI);
 1832  constrainSelectedInstRegOperands(CSelMI, TII, TRI, RBI);
1833 
1834  I.eraseFromParent();
1835  return true;
1836  }
1837  case TargetOpcode::G_ICMP: {
1838  if (Ty.isVector())
1839  return selectVectorICmp(I, MRI);
1840 
1841  if (Ty != LLT::scalar(32)) {
1842  LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
1843  << ", expected: " << LLT::scalar(32) << '\n');
1844  return false;
1845  }
1846 
1847  unsigned CmpOpc = 0;
1848  unsigned ZReg = 0;
1849 
1850  LLT CmpTy = MRI.getType(I.getOperand(2).getReg());
1851  if (CmpTy == LLT::scalar(32)) {
1852  CmpOpc = AArch64::SUBSWrr;
1853  ZReg = AArch64::WZR;
1854  } else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) {
1855  CmpOpc = AArch64::SUBSXrr;
1856  ZReg = AArch64::XZR;
1857  } else {
1858  return false;
1859  }
1860 
1861  // Try to match immediate forms.
1862  auto ImmFns = selectArithImmed(I.getOperand(3));
1863  if (ImmFns)
1864  CmpOpc = CmpOpc == AArch64::SUBSWrr ? AArch64::SUBSWri : AArch64::SUBSXri;
1865 
1866  // CSINC increments the result by one when the condition code is false.
1867  // Therefore, we have to invert the predicate to get an increment by 1 when
1868  // the predicate is true.
1869  const AArch64CC::CondCode invCC =
 1870  changeICMPPredToAArch64CC(CmpInst::getInversePredicate(
 1871  (CmpInst::Predicate)I.getOperand(1).getPredicate()));
 1872 
1873  auto CmpMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
1874  .addDef(ZReg)
1875  .addUse(I.getOperand(2).getReg());
1876 
1877  // If we matched a valid constant immediate, add those operands.
1878  if (ImmFns) {
1879  for (auto &RenderFn : *ImmFns)
1880  RenderFn(CmpMI);
1881  } else {
1882  CmpMI.addUse(I.getOperand(3).getReg());
1883  }
1884 
1885  MachineInstr &CSetMI =
1886  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
1887  .addDef(I.getOperand(0).getReg())
1888  .addUse(AArch64::WZR)
1889  .addUse(AArch64::WZR)
1890  .addImm(invCC);
1891 
1892  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
1893  constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
1894 
1895  I.eraseFromParent();
1896  return true;
1897  }
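  // E.g. %d:gpr(s32) = G_ICMP intpred(eq), %x:gpr(s64), %y:gpr(s64) is
  // selected roughly as (names illustrative):
  //   $xzr = SUBSXrr %x, %y        ; compare, result discarded into XZR
  //   %d = CSINCWr $wzr, $wzr, ne  ; inverse of EQ, so %d = 1 iff %x == %y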
1898 
1899  case TargetOpcode::G_FCMP: {
1900  if (Ty != LLT::scalar(32)) {
1901  LLVM_DEBUG(dbgs() << "G_FCMP result has type: " << Ty
1902  << ", expected: " << LLT::scalar(32) << '\n');
1903  return false;
1904  }
1905 
1906  unsigned CmpOpc = selectFCMPOpc(I, MRI);
1907  if (!CmpOpc)
1908  return false;
1909 
1910  // FIXME: regbank
1911 
 1912  AArch64CC::CondCode CC1, CC2;
 1913  changeFCMPPredToAArch64CC(
 1914  (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2);
1915 
1916  // Partially build the compare. Decide if we need to add a use for the
1917  // third operand based off whether or not we're comparing against 0.0.
1918  auto CmpMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
1919  .addUse(I.getOperand(2).getReg());
1920 
1921  // If we don't have an immediate compare, then we need to add a use of the
1922  // register which wasn't used for the immediate.
1923  // Note that the immediate will always be the last operand.
1924  if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
1925  CmpMI = CmpMI.addUse(I.getOperand(3).getReg());
1926 
1927  const unsigned DefReg = I.getOperand(0).getReg();
1928  unsigned Def1Reg = DefReg;
1929  if (CC2 != AArch64CC::AL)
1930  Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
1931 
1932  MachineInstr &CSetMI =
1933  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
1934  .addDef(Def1Reg)
1935  .addUse(AArch64::WZR)
1936  .addUse(AArch64::WZR)
1937  .addImm(getInvertedCondCode(CC1));
1938 
1939  if (CC2 != AArch64CC::AL) {
1940  unsigned Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
1941  MachineInstr &CSet2MI =
1942  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
1943  .addDef(Def2Reg)
1944  .addUse(AArch64::WZR)
1945  .addUse(AArch64::WZR)
1946  .addImm(getInvertedCondCode(CC2));
1947  MachineInstr &OrMI =
1948  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr))
1949  .addDef(DefReg)
1950  .addUse(Def1Reg)
 1951  .addUse(Def2Reg);
 1952  constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI);
 1953  constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI);
1954  }
1955  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
1956  constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
1957 
1958  I.eraseFromParent();
1959  return true;
1960  }
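  // Most FP predicates map to a single AArch64 condition, in which case CC2
  // is AL and a single CSINC suffices. The remaining ones (e.g. FCMP_ONE and
  // FCMP_UEQ, under the usual changeFCMPPredToAArch64CC mapping) need two
  // flag tests, so each gets its own CSINC and the two results are combined
  // with the ORRWrr above.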
1961  case TargetOpcode::G_VASTART:
1962  return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
1963  : selectVaStartAAPCS(I, MF, MRI);
1964  case TargetOpcode::G_INTRINSIC:
1965  return selectIntrinsic(I, MRI);
1966  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
1967  return selectIntrinsicWithSideEffects(I, MRI);
1968  case TargetOpcode::G_IMPLICIT_DEF: {
1969  I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
1970  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1971  const unsigned DstReg = I.getOperand(0).getReg();
1972  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
1973  const TargetRegisterClass *DstRC =
1974  getRegClassForTypeOnBank(DstTy, DstRB, RBI);
1975  RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
1976  return true;
1977  }
1978  case TargetOpcode::G_BLOCK_ADDR: {
1979  if (TM.getCodeModel() == CodeModel::Large) {
1980  materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
1981  I.eraseFromParent();
1982  return true;
1983  } else {
1984  I.setDesc(TII.get(AArch64::MOVaddrBA));
1985  auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
1986  I.getOperand(0).getReg())
1987  .addBlockAddress(I.getOperand(1).getBlockAddress(),
1988  /* Offset */ 0, AArch64II::MO_PAGE)
1989  .addBlockAddress(
 1990  I.getOperand(1).getBlockAddress(), /* Offset */ 0,
 1991  AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
 1992  I.eraseFromParent();
1993  return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
1994  }
1995  }
1996  case TargetOpcode::G_INTRINSIC_TRUNC:
1997  return selectIntrinsicTrunc(I, MRI);
1998  case TargetOpcode::G_INTRINSIC_ROUND:
1999  return selectIntrinsicRound(I, MRI);
2000  case TargetOpcode::G_BUILD_VECTOR:
2001  return selectBuildVector(I, MRI);
2002  case TargetOpcode::G_MERGE_VALUES:
2003  return selectMergeValues(I, MRI);
2004  case TargetOpcode::G_UNMERGE_VALUES:
2005  return selectUnmergeValues(I, MRI);
2006  case TargetOpcode::G_SHUFFLE_VECTOR:
2007  return selectShuffleVector(I, MRI);
2008  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
2009  return selectExtractElt(I, MRI);
2010  case TargetOpcode::G_INSERT_VECTOR_ELT:
2011  return selectInsertElt(I, MRI);
2012  case TargetOpcode::G_CONCAT_VECTORS:
2013  return selectConcatVectors(I, MRI);
2014  }
2015 
2016  return false;
2017 }
2018 
2019 bool AArch64InstructionSelector::selectIntrinsicTrunc(
2020  MachineInstr &I, MachineRegisterInfo &MRI) const {
2021  const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
2022 
2023  // Select the correct opcode.
2024  unsigned Opc = 0;
2025  if (!SrcTy.isVector()) {
2026  switch (SrcTy.getSizeInBits()) {
2027  default:
2028  case 16:
2029  Opc = AArch64::FRINTZHr;
2030  break;
2031  case 32:
2032  Opc = AArch64::FRINTZSr;
2033  break;
2034  case 64:
2035  Opc = AArch64::FRINTZDr;
2036  break;
2037  }
2038  } else {
2039  unsigned NumElts = SrcTy.getNumElements();
2040  switch (SrcTy.getElementType().getSizeInBits()) {
2041  default:
2042  break;
2043  case 16:
2044  if (NumElts == 4)
2045  Opc = AArch64::FRINTZv4f16;
2046  else if (NumElts == 8)
2047  Opc = AArch64::FRINTZv8f16;
2048  break;
2049  case 32:
2050  if (NumElts == 2)
2051  Opc = AArch64::FRINTZv2f32;
2052  else if (NumElts == 4)
2053  Opc = AArch64::FRINTZv4f32;
2054  break;
2055  case 64:
2056  if (NumElts == 2)
2057  Opc = AArch64::FRINTZv2f64;
2058  break;
2059  }
2060  }
2061 
2062  if (!Opc) {
2063  // Didn't get an opcode above, bail.
2064  LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
2065  return false;
2066  }
2067 
2068  // Legalization would have set us up perfectly for this; we just need to
2069  // set the opcode and move on.
2070  I.setDesc(TII.get(Opc));
2071  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2072 }
2073 
2074 bool AArch64InstructionSelector::selectIntrinsicRound(
2075  MachineInstr &I, MachineRegisterInfo &MRI) const {
2076  const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
2077 
2078  // Select the correct opcode.
2079  unsigned Opc = 0;
2080  if (!SrcTy.isVector()) {
2081  switch (SrcTy.getSizeInBits()) {
2082  default:
2083  case 16:
2084  Opc = AArch64::FRINTAHr;
2085  break;
2086  case 32:
2087  Opc = AArch64::FRINTASr;
2088  break;
2089  case 64:
2090  Opc = AArch64::FRINTADr;
2091  break;
2092  }
2093  } else {
2094  unsigned NumElts = SrcTy.getNumElements();
2095  switch (SrcTy.getElementType().getSizeInBits()) {
2096  default:
2097  break;
2098  case 16:
2099  if (NumElts == 4)
2100  Opc = AArch64::FRINTAv4f16;
2101  else if (NumElts == 8)
2102  Opc = AArch64::FRINTAv8f16;
2103  break;
2104  case 32:
2105  if (NumElts == 2)
2106  Opc = AArch64::FRINTAv2f32;
2107  else if (NumElts == 4)
2108  Opc = AArch64::FRINTAv4f32;
2109  break;
2110  case 64:
2111  if (NumElts == 2)
2112  Opc = AArch64::FRINTAv2f64;
2113  break;
2114  }
2115  }
2116 
2117  if (!Opc) {
2118  // Didn't get an opcode above, bail.
2119  LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
2120  return false;
2121  }
2122 
2123  // Legalization would have set us up perfectly for this; we just need to
2124  // set the opcode and move on.
2125  I.setDesc(TII.get(Opc));
2126  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2127 }
2128 
2129 bool AArch64InstructionSelector::selectVectorICmp(
2130  MachineInstr &I, MachineRegisterInfo &MRI) const {
2131  unsigned DstReg = I.getOperand(0).getReg();
2132  LLT DstTy = MRI.getType(DstReg);
2133  unsigned SrcReg = I.getOperand(2).getReg();
2134  unsigned Src2Reg = I.getOperand(3).getReg();
2135  LLT SrcTy = MRI.getType(SrcReg);
2136 
2137  unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
2138  unsigned NumElts = DstTy.getNumElements();
2139 
2140  // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
2141  // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
2142  // Third index is cc opcode:
2143  // 0 == eq
2144  // 1 == ugt
2145  // 2 == uge
2146  // 3 == ult
2147  // 4 == ule
2148  // 5 == sgt
2149  // 6 == sge
2150  // 7 == slt
2151  // 8 == sle
2152  // ne is done by negating 'eq' result.
2153 
 2154  // The table below assumes that for some comparisons the operands will be
2155  // commuted.
2156  // ult op == commute + ugt op
2157  // ule op == commute + uge op
2158  // slt op == commute + sgt op
2159  // sle op == commute + sge op
2160  unsigned PredIdx = 0;
 2161  bool SwapOperands = false;
 2162  CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
 2163  switch (Pred) {
2164  case CmpInst::ICMP_NE:
2165  case CmpInst::ICMP_EQ:
2166  PredIdx = 0;
2167  break;
2168  case CmpInst::ICMP_UGT:
2169  PredIdx = 1;
2170  break;
2171  case CmpInst::ICMP_UGE:
2172  PredIdx = 2;
2173  break;
2174  case CmpInst::ICMP_ULT:
2175  PredIdx = 3;
2176  SwapOperands = true;
2177  break;
2178  case CmpInst::ICMP_ULE:
2179  PredIdx = 4;
2180  SwapOperands = true;
2181  break;
2182  case CmpInst::ICMP_SGT:
2183  PredIdx = 5;
2184  break;
2185  case CmpInst::ICMP_SGE:
2186  PredIdx = 6;
2187  break;
2188  case CmpInst::ICMP_SLT:
2189  PredIdx = 7;
2190  SwapOperands = true;
2191  break;
2192  case CmpInst::ICMP_SLE:
2193  PredIdx = 8;
2194  SwapOperands = true;
2195  break;
2196  default:
2197  llvm_unreachable("Unhandled icmp predicate");
2198  return false;
2199  }
2200 
2201  // This table obviously should be tablegen'd when we have our GISel native
2202  // tablegen selector.
2203 
2204  static const unsigned OpcTable[4][4][9] = {
2205  {
2206  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2207  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2208  0 /* invalid */},
2209  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2210  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2211  0 /* invalid */},
2212  {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
2213  AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
2214  AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
2215  {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
2216  AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
2217  AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
2218  },
2219  {
2220  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2221  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2222  0 /* invalid */},
2223  {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
2224  AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
2225  AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
2226  {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
2227  AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
2228  AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
2229  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2230  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2231  0 /* invalid */}
2232  },
2233  {
2234  {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
2235  AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
2236  AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
2237  {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
2238  AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
2239  AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
2240  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2241  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2242  0 /* invalid */},
2243  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2244  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2245  0 /* invalid */}
2246  },
2247  {
2248  {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
2249  AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
2250  AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
2251  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2252  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2253  0 /* invalid */},
2254  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2255  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2256  0 /* invalid */},
2257  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2258  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2259  0 /* invalid */}
2260  },
2261  };
2262  unsigned EltIdx = Log2_32(SrcEltSize / 8);
2263  unsigned NumEltsIdx = Log2_32(NumElts / 2);
2264  unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
2265  if (!Opc) {
2266  LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
2267  return false;
2268  }
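  // Worked example of the lookup: for G_ICMP intpred(ugt) on <4 x s32>
  // operands, EltIdx = Log2_32(32 / 8) = 2, NumEltsIdx = Log2_32(4 / 2) = 1
  // and PredIdx = 1, so Opc = OpcTable[2][1][1] = AArch64::CMHIv4i32.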
2269 
2270  const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2271  const TargetRegisterClass *SrcRC =
2272  getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
2273  if (!SrcRC) {
2274  LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
2275  return false;
2276  }
2277 
2278  unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
2279  if (SrcTy.getSizeInBits() == 128)
2280  NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
2281 
2282  if (SwapOperands)
2283  std::swap(SrcReg, Src2Reg);
2284 
2285  MachineIRBuilder MIB(I);
 2286  auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
 2287  constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
 2288 
2289  // Invert if we had a 'ne' cc.
2290  if (NotOpc) {
 2291  Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
 2292  constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
2293  } else {
2294  MIB.buildCopy(DstReg, Cmp.getReg(0));
2295  }
2296  RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
2297  I.eraseFromParent();
2298  return true;
2299 }
2300 
2301 MachineInstr *AArch64InstructionSelector::emitScalarToVector(
2302  unsigned EltSize, const TargetRegisterClass *DstRC, unsigned Scalar,
2303  MachineIRBuilder &MIRBuilder) const {
2304  auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
2305 
2306  auto BuildFn = [&](unsigned SubregIndex) {
2307  auto Ins =
2308  MIRBuilder
2309  .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
 2310  .addImm(SubregIndex);
 2311  constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
 2312  constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
 2313  return &*Ins;
2314  };
2315 
2316  switch (EltSize) {
2317  case 16:
2318  return BuildFn(AArch64::hsub);
2319  case 32:
2320  return BuildFn(AArch64::ssub);
2321  case 64:
2322  return BuildFn(AArch64::dsub);
2323  default:
2324  return nullptr;
2325  }
2326 }
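  // E.g. widening an s32 FPR scalar %s to 128 bits produces roughly:
  //   %undef:fpr128 = IMPLICIT_DEF
  //   %vec:fpr128 = INSERT_SUBREG %undef, %s, ssub
  // so %s becomes lane 0 and the remaining lanes stay undefined.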
 2327 
 2328 bool AArch64InstructionSelector::selectMergeValues(
 2329  MachineInstr &I, MachineRegisterInfo &MRI) const {
2330  assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
2331  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2332  const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
2333  assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
2334 
2335  // At the moment we only support merging two s32s into an s64.
2336  if (I.getNumOperands() != 3)
2337  return false;
2338  if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
2339  return false;
2340  const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
2341  if (RB.getID() != AArch64::GPRRegBankID)
2342  return false;
2343 
2344  auto *DstRC = &AArch64::GPR64RegClass;
2345  unsigned SubToRegDef = MRI.createVirtualRegister(DstRC);
2346  MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
2347  TII.get(TargetOpcode::SUBREG_TO_REG))
2348  .addDef(SubToRegDef)
2349  .addImm(0)
2350  .addUse(I.getOperand(1).getReg())
2351  .addImm(AArch64::sub_32);
2352  unsigned SubToRegDef2 = MRI.createVirtualRegister(DstRC);
2353  // Need to anyext the second scalar before we can use bfm
2354  MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
2355  TII.get(TargetOpcode::SUBREG_TO_REG))
2356  .addDef(SubToRegDef2)
2357  .addImm(0)
2358  .addUse(I.getOperand(2).getReg())
2359  .addImm(AArch64::sub_32);
2360  MachineInstr &BFM =
2361  *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
2362  .addDef(I.getOperand(0).getReg())
2363  .addUse(SubToRegDef)
2364  .addUse(SubToRegDef2)
2365  .addImm(32)
2366  .addImm(31);
2367  constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
 2368  constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
 2369  constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
 2370  I.eraseFromParent();
2371  return true;
2372 }
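  // The BFMXri above works because, with immr = 32 and imms = 31, BFM
  // inserts the low imms + 1 = 32 bits of its second source at bit position
  // 64 - immr = 32. The first SUBREG_TO_REG result supplies bits [31:0] and
  // the second is moved into bits [63:32], giving the merged s64.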
2373 
2374 static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
2375  const unsigned EltSize) {
2376  // Choose a lane copy opcode and subregister based off of the size of the
2377  // vector's elements.
2378  switch (EltSize) {
2379  case 16:
2380  CopyOpc = AArch64::CPYi16;
2381  ExtractSubReg = AArch64::hsub;
2382  break;
2383  case 32:
2384  CopyOpc = AArch64::CPYi32;
2385  ExtractSubReg = AArch64::ssub;
2386  break;
2387  case 64:
2388  CopyOpc = AArch64::CPYi64;
2389  ExtractSubReg = AArch64::dsub;
2390  break;
2391  default:
2392  // Unknown size, bail out.
2393  LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
2394  return false;
2395  }
2396  return true;
2397 }
2398 
2399 MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
2400  Optional<unsigned> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
2401  unsigned VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
2402  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2403  unsigned CopyOpc = 0;
2404  unsigned ExtractSubReg = 0;
2405  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
2406  LLVM_DEBUG(
2407  dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
2408  return nullptr;
2409  }
2410 
2411  const TargetRegisterClass *DstRC =
2412  getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
2413  if (!DstRC) {
2414  LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
2415  return nullptr;
2416  }
2417 
2418  const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
2419  const LLT &VecTy = MRI.getType(VecReg);
2420  const TargetRegisterClass *VecRC =
2421  getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
2422  if (!VecRC) {
2423  LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
2424  return nullptr;
2425  }
2426 
2427  // The register that we're going to copy into.
2428  unsigned InsertReg = VecReg;
2429  if (!DstReg)
2430  DstReg = MRI.createVirtualRegister(DstRC);
2431  // If the lane index is 0, we just use a subregister COPY.
2432  if (LaneIdx == 0) {
2433  auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
2434  .addReg(VecReg, 0, ExtractSubReg);
2435  RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
2436  return &*Copy;
2437  }
2438 
2439  // Lane copies require 128-bit wide registers. If we're dealing with an
2440  // unpacked vector, then we need to move up to that width. Insert an implicit
2441  // def and a subregister insert to get us there.
2442  if (VecTy.getSizeInBits() != 128) {
2443  MachineInstr *ScalarToVector = emitScalarToVector(
2444  VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
2445  if (!ScalarToVector)
2446  return nullptr;
2447  InsertReg = ScalarToVector->getOperand(0).getReg();
2448  }
2449 
2450  MachineInstr *LaneCopyMI =
2451  MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
2452  constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
2453 
2454  // Make sure that we actually constrain the initial copy.
2455  RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
2456  return LaneCopyMI;
2457 }
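  // E.g. extracting lane 1 of a <2 x s64> held in %q is roughly
  //   %d:fpr64 = CPYi64 %q, 1
  // while lane 0 degenerates to %d = COPY %q.dsub (names illustrative). A
  // 64-bit source vector is first widened to 128 bits, per the code above,
  // because the lane copy instructions only take FPR128 operands.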
2458 
2459 bool AArch64InstructionSelector::selectExtractElt(
2460  MachineInstr &I, MachineRegisterInfo &MRI) const {
2461  assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
2462  "unexpected opcode!");
2463  unsigned DstReg = I.getOperand(0).getReg();
2464  const LLT NarrowTy = MRI.getType(DstReg);
2465  const unsigned SrcReg = I.getOperand(1).getReg();
2466  const LLT WideTy = MRI.getType(SrcReg);
2467  (void)WideTy;
2468  assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
2469  "source register size too small!");
2470  assert(NarrowTy.isScalar() && "cannot extract vector into vector!");
2471 
2472  // Need the lane index to determine the correct copy opcode.
2473  MachineOperand &LaneIdxOp = I.getOperand(2);
2474  assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
2475 
2476  if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
2477  LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
2478  return false;
2479  }
2480 
2481  // Find the index to extract from.
2482  auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
2483  if (!VRegAndVal)
2484  return false;
2485  unsigned LaneIdx = VRegAndVal->Value;
2486 
2487  MachineIRBuilder MIRBuilder(I);
2488 
2489  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2490  MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
2491  LaneIdx, MIRBuilder);
2492  if (!Extract)
2493  return false;
2494 
2495  I.eraseFromParent();
2496  return true;
2497 }
2498 
2499 bool AArch64InstructionSelector::selectSplitVectorUnmerge(
2500  MachineInstr &I, MachineRegisterInfo &MRI) const {
2501  unsigned NumElts = I.getNumOperands() - 1;
2502  unsigned SrcReg = I.getOperand(NumElts).getReg();
2503  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
2504  const LLT SrcTy = MRI.getType(SrcReg);
2505 
2506  assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
2507  if (SrcTy.getSizeInBits() > 128) {
2508  LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
2509  return false;
2510  }
2511 
2512  MachineIRBuilder MIB(I);
2513 
2514  // We implement a split vector operation by treating the sub-vectors as
2515  // scalars and extracting them.
2516  const RegisterBank &DstRB =
2517  *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
2518  for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
2519  unsigned Dst = I.getOperand(OpIdx).getReg();
2520  MachineInstr *Extract =
2521  emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
2522  if (!Extract)
2523  return false;
2524  }
2525  I.eraseFromParent();
2526  return true;
2527 }
 2528 
 2529 bool AArch64InstructionSelector::selectUnmergeValues(
 2530  MachineInstr &I, MachineRegisterInfo &MRI) const {
2531  assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2532  "unexpected opcode");
2533 
2534  // TODO: Handle unmerging into GPRs and from scalars to scalars.
2535  if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
2536  AArch64::FPRRegBankID ||
2537  RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
2538  AArch64::FPRRegBankID) {
2539  LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
2540  "currently unsupported.\n");
2541  return false;
2542  }
2543 
2544  // The last operand is the vector source register, and every other operand is
2545  // a register to unpack into.
2546  unsigned NumElts = I.getNumOperands() - 1;
2547  unsigned SrcReg = I.getOperand(NumElts).getReg();
2548  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
2549  const LLT WideTy = MRI.getType(SrcReg);
2550  (void)WideTy;
2551  assert(WideTy.isVector() && "can only unmerge from vector types!");
2552  assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
2553  "source register size too small!");
2554 
2555  if (!NarrowTy.isScalar())
2556  return selectSplitVectorUnmerge(I, MRI);
2557 
2558  MachineIRBuilder MIB(I);
2559 
2560  // Choose a lane copy opcode and subregister based off of the size of the
2561  // vector's elements.
2562  unsigned CopyOpc = 0;
2563  unsigned ExtractSubReg = 0;
2564  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
2565  return false;
2566 
2567  // Set up for the lane copies.
2568  MachineBasicBlock &MBB = *I.getParent();
2569 
2570  // Stores the registers we'll be copying from.
2571  SmallVector<unsigned, 4> InsertRegs;
2572 
2573  // We'll use the first register twice, so we only need NumElts-1 registers.
2574  unsigned NumInsertRegs = NumElts - 1;
2575 
2576  // If our elements fit into exactly 128 bits, then we can copy from the source
2577  // directly. Otherwise, we need to do a bit of setup with some subregister
2578  // inserts.
2579  if (NarrowTy.getSizeInBits() * NumElts == 128) {
2580  InsertRegs = SmallVector<unsigned, 4>(NumInsertRegs, SrcReg);
2581  } else {
2582  // No. We have to perform subregister inserts. For each insert, create an
2583  // implicit def and a subregister insert, and save the register we create.
2584  for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
2585  unsigned ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
2586  MachineInstr &ImpDefMI =
2587  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
2588  ImpDefReg);
2589 
2590  // Now, create the subregister insert from SrcReg.
2591  unsigned InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
2592  MachineInstr &InsMI =
2593  *BuildMI(MBB, I, I.getDebugLoc(),
2594  TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
2595  .addUse(ImpDefReg)
2596  .addUse(SrcReg)
2597  .addImm(AArch64::dsub);
2598 
 2599  constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
 2600  constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
 2601 
2602  // Save the register so that we can copy from it after.
2603  InsertRegs.push_back(InsertReg);
2604  }
2605  }
2606 
2607  // Now that we've created any necessary subregister inserts, we can
2608  // create the copies.
2609  //
2610  // Perform the first copy separately as a subregister copy.
2611  unsigned CopyTo = I.getOperand(0).getReg();
2612  auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
2613  .addReg(InsertRegs[0], 0, ExtractSubReg);
2614  constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
2615 
2616  // Now, perform the remaining copies as vector lane copies.
2617  unsigned LaneIdx = 1;
2618  for (unsigned InsReg : InsertRegs) {
2619  unsigned CopyTo = I.getOperand(LaneIdx).getReg();
2620  MachineInstr &CopyInst =
2621  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
2622  .addUse(InsReg)
2623  .addImm(LaneIdx);
2624  constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
2625  ++LaneIdx;
2626  }
2627 
2628  // Separately constrain the first copy's destination. Because of the
2629  // limitation in constrainOperandRegClass, we can't guarantee that this will
2630  // actually be constrained. So, do it ourselves using the second operand.
2631  const TargetRegisterClass *RC =
2632  MRI.getRegClassOrNull(I.getOperand(1).getReg());
2633  if (!RC) {
2634  LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
2635  return false;
2636  }
2637 
2638  RBI.constrainGenericRegister(CopyTo, *RC, MRI);
2639  I.eraseFromParent();
2640  return true;
2641 }
2642 
2643 bool AArch64InstructionSelector::selectConcatVectors(
2644  MachineInstr &I, MachineRegisterInfo &MRI) const {
2645  assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
2646  "Unexpected opcode");
2647  unsigned Dst = I.getOperand(0).getReg();
2648  unsigned Op1 = I.getOperand(1).getReg();
2649  unsigned Op2 = I.getOperand(2).getReg();
2650  MachineIRBuilder MIRBuilder(I);
2651  MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
2652  if (!ConcatMI)
2653  return false;
2654  I.eraseFromParent();
2655  return true;
2656 }
2657 
 2658 void AArch64InstructionSelector::collectShuffleMaskIndices(
 2659  MachineInstr &I, MachineRegisterInfo &MRI,
 2660  SmallVectorImpl<Optional<int>> &Idxs) const {
2661  MachineInstr *MaskDef = MRI.getVRegDef(I.getOperand(3).getReg());
2662  assert(
2663  MaskDef->getOpcode() == TargetOpcode::G_BUILD_VECTOR &&
2664  "G_SHUFFLE_VECTOR should have a constant mask operand as G_BUILD_VECTOR");
2665  // Find the constant indices.
2666  for (unsigned i = 1, e = MaskDef->getNumOperands(); i < e; ++i) {
2667  MachineInstr *ScalarDef = MRI.getVRegDef(MaskDef->getOperand(i).getReg());
2668  assert(ScalarDef && "Could not find vreg def of shufflevec index op");
2669  // Look through copies.
2670  while (ScalarDef->getOpcode() == TargetOpcode::COPY) {
2671  ScalarDef = MRI.getVRegDef(ScalarDef->getOperand(1).getReg());
2672  assert(ScalarDef && "Could not find def of copy operand");
2673  }
2674  if (ScalarDef->getOpcode() != TargetOpcode::G_CONSTANT) {
 2675  // This must be an undef if it is not a constant.
2676  assert(ScalarDef->getOpcode() == TargetOpcode::G_IMPLICIT_DEF);
2677  Idxs.push_back(None);
2678  } else {
2679  Idxs.push_back(ScalarDef->getOperand(1).getCImm()->getSExtValue());
2680  }
2681  }
2682 }
2683 
2684 unsigned
2685 AArch64InstructionSelector::emitConstantPoolEntry(Constant *CPVal,
2686  MachineFunction &MF) const {
2687  Type *CPTy = CPVal->getType();
2688  unsigned Align = MF.getDataLayout().getPrefTypeAlignment(CPTy);
2689  if (Align == 0)
2690  Align = MF.getDataLayout().getTypeAllocSize(CPTy);
 2691 
 2692  MachineConstantPool *MCP = MF.getConstantPool();
 2693  return MCP->getConstantPoolIndex(CPVal, Align);
2694 }
2695 
2696 MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
2697  Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
2698  unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());
2699 
2700  auto Adrp =
2701  MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
2702  .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
2703 
2704  MachineInstr *LoadMI = nullptr;
2705  switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
2706  case 16:
2707  LoadMI =
2708  &*MIRBuilder
2709  .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
 2710  .addConstantPoolIndex(CPIdx, 0,
 2711  AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
 2712  break;
2713  case 8:
2714  LoadMI = &*MIRBuilder
2715  .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
 2716  .addConstantPoolIndex(
 2717  CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
 2718  break;
2719  default:
2720  LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
2721  << *CPVal->getType());
2722  return nullptr;
 2723  }
 2724  constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
 2725  constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
2726  return LoadMI;
2727 }
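  // The emitted pair is the usual page-relative materialization. In
  // assembly (constant pool label illustrative):
  //   adrp x8, .LCPI0_0
  //   ldr  q0, [x8, :lo12:.LCPI0_0]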
2728 
 2729 /// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
2730 /// size and RB.
2731 static std::pair<unsigned, unsigned>
2732 getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
2733  unsigned Opc, SubregIdx;
2734  if (RB.getID() == AArch64::GPRRegBankID) {
2735  if (EltSize == 32) {
2736  Opc = AArch64::INSvi32gpr;
2737  SubregIdx = AArch64::ssub;
2738  } else if (EltSize == 64) {
2739  Opc = AArch64::INSvi64gpr;
2740  SubregIdx = AArch64::dsub;
2741  } else {
2742  llvm_unreachable("invalid elt size!");
2743  }
2744  } else {
2745  if (EltSize == 8) {
2746  Opc = AArch64::INSvi8lane;
2747  SubregIdx = AArch64::bsub;
2748  } else if (EltSize == 16) {
2749  Opc = AArch64::INSvi16lane;
2750  SubregIdx = AArch64::hsub;
2751  } else if (EltSize == 32) {
2752  Opc = AArch64::INSvi32lane;
2753  SubregIdx = AArch64::ssub;
2754  } else if (EltSize == 64) {
2755  Opc = AArch64::INSvi64lane;
2756  SubregIdx = AArch64::dsub;
2757  } else {
2758  llvm_unreachable("invalid elt size!");
2759  }
2760  }
2761  return std::make_pair(Opc, SubregIdx);
2762 }
2763 
2764 MachineInstr *AArch64InstructionSelector::emitVectorConcat(
2765  Optional<unsigned> Dst, unsigned Op1, unsigned Op2,
2766  MachineIRBuilder &MIRBuilder) const {
2767  // We implement a vector concat by:
2768  // 1. Use scalar_to_vector to insert the lower vector into the larger dest
2769  // 2. Insert the upper vector into the destination's upper element
2770  // TODO: some of this code is common with G_BUILD_VECTOR handling.
2771  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
2772 
2773  const LLT Op1Ty = MRI.getType(Op1);
2774  const LLT Op2Ty = MRI.getType(Op2);
2775 
2776  if (Op1Ty != Op2Ty) {
2777  LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
2778  return nullptr;
2779  }
2780  assert(Op1Ty.isVector() && "Expected a vector for vector concat");
2781 
2782  if (Op1Ty.getSizeInBits() >= 128) {
2783  LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
2784  return nullptr;
2785  }
2786 
2787  // At the moment we just support 64 bit vector concats.
2788  if (Op1Ty.getSizeInBits() != 64) {
 2789  LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
2790  return nullptr;
2791  }
2792 
2793  const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
2794  const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
2795  const TargetRegisterClass *DstRC =
2796  getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
2797 
2798  MachineInstr *WidenedOp1 =
2799  emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
2800  MachineInstr *WidenedOp2 =
2801  emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
2802  if (!WidenedOp1 || !WidenedOp2) {
2803  LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
2804  return nullptr;
2805  }
2806 
2807  // Now do the insert of the upper element.
2808  unsigned InsertOpc, InsSubRegIdx;
2809  std::tie(InsertOpc, InsSubRegIdx) =
2810  getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
2811 
2812  if (!Dst)
2813  Dst = MRI.createVirtualRegister(DstRC);
2814  auto InsElt =
2815  MIRBuilder
2816  .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
2817  .addImm(1) /* Lane index */
2818  .addUse(WidenedOp2->getOperand(0).getReg())
2819  .addImm(0);
2820  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
2821  return &*InsElt;
2822 }
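  // E.g. concatenating two <2 x s32> values %a and %b gives roughly:
  //   %wa:fpr128 = INSERT_SUBREG (IMPLICIT_DEF), %a, dsub
  //   %wb:fpr128 = INSERT_SUBREG (IMPLICIT_DEF), %b, dsub
  //   %dst = INSvi64lane %wa, 1, %wb, 0  ; move %b's low 64b into lane 1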
2823 
2824 MachineInstr *AArch64InstructionSelector::emitFMovForFConstant(
2825  MachineInstr &I, MachineRegisterInfo &MRI) const {
2826  assert(I.getOpcode() == TargetOpcode::G_FCONSTANT &&
2827  "Expected a G_FCONSTANT!");
2828  MachineOperand &ImmOp = I.getOperand(1);
2829  unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();
2830 
2831  // Only handle 32 and 64 bit defs for now.
2832  if (DefSize != 32 && DefSize != 64)
2833  return nullptr;
2834 
2835  // Don't handle null values using FMOV.
2836  if (ImmOp.getFPImm()->isNullValue())
2837  return nullptr;
2838 
2839  // Get the immediate representation for the FMOV.
2840  const APFloat &ImmValAPF = ImmOp.getFPImm()->getValueAPF();
2841  int Imm = DefSize == 32 ? AArch64_AM::getFP32Imm(ImmValAPF)
2842  : AArch64_AM::getFP64Imm(ImmValAPF);
2843 
2844  // If this is -1, it means the immediate can't be represented as the requested
2845  // floating point value. Bail.
2846  if (Imm == -1)
2847  return nullptr;
2848 
2849  // Update MI to represent the new FMOV instruction, constrain it, and return.
2850  ImmOp.ChangeToImmediate(Imm);
2851  unsigned MovOpc = DefSize == 32 ? AArch64::FMOVSi : AArch64::FMOVDi;
 2852  I.setDesc(TII.get(MovOpc));
 2853  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
 2854  return &I;
2855 }
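  // getFP32Imm/getFP64Imm only succeed for values expressible in the 8-bit
  // FMOV immediate encoding (roughly +/- n/16 * 2^e, magnitudes 0.125 to
  // 31.0). E.g. G_FCONSTANT float 2.0 becomes FMOVSi with the encoded
  // immediate, while a value like 0.1 yields -1 here and falls back to the
  // ordinary constant selection path.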
2856 
2857 bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
2858  MachineIRBuilder MIB(I);
 2859  MachineRegisterInfo &MRI = *MIB.getMRI();
 2860  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
 2861 
2862  // We want to recognize this pattern:
2863  //
2864  // $z = G_FCMP pred, $x, $y
2865  // ...
2866  // $w = G_SELECT $z, $a, $b
2867  //
2868  // Where the value of $z is *only* ever used by the G_SELECT (possibly with
2869  // some copies/truncs in between.)
2870  //
2871  // If we see this, then we can emit something like this:
2872  //
2873  // fcmp $x, $y
2874  // fcsel $w, $a, $b, pred
2875  //
2876  // Rather than emitting both of the rather long sequences in the standard
2877  // G_FCMP/G_SELECT select methods.
2878 
2879  // First, check if the condition is defined by a compare.
2880  MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
2881  while (CondDef) {
2882  // We can only fold if all of the defs have one use.
2883  if (!MRI.hasOneUse(CondDef->getOperand(0).getReg()))
2884  return false;
2885 
2886  // We can skip over G_TRUNC since the condition is 1-bit.
2887  // Truncating/extending can have no impact on the value.
2888  unsigned Opc = CondDef->getOpcode();
2889  if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
2890  break;
2891 
2892  // Can't see past copies from physregs.
2893  if (Opc == TargetOpcode::COPY &&
2894  TargetRegisterInfo::isPhysicalRegister(CondDef->getOperand(1).getReg()))
2895  return false;
2896 
2897  CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
2898  }
2899 
2900  // Is the condition defined by a compare?
2901  // TODO: Handle G_ICMP.
2902  if (!CondDef || CondDef->getOpcode() != TargetOpcode::G_FCMP)
2903  return false;
2904 
 2905  // Get the condition code for the select.
 2906  AArch64CC::CondCode CondCode;
 2907  AArch64CC::CondCode CondCode2;
 2908  changeFCMPPredToAArch64CC(
 2909  (CmpInst::Predicate)CondDef->getOperand(1).getPredicate(), CondCode,
 2910  CondCode2);
2911 
2912  // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
2913  // instructions to emit the comparison.
2914  // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
2915  // unnecessary.
2916  if (CondCode2 != AArch64CC::AL)
2917  return false;
2918 
2919  // Make sure we'll be able to select the compare.
2920  unsigned CmpOpc = selectFCMPOpc(*CondDef, MRI);
2921  if (!CmpOpc)
2922  return false;
2923 
2924  // Emit a new compare.
2925  auto Cmp = MIB.buildInstr(CmpOpc, {}, {CondDef->getOperand(2).getReg()});
2926  if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
2927  Cmp.addUse(CondDef->getOperand(3).getReg());
2928 
2929  // Emit the select.
2930  unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
2931  auto CSel =
2932  MIB.buildInstr(CSelOpc, {I.getOperand(0).getReg()},
2933  {I.getOperand(2).getReg(), I.getOperand(3).getReg()})
2934  .addImm(CondCode);
2935  constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
2936  constrainSelectedInstRegOperands(*CSel, TII, TRI, RBI);
2937  I.eraseFromParent();
2938  return true;
2939 }
2940 
2941 bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
2942  // Try to match a vector splat operation into a dup instruction.
2943  // We're looking for this pattern:
2944  // %scalar:gpr(s64) = COPY $x0
2945  // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
2946  // %cst0:gpr(s32) = G_CONSTANT i32 0
2947  // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
2948  // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
2949  // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
2950  // %zerovec(<2 x s32>)
2951  //
2952  // ...into:
2953  // %splat = DUP %scalar
2954  // We use the regbank of the scalar to determine which kind of dup to use.
2955  MachineIRBuilder MIB(I);
 2956  MachineRegisterInfo &MRI = *MIB.getMRI();
 2957  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
 2958  using namespace TargetOpcode;
2959  using namespace MIPatternMatch;
2960 
2961  // Begin matching the insert.
2962  auto *InsMI =
2963  findMIFromReg(I.getOperand(1).getReg(), G_INSERT_VECTOR_ELT, MIB);
2964  if (!InsMI)
2965  return false;
2966  // Match the undef vector operand.
2967  auto *UndefMI =
2968  findMIFromReg(InsMI->getOperand(1).getReg(), G_IMPLICIT_DEF, MIB);
2969  if (!UndefMI)
2970  return false;
2971  // Match the scalar being splatted.
2972  unsigned ScalarReg = InsMI->getOperand(2).getReg();
2973  const RegisterBank *ScalarRB = RBI.getRegBank(ScalarReg, MRI, TRI);
2974  // Match the index constant 0.
2975  int64_t Index = 0;
2976  if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ICst(Index)) || Index)
2977  return false;
2978 
2979  // The shuffle's second operand doesn't matter if the mask is all zero.
2980  auto *ZeroVec = findMIFromReg(I.getOperand(3).getReg(), G_BUILD_VECTOR, MIB);
2981  if (!ZeroVec)
2982  return false;
2983  int64_t Zero = 0;
2984  if (!mi_match(ZeroVec->getOperand(1).getReg(), MRI, m_ICst(Zero)) || Zero)
2985  return false;
2986  for (unsigned i = 1, e = ZeroVec->getNumOperands() - 1; i < e; ++i) {
2987  if (ZeroVec->getOperand(i).getReg() != ZeroVec->getOperand(1).getReg())
2988  return false; // This wasn't an all zeros vector.
2989  }
2990 
2991  // We're done, now find out what kind of splat we need.
2992  LLT VecTy = MRI.getType(I.getOperand(0).getReg());
2993  LLT EltTy = VecTy.getElementType();
2994  if (VecTy.getSizeInBits() != 128 || EltTy.getSizeInBits() < 32) {
2995  LLVM_DEBUG(dbgs() << "Could not optimize splat pattern < 128b yet");
2996  return false;
2997  }
2998  bool IsFP = ScalarRB->getID() == AArch64::FPRRegBankID;
2999  static const unsigned OpcTable[2][2] = {
3000  {AArch64::DUPv4i32gpr, AArch64::DUPv2i64gpr},
3001  {AArch64::DUPv4i32lane, AArch64::DUPv2i64lane}};
3002  unsigned Opc = OpcTable[IsFP][EltTy.getSizeInBits() == 64];
3003 
3004  // For FP splats, we need to widen the scalar reg via undef too.
3005  if (IsFP) {
3006  MachineInstr *Widen = emitScalarToVector(
3007  EltTy.getSizeInBits(), &AArch64::FPR128RegClass, ScalarReg, MIB);
3008  if (!Widen)
3009  return false;
3010  ScalarReg = Widen->getOperand(0).getReg();
3011  }
3012  auto Dup = MIB.buildInstr(Opc, {I.getOperand(0).getReg()}, {ScalarReg});
3013  if (IsFP)
3014  Dup.addImm(0);
3015  constrainSelectedInstRegOperands(*Dup, TII, TRI, RBI);
3016  I.eraseFromParent();
3017  return true;
3018 }
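  // E.g. splatting a GPR s32 across <4 x s32> selects DUPv4i32gpr directly,
  // while an FPR scalar is first widened into lane 0 of an FPR128 via
  // emitScalarToVector and then duplicated from that lane with
  // DUPv4i32lane ..., 0.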
3019 
3020 bool AArch64InstructionSelector::tryOptVectorShuffle(MachineInstr &I) const {
3021  if (TM.getOptLevel() == CodeGenOpt::None)
3022  return false;
3023  if (tryOptVectorDup(I))
3024  return true;
3025  return false;
3026 }
3027 
3028 bool AArch64InstructionSelector::selectShuffleVector(
3029  MachineInstr &I, MachineRegisterInfo &MRI) const {
3030  if (tryOptVectorShuffle(I))
3031  return true;
3032  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3033  unsigned Src1Reg = I.getOperand(1).getReg();
3034  const LLT Src1Ty = MRI.getType(Src1Reg);
3035  unsigned Src2Reg = I.getOperand(2).getReg();
3036  const LLT Src2Ty = MRI.getType(Src2Reg);
3037 
3038  MachineBasicBlock &MBB = *I.getParent();
3039  MachineFunction &MF = *MBB.getParent();
3040  LLVMContext &Ctx = MF.getFunction().getContext();
3041 
3042  // G_SHUFFLE_VECTOR doesn't really have a strictly enforced constant mask
3043  // operand, it comes in as a normal vector value which we have to analyze to
3044  // find the mask indices. If the mask element is undef, then
3045  // collectShuffleMaskIndices() will add a None entry for that index into
 3046  // the list.
 3047  SmallVector<Optional<int>, 8> Mask;
 3048  collectShuffleMaskIndices(I, MRI, Mask);
3049  assert(!Mask.empty() && "Expected to find mask indices");
3050 
3051  // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
3052  // it's originated from a <1 x T> type. Those should have been lowered into
3053  // G_BUILD_VECTOR earlier.
3054  if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
3055  LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
3056  return false;
3057  }
3058 
3059  unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
 3060 
 3061  SmallVector<Constant *, 64> CstIdxs;
 3062  for (auto &MaybeVal : Mask) {
 3063  // For now, we'll just assume any undef index is 0. This should be
 3064  // optimized in the future, e.g. to select DUP etc.
3065  int Val = MaybeVal.hasValue() ? *MaybeVal : 0;
3066  for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
3067  unsigned Offset = Byte + Val * BytesPerElt;
3068  CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
3069  }
3070  }
3071 
3072  MachineIRBuilder MIRBuilder(I);
3073 
3074  // Use a constant pool to load the index vector for TBL.
3075  Constant *CPVal = ConstantVector::get(CstIdxs);
3076  MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
3077  if (!IndexLoad) {
3078  LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
3079  return false;
3080  }
3081 
3082  if (DstTy.getSizeInBits() != 128) {
3083  assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
3084  // This case can be done with TBL1.
3085  MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
3086  if (!Concat) {
3087  LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
3088  return false;
3089  }
3090 
3091  // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
3092  IndexLoad =
3093  emitScalarToVector(64, &AArch64::FPR128RegClass,
3094  IndexLoad->getOperand(0).getReg(), MIRBuilder);
3095 
3096  auto TBL1 = MIRBuilder.buildInstr(
3097  AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
 3098  {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
 3099  constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
 3100 
3101  auto Copy =
3102  MIRBuilder
3103  .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
3104  .addReg(TBL1.getReg(0), 0, AArch64::dsub);
3105  RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
3106  I.eraseFromParent();
3107  return true;
3108  }
3109 
3110  // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
3111  // Q registers for regalloc.
3112  auto RegSeq = MIRBuilder
3113  .buildInstr(TargetOpcode::REG_SEQUENCE,
3114  {&AArch64::QQRegClass}, {Src1Reg})
3115  .addImm(AArch64::qsub0)
3116  .addUse(Src2Reg)
3117  .addImm(AArch64::qsub1);
3118 
3119  auto TBL2 =
3120  MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0).getReg()},
3121  {RegSeq, IndexLoad->getOperand(0).getReg()});
 3122  constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
 3123  constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
 3124  I.eraseFromParent();
3125  return true;
3126 }
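  // Worked example of the index vector: shuffling two <4 x s32> sources
  // with mask <0, 4, 1, 5> gives BytesPerElt = 4, so CstIdxs holds byte
  // indices 0-3, 16-19, 4-7, 20-23; TBL then gathers those bytes from the
  // 32-byte table formed by the two concatenated Q registers.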
3127 
3128 MachineInstr *AArch64InstructionSelector::emitLaneInsert(
3129  Optional<unsigned> DstReg, unsigned SrcReg, unsigned EltReg,
3130  unsigned LaneIdx, const RegisterBank &RB,
3131  MachineIRBuilder &MIRBuilder) const {
3132  MachineInstr *InsElt = nullptr;
3133  const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
3134  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3135 
3136  // Create a register to define with the insert if one wasn't passed in.
3137  if (!DstReg)
3138  DstReg = MRI.createVirtualRegister(DstRC);
3139 
3140  unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
3141  unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
3142 
3143  if (RB.getID() == AArch64::FPRRegBankID) {
3144  auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
3145  InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
3146  .addImm(LaneIdx)
3147  .addUse(InsSub->getOperand(0).getReg())
3148  .addImm(0);
3149  } else {
3150  InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
3151  .addImm(LaneIdx)
3152  .addUse(EltReg);
3153  }
3154 
3155  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
3156  return InsElt;
3157 }
3158 
3159 bool AArch64InstructionSelector::selectInsertElt(
3160  MachineInstr &I, MachineRegisterInfo &MRI) const {
3161  assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
3162 
3163  // Get information on the destination.
3164  unsigned DstReg = I.getOperand(0).getReg();
3165  const LLT DstTy = MRI.getType(DstReg);
3166  unsigned VecSize = DstTy.getSizeInBits();
3167 
3168  // Get information on the element we want to insert into the destination.
3169  unsigned EltReg = I.getOperand(2).getReg();
3170  const LLT EltTy = MRI.getType(EltReg);
3171  unsigned EltSize = EltTy.getSizeInBits();
3172  if (EltSize < 16 || EltSize > 64)
3173  return false; // Don't support all element types yet.
3174 
3175  // Find the definition of the index. Bail out if it's not defined by a
3176  // G_CONSTANT.
3177  unsigned IdxReg = I.getOperand(3).getReg();
3178  auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
3179  if (!VRegAndVal)
3180  return false;
3181  unsigned LaneIdx = VRegAndVal->Value;
3182 
3183  // Perform the lane insert.
3184  unsigned SrcReg = I.getOperand(1).getReg();
3185  const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
3186  MachineIRBuilder MIRBuilder(I);
3187 
3188  if (VecSize < 128) {
3189  // If the vector we're inserting into is smaller than 128 bits, widen it
3190  // to 128 to do the insert.
3191  MachineInstr *ScalarToVec = emitScalarToVector(
3192  VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
3193  if (!ScalarToVec)
3194  return false;
3195  SrcReg = ScalarToVec->getOperand(0).getReg();
3196  }
3197 
3198  // Create an insert into a new FPR128 register.
3199  // Note that if our vector is already 128 bits, we end up emitting an extra
3200  // register.
3201  MachineInstr *InsMI =
3202  emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);
3203 
3204  if (VecSize < 128) {
3205  // If we had to widen to perform the insert, then we have to demote back to
3206  // the original size to get the result we want.
3207  unsigned DemoteVec = InsMI->getOperand(0).getReg();
3208  const TargetRegisterClass *RC =
3209  getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
3210  if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3211  LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3212  return false;
3213  }
3214  unsigned SubReg = 0;
3215  if (!getSubRegForClass(RC, TRI, SubReg))
3216  return false;
3217  if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3218  LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
3219  << "\n");
3220  return false;
3221  }
3222  MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3223  .addReg(DemoteVec, 0, SubReg);
3224  RBI.constrainGenericRegister(DstReg, *RC, MRI);
3225  } else {
3226  // No widening needed.
3227  InsMI->getOperand(0).setReg(DstReg);
3228  constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3229  }
3230 
3231  I.eraseFromParent();
3232  return true;
3233 }
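// Illustrative sketch (assumed MIR, names made up): inserting %elt into lane 0
// of a 64-bit <2 x s32> vector widens to 128 bits, inserts, then demotes:
//
//   %wide:fpr128 = INSERT_SUBREG (IMPLICIT_DEF), %src, %subreg.dsub
//   %ins:fpr128  = INSvi32lane %wide, 0, ..., 0
//   %dst:fpr64   = COPY %ins.dsub
//
// The exact opcodes and subregister indices come from getInsertVecEltOpInfo
// and getSubRegForClass.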
3234 
3235 bool AArch64InstructionSelector::selectBuildVector(
3236  MachineInstr &I, MachineRegisterInfo &MRI) const {
3237  assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
3238  // Until we port more of the optimized selections, just use a vector
3239  // insert sequence.
3240  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3241  const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
3242  unsigned EltSize = EltTy.getSizeInBits();
3243  if (EltSize < 16 || EltSize > 64)
3244  return false; // Don't support all element types yet.
3245  const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3246  MachineIRBuilder MIRBuilder(I);
3247 
3248  const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
3249  MachineInstr *ScalarToVec =
3250  emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
3251  I.getOperand(1).getReg(), MIRBuilder);
3252  if (!ScalarToVec)
3253  return false;
3254 
3255  unsigned DstVec = ScalarToVec->getOperand(0).getReg();
3256  unsigned DstSize = DstTy.getSizeInBits();
3257 
3258  // Keep track of the last MI we inserted. Later on, we might be able to save
3259  // a copy using it.
3260  MachineInstr *PrevMI = nullptr;
3261  for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
3262  // Note that if we don't do a subregister copy, we can end up making an
3263  // extra register.
3264  PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
3265  MIRBuilder);
3266  DstVec = PrevMI->getOperand(0).getReg();
3267  }
3268 
3269  // If DstTy's size in bits is less than 128, then emit a subregister copy
3270  // from DstVec to the last register we've defined.
3271  if (DstSize < 128) {
3272  // Force this to be FPR using the destination vector.
3273  const TargetRegisterClass *RC =
3274  getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
3275  if (!RC)
3276  return false;
3277  if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3278  LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3279  return false;
3280  }
3281 
3282  unsigned SubReg = 0;
3283  if (!getSubRegForClass(RC, TRI, SubReg))
3284  return false;
3285  if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3286  LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
3287  << "\n");
3288  return false;
3289  }
3290 
3291  unsigned Reg = MRI.createVirtualRegister(RC);
3292  unsigned DstReg = I.getOperand(0).getReg();
3293 
3294  MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3295  .addReg(DstVec, 0, SubReg);
3296  MachineOperand &RegOp = I.getOperand(1);
3297  RegOp.setReg(Reg);
3298  RBI.constrainGenericRegister(DstReg, *RC, MRI);
3299  } else {
3300  // We don't need a subregister copy. Save a copy by re-using the
3301  // destination register on the final insert.
3302  assert(PrevMI && "PrevMI was null?");
3303  PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
3304  constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
3305  }
3306 
3307  I.eraseFromParent();
3308  return true;
3309 }
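// Illustrative sketch: a G_BUILD_VECTOR of four 32-bit FPR elements would be
// selected as a scalar_to_vector of element 0 plus three lane inserts,
// roughly (register names are made up; each element is itself turned into a
// vector by emitLaneInsert):
//
//   %v0:fpr128  = INSERT_SUBREG (IMPLICIT_DEF), %e0, %subreg.ssub
//   %v1:fpr128  = INSvi32lane %v0, 1, ..., 0
//   %v2:fpr128  = INSvi32lane %v1, 2, ..., 0
//   %dst:fpr128 = INSvi32lane %v2, 3, ..., 0
//
// A 64-bit destination would instead end with a dsub subregister COPY.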
3310 
3311 /// Helper function to find an intrinsic ID on a MachineInstr. Returns the
3312 /// ID if it exists, and 0 otherwise.
3313 static unsigned findIntrinsicID(MachineInstr &I) {
3314  auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
3315  return Op.isIntrinsicID();
3316  });
3317  if (IntrinOp == I.operands_end())
3318  return 0;
3319  return IntrinOp->getIntrinsicID();
3320 }
3321 
3322 /// Helper function to emit the correct opcode for an llvm.aarch64.stlxr
3323 /// intrinsic.
3324 static unsigned getStlxrOpcode(unsigned NumBytesToStore) {
3325  switch (NumBytesToStore) {
3326  // TODO: 1, 2, and 4 byte stores.
3327  case 8:
3328  return AArch64::STLXRX;
3329  default:
3330  LLVM_DEBUG(dbgs() << "Unexpected number of bytes to store! ("
3331  << NumBytesToStore << ")\n");
3332  break;
3333  }
3334  return 0;
3335 }
3336 
3337 bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
3338  MachineInstr &I, MachineRegisterInfo &MRI) const {
3339  // Find the intrinsic ID.
3340  unsigned IntrinID = findIntrinsicID(I);
3341  if (!IntrinID)
3342  return false;
3343  MachineIRBuilder MIRBuilder(I);
3344 
3345  // Select the instruction.
3346  switch (IntrinID) {
3347  default:
3348  return false;
3349  case Intrinsic::trap:
3350  MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
3351  break;
3352  case Intrinsic::aarch64_stlxr:
3353  unsigned StatReg = I.getOperand(0).getReg();
3354  assert(RBI.getSizeInBits(StatReg, MRI, TRI) == 32 &&
3355  "Status register must be 32 bits!");
3356  unsigned SrcReg = I.getOperand(2).getReg();
3357 
3358  if (RBI.getSizeInBits(SrcReg, MRI, TRI) != 64) {
3359  LLVM_DEBUG(dbgs() << "Only support 64-bit sources right now.\n");
3360  return false;
3361  }
3362 
3363  unsigned PtrReg = I.getOperand(3).getReg();
3364  assert(MRI.getType(PtrReg).isPointer() && "Expected pointer operand");
3365 
3366  // Expect only one memory operand.
3367  if (!I.hasOneMemOperand())
3368  return false;
3369 
3370  const MachineMemOperand *MemOp = *I.memoperands_begin();
3371  unsigned NumBytesToStore = MemOp->getSize();
3372  unsigned Opc = getStlxrOpcode(NumBytesToStore);
3373  if (!Opc)
3374  return false;
3375 
3376  auto StoreMI = MIRBuilder.buildInstr(Opc, {StatReg}, {SrcReg, PtrReg});
3377  constrainSelectedInstRegOperands(*StoreMI, TII, TRI, RBI);
3378  }
3379 
3380  I.eraseFromParent();
3381  return true;
3382 }
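// Illustrative sketch (intrinsic mangling assumed): an IR call like
//   %status = call i32 @llvm.aarch64.stlxr.p0i64(i64 %val, i64* %addr)
// arrives here as G_INTRINSIC_W_SIDE_EFFECTS with a single 8-byte memory
// operand and would be selected to something like:
//   %status:gpr32 = STLXRX %val, %addr
// Only 8-byte stores are handled so far; see getStlxrOpcode above.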
3383 
3384 bool AArch64InstructionSelector::selectIntrinsic(
3385  MachineInstr &I, MachineRegisterInfo &MRI) const {
3386  unsigned IntrinID = findIntrinsicID(I);
3387  if (!IntrinID)
3388  return false;
3389  MachineIRBuilder MIRBuilder(I);
3390 
3391  switch (IntrinID) {
3392  default:
3393  break;
3394  case Intrinsic::aarch64_crypto_sha1h:
3395  unsigned DstReg = I.getOperand(0).getReg();
3396  unsigned SrcReg = I.getOperand(2).getReg();
3397 
3398  // FIXME: Should this be an assert?
3399  if (MRI.getType(DstReg).getSizeInBits() != 32 ||
3400  MRI.getType(SrcReg).getSizeInBits() != 32)
3401  return false;
3402 
3403  // The operation has to happen on FPRs. Set up some new FPR registers for
3404  // the source and destination if they are on GPRs.
3405  if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
3406  SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
3407  MIRBuilder.buildCopy({SrcReg}, {I.getOperand(2)});
3408 
3409  // Make sure the copy ends up getting constrained properly.
3410  RBI.constrainGenericRegister(I.getOperand(2).getReg(),
3411  AArch64::GPR32RegClass, MRI);
3412  }
3413 
3414  if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
3415  DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
3416 
3417  // Actually insert the instruction.
3418  auto SHA1Inst = MIRBuilder.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
3419  constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
3420 
3421  // Did we create a new register for the destination?
3422  if (DstReg != I.getOperand(0).getReg()) {
3423  // Yep. Copy the result of the instruction back into the original
3424  // destination.
3425  MIRBuilder.buildCopy({I.getOperand(0)}, {DstReg});
3426  RBI.constrainGenericRegister(I.getOperand(0).getReg(),
3427  AArch64::GPR32RegClass, MRI);
3428  }
3429 
3430  I.eraseFromParent();
3431  return true;
3432  }
3433  return false;
3434 }
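// Illustrative sketch: for llvm.aarch64.crypto.sha1h with the source and
// destination both assigned to the GPR bank, the selected sequence would be
// roughly (register names are made up):
//   %s:fpr32   = COPY %src:gpr32
//   %h:fpr32   = SHA1Hrr %s
//   %dst:gpr32 = COPY %h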
3435 
3436 /// SelectArithImmed - Select an immediate value that can be represented as
3437 /// a 12-bit value shifted left by either 0 or 12. If so, return the 12-bit
3438 /// value and the shift amount as renderer functions; otherwise return None.
3439 InstructionSelector::ComplexRendererFns
3440 AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
3441  MachineInstr &MI = *Root.getParent();
3442  MachineBasicBlock &MBB = *MI.getParent();
3443  MachineFunction &MF = *MBB.getParent();
3444  MachineRegisterInfo &MRI = MF.getRegInfo();
3445 
3446  // This function is called from the addsub_shifted_imm ComplexPattern,
3447  // which lists [imm] as the list of opcodes it's interested in; however,
3448  // we still need to check whether the operand is actually an immediate
3449  // here because the ComplexPattern opcode list is only used in
3450  // root-level opcode matching.
3451  uint64_t Immed;
3452  if (Root.isImm())
3453  Immed = Root.getImm();
3454  else if (Root.isCImm())
3455  Immed = Root.getCImm()->getZExtValue();
3456  else if (Root.isReg()) {
3457  MachineInstr *Def = MRI.getVRegDef(Root.getReg());
3458  if (Def->getOpcode() != TargetOpcode::G_CONSTANT)
3459  return None;
3460  MachineOperand &Op1 = Def->getOperand(1);
3461  if (!Op1.isCImm() || Op1.getCImm()->getBitWidth() > 64)
3462  return None;
3463  Immed = Op1.getCImm()->getZExtValue();
3464  } else
3465  return None;
3466 
3467  unsigned ShiftAmt;
3468 
3469  if (Immed >> 12 == 0) {
3470  ShiftAmt = 0;
3471  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
3472  ShiftAmt = 12;
3473  Immed = Immed >> 12;
3474  } else
3475  return None;
3476 
3477  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
3478  return {{
3479  [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
3480  [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
3481  }};
3482 }
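// Worked example (illustrative): Immed = 0x456000 fails the first check
// (0x456000 >> 12 == 0x456), but its low 12 bits are clear and it fits in
// 24 bits, so it is matched as Immed = 0x456 with ShiftAmt = 12 (LSL #12).
// A value such as 0x1000000 satisfies neither form and yields None.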
3483 
3484 /// Select a "register plus unscaled signed 9-bit immediate" address. This
3485 /// should only match when there is an offset that is not valid for a scaled
3486 /// immediate addressing mode. The "Size" argument is the size in bytes of the
3487 /// memory reference, which is needed here to know what is valid for a scaled
3488 /// immediate.
3489 InstructionSelector::ComplexRendererFns
3490 AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
3491  unsigned Size) const {
3492  MachineRegisterInfo &MRI =
3493  Root.getParent()->getParent()->getParent()->getRegInfo();
3494 
3495  if (!Root.isReg())
3496  return None;
3497 
3498  if (!isBaseWithConstantOffset(Root, MRI))
3499  return None;
3500 
3501  MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
3502  if (!RootDef)
3503  return None;
3504 
3505  MachineOperand &OffImm = RootDef->getOperand(2);
3506  if (!OffImm.isReg())
3507  return None;
3508  MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
3509  if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
3510  return None;
3511  int64_t RHSC;
3512  MachineOperand &RHSOp1 = RHS->getOperand(1);
3513  if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
3514  return None;
3515  RHSC = RHSOp1.getCImm()->getSExtValue();
3516 
3517  // If the offset is valid as a scaled immediate, don't match here.
3518  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
3519  return None;
3520  if (RHSC >= -256 && RHSC < 256) {
3521  MachineOperand &Base = RootDef->getOperand(1);
3522  return {{
3523  [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
3524  [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
3525  }};
3526  }
3527  return None;
3528 }
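// Worked example (illustrative): for a 4-byte access (Size == 4) with a
// constant offset of 3, the scaled check fails (3 & 3 != 0) and 3 lies in
// [-256, 256), so the base/offset pair is matched here (suitable for the
// unscaled forms such as LDUR/STUR). An offset of 8 is valid as a scaled
// immediate and therefore returns None.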
3529 
3530 /// Select a "register plus scaled unsigned 12-bit immediate" address. The
3531 /// "Size" argument is the size in bytes of the memory reference, which
3532 /// determines the scale.
3533 InstructionSelector::ComplexRendererFns
3534 AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
3535  unsigned Size) const {
3536  MachineRegisterInfo &MRI =
3537  Root.getParent()->getParent()->getParent()->getRegInfo();
3538 
3539  if (!Root.isReg())
3540  return None;
3541 
3542  MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
3543  if (!RootDef)
3544  return None;
3545 
3546  if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
3547  return {{
3548  [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
3549  [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
3550  }};
3551  }
3552 
3553  if (isBaseWithConstantOffset(Root, MRI)) {
3554  MachineOperand &LHS = RootDef->getOperand(1);
3555  MachineOperand &RHS = RootDef->getOperand(2);
3556  MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
3557  MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
3558  if (LHSDef && RHSDef) {
3559  int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
3560  unsigned Scale = Log2_32(Size);
3561  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
3562  if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
3563  return {{
3564  [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
3565  [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
3566  }};
3567 
3568  return {{
3569  [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
3570  [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
3571  }};
3572  }
3573  }
3574  }
3575 
3576  // Before falling back to our general case, check if the unscaled
3577  // instructions can handle this. If so, that's preferable.
3578  if (selectAddrModeUnscaled(Root, Size).hasValue())
3579  return None;
3580 
3581  return {{
3582  [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
3583  [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
3584  }};
3585 }
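// Worked example (illustrative): for an 8-byte access (Size == 8, so
// Scale == 3) with a constant offset of 16, (16 & 7) == 0 and
// 16 < (0x1000 << 3), so the rendered immediate is 16 >> 3 == 2.
// An offset of 12 fails the alignment check, is matched by
// selectAddrModeUnscaled instead, and this function returns None for it.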
3586 
3587 void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
3588  const MachineInstr &MI) const {
3589  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
3590  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
3591  Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
3592  assert(CstVal && "Expected constant value");
3593  MIB.addImm(CstVal.getValue());
3594 }
3595 
3596 namespace llvm {
3597 InstructionSelector *
3598 createAArch64InstructionSelector(const AArch64TargetMachine &TM,
3599  AArch64Subtarget &Subtarget,
3600  AArch64RegisterBankInfo &RBI) {
3601  return new AArch64InstructionSelector(TM, Subtarget, RBI);
3602 }
3603 }