LLVM  10.0.0svn
AArch64InstructionSelector.cpp
1 //===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file implements the targeting of the InstructionSelector class for
10 /// AArch64.
11 /// \todo This should be generated by TableGen.
12 //===----------------------------------------------------------------------===//
13 
14 #include "AArch64InstrInfo.h"
17 #include "AArch64RegisterInfo.h"
18 #include "AArch64Subtarget.h"
19 #include "AArch64TargetMachine.h"
21 #include "llvm/ADT/Optional.h"
34 #include "llvm/IR/Type.h"
35 #include "llvm/Support/Debug.h"
37 
38 #define DEBUG_TYPE "aarch64-isel"
39 
40 using namespace llvm;
41 
42 namespace {
43 
44 #define GET_GLOBALISEL_PREDICATE_BITSET
45 #include "AArch64GenGlobalISel.inc"
46 #undef GET_GLOBALISEL_PREDICATE_BITSET
47 
48 class AArch64InstructionSelector : public InstructionSelector {
49 public:
50  AArch64InstructionSelector(const AArch64TargetMachine &TM,
51  const AArch64Subtarget &STI,
52  const AArch64RegisterBankInfo &RBI);
53 
54  bool select(MachineInstr &I) override;
55  static const char *getName() { return DEBUG_TYPE; }
56 
57  void setupMF(MachineFunction &MF, CodeGenCoverage &CoverageInfo) override {
58  InstructionSelector::setupMF(MF, CoverageInfo);
59 
60  // hasFnAttribute() is expensive to call on every BRCOND selection, so
61  // cache it here for each run of the selector.
62  ProduceNonFlagSettingCondBr =
63  !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
64  }
65 
66 private:
67  /// tblgen-erated 'select' implementation, used as the initial selector for
68  /// the patterns that don't require complex C++.
69  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
70 
71  // A lowering phase that runs before any selection attempts.
72 
73  void preISelLower(MachineInstr &I) const;
74 
75  // An early selection function that runs before the selectImpl() call.
76  bool earlySelect(MachineInstr &I) const;
77 
78  bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
79  bool earlySelectLoad(MachineInstr &I, MachineRegisterInfo &MRI) const;
80 
81  /// Eliminate same-sized cross-bank copies into stores before selectImpl().
82  void contractCrossBankCopyIntoStore(MachineInstr &I,
83  MachineRegisterInfo &MRI) const;
84 
85  bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
86  MachineRegisterInfo &MRI) const;
87  bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
88  MachineRegisterInfo &MRI) const;
89 
90  bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
91  MachineRegisterInfo &MRI) const;
92 
93  bool selectVectorASHR(MachineInstr &I, MachineRegisterInfo &MRI) const;
94  bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
95 
96  // Helper to generate an equivalent of scalar_to_vector into a new register,
97  // returned via 'Dst'.
98  MachineInstr *emitScalarToVector(unsigned EltSize,
99  const TargetRegisterClass *DstRC,
100  Register Scalar,
101  MachineIRBuilder &MIRBuilder) const;
102 
103  /// Emit a lane insert into \p DstReg, or a new vector register if None is
104  /// provided.
105  ///
106  /// The lane inserted into is defined by \p LaneIdx. The vector source
107  /// register is given by \p SrcReg. The register containing the element is
108  /// given by \p EltReg.
109  MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
110  Register EltReg, unsigned LaneIdx,
111  const RegisterBank &RB,
112  MachineIRBuilder &MIRBuilder) const;
113  bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
114  bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
117 
118  bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
119  bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
120  bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
121  bool selectSplitVectorUnmerge(MachineInstr &I,
122  MachineRegisterInfo &MRI) const;
123  bool selectIntrinsicWithSideEffects(MachineInstr &I,
124  MachineRegisterInfo &MRI) const;
125  bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI) const;
126  bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
127  bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
128  bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
129  bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
130  bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;
131  bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI) const;
132 
133  unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
134  MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
135  MachineIRBuilder &MIRBuilder) const;
136 
137  // Emit a vector concat operation.
138  MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
139  Register Op2,
140  MachineIRBuilder &MIRBuilder) const;
141  MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
142  MachineOperand &Predicate,
143  MachineIRBuilder &MIRBuilder) const;
144  MachineInstr *emitADD(Register DefReg, MachineOperand &LHS, MachineOperand &RHS,
145  MachineIRBuilder &MIRBuilder) const;
146  MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
147  MachineIRBuilder &MIRBuilder) const;
148  MachineInstr *emitTST(const Register &LHS, const Register &RHS,
149  MachineIRBuilder &MIRBuilder) const;
150  MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
151  const RegisterBank &DstRB, LLT ScalarTy,
152  Register VecReg, unsigned LaneIdx,
153  MachineIRBuilder &MIRBuilder) const;
154 
155  /// Helper function for selecting G_FCONSTANT. If the G_FCONSTANT can be
156  /// materialized using a FMOV instruction, then update MI and return it.
157  /// Otherwise, do nothing and return a nullptr.
158  MachineInstr *emitFMovForFConstant(MachineInstr &MI,
159  MachineRegisterInfo &MRI) const;
160 
161  /// Emit a CSet for a compare.
162  MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
163  MachineIRBuilder &MIRBuilder) const;
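 // For reference (architectural note, not a claim about the exact emitted
 // opcode): on AArch64 "cset Wd, cc" is an alias of
 // "csinc Wd, wzr, wzr, invert(cc)", so the helper above is expected to
 // materialize as a flag-conditional CSINC of WZR.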
164 
165  // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
166  // We use these manually instead of using the importer since it doesn't
167  // support SDNodeXForm.
168  ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
169  ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
170  ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
171  ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
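 // For reference, assuming the usual i32shift_a/i32shift_b definitions in
 // AArch64InstrInfo.td: for a left shift by Imm on a 32-bit value the two
 // rendered immediates are A = (32 - Imm) % 32 and B = 31 - Imm; the 64-bit
 // helpers use 64/63 in place of 32/31.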
172 
173  ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
174  ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
175  ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
176 
177  ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
178  unsigned Size) const;
179 
180  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
181  return selectAddrModeUnscaled(Root, 1);
182  }
183  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
184  return selectAddrModeUnscaled(Root, 2);
185  }
186  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
187  return selectAddrModeUnscaled(Root, 4);
188  }
189  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
190  return selectAddrModeUnscaled(Root, 8);
191  }
192  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
193  return selectAddrModeUnscaled(Root, 16);
194  }
195 
196  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
197  unsigned Size) const;
198  template <int Width>
199  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
200  return selectAddrModeIndexed(Root, Width / 8);
201  }
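 // For example, selectAddrModeIndexed<64> matches an 8-byte scaled
 // base-plus-unsigned-immediate access, i.e. the form used by LDRXui/STRXui.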
202 
203  bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
204  const MachineRegisterInfo &MRI) const;
205  ComplexRendererFns
206  selectAddrModeShiftedExtendXReg(MachineOperand &Root,
207  unsigned SizeInBytes) const;
208  ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
209  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
210  unsigned SizeInBytes) const;
211 
212  ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const;
213 
214  ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
215  return selectShiftedRegister(Root);
216  }
217 
218  ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
219  // TODO: selectShiftedRegister should allow for rotates on logical shifts.
220  // For now, make them the same. The only difference between the two is that
221  // logical shifts are allowed to fold in rotates. Otherwise, these are
222  // functionally the same.
223  return selectShiftedRegister(Root);
224  }
225 
226  void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const;
227  void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I) const;
228  void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I) const;
229 
230  // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
231  void materializeLargeCMVal(MachineInstr &I, const Value *V,
232  unsigned OpFlags) const;
233 
234  // Optimization methods.
235  bool tryOptVectorShuffle(MachineInstr &I) const;
236  bool tryOptVectorDup(MachineInstr &MI) const;
237  bool tryOptSelect(MachineInstr &MI) const;
238  MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
239  MachineOperand &Predicate,
240  MachineIRBuilder &MIRBuilder) const;
241 
242  const AArch64TargetMachine &TM;
243  const AArch64Subtarget &STI;
244  const AArch64InstrInfo &TII;
245  const AArch64RegisterInfo &TRI;
246  const AArch64RegisterBankInfo &RBI;
247 
248  bool ProduceNonFlagSettingCondBr = false;
249 
250 #define GET_GLOBALISEL_PREDICATES_DECL
251 #include "AArch64GenGlobalISel.inc"
252 #undef GET_GLOBALISEL_PREDICATES_DECL
253 
254 // We declare the temporaries used by selectImpl() in the class to minimize the
255 // cost of constructing placeholder values.
256 #define GET_GLOBALISEL_TEMPORARIES_DECL
257 #include "AArch64GenGlobalISel.inc"
258 #undef GET_GLOBALISEL_TEMPORARIES_DECL
259 };
260 
261 } // end anonymous namespace
262 
263 #define GET_GLOBALISEL_IMPL
264 #include "AArch64GenGlobalISel.inc"
265 #undef GET_GLOBALISEL_IMPL
266 
267 AArch64InstructionSelector::AArch64InstructionSelector(
268  const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
269  const AArch64RegisterBankInfo &RBI)
270  : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
271  TRI(*STI.getRegisterInfo()), RBI(RBI),
272 #define GET_GLOBALISEL_PREDICATES_INIT
273 #include "AArch64GenGlobalISel.inc"
274 #undef GET_GLOBALISEL_PREDICATES_INIT
275 #define GET_GLOBALISEL_TEMPORARIES_INIT
276 #include "AArch64GenGlobalISel.inc"
277 #undef GET_GLOBALISEL_TEMPORARIES_INIT
278 {
279 }
280 
281 // FIXME: This should be target-independent, inferred from the types declared
282 // for each class in the bank.
283 static const TargetRegisterClass *
284 getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
285  const RegisterBankInfo &RBI,
286  bool GetAllRegSet = false) {
287  if (RB.getID() == AArch64::GPRRegBankID) {
288  if (Ty.getSizeInBits() <= 32)
289  return GetAllRegSet ? &AArch64::GPR32allRegClass
290  : &AArch64::GPR32RegClass;
291  if (Ty.getSizeInBits() == 64)
292  return GetAllRegSet ? &AArch64::GPR64allRegClass
293  : &AArch64::GPR64RegClass;
294  return nullptr;
295  }
296 
297  if (RB.getID() == AArch64::FPRRegBankID) {
298  if (Ty.getSizeInBits() <= 16)
299  return &AArch64::FPR16RegClass;
300  if (Ty.getSizeInBits() == 32)
301  return &AArch64::FPR32RegClass;
302  if (Ty.getSizeInBits() == 64)
303  return &AArch64::FPR64RegClass;
304  if (Ty.getSizeInBits() == 128)
305  return &AArch64::FPR128RegClass;
306  return nullptr;
307  }
308 
309  return nullptr;
310 }
311 
312 /// Given a register bank, and size in bits, return the smallest register class
313 /// that can represent that combination.
314 static const TargetRegisterClass *
315 getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
316  bool GetAllRegSet = false) {
317  unsigned RegBankID = RB.getID();
318 
319  if (RegBankID == AArch64::GPRRegBankID) {
320  if (SizeInBits <= 32)
321  return GetAllRegSet ? &AArch64::GPR32allRegClass
322  : &AArch64::GPR32RegClass;
323  if (SizeInBits == 64)
324  return GetAllRegSet ? &AArch64::GPR64allRegClass
325  : &AArch64::GPR64RegClass;
326  }
327 
328  if (RegBankID == AArch64::FPRRegBankID) {
329  switch (SizeInBits) {
330  default:
331  return nullptr;
332  case 8:
333  return &AArch64::FPR8RegClass;
334  case 16:
335  return &AArch64::FPR16RegClass;
336  case 32:
337  return &AArch64::FPR32RegClass;
338  case 64:
339  return &AArch64::FPR64RegClass;
340  case 128:
341  return &AArch64::FPR128RegClass;
342  }
343  }
344 
345  return nullptr;
346 }
347 
348 /// Returns the correct subregister to use for a given register class.
349 static bool getSubRegForClass(const TargetRegisterClass *RC,
350                               const TargetRegisterInfo &TRI, unsigned &SubReg) {
351  switch (TRI.getRegSizeInBits(*RC)) {
352  case 8:
353  SubReg = AArch64::bsub;
354  break;
355  case 16:
356  SubReg = AArch64::hsub;
357  break;
358  case 32:
359  if (RC == &AArch64::GPR32RegClass)
360  SubReg = AArch64::sub_32;
361  else
362  SubReg = AArch64::ssub;
363  break;
364  case 64:
365  SubReg = AArch64::dsub;
366  break;
367  default:
368  LLVM_DEBUG(
369  dbgs() << "Couldn't find appropriate subregister for register class.");
370  return false;
371  }
372 
373  return true;
374 }
375 
376 /// Check whether \p I is a currently unsupported binary operation:
377 /// - it has an unsized type
378 /// - an operand is not a vreg
379 /// - its operands are not all on the same bank
380 /// These are checks that should someday live in the verifier, but right now,
381 /// these are mostly limitations of the AArch64 selector.
382 static bool unsupportedBinOp(const MachineInstr &I,
383  const AArch64RegisterBankInfo &RBI,
384  const MachineRegisterInfo &MRI,
385  const AArch64RegisterInfo &TRI) {
386  LLT Ty = MRI.getType(I.getOperand(0).getReg());
387  if (!Ty.isValid()) {
388  LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
389  return true;
390  }
391 
392  const RegisterBank *PrevOpBank = nullptr;
393  for (auto &MO : I.operands()) {
394  // FIXME: Support non-register operands.
395  if (!MO.isReg()) {
396  LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
397  return true;
398  }
399 
400  // FIXME: Can generic operations have physical registers operands? If
401  // so, this will need to be taught about that, and we'll need to get the
402  // bank out of the minimal class for the register.
403  // Either way, this needs to be documented (and possibly verified).
404  if (!Register::isVirtualRegister(MO.getReg())) {
405  LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
406  return true;
407  }
408 
409  const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
410  if (!OpBank) {
411  LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
412  return true;
413  }
414 
415  if (PrevOpBank && OpBank != PrevOpBank) {
416  LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
417  return true;
418  }
419  PrevOpBank = OpBank;
420  }
421  return false;
422 }
423 
424 /// Select the AArch64 opcode for the basic binary operation \p GenericOpc
425 /// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
426 /// and of size \p OpSize.
427 /// \returns \p GenericOpc if the combination is unsupported.
428 static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
429  unsigned OpSize) {
430  switch (RegBankID) {
431  case AArch64::GPRRegBankID:
432  if (OpSize == 32) {
433  switch (GenericOpc) {
434  case TargetOpcode::G_SHL:
435  return AArch64::LSLVWr;
436  case TargetOpcode::G_LSHR:
437  return AArch64::LSRVWr;
438  case TargetOpcode::G_ASHR:
439  return AArch64::ASRVWr;
440  default:
441  return GenericOpc;
442  }
443  } else if (OpSize == 64) {
444  switch (GenericOpc) {
445  case TargetOpcode::G_GEP:
446  return AArch64::ADDXrr;
447  case TargetOpcode::G_SHL:
448  return AArch64::LSLVXr;
449  case TargetOpcode::G_LSHR:
450  return AArch64::LSRVXr;
451  case TargetOpcode::G_ASHR:
452  return AArch64::ASRVXr;
453  default:
454  return GenericOpc;
455  }
456  }
457  break;
458  case AArch64::FPRRegBankID:
459  switch (OpSize) {
460  case 32:
461  switch (GenericOpc) {
462  case TargetOpcode::G_FADD:
463  return AArch64::FADDSrr;
464  case TargetOpcode::G_FSUB:
465  return AArch64::FSUBSrr;
466  case TargetOpcode::G_FMUL:
467  return AArch64::FMULSrr;
468  case TargetOpcode::G_FDIV:
469  return AArch64::FDIVSrr;
470  default:
471  return GenericOpc;
472  }
473  case 64:
474  switch (GenericOpc) {
475  case TargetOpcode::G_FADD:
476  return AArch64::FADDDrr;
477  case TargetOpcode::G_FSUB:
478  return AArch64::FSUBDrr;
479  case TargetOpcode::G_FMUL:
480  return AArch64::FMULDrr;
481  case TargetOpcode::G_FDIV:
482  return AArch64::FDIVDrr;
483  case TargetOpcode::G_OR:
484  return AArch64::ORRv8i8;
485  default:
486  return GenericOpc;
487  }
488  }
489  break;
490  }
491  return GenericOpc;
492 }
493 
494 /// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
495 /// appropriate for the (value) register bank \p RegBankID and of memory access
496 /// size \p OpSize. This returns the variant with the base+unsigned-immediate
497 /// addressing mode (e.g., LDRXui).
498 /// \returns \p GenericOpc if the combination is unsupported.
499 static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
500  unsigned OpSize) {
501  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
502  switch (RegBankID) {
503  case AArch64::GPRRegBankID:
504  switch (OpSize) {
505  case 8:
506  return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
507  case 16:
508  return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
509  case 32:
510  return isStore ? AArch64::STRWui : AArch64::LDRWui;
511  case 64:
512  return isStore ? AArch64::STRXui : AArch64::LDRXui;
513  }
514  break;
515  case AArch64::FPRRegBankID:
516  switch (OpSize) {
517  case 8:
518  return isStore ? AArch64::STRBui : AArch64::LDRBui;
519  case 16:
520  return isStore ? AArch64::STRHui : AArch64::LDRHui;
521  case 32:
522  return isStore ? AArch64::STRSui : AArch64::LDRSui;
523  case 64:
524  return isStore ? AArch64::STRDui : AArch64::LDRDui;
525  }
526  break;
527  }
528  return GenericOpc;
529 }
530 
531 #ifndef NDEBUG
532 /// Helper function that verifies that we have a valid copy at the end of
533 /// selectCopy. Verifies that the source and dest have the expected sizes and
534 /// then returns true.
535 static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
536  const MachineRegisterInfo &MRI,
537  const TargetRegisterInfo &TRI,
538  const RegisterBankInfo &RBI) {
539  const Register DstReg = I.getOperand(0).getReg();
540  const Register SrcReg = I.getOperand(1).getReg();
541  const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
542  const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
543 
544  // Make sure the size of the source and dest line up.
545  assert(
546  (DstSize == SrcSize ||
547  // Copies are a means to set up initial types; the number of
548  // bits may not exactly match.
549  (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
550  // Copies are a means to copy bits around, as long as we are
551  // on the same register class, that's fine. Otherwise, that
552  // means we need some SUBREG_TO_REG or AND & co.
553  (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
554  "Copy with different width?!");
555 
556  // Check the size of the destination.
557  assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
558  "GPRs cannot get more than 64-bit width values");
559 
560  return true;
561 }
562 #endif
563 
564 /// Helper function for selectCopy. Inserts a subregister copy from
565 /// \p *From to \p *To, linking it up to \p I.
566 ///
567 /// e.g, given I = "Dst = COPY SrcReg", we'll transform that into
568 ///
569 /// CopyReg (From class) = COPY SrcReg
570 /// SubRegCopy (To class) = COPY CopyReg:SubReg
571 /// Dst = COPY SubRegCopy
572 static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI,
573                                   const RegisterBankInfo &RBI, Register SrcReg,
574  const TargetRegisterClass *From,
575  const TargetRegisterClass *To,
576  unsigned SubReg) {
577  MachineIRBuilder MIB(I);
578  auto Copy = MIB.buildCopy({From}, {SrcReg});
579  auto SubRegCopy = MIB.buildInstr(TargetOpcode::COPY, {To}, {})
580  .addReg(Copy.getReg(0), 0, SubReg);
581  MachineOperand &RegOp = I.getOperand(1);
582  RegOp.setReg(SubRegCopy.getReg(0));
583 
584  // It's possible that the destination register won't be constrained. Make
585  // sure that happens.
586  if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
587  RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
588 
589  return true;
590 }
591 
592 /// Helper function to get the source and destination register classes for a
593 /// copy. Returns a std::pair containing the source register class for the
594 /// copy, and the destination register class for the copy. If a register class
595 /// cannot be determined, then it will be nullptr.
596 static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
597 getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
598                      MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
599                      const RegisterBankInfo &RBI) {
600  Register DstReg = I.getOperand(0).getReg();
601  Register SrcReg = I.getOperand(1).getReg();
602  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
603  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
604  unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
605  unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
606 
607  // Special casing for cross-bank copies of s1s. We can technically represent
608  // a 1-bit value with any size of register. The minimum size for a GPR is 32
609  // bits. So, we need to put the FPR on 32 bits as well.
610  //
611  // FIXME: I'm not sure if this case holds true outside of copies. If it does,
612  // then we can pull it into the helpers that get the appropriate class for a
613  // register bank. Or make a new helper that carries along some constraint
614  // information.
615  if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
616  SrcSize = DstSize = 32;
617 
618  return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
619  getMinClassForRegBank(DstRegBank, DstSize, true)};
620 }
621 
622 static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
623                        MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
624                        const RegisterBankInfo &RBI) {
625 
626  Register DstReg = I.getOperand(0).getReg();
627  Register SrcReg = I.getOperand(1).getReg();
628  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
629  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
630 
631  // Find the correct register classes for the source and destination registers.
632  const TargetRegisterClass *SrcRC;
633  const TargetRegisterClass *DstRC;
634  std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
635 
636  if (!DstRC) {
637  LLVM_DEBUG(dbgs() << "Unexpected dest size "
638  << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
639  return false;
640  }
641 
642  // A couple helpers below, for making sure that the copy we produce is valid.
643 
644  // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
645  // to verify that the src and dst are the same size, since that's handled by
646  // the SUBREG_TO_REG.
647  bool KnownValid = false;
648 
649  // Returns true, or asserts if something we don't expect happens. Instead of
650  // returning true, we return isValidCopy() to ensure that we verify the
651  // result.
652  auto CheckCopy = [&]() {
653  // If we have a bitcast or something, we can't have physical registers.
654  assert((I.isCopy() ||
655  (!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&
656  !Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&
657  "No phys reg on generic operator!");
658  assert(KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI));
659  (void)KnownValid;
660  return true;
661  };
662 
663  // Is this a copy? If so, then we may need to insert a subregister copy, or
664  // a SUBREG_TO_REG.
665  if (I.isCopy()) {
666  // Yes. Check if there's anything to fix up.
667  if (!SrcRC) {
668  LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
669  return false;
670  }
671 
672  // Is this a cross-bank copy?
673  if (DstRegBank.getID() != SrcRegBank.getID()) {
674  // If we're doing a cross-bank copy on different-sized registers, we need
675  // to do a bit more work.
676  unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
677  unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
678 
679  if (SrcSize > DstSize) {
680  // We're doing a cross-bank copy into a smaller register. We need a
681  // subregister copy. First, get a register class that's on the same bank
682  // as the destination, but the same size as the source.
683  const TargetRegisterClass *SubregRC =
684  getMinClassForRegBank(DstRegBank, SrcSize, true);
685  assert(SubregRC && "Didn't get a register class for subreg?");
686 
687  // Get the appropriate subregister for the destination.
688  unsigned SubReg = 0;
689  if (!getSubRegForClass(DstRC, TRI, SubReg)) {
690  LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n");
691  return false;
692  }
693 
694  // Now, insert a subregister copy using the new register class.
695  selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg);
696  return CheckCopy();
697  }
698 
699  else if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 &&
700  SrcSize == 16) {
701  // Special case for FPR16 to GPR32.
702  // FIXME: This can probably be generalized like the above case.
703  Register PromoteReg =
704  MRI.createVirtualRegister(&AArch64::FPR32RegClass);
705  BuildMI(*I.getParent(), I, I.getDebugLoc(),
706  TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
707  .addImm(0)
708  .addUse(SrcReg)
709  .addImm(AArch64::hsub);
710  MachineOperand &RegOp = I.getOperand(1);
711  RegOp.setReg(PromoteReg);
712 
713  // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
714  KnownValid = true;
715  }
716  }
717 
718  // If the destination is a physical register, then there's nothing to
719  // change, so we're done.
720  if (Register::isPhysicalRegister(DstReg))
721  return CheckCopy();
722  }
723 
724  // No need to constrain SrcReg. It will get constrained when we hit another
725  // of its uses or defs. Copies do not have constraints.
726  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
727  LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
728  << " operand\n");
729  return false;
730  }
731  I.setDesc(TII.get(AArch64::COPY));
732  return CheckCopy();
733 }
734 
735 static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
736  if (!DstTy.isScalar() || !SrcTy.isScalar())
737  return GenericOpc;
738 
739  const unsigned DstSize = DstTy.getSizeInBits();
740  const unsigned SrcSize = SrcTy.getSizeInBits();
741 
742  switch (DstSize) {
743  case 32:
744  switch (SrcSize) {
745  case 32:
746  switch (GenericOpc) {
747  case TargetOpcode::G_SITOFP:
748  return AArch64::SCVTFUWSri;
749  case TargetOpcode::G_UITOFP:
750  return AArch64::UCVTFUWSri;
751  case TargetOpcode::G_FPTOSI:
752  return AArch64::FCVTZSUWSr;
753  case TargetOpcode::G_FPTOUI:
754  return AArch64::FCVTZUUWSr;
755  default:
756  return GenericOpc;
757  }
758  case 64:
759  switch (GenericOpc) {
760  case TargetOpcode::G_SITOFP:
761  return AArch64::SCVTFUXSri;
762  case TargetOpcode::G_UITOFP:
763  return AArch64::UCVTFUXSri;
764  case TargetOpcode::G_FPTOSI:
765  return AArch64::FCVTZSUWDr;
766  case TargetOpcode::G_FPTOUI:
767  return AArch64::FCVTZUUWDr;
768  default:
769  return GenericOpc;
770  }
771  default:
772  return GenericOpc;
773  }
774  case 64:
775  switch (SrcSize) {
776  case 32:
777  switch (GenericOpc) {
778  case TargetOpcode::G_SITOFP:
779  return AArch64::SCVTFUWDri;
780  case TargetOpcode::G_UITOFP:
781  return AArch64::UCVTFUWDri;
782  case TargetOpcode::G_FPTOSI:
783  return AArch64::FCVTZSUXSr;
784  case TargetOpcode::G_FPTOUI:
785  return AArch64::FCVTZUUXSr;
786  default:
787  return GenericOpc;
788  }
789  case 64:
790  switch (GenericOpc) {
791  case TargetOpcode::G_SITOFP:
792  return AArch64::SCVTFUXDri;
793  case TargetOpcode::G_UITOFP:
794  return AArch64::UCVTFUXDri;
795  case TargetOpcode::G_FPTOSI:
796  return AArch64::FCVTZSUXDr;
797  case TargetOpcode::G_FPTOUI:
798  return AArch64::FCVTZUUXDr;
799  default:
800  return GenericOpc;
801  }
802  default:
803  return GenericOpc;
804  }
805  default:
806  return GenericOpc;
807  };
808  return GenericOpc;
809 }
810 
811 static unsigned selectSelectOpc(MachineInstr &I, MachineRegisterInfo &MRI,
812                                 const RegisterBankInfo &RBI) {
813  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
814  bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
815  AArch64::GPRRegBankID);
816  LLT Ty = MRI.getType(I.getOperand(0).getReg());
817  if (Ty == LLT::scalar(32))
818  return IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr;
819  else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64))
820  return IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr;
821  return 0;
822 }
823 
824 /// Helper function to select the opcode for a G_FCMP.
825 static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) {
826  // If this is a compare against +0.0, then we don't have to explicitly
827  // materialize a constant.
828  const ConstantFP *FPImm = getConstantFPVRegVal(I.getOperand(3).getReg(), MRI);
829  bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
830  unsigned OpSize = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
831  if (OpSize != 32 && OpSize != 64)
832  return 0;
833  unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
834  {AArch64::FCMPSri, AArch64::FCMPDri}};
835  return CmpOpcTbl[ShouldUseImm][OpSize == 64];
836 }
837 
838 /// Returns true if \p P is an unsigned integer comparison predicate.
839 static bool isUnsignedICMPPred(const CmpInst::Predicate P) {
840  switch (P) {
841  default:
842  return false;
843  case CmpInst::ICMP_UGT:
844  case CmpInst::ICMP_UGE:
845  case CmpInst::ICMP_ULT:
846  case CmpInst::ICMP_ULE:
847  return true;
848  }
849 }
850 
851 static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
852  switch (P) {
853  default:
854  llvm_unreachable("Unknown condition code!");
855  case CmpInst::ICMP_NE:
856  return AArch64CC::NE;
857  case CmpInst::ICMP_EQ:
858  return AArch64CC::EQ;
859  case CmpInst::ICMP_SGT:
860  return AArch64CC::GT;
861  case CmpInst::ICMP_SGE:
862  return AArch64CC::GE;
863  case CmpInst::ICMP_SLT:
864  return AArch64CC::LT;
865  case CmpInst::ICMP_SLE:
866  return AArch64CC::LE;
867  case CmpInst::ICMP_UGT:
868  return AArch64CC::HI;
869  case CmpInst::ICMP_UGE:
870  return AArch64CC::HS;
871  case CmpInst::ICMP_ULT:
872  return AArch64CC::LO;
873  case CmpInst::ICMP_ULE:
874  return AArch64CC::LS;
875  }
876 }
877 
878 static void changeFCMPPredToAArch64CC(const CmpInst::Predicate P,
879                                       AArch64CC::CondCode &CondCode,
880                                       AArch64CC::CondCode &CondCode2) {
881  CondCode2 = AArch64CC::AL;
882  switch (P) {
883  default:
884  llvm_unreachable("Unknown FP condition!");
885  case CmpInst::FCMP_OEQ:
886  CondCode = AArch64CC::EQ;
887  break;
888  case CmpInst::FCMP_OGT:
889  CondCode = AArch64CC::GT;
890  break;
891  case CmpInst::FCMP_OGE:
892  CondCode = AArch64CC::GE;
893  break;
894  case CmpInst::FCMP_OLT:
895  CondCode = AArch64CC::MI;
896  break;
897  case CmpInst::FCMP_OLE:
898  CondCode = AArch64CC::LS;
899  break;
900  case CmpInst::FCMP_ONE:
901  CondCode = AArch64CC::MI;
902  CondCode2 = AArch64CC::GT;
903  break;
904  case CmpInst::FCMP_ORD:
905  CondCode = AArch64CC::VC;
906  break;
907  case CmpInst::FCMP_UNO:
908  CondCode = AArch64CC::VS;
909  break;
910  case CmpInst::FCMP_UEQ:
911  CondCode = AArch64CC::EQ;
912  CondCode2 = AArch64CC::VS;
913  break;
914  case CmpInst::FCMP_UGT:
915  CondCode = AArch64CC::HI;
916  break;
917  case CmpInst::FCMP_UGE:
918  CondCode = AArch64CC::PL;
919  break;
920  case CmpInst::FCMP_ULT:
921  CondCode = AArch64CC::LT;
922  break;
923  case CmpInst::FCMP_ULE:
924  CondCode = AArch64CC::LE;
925  break;
926  case CmpInst::FCMP_UNE:
927  CondCode = AArch64CC::NE;
928  break;
929  }
930 }
931 
932 bool AArch64InstructionSelector::selectCompareBranch(
933     MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
934 
935  const Register CondReg = I.getOperand(0).getReg();
936  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
937  MachineInstr *CCMI = MRI.getVRegDef(CondReg);
938  if (CCMI->getOpcode() == TargetOpcode::G_TRUNC)
939  CCMI = MRI.getVRegDef(CCMI->getOperand(1).getReg());
940  if (CCMI->getOpcode() != TargetOpcode::G_ICMP)
941  return false;
942 
943  Register LHS = CCMI->getOperand(2).getReg();
944  Register RHS = CCMI->getOperand(3).getReg();
945  auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
946  if (!VRegAndVal)
947  std::swap(RHS, LHS);
948 
949  VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
950  if (!VRegAndVal || VRegAndVal->Value != 0) {
951  MachineIRBuilder MIB(I);
952  // If we can't select a CBZ then emit a cmp + Bcc.
953  if (!emitIntegerCompare(CCMI->getOperand(2), CCMI->getOperand(3),
954  CCMI->getOperand(1), MIB))
955  return false;
956     const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
957         (CmpInst::Predicate)CCMI->getOperand(1).getPredicate());
958     MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
959  I.eraseFromParent();
960  return true;
961  }
962 
963  const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
964  if (RB.getID() != AArch64::GPRRegBankID)
965  return false;
966 
967  const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
968  if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
969  return false;
970 
971  const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits();
972  unsigned CBOpc = 0;
973  if (CmpWidth <= 32)
974  CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW);
975  else if (CmpWidth == 64)
976  CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX);
977  else
978  return false;
979 
980  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc))
981  .addUse(LHS)
982  .addMBB(DestMBB)
983  .constrainAllUses(TII, TRI, RBI);
984 
985  I.eraseFromParent();
986  return true;
987 }
988 
989 bool AArch64InstructionSelector::selectVectorSHL(
990  MachineInstr &I, MachineRegisterInfo &MRI) const {
991  assert(I.getOpcode() == TargetOpcode::G_SHL);
992  Register DstReg = I.getOperand(0).getReg();
993  const LLT Ty = MRI.getType(DstReg);
994  Register Src1Reg = I.getOperand(1).getReg();
995  Register Src2Reg = I.getOperand(2).getReg();
996 
997  if (!Ty.isVector())
998  return false;
999 
1000  unsigned Opc = 0;
1001  if (Ty == LLT::vector(4, 32)) {
1002  Opc = AArch64::USHLv4i32;
1003  } else if (Ty == LLT::vector(2, 32)) {
1004  Opc = AArch64::USHLv2i32;
1005  } else {
1006  LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1007  return false;
1008  }
1009 
1010  MachineIRBuilder MIB(I);
1011  auto UShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Src2Reg});
1012  constrainSelectedInstRegOperands(*UShl, TII, TRI, RBI);
1013  I.eraseFromParent();
1014  return true;
1015 }
1016 
1017 bool AArch64InstructionSelector::selectVectorASHR(
1018  MachineInstr &I, MachineRegisterInfo &MRI) const {
1019  assert(I.getOpcode() == TargetOpcode::G_ASHR);
1020  Register DstReg = I.getOperand(0).getReg();
1021  const LLT Ty = MRI.getType(DstReg);
1022  Register Src1Reg = I.getOperand(1).getReg();
1023  Register Src2Reg = I.getOperand(2).getReg();
1024 
1025  if (!Ty.isVector())
1026  return false;
1027 
1028  // There is no vector shift-right-by-register instruction, but the shift-left
1029  // register instruction takes a signed shift amount, where negative values
1030  // specify a right shift.
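 // In other words, the sequence built below is roughly:
 //   %neg = NEG %shift_amount
 //   %dst = SSHL %src, %neg
 // (an illustrative sketch of the G_ASHR lowering done in this function).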
1031 
1032  unsigned Opc = 0;
1033  unsigned NegOpc = 0;
1034  const TargetRegisterClass *RC = nullptr;
1035  if (Ty == LLT::vector(4, 32)) {
1036  Opc = AArch64::SSHLv4i32;
1037  NegOpc = AArch64::NEGv4i32;
1038  RC = &AArch64::FPR128RegClass;
1039  } else if (Ty == LLT::vector(2, 32)) {
1040  Opc = AArch64::SSHLv2i32;
1041  NegOpc = AArch64::NEGv2i32;
1042  RC = &AArch64::FPR64RegClass;
1043  } else {
1044  LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1045  return false;
1046  }
1047 
1048  MachineIRBuilder MIB(I);
1049  auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1050  constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1051  auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1052  constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1053  I.eraseFromParent();
1054  return true;
1055 }
1056 
1057 bool AArch64InstructionSelector::selectVaStartAAPCS(
1058  MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1059  return false;
1060 }
1061 
1062 bool AArch64InstructionSelector::selectVaStartDarwin(
1063  MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1064  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1065  Register ListReg = I.getOperand(0).getReg();
1066 
1067  Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1068 
1069  auto MIB =
1070  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
1071  .addDef(ArgsAddrReg)
1072  .addFrameIndex(FuncInfo->getVarArgsStackIndex())
1073  .addImm(0)
1074  .addImm(0);
1075 
1076  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1077 
1078  MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
1079  .addUse(ArgsAddrReg)
1080  .addUse(ListReg)
1081  .addImm(0)
1082  .addMemOperand(*I.memoperands_begin());
1083 
1084  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1085  I.eraseFromParent();
1086  return true;
1087 }
1088 
1089 void AArch64InstructionSelector::materializeLargeCMVal(
1090  MachineInstr &I, const Value *V, unsigned OpFlags) const {
1091  MachineBasicBlock &MBB = *I.getParent();
1092  MachineFunction &MF = *MBB.getParent();
1093  MachineRegisterInfo &MRI = MF.getRegInfo();
1094  MachineIRBuilder MIB(I);
1095 
1096  auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
1097  MovZ->addOperand(MF, I.getOperand(1));
1098  MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
1099                                      AArch64II::MO_NC);
1100  MovZ->addOperand(MF, MachineOperand::CreateImm(0));
1101  constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
1102 
1103  auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
1104  Register ForceDstReg) {
1105  Register DstReg = ForceDstReg
1106  ? ForceDstReg
1107  : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1108  auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
1109  if (auto *GV = dyn_cast<GlobalValue>(V)) {
1110  MovI->addOperand(MF, MachineOperand::CreateGA(
1111  GV, MovZ->getOperand(1).getOffset(), Flags));
1112  } else {
1113  MovI->addOperand(
1114  MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
1115  MovZ->getOperand(1).getOffset(), Flags));
1116  }
1117  MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
1118  constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
1119  return DstReg;
1120  };
1121  Register DstReg = BuildMovK(MovZ.getReg(0),
1122                              AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
1123  DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
1124  BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
1125  return;
1126 }
1127 
1128 void AArch64InstructionSelector::preISelLower(MachineInstr &I) const {
1129  MachineBasicBlock &MBB = *I.getParent();
1130  MachineFunction &MF = *MBB.getParent();
1131  MachineRegisterInfo &MRI = MF.getRegInfo();
1132 
1133  switch (I.getOpcode()) {
1134  case TargetOpcode::G_SHL:
1135  case TargetOpcode::G_ASHR:
1136  case TargetOpcode::G_LSHR: {
1137  // These shifts are legalized to have 64 bit shift amounts because we want
1138  // to take advantage of the existing imported selection patterns that assume
1139  // the immediates are s64s. However, if the shifted type is 32 bits and for
1140  // some reason we receive input GMIR that has an s64 shift amount that's not
1141  // a G_CONSTANT, insert a truncate so that we can still select the s32
1142  // register-register variant.
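    // Illustrative example (hypothetical MIR, assuming a non-constant s64
    // amount %amt and an s32 value %val):
    //   %dst:_(s32) = G_SHL %val:_(s32), %amt:_(s64)
    // is rewritten here, before selection, into roughly:
    //   %amt32:gpr(s32) = COPY %amt.sub_32
    //   %dst:_(s32) = G_SHL %val:_(s32), %amt32:gpr(s32)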
1143  Register SrcReg = I.getOperand(1).getReg();
1144  Register ShiftReg = I.getOperand(2).getReg();
1145  const LLT ShiftTy = MRI.getType(ShiftReg);
1146  const LLT SrcTy = MRI.getType(SrcReg);
1147  if (SrcTy.isVector())
1148  return;
1149  assert(!ShiftTy.isVector() && "unexpected vector shift ty");
1150  if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
1151  return;
1152  auto *AmtMI = MRI.getVRegDef(ShiftReg);
1153  assert(AmtMI && "could not find a vreg definition for shift amount");
1154  if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
1155  // Insert a subregister copy to implement a 64->32 trunc
1156  MachineIRBuilder MIB(I);
1157  auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
1158  .addReg(ShiftReg, 0, AArch64::sub_32);
1159  MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
1160  I.getOperand(2).setReg(Trunc.getReg(0));
1161  }
1162  return;
1163  }
1164  case TargetOpcode::G_STORE:
1165  contractCrossBankCopyIntoStore(I, MRI);
1166  return;
1167  default:
1168  return;
1169  }
1170 }
1171 
1172 bool AArch64InstructionSelector::earlySelectSHL(
1173  MachineInstr &I, MachineRegisterInfo &MRI) const {
1174  // We try to match the immediate variant of LSL, which is actually an alias
1175  // for a special case of UBFM. Otherwise, we fall back to the imported
1176  // selector which will match the register variant.
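  // For example, "lsl x0, x1, #4" is the alias of "ubfm x0, x1, #60, #59"
  // (immr = (64 - shift) % 64, imms = 63 - shift); the two renderer functions
  // used below are expected to produce exactly those two immediates.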
1177  assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
1178  const auto &MO = I.getOperand(2);
1179  auto VRegAndVal = getConstantVRegVal(MO.getReg(), MRI);
1180  if (!VRegAndVal)
1181  return false;
1182 
1183  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1184  if (DstTy.isVector())
1185  return false;
1186  bool Is64Bit = DstTy.getSizeInBits() == 64;
1187  auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
1188  auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
1189  MachineIRBuilder MIB(I);
1190 
1191  if (!Imm1Fn || !Imm2Fn)
1192  return false;
1193 
1194  auto NewI =
1195  MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
1196  {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
1197 
1198  for (auto &RenderFn : *Imm1Fn)
1199  RenderFn(NewI);
1200  for (auto &RenderFn : *Imm2Fn)
1201  RenderFn(NewI);
1202 
1203  I.eraseFromParent();
1204  return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
1205 }
1206 
1207 void AArch64InstructionSelector::contractCrossBankCopyIntoStore(
1208  MachineInstr &I, MachineRegisterInfo &MRI) const {
1209  assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
1210  // If we're storing a scalar, it doesn't matter what register bank that
1211  // scalar is on. All that matters is the size.
1212  //
1213  // So, if we see something like this (with a 32-bit scalar as an example):
1214  //
1215  // %x:gpr(s32) = ... something ...
1216  // %y:fpr(s32) = COPY %x:gpr(s32)
1217  // G_STORE %y:fpr(s32)
1218  //
1219  // We can fix this up into something like this:
1220  //
1221  // G_STORE %x:gpr(s32)
1222  //
1223  // And then continue the selection process normally.
1224  MachineInstr *Def = getDefIgnoringCopies(I.getOperand(0).getReg(), MRI);
1225  if (!Def)
1226  return;
1227  Register DefDstReg = Def->getOperand(0).getReg();
1228  LLT DefDstTy = MRI.getType(DefDstReg);
1229  Register StoreSrcReg = I.getOperand(0).getReg();
1230  LLT StoreSrcTy = MRI.getType(StoreSrcReg);
1231 
1232  // If we get something strange like a physical register, then we shouldn't
1233  // go any further.
1234  if (!DefDstTy.isValid())
1235  return;
1236 
1237  // Are the source and dst types the same size?
1238  if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
1239  return;
1240 
1241  if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
1242  RBI.getRegBank(DefDstReg, MRI, TRI))
1243  return;
1244 
1245  // We have a cross-bank copy, which is entering a store. Let's fold it.
1246  I.getOperand(0).setReg(DefDstReg);
1247 }
1248 
1249 bool AArch64InstructionSelector::earlySelectLoad(
1250  MachineInstr &I, MachineRegisterInfo &MRI) const {
1251  // Try to fold in shifts, etc into the addressing mode of a load.
1252  assert(I.getOpcode() == TargetOpcode::G_LOAD && "unexpected op");
1253 
1254  // Don't handle atomic loads/stores yet.
1255  auto &MemOp = **I.memoperands_begin();
1256  if (MemOp.isAtomic()) {
1257  LLVM_DEBUG(dbgs() << "Atomic load/store not supported yet\n");
1258  return false;
1259  }
1260 
1261  unsigned MemBytes = MemOp.getSize();
1262 
1263  // Only support 64-bit loads for now.
1264  if (MemBytes != 8)
1265  return false;
1266 
1267  Register DstReg = I.getOperand(0).getReg();
1268  const LLT DstTy = MRI.getType(DstReg);
1269  // Don't handle vectors.
1270  if (DstTy.isVector())
1271  return false;
1272 
1273  unsigned DstSize = DstTy.getSizeInBits();
1274  // TODO: 32-bit destinations.
1275  if (DstSize != 64)
1276  return false;
1277 
1278  // Check if we can do any folding from GEPs/shifts etc. into the load.
1279  auto ImmFn = selectAddrModeXRO(I.getOperand(1), MemBytes);
1280  if (!ImmFn)
1281  return false;
1282 
1283  // We can fold something. Emit the load here.
1284  MachineIRBuilder MIB(I);
1285 
1286  // Choose the instruction based on the size of the element being loaded, and
1287  // whether or not we're loading into a FPR.
1288  const RegisterBank &RB = *RBI.getRegBank(DstReg, MRI, TRI);
1289  unsigned Opc =
1290  RB.getID() == AArch64::GPRRegBankID ? AArch64::LDRXroX : AArch64::LDRDroX;
1291  // Construct the load.
1292  auto LoadMI = MIB.buildInstr(Opc, {DstReg}, {});
1293  for (auto &RenderFn : *ImmFn)
1294  RenderFn(LoadMI);
1295  LoadMI.addMemOperand(*I.memoperands_begin());
1296  I.eraseFromParent();
1297  return constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
1298 }
1299 
1300 bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
1301  assert(I.getParent() && "Instruction should be in a basic block!");
1302  assert(I.getParent()->getParent() && "Instruction should be in a function!");
1303 
1304  MachineBasicBlock &MBB = *I.getParent();
1305  MachineFunction &MF = *MBB.getParent();
1306  MachineRegisterInfo &MRI = MF.getRegInfo();
1307 
1308  switch (I.getOpcode()) {
1309  case TargetOpcode::G_SHL:
1310  return earlySelectSHL(I, MRI);
1311  case TargetOpcode::G_LOAD:
1312  return earlySelectLoad(I, MRI);
1313  case TargetOpcode::G_CONSTANT: {
1314  bool IsZero = false;
1315  if (I.getOperand(1).isCImm())
1316  IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
1317  else if (I.getOperand(1).isImm())
1318  IsZero = I.getOperand(1).getImm() == 0;
1319 
1320  if (!IsZero)
1321  return false;
1322 
1323  Register DefReg = I.getOperand(0).getReg();
1324  LLT Ty = MRI.getType(DefReg);
1325  if (Ty != LLT::scalar(64) && Ty != LLT::scalar(32))
1326  return false;
1327 
1328  if (Ty == LLT::scalar(64)) {
1329  I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
1330  RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
1331  } else {
1332  I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
1333  RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
1334  }
1335  I.setDesc(TII.get(TargetOpcode::COPY));
1336  return true;
1337  }
1338  default:
1339  return false;
1340  }
1341 }
1342 
1343 bool AArch64InstructionSelector::select(MachineInstr &I) {
1344  assert(I.getParent() && "Instruction should be in a basic block!");
1345  assert(I.getParent()->getParent() && "Instruction should be in a function!");
1346 
1347  MachineBasicBlock &MBB = *I.getParent();
1348  MachineFunction &MF = *MBB.getParent();
1349  MachineRegisterInfo &MRI = MF.getRegInfo();
1350 
1351  unsigned Opcode = I.getOpcode();
1352  // G_PHI requires same handling as PHI
1353  if (!isPreISelGenericOpcode(Opcode) || Opcode == TargetOpcode::G_PHI) {
1354  // Certain non-generic instructions also need some special handling.
1355 
1356  if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
1357  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1358 
1359  if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
1360  const Register DefReg = I.getOperand(0).getReg();
1361  const LLT DefTy = MRI.getType(DefReg);
1362 
1363  const RegClassOrRegBank &RegClassOrBank =
1364  MRI.getRegClassOrRegBank(DefReg);
1365 
1366  const TargetRegisterClass *DefRC
1367  = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
1368  if (!DefRC) {
1369  if (!DefTy.isValid()) {
1370  LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
1371  return false;
1372  }
1373  const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
1374  DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
1375  if (!DefRC) {
1376  LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
1377  return false;
1378  }
1379  }
1380 
1381  I.setDesc(TII.get(TargetOpcode::PHI));
1382 
1383  return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
1384  }
1385 
1386  if (I.isCopy())
1387  return selectCopy(I, TII, MRI, TRI, RBI);
1388 
1389  return true;
1390  }
1391 
1392 
1393  if (I.getNumOperands() != I.getNumExplicitOperands()) {
1394  LLVM_DEBUG(
1395  dbgs() << "Generic instruction has unexpected implicit operands\n");
1396  return false;
1397  }
1398 
1399  // Try to do some lowering before we start instruction selecting. These
1400  // lowerings are purely transformations on the input G_MIR and so selection
1401  // must continue after any modification of the instruction.
1402  preISelLower(I);
1403 
1404  // There may be patterns that the importer can't handle optimally: it selects
1405  // them to a suboptimal sequence, so our custom C++ selection code later never
1406  // gets a chance to work on them. Therefore, we make an early selection
1407  // attempt here to give priority to certain selection routines over the
1408  // imported ones.
1409  if (earlySelect(I))
1410  return true;
1411 
1412  if (selectImpl(I, *CoverageInfo))
1413  return true;
1414 
1415  LLT Ty =
1416  I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
1417 
1418  MachineIRBuilder MIB(I);
1419 
1420  switch (Opcode) {
1421  case TargetOpcode::G_BRCOND: {
1422  if (Ty.getSizeInBits() > 32) {
1423  // We shouldn't need this on AArch64, but it would be implemented as an
1424  // EXTRACT_SUBREG followed by a TBNZW because TBNZX has no encoding if the
1425  // bit being tested is < 32.
1426  LLVM_DEBUG(dbgs() << "G_BRCOND has type: " << Ty
1427  << ", expected at most 32-bits");
1428  return false;
1429  }
1430 
1431  const Register CondReg = I.getOperand(0).getReg();
1432  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1433 
1434  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1435  // instructions will not be produced, as they are conditional branch
1436  // instructions that do not set flags.
1437  bool ProduceNonFlagSettingCondBr =
1438  !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
1439  if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI))
1440  return true;
1441 
1442  if (ProduceNonFlagSettingCondBr) {
1443  auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
1444  .addUse(CondReg)
1445  .addImm(/*bit offset=*/0)
1446  .addMBB(DestMBB);
1447 
1448  I.eraseFromParent();
1449  return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
1450  } else {
1451  auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
1452  .addDef(AArch64::WZR)
1453  .addUse(CondReg)
1454  .addImm(1);
1455  constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI);
1456  auto Bcc =
1457  BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
1458  .addImm(AArch64CC::EQ)
1459  .addMBB(DestMBB);
1460 
1461  I.eraseFromParent();
1462  return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI);
1463  }
1464  }
1465 
1466  case TargetOpcode::G_BRINDIRECT: {
1467  I.setDesc(TII.get(AArch64::BR));
1468  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1469  }
1470 
1471  case TargetOpcode::G_BRJT:
1472  return selectBrJT(I, MRI);
1473 
1474  case TargetOpcode::G_BSWAP: {
1475  // Handle vector types for G_BSWAP directly.
1476  Register DstReg = I.getOperand(0).getReg();
1477  LLT DstTy = MRI.getType(DstReg);
1478 
1479  // We should only get vector types here; everything else is handled by the
1480  // importer right now.
1481  if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
1482  LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
1483  return false;
1484  }
1485 
1486  // Only handle 4 and 2 element vectors for now.
1487  // TODO: 16-bit elements.
1488  unsigned NumElts = DstTy.getNumElements();
1489  if (NumElts != 4 && NumElts != 2) {
1490  LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
1491  return false;
1492  }
1493 
1494  // Choose the correct opcode for the supported types. Right now, that's
1495  // v2s32, v4s32, and v2s64.
1496  unsigned Opc = 0;
1497  unsigned EltSize = DstTy.getElementType().getSizeInBits();
1498  if (EltSize == 32)
1499  Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
1500  : AArch64::REV32v16i8;
1501  else if (EltSize == 64)
1502  Opc = AArch64::REV64v16i8;
1503 
1504  // We should always get something by the time we get here...
1505  assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
1506 
1507  I.setDesc(TII.get(Opc));
1508  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1509  }
1510 
1511  case TargetOpcode::G_FCONSTANT:
1512  case TargetOpcode::G_CONSTANT: {
1513  const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
1514 
1515  const LLT s8 = LLT::scalar(8);
1516  const LLT s16 = LLT::scalar(16);
1517  const LLT s32 = LLT::scalar(32);
1518  const LLT s64 = LLT::scalar(64);
1519  const LLT p0 = LLT::pointer(0, 64);
1520 
1521  const Register DefReg = I.getOperand(0).getReg();
1522  const LLT DefTy = MRI.getType(DefReg);
1523  const unsigned DefSize = DefTy.getSizeInBits();
1524  const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1525 
1526  // FIXME: Redundant check, but even less readable when factored out.
1527  if (isFP) {
1528  if (Ty != s32 && Ty != s64) {
1529  LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
1530  << " constant, expected: " << s32 << " or " << s64
1531  << '\n');
1532  return false;
1533  }
1534 
1535  if (RB.getID() != AArch64::FPRRegBankID) {
1536  LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
1537  << " constant on bank: " << RB
1538  << ", expected: FPR\n");
1539  return false;
1540  }
1541 
1542  // The case when we have 0.0 is covered by tablegen. Reject it here so we
1543  // can be sure tablegen works correctly and isn't rescued by this code.
1544  if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0))
1545  return false;
1546  } else {
1547  // s32 and s64 are covered by tablegen.
1548  if (Ty != p0 && Ty != s8 && Ty != s16) {
1549  LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
1550  << " constant, expected: " << s32 << ", " << s64
1551  << ", or " << p0 << '\n');
1552  return false;
1553  }
1554 
1555  if (RB.getID() != AArch64::GPRRegBankID) {
1556  LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
1557  << " constant on bank: " << RB
1558  << ", expected: GPR\n");
1559  return false;
1560  }
1561  }
1562 
1563  // We allow G_CONSTANT of types < 32b.
1564  const unsigned MovOpc =
1565  DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
1566 
1567  if (isFP) {
1568  // Either emit a FMOV, or emit a copy to emit a normal mov.
1569  const TargetRegisterClass &GPRRC =
1570  DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
1571  const TargetRegisterClass &FPRRC =
1572  DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass;
1573 
1574  // Can we use a FMOV instruction to represent the immediate?
1575  if (emitFMovForFConstant(I, MRI))
1576  return true;
1577 
1578  // Nope. Emit a copy and use a normal mov instead.
1579  const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC);
1580  MachineOperand &RegOp = I.getOperand(0);
1581  RegOp.setReg(DefGPRReg);
1582  MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
1583  MIB.buildCopy({DefReg}, {DefGPRReg});
1584 
1585  if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
1586  LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
1587  return false;
1588  }
1589 
1590  MachineOperand &ImmOp = I.getOperand(1);
1591  // FIXME: Is going through int64_t always correct?
1592  ImmOp.ChangeToImmediate(
1593      ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
1594  } else if (I.getOperand(1).isCImm()) {
1595  uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
1596  I.getOperand(1).ChangeToImmediate(Val);
1597  } else if (I.getOperand(1).isImm()) {
1598  uint64_t Val = I.getOperand(1).getImm();
1599  I.getOperand(1).ChangeToImmediate(Val);
1600  }
1601 
1602  I.setDesc(TII.get(MovOpc));
1603  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1604  return true;
1605  }
1606  case TargetOpcode::G_EXTRACT: {
1607  Register DstReg = I.getOperand(0).getReg();
1608  Register SrcReg = I.getOperand(1).getReg();
1609  LLT SrcTy = MRI.getType(SrcReg);
1610  LLT DstTy = MRI.getType(DstReg);
1611  (void)DstTy;
1612  unsigned SrcSize = SrcTy.getSizeInBits();
1613 
1614  if (SrcTy.getSizeInBits() > 64) {
1615  // This should be an extract of an s128, which is like a vector extract.
1616  if (SrcTy.getSizeInBits() != 128)
1617  return false;
1618  // Only support extracting 64 bits from an s128 at the moment.
1619  if (DstTy.getSizeInBits() != 64)
1620  return false;
1621 
1622  const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
1623  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
1624  // Check we have the right regbank always.
1625  assert(SrcRB.getID() == AArch64::FPRRegBankID &&
1626  DstRB.getID() == AArch64::FPRRegBankID &&
1627  "Wrong extract regbank!");
1628  (void)SrcRB;
1629 
1630  // Emit the same code as a vector extract.
1631  // Offset must be a multiple of 64.
1632  unsigned Offset = I.getOperand(2).getImm();
1633  if (Offset % 64 != 0)
1634  return false;
1635  unsigned LaneIdx = Offset / 64;
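 // For example (illustrative), extracting bits [64, 128) of an s128 uses
 // LaneIdx = 1, which emitExtractVectorElt turns into a CPYi64 lane copy
 // from the FPR128 source.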
1636  MachineIRBuilder MIB(I);
1637  MachineInstr *Extract = emitExtractVectorElt(
1638  DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
1639  if (!Extract)
1640  return false;
1641  I.eraseFromParent();
1642  return true;
1643  }
1644 
1645  I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
1646  MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
1647  Ty.getSizeInBits() - 1);
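 // Roughly: a G_EXTRACT of width W at offset O becomes UBFM %src, O, O + W - 1,
 // e.g. (illustrative) extracting an s32 at offset 32 from an s64 gives
 // UBFMXri %src, 32, 63.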
1648 
1649  if (SrcSize < 64) {
1650  assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
1651  "unexpected G_EXTRACT types");
1652  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1653  }
1654 
1655  DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
1656  MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
1657  MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
1658  .addReg(DstReg, 0, AArch64::sub_32);
1659  RBI.constrainGenericRegister(I.getOperand(0).getReg(),
1660  AArch64::GPR32RegClass, MRI);
1661  I.getOperand(0).setReg(DstReg);
1662 
1663  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1664  }
1665 
1666  case TargetOpcode::G_INSERT: {
1667  LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
1668  LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1669  unsigned DstSize = DstTy.getSizeInBits();
1670  // Larger inserts are vectors, same-size ones should be something else by
1671  // now (split up or turned into COPYs).
1672  if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
1673  return false;
1674 
1675  I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
1676  unsigned LSB = I.getOperand(3).getImm();
1677  unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
1678  I.getOperand(3).setImm((DstSize - LSB) % DstSize);
1679  MachineInstrBuilder(MF, I).addImm(Width - 1);
1680 
1681  if (DstSize < 64) {
1682  assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
1683  "unexpected G_INSERT types");
1684  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1685  }
1686 
1687  Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
1688  BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
1689  TII.get(AArch64::SUBREG_TO_REG))
1690  .addDef(SrcReg)
1691  .addImm(0)
1692  .addUse(I.getOperand(2).getReg())
1693  .addImm(AArch64::sub_32);
1694  RBI.constrainGenericRegister(I.getOperand(2).getReg(),
1695  AArch64::GPR32RegClass, MRI);
1696  I.getOperand(2).setReg(SrcReg);
1697 
1698  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1699  }
1700  case TargetOpcode::G_FRAME_INDEX: {
1701  // allocas and G_FRAME_INDEX are only supported in addrspace(0).
1702  if (Ty != LLT::pointer(0, 64)) {
1703  LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
1704  << ", expected: " << LLT::pointer(0, 64) << '\n');
1705  return false;
1706  }
1707  I.setDesc(TII.get(AArch64::ADDXri));
1708 
1709  // MOs for a #0 shifted immediate.
1710  I.addOperand(MachineOperand::CreateImm(0));
1711  I.addOperand(MachineOperand::CreateImm(0));
1712 
1713  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1714  }
1715 
1716  case TargetOpcode::G_GLOBAL_VALUE: {
1717  auto GV = I.getOperand(1).getGlobal();
1718  if (GV->isThreadLocal())
1719  return selectTLSGlobalValue(I, MRI);
1720 
1721  unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
1722  if (OpFlags & AArch64II::MO_GOT) {
1723  I.setDesc(TII.get(AArch64::LOADgot));
1724  I.getOperand(1).setTargetFlags(OpFlags);
1725  } else if (TM.getCodeModel() == CodeModel::Large) {
1726  // Materialize the global using movz/movk instructions.
1727  materializeLargeCMVal(I, GV, OpFlags);
1728  I.eraseFromParent();
1729  return true;
1730  } else if (TM.getCodeModel() == CodeModel::Tiny) {
1731  I.setDesc(TII.get(AArch64::ADR));
1732  I.getOperand(1).setTargetFlags(OpFlags);
1733  } else {
1734  I.setDesc(TII.get(AArch64::MOVaddr));
1735  I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
1736  MachineInstrBuilder MIB(MF, I);
1737  MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
1738  OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1739  }
1740  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1741  }
1742 
1743  case TargetOpcode::G_ZEXTLOAD:
1744  case TargetOpcode::G_LOAD:
1745  case TargetOpcode::G_STORE: {
1746  bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
1747  MachineIRBuilder MIB(I);
1748 
1749  LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
1750 
1751  if (PtrTy != LLT::pointer(0, 64)) {
1752  LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
1753  << ", expected: " << LLT::pointer(0, 64) << '\n');
1754  return false;
1755  }
1756 
1757  auto &MemOp = **I.memoperands_begin();
1758  if (MemOp.isAtomic()) {
1759  // For now we just support s8 acquire loads to be able to compile stack
1760  // protector code.
1761  if (MemOp.getOrdering() == AtomicOrdering::Acquire &&
1762  MemOp.getSize() == 1) {
1763  I.setDesc(TII.get(AArch64::LDARB));
1764  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1765  }
1766  LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n");
1767  return false;
1768  }
1769  unsigned MemSizeInBits = MemOp.getSize() * 8;
1770 
1771  const Register PtrReg = I.getOperand(1).getReg();
1772 #ifndef NDEBUG
1773  const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
1774  // Sanity-check the pointer register.
1775  assert(PtrRB.getID() == AArch64::GPRRegBankID &&
1776  "Load/Store pointer operand isn't a GPR");
1777  assert(MRI.getType(PtrReg).isPointer() &&
1778  "Load/Store pointer operand isn't a pointer");
1779 #endif
1780 
1781  const Register ValReg = I.getOperand(0).getReg();
1782  const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
1783 
1784  const unsigned NewOpc =
1785  selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
1786  if (NewOpc == I.getOpcode())
1787  return false;
1788 
1789  I.setDesc(TII.get(NewOpc));
1790 
1791  uint64_t Offset = 0;
1792  auto *PtrMI = MRI.getVRegDef(PtrReg);
1793 
1794  // Try to fold a GEP into our unsigned immediate addressing mode.
1795  if (PtrMI->getOpcode() == TargetOpcode::G_GEP) {
1796  if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) {
1797  int64_t Imm = *COff;
1798  const unsigned Size = MemSizeInBits / 8;
1799  const unsigned Scale = Log2_32(Size);
1800  if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) {
1801  Register Ptr2Reg = PtrMI->getOperand(1).getReg();
1802  I.getOperand(1).setReg(Ptr2Reg);
1803  PtrMI = MRI.getVRegDef(Ptr2Reg);
1804  Offset = Imm / Size;
1805  }
1806  }
1807  }
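 // The unsigned-immediate forms take a 12-bit offset scaled by the access
 // size, so (illustrative) a 4-byte load at a G_GEP offset of 8 folds to an
 // LDRWui with immediate 2, while an unaligned offset of 6 or a negative
 // offset does not fold and keeps the G_GEP result as the base.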
1808 
1809  // If we haven't folded anything into our addressing mode yet, try to fold
1810  // a frame index into the base+offset.
1811  if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1812  I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex());
1813 
1814  I.addOperand(MachineOperand::CreateImm(Offset));
1815 
1816  // If we're storing a 0, use WZR/XZR.
1817  if (auto CVal = getConstantVRegVal(ValReg, MRI)) {
1818  if (*CVal == 0 && Opcode == TargetOpcode::G_STORE) {
1819  if (I.getOpcode() == AArch64::STRWui)
1820  I.getOperand(0).setReg(AArch64::WZR);
1821  else if (I.getOpcode() == AArch64::STRXui)
1822  I.getOperand(0).setReg(AArch64::XZR);
1823  }
1824  }
1825 
1826  if (IsZExtLoad) {
1827  // The zextload from a smaller type to i32 should be handled by the importer.
1828  if (MRI.getType(ValReg).getSizeInBits() != 64)
1829  return false;
1830  // If we have a ZEXTLOAD then change the load's type to be a narrower reg
1831  // and zero_extend with SUBREG_TO_REG.
1832  Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
1833  Register DstReg = I.getOperand(0).getReg();
1834  I.getOperand(0).setReg(LdReg);
1835 
1836  MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
1837  MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
1838  .addImm(0)
1839  .addUse(LdReg)
1840  .addImm(AArch64::sub_32);
1841  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1842  return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
1843  MRI);
1844  }
1845  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1846  }
1847 
1848  case TargetOpcode::G_SMULH:
1849  case TargetOpcode::G_UMULH: {
1850  // Reject the various things we don't support yet.
1851  if (unsupportedBinOp(I, RBI, MRI, TRI))
1852  return false;
1853 
1854  const Register DefReg = I.getOperand(0).getReg();
1855  const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1856 
1857  if (RB.getID() != AArch64::GPRRegBankID) {
1858  LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
1859  return false;
1860  }
1861 
1862  if (Ty != LLT::scalar(64)) {
1863  LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
1864  << ", expected: " << LLT::scalar(64) << '\n');
1865  return false;
1866  }
1867 
1868  unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
1869  : AArch64::UMULHrr;
1870  I.setDesc(TII.get(NewOpc));
1871 
1872  // Now that we selected an opcode, we need to constrain the register
1873  // operands to use appropriate classes.
1874  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1875  }
1876  case TargetOpcode::G_FADD:
1877  case TargetOpcode::G_FSUB:
1878  case TargetOpcode::G_FMUL:
1879  case TargetOpcode::G_FDIV:
1880 
1881  case TargetOpcode::G_ASHR:
1882  if (MRI.getType(I.getOperand(0).getReg()).isVector())
1883  return selectVectorASHR(I, MRI);
1884  LLVM_FALLTHROUGH;
1885  case TargetOpcode::G_SHL:
1886  if (Opcode == TargetOpcode::G_SHL &&
1887  MRI.getType(I.getOperand(0).getReg()).isVector())
1888  return selectVectorSHL(I, MRI);
1889  LLVM_FALLTHROUGH;
1890  case TargetOpcode::G_OR:
1891  case TargetOpcode::G_LSHR: {
1892  // Reject the various things we don't support yet.
1893  if (unsupportedBinOp(I, RBI, MRI, TRI))
1894  return false;
1895 
1896  const unsigned OpSize = Ty.getSizeInBits();
1897 
1898  const Register DefReg = I.getOperand(0).getReg();
1899  const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1900 
1901  const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
1902  if (NewOpc == I.getOpcode())
1903  return false;
1904 
1905  I.setDesc(TII.get(NewOpc));
1906  // FIXME: Should the type be always reset in setDesc?
1907 
1908  // Now that we selected an opcode, we need to constrain the register
1909  // operands to use appropriate classes.
1910  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1911  }
1912 
1913  case TargetOpcode::G_GEP: {
1914  MachineIRBuilder MIRBuilder(I);
1915  emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2),
1916  MIRBuilder);
1917  I.eraseFromParent();
1918  return true;
1919  }
1920  case TargetOpcode::G_UADDO: {
1921  // TODO: Support other types.
1922  unsigned OpSize = Ty.getSizeInBits();
1923  if (OpSize != 32 && OpSize != 64) {
1924  LLVM_DEBUG(
1925  dbgs()
1926  << "G_UADDO currently only supported for 32 and 64 b types.\n");
1927  return false;
1928  }
1929 
1930  // TODO: Support vectors.
1931  if (Ty.isVector()) {
1932  LLVM_DEBUG(dbgs() << "G_UADDO currently only supported for scalars.\n");
1933  return false;
1934  }
1935 
1936  // Add and set the condition flags.
1937  unsigned AddsOpc = OpSize == 32 ? AArch64::ADDSWrr : AArch64::ADDSXrr;
1938  MachineIRBuilder MIRBuilder(I);
1939  auto AddsMI = MIRBuilder.buildInstr(
1940  AddsOpc, {I.getOperand(0).getReg()},
1941  {I.getOperand(2).getReg(), I.getOperand(3).getReg()});
1942  constrainSelectedInstRegOperands(*AddsMI, TII, TRI, RBI);
1943 
1944  // Now, put the overflow result in the register given by the first operand
1945  // to the G_UADDO. CSINC increments the result when the predicate is false,
1946  // so to get the increment when it's true, we need to use the inverse. In
1947  // this case, we want to increment when carry is set.
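 // Roughly, for a 32-bit G_UADDO this produces (illustrative):
 //   %res = ADDSWrr %lhs, %rhs          ; also sets NZCV
 //   %carry = CSINCWr wzr, wzr, lo      ; 1 when LO is false, i.e. carry set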
1948  auto CsetMI = MIRBuilder
1949  .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
1950  {Register(AArch64::WZR), Register(AArch64::WZR)})
1951  .addImm(getInvertedCondCode(AArch64CC::HS));
1952  constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
1953  I.eraseFromParent();
1954  return true;
1955  }
1956 
1957  case TargetOpcode::G_PTR_MASK: {
1958  uint64_t Align = I.getOperand(2).getImm();
1959  if (Align >= 64 || Align == 0)
1960  return false;
1961 
1962  uint64_t Mask = ~((1ULL << Align) - 1);
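 // E.g. (illustrative) clearing the low 4 bits gives Mask = ~0xf, i.e. the
 // pointer is rounded down to a 16-byte boundary; since 0 < Align < 64, the
 // mask is always encodable as an ANDXri logical immediate.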
1963  I.setDesc(TII.get(AArch64::ANDXri));
1964  I.getOperand(2).setImm(AArch64_AM::encodeLogicalImmediate(Mask, 64));
1965 
1966  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1967  }
1968  case TargetOpcode::G_PTRTOINT:
1969  case TargetOpcode::G_TRUNC: {
1970  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1971  const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
1972 
1973  const Register DstReg = I.getOperand(0).getReg();
1974  const Register SrcReg = I.getOperand(1).getReg();
1975 
1976  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
1977  const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
1978 
1979  if (DstRB.getID() != SrcRB.getID()) {
1980  LLVM_DEBUG(
1981  dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
1982  return false;
1983  }
1984 
1985  if (DstRB.getID() == AArch64::GPRRegBankID) {
1986  const TargetRegisterClass *DstRC =
1987  getRegClassForTypeOnBank(DstTy, DstRB, RBI);
1988  if (!DstRC)
1989  return false;
1990 
1991  const TargetRegisterClass *SrcRC =
1992  getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
1993  if (!SrcRC)
1994  return false;
1995 
1996  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
1997  !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1998  LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
1999  return false;
2000  }
2001 
2002  if (DstRC == SrcRC) {
2003  // Nothing to be done
2004  } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
2005  SrcTy == LLT::scalar(64)) {
2006  llvm_unreachable("TableGen can import this case");
2007  return false;
2008  } else if (DstRC == &AArch64::GPR32RegClass &&
2009  SrcRC == &AArch64::GPR64RegClass) {
2010  I.getOperand(1).setSubReg(AArch64::sub_32);
2011  } else {
2012  LLVM_DEBUG(
2013  dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
2014  return false;
2015  }
2016 
2017  I.setDesc(TII.get(TargetOpcode::COPY));
2018  return true;
2019  } else if (DstRB.getID() == AArch64::FPRRegBankID) {
2020  if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) {
2021  I.setDesc(TII.get(AArch64::XTNv4i16));
2022  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2023  return true;
2024  }
2025 
2026  if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
2027  MachineIRBuilder MIB(I);
2028  MachineInstr *Extract = emitExtractVectorElt(
2029  DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
2030  if (!Extract)
2031  return false;
2032  I.eraseFromParent();
2033  return true;
2034  }
2035  }
2036 
2037  return false;
2038  }
2039 
2040  case TargetOpcode::G_ANYEXT: {
2041  const Register DstReg = I.getOperand(0).getReg();
2042  const Register SrcReg = I.getOperand(1).getReg();
2043 
2044  const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
2045  if (RBDst.getID() != AArch64::GPRRegBankID) {
2046  LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
2047  << ", expected: GPR\n");
2048  return false;
2049  }
2050 
2051  const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
2052  if (RBSrc.getID() != AArch64::GPRRegBankID) {
2053  LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
2054  << ", expected: GPR\n");
2055  return false;
2056  }
2057 
2058  const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
2059 
2060  if (DstSize == 0) {
2061  LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
2062  return false;
2063  }
2064 
2065  if (DstSize != 64 && DstSize > 32) {
2066  LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
2067  << ", expected: 32 or 64\n");
2068  return false;
2069  }
2070  // At this point G_ANYEXT is just like a plain COPY, but we need
2071  // to explicitly form the 64-bit value if any.
2072  if (DstSize > 32) {
2073  Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
2074  BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
2075  .addDef(ExtSrc)
2076  .addImm(0)
2077  .addUse(SrcReg)
2078  .addImm(AArch64::sub_32);
2079  I.getOperand(1).setReg(ExtSrc);
2080  }
2081  return selectCopy(I, TII, MRI, TRI, RBI);
2082  }
2083 
2084  case TargetOpcode::G_ZEXT:
2085  case TargetOpcode::G_SEXT: {
2086  unsigned Opcode = I.getOpcode();
2087  const bool IsSigned = Opcode == TargetOpcode::G_SEXT;
2088  const Register DefReg = I.getOperand(0).getReg();
2089  const Register SrcReg = I.getOperand(1).getReg();
2090  const LLT DstTy = MRI.getType(DefReg);
2091  const LLT SrcTy = MRI.getType(SrcReg);
2092  unsigned DstSize = DstTy.getSizeInBits();
2093  unsigned SrcSize = SrcTy.getSizeInBits();
2094 
2095  assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
2096  AArch64::GPRRegBankID &&
2097  "Unexpected ext regbank");
2098 
2099  MachineIRBuilder MIB(I);
2100  MachineInstr *ExtI;
2101  if (DstTy.isVector())
2102  return false; // Should be handled by imported patterns.
2103 
2104  // If we're extending the result of a load whose destination type is smaller
2105  // than 32 bits, then this zext is redundant. GPR32 is the smallest
2106  // GPR register on AArch64 and all loads which are smaller automatically
2107  // zero-extend the upper bits. E.g.
2108  // %v(s8) = G_LOAD %p, :: (load 1)
2109  // %v2(s32) = G_ZEXT %v(s8)
2110  if (!IsSigned) {
2111  auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
2112  if (LoadMI &&
2113  RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID) {
2114  const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
2115  unsigned BytesLoaded = MemOp->getSize();
2116  if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
2117  return selectCopy(I, TII, MRI, TRI, RBI);
2118  }
2119  }
2120 
2121  if (DstSize == 64) {
2122  // FIXME: Can we avoid manually doing this?
2123  if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, MRI)) {
2124  LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
2125  << " operand\n");
2126  return false;
2127  }
2128 
2129  auto SubregToReg =
2130  MIB.buildInstr(AArch64::SUBREG_TO_REG, {&AArch64::GPR64RegClass}, {})
2131  .addImm(0)
2132  .addUse(SrcReg)
2133  .addImm(AArch64::sub_32);
2134 
2135  ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
2136  {DefReg}, {SubregToReg})
2137  .addImm(0)
2138  .addImm(SrcSize - 1);
2139  } else if (DstSize <= 32) {
2140  ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
2141  {DefReg}, {SrcReg})
2142  .addImm(0)
2143  .addImm(SrcSize - 1);
2144  } else {
2145  return false;
2146  }
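 // Roughly (illustrative): a G_SEXT from s32 to s64 becomes a SUBREG_TO_REG
 // through sub_32 followed by SBFMXri %x, 0, 31, while a G_ZEXT to s32 or
 // narrower becomes a single UBFMWri.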
2147 
2148  constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
2149  I.eraseFromParent();
2150  return true;
2151  }
2152 
2153  case TargetOpcode::G_SITOFP:
2154  case TargetOpcode::G_UITOFP:
2155  case TargetOpcode::G_FPTOSI:
2156  case TargetOpcode::G_FPTOUI: {
2157  const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
2158  SrcTy = MRI.getType(I.getOperand(1).getReg());
2159  const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
2160  if (NewOpc == Opcode)
2161  return false;
2162 
2163  I.setDesc(TII.get(NewOpc));
2164  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2165 
2166  return true;
2167  }
2168 
2169 
2170  case TargetOpcode::G_INTTOPTR:
2171  // The importer is currently unable to import pointer types since they
2172  // didn't exist in SelectionDAG.
2173  return selectCopy(I, TII, MRI, TRI, RBI);
2174 
2175  case TargetOpcode::G_BITCAST:
2176  // Imported SelectionDAG rules can handle every bitcast except those that
2177  // bitcast from a type to the same type. Ideally, these shouldn't occur
2178  // but we might not run an optimizer that deletes them. The other exception
2179  // is bitcasts involving pointer types, as SelectionDAG has no knowledge
2180  // of them.
2181  return selectCopy(I, TII, MRI, TRI, RBI);
2182 
2183  case TargetOpcode::G_SELECT: {
2184  if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
2185  LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
2186  << ", expected: " << LLT::scalar(1) << '\n');
2187  return false;
2188  }
2189 
2190  const Register CondReg = I.getOperand(1).getReg();
2191  const Register TReg = I.getOperand(2).getReg();
2192  const Register FReg = I.getOperand(3).getReg();
2193 
2194  if (tryOptSelect(I))
2195  return true;
2196 
2197  Register CSelOpc = selectSelectOpc(I, MRI, RBI);
2198  MachineInstr &TstMI =
2199  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
2200  .addDef(AArch64::WZR)
2201  .addUse(CondReg)
2202  .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2203 
2204  MachineInstr &CSelMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CSelOpc))
2205  .addDef(I.getOperand(0).getReg())
2206  .addUse(TReg)
2207  .addUse(FReg)
2208  .addImm(AArch64CC::NE);
2209 
2210  constrainSelectedInstRegOperands(TstMI, TII, TRI, RBI);
2211  constrainSelectedInstRegOperands(CSelMI, TII, TRI, RBI);
2212 
2213  I.eraseFromParent();
2214  return true;
2215  }
2216  case TargetOpcode::G_ICMP: {
2217  if (Ty.isVector())
2218  return selectVectorICmp(I, MRI);
2219 
2220  if (Ty != LLT::scalar(32)) {
2221  LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
2222  << ", expected: " << LLT::scalar(32) << '\n');
2223  return false;
2224  }
2225 
2226  MachineIRBuilder MIRBuilder(I);
2227  if (!emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
2228  MIRBuilder))
2229  return false;
2230  emitCSetForICMP(I.getOperand(0).getReg(), I.getOperand(1).getPredicate(),
2231  MIRBuilder);
2232  I.eraseFromParent();
2233  return true;
2234  }
2235 
2236  case TargetOpcode::G_FCMP: {
2237  if (Ty != LLT::scalar(32)) {
2238  LLVM_DEBUG(dbgs() << "G_FCMP result has type: " << Ty
2239  << ", expected: " << LLT::scalar(32) << '\n');
2240  return false;
2241  }
2242 
2243  unsigned CmpOpc = selectFCMPOpc(I, MRI);
2244  if (!CmpOpc)
2245  return false;
2246 
2247  // FIXME: regbank
2248 
2249  AArch64CC::CondCode CC1, CC2;
2250  changeFCMPPredToAArch64CC(
2251  (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2);
2252 
2253  // Partially build the compare. Decide if we need to add a use for the
2254  // third operand based off whether or not we're comparing against 0.0.
2255  auto CmpMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
2256  .addUse(I.getOperand(2).getReg());
2257 
2258  // If we don't have an immediate compare, then we need to add a use of the
2259  // register which wasn't used for the immediate.
2260  // Note that the immediate will always be the last operand.
2261  if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
2262  CmpMI = CmpMI.addUse(I.getOperand(3).getReg());
2263 
2264  const Register DefReg = I.getOperand(0).getReg();
2265  Register Def1Reg = DefReg;
2266  if (CC2 != AArch64CC::AL)
2267  Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2268 
2269  MachineInstr &CSetMI =
2270  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
2271  .addDef(Def1Reg)
2272  .addUse(AArch64::WZR)
2273  .addUse(AArch64::WZR)
2274  .addImm(getInvertedCondCode(CC1));
2275 
2276  if (CC2 != AArch64CC::AL) {
2277  Register Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2278  MachineInstr &CSet2MI =
2279  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
2280  .addDef(Def2Reg)
2281  .addUse(AArch64::WZR)
2282  .addUse(AArch64::WZR)
2283  .addImm(getInvertedCondCode(CC2));
2284  MachineInstr &OrMI =
2285  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr))
2286  .addDef(DefReg)
2287  .addUse(Def1Reg)
2288  .addUse(Def2Reg);
2289  constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI);
2290  constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI);
2291  }
2292  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
2293  constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
2294 
2295  I.eraseFromParent();
2296  return true;
2297  }
2298  case TargetOpcode::G_VASTART:
2299  return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
2300  : selectVaStartAAPCS(I, MF, MRI);
2301  case TargetOpcode::G_INTRINSIC:
2302  return selectIntrinsic(I, MRI);
2303  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
2304  return selectIntrinsicWithSideEffects(I, MRI);
2305  case TargetOpcode::G_IMPLICIT_DEF: {
2306  I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
2307  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2308  const Register DstReg = I.getOperand(0).getReg();
2309  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2310  const TargetRegisterClass *DstRC =
2311  getRegClassForTypeOnBank(DstTy, DstRB, RBI);
2312  RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
2313  return true;
2314  }
2315  case TargetOpcode::G_BLOCK_ADDR: {
2316  if (TM.getCodeModel() == CodeModel::Large) {
2317  materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
2318  I.eraseFromParent();
2319  return true;
2320  } else {
2321  I.setDesc(TII.get(AArch64::MOVaddrBA));
2322  auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
2323  I.getOperand(0).getReg())
2324  .addBlockAddress(I.getOperand(1).getBlockAddress(),
2325  /* Offset */ 0, AArch64II::MO_PAGE)
2326  .addBlockAddress(
2327  I.getOperand(1).getBlockAddress(), /* Offset */ 0,
2328  AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
2329  I.eraseFromParent();
2330  return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
2331  }
2332  }
2333  case TargetOpcode::G_INTRINSIC_TRUNC:
2334  return selectIntrinsicTrunc(I, MRI);
2335  case TargetOpcode::G_INTRINSIC_ROUND:
2336  return selectIntrinsicRound(I, MRI);
2337  case TargetOpcode::G_BUILD_VECTOR:
2338  return selectBuildVector(I, MRI);
2339  case TargetOpcode::G_MERGE_VALUES:
2340  return selectMergeValues(I, MRI);
2341  case TargetOpcode::G_UNMERGE_VALUES:
2342  return selectUnmergeValues(I, MRI);
2343  case TargetOpcode::G_SHUFFLE_VECTOR:
2344  return selectShuffleVector(I, MRI);
2345  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
2346  return selectExtractElt(I, MRI);
2347  case TargetOpcode::G_INSERT_VECTOR_ELT:
2348  return selectInsertElt(I, MRI);
2349  case TargetOpcode::G_CONCAT_VECTORS:
2350  return selectConcatVectors(I, MRI);
2351  case TargetOpcode::G_JUMP_TABLE:
2352  return selectJumpTable(I, MRI);
2353  }
2354 
2355  return false;
2356 }
2357 
2358 bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
2359  MachineRegisterInfo &MRI) const {
2360  assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
2361  Register JTAddr = I.getOperand(0).getReg();
2362  unsigned JTI = I.getOperand(1).getIndex();
2363  Register Index = I.getOperand(2).getReg();
2364  MachineIRBuilder MIB(I);
2365 
2366  Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2367  Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
2368  MIB.buildInstr(AArch64::JumpTableDest32, {TargetReg, ScratchReg},
2369  {JTAddr, Index})
2370  .addJumpTableIndex(JTI);
2371 
2372  // Build the indirect branch.
2373  MIB.buildInstr(AArch64::BR, {}, {TargetReg});
2374  I.eraseFromParent();
2375  return true;
2376 }
2377 
2378 bool AArch64InstructionSelector::selectJumpTable(
2379  MachineInstr &I, MachineRegisterInfo &MRI) const {
2380  assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
2381  assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
2382 
2383  Register DstReg = I.getOperand(0).getReg();
2384  unsigned JTI = I.getOperand(1).getIndex();
2385  // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
2386  MachineIRBuilder MIB(I);
2387  auto MovMI =
2388  MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
2389  .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
2390  .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
2391  I.eraseFromParent();
2392  return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
2393 }
2394 
2395 bool AArch64InstructionSelector::selectTLSGlobalValue(
2396  MachineInstr &I, MachineRegisterInfo &MRI) const {
2397  if (!STI.isTargetMachO())
2398  return false;
2399  MachineFunction &MF = *I.getParent()->getParent();
2400  MF.getFrameInfo().setAdjustsStack(true);
2401 
2402  const GlobalValue &GV = *I.getOperand(1).getGlobal();
2403  MachineIRBuilder MIB(I);
2404 
2405  MIB.buildInstr(AArch64::LOADgot, {AArch64::X0}, {})
2406  .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
2407 
2408  auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
2409  {Register(AArch64::X0)})
2410  .addImm(0);
2411 
2412  // TLS calls preserve all registers except those that absolutely must be
2413  // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
2414  // silly).
2415  MIB.buildInstr(AArch64::BLR, {}, {Load})
2416  .addDef(AArch64::X0, RegState::Implicit)
2417  .addRegMask(TRI.getTLSCallPreservedMask());
2418 
2419  MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
2420  RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
2421  MRI);
2422  I.eraseFromParent();
2423  return true;
2424 }
2425 
2426 bool AArch64InstructionSelector::selectIntrinsicTrunc(
2427  MachineInstr &I, MachineRegisterInfo &MRI) const {
2428  const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
2429 
2430  // Select the correct opcode.
2431  unsigned Opc = 0;
2432  if (!SrcTy.isVector()) {
2433  switch (SrcTy.getSizeInBits()) {
2434  default:
2435  case 16:
2436  Opc = AArch64::FRINTZHr;
2437  break;
2438  case 32:
2439  Opc = AArch64::FRINTZSr;
2440  break;
2441  case 64:
2442  Opc = AArch64::FRINTZDr;
2443  break;
2444  }
2445  } else {
2446  unsigned NumElts = SrcTy.getNumElements();
2447  switch (SrcTy.getElementType().getSizeInBits()) {
2448  default:
2449  break;
2450  case 16:
2451  if (NumElts == 4)
2452  Opc = AArch64::FRINTZv4f16;
2453  else if (NumElts == 8)
2454  Opc = AArch64::FRINTZv8f16;
2455  break;
2456  case 32:
2457  if (NumElts == 2)
2458  Opc = AArch64::FRINTZv2f32;
2459  else if (NumElts == 4)
2460  Opc = AArch64::FRINTZv4f32;
2461  break;
2462  case 64:
2463  if (NumElts == 2)
2464  Opc = AArch64::FRINTZv2f64;
2465  break;
2466  }
2467  }
2468 
2469  if (!Opc) {
2470  // Didn't get an opcode above, bail.
2471  LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
2472  return false;
2473  }
2474 
2475  // Legalization would have set us up perfectly for this; we just need to
2476  // set the opcode and move on.
2477  I.setDesc(TII.get(Opc));
2478  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2479 }
2480 
2481 bool AArch64InstructionSelector::selectIntrinsicRound(
2482  MachineInstr &I, MachineRegisterInfo &MRI) const {
2483  const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
2484 
2485  // Select the correct opcode.
2486  unsigned Opc = 0;
2487  if (!SrcTy.isVector()) {
2488  switch (SrcTy.getSizeInBits()) {
2489  default:
2490  case 16:
2491  Opc = AArch64::FRINTAHr;
2492  break;
2493  case 32:
2494  Opc = AArch64::FRINTASr;
2495  break;
2496  case 64:
2497  Opc = AArch64::FRINTADr;
2498  break;
2499  }
2500  } else {
2501  unsigned NumElts = SrcTy.getNumElements();
2502  switch (SrcTy.getElementType().getSizeInBits()) {
2503  default:
2504  break;
2505  case 16:
2506  if (NumElts == 4)
2507  Opc = AArch64::FRINTAv4f16;
2508  else if (NumElts == 8)
2509  Opc = AArch64::FRINTAv8f16;
2510  break;
2511  case 32:
2512  if (NumElts == 2)
2513  Opc = AArch64::FRINTAv2f32;
2514  else if (NumElts == 4)
2515  Opc = AArch64::FRINTAv4f32;
2516  break;
2517  case 64:
2518  if (NumElts == 2)
2519  Opc = AArch64::FRINTAv2f64;
2520  break;
2521  }
2522  }
2523 
2524  if (!Opc) {
2525  // Didn't get an opcode above, bail.
2526  LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
2527  return false;
2528  }
2529 
2530  // Legalization would have set us up perfectly for this; we just need to
2531  // set the opcode and move on.
2532  I.setDesc(TII.get(Opc));
2533  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2534 }
2535 
2536 bool AArch64InstructionSelector::selectVectorICmp(
2537  MachineInstr &I, MachineRegisterInfo &MRI) const {
2538  Register DstReg = I.getOperand(0).getReg();
2539  LLT DstTy = MRI.getType(DstReg);
2540  Register SrcReg = I.getOperand(2).getReg();
2541  Register Src2Reg = I.getOperand(3).getReg();
2542  LLT SrcTy = MRI.getType(SrcReg);
2543 
2544  unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
2545  unsigned NumElts = DstTy.getNumElements();
2546 
2547  // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
2548  // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
2549  // Third index is cc opcode:
2550  // 0 == eq
2551  // 1 == ugt
2552  // 2 == uge
2553  // 3 == ult
2554  // 4 == ule
2555  // 5 == sgt
2556  // 6 == sge
2557  // 7 == slt
2558  // 8 == sle
2559  // ne is done by negating 'eq' result.
2560 
2561  // This table below assumes that for some comparisons the operands will be
2562  // commuted.
2563  // ult op == commute + ugt op
2564  // ule op == commute + uge op
2565  // slt op == commute + sgt op
2566  // sle op == commute + sge op
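 // Illustrative lookup: an ult compare of <4 x s32> uses EltIdx = 2,
 // NumEltsIdx = 1, PredIdx = 3 with the operands swapped, which selects
 // CMHIv4i32 from the table below.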
2567  unsigned PredIdx = 0;
2568  bool SwapOperands = false;
2569  CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
2570  switch (Pred) {
2571  case CmpInst::ICMP_NE:
2572  case CmpInst::ICMP_EQ:
2573  PredIdx = 0;
2574  break;
2575  case CmpInst::ICMP_UGT:
2576  PredIdx = 1;
2577  break;
2578  case CmpInst::ICMP_UGE:
2579  PredIdx = 2;
2580  break;
2581  case CmpInst::ICMP_ULT:
2582  PredIdx = 3;
2583  SwapOperands = true;
2584  break;
2585  case CmpInst::ICMP_ULE:
2586  PredIdx = 4;
2587  SwapOperands = true;
2588  break;
2589  case CmpInst::ICMP_SGT:
2590  PredIdx = 5;
2591  break;
2592  case CmpInst::ICMP_SGE:
2593  PredIdx = 6;
2594  break;
2595  case CmpInst::ICMP_SLT:
2596  PredIdx = 7;
2597  SwapOperands = true;
2598  break;
2599  case CmpInst::ICMP_SLE:
2600  PredIdx = 8;
2601  SwapOperands = true;
2602  break;
2603  default:
2604  llvm_unreachable("Unhandled icmp predicate");
2605  return false;
2606  }
2607 
2608  // This table obviously should be tablegen'd when we have our GISel native
2609  // tablegen selector.
2610 
2611  static const unsigned OpcTable[4][4][9] = {
2612  {
2613  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2614  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2615  0 /* invalid */},
2616  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2617  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2618  0 /* invalid */},
2619  {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
2620  AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
2621  AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
2622  {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
2623  AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
2624  AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
2625  },
2626  {
2627  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2628  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2629  0 /* invalid */},
2630  {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
2631  AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
2632  AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
2633  {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
2634  AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
2635  AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
2636  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2637  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2638  0 /* invalid */}
2639  },
2640  {
2641  {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
2642  AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
2643  AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
2644  {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
2645  AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
2646  AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
2647  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2648  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2649  0 /* invalid */},
2650  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2651  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2652  0 /* invalid */}
2653  },
2654  {
2655  {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
2656  AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
2657  AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
2658  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2659  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2660  0 /* invalid */},
2661  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2662  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2663  0 /* invalid */},
2664  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2665  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2666  0 /* invalid */}
2667  },
2668  };
2669  unsigned EltIdx = Log2_32(SrcEltSize / 8);
2670  unsigned NumEltsIdx = Log2_32(NumElts / 2);
2671  unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
2672  if (!Opc) {
2673  LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
2674  return false;
2675  }
2676 
2677  const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2678  const TargetRegisterClass *SrcRC =
2679  getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
2680  if (!SrcRC) {
2681  LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
2682  return false;
2683  }
2684 
2685  unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
2686  if (SrcTy.getSizeInBits() == 128)
2687  NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
2688 
2689  if (SwapOperands)
2690  std::swap(SrcReg, Src2Reg);
2691 
2692  MachineIRBuilder MIB(I);
2693  auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
2694  constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
2695 
2696  // Invert if we had a 'ne' cc.
2697  if (NotOpc) {
2698  Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
2699  constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
2700  } else {
2701  MIB.buildCopy(DstReg, Cmp.getReg(0));
2702  }
2703  RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
2704  I.eraseFromParent();
2705  return true;
2706 }
2707 
2708 MachineInstr *AArch64InstructionSelector::emitScalarToVector(
2709  unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
2710  MachineIRBuilder &MIRBuilder) const {
2711  auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
2712 
2713  auto BuildFn = [&](unsigned SubregIndex) {
2714  auto Ins =
2715  MIRBuilder
2716  .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
2717  .addImm(SubregIndex);
2718  constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
2719  constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
2720  return &*Ins;
2721  };
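 // Illustrative result for a 32-bit scalar: an IMPLICIT_DEF of the vector
 // register class followed by an INSERT_SUBREG of the scalar into its ssub
 // lane.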
2722 
2723  switch (EltSize) {
2724  case 16:
2725  return BuildFn(AArch64::hsub);
2726  case 32:
2727  return BuildFn(AArch64::ssub);
2728  case 64:
2729  return BuildFn(AArch64::dsub);
2730  default:
2731  return nullptr;
2732  }
2733 }
2734 
2735  bool AArch64InstructionSelector::selectMergeValues(
2736  MachineInstr &I, MachineRegisterInfo &MRI) const {
2737  assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
2738  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2739  const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
2740  assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
2741  const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
2742 
2743  if (I.getNumOperands() != 3)
2744  return false;
2745 
2746  // Merging 2 s64s into an s128.
2747  if (DstTy == LLT::scalar(128)) {
2748  if (SrcTy.getSizeInBits() != 64)
2749  return false;
2750  MachineIRBuilder MIB(I);
2751  Register DstReg = I.getOperand(0).getReg();
2752  Register Src1Reg = I.getOperand(1).getReg();
2753  Register Src2Reg = I.getOperand(2).getReg();
2754  auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
2755  MachineInstr *InsMI =
2756  emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
2757  if (!InsMI)
2758  return false;
2759  MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
2760  Src2Reg, /* LaneIdx */ 1, RB, MIB);
2761  if (!Ins2MI)
2762  return false;
2763  constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
2764  constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
2765  I.eraseFromParent();
2766  return true;
2767  }
2768 
2769  if (RB.getID() != AArch64::GPRRegBankID)
2770  return false;
2771 
2772  if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
2773  return false;
2774 
2775  auto *DstRC = &AArch64::GPR64RegClass;
2776  Register SubToRegDef = MRI.createVirtualRegister(DstRC);
2777  MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
2778  TII.get(TargetOpcode::SUBREG_TO_REG))
2779  .addDef(SubToRegDef)
2780  .addImm(0)
2781  .addUse(I.getOperand(1).getReg())
2782  .addImm(AArch64::sub_32);
2783  Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
2784  // Need to anyext the second scalar before we can use bfm
2785  MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
2786  TII.get(TargetOpcode::SUBREG_TO_REG))
2787  .addDef(SubToRegDef2)
2788  .addImm(0)
2789  .addUse(I.getOperand(2).getReg())
2790  .addImm(AArch64::sub_32);
2791  MachineInstr &BFM =
2792  *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
2793  .addDef(I.getOperand(0).getReg())
2794  .addUse(SubToRegDef)
2795  .addUse(SubToRegDef2)
2796  .addImm(32)
2797  .addImm(31);
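 // BFMXri %lo, %hi, 32, 31 copies bits [31:0] of the second source into
 // bits [63:32] of the destination, so the two anyext'd s32 values end up as
 // the low and high halves of the merged s64.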
2798  constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
2799  constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
2800  constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
2801  I.eraseFromParent();
2802  return true;
2803 }
2804 
2805 static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
2806  const unsigned EltSize) {
2807  // Choose a lane copy opcode and subregister based off of the size of the
2808  // vector's elements.
2809  switch (EltSize) {
2810  case 16:
2811  CopyOpc = AArch64::CPYi16;
2812  ExtractSubReg = AArch64::hsub;
2813  break;
2814  case 32:
2815  CopyOpc = AArch64::CPYi32;
2816  ExtractSubReg = AArch64::ssub;
2817  break;
2818  case 64:
2819  CopyOpc = AArch64::CPYi64;
2820  ExtractSubReg = AArch64::dsub;
2821  break;
2822  default:
2823  // Unknown size, bail out.
2824  LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
2825  return false;
2826  }
2827  return true;
2828 }
2829 
2830 MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
2831  Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
2832  Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
2833  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2834  unsigned CopyOpc = 0;
2835  unsigned ExtractSubReg = 0;
2836  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
2837  LLVM_DEBUG(
2838  dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
2839  return nullptr;
2840  }
2841 
2842  const TargetRegisterClass *DstRC =
2843  getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
2844  if (!DstRC) {
2845  LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
2846  return nullptr;
2847  }
2848 
2849  const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
2850  const LLT &VecTy = MRI.getType(VecReg);
2851  const TargetRegisterClass *VecRC =
2852  getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
2853  if (!VecRC) {
2854  LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
2855  return nullptr;
2856  }
2857 
2858  // The register that we're going to copy into.
2859  Register InsertReg = VecReg;
2860  if (!DstReg)
2861  DstReg = MRI.createVirtualRegister(DstRC);
2862  // If the lane index is 0, we just use a subregister COPY.
2863  if (LaneIdx == 0) {
2864  auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
2865  .addReg(VecReg, 0, ExtractSubReg);
2866  RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
2867  return &*Copy;
2868  }
2869 
2870  // Lane copies require 128-bit wide registers. If we're dealing with an
2871  // unpacked vector, then we need to move up to that width. Insert an implicit
2872  // def and a subregister insert to get us there.
2873  if (VecTy.getSizeInBits() != 128) {
2874  MachineInstr *ScalarToVector = emitScalarToVector(
2875  VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
2876  if (!ScalarToVector)
2877  return nullptr;
2878  InsertReg = ScalarToVector->getOperand(0).getReg();
2879  }
2880 
2881  MachineInstr *LaneCopyMI =
2882  MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
2883  constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
2884 
2885  // Make sure that we actually constrain the initial copy.
2886  RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
2887  return LaneCopyMI;
2888 }
2889 
2890 bool AArch64InstructionSelector::selectExtractElt(
2891  MachineInstr &I, MachineRegisterInfo &MRI) const {
2892  assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
2893  "unexpected opcode!");
2894  Register DstReg = I.getOperand(0).getReg();
2895  const LLT NarrowTy = MRI.getType(DstReg);
2896  const Register SrcReg = I.getOperand(1).getReg();
2897  const LLT WideTy = MRI.getType(SrcReg);
2898  (void)WideTy;
2899  assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
2900  "source register size too small!");
2901  assert(NarrowTy.isScalar() && "cannot extract vector into vector!");
2902 
2903  // Need the lane index to determine the correct copy opcode.
2904  MachineOperand &LaneIdxOp = I.getOperand(2);
2905  assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
2906 
2907  if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
2908  LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
2909  return false;
2910  }
2911 
2912  // Find the index to extract from.
2913  auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
2914  if (!VRegAndVal)
2915  return false;
2916  unsigned LaneIdx = VRegAndVal->Value;
2917 
2918  MachineIRBuilder MIRBuilder(I);
2919 
2920  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2921  MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
2922  LaneIdx, MIRBuilder);
2923  if (!Extract)
2924  return false;
2925 
2926  I.eraseFromParent();
2927  return true;
2928 }
2929 
2930 bool AArch64InstructionSelector::selectSplitVectorUnmerge(
2931  MachineInstr &I, MachineRegisterInfo &MRI) const {
2932  unsigned NumElts = I.getNumOperands() - 1;
2933  Register SrcReg = I.getOperand(NumElts).getReg();
2934  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
2935  const LLT SrcTy = MRI.getType(SrcReg);
2936 
2937  assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
2938  if (SrcTy.getSizeInBits() > 128) {
2939  LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
2940  return false;
2941  }
2942 
2943  MachineIRBuilder MIB(I);
2944 
2945  // We implement a split vector operation by treating the sub-vectors as
2946  // scalars and extracting them.
2947  const RegisterBank &DstRB =
2948  *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
2949  for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
2950  Register Dst = I.getOperand(OpIdx).getReg();
2951  MachineInstr *Extract =
2952  emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
2953  if (!Extract)
2954  return false;
2955  }
2956  I.eraseFromParent();
2957  return true;
2958 }
2959 
2960  bool AArch64InstructionSelector::selectUnmergeValues(
2961  MachineInstr &I, MachineRegisterInfo &MRI) const {
2962  assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2963  "unexpected opcode");
2964 
2965  // TODO: Handle unmerging into GPRs and from scalars to scalars.
2966  if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
2967  AArch64::FPRRegBankID ||
2968  RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
2969  AArch64::FPRRegBankID) {
2970  LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
2971  "currently unsupported.\n");
2972  return false;
2973  }
2974 
2975  // The last operand is the vector source register, and every other operand is
2976  // a register to unpack into.
2977  unsigned NumElts = I.getNumOperands() - 1;
2978  Register SrcReg = I.getOperand(NumElts).getReg();
2979  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
2980  const LLT WideTy = MRI.getType(SrcReg);
2981  (void)WideTy;
2982  assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
2983  "can only unmerge from vector or s128 types!");
2984  assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
2985  "source register size too small!");
2986 
2987  if (!NarrowTy.isScalar())
2988  return selectSplitVectorUnmerge(I, MRI);
2989 
2990  MachineIRBuilder MIB(I);
2991 
2992  // Choose a lane copy opcode and subregister based off of the size of the
2993  // vector's elements.
2994  unsigned CopyOpc = 0;
2995  unsigned ExtractSubReg = 0;
2996  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
2997  return false;
2998 
2999  // Set up for the lane copies.
3000  MachineBasicBlock &MBB = *I.getParent();
3001 
3002  // Stores the registers we'll be copying from.
3003  SmallVector<Register, 4> InsertRegs;
3004 
3005  // We'll use the first register twice, so we only need NumElts-1 registers.
3006  unsigned NumInsertRegs = NumElts - 1;
3007 
3008  // If our elements fit into exactly 128 bits, then we can copy from the source
3009  // directly. Otherwise, we need to do a bit of setup with some subregister
3010  // inserts.
3011  if (NarrowTy.getSizeInBits() * NumElts == 128) {
3012  InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
3013  } else {
3014  // No. We have to perform subregister inserts. For each insert, create an
3015  // implicit def and a subregister insert, and save the register we create.
3016  for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
3017  Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
3018  MachineInstr &ImpDefMI =
3019  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
3020  ImpDefReg);
3021 
3022  // Now, create the subregister insert from SrcReg.
3023  Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
3024  MachineInstr &InsMI =
3025  *BuildMI(MBB, I, I.getDebugLoc(),
3026  TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
3027  .addUse(ImpDefReg)
3028  .addUse(SrcReg)
3029  .addImm(AArch64::dsub);
3030 
3031  constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
3032  constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
3033 
3034  // Save the register so that we can copy from it after.
3035  InsertRegs.push_back(InsertReg);
3036  }
3037  }
3038 
3039  // Now that we've created any necessary subregister inserts, we can
3040  // create the copies.
3041  //
3042  // Perform the first copy separately as a subregister copy.
3043  Register CopyTo = I.getOperand(0).getReg();
3044  auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
3045  .addReg(InsertRegs[0], 0, ExtractSubReg);
3046  constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
3047 
3048  // Now, perform the remaining copies as vector lane copies.
3049  unsigned LaneIdx = 1;
3050  for (Register InsReg : InsertRegs) {
3051  Register CopyTo = I.getOperand(LaneIdx).getReg();
3052  MachineInstr &CopyInst =
3053  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
3054  .addUse(InsReg)
3055  .addImm(LaneIdx);
3056  constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
3057  ++LaneIdx;
3058  }
3059 
3060  // Separately constrain the first copy's destination. Because of the
3061  // limitation in constrainOperandRegClass, we can't guarantee that this will
3062  // actually be constrained. So, do it ourselves using the second operand.
3063  const TargetRegisterClass *RC =
3064  MRI.getRegClassOrNull(I.getOperand(1).getReg());
3065  if (!RC) {
3066  LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
3067  return false;
3068  }
3069 
3070  RBI.constrainGenericRegister(CopyTo, *RC, MRI);
3071  I.eraseFromParent();
3072  return true;
3073 }
3074 
3075 bool AArch64InstructionSelector::selectConcatVectors(
3076  MachineInstr &I, MachineRegisterInfo &MRI) const {
3077  assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
3078  "Unexpected opcode");
3079  Register Dst = I.getOperand(0).getReg();
3080  Register Op1 = I.getOperand(1).getReg();
3081  Register Op2 = I.getOperand(2).getReg();
3082  MachineIRBuilder MIRBuilder(I);
3083  MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
3084  if (!ConcatMI)
3085  return false;
3086  I.eraseFromParent();
3087  return true;
3088 }
3089 
3090 unsigned
3091 AArch64InstructionSelector::emitConstantPoolEntry(Constant *CPVal,
3092  MachineFunction &MF) const {
3093  Type *CPTy = CPVal->getType();
3094  unsigned Align = MF.getDataLayout().getPrefTypeAlignment(CPTy);
3095  if (Align == 0)
3096  Align = MF.getDataLayout().getTypeAllocSize(CPTy);
3097 
3098  MachineConstantPool *MCP = MF.getConstantPool();
3099  return MCP->getConstantPoolIndex(CPVal, Align);
3100 }
3101 
3102 MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
3103  Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
3104  unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());
3105 
3106  auto Adrp =
3107  MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
3108  .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
3109 
3110  MachineInstr *LoadMI = nullptr;
3111  switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
3112  case 16:
3113  LoadMI =
3114  &*MIRBuilder
3115  .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
3116  .addConstantPoolIndex(CPIdx, 0,
3117  AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3118  break;
3119  case 8:
3120  LoadMI = &*MIRBuilder
3121  .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
3122  .addConstantPoolIndex(
3123  CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3124  break;
3125  default:
3126  LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
3127  << *CPVal->getType());
3128  return nullptr;
3129  }
3130  constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
3131  constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
3132  return LoadMI;
3133 }
3134 
3135 /// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
3136 /// size and RB.
3137 static std::pair<unsigned, unsigned>
3138 getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
3139  unsigned Opc, SubregIdx;
3140  if (RB.getID() == AArch64::GPRRegBankID) {
3141  if (EltSize == 32) {
3142  Opc = AArch64::INSvi32gpr;
3143  SubregIdx = AArch64::ssub;
3144  } else if (EltSize == 64) {
3145  Opc = AArch64::INSvi64gpr;
3146  SubregIdx = AArch64::dsub;
3147  } else {
3148  llvm_unreachable("invalid elt size!");
3149  }
3150  } else {
3151  if (EltSize == 8) {
3152  Opc = AArch64::INSvi8lane;
3153  SubregIdx = AArch64::bsub;
3154  } else if (EltSize == 16) {
3155  Opc = AArch64::INSvi16lane;
3156  SubregIdx = AArch64::hsub;
3157  } else if (EltSize == 32) {
3158  Opc = AArch64::INSvi32lane;
3159  SubregIdx = AArch64::ssub;
3160  } else if (EltSize == 64) {
3161  Opc = AArch64::INSvi64lane;
3162  SubregIdx = AArch64::dsub;
3163  } else {
3164  llvm_unreachable("invalid elt size!");
3165  }
3166  }
3167  return std::make_pair(Opc, SubregIdx);
3168 }
3169 
3170 MachineInstr *
3171 AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
3172  MachineOperand &RHS,
3173  MachineIRBuilder &MIRBuilder) const {
3174  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
3175  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3176  static const unsigned OpcTable[2][2]{{AArch64::ADDXrr, AArch64::ADDXri},
3177  {AArch64::ADDWrr, AArch64::ADDWri}};
3178  bool Is32Bit = MRI.getType(LHS.getReg()).getSizeInBits() == 32;
3179  auto ImmFns = selectArithImmed(RHS);
3180  unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
3181  auto AddMI = MIRBuilder.buildInstr(Opc, {DefReg}, {LHS.getReg()});
3182 
3183  // If we matched a valid constant immediate, add those operands.
3184  if (ImmFns) {
3185  for (auto &RenderFn : *ImmFns)
3186  RenderFn(AddMI);
3187  } else {
3188  AddMI.addUse(RHS.getReg());
3189  }
3190 
3191  constrainSelectedInstRegOperands(*AddMI, TII, TRI, RBI);
3192  return &*AddMI;
3193 }
3194 
3195 MachineInstr *
3196 AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
3197  MachineIRBuilder &MIRBuilder) const {
3198  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
3199  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3200  static const unsigned OpcTable[2][2]{{AArch64::ADDSXrr, AArch64::ADDSXri},
3201  {AArch64::ADDSWrr, AArch64::ADDSWri}};
3202  bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
3203  auto ImmFns = selectArithImmed(RHS);
3204  unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
3205  Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
3206 
3207  auto CmpMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS.getReg()});
3208 
3209  // If we matched a valid constant immediate, add those operands.
3210  if (ImmFns) {
3211  for (auto &RenderFn : *ImmFns)
3212  RenderFn(CmpMI);
3213  } else {
3214  CmpMI.addUse(RHS.getReg());
3215  }
3216 
3217  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
3218  return &*CmpMI;
3219 }
3220 
3221 MachineInstr *
3222 AArch64InstructionSelector::emitTST(const Register &LHS, const Register &RHS,
3223  MachineIRBuilder &MIRBuilder) const {
3224  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3225  unsigned RegSize = MRI.getType(LHS).getSizeInBits();
3226  bool Is32Bit = (RegSize == 32);
3227  static const unsigned OpcTable[2][2]{{AArch64::ANDSXrr, AArch64::ANDSXri},
3228  {AArch64::ANDSWrr, AArch64::ANDSWri}};
3229  Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
3230 
3231  // We might be able to fold an immediate into the TST. We need to make sure
3232  // it's a logical immediate though, since ANDS requires that.
3233  auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI);
3234  bool IsImmForm = ValAndVReg.hasValue() &&
3235  AArch64_AM::isLogicalImmediate(ValAndVReg->Value, RegSize);
3236  unsigned Opc = OpcTable[Is32Bit][IsImmForm];
3237  auto TstMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS});
3238 
3239  if (IsImmForm)
3240  TstMI.addImm(
3241  AArch64_AM::encodeLogicalImmediate(ValAndVReg->Value, RegSize));
3242  else
3243  TstMI.addUse(RHS);
3244 
3245  constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3246  return &*TstMI;
3247 }
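// Illustrative example: 0xff is a valid logical immediate, so a test against
// it becomes "ands wzr, w0, #0xff" (the "tst" alias); a value such as 0xfa is
// not encodable as a logical immediate and forces the register form
// "ands wzr, w0, w1".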
3248 
3249 MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
3250  MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
3251  MachineIRBuilder &MIRBuilder) const {
3252  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
3253  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3254 
3255  // Fold the compare if possible.
3256  MachineInstr *FoldCmp =
3257  tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder);
3258  if (FoldCmp)
3259  return FoldCmp;
3260 
3261  // Can't fold into a CMN. Just emit a normal compare.
3262  unsigned CmpOpc = 0;
3263  Register ZReg;
3264 
3265  LLT CmpTy = MRI.getType(LHS.getReg());
3266  assert((CmpTy.isScalar() || CmpTy.isPointer()) &&
3267  "Expected scalar or pointer");
3268  if (CmpTy == LLT::scalar(32)) {
3269  CmpOpc = AArch64::SUBSWrr;
3270  ZReg = AArch64::WZR;
3271  } else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) {
3272  CmpOpc = AArch64::SUBSXrr;
3273  ZReg = AArch64::XZR;
3274  } else {
3275  return nullptr;
3276  }
3277 
3278  // Try to match immediate forms.
3279  auto ImmFns = selectArithImmed(RHS);
3280  if (ImmFns)
3281  CmpOpc = CmpOpc == AArch64::SUBSWrr ? AArch64::SUBSWri : AArch64::SUBSXri;
3282 
3283  auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addDef(ZReg).addUse(LHS.getReg());
3284  // If we matched a valid constant immediate, add those operands.
3285  if (ImmFns) {
3286  for (auto &RenderFn : *ImmFns)
3287  RenderFn(CmpMI);
3288  } else {
3289  CmpMI.addUse(RHS.getReg());
3290  }
3291 
3292  // Make sure that we can constrain the compare that we emitted.
3293  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
3294  return &*CmpMI;
3295 }
3296 
3297 MachineInstr *AArch64InstructionSelector::emitVectorConcat(
3298  Optional<Register> Dst, Register Op1, Register Op2,
3299  MachineIRBuilder &MIRBuilder) const {
3300  // We implement a vector concat by:
3301  // 1. Use scalar_to_vector to insert the lower vector into the larger dest
3302  // 2. Insert the upper vector into the destination's upper element
3303  // TODO: some of this code is common with G_BUILD_VECTOR handling.
3304  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3305 
3306  const LLT Op1Ty = MRI.getType(Op1);
3307  const LLT Op2Ty = MRI.getType(Op2);
3308 
3309  if (Op1Ty != Op2Ty) {
3310  LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
3311  return nullptr;
3312  }
3313  assert(Op1Ty.isVector() && "Expected a vector for vector concat");
3314 
3315  if (Op1Ty.getSizeInBits() >= 128) {
3316  LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
3317  return nullptr;
3318  }
3319 
3320  // At the moment we just support 64 bit vector concats.
3321  if (Op1Ty.getSizeInBits() != 64) {
3322  LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
3323  return nullptr;
3324  }
3325 
3326  const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
3327  const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
3328  const TargetRegisterClass *DstRC =
3329  getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
3330 
3331  MachineInstr *WidenedOp1 =
3332  emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
3333  MachineInstr *WidenedOp2 =
3334  emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
3335  if (!WidenedOp1 || !WidenedOp2) {
3336  LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
3337  return nullptr;
3338  }
3339 
3340  // Now do the insert of the upper element.
3341  unsigned InsertOpc, InsSubRegIdx;
3342  std::tie(InsertOpc, InsSubRegIdx) =
3343  getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
3344 
3345  if (!Dst)
3346  Dst = MRI.createVirtualRegister(DstRC);
3347  auto InsElt =
3348  MIRBuilder
3349  .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
3350  .addImm(1) /* Lane index */
3351  .addUse(WidenedOp2->getOperand(0).getReg())
3352  .addImm(0);
3353  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
3354  return &*InsElt;
3355 }
3356 
3357 MachineInstr *AArch64InstructionSelector::emitFMovForFConstant(
3358  MachineInstr &I, MachineRegisterInfo &MRI) const {
3359  assert(I.getOpcode() == TargetOpcode::G_FCONSTANT &&
3360  "Expected a G_FCONSTANT!");
3361  MachineOperand &ImmOp = I.getOperand(1);
3362  unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();
3363 
3364  // Only handle 32 and 64 bit defs for now.
3365  if (DefSize != 32 && DefSize != 64)
3366  return nullptr;
3367 
3368  // Don't handle null values using FMOV.
3369  if (ImmOp.getFPImm()->isNullValue())
3370  return nullptr;
3371 
3372  // Get the immediate representation for the FMOV.
3373  const APFloat &ImmValAPF = ImmOp.getFPImm()->getValueAPF();
3374  int Imm = DefSize == 32 ? AArch64_AM::getFP32Imm(ImmValAPF)
3375  : AArch64_AM::getFP64Imm(ImmValAPF);
3376 
3377  // If this is -1, it means the immediate can't be represented as the requested
3378  // floating point value. Bail.
3379  if (Imm == -1)
3380  return nullptr;
3381 
3382  // Update MI to represent the new FMOV instruction, constrain it, and return.
3383  ImmOp.ChangeToImmediate(Imm);
3384  unsigned MovOpc = DefSize == 32 ? AArch64::FMOVSi : AArch64::FMOVDi;
3385  I.setDesc(TII.get(MovOpc));
3386  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3387  return &I;
3388 }
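// Illustrative note: FMOV's 8-bit immediate covers values of the form
// +/-(16..31)/16 * 2^r, so constants such as 2.0 or 0.125 select to a single
// "fmov", while something like 0.1 makes getFP32Imm/getFP64Imm return -1 and
// we fall back to the generic G_FCONSTANT selection.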
3389 
3390 MachineInstr *
3391 AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
3392  MachineIRBuilder &MIRBuilder) const {
3393  // CSINC increments the result when the predicate is false. Invert it.
3394  AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
3395  CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
3396  auto I =
3397  MIRBuilder
3398  .buildInstr(AArch64::CSINCWr, {DefReg}, {Register(AArch64::WZR), Register(AArch64::WZR)})
3399  .addImm(InvCC);
3400  constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
3401  return &*I;
3402 }
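// Illustrative note: "cset w0, <cc>" is an alias for
// "csinc w0, wzr, wzr, <inverted cc>". With the inverted condition, CSINC
// yields WZR (0) when the original predicate is false and WZR + 1 (1) when it
// is true, which is exactly the 0/1 result wanted for the G_ICMP.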
3403 
3404 bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
3405  MachineIRBuilder MIB(I);
3406  MachineRegisterInfo &MRI = *MIB.getMRI();
3408 
3409  // We want to recognize this pattern:
3410  //
3411  // $z = G_FCMP pred, $x, $y
3412  // ...
3413  // $w = G_SELECT $z, $a, $b
3414  //
3415  // Where the value of $z is *only* ever used by the G_SELECT (possibly with
3416  // some copies/truncs in between.)
3417  //
3418  // If we see this, then we can emit something like this:
3419  //
3420  // fcmp $x, $y
3421  // fcsel $w, $a, $b, pred
3422  //
3423  // Rather than emitting both of the rather long sequences in the standard
3424  // G_FCMP/G_SELECT select methods.
3425 
3426  // First, check if the condition is defined by a compare.
3427  MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
3428  while (CondDef) {
3429  // We can only fold if all of the defs have one use.
3430  if (!MRI.hasOneUse(CondDef->getOperand(0).getReg()))
3431  return false;
3432 
3433  // We can skip over G_TRUNC since the condition is 1-bit.
3434  // Truncating/extending can have no impact on the value.
3435  unsigned Opc = CondDef->getOpcode();
3436  if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
3437  break;
3438 
3439  // Can't see past copies from physregs.
3440  if (Opc == TargetOpcode::COPY &&
3441  Register::isPhysicalRegister(CondDef->getOperand(1).getReg()))
3442  return false;
3443 
3444  CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
3445  }
3446 
3447  // Is the condition defined by a compare?
3448  if (!CondDef)
3449  return false;
3450 
3451  unsigned CondOpc = CondDef->getOpcode();
3452  if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
3453  return false;
3454 
3455  AArch64CC::CondCode CondCode;
3456  if (CondOpc == TargetOpcode::G_ICMP) {
3457  CondCode = changeICMPPredToAArch64CC(
3458  (CmpInst::Predicate)CondDef->getOperand(1).getPredicate());
3459  if (!emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
3460  CondDef->getOperand(1), MIB)) {
3461  LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
3462  return false;
3463  }
3464  } else {
3465  // Get the condition code for the select.
3466  AArch64CC::CondCode CondCode2;
3467  changeFCMPPredToAArch64CC(
3468  (CmpInst::Predicate)CondDef->getOperand(1).getPredicate(), CondCode,
3469  CondCode2);
3470 
3471  // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
3472  // instructions to emit the comparison.
3473  // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
3474  // unnecessary.
3475  if (CondCode2 != AArch64CC::AL)
3476  return false;
3477 
3478  // Make sure we'll be able to select the compare.
3479  unsigned CmpOpc = selectFCMPOpc(*CondDef, MRI);
3480  if (!CmpOpc)
3481  return false;
3482 
3483  // Emit a new compare.
3484  auto Cmp = MIB.buildInstr(CmpOpc, {}, {CondDef->getOperand(2).getReg()});
3485  if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
3486  Cmp.addUse(CondDef->getOperand(3).getReg());
3487  constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3488  }
3489 
3490  // Emit the select.
3491  unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
3492  auto CSel =
3493  MIB.buildInstr(CSelOpc, {I.getOperand(0).getReg()},
3494  {I.getOperand(2).getReg(), I.getOperand(3).getReg()})
3495  .addImm(CondCode);
3496  constrainSelectedInstRegOperands(*CSel, TII, TRI, RBI);
3497  I.eraseFromParent();
3498  return true;
3499 }
3500 
3501 MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
3502  MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
3503  MachineIRBuilder &MIRBuilder) const {
3504  assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
3505  "Unexpected MachineOperand");
3506  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3507  // We want to find this sort of thing:
3508  // x = G_SUB 0, y
3509  // G_ICMP z, x
3510  //
3511  // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
3512  // e.g:
3513  //
3514  // cmn z, y
3515 
3516  // Helper lambda to detect the subtract followed by the compare.
3517  // Takes in the def of the LHS or RHS, and checks if it's a subtract from 0.
3518  auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) {
3519  if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB)
3520  return false;
3521 
3522  // Need to make sure NZCV is the same at the end of the transformation.
3523  if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
3524  return false;
3525 
3526  // We want to match against SUBs.
3527  if (DefMI->getOpcode() != TargetOpcode::G_SUB)
3528  return false;
3529 
3530  // Make sure that we're getting
3531  // x = G_SUB 0, y
3532  auto ValAndVReg =
3533  getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI);
3534  if (!ValAndVReg || ValAndVReg->Value != 0)
3535  return false;
3536 
3537  // This can safely be represented as a CMN.
3538  return true;
3539  };
3540 
3541  // Check if the RHS or LHS of the G_ICMP is defined by a SUB
3542  MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
3543  MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
3544  CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
3545  const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(P);
3546 
3547  // Given this:
3548  //
3549  // x = G_SUB 0, y
3550  // G_ICMP x, z
3551  //
3552  // Produce this:
3553  //
3554  // cmn y, z
3555  if (IsCMN(LHSDef, CC))
3556  return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
3557 
3558  // Same idea here, but with the RHS of the compare instead:
3559  //
3560  // Given this:
3561  //
3562  // x = G_SUB 0, y
3563  // G_ICMP z, x
3564  //
3565  // Produce this:
3566  //
3567  // cmn z, y
3568  if (IsCMN(RHSDef, CC))
3569  return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
3570 
3571  // Given this:
3572  //
3573  // z = G_AND x, y
3574  // G_ICMP z, 0
3575  //
3576  // Produce this if the compare is signed:
3577  //
3578  // tst x, y
3579  if (!isUnsignedICMPPred(P) && LHSDef &&
3580  LHSDef->getOpcode() == TargetOpcode::G_AND) {
3581  // Make sure that the RHS is 0.
3582  auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
3583  if (!ValAndVReg || ValAndVReg->Value != 0)
3584  return nullptr;
3585 
3586  return emitTST(LHSDef->getOperand(1).getReg(),
3587  LHSDef->getOperand(2).getReg(), MIRBuilder);
3588  }
3589 
3590  return nullptr;
3591 }
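// Illustrative example: given "%n = G_SUB 0, %y" and "G_ICMP eq, %x, %n", the
// fold above emits "cmn x0, x1" rather than materializing the negation and
// comparing. It is restricted to EQ/NE because only the Z flag is guaranteed
// to match between SUBS against a negated value and ADDS (CMN) against the
// original one.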
3592 
3593 bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
3594  // Try to match a vector splat operation into a dup instruction.
3595  // We're looking for this pattern:
3596  // %scalar:gpr(s64) = COPY $x0
3597  // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
3598  // %cst0:gpr(s32) = G_CONSTANT i32 0
3599  // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
3600  // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
3601  // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
3602  // %zerovec(<2 x s32>)
3603  //
3604  // ...into:
3605  // %splat = DUP %scalar
3606  // We use the regbank of the scalar to determine which kind of dup to use.
3607  MachineIRBuilder MIB(I);
3608  MachineRegisterInfo &MRI = *MIB.getMRI();
3610  using namespace TargetOpcode;
3611  using namespace MIPatternMatch;
3612 
3613  // Begin matching the insert.
3614  auto *InsMI =
3615  getOpcodeDef(G_INSERT_VECTOR_ELT, I.getOperand(1).getReg(), MRI);
3616  if (!InsMI)
3617  return false;
3618  // Match the undef vector operand.
3619  auto *UndefMI =
3620  getOpcodeDef(G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(), MRI);
3621  if (!UndefMI)
3622  return false;
3623  // Match the scalar being splatted.
3624  Register ScalarReg = InsMI->getOperand(2).getReg();
3625  const RegisterBank *ScalarRB = RBI.getRegBank(ScalarReg, MRI, TRI);
3626  // Match the index constant 0.
3627  int64_t Index = 0;
3628  if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ICst(Index)) || Index)
3629  return false;
3630 
3631  // The shuffle's second operand doesn't matter if the mask is all zero.
3632  const Constant *Mask = I.getOperand(3).getShuffleMask();
3633  if (!isa<ConstantAggregateZero>(Mask))
3634  return false;
3635 
3636  // We're done, now find out what kind of splat we need.
3637  LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3638  LLT EltTy = VecTy.getElementType();
3639  if (VecTy.getSizeInBits() != 128 || EltTy.getSizeInBits() < 32) {
3640  LLVM_DEBUG(dbgs() << "Could not optimize splat pattern < 128b yet");
3641  return false;
3642  }
3643  bool IsFP = ScalarRB->getID() == AArch64::FPRRegBankID;
3644  static const unsigned OpcTable[2][2] = {
3645  {AArch64::DUPv4i32gpr, AArch64::DUPv2i64gpr},
3646  {AArch64::DUPv4i32lane, AArch64::DUPv2i64lane}};
3647  unsigned Opc = OpcTable[IsFP][EltTy.getSizeInBits() == 64];
3648 
3649  // For FP splats, we need to widen the scalar reg via undef too.
3650  if (IsFP) {
3651  MachineInstr *Widen = emitScalarToVector(
3652  EltTy.getSizeInBits(), &AArch64::FPR128RegClass, ScalarReg, MIB);
3653  if (!Widen)
3654  return false;
3655  ScalarReg = Widen->getOperand(0).getReg();
3656  }
3657  auto Dup = MIB.buildInstr(Opc, {I.getOperand(0).getReg()}, {ScalarReg});
3658  if (IsFP)
3659  Dup.addImm(0);
3660  constrainSelectedInstRegOperands(*Dup, TII, TRI, RBI);
3661  I.eraseFromParent();
3662  return true;
3663 }
3664 
3665 bool AArch64InstructionSelector::tryOptVectorShuffle(MachineInstr &I) const {
3666  if (TM.getOptLevel() == CodeGenOpt::None)
3667  return false;
3668  if (tryOptVectorDup(I))
3669  return true;
3670  return false;
3671 }
3672 
3673 bool AArch64InstructionSelector::selectShuffleVector(
3674  MachineInstr &I, MachineRegisterInfo &MRI) const {
3675  if (tryOptVectorShuffle(I))
3676  return true;
3677  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3678  Register Src1Reg = I.getOperand(1).getReg();
3679  const LLT Src1Ty = MRI.getType(Src1Reg);
3680  Register Src2Reg = I.getOperand(2).getReg();
3681  const LLT Src2Ty = MRI.getType(Src2Reg);
3682  const Constant *ShuffleMask = I.getOperand(3).getShuffleMask();
3683 
3684  MachineBasicBlock &MBB = *I.getParent();
3685  MachineFunction &MF = *MBB.getParent();
3686  LLVMContext &Ctx = MF.getFunction().getContext();
3687 
3688  SmallVector<int, 8> Mask;
3689  ShuffleVectorInst::getShuffleMask(ShuffleMask, Mask);
3690 
3691  // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
3692  // it's originated from a <1 x T> type. Those should have been lowered into
3693  // G_BUILD_VECTOR earlier.
3694  if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
3695  LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
3696  return false;
3697  }
3698 
3699  unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
3700 
3701  SmallVector<Constant *, 64> CstIdxs;
3702  for (int Val : Mask) {
3703  // For now, assume any undef index is 0. This should be optimized in the
3704  // future, e.g. to select DUP etc.
3705  Val = Val < 0 ? 0 : Val;
3706  for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
3707  unsigned Offset = Byte + Val * BytesPerElt;
3708  CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
3709  }
3710  }
3711 
3712  MachineIRBuilder MIRBuilder(I);
3713 
3714  // Use a constant pool to load the index vector for TBL.
3715  Constant *CPVal = ConstantVector::get(CstIdxs);
3716  MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
3717  if (!IndexLoad) {
3718  LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
3719  return false;
3720  }
3721 
3722  if (DstTy.getSizeInBits() != 128) {
3723  assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
3724  // This case can be done with TBL1.
3725  MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
3726  if (!Concat) {
3727  LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
3728  return false;
3729  }
3730 
3731  // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
3732  IndexLoad =
3733  emitScalarToVector(64, &AArch64::FPR128RegClass,
3734  IndexLoad->getOperand(0).getReg(), MIRBuilder);
3735 
3736  auto TBL1 = MIRBuilder.buildInstr(
3737  AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
3738  {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
3739  constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
3740 
3741  auto Copy =
3742  MIRBuilder
3743  .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
3744  .addReg(TBL1.getReg(0), 0, AArch64::dsub);
3745  RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
3746  I.eraseFromParent();
3747  return true;
3748  }
3749 
3750  // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
3751  // Q registers for regalloc.
3752  auto RegSeq = MIRBuilder
3753  .buildInstr(TargetOpcode::REG_SEQUENCE,
3754  {&AArch64::QQRegClass}, {Src1Reg})
3755  .addImm(AArch64::qsub0)
3756  .addUse(Src2Reg)
3757  .addImm(AArch64::qsub1);
3758 
3759  auto TBL2 =
3760  MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0).getReg()},
3761  {RegSeq, IndexLoad->getOperand(0).getReg()});
3762  constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
3763  constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
3764  I.eraseFromParent();
3765  return true;
3766 }
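// Illustrative worked example: shuffling two <4 x s32> sources with mask
// [0, 4, 1, 5] expands, at 4 bytes per element, into the byte-index vector
// {0,1,2,3, 16,17,18,19, 4,5,6,7, 20,21,22,23}; that vector is loaded from
// the constant pool and fed to TBLv16i8Two together with the REG_SEQUENCE
// holding the two source Q registers.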
3767 
3768 MachineInstr *AArch64InstructionSelector::emitLaneInsert(
3769  Optional<Register> DstReg, Register SrcReg, Register EltReg,
3770  unsigned LaneIdx, const RegisterBank &RB,
3771  MachineIRBuilder &MIRBuilder) const {
3772  MachineInstr *InsElt = nullptr;
3773  const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
3774  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3775 
3776  // Create a register to define with the insert if one wasn't passed in.
3777  if (!DstReg)
3778  DstReg = MRI.createVirtualRegister(DstRC);
3779 
3780  unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
3781  unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
3782 
3783  if (RB.getID() == AArch64::FPRRegBankID) {
3784  auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
3785  InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
3786  .addImm(LaneIdx)
3787  .addUse(InsSub->getOperand(0).getReg())
3788  .addImm(0);
3789  } else {
3790  InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
3791  .addImm(LaneIdx)
3792  .addUse(EltReg);
3793  }
3794 
3795  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
3796  return InsElt;
3797 }
3798 
3799 bool AArch64InstructionSelector::selectInsertElt(
3800  MachineInstr &I, MachineRegisterInfo &MRI) const {
3801  assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
3802 
3803  // Get information on the destination.
3804  Register DstReg = I.getOperand(0).getReg();
3805  const LLT DstTy = MRI.getType(DstReg);
3806  unsigned VecSize = DstTy.getSizeInBits();
3807 
3808  // Get information on the element we want to insert into the destination.
3809  Register EltReg = I.getOperand(2).getReg();
3810  const LLT EltTy = MRI.getType(EltReg);
3811  unsigned EltSize = EltTy.getSizeInBits();
3812  if (EltSize < 16 || EltSize > 64)
3813  return false; // Don't support all element types yet.
3814 
3815  // Find the definition of the index. Bail out if it's not defined by a
3816  // G_CONSTANT.
3817  Register IdxReg = I.getOperand(3).getReg();
3818  auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
3819  if (!VRegAndVal)
3820  return false;
3821  unsigned LaneIdx = VRegAndVal->Value;
3822 
3823  // Perform the lane insert.
3824  Register SrcReg = I.getOperand(1).getReg();
3825  const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
3826  MachineIRBuilder MIRBuilder(I);
3827 
3828  if (VecSize < 128) {
3829  // If the vector we're inserting into is smaller than 128 bits, widen it
3830  // to 128 to do the insert.
3831  MachineInstr *ScalarToVec = emitScalarToVector(
3832  VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
3833  if (!ScalarToVec)
3834  return false;
3835  SrcReg = ScalarToVec->getOperand(0).getReg();
3836  }
3837 
3838  // Create an insert into a new FPR128 register.
3839  // Note that if our vector is already 128 bits, we end up emitting an extra
3840  // register.
3841  MachineInstr *InsMI =
3842  emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);
3843 
3844  if (VecSize < 128) {
3845  // If we had to widen to perform the insert, then we have to demote back to
3846  // the original size to get the result we want.
3847  Register DemoteVec = InsMI->getOperand(0).getReg();
3848  const TargetRegisterClass *RC =
3849  getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
3850  if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3851  LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3852  return false;
3853  }
3854  unsigned SubReg = 0;
3855  if (!getSubRegForClass(RC, TRI, SubReg))
3856  return false;
3857  if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3858  LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
3859  << "\n");
3860  return false;
3861  }
3862  MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3863  .addReg(DemoteVec, 0, SubReg);
3864  RBI.constrainGenericRegister(DstReg, *RC, MRI);
3865  } else {
3866  // No widening needed.
3867  InsMI->getOperand(0).setReg(DstReg);
3868  constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3869  }
3870 
3871  I.eraseFromParent();
3872  return true;
3873 }
3874 
3875 bool AArch64InstructionSelector::selectBuildVector(
3876  MachineInstr &I, MachineRegisterInfo &MRI) const {
3877  assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
3878  // Until we port more of the optimized selections, for now just use a vector
3879  // insert sequence.
3880  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3881  const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
3882  unsigned EltSize = EltTy.getSizeInBits();
3883  if (EltSize < 16 || EltSize > 64)
3884  return false; // Don't support all element types yet.
3885  const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3886  MachineIRBuilder MIRBuilder(I);
3887 
3888  const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
3889  MachineInstr *ScalarToVec =
3890  emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
3891  I.getOperand(1).getReg(), MIRBuilder);
3892  if (!ScalarToVec)
3893  return false;
3894 
3895  Register DstVec = ScalarToVec->getOperand(0).getReg();
3896  unsigned DstSize = DstTy.getSizeInBits();
3897 
3898  // Keep track of the last MI we inserted. Later on, we might be able to save
3899  // a copy using it.
3900  MachineInstr *PrevMI = nullptr;
3901  for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
3902  // Note that if we don't do a subregister copy, we can end up making an
3903  // extra register.
3904  PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
3905  MIRBuilder);
3906  DstVec = PrevMI->getOperand(0).getReg();
3907  }
3908 
3909  // If DstTy's size in bits is less than 128, then emit a subregister copy
3910  // from DstVec to the last register we've defined.
3911  if (DstSize < 128) {
3912  // Force this to be FPR using the destination vector.
3913  const TargetRegisterClass *RC =
3914  getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
3915  if (!RC)
3916  return false;
3917  if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3918  LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3919  return false;
3920  }
3921 
3922  unsigned SubReg = 0;
3923  if (!getSubRegForClass(RC, TRI, SubReg))
3924  return false;
3925  if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3926  LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
3927  << "\n");
3928  return false;
3929  }
3930 
3931  Register Reg = MRI.createVirtualRegister(RC);
3932  Register DstReg = I.getOperand(0).getReg();
3933 
3934  MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3935  .addReg(DstVec, 0, SubReg);
3936  MachineOperand &RegOp = I.getOperand(1);
3937  RegOp.setReg(Reg);
3938  RBI.constrainGenericRegister(DstReg, *RC, MRI);
3939  } else {
3940  // We don't need a subregister copy. Save a copy by re-using the
3941  // destination register on the final insert.
3942  assert(PrevMI && "PrevMI was null?");
3943  PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
3944  constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
3945  }
3946 
3947  I.eraseFromParent();
3948  return true;
3949 }
3950 
3951 /// Helper function to find an intrinsic ID on a MachineInstr. Returns the
3952 /// ID if it exists, and 0 otherwise.
3953 static unsigned findIntrinsicID(MachineInstr &I) {
3954  auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
3955  return Op.isIntrinsicID();
3956  });
3957  if (IntrinOp == I.operands_end())
3958  return 0;
3959  return IntrinOp->getIntrinsicID();
3960 }
3961 
3962 /// Helper function to emit the correct opcode for a llvm.aarch64.stlxr
3963 /// intrinsic.
3964 static unsigned getStlxrOpcode(unsigned NumBytesToStore) {
3965  switch (NumBytesToStore) {
3966  // TODO: 1 and 2 byte stores
3967  case 4:
3968  return AArch64::STLXRW;
3969  case 8:
3970  return AArch64::STLXRX;
3971  default:
3972  LLVM_DEBUG(dbgs() << "Unexpected number of bytes to store! ("
3973  << NumBytesToStore << ")\n");
3974  break;
3975  }
3976  return 0;
3977 }
3978 
3979 bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
3980  MachineInstr &I, MachineRegisterInfo &MRI) const {
3981  // Find the intrinsic ID.
3982  unsigned IntrinID = findIntrinsicID(I);
3983  if (!IntrinID)
3984  return false;
3985  MachineIRBuilder MIRBuilder(I);
3986 
3987  // Select the instruction.
3988  switch (IntrinID) {
3989  default:
3990  return false;
3991  case Intrinsic::trap:
3992  MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
3993  break;
3994  case Intrinsic::debugtrap:
3995  if (!STI.isTargetWindows())
3996  return false;
3997  MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
3998  break;
3999  case Intrinsic::aarch64_stlxr:
4000  Register StatReg = I.getOperand(0).getReg();
4001  assert(RBI.getSizeInBits(StatReg, MRI, TRI) == 32 &&
4002  "Status register must be 32 bits!");
4003  Register SrcReg = I.getOperand(2).getReg();
4004 
4005  if (RBI.getSizeInBits(SrcReg, MRI, TRI) != 64) {
4006  LLVM_DEBUG(dbgs() << "Only support 64-bit sources right now.\n");
4007  return false;
4008  }
4009 
4010  Register PtrReg = I.getOperand(3).getReg();
4011  assert(MRI.getType(PtrReg).isPointer() && "Expected pointer operand");
4012 
4013  // Expect only one memory operand.
4014  if (!I.hasOneMemOperand())
4015  return false;
4016 
4017  const MachineMemOperand *MemOp = *I.memoperands_begin();
4018  unsigned NumBytesToStore = MemOp->getSize();
4019  unsigned Opc = getStlxrOpcode(NumBytesToStore);
4020  if (!Opc)
4021  return false;
4022  unsigned NumBitsToStore = NumBytesToStore * 8;
4023  if (NumBitsToStore != 64) {
4024  // The intrinsic always has a 64-bit source, but we might actually want
4025  // a differently-sized source for the instruction. Try to get it.
4026  // TODO: For 1 and 2-byte stores, this will have a G_AND. For now, let's
4027  // just handle 4-byte stores.
4028  // TODO: If we don't find a G_ZEXT, we'll have to truncate the value down
4029  // to the right size for the STLXR.
4030  MachineInstr *Zext = getOpcodeDef(TargetOpcode::G_ZEXT, SrcReg, MRI);
4031  if (!Zext)
4032  return false;
4033  SrcReg = Zext->getOperand(1).getReg();
4034  // We should get an appropriately-sized register here.
4035  if (RBI.getSizeInBits(SrcReg, MRI, TRI) != NumBitsToStore)
4036  return false;
4037  }
4038  auto StoreMI = MIRBuilder.buildInstr(Opc, {StatReg}, {SrcReg, PtrReg})
4039  .addMemOperand(*I.memoperands_begin());
4040  constrainSelectedInstRegOperands(*StoreMI, TII, TRI, RBI);
4041  }
4042 
4043  I.eraseFromParent();
4044  return true;
4045 }
4046 
4047 bool AArch64InstructionSelector::selectIntrinsic(
4048  MachineInstr &I, MachineRegisterInfo &MRI) const {
4049  unsigned IntrinID = findIntrinsicID(I);
4050  if (!IntrinID)
4051  return false;
4052  MachineIRBuilder MIRBuilder(I);
4053 
4054  switch (IntrinID) {
4055  default:
4056  break;
4057  case Intrinsic::aarch64_crypto_sha1h:
4058  Register DstReg = I.getOperand(0).getReg();
4059  Register SrcReg = I.getOperand(2).getReg();
4060 
4061  // FIXME: Should this be an assert?
4062  if (MRI.getType(DstReg).getSizeInBits() != 32 ||
4063  MRI.getType(SrcReg).getSizeInBits() != 32)
4064  return false;
4065 
4066  // The operation has to happen on FPRs. Set up some new FPR registers for
4067  // the source and destination if they are on GPRs.
4068  if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4069  SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
4070  MIRBuilder.buildCopy({SrcReg}, {I.getOperand(2)});
4071 
4072  // Make sure the copy ends up getting constrained properly.
4073  RBI.constrainGenericRegister(I.getOperand(2).getReg(),
4074  AArch64::GPR32RegClass, MRI);
4075  }
4076 
4077  if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
4078  DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
4079 
4080  // Actually insert the instruction.
4081  auto SHA1Inst = MIRBuilder.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
4082  constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
4083 
4084  // Did we create a new register for the destination?
4085  if (DstReg != I.getOperand(0).getReg()) {
4086  // Yep. Copy the result of the instruction back into the original
4087  // destination.
4088  MIRBuilder.buildCopy({I.getOperand(0)}, {DstReg});
4089  RBI.constrainGenericRegister(I.getOperand(0).getReg(),
4090  AArch64::GPR32RegClass, MRI);
4091  }
4092 
4093  I.eraseFromParent();
4094  return true;
4095  }
4096  return false;
4097 }
4098 
4099 static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
4100  auto &MI = *Root.getParent();
4101  auto &MBB = *MI.getParent();
4102  auto &MF = *MBB.getParent();
4103  auto &MRI = MF.getRegInfo();
4104  uint64_t Immed;
4105  if (Root.isImm())
4106  Immed = Root.getImm();
4107  else if (Root.isCImm())
4108  Immed = Root.getCImm()->getZExtValue();
4109  else if (Root.isReg()) {
4110  auto ValAndVReg =
4111  getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
4112  if (!ValAndVReg)
4113  return None;
4114  Immed = ValAndVReg->Value;
4115  } else
4116  return None;
4117  return Immed;
4118 }
4119 
4120 InstructionSelector::ComplexRendererFns
4121 AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
4122  auto MaybeImmed = getImmedFromMO(Root);
4123  if (MaybeImmed == None || *MaybeImmed > 31)
4124  return None;
4125  uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
4126  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4127 }
4128 
4129 InstructionSelector::ComplexRendererFns
4130 AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
4131  auto MaybeImmed = getImmedFromMO(Root);
4132  if (MaybeImmed == None || *MaybeImmed > 31)
4133  return None;
4134  uint64_t Enc = 31 - *MaybeImmed;
4135  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4136 }
4137 
4138 InstructionSelector::ComplexRendererFns
4139 AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
4140  auto MaybeImmed = getImmedFromMO(Root);
4141  if (MaybeImmed == None || *MaybeImmed > 63)
4142  return None;
4143  uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
4144  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4145 }
4146 
4147 InstructionSelector::ComplexRendererFns
4148 AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
4149  auto MaybeImmed = getImmedFromMO(Root);
4150  if (MaybeImmed == None || *MaybeImmed > 63)
4151  return None;
4152  uint64_t Enc = 63 - *MaybeImmed;
4153  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4154 }
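// Illustrative note: these four renderers provide the "size - imm" and
// "size - 1 - imm" immediates that the imported tablegen patterns use when
// rewriting shift-by-constant pairs into UBFM/SBFM-style bitfield
// instructions.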
4155 
4156 /// Helper to select an immediate value that can be represented as a 12-bit
4157 /// value shifted left by either 0 or 12. If it is possible to do so, return
4158 /// the immediate and shift value. If not, return None.
4159 ///
4160 /// Used by selectArithImmed and selectNegArithImmed.
4161 InstructionSelector::ComplexRendererFns
4162 AArch64InstructionSelector::select12BitValueWithLeftShift(
4163  uint64_t Immed) const {
4164  unsigned ShiftAmt;
4165  if (Immed >> 12 == 0) {
4166  ShiftAmt = 0;
4167  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
4168  ShiftAmt = 12;
4169  Immed = Immed >> 12;
4170  } else
4171  return None;
4172 
4173  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
4174  return {{
4175  [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
4176  [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
4177  }};
4178 }
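// Illustrative worked examples: 0xfff -> {imm = 0xfff, LSL #0};
// 0x5000 -> {imm = 0x5, LSL #12}; 0x1001 -> None, since it needs bits both
// below and above bit 12.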
4179 
4180 /// SelectArithImmed - Select an immediate value that can be represented as
4181 /// a 12-bit value shifted left by either 0 or 12. If so, return true with
4182 /// Val set to the 12-bit value and Shift set to the shifter operand.
4183 InstructionSelector::ComplexRendererFns
4184 AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
4185  // This function is called from the addsub_shifted_imm ComplexPattern,
4186  // which lists [imm] as the list of opcodes it's interested in; however,
4187  // we still need to check whether the operand is actually an immediate
4188  // here because the ComplexPattern opcode list is only used in
4189  // root-level opcode matching.
4190  auto MaybeImmed = getImmedFromMO(Root);
4191  if (MaybeImmed == None)
4192  return None;
4193  return select12BitValueWithLeftShift(*MaybeImmed);
4194 }
4195 
4196 /// SelectNegArithImmed - As above, but negates the value before trying to
4197 /// select it.
4198 InstructionSelector::ComplexRendererFns
4199 AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
4200  // We need a register here, because we need to know if we have a 64 or 32
4201  // bit immediate.
4202  if (!Root.isReg())
4203  return None;
4204  auto MaybeImmed = getImmedFromMO(Root);
4205  if (MaybeImmed == None)
4206  return None;
4207  uint64_t Immed = *MaybeImmed;
4208 
4209  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
4210  // have the opposite effect on the C flag, so this pattern mustn't match under
4211  // those circumstances.
4212  if (Immed == 0)
4213  return None;
4214 
4215  // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
4216  // the root.
4217  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4218  if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
4219  Immed = ~((uint32_t)Immed) + 1;
4220  else
4221  Immed = ~Immed + 1ULL;
4222 
4223  if (Immed & 0xFFFFFFFFFF000000ULL)
4224  return None;
4225 
4226  Immed &= 0xFFFFFFULL;
4227  return select12BitValueWithLeftShift(Immed);
4228 }
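// Illustrative example: a 64-bit compare against -16 negates to 16, which
// fits the 12-bit form, so "cmp x0, #-16" can be selected as "cmn x0, #16".
// A compare against 0 is rejected above because negating it would change how
// the C flag is set.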
4229 
4230 /// Return true if it is worth folding MI into an extended register. That is,
4231 /// if it's safe to pull it into the addressing mode of a load or store as a
4232 /// shift.
4233 bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
4234  MachineInstr &MI, const MachineRegisterInfo &MRI) const {
4235  // Always fold if there is one use, or if we're optimizing for size.
4236  Register DefReg = MI.getOperand(0).getReg();
4237  if (MRI.hasOneUse(DefReg) ||
4238  MI.getParent()->getParent()->getFunction().hasOptSize())
4239  return true;
4240 
4241  // It's better to avoid folding and recomputing shifts when we don't have a
4242  // fastpath.
4243  if (!STI.hasLSLFast())
4244  return false;
4245 
4246  // We have a fastpath, so folding a shift in and potentially computing it
4247  // many times may be beneficial. Check if this is only used in memory ops.
4248  // If it is, then we should fold.
4249  return all_of(MRI.use_instructions(DefReg),
4250  [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
4251 }
4252 
4253 /// This is used for computing addresses like this:
4254 ///
4255 /// ldr x1, [x2, x3, lsl #3]
4256 ///
4257 /// Where x2 is the base register, and x3 is an offset register. The shift-left
4258 /// is a constant value specific to this load instruction. That is, we'll never
4259 /// see anything other than a 3 here (which corresponds to the size of the
4260 /// element being loaded.)
4261 InstructionSelector::ComplexRendererFns
4262 AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
4263  MachineOperand &Root, unsigned SizeInBytes) const {
4264  if (!Root.isReg())
4265  return None;
4266  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4267 
4268  // Make sure that the memory op is a valid size.
4269  int64_t LegalShiftVal = Log2_32(SizeInBytes);
4270  if (LegalShiftVal == 0)
4271  return None;
4272 
4273  // We want to find something like this:
4274  //
4275  // val = G_CONSTANT LegalShiftVal
4276  // shift = G_SHL off_reg val
4277  // ptr = G_GEP base_reg shift
4278  // x = G_LOAD ptr
4279  //
4280  // And fold it into this addressing mode:
4281  //
4282  // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
4283 
4284  // Check if we can find the G_GEP.
4285  MachineInstr *Gep = getOpcodeDef(TargetOpcode::G_GEP, Root.getReg(), MRI);
4286  if (!Gep || !isWorthFoldingIntoExtendedReg(*Gep, MRI))
4287  return None;
4288 
4289  // Now, try to match an opcode which will match our specific offset.
4290  // We want a G_SHL or a G_MUL.
4291  MachineInstr *OffsetInst = getDefIgnoringCopies(Gep->getOperand(2).getReg(), MRI);
4292  if (!OffsetInst)
4293  return None;
4294 
4295  unsigned OffsetOpc = OffsetInst->getOpcode();
4296  if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
4297  return None;
4298 
4299  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
4300  return None;
4301 
4302  // Now, try to find the specific G_CONSTANT. Start by assuming that the
4303  // register we will offset is the LHS, and the register containing the
4304  // constant is the RHS.
4305  Register OffsetReg = OffsetInst->getOperand(1).getReg();
4306  Register ConstantReg = OffsetInst->getOperand(2).getReg();
4307  auto ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
4308  if (!ValAndVReg) {
4309  // We didn't get a constant on the RHS. If the opcode is a shift, then
4310  // we're done.
4311  if (OffsetOpc == TargetOpcode::G_SHL)
4312  return None;
4313 
4314  // If we have a G_MUL, we can use either register. Try looking at the RHS.
4315  std::swap(OffsetReg, ConstantReg);
4316  ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
4317  if (!ValAndVReg)
4318  return None;
4319  }
4320 
4321  // The value must fit into 3 bits, and must be positive. Make sure that is
4322  // true.
4323  int64_t ImmVal = ValAndVReg->Value;
4324 
4325  // Since we're going to pull this into a shift, the constant value must be
4326  // a power of 2. If we got a multiply, then we need to check this.
4327  if (OffsetOpc == TargetOpcode::G_MUL) {
4328  if (!isPowerOf2_32(ImmVal))
4329  return None;
4330 
4331  // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
4332  ImmVal = Log2_32(ImmVal);
4333  }
4334 
4335  if ((ImmVal & 0x7) != ImmVal)
4336  return None;
4337 
4338  // We are only allowed to shift by LegalShiftVal. This shift value is built
4339  // into the instruction, so we can't just use whatever we want.
4340  if (ImmVal != LegalShiftVal)
4341  return None;
4342 
4343  // We can use the LHS of the GEP as the base, and the LHS of the shift as an
4344  // offset. Signify that we are shifting by setting the shift flag to 1.
4345  return {{
4346  [=](MachineInstrBuilder &MIB) { MIB.add(Gep->getOperand(1)); },
4347  [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
4348  [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
4349  [=](MachineInstrBuilder &MIB) { MIB.addImm(1); },
4350  }};
4351 }
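// Illustrative example: for an 8-byte load, the chain
//   %c = G_CONSTANT i64 3; %off = G_SHL %idx, %c; %p = G_GEP %base, %off
// folds into the [base, idx, lsl #3] addressing mode; a G_MUL by 8 is
// accepted the same way because log2(8) == 3 matches LegalShiftVal.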
4352 
4353 /// This is used for computing addresses like this:
4354 ///
4355 /// ldr x1, [x2, x3]
4356 ///
4357 /// Where x2 is the base register, and x3 is an offset register.
4358 ///
4359 /// When possible (or profitable) to fold a G_GEP into the address calculation,
4360 /// this will do so. Otherwise, it will return None.
4361 InstructionSelector::ComplexRendererFns
4362 AArch64InstructionSelector::selectAddrModeRegisterOffset(
4363  MachineOperand &Root) const {
4364  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4365 
4366  // We need a GEP.
4367  MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
4368  if (!Gep || Gep->getOpcode() != TargetOpcode::G_GEP)
4369  return None;
4370 
4371  // If this is used more than once, let's not bother folding.
4372  // TODO: Check if they are memory ops. If they are, then we can still fold
4373  // without having to recompute anything.
4374  if (!MRI.hasOneUse(Gep->getOperand(0).getReg()))
4375  return None;
4376 
4377  // Base is the GEP's LHS, offset is its RHS.
4378  return {{
4379  [=](MachineInstrBuilder &MIB) { MIB.add(Gep->getOperand(1)); },
4380  [=](MachineInstrBuilder &MIB) { MIB.add(Gep->getOperand(2)); },
4381  [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
4382  [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
4383  }};
4384 }
4385 
4386 /// This is intended to be equivalent to selectAddrModeXRO in
4387 /// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
4388 InstructionSelector::ComplexRendererFns
4389 AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
4390  unsigned SizeInBytes) const {
4391  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4392 
4393  // If we have a constant offset, then we probably don't want to match a
4394  // register offset.
4395  if (isBaseWithConstantOffset(Root, MRI))
4396  return None;
4397 
4398  // Try to fold shifts into the addressing mode.
4399  auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
4400  if (AddrModeFns)
4401  return AddrModeFns;
4402 
4403  // If that doesn't work, see if it's possible to fold in registers from
4404  // a GEP.
4405  return selectAddrModeRegisterOffset(Root);
4406 }
4407 
4408 /// Select a "register plus unscaled signed 9-bit immediate" address. This
4409 /// should only match when there is an offset that is not valid for a scaled
4410 /// immediate addressing mode. The "Size" argument is the size in bytes of the
4411 /// memory reference, which is needed here to know what is valid for a scaled
4412 /// immediate.
4413 InstructionSelector::ComplexRendererFns
4414 AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
4415  unsigned Size) const {
4416  MachineRegisterInfo &MRI =
4417  Root.getParent()->getParent()->getParent()->getRegInfo();
4418 
4419  if (!Root.isReg())
4420  return None;
4421 
4422  if (!isBaseWithConstantOffset(Root, MRI))
4423  return None;
4424 
4425  MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
4426  if (!RootDef)
4427  return None;
4428 
4429  MachineOperand &OffImm = RootDef->getOperand(2);
4430  if (!OffImm.isReg())
4431  return None;
4432  MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
4433  if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
4434  return None;
4435  int64_t RHSC;
4436  MachineOperand &RHSOp1 = RHS->getOperand(1);
4437  if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
4438  return None;
4439  RHSC = RHSOp1.getCImm()->getSExtValue();
4440 
4441  // If the offset is valid as a scaled immediate, don't match here.
4442  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
4443  return None;
4444  if (RHSC >= -256 && RHSC < 256) {
4445  MachineOperand &Base = RootDef->getOperand(1);
4446  return {{
4447  [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
4448  [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
4449  }};
4450  }
4451  return None;
4452 }
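// Illustrative example: a 4-byte access at base+3 cannot use the scaled
// immediate form (3 is not a multiple of 4), so it is matched here for the
// unscaled LDUR/STUR encodings; base+4 is rejected because the scaled form
// already covers it.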
4453 
4454 /// Select a "register plus scaled unsigned 12-bit immediate" address. The
4455 /// "Size" argument is the size in bytes of the memory reference, which
4456 /// determines the scale.
4457 InstructionSelector::ComplexRendererFns
4458 AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
4459  unsigned Size) const {
4460  MachineRegisterInfo &MRI =
4461  Root.getParent()->getParent()->getParent()->getRegInfo();
4462 
4463  if (!Root.isReg())
4464  return None;
4465 
4466  MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
4467  if (!RootDef)
4468  return None;
4469 
4470  if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
4471  return {{
4472  [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
4473  [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
4474  }};
4475  }
4476 
4477  if (isBaseWithConstantOffset(Root, MRI)) {
4478  MachineOperand &LHS = RootDef->getOperand(1);
4479  MachineOperand &RHS = RootDef->getOperand(2);
4480  MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
4481  MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
4482  if (LHSDef && RHSDef) {
4483  int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
4484  unsigned Scale = Log2_32(Size);
4485  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
4486  if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
4487  return {{
4488  [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
4489  [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
4490  }};
4491 
4492  return {{
4493  [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
4494  [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
4495  }};
4496  }
4497  }
4498  }
4499 
4500  // Before falling back to our general case, check if the unscaled
4501  // instructions can handle this. If so, that's preferable.
4502  if (selectAddrModeUnscaled(Root, Size).hasValue())
4503  return None;
4504 
4505  return {{
4506  [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
4507  [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
4508  }};
4509 }
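// Illustrative example: for an 8-byte access, a G_GEP with constant offset 16
// renders as the base register plus immediate 2 (16 >> Log2_32(8)), i.e. the
// scaled form behind "ldr x0, [x1, #16]".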
4510 
4511 /// Given a shift instruction, return the correct shift type for that
4512 /// instruction.
4513 static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
4514  // TODO: Handle AArch64_AM::ROR
4515  switch (MI.getOpcode()) {
4516  default:
4517  return AArch64_AM::InvalidShiftExtend;
4518  case TargetOpcode::G_SHL:
4519  return AArch64_AM::LSL;
4520  case TargetOpcode::G_LSHR:
4521  return AArch64_AM::LSR;
4522  case TargetOpcode::G_ASHR:
4523  return AArch64_AM::ASR;
4524  }
4525 }
4526 
4527 /// Select a "shifted register" operand. If the value is not shifted, set the
4528 /// shift operand to a default value of "lsl 0".
4529 ///
4530 /// TODO: Allow shifted register to be rotated in logical instructions.
4531 InstructionSelector::ComplexRendererFns
4532 AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const {
4533  if (!Root.isReg())
4534  return None;
4535  MachineRegisterInfo &MRI =
4536  Root.getParent()->getParent()->getParent()->getRegInfo();
4537 
4538  // Check if the operand is defined by an instruction which corresponds to
4539  // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
4540  //
4541  // TODO: Handle AArch64_AM::ROR for logical instructions.
4542  MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
4543  if (!ShiftInst)
4544  return None;
4545  AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
4546  if (ShType == AArch64_AM::InvalidShiftExtend)
4547  return None;
4548  if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
4549  return None;
4550 
4551  // Need an immediate on the RHS.
4552  MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
4553  auto Immed = getImmedFromMO(ShiftRHS);
4554  if (!Immed)
4555  return None;
4556 
4557  // We have something that we can fold. Fold in the shift's LHS and RHS into
4558  // the instruction.
4559  MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
4560  Register ShiftReg = ShiftLHS.getReg();
4561 
4562  unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
4563  unsigned Val = *Immed & (NumBits - 1);
4564  unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);
4565 
4566  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
4567  [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
4568 }
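// Illustrative example: with "%s = G_SHL %x, 3" feeding a G_ADD that uses
// this complex pattern, the shift folds into the operand and the result is
// "add x0, x1, x2, lsl #3", provided isWorthFoldingIntoExtendedReg agrees the
// fold is profitable.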
4569 
4570 void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
4571  const MachineInstr &MI) const {
4572  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4573  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
4574  Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
4575  assert(CstVal && "Expected constant value");
4576  MIB.addImm(CstVal.getValue());
4577 }
4578 
4579 void AArch64InstructionSelector::renderLogicalImm32(
4580  MachineInstrBuilder &MIB, const MachineInstr &I) const {
4581  assert(I.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
4582  uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
4583  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
4584  MIB.addImm(Enc);
4585 }
4586 
4587 void AArch64InstructionSelector::renderLogicalImm64(
4588  MachineInstrBuilder &MIB, const MachineInstr &I) const {
4589  assert(I.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
4590  uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
4591  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
4592  MIB.addImm(Enc);
4593 }
4594 
4595 namespace llvm {
4596 InstructionSelector *
4597 createAArch64InstructionSelector(const AArch64TargetMachine &TM,
4598  AArch64Subtarget &Subtarget,
4599  AArch64RegisterBankInfo &RBI) {
4600  return new AArch64InstructionSelector(TM, Subtarget, RBI);
4601 }
4602 }
static std::pair< const TargetRegisterClass *, const TargetRegisterClass * > getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Helper function to get the source and destination register classes for a copy.
const NoneType None
Definition: None.h:23
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:641
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
mop_iterator operands_end()
Definition: MachineInstr.h:472
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address...
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1569
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
MachineBasicBlock * getMBB() const
static std::pair< unsigned, unsigned > getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize)
Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given size and RB...
static bool selectMergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
void setTargetFlags(unsigned F)
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
unsigned getSizeInBits(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
Get the size in bits of Reg.
const ConstantFP * getConstantFPVRegVal(unsigned VReg, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:276
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:385
bool isScalar() const
static CondCode getInvertedCondCode(CondCode Code)
unsigned Reg
const MachineInstrBuilder & addJumpTableIndex(unsigned Idx, unsigned TargetFlags=0) const
static uint64_t selectImpl(uint64_t CandidateMask, uint64_t &NextInSequenceMask)
LLT getType(unsigned Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register...
uint64_t getSize() const
Return the size in bytes of the memory reference.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:323
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1165
static uint32_t Concat[]
unsigned const TargetRegisterInfo * TRI
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address...
void setRegBank(unsigned Reg, const RegisterBank &RegBank)
Set the register bank to RegBank for Reg.
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:477
static const TargetRegisterClass * getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits, bool GetAllRegSet=false)
Given a register bank, and size in bits, return the smallest register class that can represent that c...
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
T get() const
Returns the value of the specified pointer type.
Definition: PointerUnion.h:194
MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition: Utils.cpp:284
unsigned getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
unsigned getBitWidth() const
getBitWidth - Return the bitwidth of this constant.
Definition: Constants.h:142
LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address...
This file declares the targeting of the RegisterBankInfo class for AArch64.
bool isVector() const
T dyn_cast() const
Returns the current pointer if it is of the specified pointer type, otherwises returns null...
Definition: PointerUnion.h:201
Holds all the information related to register banks.
A description of a memory reference used in the backend.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
const HexagonInstrInfo * TII
const ConstantFP * getFPImm() const
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:414
A Use represents the edge between a Value definition and its users.
Definition: Use.h:55
static unsigned getStlxrOpcode(unsigned NumBytesToStore)
Helper function to emit the correct opcode for a llvm.aarch64.stlxr intrinsic.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
unsigned SubReg
static StringRef getName(Value *V)
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:411
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
const RegClassOrRegBank & getRegClassOrRegBank(unsigned Reg) const
Return the register bank or register class of Reg.
bool isNegative() const
Return true if the sign bit is set.
Definition: Constants.h:308
static bool isStore(int Opcode)
MachineFunction & getMF()
Getter for the function we currently build.
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
static bool isUnsignedICMPPred(const CmpInst::Predicate P)
Returns true if P is an unsigned integer comparison predicate.
bool isPredicate() const
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out...
Definition: ISDOpcodes.h:1012
void setReg(Register Reg)
Change the register this operand corresponds to.
virtual void setupMF(MachineFunction &mf, CodeGenCoverage &covinfo)
Setup per-MF selector state.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount; shifter: 000 ==> lsl, 001 ==...
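A hedged sketch of how such an encoded shifter immediate is typically consumed when hand-building an instruction (the instruction choice and operand names are assumptions, not taken from this file):
// Encode "LSL #12" and attach it as the trailing shifter operand of an
// ADDXri-style add-immediate instruction under construction.
unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 12);
MIB.addImm(Shifter); // MIB: a MachineInstrBuilder assumed to be in scope.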
void ChangeToImmediate(int64_t ImmVal)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value...
TargetInstrInfo - Interface to description of machine instruction set.
static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI, const RegisterBankInfo &RBI, Register SrcReg, const TargetRegisterClass *From, const TargetRegisterClass *To, unsigned SubReg)
Helper function for selectCopy.
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the basic binary operation GenericOpc (such as G_OR or G_SDIV)...
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
MachineRegisterInfo * getMRI()
Getter for MRI.
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:657
const TargetRegisterInfo * getTargetRegisterInfo() const
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:148
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address...
static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg, const unsigned EltSize)
static int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:428
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the G_LOAD or G_STORE operation GenericOpc, appropriate for the (value)...
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:64
static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy)
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
This is an important base class in LLVM.
Definition: Constant.h:41
const GlobalValue * getGlobal() const
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:263
Helper class to build MachineInstr.
bool isCImm() const
isCImm - Test if this is a MO_CImmediate operand.
unsigned getPrefTypeAlignment(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:762
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition: APFloat.h:1140
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
Definition: MachineInstr.h:567
bool isValid() const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:732
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
void setImm(int64_t immVal)
static bool selectUnmergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
self_iterator getIterator()
Definition: ilist_node.h:81
auto find_if(R &&Range, UnaryPredicate P) -> decltype(adl_begin(Range))
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1193
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const MachineInstrBuilder & addFrameIndex(int Idx) const
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:205
bool isCopy() const
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
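A short sketch of MachineIRBuilder use (all names assumed; not a quote from this file):
// Emit a COPY at the current insertion point, then a raw instruction whose
// operands are added explicitly.
MIRBuilder.buildCopy(DstReg, SrcReg);
MIRBuilder.buildInstr(llvm::TargetOpcode::IMPLICIT_DEF).addDef(TmpReg);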
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition: Utils.cpp:300
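A sketch of the typical look-through pattern (Reg and MRI are assumed to be in scope):
// Fold a constant only if Reg is, modulo copies, produced by a G_CONSTANT.
if (llvm::MachineInstr *Def =
        llvm::getOpcodeDef(llvm::TargetOpcode::G_CONSTANT, Reg, MRI)) {
  uint64_t Imm = Def->getOperand(1).getCImm()->getZExtValue();
  (void)Imm; // e.g. check it fits an immediate field before folding.
}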
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
Definition: MCInstrInfo.h:50
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
const Constant * getShuffleMask() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:40
#define GET_GLOBALISEL_TEMPORARIES_INIT
const APFloat & getValueAPF() const
Definition: Constants.h:302
static Optional< uint64_t > getImmedFromMO(const MachineOperand &Root)
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
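A sketch of the usual guard-then-encode pairing of this helper with isLogicalImmediate (Imm and RegSize are assumed values):
// Only use an immediate-form logical instruction when the constant is a
// valid AArch64 logical immediate; otherwise it must be materialised.
if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(Imm, RegSize);
  (void)Enc; // e.g. the immediate operand of ANDWri/ANDXri.
}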
static int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
Optional< ValueAndVReg > getConstantVRegValWithLookThrough(unsigned VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT (LookThroug...
Definition: Utils.cpp:218
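A sketch of how the returned value is typically unpacked (VReg and MRI are assumed to be in scope; in this LLVM version the result carries an int64_t Value plus the defining vreg):
// Look through copies and similar trivial instructions for a constant.
if (auto ValAndVReg = llvm::getConstantVRegValWithLookThrough(VReg, MRI)) {
  int64_t Imm = ValAndVReg->Value;
  (void)Imm;
}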
bool isJTI() const
isJTI - Tests if this is a MO_JumpTableIndex operand.
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:552
RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
unsigned getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
bool constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
Definition: Utils.cpp:111
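A sketch of the usual post-selection step this function supports (the block, insertion point, and register names are assumptions):
// Build a concrete AArch64 instruction by hand, then constrain its (possibly
// still generic) virtual register operands to legal register classes.
llvm::MachineInstr &AddMI =
    *BuildMI(MBB, InsertPt, DL, TII.get(llvm::AArch64::ADDWrr), DstReg)
         .addUse(LHSReg)
         .addUse(RHSReg);
constrainSelectedInstRegOperands(AddMI, TII, TRI, RBI);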
This class implements the register bank concept.
Definition: RegisterBank.h:28
int64_t getImm() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
This file declares the MachineIRBuilder class.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:538
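A sketch of a common pairing of these MathExtras helpers (MemSizeInBytes is an assumed value):
// Turn a power-of-two access size into a shift amount, e.g. for a scaled
// addressing mode.
if (llvm::isPowerOf2_32(MemSizeInBytes)) {
  unsigned Scale = llvm::Log2_32(MemSizeInBytes);
  (void)Scale;
}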
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Helper function that verifies that we have a valid copy at the end of selectCopy. ...
Optional< int64_t > getConstantVRegVal(unsigned VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT that fits in int64_t, returns it.
Definition: Utils.cpp:207
bool isPointer() const
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:256
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:470
Provides the logic to select generic machine instructions.
Representation of each machine instruction.
Definition: MachineInstr.h:64
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
This class provides the information for the target register banks.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
bool hasOneUse(unsigned RegNo) const
hasOneUse - Return true if there is exactly one instruction using the specified register.
ConstantMatch m_ICst(int64_t &Cst)
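A sketch of MIPatternMatch usage with this matcher (ShiftReg and MRI are assumed names):
// Capture the value of a G_CONSTANT feeding ShiftReg, if there is one.
int64_t ShiftImm;
if (llvm::MIPatternMatch::mi_match(ShiftReg, MRI,
                                   llvm::MIPatternMatch::m_ICst(ShiftImm))) {
  // ShiftImm now holds the constant shift amount.
}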
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P)
bool isZero() const
Return true if the value is positive or negative zero.
Definition: Constants.h:305
InstructionSelector * createAArch64InstructionSelector(const AArch64TargetMachine &, AArch64Subtarget &, AArch64RegisterBankInfo &)
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:44
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
int64_t getOffset() const
Return the offset from the symbol in this operand.
const BlockAddress * getBlockAddress() const
#define I(x, y, z)
Definition: MD5.cpp:58
static unsigned findIntrinsicID(MachineInstr &I)
Helper function to find an intrinsic ID on a MachineInstr.
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page...
void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value...
void setSubReg(unsigned subReg)
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:619
#define GET_GLOBALISEL_PREDICATES_INIT
const DataLayout & getDataLayout() const
static unsigned selectSelectOpc(MachineInstr &I, MachineRegisterInfo &MRI, const RegisterBankInfo &RBI)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
iterator_range< use_instr_iterator > use_instructions(unsigned Reg) const
const TargetRegisterClass * getRegClassOrNull(unsigned Reg) const
Return the register class of Reg, or null if Reg has not been assigned a register class yet...
static void changeFCMPPredToAArch64CC(CmpInst::Predicate P, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
bool isPreISelGenericOpcode(unsigned Opcode)
Check whether the given Opcode is a generic opcode that is not supposed to appear after ISel...
Definition: TargetOpcodes.h:30
LLVM Value Representation.
Definition: Value.h:73
static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI)
Helper function to select the opcode for a G_FCMP.
uint64_t getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
Definition: DataLayout.h:445
uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:258
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
void ChangeToFrameIndex(int Idx)
Replace this operand with a frame index.
static bool unsupportedBinOp(const MachineInstr &I, const AArch64RegisterBankInfo &RBI, const MachineRegisterInfo &MRI, const AArch64RegisterInfo &TRI)
Check whether I is a currently unsupported binary operation:
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow...
APInt bitcastToAPInt() const
Definition: APFloat.h:1104
unsigned getRegSizeInBits(const TargetRegisterClass &RC) const
Return the size in bits of a register from class RC.
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
Register getReg() const
getReg - Returns the register number.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:156
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
#define LLVM_DEBUG(X)
Definition: Debug.h:122
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:416
static bool getSubRegForClass(const TargetRegisterClass *RC, const TargetRegisterInfo &TRI, unsigned &SubReg)
Returns the correct subregister to use for a given register class.
const ConstantInt * getCImm() const
#define DEBUG_TYPE
static const TargetRegisterClass * constrainGenericRegister(Register Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI)
Constrain the (possibly generic) virtual register Reg to RC.
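A sketch of the typical call site (DefReg, MRI, and the RBI member are assumed to be in scope):
// Pin a generic vreg to a concrete register class before it is used as an
// operand of a fixed-class target instruction; bail out on failure.
if (!RBI.constrainGenericRegister(DefReg, llvm::AArch64::GPR64RegClass, MRI))
  return false;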
unsigned getConstantPoolIndex(const Constant *C, unsigned Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one...
static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI)
Given a shift instruction, return the correct shift type for that instruction.
bool constrainAllUses(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI) const
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
unsigned getID() const
Get the identifier of this register bank.
Definition: RegisterBank.h:47
A discriminated union of two or more pointer types, with the discriminator in the low bit of the poin...
Definition: PointerUnion.h:156
unsigned getPredicate() const