LLVM  13.0.0git
AArch64FastISel.cpp
Go to the documentation of this file.
1 //===- AArch6464FastISel.cpp - AArch64 FastISel implementation ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the AArch64-specific support for the FastISel class. Some
10 // of the target-specific code is generated by tablegen in the file
11 // AArch64GenFastISel.inc, which is #included here.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AArch64.h"
17 #include "AArch64RegisterInfo.h"
18 #include "AArch64Subtarget.h"
20 #include "Utils/AArch64BaseInfo.h"
21 #include "llvm/ADT/APFloat.h"
22 #include "llvm/ADT/APInt.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/SmallVector.h"
27 #include "llvm/CodeGen/FastISel.h"
39 #include "llvm/IR/Argument.h"
40 #include "llvm/IR/Attributes.h"
41 #include "llvm/IR/BasicBlock.h"
42 #include "llvm/IR/CallingConv.h"
43 #include "llvm/IR/Constant.h"
44 #include "llvm/IR/Constants.h"
45 #include "llvm/IR/DataLayout.h"
46 #include "llvm/IR/DerivedTypes.h"
47 #include "llvm/IR/Function.h"
49 #include "llvm/IR/GlobalValue.h"
50 #include "llvm/IR/InstrTypes.h"
51 #include "llvm/IR/Instruction.h"
52 #include "llvm/IR/Instructions.h"
53 #include "llvm/IR/IntrinsicInst.h"
54 #include "llvm/IR/Intrinsics.h"
55 #include "llvm/IR/Operator.h"
56 #include "llvm/IR/Type.h"
57 #include "llvm/IR/User.h"
58 #include "llvm/IR/Value.h"
59 #include "llvm/MC/MCInstrDesc.h"
60 #include "llvm/MC/MCRegisterInfo.h"
61 #include "llvm/MC/MCSymbol.h"
63 #include "llvm/Support/Casting.h"
64 #include "llvm/Support/CodeGen.h"
65 #include "llvm/Support/Compiler.h"
69 #include <algorithm>
70 #include <cassert>
71 #include <cstdint>
72 #include <iterator>
73 #include <utility>
74 
75 using namespace llvm;
76 
77 namespace {
78 
79 class AArch64FastISel final : public FastISel {
80  class Address {
81  public:
82  using BaseKind = enum {
83  RegBase,
84  FrameIndexBase
85  };
86 
87  private:
88  BaseKind Kind = RegBase;
90  union {
91  unsigned Reg;
92  int FI;
93  } Base;
94  unsigned OffsetReg = 0;
95  unsigned Shift = 0;
96  int64_t Offset = 0;
97  const GlobalValue *GV = nullptr;
98 
99  public:
100  Address() { Base.Reg = 0; }
101 
102  void setKind(BaseKind K) { Kind = K; }
103  BaseKind getKind() const { return Kind; }
104  void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
105  AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
106  bool isRegBase() const { return Kind == RegBase; }
107  bool isFIBase() const { return Kind == FrameIndexBase; }
108 
109  void setReg(unsigned Reg) {
110  assert(isRegBase() && "Invalid base register access!");
111  Base.Reg = Reg;
112  }
113 
114  unsigned getReg() const {
115  assert(isRegBase() && "Invalid base register access!");
116  return Base.Reg;
117  }
118 
119  void setOffsetReg(unsigned Reg) {
120  OffsetReg = Reg;
121  }
122 
123  unsigned getOffsetReg() const {
124  return OffsetReg;
125  }
126 
127  void setFI(unsigned FI) {
128  assert(isFIBase() && "Invalid base frame index access!");
129  Base.FI = FI;
130  }
131 
132  unsigned getFI() const {
133  assert(isFIBase() && "Invalid base frame index access!");
134  return Base.FI;
135  }
136 
137  void setOffset(int64_t O) { Offset = O; }
138  int64_t getOffset() { return Offset; }
139  void setShift(unsigned S) { Shift = S; }
140  unsigned getShift() { return Shift; }
141 
142  void setGlobalValue(const GlobalValue *G) { GV = G; }
143  const GlobalValue *getGlobalValue() { return GV; }
144  };
145 
146  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
147  /// make the right decision when generating code for different targets.
148  const AArch64Subtarget *Subtarget;
150 
151  bool fastLowerArguments() override;
152  bool fastLowerCall(CallLoweringInfo &CLI) override;
153  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
154 
155 private:
156  // Selection routines.
157  bool selectAddSub(const Instruction *I);
158  bool selectLogicalOp(const Instruction *I);
159  bool selectLoad(const Instruction *I);
160  bool selectStore(const Instruction *I);
161  bool selectBranch(const Instruction *I);
162  bool selectIndirectBr(const Instruction *I);
163  bool selectCmp(const Instruction *I);
164  bool selectSelect(const Instruction *I);
165  bool selectFPExt(const Instruction *I);
166  bool selectFPTrunc(const Instruction *I);
167  bool selectFPToInt(const Instruction *I, bool Signed);
168  bool selectIntToFP(const Instruction *I, bool Signed);
169  bool selectRem(const Instruction *I, unsigned ISDOpcode);
170  bool selectRet(const Instruction *I);
171  bool selectTrunc(const Instruction *I);
172  bool selectIntExt(const Instruction *I);
173  bool selectMul(const Instruction *I);
174  bool selectShift(const Instruction *I);
175  bool selectBitCast(const Instruction *I);
176  bool selectFRem(const Instruction *I);
177  bool selectSDiv(const Instruction *I);
178  bool selectGetElementPtr(const Instruction *I);
179  bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
180 
181  // Utility helper routines.
182  bool isTypeLegal(Type *Ty, MVT &VT);
183  bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
184  bool isValueAvailable(const Value *V) const;
185  bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
186  bool computeCallAddress(const Value *V, Address &Addr);
187  bool simplifyAddress(Address &Addr, MVT VT);
188  void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
190  unsigned ScaleFactor, MachineMemOperand *MMO);
191  bool isMemCpySmall(uint64_t Len, unsigned Alignment);
192  bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
193  unsigned Alignment);
194  bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
195  const Value *Cond);
196  bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
197  bool optimizeSelect(const SelectInst *SI);
198  unsigned getRegForGEPIndex(const Value *Idx);
199 
200  // Emit helper routines.
201  unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
202  const Value *RHS, bool SetFlags = false,
203  bool WantResult = true, bool IsZExt = false);
204  unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
205  unsigned RHSReg, bool SetFlags = false,
206  bool WantResult = true);
207  unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
208  uint64_t Imm, bool SetFlags = false,
209  bool WantResult = true);
210  unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
211  unsigned RHSReg, AArch64_AM::ShiftExtendType ShiftType,
212  uint64_t ShiftImm, bool SetFlags = false,
213  bool WantResult = true);
214  unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
215  unsigned RHSReg, AArch64_AM::ShiftExtendType ExtType,
216  uint64_t ShiftImm, bool SetFlags = false,
217  bool WantResult = true);
218 
219  // Emit functions.
220  bool emitCompareAndBranch(const BranchInst *BI);
221  bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
222  bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
223  bool emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
224  bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
225  unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
226  MachineMemOperand *MMO = nullptr);
227  bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
228  MachineMemOperand *MMO = nullptr);
229  bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
230  MachineMemOperand *MMO = nullptr);
231  unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
232  unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
233  unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
234  bool SetFlags = false, bool WantResult = true,
235  bool IsZExt = false);
236  unsigned emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm);
237  unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
238  bool SetFlags = false, bool WantResult = true,
239  bool IsZExt = false);
240  unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
241  bool WantResult = true);
242  unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
243  AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
244  bool WantResult = true);
245  unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
246  const Value *RHS);
247  unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
248  uint64_t Imm);
249  unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
250  unsigned RHSReg, uint64_t ShiftImm);
251  unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
252  unsigned emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1);
253  unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
254  unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
255  unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
256  unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
257  bool IsZExt = true);
258  unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
259  unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
260  bool IsZExt = true);
261  unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
262  unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
263  bool IsZExt = false);
264 
265  unsigned materializeInt(const ConstantInt *CI, MVT VT);
266  unsigned materializeFP(const ConstantFP *CFP, MVT VT);
267  unsigned materializeGV(const GlobalValue *GV);
268 
269  // Call handling routines.
270 private:
271  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
272  bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
273  unsigned &NumBytes);
274  bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
275 
276 public:
277  // Backend specific FastISel code.
278  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
279  unsigned fastMaterializeConstant(const Constant *C) override;
280  unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
281 
282  explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
283  const TargetLibraryInfo *LibInfo)
284  : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
285  Subtarget =
286  &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
287  Context = &FuncInfo.Fn->getContext();
288  }
289 
290  bool fastSelectInstruction(const Instruction *I) override;
291 
292 #include "AArch64GenFastISel.inc"
293 };
294 
295 } // end anonymous namespace
296 
297 /// Check if the sign-/zero-extend will be a noop.
298 static bool isIntExtFree(const Instruction *I) {
299  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
300  "Unexpected integer extend instruction.");
301  assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
302  "Unexpected value type.");
303  bool IsZExt = isa<ZExtInst>(I);
304 
305  if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
306  if (LI->hasOneUse())
307  return true;
308 
309  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
310  if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
311  return true;
312 
313  return false;
314 }
315 
316 /// Determine the implicit scale factor that is applied by a memory
317 /// operation for a given value type.
318 static unsigned getImplicitScaleFactor(MVT VT) {
319  switch (VT.SimpleTy) {
320  default:
321  return 0; // invalid
322  case MVT::i1: // fall-through
323  case MVT::i8:
324  return 1;
325  case MVT::i16:
326  return 2;
327  case MVT::i32: // fall-through
328  case MVT::f32:
329  return 4;
330  case MVT::i64: // fall-through
331  case MVT::f64:
332  return 8;
333  }
334 }
335 
336 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
337  if (CC == CallingConv::WebKit_JS)
338  return CC_AArch64_WebKit_JS;
339  if (CC == CallingConv::GHC)
340  return CC_AArch64_GHC;
341  if (CC == CallingConv::CFGuard_Check)
343  return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
344 }
345 
346 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
347  assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
348  "Alloca should always return a pointer.");
349 
350  // Don't handle dynamic allocas.
351  if (!FuncInfo.StaticAllocaMap.count(AI))
352  return 0;
353 
355  FuncInfo.StaticAllocaMap.find(AI);
356 
357  if (SI != FuncInfo.StaticAllocaMap.end()) {
358  unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
359  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
360  ResultReg)
361  .addFrameIndex(SI->second)
362  .addImm(0)
363  .addImm(0);
364  return ResultReg;
365  }
366 
367  return 0;
368 }
369 
370 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
371  if (VT > MVT::i64)
372  return 0;
373 
374  if (!CI->isZero())
375  return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
376 
377  // Create a copy from the zero register to materialize a "0" value.
378  const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
379  : &AArch64::GPR32RegClass;
380  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
381  unsigned ResultReg = createResultReg(RC);
382  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
383  ResultReg).addReg(ZeroReg, getKillRegState(true));
384  return ResultReg;
385 }
386 
387 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
388  // Positive zero (+0.0) has to be materialized with a fmov from the zero
389  // register, because the immediate version of fmov cannot encode zero.
390  if (CFP->isNullValue())
391  return fastMaterializeFloatZero(CFP);
392 
393  if (VT != MVT::f32 && VT != MVT::f64)
394  return 0;
395 
396  const APFloat Val = CFP->getValueAPF();
397  bool Is64Bit = (VT == MVT::f64);
398  // This checks to see if we can use FMOV instructions to materialize
399  // a constant, otherwise we have to materialize via the constant pool.
400  int Imm =
401  Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
402  if (Imm != -1) {
403  unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
404  return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
405  }
406 
407  // For the large code model materialize the FP constant in code.
408  if (TM.getCodeModel() == CodeModel::Large) {
409  unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
410  const TargetRegisterClass *RC = Is64Bit ?
411  &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
412 
413  unsigned TmpReg = createResultReg(RC);
414  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
416 
417  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
418  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
419  TII.get(TargetOpcode::COPY), ResultReg)
420  .addReg(TmpReg, getKillRegState(true));
421 
422  return ResultReg;
423  }
424 
425  // Materialize via constant pool. MachineConstantPool wants an explicit
426  // alignment.
427  Align Alignment = DL.getPrefTypeAlign(CFP->getType());
428 
429  unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
430  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
431  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
432  ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
433 
434  unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
435  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
436  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
437  .addReg(ADRPReg)
439  return ResultReg;
440 }
441 
442 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
443  // We can't handle thread-local variables quickly yet.
444  if (GV->isThreadLocal())
445  return 0;
446 
447  // MachO still uses GOT for large code-model accesses, but ELF requires
448  // movz/movk sequences, which FastISel doesn't handle yet.
449  if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
450  return 0;
451 
452  unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
453 
454  EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
455  if (!DestEVT.isSimple())
456  return 0;
457 
458  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
459  unsigned ResultReg;
460 
461  if (OpFlags & AArch64II::MO_GOT) {
462  // ADRP + LDRX
463  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
464  ADRPReg)
465  .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
466 
467  unsigned LdrOpc;
468  if (Subtarget->isTargetILP32()) {
469  ResultReg = createResultReg(&AArch64::GPR32RegClass);
470  LdrOpc = AArch64::LDRWui;
471  } else {
472  ResultReg = createResultReg(&AArch64::GPR64RegClass);
473  LdrOpc = AArch64::LDRXui;
474  }
475  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(LdrOpc),
476  ResultReg)
477  .addReg(ADRPReg)
479  AArch64II::MO_NC | OpFlags);
480  if (!Subtarget->isTargetILP32())
481  return ResultReg;
482 
483  // LDRWui produces a 32-bit register, but pointers in-register are 64-bits
484  // so we must extend the result on ILP32.
485  unsigned Result64 = createResultReg(&AArch64::GPR64RegClass);
486  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
487  TII.get(TargetOpcode::SUBREG_TO_REG))
488  .addDef(Result64)
489  .addImm(0)
490  .addReg(ResultReg, RegState::Kill)
491  .addImm(AArch64::sub_32);
492  return Result64;
493  } else {
494  // ADRP + ADDX
495  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
496  ADRPReg)
497  .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
498 
499  ResultReg = createResultReg(&AArch64::GPR64spRegClass);
500  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
501  ResultReg)
502  .addReg(ADRPReg)
503  .addGlobalAddress(GV, 0,
505  .addImm(0);
506  }
507  return ResultReg;
508 }
509 
510 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
511  EVT CEVT = TLI.getValueType(DL, C->getType(), true);
512 
513  // Only handle simple types.
514  if (!CEVT.isSimple())
515  return 0;
516  MVT VT = CEVT.getSimpleVT();
517  // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
518  // 'null' pointers need to have a somewhat special treatment.
519  if (isa<ConstantPointerNull>(C)) {
520  assert(VT == MVT::i64 && "Expected 64-bit pointers");
521  return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
522  }
523 
524  if (const auto *CI = dyn_cast<ConstantInt>(C))
525  return materializeInt(CI, VT);
526  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
527  return materializeFP(CFP, VT);
528  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
529  return materializeGV(GV);
530 
531  return 0;
532 }
533 
534 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
535  assert(CFP->isNullValue() &&
536  "Floating-point constant is not a positive zero.");
537  MVT VT;
538  if (!isTypeLegal(CFP->getType(), VT))
539  return 0;
540 
541  if (VT != MVT::f32 && VT != MVT::f64)
542  return 0;
543 
544  bool Is64Bit = (VT == MVT::f64);
545  unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
546  unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
547  return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);
548 }
549 
550 /// Check if the multiply is by a power-of-2 constant.
551 static bool isMulPowOf2(const Value *I) {
552  if (const auto *MI = dyn_cast<MulOperator>(I)) {
553  if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
554  if (C->getValue().isPowerOf2())
555  return true;
556  if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
557  if (C->getValue().isPowerOf2())
558  return true;
559  }
560  return false;
561 }
562 
563 // Computes the address to get to an object.
564 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
565 {
566  const User *U = nullptr;
567  unsigned Opcode = Instruction::UserOp1;
568  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
569  // Don't walk into other basic blocks unless the object is an alloca from
570  // another block, otherwise it may not have a virtual register assigned.
571  if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
572  FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
573  Opcode = I->getOpcode();
574  U = I;
575  }
576  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
577  Opcode = C->getOpcode();
578  U = C;
579  }
580 
581  if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
582  if (Ty->getAddressSpace() > 255)
583  // Fast instruction selection doesn't support the special
584  // address spaces.
585  return false;
586 
587  switch (Opcode) {
588  default:
589  break;
590  case Instruction::BitCast:
591  // Look through bitcasts.
592  return computeAddress(U->getOperand(0), Addr, Ty);
593 
594  case Instruction::IntToPtr:
595  // Look past no-op inttoptrs.
596  if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
597  TLI.getPointerTy(DL))
598  return computeAddress(U->getOperand(0), Addr, Ty);
599  break;
600 
601  case Instruction::PtrToInt:
602  // Look past no-op ptrtoints.
603  if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
604  return computeAddress(U->getOperand(0), Addr, Ty);
605  break;
606 
607  case Instruction::GetElementPtr: {
608  Address SavedAddr = Addr;
609  uint64_t TmpOffset = Addr.getOffset();
610 
611  // Iterate through the GEP folding the constants into offsets where
612  // we can.
613  for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
614  GTI != E; ++GTI) {
615  const Value *Op = GTI.getOperand();
616  if (StructType *STy = GTI.getStructTypeOrNull()) {
617  const StructLayout *SL = DL.getStructLayout(STy);
618  unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
619  TmpOffset += SL->getElementOffset(Idx);
620  } else {
621  uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
622  while (true) {
623  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
624  // Constant-offset addressing.
625  TmpOffset += CI->getSExtValue() * S;
626  break;
627  }
628  if (canFoldAddIntoGEP(U, Op)) {
629  // A compatible add with a constant operand. Fold the constant.
630  ConstantInt *CI =
631  cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
632  TmpOffset += CI->getSExtValue() * S;
633  // Iterate on the other operand.
634  Op = cast<AddOperator>(Op)->getOperand(0);
635  continue;
636  }
637  // Unsupported
638  goto unsupported_gep;
639  }
640  }
641  }
642 
643  // Try to grab the base operand now.
644  Addr.setOffset(TmpOffset);
645  if (computeAddress(U->getOperand(0), Addr, Ty))
646  return true;
647 
648  // We failed, restore everything and try the other options.
649  Addr = SavedAddr;
650 
651  unsupported_gep:
652  break;
653  }
654  case Instruction::Alloca: {
655  const AllocaInst *AI = cast<AllocaInst>(Obj);
657  FuncInfo.StaticAllocaMap.find(AI);
658  if (SI != FuncInfo.StaticAllocaMap.end()) {
659  Addr.setKind(Address::FrameIndexBase);
660  Addr.setFI(SI->second);
661  return true;
662  }
663  break;
664  }
665  case Instruction::Add: {
666  // Adds of constants are common and easy enough.
667  const Value *LHS = U->getOperand(0);
668  const Value *RHS = U->getOperand(1);
669 
670  if (isa<ConstantInt>(LHS))
671  std::swap(LHS, RHS);
672 
673  if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
674  Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
675  return computeAddress(LHS, Addr, Ty);
676  }
677 
678  Address Backup = Addr;
679  if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
680  return true;
681  Addr = Backup;
682 
683  break;
684  }
685  case Instruction::Sub: {
686  // Subs of constants are common and easy enough.
687  const Value *LHS = U->getOperand(0);
688  const Value *RHS = U->getOperand(1);
689 
690  if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
691  Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
692  return computeAddress(LHS, Addr, Ty);
693  }
694  break;
695  }
696  case Instruction::Shl: {
697  if (Addr.getOffsetReg())
698  break;
699 
700  const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
701  if (!CI)
702  break;
703 
704  unsigned Val = CI->getZExtValue();
705  if (Val < 1 || Val > 3)
706  break;
707 
708  uint64_t NumBytes = 0;
709  if (Ty && Ty->isSized()) {
710  uint64_t NumBits = DL.getTypeSizeInBits(Ty);
711  NumBytes = NumBits / 8;
712  if (!isPowerOf2_64(NumBits))
713  NumBytes = 0;
714  }
715 
716  if (NumBytes != (1ULL << Val))
717  break;
718 
719  Addr.setShift(Val);
720  Addr.setExtendType(AArch64_AM::LSL);
721 
722  const Value *Src = U->getOperand(0);
723  if (const auto *I = dyn_cast<Instruction>(Src)) {
724  if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
725  // Fold the zext or sext when it won't become a noop.
726  if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
727  if (!isIntExtFree(ZE) &&
728  ZE->getOperand(0)->getType()->isIntegerTy(32)) {
729  Addr.setExtendType(AArch64_AM::UXTW);
730  Src = ZE->getOperand(0);
731  }
732  } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
733  if (!isIntExtFree(SE) &&
734  SE->getOperand(0)->getType()->isIntegerTy(32)) {
735  Addr.setExtendType(AArch64_AM::SXTW);
736  Src = SE->getOperand(0);
737  }
738  }
739  }
740  }
741 
742  if (const auto *AI = dyn_cast<BinaryOperator>(Src))
743  if (AI->getOpcode() == Instruction::And) {
744  const Value *LHS = AI->getOperand(0);
745  const Value *RHS = AI->getOperand(1);
746 
747  if (const auto *C = dyn_cast<ConstantInt>(LHS))
748  if (C->getValue() == 0xffffffff)
749  std::swap(LHS, RHS);
750 
751  if (const auto *C = dyn_cast<ConstantInt>(RHS))
752  if (C->getValue() == 0xffffffff) {
753  Addr.setExtendType(AArch64_AM::UXTW);
754  unsigned Reg = getRegForValue(LHS);
755  if (!Reg)
756  return false;
757  Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
758  Addr.setOffsetReg(Reg);
759  return true;
760  }
761  }
762 
763  unsigned Reg = getRegForValue(Src);
764  if (!Reg)
765  return false;
766  Addr.setOffsetReg(Reg);
767  return true;
768  }
769  case Instruction::Mul: {
770  if (Addr.getOffsetReg())
771  break;
772 
773  if (!isMulPowOf2(U))
774  break;
775 
776  const Value *LHS = U->getOperand(0);
777  const Value *RHS = U->getOperand(1);
778 
779  // Canonicalize power-of-2 value to the RHS.
780  if (const auto *C = dyn_cast<ConstantInt>(LHS))
781  if (C->getValue().isPowerOf2())
782  std::swap(LHS, RHS);
783 
784  assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt.");
785  const auto *C = cast<ConstantInt>(RHS);
786  unsigned Val = C->getValue().logBase2();
787  if (Val < 1 || Val > 3)
788  break;
789 
790  uint64_t NumBytes = 0;
791  if (Ty && Ty->isSized()) {
792  uint64_t NumBits = DL.getTypeSizeInBits(Ty);
793  NumBytes = NumBits / 8;
794  if (!isPowerOf2_64(NumBits))
795  NumBytes = 0;
796  }
797 
798  if (NumBytes != (1ULL << Val))
799  break;
800 
801  Addr.setShift(Val);
802  Addr.setExtendType(AArch64_AM::LSL);
803 
804  const Value *Src = LHS;
805  if (const auto *I = dyn_cast<Instruction>(Src)) {
806  if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
807  // Fold the zext or sext when it won't become a noop.
808  if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
809  if (!isIntExtFree(ZE) &&
810  ZE->getOperand(0)->getType()->isIntegerTy(32)) {
811  Addr.setExtendType(AArch64_AM::UXTW);
812  Src = ZE->getOperand(0);
813  }
814  } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
815  if (!isIntExtFree(SE) &&
816  SE->getOperand(0)->getType()->isIntegerTy(32)) {
817  Addr.setExtendType(AArch64_AM::SXTW);
818  Src = SE->getOperand(0);
819  }
820  }
821  }
822  }
823 
824  unsigned Reg = getRegForValue(Src);
825  if (!Reg)
826  return false;
827  Addr.setOffsetReg(Reg);
828  return true;
829  }
830  case Instruction::And: {
831  if (Addr.getOffsetReg())
832  break;
833 
834  if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
835  break;
836 
837  const Value *LHS = U->getOperand(0);
838  const Value *RHS = U->getOperand(1);
839 
840  if (const auto *C = dyn_cast<ConstantInt>(LHS))
841  if (C->getValue() == 0xffffffff)
842  std::swap(LHS, RHS);
843 
844  if (const auto *C = dyn_cast<ConstantInt>(RHS))
845  if (C->getValue() == 0xffffffff) {
846  Addr.setShift(0);
847  Addr.setExtendType(AArch64_AM::LSL);
848  Addr.setExtendType(AArch64_AM::UXTW);
849 
850  unsigned Reg = getRegForValue(LHS);
851  if (!Reg)
852  return false;
853  Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
854  Addr.setOffsetReg(Reg);
855  return true;
856  }
857  break;
858  }
859  case Instruction::SExt:
860  case Instruction::ZExt: {
861  if (!Addr.getReg() || Addr.getOffsetReg())
862  break;
863 
864  const Value *Src = nullptr;
865  // Fold the zext or sext when it won't become a noop.
866  if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
867  if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
868  Addr.setExtendType(AArch64_AM::UXTW);
869  Src = ZE->getOperand(0);
870  }
871  } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
872  if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
873  Addr.setExtendType(AArch64_AM::SXTW);
874  Src = SE->getOperand(0);
875  }
876  }
877 
878  if (!Src)
879  break;
880 
881  Addr.setShift(0);
882  unsigned Reg = getRegForValue(Src);
883  if (!Reg)
884  return false;
885  Addr.setOffsetReg(Reg);
886  return true;
887  }
888  } // end switch
889 
890  if (Addr.isRegBase() && !Addr.getReg()) {
891  unsigned Reg = getRegForValue(Obj);
892  if (!Reg)
893  return false;
894  Addr.setReg(Reg);
895  return true;
896  }
897 
898  if (!Addr.getOffsetReg()) {
899  unsigned Reg = getRegForValue(Obj);
900  if (!Reg)
901  return false;
902  Addr.setOffsetReg(Reg);
903  return true;
904  }
905 
906  return false;
907 }
908 
909 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
910  const User *U = nullptr;
911  unsigned Opcode = Instruction::UserOp1;
912  bool InMBB = true;
913 
914  if (const auto *I = dyn_cast<Instruction>(V)) {
915  Opcode = I->getOpcode();
916  U = I;
917  InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
918  } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
919  Opcode = C->getOpcode();
920  U = C;
921  }
922 
923  switch (Opcode) {
924  default: break;
925  case Instruction::BitCast:
926  // Look past bitcasts if its operand is in the same BB.
927  if (InMBB)
928  return computeCallAddress(U->getOperand(0), Addr);
929  break;
930  case Instruction::IntToPtr:
931  // Look past no-op inttoptrs if its operand is in the same BB.
932  if (InMBB &&
933  TLI.getValueType(DL, U->getOperand(0)->getType()) ==
934  TLI.getPointerTy(DL))
935  return computeCallAddress(U->getOperand(0), Addr);
936  break;
937  case Instruction::PtrToInt:
938  // Look past no-op ptrtoints if its operand is in the same BB.
939  if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
940  return computeCallAddress(U->getOperand(0), Addr);
941  break;
942  }
943 
944  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
945  Addr.setGlobalValue(GV);
946  return true;
947  }
948 
949  // If all else fails, try to materialize the value in a register.
950  if (!Addr.getGlobalValue()) {
951  Addr.setReg(getRegForValue(V));
952  return Addr.getReg() != 0;
953  }
954 
955  return false;
956 }
957 
958 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
959  EVT evt = TLI.getValueType(DL, Ty, true);
960 
961  if (Subtarget->isTargetILP32() && Ty->isPointerTy())
962  return false;
963 
964  // Only handle simple types.
965  if (evt == MVT::Other || !evt.isSimple())
966  return false;
967  VT = evt.getSimpleVT();
968 
969  // This is a legal type, but it's not something we handle in fast-isel.
970  if (VT == MVT::f128)
971  return false;
972 
973  // Handle all other legal types, i.e. a register that will directly hold this
974  // value.
975  return TLI.isTypeLegal(VT);
976 }
977 
978 /// Determine if the value type is supported by FastISel.
979 ///
980 /// FastISel for AArch64 can handle more value types than are legal. This adds
981 /// simple value type such as i1, i8, and i16.
982 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
983  if (Ty->isVectorTy() && !IsVectorAllowed)
984  return false;
985 
986  if (isTypeLegal(Ty, VT))
987  return true;
988 
989  // If this is a type than can be sign or zero-extended to a basic operation
990  // go ahead and accept it now.
991  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
992  return true;
993 
994  return false;
995 }
996 
997 bool AArch64FastISel::isValueAvailable(const Value *V) const {
998  if (!isa<Instruction>(V))
999  return true;
1000 
1001  const auto *I = cast<Instruction>(V);
1002  return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
1003 }
1004 
1005 bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
1006  if (Subtarget->isTargetILP32())
1007  return false;
1008 
1009  unsigned ScaleFactor = getImplicitScaleFactor(VT);
1010  if (!ScaleFactor)
1011  return false;
1012 
1013  bool ImmediateOffsetNeedsLowering = false;
1014  bool RegisterOffsetNeedsLowering = false;
1015  int64_t Offset = Addr.getOffset();
1016  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
1017  ImmediateOffsetNeedsLowering = true;
1018  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1019  !isUInt<12>(Offset / ScaleFactor))
1020  ImmediateOffsetNeedsLowering = true;
1021 
1022  // Cannot encode an offset register and an immediate offset in the same
1023  // instruction. Fold the immediate offset into the load/store instruction and
1024  // emit an additional add to take care of the offset register.
1025  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1026  RegisterOffsetNeedsLowering = true;
1027 
1028  // Cannot encode zero register as base.
1029  if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1030  RegisterOffsetNeedsLowering = true;
1031 
1032  // If this is a stack pointer and the offset needs to be simplified then put
1033  // the alloca address into a register, set the base type back to register and
1034  // continue. This should almost never happen.
1035  if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1036  {
1037  unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1038  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
1039  ResultReg)
1040  .addFrameIndex(Addr.getFI())
1041  .addImm(0)
1042  .addImm(0);
1043  Addr.setKind(Address::RegBase);
1044  Addr.setReg(ResultReg);
1045  }
1046 
1047  if (RegisterOffsetNeedsLowering) {
1048  unsigned ResultReg = 0;
1049  if (Addr.getReg()) {
1050  if (Addr.getExtendType() == AArch64_AM::SXTW ||
1051  Addr.getExtendType() == AArch64_AM::UXTW )
1052  ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1053  Addr.getOffsetReg(), Addr.getExtendType(),
1054  Addr.getShift());
1055  else
1056  ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1057  Addr.getOffsetReg(), AArch64_AM::LSL,
1058  Addr.getShift());
1059  } else {
1060  if (Addr.getExtendType() == AArch64_AM::UXTW)
1061  ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1062  Addr.getShift(), /*IsZExt=*/true);
1063  else if (Addr.getExtendType() == AArch64_AM::SXTW)
1064  ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1065  Addr.getShift(), /*IsZExt=*/false);
1066  else
1067  ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1068  Addr.getShift());
1069  }
1070  if (!ResultReg)
1071  return false;
1072 
1073  Addr.setReg(ResultReg);
1074  Addr.setOffsetReg(0);
1075  Addr.setShift(0);
1076  Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1077  }
1078 
1079  // Since the offset is too large for the load/store instruction get the
1080  // reg+offset into a register.
1081  if (ImmediateOffsetNeedsLowering) {
1082  unsigned ResultReg;
1083  if (Addr.getReg())
1084  // Try to fold the immediate into the add instruction.
1085  ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
1086  else
1087  ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1088 
1089  if (!ResultReg)
1090  return false;
1091  Addr.setReg(ResultReg);
1092  Addr.setOffset(0);
1093  }
1094  return true;
1095 }
1096 
1097 void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1098  const MachineInstrBuilder &MIB,
1100  unsigned ScaleFactor,
1101  MachineMemOperand *MMO) {
1102  int64_t Offset = Addr.getOffset() / ScaleFactor;
1103  // Frame base works a bit differently. Handle it separately.
1104  if (Addr.isFIBase()) {
1105  int FI = Addr.getFI();
1106  // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
1107  // and alignment should be based on the VT.
1108  MMO = FuncInfo.MF->getMachineMemOperand(
1109  MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1110  MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
1111  // Now add the rest of the operands.
1112  MIB.addFrameIndex(FI).addImm(Offset);
1113  } else {
1114  assert(Addr.isRegBase() && "Unexpected address kind.");
1115  const MCInstrDesc &II = MIB->getDesc();
1116  unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1117  Addr.setReg(
1118  constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1119  Addr.setOffsetReg(
1120  constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1121  if (Addr.getOffsetReg()) {
1122  assert(Addr.getOffset() == 0 && "Unexpected offset");
1123  bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1124  Addr.getExtendType() == AArch64_AM::SXTX;
1125  MIB.addReg(Addr.getReg());
1126  MIB.addReg(Addr.getOffsetReg());
1127  MIB.addImm(IsSigned);
1128  MIB.addImm(Addr.getShift() != 0);
1129  } else
1130  MIB.addReg(Addr.getReg()).addImm(Offset);
1131  }
1132 
1133  if (MMO)
1134  MIB.addMemOperand(MMO);
1135 }
1136 
1137 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1138  const Value *RHS, bool SetFlags,
1139  bool WantResult, bool IsZExt) {
1141  bool NeedExtend = false;
1142  switch (RetVT.SimpleTy) {
1143  default:
1144  return 0;
1145  case MVT::i1:
1146  NeedExtend = true;
1147  break;
1148  case MVT::i8:
1149  NeedExtend = true;
1150  ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1151  break;
1152  case MVT::i16:
1153  NeedExtend = true;
1154  ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1155  break;
1156  case MVT::i32: // fall-through
1157  case MVT::i64:
1158  break;
1159  }
1160  MVT SrcVT = RetVT;
1161  RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1162 
1163  // Canonicalize immediates to the RHS first.
1164  if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1165  std::swap(LHS, RHS);
1166 
1167  // Canonicalize mul by power of 2 to the RHS.
1168  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1169  if (isMulPowOf2(LHS))
1170  std::swap(LHS, RHS);
1171 
1172  // Canonicalize shift immediate to the RHS.
1173  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1174  if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1175  if (isa<ConstantInt>(SI->getOperand(1)))
1176  if (SI->getOpcode() == Instruction::Shl ||
1177  SI->getOpcode() == Instruction::LShr ||
1178  SI->getOpcode() == Instruction::AShr )
1179  std::swap(LHS, RHS);
1180 
1181  unsigned LHSReg = getRegForValue(LHS);
1182  if (!LHSReg)
1183  return 0;
1184 
1185  if (NeedExtend)
1186  LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1187 
1188  unsigned ResultReg = 0;
1189  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1190  uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1191  if (C->isNegative())
1192  ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
1193  WantResult);
1194  else
1195  ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
1196  WantResult);
1197  } else if (const auto *C = dyn_cast<Constant>(RHS))
1198  if (C->isNullValue())
1199  ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);
1200 
1201  if (ResultReg)
1202  return ResultReg;
1203 
1204  // Only extend the RHS within the instruction if there is a valid extend type.
1205  if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1206  isValueAvailable(RHS)) {
1207  if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
1208  if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
1209  if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
1210  unsigned RHSReg = getRegForValue(SI->getOperand(0));
1211  if (!RHSReg)
1212  return 0;
1213  return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType,
1214  C->getZExtValue(), SetFlags, WantResult);
1215  }
1216  unsigned RHSReg = getRegForValue(RHS);
1217  if (!RHSReg)
1218  return 0;
1219  return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,
1220  SetFlags, WantResult);
1221  }
1222 
1223  // Check if the mul can be folded into the instruction.
1224  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1225  if (isMulPowOf2(RHS)) {
1226  const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1227  const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1228 
1229  if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1230  if (C->getValue().isPowerOf2())
1231  std::swap(MulLHS, MulRHS);
1232 
1233  assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1234  uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1235  unsigned RHSReg = getRegForValue(MulLHS);
1236  if (!RHSReg)
1237  return 0;
1238  ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,
1239  ShiftVal, SetFlags, WantResult);
1240  if (ResultReg)
1241  return ResultReg;
1242  }
1243  }
1244 
1245  // Check if the shift can be folded into the instruction.
1246  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1247  if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1248  if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1250  switch (SI->getOpcode()) {
1251  default: break;
1252  case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
1253  case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1254  case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1255  }
1256  uint64_t ShiftVal = C->getZExtValue();
1257  if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1258  unsigned RHSReg = getRegForValue(SI->getOperand(0));
1259  if (!RHSReg)
1260  return 0;
1261  ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
1262  ShiftVal, SetFlags, WantResult);
1263  if (ResultReg)
1264  return ResultReg;
1265  }
1266  }
1267  }
1268  }
1269 
1270  unsigned RHSReg = getRegForValue(RHS);
1271  if (!RHSReg)
1272  return 0;
1273 
1274  if (NeedExtend)
1275  RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1276 
1277  return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
1278 }
1279 
1280 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1281  unsigned RHSReg, bool SetFlags,
1282  bool WantResult) {
1283  assert(LHSReg && RHSReg && "Invalid register number.");
1284 
1285  if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1286  RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1287  return 0;
1288 
1289  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1290  return 0;
1291 
1292  static const unsigned OpcTable[2][2][2] = {
1293  { { AArch64::SUBWrr, AArch64::SUBXrr },
1294  { AArch64::ADDWrr, AArch64::ADDXrr } },
1295  { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1296  { AArch64::ADDSWrr, AArch64::ADDSXrr } }
1297  };
1298  bool Is64Bit = RetVT == MVT::i64;
1299  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1300  const TargetRegisterClass *RC =
1301  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1302  unsigned ResultReg;
1303  if (WantResult)
1304  ResultReg = createResultReg(RC);
1305  else
1306  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1307 
1308  const MCInstrDesc &II = TII.get(Opc);
1309  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1310  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1311  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1312  .addReg(LHSReg)
1313  .addReg(RHSReg);
1314  return ResultReg;
1315 }
1316 
1317 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1318  uint64_t Imm, bool SetFlags,
1319  bool WantResult) {
1320  assert(LHSReg && "Invalid register number.");
1321 
1322  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1323  return 0;
1324 
1325  unsigned ShiftImm;
1326  if (isUInt<12>(Imm))
1327  ShiftImm = 0;
1328  else if ((Imm & 0xfff000) == Imm) {
1329  ShiftImm = 12;
1330  Imm >>= 12;
1331  } else
1332  return 0;
1333 
1334  static const unsigned OpcTable[2][2][2] = {
1335  { { AArch64::SUBWri, AArch64::SUBXri },
1336  { AArch64::ADDWri, AArch64::ADDXri } },
1337  { { AArch64::SUBSWri, AArch64::SUBSXri },
1338  { AArch64::ADDSWri, AArch64::ADDSXri } }
1339  };
1340  bool Is64Bit = RetVT == MVT::i64;
1341  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1342  const TargetRegisterClass *RC;
1343  if (SetFlags)
1344  RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1345  else
1346  RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1347  unsigned ResultReg;
1348  if (WantResult)
1349  ResultReg = createResultReg(RC);
1350  else
1351  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1352 
1353  const MCInstrDesc &II = TII.get(Opc);
1354  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1355  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1356  .addReg(LHSReg)
1357  .addImm(Imm)
1358  .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1359  return ResultReg;
1360 }
1361 
1362 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1363  unsigned RHSReg,
1364  AArch64_AM::ShiftExtendType ShiftType,
1365  uint64_t ShiftImm, bool SetFlags,
1366  bool WantResult) {
1367  assert(LHSReg && RHSReg && "Invalid register number.");
1368  assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1369  RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1370 
1371  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1372  return 0;
1373 
1374  // Don't deal with undefined shifts.
1375  if (ShiftImm >= RetVT.getSizeInBits())
1376  return 0;
1377 
1378  static const unsigned OpcTable[2][2][2] = {
1379  { { AArch64::SUBWrs, AArch64::SUBXrs },
1380  { AArch64::ADDWrs, AArch64::ADDXrs } },
1381  { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1382  { AArch64::ADDSWrs, AArch64::ADDSXrs } }
1383  };
1384  bool Is64Bit = RetVT == MVT::i64;
1385  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1386  const TargetRegisterClass *RC =
1387  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1388  unsigned ResultReg;
1389  if (WantResult)
1390  ResultReg = createResultReg(RC);
1391  else
1392  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1393 
1394  const MCInstrDesc &II = TII.get(Opc);
1395  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1396  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1397  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1398  .addReg(LHSReg)
1399  .addReg(RHSReg)
1400  .addImm(getShifterImm(ShiftType, ShiftImm));
1401  return ResultReg;
1402 }
1403 
1404 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1405  unsigned RHSReg,
1407  uint64_t ShiftImm, bool SetFlags,
1408  bool WantResult) {
1409  assert(LHSReg && RHSReg && "Invalid register number.");
1410  assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1411  RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1412 
1413  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1414  return 0;
1415 
1416  if (ShiftImm >= 4)
1417  return 0;
1418 
1419  static const unsigned OpcTable[2][2][2] = {
1420  { { AArch64::SUBWrx, AArch64::SUBXrx },
1421  { AArch64::ADDWrx, AArch64::ADDXrx } },
1422  { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1423  { AArch64::ADDSWrx, AArch64::ADDSXrx } }
1424  };
1425  bool Is64Bit = RetVT == MVT::i64;
1426  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1427  const TargetRegisterClass *RC = nullptr;
1428  if (SetFlags)
1429  RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1430  else
1431  RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1432  unsigned ResultReg;
1433  if (WantResult)
1434  ResultReg = createResultReg(RC);
1435  else
1436  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1437 
1438  const MCInstrDesc &II = TII.get(Opc);
1439  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1440  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1441  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1442  .addReg(LHSReg)
1443  .addReg(RHSReg)
1444  .addImm(getArithExtendImm(ExtType, ShiftImm));
1445  return ResultReg;
1446 }
1447 
1448 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1449  Type *Ty = LHS->getType();
1450  EVT EVT = TLI.getValueType(DL, Ty, true);
1451  if (!EVT.isSimple())
1452  return false;
1453  MVT VT = EVT.getSimpleVT();
1454 
1455  switch (VT.SimpleTy) {
1456  default:
1457  return false;
1458  case MVT::i1:
1459  case MVT::i8:
1460  case MVT::i16:
1461  case MVT::i32:
1462  case MVT::i64:
1463  return emitICmp(VT, LHS, RHS, IsZExt);
1464  case MVT::f32:
1465  case MVT::f64:
1466  return emitFCmp(VT, LHS, RHS);
1467  }
1468 }
1469 
1470 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1471  bool IsZExt) {
1472  return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1473  IsZExt) != 0;
1474 }
1475 
1476 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) {
1477  return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,
1478  /*SetFlags=*/true, /*WantResult=*/false) != 0;
1479 }
1480 
1481 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1482  if (RetVT != MVT::f32 && RetVT != MVT::f64)
1483  return false;
1484 
1485  // Check to see if the 2nd operand is a constant that we can encode directly
1486  // in the compare.
1487  bool UseImm = false;
1488  if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1489  if (CFP->isZero() && !CFP->isNegative())
1490  UseImm = true;
1491 
1492  unsigned LHSReg = getRegForValue(LHS);
1493  if (!LHSReg)
1494  return false;
1495 
1496  if (UseImm) {
1497  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1498  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1499  .addReg(LHSReg);
1500  return true;
1501  }
1502 
1503  unsigned RHSReg = getRegForValue(RHS);
1504  if (!RHSReg)
1505  return false;
1506 
1507  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1508  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1509  .addReg(LHSReg)
1510  .addReg(RHSReg);
1511  return true;
1512 }
1513 
1514 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1515  bool SetFlags, bool WantResult, bool IsZExt) {
1516  return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1517  IsZExt);
1518 }
1519 
1520 /// This method is a wrapper to simplify add emission.
1521 ///
1522 /// First try to emit an add with an immediate operand using emitAddSub_ri. If
1523 /// that fails, then try to materialize the immediate into a register and use
1524 /// emitAddSub_rr instead.
1525 unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm) {
1526  unsigned ResultReg;
1527  if (Imm < 0)
1528  ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);
1529  else
1530  ResultReg = emitAddSub_ri(true, VT, Op0, Imm);
1531 
1532  if (ResultReg)
1533  return ResultReg;
1534 
1535  unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1536  if (!CReg)
1537  return 0;
1538 
1539  ResultReg = emitAddSub_rr(true, VT, Op0, CReg);
1540  return ResultReg;
1541 }
1542 
1543 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1544  bool SetFlags, bool WantResult, bool IsZExt) {
1545  return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1546  IsZExt);
1547 }
1548 
1549 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1550  unsigned RHSReg, bool WantResult) {
1551  return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg,
1552  /*SetFlags=*/true, WantResult);
1553 }
1554 
1555 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1556  unsigned RHSReg,
1557  AArch64_AM::ShiftExtendType ShiftType,
1558  uint64_t ShiftImm, bool WantResult) {
1559  return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType,
1560  ShiftImm, /*SetFlags=*/true, WantResult);
1561 }
1562 
1563 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1564  const Value *LHS, const Value *RHS) {
1565  // Canonicalize immediates to the RHS first.
1566  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1567  std::swap(LHS, RHS);
1568 
1569  // Canonicalize mul by power-of-2 to the RHS.
1570  if (LHS->hasOneUse() && isValueAvailable(LHS))
1571  if (isMulPowOf2(LHS))
1572  std::swap(LHS, RHS);
1573 
1574  // Canonicalize shift immediate to the RHS.
1575  if (LHS->hasOneUse() && isValueAvailable(LHS))
1576  if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1577  if (isa<ConstantInt>(SI->getOperand(1)))
1578  std::swap(LHS, RHS);
1579 
1580  unsigned LHSReg = getRegForValue(LHS);
1581  if (!LHSReg)
1582  return 0;
1583 
1584  unsigned ResultReg = 0;
1585  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1586  uint64_t Imm = C->getZExtValue();
1587  ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);
1588  }
1589  if (ResultReg)
1590  return ResultReg;
1591 
1592  // Check if the mul can be folded into the instruction.
1593  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1594  if (isMulPowOf2(RHS)) {
1595  const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1596  const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1597 
1598  if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1599  if (C->getValue().isPowerOf2())
1600  std::swap(MulLHS, MulRHS);
1601 
1602  assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1603  uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1604 
1605  unsigned RHSReg = getRegForValue(MulLHS);
1606  if (!RHSReg)
1607  return 0;
1608  ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1609  if (ResultReg)
1610  return ResultReg;
1611  }
1612  }
1613 
1614  // Check if the shift can be folded into the instruction.
1615  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1616  if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1617  if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1618  uint64_t ShiftVal = C->getZExtValue();
1619  unsigned RHSReg = getRegForValue(SI->getOperand(0));
1620  if (!RHSReg)
1621  return 0;
1622  ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1623  if (ResultReg)
1624  return ResultReg;
1625  }
1626  }
1627 
1628  unsigned RHSReg = getRegForValue(RHS);
1629  if (!RHSReg)
1630  return 0;
1631 
1632  MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1633  ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);
1634  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1635  uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1636  ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1637  }
1638  return ResultReg;
1639 }
1640 
1641 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1642  unsigned LHSReg, uint64_t Imm) {
1643  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1644  "ISD nodes are not consecutive!");
1645  static const unsigned OpcTable[3][2] = {
1646  { AArch64::ANDWri, AArch64::ANDXri },
1647  { AArch64::ORRWri, AArch64::ORRXri },
1648  { AArch64::EORWri, AArch64::EORXri }
1649  };
1650  const TargetRegisterClass *RC;
1651  unsigned Opc;
1652  unsigned RegSize;
1653  switch (RetVT.SimpleTy) {
1654  default:
1655  return 0;
1656  case MVT::i1:
1657  case MVT::i8:
1658  case MVT::i16:
1659  case MVT::i32: {
1660  unsigned Idx = ISDOpc - ISD::AND;
1661  Opc = OpcTable[Idx][0];
1662  RC = &AArch64::GPR32spRegClass;
1663  RegSize = 32;
1664  break;
1665  }
1666  case MVT::i64:
1667  Opc = OpcTable[ISDOpc - ISD::AND][1];
1668  RC = &AArch64::GPR64spRegClass;
1669  RegSize = 64;
1670  break;
1671  }
1672 
1673  if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1674  return 0;
1675 
1676  unsigned ResultReg =
1677  fastEmitInst_ri(Opc, RC, LHSReg,
1678  AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1679  if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1680  uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1681  ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1682  }
1683  return ResultReg;
1684 }
1685 
1686 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1687  unsigned LHSReg, unsigned RHSReg,
1688  uint64_t ShiftImm) {
1689  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1690  "ISD nodes are not consecutive!");
1691  static const unsigned OpcTable[3][2] = {
1692  { AArch64::ANDWrs, AArch64::ANDXrs },
1693  { AArch64::ORRWrs, AArch64::ORRXrs },
1694  { AArch64::EORWrs, AArch64::EORXrs }
1695  };
1696 
1697  // Don't deal with undefined shifts.
1698  if (ShiftImm >= RetVT.getSizeInBits())
1699  return 0;
1700 
1701  const TargetRegisterClass *RC;
1702  unsigned Opc;
1703  switch (RetVT.SimpleTy) {
1704  default:
1705  return 0;
1706  case MVT::i1:
1707  case MVT::i8:
1708  case MVT::i16:
1709  case MVT::i32:
1710  Opc = OpcTable[ISDOpc - ISD::AND][0];
1711  RC = &AArch64::GPR32RegClass;
1712  break;
1713  case MVT::i64:
1714  Opc = OpcTable[ISDOpc - ISD::AND][1];
1715  RC = &AArch64::GPR64RegClass;
1716  break;
1717  }
1718  unsigned ResultReg =
1719  fastEmitInst_rri(Opc, RC, LHSReg, RHSReg,
1721  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1722  uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1723  ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1724  }
1725  return ResultReg;
1726 }
1727 
1728 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg,
1729  uint64_t Imm) {
1730  return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm);
1731 }
1732 
1733 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1734  bool WantZExt, MachineMemOperand *MMO) {
1735  if (!TLI.allowsMisalignedMemoryAccesses(VT))
1736  return 0;
1737 
1738  // Simplify this down to something we can handle.
1739  if (!simplifyAddress(Addr, VT))
1740  return 0;
1741 
1742  unsigned ScaleFactor = getImplicitScaleFactor(VT);
1743  if (!ScaleFactor)
1744  llvm_unreachable("Unexpected value type.");
1745 
1746  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1747  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1748  bool UseScaled = true;
1749  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1750  UseScaled = false;
1751  ScaleFactor = 1;
1752  }
1753 
1754  static const unsigned GPOpcTable[2][8][4] = {
1755  // Sign-extend.
1756  { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
1757  AArch64::LDURXi },
1758  { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
1759  AArch64::LDURXi },
1760  { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
1761  AArch64::LDRXui },
1762  { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
1763  AArch64::LDRXui },
1764  { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1765  AArch64::LDRXroX },
1766  { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1767  AArch64::LDRXroX },
1768  { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1769  AArch64::LDRXroW },
1770  { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1771  AArch64::LDRXroW }
1772  },
1773  // Zero-extend.
1774  { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1775  AArch64::LDURXi },
1776  { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1777  AArch64::LDURXi },
1778  { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1779  AArch64::LDRXui },
1780  { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1781  AArch64::LDRXui },
1782  { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1783  AArch64::LDRXroX },
1784  { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1785  AArch64::LDRXroX },
1786  { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1787  AArch64::LDRXroW },
1788  { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1789  AArch64::LDRXroW }
1790  }
1791  };
1792 
1793  static const unsigned FPOpcTable[4][2] = {
1794  { AArch64::LDURSi, AArch64::LDURDi },
1795  { AArch64::LDRSui, AArch64::LDRDui },
1796  { AArch64::LDRSroX, AArch64::LDRDroX },
1797  { AArch64::LDRSroW, AArch64::LDRDroW }
1798  };
1799 
1800  unsigned Opc;
1801  const TargetRegisterClass *RC;
1802  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1803  Addr.getOffsetReg();
1804  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1805  if (Addr.getExtendType() == AArch64_AM::UXTW ||
1806  Addr.getExtendType() == AArch64_AM::SXTW)
1807  Idx++;
1808 
1809  bool IsRet64Bit = RetVT == MVT::i64;
1810  switch (VT.SimpleTy) {
1811  default:
1812  llvm_unreachable("Unexpected value type.");
1813  case MVT::i1: // Intentional fall-through.
1814  case MVT::i8:
1815  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1816  RC = (IsRet64Bit && !WantZExt) ?
1817  &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1818  break;
1819  case MVT::i16:
1820  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1821  RC = (IsRet64Bit && !WantZExt) ?
1822  &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1823  break;
1824  case MVT::i32:
1825  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1826  RC = (IsRet64Bit && !WantZExt) ?
1827  &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1828  break;
1829  case MVT::i64:
1830  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1831  RC = &AArch64::GPR64RegClass;
1832  break;
1833  case MVT::f32:
1834  Opc = FPOpcTable[Idx][0];
1835  RC = &AArch64::FPR32RegClass;
1836  break;
1837  case MVT::f64:
1838  Opc = FPOpcTable[Idx][1];
1839  RC = &AArch64::FPR64RegClass;
1840  break;
1841  }
1842 
1843  // Create the base instruction, then add the operands.
1844  unsigned ResultReg = createResultReg(RC);
1845  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1846  TII.get(Opc), ResultReg);
1847  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1848 
1849  // Loading an i1 requires special handling.
1850  if (VT == MVT::i1) {
1851  unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1);
1852  assert(ANDReg && "Unexpected AND instruction emission failure.");
1853  ResultReg = ANDReg;
1854  }
1855 
1856  // For zero-extending loads to 64bit we emit a 32bit load and then convert
1857  // the 32bit reg to a 64bit reg.
1858  if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1859  unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
1860  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1861  TII.get(AArch64::SUBREG_TO_REG), Reg64)
1862  .addImm(0)
1863  .addReg(ResultReg, getKillRegState(true))
1864  .addImm(AArch64::sub_32);
1865  ResultReg = Reg64;
1866  }
1867  return ResultReg;
1868 }
1869 
1870 bool AArch64FastISel::selectAddSub(const Instruction *I) {
1871  MVT VT;
1872  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1873  return false;
1874 
1875  if (VT.isVector())
1876  return selectOperator(I, I->getOpcode());
1877 
1878  unsigned ResultReg;
1879  switch (I->getOpcode()) {
1880  default:
1881  llvm_unreachable("Unexpected instruction.");
1882  case Instruction::Add:
1883  ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1884  break;
1885  case Instruction::Sub:
1886  ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1887  break;
1888  }
1889  if (!ResultReg)
1890  return false;
1891 
1892  updateValueMap(I, ResultReg);
1893  return true;
1894 }
1895 
1896 bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1897  MVT VT;
1898  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1899  return false;
1900 
1901  if (VT.isVector())
1902  return selectOperator(I, I->getOpcode());
1903 
1904  unsigned ResultReg;
1905  switch (I->getOpcode()) {
1906  default:
1907  llvm_unreachable("Unexpected instruction.");
1908  case Instruction::And:
1909  ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1910  break;
1911  case Instruction::Or:
1912  ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1913  break;
1914  case Instruction::Xor:
1915  ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1916  break;
1917  }
1918  if (!ResultReg)
1919  return false;
1920 
1921  updateValueMap(I, ResultReg);
1922  return true;
1923 }
1924 
1925 bool AArch64FastISel::selectLoad(const Instruction *I) {
1926  MVT VT;
1927  // Verify we have a legal type before going any further. Currently, we handle
1928  // simple types that will directly fit in a register (i32/f32/i64/f64) or
1929  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1930  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1931  cast<LoadInst>(I)->isAtomic())
1932  return false;
1933 
1934  const Value *SV = I->getOperand(0);
1935  if (TLI.supportSwiftError()) {
1936  // Swifterror values can come from either a function parameter with
1937  // swifterror attribute or an alloca with swifterror attribute.
1938  if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1939  if (Arg->hasSwiftErrorAttr())
1940  return false;
1941  }
1942 
1943  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1944  if (Alloca->isSwiftError())
1945  return false;
1946  }
1947  }
1948 
1949  // See if we can handle this address.
1950  Address Addr;
1951  if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1952  return false;
1953 
1954  // Fold the following sign-/zero-extend into the load instruction.
1955  bool WantZExt = true;
1956  MVT RetVT = VT;
1957  const Value *IntExtVal = nullptr;
1958  if (I->hasOneUse()) {
1959  if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1960  if (isTypeSupported(ZE->getType(), RetVT))
1961  IntExtVal = ZE;
1962  else
1963  RetVT = VT;
1964  } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1965  if (isTypeSupported(SE->getType(), RetVT))
1966  IntExtVal = SE;
1967  else
1968  RetVT = VT;
1969  WantZExt = false;
1970  }
1971  }
1972 
1973  unsigned ResultReg =
1974  emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1975  if (!ResultReg)
1976  return false;
1977 
1978  // There are a few different cases we have to handle, because the load or the
1979  // sign-/zero-extend might not be selected by FastISel if we fall-back to
1980  // SelectionDAG. There is also an ordering issue when both instructions are in
1981  // different basic blocks.
1982  // 1.) The load instruction is selected by FastISel, but the integer extend
1983  // not. This usually happens when the integer extend is in a different
1984  // basic block and SelectionDAG took over for that basic block.
1985  // 2.) The load instruction is selected before the integer extend. This only
1986  // happens when the integer extend is in a different basic block.
1987  // 3.) The load instruction is selected by SelectionDAG and the integer extend
1988  // by FastISel. This happens if there are instructions between the load
1989  // and the integer extend that couldn't be selected by FastISel.
1990  if (IntExtVal) {
1991  // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
1992  // could select it. Emit a copy to subreg if necessary. FastISel will remove
1993  // it when it selects the integer extend.
1994  unsigned Reg = lookUpRegForValue(IntExtVal);
1995  auto *MI = MRI.getUniqueVRegDef(Reg);
1996  if (!MI) {
1997  if (RetVT == MVT::i64 && VT <= MVT::i32) {
1998  if (WantZExt) {
1999  // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2000  MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
2001  ResultReg = std::prev(I)->getOperand(0).getReg();
2002  removeDeadCode(I, std::next(I));
2003  } else
2004  ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2005  AArch64::sub_32);
2006  }
2007  updateValueMap(I, ResultReg);
2008  return true;
2009  }
2010 
2011  // The integer extend has already been emitted - delete all the instructions
2012  // that have been emitted by the integer extend lowering code and use the
2013  // result from the load instruction directly.
2014  while (MI) {
2015  Reg = 0;
2016  for (auto &Opnd : MI->uses()) {
2017  if (Opnd.isReg()) {
2018  Reg = Opnd.getReg();
2019  break;
2020  }
2021  }
2023  removeDeadCode(I, std::next(I));
2024  MI = nullptr;
2025  if (Reg)
2027  }
2028  updateValueMap(IntExtVal, ResultReg);
2029  return true;
2030  }
2031 
2032  updateValueMap(I, ResultReg);
2033  return true;
2034 }
2035 
2036 bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2037  unsigned AddrReg,
2038  MachineMemOperand *MMO) {
2039  unsigned Opc;
2040  switch (VT.SimpleTy) {
2041  default: return false;
2042  case MVT::i8: Opc = AArch64::STLRB; break;
2043  case MVT::i16: Opc = AArch64::STLRH; break;
2044  case MVT::i32: Opc = AArch64::STLRW; break;
2045  case MVT::i64: Opc = AArch64::STLRX; break;
2046  }
2047 
2048  const MCInstrDesc &II = TII.get(Opc);
2049  SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2050  AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2051  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2052  .addReg(SrcReg)
2053  .addReg(AddrReg)
2054  .addMemOperand(MMO);
2055  return true;
2056 }
2057 
2058 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2059  MachineMemOperand *MMO) {
2060  if (!TLI.allowsMisalignedMemoryAccesses(VT))
2061  return false;
2062 
2063  // Simplify this down to something we can handle.
2064  if (!simplifyAddress(Addr, VT))
2065  return false;
2066 
2067  unsigned ScaleFactor = getImplicitScaleFactor(VT);
2068  if (!ScaleFactor)
2069  llvm_unreachable("Unexpected value type.");
2070 
2071  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2072  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2073  bool UseScaled = true;
2074  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2075  UseScaled = false;
2076  ScaleFactor = 1;
2077  }
2078 
2079  static const unsigned OpcTable[4][6] = {
2080  { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
2081  AArch64::STURSi, AArch64::STURDi },
2082  { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
2083  AArch64::STRSui, AArch64::STRDui },
2084  { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2085  AArch64::STRSroX, AArch64::STRDroX },
2086  { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2087  AArch64::STRSroW, AArch64::STRDroW }
2088  };
2089 
2090  unsigned Opc;
2091  bool VTIsi1 = false;
2092  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2093  Addr.getOffsetReg();
2094  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2095  if (Addr.getExtendType() == AArch64_AM::UXTW ||
2096  Addr.getExtendType() == AArch64_AM::SXTW)
2097  Idx++;
2098 
2099  switch (VT.SimpleTy) {
2100  default: llvm_unreachable("Unexpected value type.");
2101  case MVT::i1: VTIsi1 = true; LLVM_FALLTHROUGH;
2102  case MVT::i8: Opc = OpcTable[Idx][0]; break;
2103  case MVT::i16: Opc = OpcTable[Idx][1]; break;
2104  case MVT::i32: Opc = OpcTable[Idx][2]; break;
2105  case MVT::i64: Opc = OpcTable[Idx][3]; break;
2106  case MVT::f32: Opc = OpcTable[Idx][4]; break;
2107  case MVT::f64: Opc = OpcTable[Idx][5]; break;
2108  }
2109 
2110  // Storing an i1 requires special handling.
2111  if (VTIsi1 && SrcReg != AArch64::WZR) {
2112  unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1);
2113  assert(ANDReg && "Unexpected AND instruction emission failure.");
2114  SrcReg = ANDReg;
2115  }
2116  // Create the base instruction, then add the operands.
2117  const MCInstrDesc &II = TII.get(Opc);
2118  SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2119  MachineInstrBuilder MIB =
2120  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
2121  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2122 
2123  return true;
2124 }
2125 
2126 bool AArch64FastISel::selectStore(const Instruction *I) {
2127  MVT VT;
2128  const Value *Op0 = I->getOperand(0);
2129  // Verify we have a legal type before going any further. Currently, we handle
2130  // simple types that will directly fit in a register (i32/f32/i64/f64) or
2131  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2132  if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2133  return false;
2134 
2135  const Value *PtrV = I->getOperand(1);
2136  if (TLI.supportSwiftError()) {
2137  // Swifterror values can come from either a function parameter with
2138  // swifterror attribute or an alloca with swifterror attribute.
2139  if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2140  if (Arg->hasSwiftErrorAttr())
2141  return false;
2142  }
2143 
2144  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2145  if (Alloca->isSwiftError())
2146  return false;
2147  }
2148  }
2149 
2150  // Get the value to be stored into a register. Use the zero register directly
2151  // when possible to avoid an unnecessary copy and a wasted register.
2152  unsigned SrcReg = 0;
2153  if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2154  if (CI->isZero())
2155  SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2156  } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2157  if (CF->isZero() && !CF->isNegative()) {
2158  VT = MVT::getIntegerVT(VT.getSizeInBits());
2159  SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2160  }
2161  }
2162 
2163  if (!SrcReg)
2164  SrcReg = getRegForValue(Op0);
2165 
2166  if (!SrcReg)
2167  return false;
2168 
2169  auto *SI = cast<StoreInst>(I);
2170 
2171  // Try to emit a STLR for seq_cst/release.
2172  if (SI->isAtomic()) {
2173  AtomicOrdering Ord = SI->getOrdering();
2174  // The non-atomic instructions are sufficient for relaxed stores.
2175  if (isReleaseOrStronger(Ord)) {
2176  // The STLR addressing mode only supports a base reg; pass that directly.
2177  unsigned AddrReg = getRegForValue(PtrV);
2178  return emitStoreRelease(VT, SrcReg, AddrReg,
2179  createMachineMemOperandFor(I));
2180  }
2181  }
2182 
2183  // See if we can handle this address.
2184  Address Addr;
2185  if (!computeAddress(PtrV, Addr, Op0->getType()))
2186  return false;
2187 
2188  if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2189  return false;
2190  return true;
2191 }
2192 
2194  switch (Pred) {
2195  case CmpInst::FCMP_ONE:
2196  case CmpInst::FCMP_UEQ:
2197  default:
2198  // AL is our "false" for now. The other two need more compares.
2199  return AArch64CC::AL;
2200  case CmpInst::ICMP_EQ:
2201  case CmpInst::FCMP_OEQ:
2202  return AArch64CC::EQ;
2203  case CmpInst::ICMP_SGT:
2204  case CmpInst::FCMP_OGT:
2205  return AArch64CC::GT;
2206  case CmpInst::ICMP_SGE:
2207  case CmpInst::FCMP_OGE:
2208  return AArch64CC::GE;
2209  case CmpInst::ICMP_UGT:
2210  case CmpInst::FCMP_UGT:
2211  return AArch64CC::HI;
2212  case CmpInst::FCMP_OLT:
2213  return AArch64CC::MI;
2214  case CmpInst::ICMP_ULE:
2215  case CmpInst::FCMP_OLE:
2216  return AArch64CC::LS;
2217  case CmpInst::FCMP_ORD:
2218  return AArch64CC::VC;
2219  case CmpInst::FCMP_UNO:
2220  return AArch64CC::VS;
2221  case CmpInst::FCMP_UGE:
2222  return AArch64CC::PL;
2223  case CmpInst::ICMP_SLT:
2224  case CmpInst::FCMP_ULT:
2225  return AArch64CC::LT;
2226  case CmpInst::ICMP_SLE:
2227  case CmpInst::FCMP_ULE:
2228  return AArch64CC::LE;
2229  case CmpInst::FCMP_UNE:
2230  case CmpInst::ICMP_NE:
2231  return AArch64CC::NE;
2232  case CmpInst::ICMP_UGE:
2233  return AArch64CC::HS;
2234  case CmpInst::ICMP_ULT:
2235  return AArch64CC::LO;
2236  }
2237 }
2238 
2239 /// Try to emit a combined compare-and-branch instruction.
2240 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2241  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
2242  // will not be produced, as they are conditional branch instructions that do
2243  // not set flags.
2244  if (FuncInfo.MF->getFunction().hasFnAttribute(
2245  Attribute::SpeculativeLoadHardening))
2246  return false;
2247 
2248  assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2249  const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2250  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2251 
2252  const Value *LHS = CI->getOperand(0);
2253  const Value *RHS = CI->getOperand(1);
2254 
2255  MVT VT;
2256  if (!isTypeSupported(LHS->getType(), VT))
2257  return false;
2258 
2259  unsigned BW = VT.getSizeInBits();
2260  if (BW > 64)
2261  return false;
2262 
2263  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2264  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2265 
2266  // Try to take advantage of fallthrough opportunities.
2267  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2268  std::swap(TBB, FBB);
2270  }
2271 
2272  int TestBit = -1;
2273  bool IsCmpNE;
2274  switch (Predicate) {
2275  default:
2276  return false;
2277  case CmpInst::ICMP_EQ:
2278  case CmpInst::ICMP_NE:
2279  if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2280  std::swap(LHS, RHS);
2281 
2282  if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2283  return false;
2284 
2285  if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2286  if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2287  const Value *AndLHS = AI->getOperand(0);
2288  const Value *AndRHS = AI->getOperand(1);
2289 
2290  if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2291  if (C->getValue().isPowerOf2())
2292  std::swap(AndLHS, AndRHS);
2293 
2294  if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2295  if (C->getValue().isPowerOf2()) {
2296  TestBit = C->getValue().logBase2();
2297  LHS = AndLHS;
2298  }
2299  }
2300 
2301  if (VT == MVT::i1)
2302  TestBit = 0;
2303 
2304  IsCmpNE = Predicate == CmpInst::ICMP_NE;
2305  break;
2306  case CmpInst::ICMP_SLT:
2307  case CmpInst::ICMP_SGE:
2308  if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2309  return false;
2310 
2311  TestBit = BW - 1;
2312  IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2313  break;
2314  case CmpInst::ICMP_SGT:
2315  case CmpInst::ICMP_SLE:
2316  if (!isa<ConstantInt>(RHS))
2317  return false;
2318 
2319  if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2320  return false;
2321 
2322  TestBit = BW - 1;
2323  IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2324  break;
2325  } // end switch
2326 
2327  static const unsigned OpcTable[2][2][2] = {
2328  { {AArch64::CBZW, AArch64::CBZX },
2329  {AArch64::CBNZW, AArch64::CBNZX} },
2330  { {AArch64::TBZW, AArch64::TBZX },
2331  {AArch64::TBNZW, AArch64::TBNZX} }
2332  };
2333 
2334  bool IsBitTest = TestBit != -1;
2335  bool Is64Bit = BW == 64;
2336  if (TestBit < 32 && TestBit >= 0)
2337  Is64Bit = false;
2338 
2339  unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2340  const MCInstrDesc &II = TII.get(Opc);
2341 
2342  unsigned SrcReg = getRegForValue(LHS);
2343  if (!SrcReg)
2344  return false;
2345 
2346  if (BW == 64 && !Is64Bit)
2347  SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32);
2348 
2349  if ((BW < 32) && !IsBitTest)
2350  SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
2351 
2352  // Emit the combined compare and branch instruction.
2353  SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2354  MachineInstrBuilder MIB =
2355  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
2356  .addReg(SrcReg);
2357  if (IsBitTest)
2358  MIB.addImm(TestBit);
2359  MIB.addMBB(TBB);
2360 
2361  finishCondBranch(BI->getParent(), TBB, FBB);
2362  return true;
2363 }
2364 
2365 bool AArch64FastISel::selectBranch(const Instruction *I) {
2366  const BranchInst *BI = cast<BranchInst>(I);
2367  if (BI->isUnconditional()) {
2368  MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2369  fastEmitBranch(MSucc, BI->getDebugLoc());
2370  return true;
2371  }
2372 
2373  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2374  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2375 
2376  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2377  if (CI->hasOneUse() && isValueAvailable(CI)) {
2378  // Try to optimize or fold the cmp.
2379  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2380  switch (Predicate) {
2381  default:
2382  break;
2383  case CmpInst::FCMP_FALSE:
2384  fastEmitBranch(FBB, DbgLoc);
2385  return true;
2386  case CmpInst::FCMP_TRUE:
2387  fastEmitBranch(TBB, DbgLoc);
2388  return true;
2389  }
2390 
2391  // Try to emit a combined compare-and-branch first.
2392  if (emitCompareAndBranch(BI))
2393  return true;
2394 
2395  // Try to take advantage of fallthrough opportunities.
2396  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2397  std::swap(TBB, FBB);
2399  }
2400 
2401  // Emit the cmp.
2402  if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2403  return false;
2404 
2405  // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2406  // instruction.
2409  switch (Predicate) {
2410  default:
2411  break;
2412  case CmpInst::FCMP_UEQ:
2413  ExtraCC = AArch64CC::EQ;
2414  CC = AArch64CC::VS;
2415  break;
2416  case CmpInst::FCMP_ONE:
2417  ExtraCC = AArch64CC::MI;
2418  CC = AArch64CC::GT;
2419  break;
2420  }
2421  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2422 
2423  // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2424  if (ExtraCC != AArch64CC::AL) {
2425  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2426  .addImm(ExtraCC)
2427  .addMBB(TBB);
2428  }
2429 
2430  // Emit the branch.
2431  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2432  .addImm(CC)
2433  .addMBB(TBB);
2434 
2435  finishCondBranch(BI->getParent(), TBB, FBB);
2436  return true;
2437  }
2438  } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2439  uint64_t Imm = CI->getZExtValue();
2440  MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2441  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
2442  .addMBB(Target);
2443 
2444  // Obtain the branch probability and add the target to the successor list.
2445  if (FuncInfo.BPI) {
2446  auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2447  BI->getParent(), Target->getBasicBlock());
2448  FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2449  } else
2450  FuncInfo.MBB->addSuccessorWithoutProb(Target);
2451  return true;
2452  } else {
2454  if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2455  // Fake request the condition, otherwise the intrinsic might be completely
2456  // optimized away.
2457  unsigned CondReg = getRegForValue(BI->getCondition());
2458  if (!CondReg)
2459  return false;
2460 
2461  // Emit the branch.
2462  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2463  .addImm(CC)
2464  .addMBB(TBB);
2465 
2466  finishCondBranch(BI->getParent(), TBB, FBB);
2467  return true;
2468  }
2469  }
2470 
2471  unsigned CondReg = getRegForValue(BI->getCondition());
2472  if (CondReg == 0)
2473  return false;
2474 
2475  // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2476  unsigned Opcode = AArch64::TBNZW;
2477  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2478  std::swap(TBB, FBB);
2479  Opcode = AArch64::TBZW;
2480  }
2481 
2482  const MCInstrDesc &II = TII.get(Opcode);
2483  unsigned ConstrainedCondReg
2484  = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2485  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2486  .addReg(ConstrainedCondReg)
2487  .addImm(0)
2488  .addMBB(TBB);
2489 
2490  finishCondBranch(BI->getParent(), TBB, FBB);
2491  return true;
2492 }
2493 
2494 bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2495  const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2496  unsigned AddrReg = getRegForValue(BI->getOperand(0));
2497  if (AddrReg == 0)
2498  return false;
2499 
2500  // Emit the indirect branch.
2501  const MCInstrDesc &II = TII.get(AArch64::BR);
2502  AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
2503  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
2504 
2505  // Make sure the CFG is up-to-date.
2506  for (auto *Succ : BI->successors())
2507  FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2508 
2509  return true;
2510 }
2511 
2512 bool AArch64FastISel::selectCmp(const Instruction *I) {
2513  const CmpInst *CI = cast<CmpInst>(I);
2514 
2515  // Vectors of i1 are weird: bail out.
2516  if (CI->getType()->isVectorTy())
2517  return false;
2518 
2519  // Try to optimize or fold the cmp.
2520  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2521  unsigned ResultReg = 0;
2522  switch (Predicate) {
2523  default:
2524  break;
2525  case CmpInst::FCMP_FALSE:
2526  ResultReg = createResultReg(&AArch64::GPR32RegClass);
2527  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2528  TII.get(TargetOpcode::COPY), ResultReg)
2529  .addReg(AArch64::WZR, getKillRegState(true));
2530  break;
2531  case CmpInst::FCMP_TRUE:
2532  ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2533  break;
2534  }
2535 
2536  if (ResultReg) {
2537  updateValueMap(I, ResultReg);
2538  return true;
2539  }
2540 
2541  // Emit the cmp.
2542  if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2543  return false;
2544 
2545  ResultReg = createResultReg(&AArch64::GPR32RegClass);
2546 
2547  // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2548  // condition codes are inverted, because they are used by CSINC.
2549  static unsigned CondCodeTable[2][2] = {
2552  };
2553  unsigned *CondCodes = nullptr;
2554  switch (Predicate) {
2555  default:
2556  break;
2557  case CmpInst::FCMP_UEQ:
2558  CondCodes = &CondCodeTable[0][0];
2559  break;
2560  case CmpInst::FCMP_ONE:
2561  CondCodes = &CondCodeTable[1][0];
2562  break;
2563  }
2564 
2565  if (CondCodes) {
2566  unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2567  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2568  TmpReg1)
2569  .addReg(AArch64::WZR, getKillRegState(true))
2570  .addReg(AArch64::WZR, getKillRegState(true))
2571  .addImm(CondCodes[0]);
2572  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2573  ResultReg)
2574  .addReg(TmpReg1, getKillRegState(true))
2575  .addReg(AArch64::WZR, getKillRegState(true))
2576  .addImm(CondCodes[1]);
2577 
2578  updateValueMap(I, ResultReg);
2579  return true;
2580  }
2581 
2582  // Now set a register based on the comparison.
2584  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2585  AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2586  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2587  ResultReg)
2588  .addReg(AArch64::WZR, getKillRegState(true))
2589  .addReg(AArch64::WZR, getKillRegState(true))
2590  .addImm(invertedCC);
2591 
2592  updateValueMap(I, ResultReg);
2593  return true;
2594 }
2595 
2596 /// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2597 /// value.
2599  if (!SI->getType()->isIntegerTy(1))
2600  return false;
2601 
2602  const Value *Src1Val, *Src2Val;
2603  unsigned Opc = 0;
2604  bool NeedExtraOp = false;
2605  if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2606  if (CI->isOne()) {
2607  Src1Val = SI->getCondition();
2608  Src2Val = SI->getFalseValue();
2609  Opc = AArch64::ORRWrr;
2610  } else {
2611  assert(CI->isZero());
2612  Src1Val = SI->getFalseValue();
2613  Src2Val = SI->getCondition();
2614  Opc = AArch64::BICWrr;
2615  }
2616  } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2617  if (CI->isOne()) {
2618  Src1Val = SI->getCondition();
2619  Src2Val = SI->getTrueValue();
2620  Opc = AArch64::ORRWrr;
2621  NeedExtraOp = true;
2622  } else {
2623  assert(CI->isZero());
2624  Src1Val = SI->getCondition();
2625  Src2Val = SI->getTrueValue();
2626  Opc = AArch64::ANDWrr;
2627  }
2628  }
2629 
2630  if (!Opc)
2631  return false;
2632 
2633  unsigned Src1Reg = getRegForValue(Src1Val);
2634  if (!Src1Reg)
2635  return false;
2636 
2637  unsigned Src2Reg = getRegForValue(Src2Val);
2638  if (!Src2Reg)
2639  return false;
2640 
2641  if (NeedExtraOp)
2642  Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1);
2643 
2644  unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2645  Src2Reg);
2646  updateValueMap(SI, ResultReg);
2647  return true;
2648 }
2649 
2650 bool AArch64FastISel::selectSelect(const Instruction *I) {
2651  assert(isa<SelectInst>(I) && "Expected a select instruction.");
2652  MVT VT;
2653  if (!isTypeSupported(I->getType(), VT))
2654  return false;
2655 
2656  unsigned Opc;
2657  const TargetRegisterClass *RC;
2658  switch (VT.SimpleTy) {
2659  default:
2660  return false;
2661  case MVT::i1:
2662  case MVT::i8:
2663  case MVT::i16:
2664  case MVT::i32:
2665  Opc = AArch64::CSELWr;
2666  RC = &AArch64::GPR32RegClass;
2667  break;
2668  case MVT::i64:
2669  Opc = AArch64::CSELXr;
2670  RC = &AArch64::GPR64RegClass;
2671  break;
2672  case MVT::f32:
2673  Opc = AArch64::FCSELSrrr;
2674  RC = &AArch64::FPR32RegClass;
2675  break;
2676  case MVT::f64:
2677  Opc = AArch64::FCSELDrrr;
2678  RC = &AArch64::FPR64RegClass;
2679  break;
2680  }
2681 
2682  const SelectInst *SI = cast<SelectInst>(I);
2683  const Value *Cond = SI->getCondition();
2686 
2687  if (optimizeSelect(SI))
2688  return true;
2689 
2690  // Try to pickup the flags, so we don't have to emit another compare.
2691  if (foldXALUIntrinsic(CC, I, Cond)) {
2692  // Fake request the condition to force emission of the XALU intrinsic.
2693  unsigned CondReg = getRegForValue(Cond);
2694  if (!CondReg)
2695  return false;
2696  } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2697  isValueAvailable(Cond)) {
2698  const auto *Cmp = cast<CmpInst>(Cond);
2699  // Try to optimize or fold the cmp.
2700  CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2701  const Value *FoldSelect = nullptr;
2702  switch (Predicate) {
2703  default:
2704  break;
2705  case CmpInst::FCMP_FALSE:
2706  FoldSelect = SI->getFalseValue();
2707  break;
2708  case CmpInst::FCMP_TRUE:
2709  FoldSelect = SI->getTrueValue();
2710  break;
2711  }
2712 
2713  if (FoldSelect) {
2714  unsigned SrcReg = getRegForValue(FoldSelect);
2715  if (!SrcReg)
2716  return false;
2717 
2718  updateValueMap(I, SrcReg);
2719  return true;
2720  }
2721 
2722  // Emit the cmp.
2723  if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2724  return false;
2725 
2726  // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2727  CC = getCompareCC(Predicate);
2728  switch (Predicate) {
2729  default:
2730  break;
2731  case CmpInst::FCMP_UEQ:
2732  ExtraCC = AArch64CC::EQ;
2733  CC = AArch64CC::VS;
2734  break;
2735  case CmpInst::FCMP_ONE:
2736  ExtraCC = AArch64CC::MI;
2737  CC = AArch64CC::GT;
2738  break;
2739  }
2740  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2741  } else {
2742  unsigned CondReg = getRegForValue(Cond);
2743  if (!CondReg)
2744  return false;
2745 
2746  const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2747  CondReg = constrainOperandRegClass(II, CondReg, 1);
2748 
2749  // Emit a TST instruction (ANDS wzr, reg, #imm).
2750  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
2751  AArch64::WZR)
2752  .addReg(CondReg)
2754  }
2755 
2756  unsigned Src1Reg = getRegForValue(SI->getTrueValue());
2757  unsigned Src2Reg = getRegForValue(SI->getFalseValue());
2758 
2759  if (!Src1Reg || !Src2Reg)
2760  return false;
2761 
2762  if (ExtraCC != AArch64CC::AL)
2763  Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC);
2764 
2765  unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC);
2766  updateValueMap(I, ResultReg);
2767  return true;
2768 }
2769 
2770 bool AArch64FastISel::selectFPExt(const Instruction *I) {
2771  Value *V = I->getOperand(0);
2772  if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2773  return false;
2774 
2775  unsigned Op = getRegForValue(V);
2776  if (Op == 0)
2777  return false;
2778 
2779  unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
2780  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
2781  ResultReg).addReg(Op);
2782  updateValueMap(I, ResultReg);
2783  return true;
2784 }
2785 
2786 bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2787  Value *V = I->getOperand(0);
2788  if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2789  return false;
2790 
2791  unsigned Op = getRegForValue(V);
2792  if (Op == 0)
2793  return false;
2794 
2795  unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
2796  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
2797  ResultReg).addReg(Op);
2798  updateValueMap(I, ResultReg);
2799  return true;
2800 }
2801 
2802 // FPToUI and FPToSI
2803 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2804  MVT DestVT;
2805  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2806  return false;
2807 
2808  unsigned SrcReg = getRegForValue(I->getOperand(0));
2809  if (SrcReg == 0)
2810  return false;
2811 
2812  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2813  if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
2814  return false;
2815 
2816  unsigned Opc;
2817  if (SrcVT == MVT::f64) {
2818  if (Signed)
2819  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2820  else
2821  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2822  } else {
2823  if (Signed)
2824  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2825  else
2826  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2827  }
2828  unsigned ResultReg = createResultReg(
2829  DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2830  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2831  .addReg(SrcReg);
2832  updateValueMap(I, ResultReg);
2833  return true;
2834 }
2835 
2836 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2837  MVT DestVT;
2838  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2839  return false;
2840  // Let regular ISEL handle FP16
2841  if (DestVT == MVT::f16)
2842  return false;
2843 
2844  assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2845  "Unexpected value type.");
2846 
2847  unsigned SrcReg = getRegForValue(I->getOperand(0));
2848  if (!SrcReg)
2849  return false;
2850 
2851  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2852 
2853  // Handle sign-extension.
2854  if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2855  SrcReg =
2856  emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2857  if (!SrcReg)
2858  return false;
2859  }
2860 
2861  unsigned Opc;
2862  if (SrcVT == MVT::i64) {
2863  if (Signed)
2864  Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2865  else
2866  Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2867  } else {
2868  if (Signed)
2869  Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2870  else
2871  Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2872  }
2873 
2874  unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg);
2875  updateValueMap(I, ResultReg);
2876  return true;
2877 }
2878 
2879 bool AArch64FastISel::fastLowerArguments() {
2880  if (!FuncInfo.CanLowerReturn)
2881  return false;
2882 
2883  const Function *F = FuncInfo.Fn;
2884  if (F->isVarArg())
2885  return false;
2886 
2887  CallingConv::ID CC = F->getCallingConv();
2888  if (CC != CallingConv::C && CC != CallingConv::Swift)
2889  return false;
2890 
2891  if (Subtarget->hasCustomCallingConv())
2892  return false;
2893 
2894  // Only handle simple cases of up to 8 GPR and FPR each.
2895  unsigned GPRCnt = 0;
2896  unsigned FPRCnt = 0;
2897  for (auto const &Arg : F->args()) {
2898  if (Arg.hasAttribute(Attribute::ByVal) ||
2899  Arg.hasAttribute(Attribute::InReg) ||
2900  Arg.hasAttribute(Attribute::StructRet) ||
2901  Arg.hasAttribute(Attribute::SwiftSelf) ||
2902  Arg.hasAttribute(Attribute::SwiftError) ||
2903  Arg.hasAttribute(Attribute::Nest))
2904  return false;
2905 
2906  Type *ArgTy = Arg.getType();
2907  if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2908  return false;
2909 
2910  EVT ArgVT = TLI.getValueType(DL, ArgTy);
2911  if (!ArgVT.isSimple())
2912  return false;
2913 
2914  MVT VT = ArgVT.getSimpleVT().SimpleTy;
2915  if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2916  return false;
2917 
2918  if (VT.isVector() &&
2919  (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2920  return false;
2921 
2922  if (VT >= MVT::i1 && VT <= MVT::i64)
2923  ++GPRCnt;
2924  else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2925  VT.is128BitVector())
2926  ++FPRCnt;
2927  else
2928  return false;
2929 
2930  if (GPRCnt > 8 || FPRCnt > 8)
2931  return false;
2932  }
2933 
2934  static const MCPhysReg Registers[6][8] = {
2935  { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2936  AArch64::W5, AArch64::W6, AArch64::W7 },
2937  { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2938  AArch64::X5, AArch64::X6, AArch64::X7 },
2939  { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2940  AArch64::H5, AArch64::H6, AArch64::H7 },
2941  { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2942  AArch64::S5, AArch64::S6, AArch64::S7 },
2943  { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2944  AArch64::D5, AArch64::D6, AArch64::D7 },
2945  { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2946  AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2947  };
2948 
2949  unsigned GPRIdx = 0;
2950  unsigned FPRIdx = 0;
2951  for (auto const &Arg : F->args()) {
2952  MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2953  unsigned SrcReg;
2954  const TargetRegisterClass *RC;
2955  if (VT >= MVT::i1 && VT <= MVT::i32) {
2956  SrcReg = Registers[0][GPRIdx++];
2957  RC = &AArch64::GPR32RegClass;
2958  VT = MVT::i32;
2959  } else if (VT == MVT::i64) {
2960  SrcReg = Registers[1][GPRIdx++];
2961  RC = &AArch64::GPR64RegClass;
2962  } else if (VT == MVT::f16) {
2963  SrcReg = Registers[2][FPRIdx++];
2964  RC = &AArch64::FPR16RegClass;
2965  } else if (VT == MVT::f32) {
2966  SrcReg = Registers[3][FPRIdx++];
2967  RC = &AArch64::FPR32RegClass;
2968  } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2969  SrcReg = Registers[4][FPRIdx++];
2970  RC = &AArch64::FPR64RegClass;
2971  } else if (VT.is128BitVector()) {
2972  SrcReg = Registers[5][FPRIdx++];
2973  RC = &AArch64::FPR128RegClass;
2974  } else
2975  llvm_unreachable("Unexpected value type.");
2976 
2977  unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
2978  // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
2979  // Without this, EmitLiveInCopies may eliminate the livein if its only
2980  // use is a bitcast (which isn't turned into an instruction).
2981  unsigned ResultReg = createResultReg(RC);
2982  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2983  TII.get(TargetOpcode::COPY), ResultReg)
2984  .addReg(DstReg, getKillRegState(true));
2985  updateValueMap(&Arg, ResultReg);
2986  }
2987  return true;
2988 }
2989 
2990 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
2991  SmallVectorImpl<MVT> &OutVTs,
2992  unsigned &NumBytes) {
2993  CallingConv::ID CC = CLI.CallConv;
2995  CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
2996  CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
2997 
2998  // Get a count of how many bytes are to be pushed on the stack.
2999  NumBytes = CCInfo.getNextStackOffset();
3000 
3001  // Issue CALLSEQ_START
3002  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3003  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
3004  .addImm(NumBytes).addImm(0);
3005 
3006  // Process the args.
3007  for (CCValAssign &VA : ArgLocs) {
3008  const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3009  MVT ArgVT = OutVTs[VA.getValNo()];
3010 
3011  unsigned ArgReg = getRegForValue(ArgVal);
3012  if (!ArgReg)
3013  return false;
3014 
3015  // Handle arg promotion: SExt, ZExt, AExt.
3016  switch (VA.getLocInfo()) {
3017  case CCValAssign::Full:
3018  break;
3019  case CCValAssign::SExt: {
3020  MVT DestVT = VA.getLocVT();
3021  MVT SrcVT = ArgVT;
3022  ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3023  if (!ArgReg)
3024  return false;
3025  break;
3026  }
3027  case CCValAssign::AExt:
3028  // Intentional fall-through.
3029  case CCValAssign::ZExt: {
3030  MVT DestVT = VA.getLocVT();
3031  MVT SrcVT = ArgVT;
3032  ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3033  if (!ArgReg)
3034  return false;
3035  break;
3036  }
3037  default:
3038  llvm_unreachable("Unknown arg promotion!");
3039  }
3040 
3041  // Now copy/store arg to correct locations.
3042  if (VA.isRegLoc() && !VA.needsCustom()) {
3043  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3044  TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3045  CLI.OutRegs.push_back(VA.getLocReg());
3046  } else if (VA.needsCustom()) {
3047  // FIXME: Handle custom args.
3048  return false;
3049  } else {
3050  assert(VA.isMemLoc() && "Assuming store on stack.");
3051 
3052  // Don't emit stores for undef values.
3053  if (isa<UndefValue>(ArgVal))
3054  continue;
3055 
3056  // Need to store on the stack.
3057  unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3058 
3059  unsigned BEAlign = 0;
3060  if (ArgSize < 8 && !Subtarget->isLittleEndian())
3061  BEAlign = 8 - ArgSize;
3062 
3063  Address Addr;
3064  Addr.setKind(Address::RegBase);
3065  Addr.setReg(AArch64::SP);
3066  Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3067 
3068  Align Alignment = DL.getABITypeAlign(ArgVal->getType());
3069  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3070  MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3071  MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3072 
3073  if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3074  return false;
3075  }
3076  }
3077  return true;
3078 }
3079 
3080 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
3081  unsigned NumBytes) {
3082  CallingConv::ID CC = CLI.CallConv;
3083 
3084  // Issue CALLSEQ_END
3085  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3086  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3087  .addImm(NumBytes).addImm(0);
3088 
3089  // Now the return value.
3090  if (RetVT != MVT::isVoid) {
3092  CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3093  CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
3094 
3095  // Only handle a single return value.
3096  if (RVLocs.size() != 1)
3097  return false;
3098 
3099  // Copy all of the result registers out of their specified physreg.
3100  MVT CopyVT = RVLocs[0].getValVT();
3101 
3102  // TODO: Handle big-endian results
3103  if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3104  return false;
3105 
3106  unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
3107  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3108  TII.get(TargetOpcode::COPY), ResultReg)
3109  .addReg(RVLocs[0].getLocReg());
3110  CLI.InRegs.push_back(RVLocs[0].getLocReg());
3111 
3112  CLI.ResultReg = ResultReg;
3113  CLI.NumResultRegs = 1;
3114  }
3115 
3116  return true;
3117 }
3118 
3119 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3120  CallingConv::ID CC = CLI.CallConv;
3121  bool IsTailCall = CLI.IsTailCall;
3122  bool IsVarArg = CLI.IsVarArg;
3123  const Value *Callee = CLI.Callee;
3124  MCSymbol *Symbol = CLI.Symbol;
3125 
3126  if (!Callee && !Symbol)
3127  return false;
3128 
3129  // Allow SelectionDAG isel to handle tail calls.
3130  if (IsTailCall)
3131  return false;
3132 
3133  // FIXME: we could and should support this, but for now correctness at -O0 is
3134  // more important.
3135  if (Subtarget->isTargetILP32())
3136  return false;
3137 
3138  CodeModel::Model CM = TM.getCodeModel();
3139  // Only support the small-addressing and large code models.
3140  if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3141  return false;
3142 
3143  // FIXME: Add large code model support for ELF.
3144  if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3145  return false;
3146 
3147  // Let SDISel handle vararg functions.
3148  if (IsVarArg)
3149  return false;
3150 
3151  // FIXME: Only handle *simple* calls for now.
3152  MVT RetVT;
3153  if (CLI.RetTy->isVoidTy())
3154  RetVT = MVT::isVoid;
3155  else if (!isTypeLegal(CLI.RetTy, RetVT))
3156  return false;
3157 
3158  for (auto Flag : CLI.OutFlags)
3159  if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3160  Flag.isSwiftSelf() || Flag.isSwiftError())
3161  return false;
3162 
3163  // Set up the argument vectors.
3164  SmallVector<MVT, 16> OutVTs;
3165  OutVTs.reserve(CLI.OutVals.size());
3166 
3167  for (auto *Val : CLI.OutVals) {
3168  MVT VT;
3169  if (!isTypeLegal(Val->getType(), VT) &&
3170  !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3171  return false;
3172 
3173  // We don't handle vector parameters yet.
3174  if (VT.isVector() || VT.getSizeInBits() > 64)
3175  return false;
3176 
3177  OutVTs.push_back(VT);
3178  }
3179 
3180  Address Addr;
3181  if (Callee && !computeCallAddress(Callee, Addr))
3182  return false;
3183 
3184  // The weak function target may be zero; in that case we must use indirect
3185  // addressing via a stub on windows as it may be out of range for a
3186  // PC-relative jump.
3187  if (Subtarget->isTargetWindows() && Addr.getGlobalValue() &&
3188  Addr.getGlobalValue()->hasExternalWeakLinkage())
3189  return false;
3190 
3191  // Handle the arguments now that we've gotten them.
3192  unsigned NumBytes;
3193  if (!processCallArgs(CLI, OutVTs, NumBytes))
3194  return false;
3195 
3196  const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3197  if (RegInfo->isAnyArgRegReserved(*MF))
3198  RegInfo->emitReservedArgRegCallError(*MF);
3199 
3200  // Issue the call.
3201  MachineInstrBuilder MIB;
3202  if (Subtarget->useSmallAddressing()) {
3203  const MCInstrDesc &II =
3204  TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL);
3205  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
3206  if (Symbol)
3207  MIB.addSym(Symbol, 0);
3208  else if (Addr.getGlobalValue())
3209  MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3210  else if (Addr.getReg()) {
3211  unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3212  MIB.addReg(Reg);
3213  } else
3214  return false;
3215  } else {
3216  unsigned CallReg = 0;
3217  if (Symbol) {
3218  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3219  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
3220  ADRPReg)
3222 
3223  CallReg = createResultReg(&AArch64::GPR64RegClass);
3224  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3225  TII.get(AArch64::LDRXui), CallReg)
3226  .addReg(ADRPReg)
3227  .addSym(Symbol,
3229  } else if (Addr.getGlobalValue())
3230  CallReg = materializeGV(Addr.getGlobalValue());
3231  else if (Addr.getReg())
3232  CallReg = Addr.getReg();
3233 
3234  if (!CallReg)
3235  return false;
3236 
3237  const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF));
3238  CallReg = constrainOperandRegClass(II, CallReg, 0);
3239  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
3240  }
3241 
3242  // Add implicit physical register uses to the call.
3243  for (auto Reg : CLI.OutRegs)
3245 
3246  // Add a register mask with the call-preserved registers.
3247  // Proper defs for return values will be added by setPhysRegsDeadExcept().
3248  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3249 
3250  CLI.Call = MIB;
3251 
3252  // Finish off the call including any return values.
3253  return finishCall(CLI, RetVT, NumBytes);
3254 }
3255 
3256 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
3257  if (Alignment)
3258  return Len / Alignment <= 4;
3259  else
3260  return Len < 32;
3261 }
3262 
3263 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3264  uint64_t Len, unsigned Alignment) {
3265  // Make sure we don't bloat code by inlining very large memcpy's.
3266  if (!isMemCpySmall(Len, Alignment))
3267  return false;
3268 
3269  int64_t UnscaledOffset = 0;
3270  Address OrigDest = Dest;
3271  Address OrigSrc = Src;
3272 
3273  while (Len) {
3274  MVT VT;
3275  if (!Alignment || Alignment >= 8) {
3276  if (Len >= 8)
3277  VT = MVT::i64;
3278  else if (Len >= 4)
3279  VT = MVT::i32;
3280  else if (Len >= 2)
3281  VT = MVT::i16;
3282  else {
3283  VT = MVT::i8;
3284  }
3285  } else {
3286  // Bound based on alignment.
3287  if (Len >= 4 && Alignment == 4)
3288  VT = MVT::i32;
3289  else if (Len >= 2 && Alignment == 2)
3290  VT = MVT::i16;
3291  else {
3292  VT = MVT::i8;
3293  }
3294  }
3295 
3296  unsigned ResultReg = emitLoad(VT, VT, Src);
3297  if (!ResultReg)
3298  return false;
3299 
3300  if (!emitStore(VT, ResultReg, Dest))
3301  return false;
3302 
3303  int64_t Size = VT.getSizeInBits() / 8;
3304  Len -= Size;
3305  UnscaledOffset += Size;
3306 
3307  // We need to recompute the unscaled offset for each iteration.
3308  Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3309  Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3310  }
3311 
3312  return true;
3313 }
3314 
3315 /// Check if it is possible to fold the condition from the XALU intrinsic
3316 /// into the user. The condition code will only be updated on success.
3317 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3318  const Instruction *I,
3319  const Value *Cond) {
3320  if (!isa<ExtractValueInst>(Cond))
3321  return false;
3322 
3323  const auto *EV = cast<ExtractValueInst>(Cond);
3324  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3325  return false;
3326 
3327  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3328  MVT RetVT;
3329  const Function *Callee = II->getCalledFunction();
3330  Type *RetTy =
3331  cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3332  if (!isTypeLegal(RetTy, RetVT))
3333  return false;
3334 
3335  if (RetVT != MVT::i32 && RetVT != MVT::i64)
3336  return false;
3337 
3338  const Value *LHS = II->getArgOperand(0);
3339  const Value *RHS = II->getArgOperand(1);
3340 
3341  // Canonicalize immediate to the RHS.
3342  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3343  std::swap(LHS, RHS);
3344 
3345  // Simplify multiplies.
3346  Intrinsic::ID IID = II->getIntrinsicID();
3347  switch (IID) {
3348  default:
3349  break;
3350  case Intrinsic::smul_with_overflow:
3351  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3352  if (C->getValue() == 2)
3353  IID = Intrinsic::sadd_with_overflow;
3354  break;
3355  case Intrinsic::umul_with_overflow:
3356  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3357  if (C->getValue() == 2)
3358  IID = Intrinsic::uadd_with_overflow;
3359  break;
3360  }
3361 
3362  AArch64CC::CondCode TmpCC;
3363  switch (IID) {
3364  default:
3365  return false;
3366  case Intrinsic::sadd_with_overflow:
3367  case Intrinsic::ssub_with_overflow:
3368  TmpCC = AArch64CC::VS;
3369  break;
3370  case Intrinsic::uadd_with_overflow:
3371  TmpCC = AArch64CC::HS;
3372  break;
3373  case Intrinsic::usub_with_overflow:
3374  TmpCC = AArch64CC::LO;
3375  break;
3376  case Intrinsic::smul_with_overflow:
3377  case Intrinsic::umul_with_overflow:
3378  TmpCC = AArch64CC::NE;
3379  break;
3380  }
3381 
3382  // Check if both instructions are in the same basic block.
3383  if (!isValueAvailable(II))
3384  return false;
3385 
3386  // Make sure nothing is in the way
3389  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3390  // We only expect extractvalue instructions between the intrinsic and the
3391  // instruction to be selected.
3392  if (!isa<ExtractValueInst>(Itr))
3393  return false;
3394 
3395  // Check that the extractvalue operand comes from the intrinsic.
3396  const auto *EVI = cast<ExtractValueInst>(Itr);
3397  if (EVI->getAggregateOperand() != II)
3398  return false;
3399  }
3400 
3401  CC = TmpCC;
3402  return true;
3403 }
3404 
3405 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3406  // FIXME: Handle more intrinsics.
3407  switch (II->getIntrinsicID()) {
3408  default: return false;
3409  case Intrinsic::frameaddress: {
3410  MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3411  MFI.setFrameAddressIsTaken(true);
3412 
3413  const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3414  Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3415  Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3416  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3417  TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3418  // Recursively load frame address
3419  // ldr x0, [fp]
3420  // ldr x0, [x0]
3421  // ldr x0, [x0]
3422  // ...
3423  unsigned DestReg;
3424  unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3425  while (Depth--) {
3426  DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3427  SrcReg, 0);
3428  assert(DestReg && "Unexpected LDR instruction emission failure.");
3429  SrcReg = DestReg;
3430  }
3431 
3432  updateValueMap(II, SrcReg);
3433  return true;
3434  }
3435  case Intrinsic::sponentry: {
3436  MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3437 
3438  // SP = FP + Fixed Object + 16
3439  int FI = MFI.CreateFixedObject(4, 0, false);
3440  unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
3441  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3442  TII.get(AArch64::ADDXri), ResultReg)
3443  .addFrameIndex(FI)
3444  .addImm(0)
3445  .addImm(0);
3446 
3447  updateValueMap(II, ResultReg);
3448  return true;
3449  }
3450  case Intrinsic::memcpy:
3451  case Intrinsic::memmove: {
3452  const auto *MTI = cast<MemTransferInst>(II);
3453  // Don't handle volatile.
3454  if (MTI->isVolatile())
3455  return false;
3456 
3457  // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
3458  // we would emit dead code because we don't currently handle memmoves.
3459  bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3460  if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3461  // Small memcpy's are common enough that we want to do them without a call
3462  // if possible.
3463  uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3464  unsigned Alignment = MinAlign(MTI->getDestAlignment(),
3465  MTI->getSourceAlignment());
3466  if (isMemCpySmall(Len, Alignment)) {
3467  Address Dest, Src;
3468  if (!computeAddress(MTI->getRawDest(), Dest) ||
3469  !computeAddress(MTI->getRawSource(), Src))
3470  return false;
3471  if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3472  return true;
3473  }
3474  }
3475 
3476  if (!MTI->getLength()->getType()->isIntegerTy(64))
3477  return false;
3478 
3479  if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3480  // Fast instruction selection doesn't support the special
3481  // address spaces.
3482  return false;
3483 
3484  const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3485  return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 1);
3486  }
3487  case Intrinsic::memset: {
3488  const MemSetInst *MSI = cast<MemSetInst>(II);
3489  // Don't handle volatile.
3490  if (MSI->isVolatile())
3491  return false;
3492 
3493  if (!MSI->getLength()->getType()->isIntegerTy(64))
3494  return false;
3495 
3496  if (MSI->getDestAddressSpace() > 255)
3497  // Fast instruction selection doesn't support the special
3498  // address spaces.
3499  return false;
3500 
3501  return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
3502  }
3503  case Intrinsic::sin:
3504  case Intrinsic::cos:
3505  case Intrinsic::pow: {
3506  MVT RetVT;
3507  if (!isTypeLegal(II->getType(), RetVT))
3508  return false;
3509 
3510  if (RetVT != MVT::f32 && RetVT != MVT::f64)
3511  return false;
3512 
3513  static const RTLIB::Libcall LibCallTable[3][2] = {
3514  { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3515  { RTLIB::COS_F32, RTLIB::COS_F64 },
3516  { RTLIB::POW_F32, RTLIB::POW_F64 }
3517  };
3518  RTLIB::Libcall LC;
3519  bool Is64Bit = RetVT == MVT::f64;
3520  switch (II->getIntrinsicID()) {
3521  default:
3522  llvm_unreachable("Unexpected intrinsic.");
3523  case Intrinsic::sin:
3524  LC = LibCallTable[0][Is64Bit];
3525  break;
3526  case Intrinsic::cos:
3527  LC = LibCallTable[1][Is64Bit];
3528  break;
3529  case Intrinsic::pow:
3530  LC = LibCallTable[2][Is64Bit];
3531  break;
3532  }
3533 
3534  ArgListTy Args;
3535  Args.reserve(II->getNumArgOperands());
3536 
3537  // Populate the argument list.
3538  for (auto &Arg : II->arg_operands()) {
3539  ArgListEntry Entry;
3540  Entry.Val = Arg;
3541  Entry.Ty = Arg->getType();
3542  Args.push_back(Entry);
3543  }
3544 
3545  CallLoweringInfo CLI;
3546  MCContext &Ctx = MF->getContext();
3547  CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3548  TLI.getLibcallName(LC), std::move(Args));
3549  if (!lowerCallTo(CLI))
3550  return false;
3551  updateValueMap(II, CLI.ResultReg);
3552  return true;
3553  }
3554  case Intrinsic::fabs: {
3555  MVT VT;
3556  if (!isTypeLegal(II->getType(), VT))
3557  return false;
3558 
3559  unsigned Opc;
3560  switch (VT.SimpleTy) {
3561  default:
3562  return false;
3563  case MVT::f32:
3564  Opc = AArch64::FABSSr;
3565  break;
3566  case MVT::f64:
3567  Opc = AArch64::FABSDr;
3568  break;
3569  }
3570  unsigned SrcReg = getRegForValue(II->getOperand(0));
3571  if (!SrcReg)
3572  return false;
3573  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3574  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3575  .addReg(SrcReg);
3576  updateValueMap(II, ResultReg);
3577  return true;
3578  }
3579  case Intrinsic::trap:
3580  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3581  .addImm(1);
3582  return true;
3583  case Intrinsic::debugtrap:
3584  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3585  .addImm(0xF000);
3586  return true;
3587 
3588  case Intrinsic::sqrt: {
3589  Type *RetTy = II->getCalledFunction()->getReturnType();
3590 
3591  MVT VT;
3592  if (!isTypeLegal(RetTy, VT))
3593  return false;
3594 
3595  unsigned Op0Reg = getRegForValue(II->getOperand(0));
3596  if (!Op0Reg)
3597  return false;
3598 
3599  unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg);
3600  if (!ResultReg)
3601  return false;
3602 
3603  updateValueMap(II, ResultReg);
3604  return true;
3605  }
3606  case Intrinsic::sadd_with_overflow:
3607  case Intrinsic::uadd_with_overflow:
3608  case Intrinsic::ssub_with_overflow:
3609  case Intrinsic::usub_with_overflow:
3610  case Intrinsic::smul_with_overflow:
3611  case Intrinsic::umul_with_overflow: {
3612  // This implements the basic lowering of the xalu with overflow intrinsics.
3613  const Function *Callee = II->getCalledFunction();
3614  auto *Ty = cast<StructType>(Callee->getReturnType());
3615  Type *RetTy = Ty->getTypeAtIndex(0U);
3616 
3617  MVT VT;
3618  if (!isTypeLegal(RetTy, VT))
3619  return false;
3620 
3621  if (VT != MVT::i32 && VT != MVT::i64)
3622  return false;
3623 
3624  const Value *LHS = II->getArgOperand(0);
3625  const Value *RHS = II->getArgOperand(1);
3626  // Canonicalize immediate to the RHS.
3627  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3628  std::swap(LHS, RHS);
3629 
3630  // Simplify multiplies.
3631  Intrinsic::ID IID = II->getIntrinsicID();
3632  switch (IID) {
3633  default:
3634  break;
3635  case Intrinsic::smul_with_overflow:
3636  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3637  if (C->getValue() == 2) {
3638  IID = Intrinsic::sadd_with_overflow;
3639  RHS = LHS;
3640  }
3641  break;
3642  case Intrinsic::umul_with_overflow:
3643  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3644  if (C->getValue() == 2) {
3645  IID = Intrinsic::uadd_with_overflow;
3646  RHS = LHS;
3647  }
3648  break;
3649  }
3650 
3651  unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3653  switch (IID) {
3654  default: llvm_unreachable("Unexpected intrinsic!");
3655  case Intrinsic::sadd_with_overflow:
3656  ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3657  CC = AArch64CC::VS;
3658  break;
3659  case Intrinsic::uadd_with_overflow:
3660  ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3661  CC = AArch64CC::HS;
3662  break;
3663  case Intrinsic::ssub_with_overflow:
3664  ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3665  CC = AArch64CC::VS;
3666  break;
3667  case Intrinsic::usub_with_overflow:
3668  ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3669  CC = AArch64CC::LO;
3670  break;
3671  case Intrinsic::smul_with_overflow: {
3672  CC = AArch64CC::NE;
3673  unsigned LHSReg = getRegForValue(LHS);
3674  if (!LHSReg)
3675  return false;
3676 
3677  unsigned RHSReg = getRegForValue(RHS);
3678  if (!RHSReg)
3679  return false;
3680 
3681  if (VT == MVT::i32) {
3682  MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg);
3683  unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg, 32);
3684  MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3685  ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, AArch64::sub_32);
3686  emitSubs_rs(VT, ShiftReg, MulReg, AArch64_AM::ASR, 31,
3687  /*WantResult=*/false);
3688  } else {
3689  assert(VT == MVT::i64 && "Unexpected value type.");
3690  // LHSReg and RHSReg cannot be killed by this Mul, since they are
3691  // reused in the next instruction.
3692  MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3693  unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg);
3694  emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63,
3695  /*WantResult=*/false);
3696  }
3697  break;
3698  }
3699  case Intrinsic::umul_with_overflow: {
3700  CC = AArch64CC::NE;
3701  unsigned LHSReg = getRegForValue(LHS);
3702  if (!LHSReg)
3703  return false;
3704 
3705  unsigned RHSReg = getRegForValue(RHS);
3706  if (!RHSReg)
3707  return false;
3708 
3709  if (VT == MVT::i32) {
3710  MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg);
3711  emitSubs_rs(MVT::i64, AArch64::XZR, MulReg, AArch64_AM::LSR, 32,
3712  /*WantResult=*/false);
3713  MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3714  } else {
3715  assert(VT == MVT::i64 && "Unexpected value type.");
3716  // LHSReg and RHSReg cannot be killed by this Mul, since they are
3717  // reused in the next instruction.
3718  MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3719  unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg);
3720  emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false);
3721  }
3722  break;
3723  }
3724  }
3725 
3726  if (MulReg) {
3727  ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3728  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3729  TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3730  }
3731 
3732  if (!ResultReg1)
3733  return false;
3734 
3735  ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3736  AArch64::WZR, AArch64::WZR,
3737  getInvertedCondCode(CC));
3738  (void)ResultReg2;
3739  assert((ResultReg1 + 1) == ResultReg2 &&
3740  "Nonconsecutive result registers.");
3741  updateValueMap(II, ResultReg1, 2);
3742  return true;
3743  }
3744  }
3745  return false;
3746 }
3747 
3748 bool AArch64FastISel::selectRet(const Instruction *I) {
3749  const ReturnInst *Ret = cast<ReturnInst>(I);
3750  const Function &F = *I->getParent()->getParent();
3751 
3752  if (!FuncInfo.CanLowerReturn)
3753  return false;
3754 
3755  if (F.isVarArg())
3756  return false;
3757 
3758  if (TLI.supportSwiftError() &&
3759  F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3760  return false;
3761 
3762  if (TLI.supportSplitCSR(FuncInfo.MF))
3763  return false;
3764 
3765  // Build a list of return value registers.
3766  SmallVector<unsigned, 4> RetRegs;
3767 
3768  if (Ret->getNumOperands() > 0) {
3769  CallingConv::ID CC = F.getCallingConv();
3771  GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3772 
3773  // Analyze operands of the call, assigning locations to each operand.
3775  CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3778  CCInfo.AnalyzeReturn(Outs, RetCC);
3779 
3780  // Only handle a single return value for now.
3781  if (ValLocs.size() != 1)
3782  return false;
3783 
3784  CCValAssign &VA = ValLocs[0];
3785  const Value *RV = Ret->getOperand(0);
3786 
3787  // Don't bother handling odd stuff for now.
3788  if ((VA.getLocInfo() != CCValAssign::Full) &&
3789  (VA.getLocInfo() != CCValAssign::BCvt))
3790  return false;
3791 
3792  // Only handle register returns for now.
3793  if (!VA.isRegLoc())
3794  return false;
3795 
3796  unsigned Reg = getRegForValue(RV);
3797  if (Reg == 0)
3798  return false;
3799 
3800  unsigned SrcReg = Reg + VA.getValNo();
3801  Register DestReg = VA.getLocReg();
3802  // Avoid a cross-class copy. This is very unlikely.
3803  if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3804  return false;
3805 
3806  EVT RVEVT = TLI.getValueType(DL, RV->getType());
3807  if (!RVEVT.isSimple())
3808  return false;
3809 
3810  // Vectors (of > 1 lane) in big endian need tricky handling.
3811  if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
3812  !Subtarget->isLittleEndian())
3813  return false;
3814 
3815  MVT RVVT = RVEVT.getSimpleVT();
3816  if (RVVT == MVT::f128)
3817  return false;
3818 
3819  MVT DestVT = VA.getValVT();
3820  // Special handling for extended integers.
3821  if (RVVT != DestVT) {
3822  if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3823  return false;
3824 
3825  if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3826  return false;
3827 
3828  bool IsZExt = Outs[0].Flags.isZExt();
3829  SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3830  if (SrcReg == 0)
3831  return false;
3832  }
3833 
3834  // "Callee" (i.e. value producer) zero extends pointers at function
3835  // boundary.
3836  if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
3837  SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff);
3838 
3839  // Make the copy.
3840  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3841  TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3842 
3843  // Add register to return instruction.
3844  RetRegs.push_back(VA.getLocReg());
3845  }
3846 
3847  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3848  TII.get(AArch64::RET_ReallyLR));
3849  for (unsigned RetReg : RetRegs)
3850  MIB.addReg(RetReg, RegState::Implicit);
3851  return true;
3852 }
3853 
3854 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3855  Type *DestTy = I->getType();
3856  Value *Op = I->getOperand(0);
3857  Type *SrcTy = Op->getType();
3858 
3859  EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3860  EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3861  if (!SrcEVT.isSimple())
3862  return false;
3863  if (!DestEVT.isSimple())
3864  return false;
3865 
3866  MVT SrcVT = SrcEVT.getSimpleVT();
3867  MVT DestVT = DestEVT.getSimpleVT();
3868 
3869  if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3870  SrcVT != MVT::i8)
3871  return false;
3872  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3873  DestVT != MVT::i1)
3874  return false;
3875 
3876  unsigned SrcReg = getRegForValue(Op);
3877  if (!SrcReg)
3878  return false;
3879 
3880  // If we're truncating from i64 to a smaller non-legal type then generate an
3881  // AND. Otherwise, we know the high bits are undefined and a truncate only
3882  // generate a COPY. We cannot mark the source register also as result
3883  // register, because this can incorrectly transfer the kill flag onto the
3884  // source register.
3885  unsigned ResultReg;
3886  if (SrcVT == MVT::i64) {
3887  uint64_t Mask = 0;
3888  switch (DestVT.SimpleTy) {
3889  default:
3890  // Trunc i64 to i32 is handled by the target-independent fast-isel.
3891  return false;
3892  case MVT::i1:
3893  Mask = 0x1;
3894  break;
3895  case MVT::i8:
3896  Mask = 0xff;
3897  break;
3898  case MVT::i16:
3899  Mask = 0xffff;
3900  break;
3901  }
3902  // Issue an extract_subreg to get the lower 32-bits.
3903  unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg,
3904  AArch64::sub_32);
3905  // Create the AND instruction which performs the actual truncation.
3906  ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask);
3907  assert(ResultReg && "Unexpected AND instruction emission failure.");
3908  } else {
3909  ResultReg = createResultReg(&AArch64::GPR32RegClass);
3910  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3911  TII.get(TargetOpcode::COPY), ResultReg)
3912  .addReg(SrcReg);
3913  }
3914 
3915  updateValueMap(I, ResultReg);
3916  return true;
3917 }
3918 
3919 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
3920  assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
3921  DestVT == MVT::i64) &&
3922  "Unexpected value type.");
3923  // Handle i8 and i16 as i32.
3924  if (DestVT == MVT::i8 || DestVT == MVT::i16)
3925  DestVT = MVT::i32;
3926 
3927  if (IsZExt) {
3928  unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1);
3929  assert(ResultReg && "Unexpected AND instruction emission failure.");
3930  if (DestVT == MVT::i64) {
3931  // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
3932  // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
3933  Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3934  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3935  TII.get(AArch64::SUBREG_TO_REG), Reg64)
3936  .addImm(0)
3937  .addReg(ResultReg)
3938  .addImm(AArch64::sub_32);
3939  ResultReg = Reg64;
3940  }
3941  return ResultReg;
3942  } else {
3943  if (DestVT == MVT::i64) {
3944  // FIXME: We're SExt i1 to i64.
3945  return 0;
3946  }
3947  return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
3948  0, 0);
3949  }
3950 }
3951 
3952 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
3953  unsigned Opc, ZReg;
3954  switch (RetVT.SimpleTy) {
3955  default: return 0;
3956  case MVT::i8:
3957  case MVT::i16:
3958  case MVT::i32:
3959  RetVT = MVT::i32;
3960  Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
3961  case MVT::i64:
3962  Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
3963  }
3964 
3965  const TargetRegisterClass *RC =
3966  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3967  return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg);
3968 }
3969 
3970 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
3971  if (RetVT != MVT::i64)
3972  return 0;
3973 
3974  return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
3975  Op0, Op1, AArch64::XZR);
3976 }
3977 
3978 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
3979  if (RetVT != MVT::i64)
3980  return 0;
3981 
3982  return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
3983  Op0, Op1, AArch64::XZR);
3984 }
3985 
3986 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg,
3987  unsigned Op1Reg) {
3988  unsigned Opc = 0;
3989  bool NeedTrunc = false;
3990  uint64_t Mask = 0;
3991  switch (RetVT.SimpleTy) {
3992  default: return 0;
3993  case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
3994  case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
3995  case MVT::i32: Opc = AArch64::LSLVWr; break;
3996  case MVT::i64: Opc = AArch64::LSLVXr; break;
3997  }
3998 
3999  const TargetRegisterClass *RC =
4000  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4001  if (NeedTrunc)
4002  Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4003 
4004  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4005  if (NeedTrunc)
4006  ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4007  return ResultReg;
4008 }
4009 
4010 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4011  uint64_t Shift, bool IsZExt) {
4012  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4013  "Unexpected source/return type pair.");
4014  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4015  SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4016  "Unexpected source value type.");
4017  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4018  RetVT == MVT::i64) && "Unexpected return value type.");
4019 
4020  bool Is64Bit = (RetVT == MVT::i64);
4021  unsigned RegSize = Is64Bit ? 64 : 32;
4022  unsigned DstBits = RetVT.getSizeInBits();
4023  unsigned SrcBits = SrcVT.getSizeInBits();
4024  const TargetRegisterClass *RC =
4025  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4026 
4027  // Just emit a copy for "zero" shifts.
4028  if (Shift == 0) {
4029  if (RetVT == SrcVT) {
4030  unsigned ResultReg = createResultReg(RC);
4031  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4032  TII.get(TargetOpcode::COPY), ResultReg)
4033  .addReg(Op0);
4034  return ResultReg;
4035  } else
4036  return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4037  }
4038 
4039  // Don't deal with undefined shifts.
4040  if (Shift >= DstBits)
4041  return 0;
4042 
4043  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4044  // {S|U}BFM Wd, Wn, #r, #s
4045  // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4046 
4047  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4048  // %2 = shl i16 %1, 4
4049  // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4050  // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4051  // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4052  // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4053 
4054  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4055  // %2 = shl i16 %1, 8
4056  // Wd<32+7-24,32-24> = Wn<7:0>
4057  // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4058  // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4059  // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4060 
4061  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4062  // %2 = shl i16 %1, 12
4063  // Wd<32+3-20,32-20> = Wn<3:0>
4064  // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4065  // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4066  // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4067 
4068  unsigned ImmR = RegSize - Shift;
4069  // Limit the width to the length of the source type.
4070  unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4071  static const unsigned OpcTable[2][2] = {
4072  {AArch64::SBFMWri, AArch64::SBFMXri},
4073  {AArch64::UBFMWri, AArch64::UBFMXri}
4074  };
4075  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4076  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4077  Register TmpReg = MRI.createVirtualRegister(RC);
4078  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4079  TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4080  .addImm(0)
4081  .addReg(Op0)
4082  .addImm(AArch64::sub_32);
4083  Op0 = TmpReg;
4084  }
4085  return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4086 }
4087 
4088 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg,
4089  unsigned Op1Reg) {
4090  unsigned Opc = 0;
4091  bool NeedTrunc = false;
4092  uint64_t Mask = 0;
4093  switch (RetVT.SimpleTy) {
4094  default: return 0;
4095  case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
4096  case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4097  case MVT::i32: Opc = AArch64::LSRVWr; break;
4098  case MVT::i64: Opc = AArch64::LSRVXr; break;
4099  }
4100 
4101  const TargetRegisterClass *RC =
4102  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4103  if (NeedTrunc) {
4104  Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask);
4105  Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4106  }
4107  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4108  if (NeedTrunc)
4109  ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4110  return ResultReg;
4111 }
4112 
4113 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4114  uint64_t Shift, bool IsZExt) {
4115  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4116  "Unexpected source/return type pair.");
4117  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4118  SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4119  "Unexpected source value type.");
4120  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4121  RetVT == MVT::i64) && "Unexpected return value type.");
4122 
4123  bool Is64Bit = (RetVT == MVT::i64);
4124  unsigned RegSize = Is64Bit ? 64 : 32;
4125  unsigned DstBits = RetVT.getSizeInBits();
4126  unsigned SrcBits = SrcVT.getSizeInBits();
4127  const TargetRegisterClass *RC =
4128  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4129 
4130  // Just emit a copy for "zero" shifts.
4131  if (Shift == 0) {
4132  if (RetVT == SrcVT) {
4133  unsigned ResultReg = createResultReg(RC);
4134  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4135  TII.get(TargetOpcode::COPY), ResultReg)
4136  .addReg(Op0);
4137  return ResultReg;
4138  } else
4139  return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4140  }
4141 
4142  // Don't deal with undefined shifts.
4143  if (Shift >= DstBits)
4144  return 0;
4145 
4146  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4147  // {S|U}BFM Wd, Wn, #r, #s
4148  // Wd<s-r:0> = Wn<s:r> when r <= s
4149 
4150  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4151  // %2 = lshr i16 %1, 4
4152  // Wd<7-4:0> = Wn<7:4>
4153  // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4154  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4155  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4156 
4157  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4158  // %2 = lshr i16 %1, 8
4159  // Wd<7-7,0> = Wn<7:7>
4160  // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4161  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4162  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4163 
4164  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4165  // %2 = lshr i16 %1, 12
4166  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4167  // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4168  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4169  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4170 
4171  if (Shift >= SrcBits && IsZExt)
4172  return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4173 
4174  // It is not possible to fold a sign-extend into the LShr instruction. In this
4175  // case emit a sign-extend.
4176  if (!IsZExt) {
4177  Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4178  if (!Op0)
4179  return 0;
4180  SrcVT = RetVT;
4181  SrcBits = SrcVT.getSizeInBits();
4182  IsZExt = true;
4183  }
4184 
4185  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4186  unsigned ImmS = SrcBits - 1;
4187  static const unsigned OpcTable[2][2] = {
4188  {AArch64::SBFMWri, AArch64::SBFMXri},
4189  {AArch64::UBFMWri, AArch64::UBFMXri}
4190  };
4191  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4192  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4193  Register TmpReg = MRI.createVirtualRegister(RC);
4194  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4195  TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4196  .addImm(0)
4197  .addReg(Op0)
4198  .addImm(AArch64::sub_32);
4199  Op0 = TmpReg;
4200  }
4201  return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4202 }
4203 
4204 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg,
4205  unsigned Op1Reg) {
4206  unsigned Opc = 0;
4207  bool NeedTrunc = false;
4208  uint64_t Mask = 0;
4209  switch (RetVT.SimpleTy) {
4210  default: return 0;
4211  case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
4212  case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4213  case MVT::i32: Opc = AArch64::ASRVWr; break;
4214  case MVT::i64: Opc = AArch64::ASRVXr; break;
4215  }
4216 
4217  const TargetRegisterClass *RC =
4218  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4219  if (NeedTrunc) {
4220  Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
4221  Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4222  }
4223  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4224  if (NeedTrunc)
4225  ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4226  return ResultReg;
4227 }
4228 
4229 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4230  uint64_t Shift, bool IsZExt) {
4231  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4232  "Unexpected source/return type pair.");
4233  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4234  SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4235  "Unexpected source value type.");
4236  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4237  RetVT == MVT::i64) && "Unexpected return value type.");
4238 
4239  bool Is64Bit = (RetVT == MVT::i64);
4240  unsigned RegSize = Is64Bit ? 64 : 32;
4241  unsigned DstBits = RetVT.getSizeInBits();
4242  unsigned SrcBits = SrcVT.getSizeInBits();
4243  const TargetRegisterClass *RC =
4244  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4245 
4246  // Just emit a copy for "zero" shifts.
4247  if (Shift == 0) {
4248  if (RetVT == SrcVT) {
4249  unsigned ResultReg = createResultReg(RC);
4250  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4251  TII.get(TargetOpcode::COPY), ResultReg)
4252  .addReg(Op0);
4253  return ResultReg;
4254  } else
4255  return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4256  }
4257 
4258  // Don't deal with undefined shifts.
4259  if (Shift >= DstBits)
4260  return 0;
4261 
4262  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4263  // {S|U}BFM Wd, Wn, #r, #s
4264  // Wd<s-r:0> = Wn<s:r> when r <= s
4265 
4266  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4267  // %2 = ashr i16 %1, 4
4268  // Wd<7-4:0> = Wn<7:4>
4269  // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4270  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4271  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4272 
4273  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4274  // %2 = ashr i16 %1, 8
4275  // Wd<7-7,0> = Wn<7:7>
4276  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4277  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4278  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4279 
4280  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4281  // %2 = ashr i16 %1, 12
4282  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4283  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4284  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4285  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4286 
4287  if (Shift >= SrcBits && IsZExt)
4288  return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4289 
4290  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4291  unsigned ImmS = SrcBits - 1;
4292  static const unsigned OpcTable[2][2] = {
4293  {AArch64::SBFMWri, AArch64::SBFMXri},
4294  {AArch64::UBFMWri, AArch64::UBFMXri}
4295  };
4296  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4297  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4298  Register TmpReg = MRI.createVirtualRegister(RC);
4299  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4300  TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4301  .addImm(0)
4302  .addReg(Op0)
4303  .addImm(AArch64::sub_32);
4304  Op0 = TmpReg;
4305  }
4306  return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4307 }
4308 
4309 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4310  bool IsZExt) {
4311  assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4312 
4313  // FastISel does not have plumbing to deal with extensions where the SrcVT or
4314  // DestVT are odd things, so test to make sure that they are both types we can
4315  // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4316  // bail out to SelectionDAG.
4317  if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4318  (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4319  ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4320  (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4321  return 0;
4322 
4323  unsigned Opc;
4324  unsigned Imm = 0;
4325 
4326  switch (SrcVT.SimpleTy) {
4327  default:
4328  return 0;
4329  case MVT::i1:
4330  return emiti1Ext(SrcReg, DestVT, IsZExt);
4331  case MVT::i8:
4332  if (DestVT == MVT::i64)
4333  Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4334  else
4335  Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4336  Imm = 7;
4337  break;
4338  case MVT::i16:
4339  if (DestVT == MVT::i64)
4340  Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4341  else
4342  Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4343  Imm = 15;
4344  break;
4345  case MVT::i32:
4346  assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4347  Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4348  Imm = 31;
4349  break;
4350  }
4351 
4352  // Handle i8 and i16 as i32.
4353  if (DestVT == MVT::i8 || DestVT == MVT::i16)
4354  DestVT = MVT::i32;
4355  else if (DestVT == MVT::i64) {
4356  Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4357  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4358  TII.get(AArch64::SUBREG_TO_REG), Src64)
4359  .addImm(0)
4360  .addReg(SrcReg)
4361  .addImm(AArch64::sub_32);
4362  SrcReg = Src64;
4363  }
4364 
4365  const TargetRegisterClass *RC =
4366  (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4367  return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm);
4368 }
4369 
4370 static bool isZExtLoad(const MachineInstr *LI) {
4371  switch (LI->getOpcode()) {
4372  default:
4373  return false;
4374  case AArch64::LDURBBi:
4375  case AArch64::LDURHHi:
4376  case AArch64::LDURWi:
4377  case AArch64::LDRBBui:
4378  case AArch64::LDRHHui:
4379  case AArch64::LDRWui:
4380  case AArch64::LDRBBroX:
4381  case AArch64::LDRHHroX:
4382  case AArch64::LDRWroX:
4383  case AArch64::LDRBBroW:
4384  case AArch64::LDRHHroW:
4385  case AArch64::LDRWroW:
4386  return true;
4387  }
4388 }
4389 
4390 static bool isSExtLoad(const MachineInstr *LI) {
4391  switch (LI->getOpcode()) {
4392  default:
4393  return false;
4394  case AArch64::LDURSBWi:
4395  case AArch64::LDURSHWi:
4396  case AArch64::LDURSBXi:
4397  case AArch64::LDURSHXi:
4398  case AArch64::LDURSWi:
4399  case AArch64::LDRSBWui:
4400  case AArch64::LDRSHWui:
4401  case AArch64::LDRSBXui:
4402  case AArch64::LDRSHXui:
4403  case AArch64::LDRSWui:
4404  case AArch64::LDRSBWroX:
4405  case AArch64::LDRSHWroX:
4406  case AArch64::LDRSBXroX:
4407  case AArch64::LDRSHXroX:
4408  case AArch64::LDRSWroX:
4409  case AArch64::LDRSBWroW:
4410  case AArch64::LDRSHWroW:
4411  case AArch64::LDRSBXroW:
4412  case AArch64::LDRSHXroW:
4413  case AArch64::LDRSWroW:
4414  return true;
4415  }
4416 }
4417 
4418 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4419  MVT SrcVT) {
4420  const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4421  if (!LI || !LI->hasOneUse())
4422  return false;
4423 
4424  // Check if the load instruction has already been selected.
4425  unsigned Reg = lookUpRegForValue(LI);
4426  if (!Reg)
4427  return false;
4428 
4430  if (!MI)
4431  return false;
4432 
4433  // Check if the correct load instruction has been emitted - SelectionDAG might
4434  // have emitted a zero-extending load, but we need a sign-extending load.
4435  bool IsZExt = isa<ZExtInst>(I);
4436  const auto *LoadMI = MI;
4437  if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4438  LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4439  Register LoadReg = MI->getOperand(1).getReg();
4440  LoadMI = MRI.getUniqueVRegDef(LoadReg);
4441  assert(LoadMI && "Expected valid instruction");
4442  }
4443  if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4444  return false;
4445 
4446  // Nothing to be done.
4447  if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4448  updateValueMap(I, Reg);
4449  return true;
4450  }
4451 
4452  if (IsZExt) {
4453  unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
4454  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4455  TII.get(AArch64::SUBREG_TO_REG), Reg64)
4456  .addImm(0)
4457  .addReg(Reg, getKillRegState(true))
4458  .addImm(AArch64::sub_32);
4459  Reg = Reg64;
4460  } else {
4461  assert((MI->getOpcode() == TargetOpcode::COPY &&
4462  MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4463  "Expected copy instruction");
4464  Reg = MI->getOperand(1).getReg();
4466  removeDeadCode(I, std::next(I));
4467  }
4468  updateValueMap(I, Reg);
4469  return true;
4470 }
4471 
4472 bool AArch64FastISel::selectIntExt(const Instruction *I) {
4473  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4474  "Unexpected integer extend instruction.");
4475  MVT RetVT;
4476  MVT SrcVT;
4477  if (!isTypeSupported(I->getType(), RetVT))
4478  return false;
4479 
4480  if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4481  return false;
4482 
4483  // Try to optimize already sign-/zero-extended values from load instructions.
4484  if (optimizeIntExtLoad(I, RetVT, SrcVT))
4485  return true;
4486 
4487  unsigned SrcReg = getRegForValue(I->getOperand(0));
4488  if (!SrcReg)
4489  return false;
4490 
4491  // Try to optimize already sign-/zero-extended values from function arguments.
4492  bool IsZExt = isa<ZExtInst>(I);
4493  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4494  if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4495  if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4496  unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
4497  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4498  TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4499  .addImm(0)
4500  .addReg(SrcReg)
4501  .addImm(AArch64::sub_32);
4502  SrcReg = ResultReg;
4503  }
4504 
4505  updateValueMap(I, SrcReg);
4506  return true;
4507  }
4508  }
4509 
4510  unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4511  if (!ResultReg)
4512  return false;
4513 
4514  updateValueMap(I, ResultReg);
4515  return true;
4516 }
4517 
4518 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4519  EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4520  if (!DestEVT.isSimple())
4521  return false;
4522 
4523  MVT DestVT = DestEVT.getSimpleVT();
4524  if (DestVT != MVT::i64 && DestVT != MVT::i32)
4525  return false;
4526 
4527  unsigned DivOpc;
4528  bool Is64bit = (DestVT == MVT::i64);
4529  switch (ISDOpcode) {
4530  default:
4531  return false;
4532  case ISD::SREM:
4533  DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4534  break;
4535  case ISD::UREM:
4536  DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4537  break;
4538  }
4539  unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4540  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4541  if (!Src0Reg)
4542  return false;
4543 
4544  unsigned Src1Reg = getRegForValue(I->getOperand(1));
4545  if (!Src1Reg)
4546  return false;
4547 
4548  const TargetRegisterClass *RC =
4549  (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4550  unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg);
4551  assert(QuotReg && "Unexpected DIV instruction emission failure.");
4552  // The remainder is computed as numerator - (quotient * denominator) using the
4553  // MSUB instruction.
4554  unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg);
4555  updateValueMap(I, ResultReg);
4556  return true;
4557 }
4558 
4559 bool AArch64FastISel::selectMul(const Instruction *I) {
4560  MVT VT;
4561  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4562  return false;
4563 
4564  if (VT.isVector())
4565  return selectBinaryOp(I, ISD::MUL);
4566 
4567  const Value *Src0 = I->getOperand(0);
4568  const Value *Src1 = I->getOperand(1);
4569  if (const auto *C = dyn_cast<ConstantInt>(Src0))
4570  if (C->getValue().isPowerOf2())
4571  std::swap(Src0, Src1);
4572 
4573  // Try to simplify to a shift instruction.
4574  if (const auto *C = dyn_cast<ConstantInt>(Src1))
4575  if (C->getValue().isPowerOf2()) {
4576  uint64_t ShiftVal = C->getValue().logBase2();
4577  MVT SrcVT = VT;
4578  bool IsZExt = true;
4579  if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4580  if (!isIntExtFree(ZExt)) {
4581  MVT VT;
4582  if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4583  SrcVT = VT;
4584  IsZExt = true;
4585  Src0 = ZExt->getOperand(0);
4586  }
4587  }
4588  } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4589  if (!isIntExtFree(SExt)) {
4590  MVT VT;
4591  if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4592  SrcVT = VT;
4593  IsZExt = false;
4594  Src0 = SExt->getOperand(0);
4595  }
4596  }
4597  }
4598 
4599  unsigned Src0Reg = getRegForValue(Src0);
4600  if (!Src0Reg)
4601  return false;
4602 
4603  unsigned ResultReg =
4604  emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt);
4605 
4606  if (ResultReg) {
4607  updateValueMap(I, ResultReg);
4608  return true;
4609  }
4610  }
4611 
4612  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4613  if (!Src0Reg)
4614  return false;
4615 
4616  unsigned Src1Reg = getRegForValue(I->getOperand(1));
4617  if (!Src1Reg)
4618  return false;
4619 
4620  unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg);
4621 
4622  if (!ResultReg)
4623  return false;
4624 
4625  updateValueMap(I, ResultReg);
4626  return true;
4627 }
4628 
4629 bool AArch64FastISel::selectShift(const Instruction *I) {
4630  MVT RetVT;
4631  if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4632  return false;
4633 
4634  if (RetVT.isVector())
4635  return selectOperator(I, I->getOpcode());
4636 
4637  if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4638  unsigned ResultReg = 0;
4639  uint64_t ShiftVal = C->getZExtValue();
4640  MVT SrcVT = RetVT;
4641  bool IsZExt = I->getOpcode() != Instruction::AShr;
4642  const Value *Op0 = I->getOperand(0);
4643  if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4644  if (!isIntExtFree(ZExt)) {
4645  MVT TmpVT;
4646  if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4647  SrcVT = TmpVT;
4648  IsZExt = true;
4649  Op0 = ZExt->getOperand(0);
4650  }
4651  }
4652  } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4653  if (!isIntExtFree(SExt)) {
4654  MVT TmpVT;
4655  if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4656  SrcVT = TmpVT;
4657  IsZExt = false;
4658  Op0 = SExt->getOperand(0);
4659  }
4660  }
4661  }
4662 
4663  unsigned Op0Reg = getRegForValue(Op0);
4664  if (!Op0Reg)
4665  return false;
4666 
4667  switch (I->getOpcode()) {
4668  default: llvm_unreachable("Unexpected instruction.");
4669  case Instruction::Shl:
4670  ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4671  break;
4672  case Instruction::AShr:
4673  ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4674  break;
4675  case Instruction::LShr:
4676  ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4677  break;
4678  }
4679  if (!ResultReg)
4680  return false;
4681 
4682  updateValueMap(I, ResultReg);
4683  return true;
4684  }
4685 
4686  unsigned Op0Reg = getRegForValue(I->getOperand(0));
4687  if (!Op0Reg)
4688  return false;
4689 
4690  unsigned Op1Reg = getRegForValue(I->getOperand(1));
4691  if (!Op1Reg)
4692  return false;
4693 
4694  unsigned ResultReg = 0;
4695  switch (I->getOpcode()) {
4696  default: llvm_unreachable("Unexpected instruction.");
4697  case Instruction::Shl:
4698  ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg);
4699  break;
4700  case Instruction::AShr:
4701  ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg);
4702  break;
4703  case Instruction::LShr:
4704  ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg);
4705  break;
4706  }
4707 
4708  if (!ResultReg)
4709  return false;
4710 
4711  updateValueMap(I, ResultReg);
4712  return true;
4713 }
4714 
4715 bool AArch64FastISel::selectBitCast(const Instruction *I) {
4716  MVT RetVT, SrcVT;
4717 
4718  if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4719  return false;
4720  if (!isTypeLegal(I->getType(), RetVT))
4721  return false;
4722 
4723  unsigned Opc;
4724  if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4725  Opc = AArch64::FMOVWSr;
4726  else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4727  Opc = AArch64::FMOVXDr;
4728  else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4729  Opc = AArch64::FMOVSWr;
4730  else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4731  Opc = AArch64::FMOVDXr;
4732  else
4733  return false;
4734 
4735  const TargetRegisterClass *RC = nullptr;
4736  switch (RetVT.SimpleTy) {
4737  default: llvm_unreachable("Unexpected value type.");
4738  case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4739  case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4740  case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4741  case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4742  }
4743  unsigned Op0Reg = getRegForValue(I->getOperand(0));
4744  if (!Op0Reg)
4745  return false;
4746 
4747  unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg);
4748  if (!ResultReg)
4749  return false;
4750 
4751  updateValueMap(I, ResultReg);
4752  return true;
4753 }
4754 
4755 bool AArch64FastISel::selectFRem(const Instruction *I) {
4756  MVT RetVT;
4757  if (!isTypeLegal(I->getType(), RetVT))
4758  return false;
4759 
4760  RTLIB::Libcall LC;
4761  switch (RetVT.SimpleTy) {
4762  default:
4763  return false;
4764  case MVT::f32:
4765  LC = RTLIB::REM_F32;
4766  break;
4767  case MVT::f64:
4768  LC = RTLIB::REM_F64;
4769  break;
4770  }
4771 
4772  ArgListTy Args;
4773  Args.reserve(I->getNumOperands());
4774 
4775  // Populate the argument list.
4776  for (auto &Arg : I->operands()) {
4777  ArgListEntry Entry;
4778  Entry.Val = Arg;
4779  Entry.Ty = Arg->getType();
4780  Args.push_back(Entry);
4781  }
4782 
4783  CallLoweringInfo CLI;
4784  MCContext &Ctx = MF->getContext();
4785  CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4786  TLI.getLibcallName(LC), std::move(Args));
4787  if (!lowerCallTo(CLI))
4788  return false;
4789  updateValueMap(I, CLI.ResultReg);
4790  return true;
4791 }
4792 
4793 bool AArch64FastISel::selectSDiv(const Instruction *I) {
4794  MVT VT;
4795  if (!isTypeLegal(I->getType(), VT))
4796  return false;
4797 
4798  if (!isa<ConstantInt>(I->getOperand(1)))
4799  return selectBinaryOp(I, ISD::SDIV);
4800 
4801  const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4802  if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4803  !(C.isPowerOf2() || (-C).isPowerOf2()))
4804  return selectBinaryOp(I, ISD::SDIV);
4805 
4806  unsigned Lg2 = C.countTrailingZeros();
4807  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4808  if (!Src0Reg)
4809  return false;
4810 
4811  if (cast<BinaryOperator>(I)->isExact()) {
4812  unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2);
4813  if (!ResultReg)
4814  return false;
4815  updateValueMap(I, ResultReg);
4816  return true;
4817  }
4818 
4819  int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4820  unsigned AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne);
4821  if (!AddReg)
4822  return false;
4823 
4824  // (Src0 < 0) ? Pow2 - 1 : 0;
4825  if (!emitICmp_ri(VT, Src0Reg, 0))
4826  return false;
4827 
4828  unsigned SelectOpc;
4829  const TargetRegisterClass *RC;
4830  if (VT == MVT::i64) {
4831  SelectOpc = AArch64::CSELXr;
4832  RC = &AArch64::GPR64RegClass;
4833  } else {
4834  SelectOpc = AArch64::CSELWr;
4835  RC = &AArch64::GPR32RegClass;
4836  }
4837  unsigned SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg,
4838  AArch64CC::LT);
4839  if (!SelectReg)
4840  return false;
4841 
4842  // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4843  // negate the result.
4844  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4845  unsigned ResultReg;
4846  if (C.isNegative())
4847  ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg,
4848  AArch64_AM::ASR, Lg2);
4849  else
4850  ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2);
4851 
4852  if (!ResultReg)
4853  return false;
4854 
4855  updateValueMap(I, ResultReg);
4856  return true;
4857 }
4858 
4859 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4860 /// have to duplicate it for AArch64, because otherwise we would fail during the
4861 /// sign-extend emission.
4862 unsigned AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4863  unsigned IdxN = getRegForValue(Idx);
4864  if (IdxN == 0)
4865  // Unhandled operand. Halt "fast" selection and bail.
4866  return 0;
4867 
4868  // If the index is smaller or larger than intptr_t, truncate or extend it.
4869  MVT PtrVT = TLI.getPointerTy(DL);
4870  EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4871  if (IdxVT.bitsLT(PtrVT)) {
4872  IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
4873  } else if (IdxVT.bitsGT(PtrVT))
4874  llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4875  return IdxN;
4876 }
4877 
4878 /// This is mostly a copy of the existing FastISel GEP code, but we have to
4879 /// duplicate it for AArch64, because otherwise we would bail out even for
4880 /// simple cases. This is because the standard fastEmit functions don't cover
4881 /// MUL at all and ADD is lowered very inefficientily.
4882 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4883  if (Subtarget->isTargetILP32())
4884  return false;
4885 
4886  unsigned N = getRegForValue(I->getOperand(0));
4887  if (!N)
4888  return false;
4889 
4890  // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4891  // into a single N = N + TotalOffset.
4892  uint64_t TotalOffs = 0;
4893  MVT VT = TLI.getPointerTy(DL);
4895  GTI != E; ++GTI) {
4896  const Value *Idx = GTI.getOperand();
4897  if (auto *StTy = GTI.getStructTypeOrNull()) {
4898  unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4899  // N = N + Offset
4900  if (Field)
4901  TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4902  } else {
4903  Type *Ty = GTI.getIndexedType();
4904 
4905  // If this is a constant subscript, handle it quickly.
4906  if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4907  if (CI->isZero())
4908  continue;
4909  // N = N + Offset
4910  TotalOffs +=
4911  DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
4912  continue;
4913  }
4914  if (TotalOffs) {
4915  N = emitAdd_ri_(VT, N, TotalOffs);
4916  if (!N)
4917  return false;
4918  TotalOffs = 0;
4919  }
4920 
4921  // N = N + Idx * ElementSize;
4922  uint64_t ElementSize = DL.getTypeAllocSize(Ty);
4923  unsigned IdxN = getRegForGEPIndex(Idx);
4924  if (!IdxN)
4925  return false;
4926 
4927  if (ElementSize != 1) {
4928  unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
4929  if (!C)
4930  return false;
4931  IdxN = emitMul_rr(VT, IdxN, C);
4932  if (!IdxN)
4933  return false;
4934  }
4935  N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
4936  if (!N)
4937  return false;
4938  }
4939  }
4940  if (TotalOffs) {
4941  N = emitAdd_ri_(VT, N, TotalOffs);
4942  if (!N)
4943  return false;
4944  }
4945  updateValueMap(I, N);
4946  return true;
4947 }
4948 
4949 bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
4950  assert(TM.getOptLevel() == CodeGenOpt::None &&
4951  "cmpxchg survived AtomicExpand at optlevel > -O0");
4952 
4953  auto *RetPairTy = cast<StructType>(I->getType());
4954  Type *RetTy = RetPairTy->getTypeAtIndex(0U);
4955  assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
4956  "cmpxchg has a non-i1 status result");
4957 
4958  MVT VT;
4959  if (!isTypeLegal(RetTy, VT))
4960  return false;
4961 
4962  const TargetRegisterClass *ResRC;
4963  unsigned Opc, CmpOpc;
4964  // This only supports i32/i64, because i8/i16 aren't legal, and the generic
4965  // extractvalue selection doesn't support that.
4966  if (VT == MVT::i32) {
4967  Opc = AArch64::CMP_SWAP_32;
4968  CmpOpc = AArch64::SUBSWrs;
4969  ResRC = &AArch64::GPR32RegClass;
4970  } else if (VT == MVT::i64) {
4971  Opc = AArch64::CMP_SWAP_64;
4972  CmpOpc = AArch64::SUBSXrs;
4973  ResRC = &AArch64::GPR64RegClass;
4974  } else {
4975  return false;
4976  }
4977 
4978  const MCInstrDesc &II = TII.get(Opc);
4979 
4980  const unsigned AddrReg = constrainOperandRegClass(
4981  II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
4982  const unsigned DesiredReg = constrainOperandRegClass(
4983  II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
4984  const unsigned NewReg = constrainOperandRegClass(
4985  II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
4986 
4987  const unsigned ResultReg1 = createResultReg(ResRC);
4988  const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
4989  const unsigned ScratchReg = createResultReg(&AArch64::GPR32RegClass);
4990 
4991  // FIXME: MachineMemOperand doesn't support cmpxchg yet.
4992  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
4993  .addDef(ResultReg1)
4994  .addDef(ScratchReg)
4995  .addUse(AddrReg)
4996  .addUse(DesiredReg)
4997  .addUse(NewReg);
4998 
4999  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
5000  .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5001  .addUse(ResultReg1)
5002  .addUse(DesiredReg)
5003  .addImm(0);
5004 
5005  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr))
5006  .addDef(ResultReg2)
5007  .addUse(AArch64::WZR)
5008  .addUse(AArch64::WZR)
5010 
5011  assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5012  updateValueMap(I, ResultReg1, 2);
5013  return true;
5014 }
5015 
5016 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5017  switch (I->getOpcode()) {
5018  default:
5019  break;
5020  case Instruction::Add:
5021  case Instruction::Sub:
5022  return selectAddSub(I);
5023  case Instruction::Mul:
5024  return selectMul(I);
5025  case Instruction::SDiv:
5026  return selectSDiv(I);
5027  case Instruction::SRem:
5028  if (!selectBinaryOp(I, ISD::SREM))
5029  return selectRem(I, ISD::SREM);
5030  return true;
5031  case Instruction::URem:
5032  if (!selectBinaryOp(I, ISD::UREM))
5033  return selectRem(I, ISD::UREM);
5034  return true;
5035  case Instruction::Shl:
5036  case Instruction::LShr:
5037  case Instruction::AShr:
5038  return selectShift(I);
5039  case Instruction::And:
5040  case Instruction::Or:
5041  case Instruction::Xor:
5042  return selectLogicalOp(I);
5043  case Instruction::Br:
5044  return selectBranch(I);
5045  case Instruction::IndirectBr:
5046  return selectIndirectBr(I);
5047  case Instruction::BitCast:
5048  if (!FastISel::selectBitCast(I))
5049  return selectBitCast(I);
5050  return true;
5051  case Instruction::FPToSI:
5052  if (!selectCast(I, ISD::FP_TO_SINT))
5053  return selectFPToInt(I, /*Signed=*/true);
5054  return true;
5055  case Instruction::FPToUI:
5056  return selectFPToInt(I, /*Signed=*/false);
5057  case Instruction::ZExt:
5058  case Instruction::SExt:
5059  return selectIntExt(I);
5060  case Instruction::Trunc:
5061  if (!selectCast(I, ISD::TRUNCATE))
5062  return selectTrunc(I);
5063  return true;
5064  case Instruction::FPExt:
5065  return selectFPExt(I);
5066  case Instruction::FPTrunc:
5067  return selectFPTrunc(I);
5068  case Instruction::SIToFP:
5069  if (!selectCast(I, ISD::SINT_TO_FP))
5070  return selectIntToFP(I, /*Signed=*/true);
5071  return true;
5072  case Instruction::UIToFP:
5073  return selectIntToFP(I, /*Signed=*/false);
5074  case Instruction::Load:
5075  return selectLoad(I);
5076  case Instruction::Store:
5077  return selectStore(I);
5078  case Instruction::FCmp:
5079  case Instruction::ICmp:
5080  return selectCmp(I);
5081  case Instruction::Select:
5082  return selectSelect(I);
5083  case Instruction::Ret:
5084  return selectRet(I);
5085  case Instruction::FRem:
5086  return selectFRem(I);
5087  case Instruction::GetElementPtr:
5088  return selectGetElementPtr(I);
5089  case Instruction::AtomicCmpXchg:
5090  return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5091  }
5092 
5093  // fall-back to target-independent instruction selection.
5094  return selectOperator(I, I->getOpcode());
5095 }
5096 
5097 namespace llvm {
5098 
5100  const TargetLibraryInfo *LibInfo) {
5101  return new AArch64FastISel(FuncInfo, LibInfo);
5102 }
5103 
5104 } // end namespace llvm
llvm::FunctionLoweringInfo::Fn
const Function * Fn
Definition: FunctionLoweringInfo.h:55
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
llvm::EngineKind::Kind
Kind
Definition: ExecutionEngine.h:524
llvm::MCInstrDesc::getNumDefs
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:244
llvm::CmpInst::FCMP_ULE
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:737
llvm::CCValAssign::ZExt
@ ZExt
Definition: CallingConvLower.h:38
ValueTypes.h
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:29
llvm::AArch64CC::Invalid
@ Invalid
Definition: AArch64BaseInfo.h:253
llvm::AArch64Subtarget::isTargetWindows
bool isTargetWindows() const
Definition: AArch64Subtarget.h:484
AArch64RegisterInfo.h
llvm::Type::isSized
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:272
llvm::AArch64CC::LO
@ LO
Definition: AArch64BaseInfo.h:239
llvm::AArch64Subtarget::hasFPARMv8
bool hasFPARMv8() const
Definition: AArch64Subtarget.h:359
Signed
@ Signed
Definition: NVPTXISelLowering.cpp:4543
llvm::MVT::getStoreSize
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: MachineValueType.h:1014
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:100
llvm::AArch64CC::HI
@ HI
Definition: AArch64BaseInfo.h:244
MachineInstr.h
MathExtras.h
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:132
llvm
Definition: AllocatorList.h:23
Reg
unsigned Reg
Definition: MachineSink.cpp:1566
llvm::MCSymbol
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
llvm::AArch64CC::AL
@ AL
Definition: AArch64BaseInfo.h:250
llvm::CmpInst::ICMP_EQ
@ ICMP_EQ
equal
Definition: InstrTypes.h:743
AArch64.h
llvm::SystemZISD::TM
@ TM
Definition: SystemZISelLowering.h:65
llvm::ReturnInst
Return a value (possibly void), from a function.
Definition: Instructions.h:2923
llvm::AArch64CC::NE
@ NE
Definition: AArch64BaseInfo.h:237
llvm::CCValAssign::Full
@ Full
Definition: CallingConvLower.h:36
llvm::ISD::OR
@ OR
Definition: ISDOpcodes.h:618
llvm::CallingConv::WebKit_JS
@ WebKit_JS
Definition: CallingConv.h:58
llvm::Value::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:447
llvm::generic_gep_type_iterator
Definition: GetElementPtrTypeIterator.h:31
llvm::AArch64_AM::LSL
@ LSL
Definition: AArch64AddressingModes.h:34
llvm::MachineRegisterInfo::createVirtualRegister
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Definition: MachineRegisterInfo.cpp:158
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:722
optimizeSelect
we should consider alternate ways to model stack dependencies Lots of things could be done in WebAssemblyTargetTransformInfo cpp there are numerous optimization related hooks that can be overridden in WebAssemblyTargetLowering Instead of the OptimizeReturned which should consider preserving the returned attribute through to MachineInstrs and extending the MemIntrinsicResults pass to do this optimization on calls too That would also let the WebAssemblyPeephole pass clean up dead defs for such as it does for stores Consider implementing optimizeSelect
Definition: README.txt:104
llvm::EVT::getVectorElementCount
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:315
IntrinsicInst.h
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:229
AtomicOrdering.h
llvm::CCState
CCState - This class holds information needed while lowering arguments and return values.
Definition: CallingConvLower.h:191
MCInstrDesc.h
llvm::AArch64CC::MI
@ MI
Definition: AArch64BaseInfo.h:240
llvm::MCContext
Context object for machine code objects.
Definition: MCContext.h:71
llvm::Function
Definition: Function.h:61
llvm::IntrinsicInst::getIntrinsicID
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:52
GetElementPtrTypeIterator.h
llvm::Target
Target - Wrapper for Target specific information.
Definition: TargetRegistry.h:124
llvm::AllocaInst::getType
PointerType * getType() const
Overload to return most specific pointer type.
Definition: Instructions.h:103
llvm::MemIntrinsicBase::getDestAddressSpace
unsigned getDestAddressSpace() const
Definition: IntrinsicInst.h:606
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::MVT::isVector
bool isVector() const
Return true if this is a vector value type.
Definition: MachineValueType.h:347
llvm::CmpInst::FCMP_ONE
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:730
ErrorHandling.h
llvm::getBLRCallOpcode
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
Definition: AArch64InstrInfo.cpp:7211
llvm::AArch64RegisterInfo::isAnyArgRegReserved
bool isAnyArgRegReserved(const MachineFunction &MF) const
Definition: AArch64RegisterInfo.cpp:336
llvm::MachineRegisterInfo::getUniqueVRegDef
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
Definition: MachineRegisterInfo.cpp:411
llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition: InstrTypes.h:744
llvm::CmpInst::getInversePredicate
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:823
Registers
SI Pre allocate WWM Registers
Definition: SIPreAllocateWWMRegs.cpp:77
AArch64BaseInfo.h
llvm::AArch64Subtarget::isTargetDarwin
bool isTargetDarwin() const
Definition: AArch64Subtarget.h:481
MachineBasicBlock.h
llvm::ISD::FP_TO_SINT
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:770
APInt.h
llvm::Depth
@ Depth
Definition: SIMachineScheduler.h:34
llvm::CmpInst::ICMP_SGT
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:749
llvm::Function::getContext
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:316
Shift
bool Shift
Definition: README.txt:468
getCompareCC
static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred)
Definition: AArch64FastISel.cpp:2193
llvm::ConstantFP::isZero
bool isZero() const
Return true if the value is positive or negative zero.
Definition: Constants.h:299
llvm::MachineInstr::getDesc
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:475
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:46
llvm::RTLIB::Libcall
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Definition: RuntimeLibcalls.h:30
DenseMap.h
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:127
llvm::AArch64RegisterInfo::getFrameRegister
Register getFrameRegister(const MachineFunction &MF) const override
Definition: AArch64RegisterInfo.cpp:410
llvm::CC_AArch64_GHC
bool CC_AArch64_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::ConstantFP::getValueAPF
const APFloat & getValueAPF() const
Definition: Constants.h:295
llvm::CCValAssign::BCvt
@ BCvt
Definition: CallingConvLower.h:46
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
Operator.h
llvm::CmpInst::ICMP_SLE
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:752
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:116
llvm::CallBase::getNumArgOperands
unsigned getNumArgOperands() const
Definition: InstrTypes.h:1339
llvm::gep_type_begin
gep_type_iterator gep_type_begin(const User *GEP)
Definition: GetElementPtrTypeIterator.h:139
llvm::CmpInst::FCMP_OGT
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:726
llvm::BitmaskEnumDetail::Mask
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1567
llvm::getOffset
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
Definition: RuntimeDyld.cpp:170
llvm::ARCISD::BL
@ BL
Definition: ARCISelLowering.h:34
F
#define F(x, y, z)
Definition: MD5.cpp:56
MachineRegisterInfo.h
llvm::gep_type_end
gep_type_iterator gep_type_end(const User *GEP)
Definition: GetElementPtrTypeIterator.h:146
llvm::EVT::isSimple
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:124
MachineValueType.h
llvm::AArch64CC::LT
@ LT
Definition: AArch64BaseInfo.h:247
llvm::CmpInst::FCMP_ULT
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:736
Context
LLVMContext & Context
Definition: NVVMIntrRange.cpp:66
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:205
Instruction.h
llvm::MachineInstrBuilder::addDef
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Definition: MachineInstrBuilder.h:117
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:77
llvm::Type::isArrayTy
bool isArrayTy() const
True if this is an instance of ArrayType.
Definition: Type.h:226
llvm::TargetRegisterClass::contains
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
Definition: TargetRegisterInfo.h:91
llvm::Instruction::getOpcode
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:160
llvm::MVT::i1
@ i1
Definition: MachineValueType.h:40
llvm::MinAlign
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:672
llvm::codeview::EncodedFramePtrReg::FramePtr
@ FramePtr
llvm::CCValAssign::AExt
@ AExt
Definition: CallingConvLower.h:39
GlobalValue.h
llvm::CCValAssign
CCValAssign - Represent assignment of one arg/retval to a location.
Definition: CallingConvLower.h:33
llvm::MachineInstrBuilder::addMBB
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Definition: MachineInstrBuilder.h:147
llvm::AArch64CC::VC
@ VC
Definition: AArch64BaseInfo.h:243
Constants.h
llvm::ISD::Constant
@ Constant
Definition: ISDOpcodes.h:69
llvm::Constant::isNullValue
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:86
isZExtLoad
static bool isZExtLoad(const MachineInstr *LI)
Definition: AArch64FastISel.cpp:4370
llvm::CallingConv::GHC
@ GHC
Definition: CallingConv.h:51
FunctionLoweringInfo.h
llvm::AArch64_AM::isLogicalImmediate
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
Definition: AArch64AddressingModes.h:275
llvm::AArch64Subtarget::isLittleEndian
bool isLittleEndian() const
Definition: AArch64Subtarget.h:479
llvm::CC_AArch64_DarwinPCS
bool CC_AArch64_DarwinPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::CCValAssign::getLocReg
Register getLocReg() const
Definition: CallingConvLower.h:150
llvm::User
Definition: User.h:44
llvm::AArch64Subtarget::isTargetILP32
bool isTargetILP32() const
Definition: AArch64Subtarget.h:492
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:35
Intrinsics.h
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::CmpInst::ICMP_ULE
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:748
llvm::MVT::f64
@ f64
Definition: MachineValueType.h:53
InstrTypes.h
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1396
llvm::CC_AArch64_WebKit_JS
bool CC_AArch64_WebKit_JS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
SI
@ SI
Definition: SIInstrInfo.cpp:7342
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
MCSymbol.h
llvm::CmpInst::FCMP_UGE
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:735
llvm::ISD::TRUNCATE
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:720
llvm::MVT::SimpleTy
SimpleValueType SimpleTy
Definition: MachineValueType.h:302
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:235
llvm::Mips::GPRIdx
@ GPRIdx
Definition: MipsRegisterBankInfo.cpp:44
llvm::AArch64CC::HS
@ HS
Definition: AArch64BaseInfo.h:238
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
llvm::MVT::isVoid
@ isVoid
Definition: MachineValueType.h:249
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:196
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::AArch64_AM::SXTB
@ SXTB
Definition: AArch64AddressingModes.h:45
llvm::CmpInst::FCMP_UNO
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:732
llvm::AArch64_AM::LSR
@ LSR
Definition: AArch64AddressingModes.h:35
llvm::Instruction
Definition: Instruction.h:45
llvm::MCID::Flag
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:147
llvm::RetCC_AArch64_AAPCS
bool RetCC_AArch64_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::CC_AArch64_AAPCS
bool CC_AArch64_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::AArch64_AM::getShifterImm
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
Definition: AArch64AddressingModes.h:98
llvm::AArch64_AM::SXTH
@ SXTH
Definition: AArch64AddressingModes.h:46
llvm::ISD::SINT_TO_FP
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:724
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1632
llvm::ConstantFP
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:255
APFloat.h
This file declares a class to represent arbitrary precision floating point values and provide a varie...
llvm::AArch64CC::LE
@ LE
Definition: AArch64BaseInfo.h:249
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:885
llvm::AArch64CC::PL
@ PL
Definition: AArch64BaseInfo.h:241
llvm::CallingConv::Swift
@ Swift
Definition: CallingConv.h:73
llvm::CallingConv::CFGuard_Check
@ CFGuard_Check
Special calling convention on Windows for calling the Control Guard Check ICall funtion.
Definition: CallingConv.h:87