AArch64FastISel.cpp
1 //===- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the AArch64-specific support for the FastISel class. Some
10 // of the target-specific code is generated by tablegen in the file
11 // AArch64GenFastISel.inc, which is #included here.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AArch64.h"
18 #include "AArch64RegisterInfo.h"
19 #include "AArch64Subtarget.h"
21 #include "Utils/AArch64BaseInfo.h"
22 #include "llvm/ADT/APFloat.h"
23 #include "llvm/ADT/APInt.h"
24 #include "llvm/ADT/DenseMap.h"
25 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/CodeGen/FastISel.h"
40 #include "llvm/IR/Argument.h"
41 #include "llvm/IR/Attributes.h"
42 #include "llvm/IR/BasicBlock.h"
43 #include "llvm/IR/CallingConv.h"
44 #include "llvm/IR/Constant.h"
45 #include "llvm/IR/Constants.h"
46 #include "llvm/IR/DataLayout.h"
47 #include "llvm/IR/DerivedTypes.h"
48 #include "llvm/IR/Function.h"
50 #include "llvm/IR/GlobalValue.h"
51 #include "llvm/IR/InstrTypes.h"
52 #include "llvm/IR/Instruction.h"
53 #include "llvm/IR/Instructions.h"
54 #include "llvm/IR/IntrinsicInst.h"
55 #include "llvm/IR/Intrinsics.h"
56 #include "llvm/IR/Operator.h"
57 #include "llvm/IR/Type.h"
58 #include "llvm/IR/User.h"
59 #include "llvm/IR/Value.h"
60 #include "llvm/MC/MCInstrDesc.h"
61 #include "llvm/MC/MCRegisterInfo.h"
62 #include "llvm/MC/MCSymbol.h"
64 #include "llvm/Support/Casting.h"
65 #include "llvm/Support/CodeGen.h"
66 #include "llvm/Support/Compiler.h"
70 #include <algorithm>
71 #include <cassert>
72 #include <cstdint>
73 #include <iterator>
74 #include <utility>
75 
76 using namespace llvm;
77 
78 namespace {
79 
80 class AArch64FastISel final : public FastISel {
81  class Address {
82  public:
83  using BaseKind = enum {
84  RegBase,
85  FrameIndexBase
86  };
87 
88  private:
89  BaseKind Kind = RegBase;
90  AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
91  union {
92  unsigned Reg;
93  int FI;
94  } Base;
95  unsigned OffsetReg = 0;
96  unsigned Shift = 0;
97  int64_t Offset = 0;
98  const GlobalValue *GV = nullptr;
99 
100  public:
101  Address() { Base.Reg = 0; }
102 
103  void setKind(BaseKind K) { Kind = K; }
104  BaseKind getKind() const { return Kind; }
105  void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
106  AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
107  bool isRegBase() const { return Kind == RegBase; }
108  bool isFIBase() const { return Kind == FrameIndexBase; }
109 
110  void setReg(unsigned Reg) {
111  assert(isRegBase() && "Invalid base register access!");
112  Base.Reg = Reg;
113  }
114 
115  unsigned getReg() const {
116  assert(isRegBase() && "Invalid base register access!");
117  return Base.Reg;
118  }
119 
120  void setOffsetReg(unsigned Reg) {
121  OffsetReg = Reg;
122  }
123 
124  unsigned getOffsetReg() const {
125  return OffsetReg;
126  }
127 
128  void setFI(unsigned FI) {
129  assert(isFIBase() && "Invalid base frame index access!");
130  Base.FI = FI;
131  }
132 
133  unsigned getFI() const {
134  assert(isFIBase() && "Invalid base frame index access!");
135  return Base.FI;
136  }
137 
138  void setOffset(int64_t O) { Offset = O; }
139  int64_t getOffset() { return Offset; }
140  void setShift(unsigned S) { Shift = S; }
141  unsigned getShift() { return Shift; }
142 
143  void setGlobalValue(const GlobalValue *G) { GV = G; }
144  const GlobalValue *getGlobalValue() { return GV; }
145  };
146 
147  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
148  /// make the right decision when generating code for different targets.
149  const AArch64Subtarget *Subtarget;
150  LLVMContext *Context;
151 
152  bool fastLowerArguments() override;
153  bool fastLowerCall(CallLoweringInfo &CLI) override;
154  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
155 
156 private:
157  // Selection routines.
158  bool selectAddSub(const Instruction *I);
159  bool selectLogicalOp(const Instruction *I);
160  bool selectLoad(const Instruction *I);
161  bool selectStore(const Instruction *I);
162  bool selectBranch(const Instruction *I);
163  bool selectIndirectBr(const Instruction *I);
164  bool selectCmp(const Instruction *I);
165  bool selectSelect(const Instruction *I);
166  bool selectFPExt(const Instruction *I);
167  bool selectFPTrunc(const Instruction *I);
168  bool selectFPToInt(const Instruction *I, bool Signed);
169  bool selectIntToFP(const Instruction *I, bool Signed);
170  bool selectRem(const Instruction *I, unsigned ISDOpcode);
171  bool selectRet(const Instruction *I);
172  bool selectTrunc(const Instruction *I);
173  bool selectIntExt(const Instruction *I);
174  bool selectMul(const Instruction *I);
175  bool selectShift(const Instruction *I);
176  bool selectBitCast(const Instruction *I);
177  bool selectFRem(const Instruction *I);
178  bool selectSDiv(const Instruction *I);
179  bool selectGetElementPtr(const Instruction *I);
180  bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
181 
182  // Utility helper routines.
183  bool isTypeLegal(Type *Ty, MVT &VT);
184  bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
185  bool isValueAvailable(const Value *V) const;
186  bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
187  bool computeCallAddress(const Value *V, Address &Addr);
188  bool simplifyAddress(Address &Addr, MVT VT);
189  void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
190  MachineMemOperand::Flags Flags,
191  unsigned ScaleFactor, MachineMemOperand *MMO);
192  bool isMemCpySmall(uint64_t Len, unsigned Alignment);
193  bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
194  unsigned Alignment);
195  bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
196  const Value *Cond);
197  bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
198  bool optimizeSelect(const SelectInst *SI);
199  unsigned getRegForGEPIndex(const Value *Idx);
200 
201  // Emit helper routines.
202  unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
203  const Value *RHS, bool SetFlags = false,
204  bool WantResult = true, bool IsZExt = false);
205  unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
206  unsigned RHSReg, bool SetFlags = false,
207  bool WantResult = true);
208  unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
209  uint64_t Imm, bool SetFlags = false,
210  bool WantResult = true);
211  unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
212  unsigned RHSReg, AArch64_AM::ShiftExtendType ShiftType,
213  uint64_t ShiftImm, bool SetFlags = false,
214  bool WantResult = true);
215  unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
216  unsigned RHSReg, AArch64_AM::ShiftExtendType ExtType,
217  uint64_t ShiftImm, bool SetFlags = false,
218  bool WantResult = true);
219 
220  // Emit functions.
221  bool emitCompareAndBranch(const BranchInst *BI);
222  bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
223  bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
224  bool emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
225  bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
226  unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
227  MachineMemOperand *MMO = nullptr);
228  bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
229  MachineMemOperand *MMO = nullptr);
230  bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
231  MachineMemOperand *MMO = nullptr);
232  unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
233  unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
234  unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
235  bool SetFlags = false, bool WantResult = true,
236  bool IsZExt = false);
237  unsigned emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm);
238  unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
239  bool SetFlags = false, bool WantResult = true,
240  bool IsZExt = false);
241  unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
242  bool WantResult = true);
243  unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
244  AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
245  bool WantResult = true);
246  unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
247  const Value *RHS);
248  unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
249  uint64_t Imm);
250  unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
251  unsigned RHSReg, uint64_t ShiftImm);
252  unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
253  unsigned emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1);
254  unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
255  unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
256  unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
257  unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
258  bool IsZExt = true);
259  unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
260  unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
261  bool IsZExt = true);
262  unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
263  unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
264  bool IsZExt = false);
265 
266  unsigned materializeInt(const ConstantInt *CI, MVT VT);
267  unsigned materializeFP(const ConstantFP *CFP, MVT VT);
268  unsigned materializeGV(const GlobalValue *GV);
269 
270  // Call handling routines.
271 private:
272  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
273  bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
274  unsigned &NumBytes);
275  bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
276 
277 public:
278  // Backend specific FastISel code.
279  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
280  unsigned fastMaterializeConstant(const Constant *C) override;
281  unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
282 
283  explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
284  const TargetLibraryInfo *LibInfo)
285  : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
286  Subtarget =
287  &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
288  Context = &FuncInfo.Fn->getContext();
289  }
290 
291  bool fastSelectInstruction(const Instruction *I) override;
292 
293 #include "AArch64GenFastISel.inc"
294 };
295 
296 } // end anonymous namespace
297 
298 /// Check if the sign-/zero-extend will be a noop.
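/// The extend is free when its operand is a load with a single use (FastISel
/// folds such loads into an extending load, e.g. LDRB/LDRSB/LDRH), or when the
/// operand is an argument that already carries a matching zeroext/signext
/// attribute, because the caller has extended the value.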
299 static bool isIntExtFree(const Instruction *I) {
300  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
301  "Unexpected integer extend instruction.");
302  assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
303  "Unexpected value type.");
304  bool IsZExt = isa<ZExtInst>(I);
305 
306  if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
307  if (LI->hasOneUse())
308  return true;
309 
310  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
311  if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
312  return true;
313 
314  return false;
315 }
316 
317 /// Determine the implicit scale factor that is applied by a memory
318 /// operation for a given value type.
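/// For example, a scaled i32 access such as "ldr w0, [x1, #8]" encodes its
/// immediate in 4-byte units (here 8 / 4 = 2), so the scale factor for
/// MVT::i32 is 4.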
319 static unsigned getImplicitScaleFactor(MVT VT) {
320  switch (VT.SimpleTy) {
321  default:
322  return 0; // invalid
323  case MVT::i1: // fall-through
324  case MVT::i8:
325  return 1;
326  case MVT::i16:
327  return 2;
328  case MVT::i32: // fall-through
329  case MVT::f32:
330  return 4;
331  case MVT::i64: // fall-through
332  case MVT::f64:
333  return 8;
334  }
335 }
336 
337 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
338  if (CC == CallingConv::WebKit_JS)
339  return CC_AArch64_WebKit_JS;
340  if (CC == CallingConv::GHC)
341  return CC_AArch64_GHC;
342  if (CC == CallingConv::CFGuard_Check)
343  return CC_AArch64_Win64_CFGuard_Check;
344  return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
345 }
346 
347 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
348  assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
349  "Alloca should always return a pointer.");
350 
351  // Don't handle dynamic allocas.
352  if (!FuncInfo.StaticAllocaMap.count(AI))
353  return 0;
354 
355  DenseMap<const AllocaInst *, int>::iterator SI =
356  FuncInfo.StaticAllocaMap.find(AI);
357 
358  if (SI != FuncInfo.StaticAllocaMap.end()) {
359  Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
360  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
361  ResultReg)
362  .addFrameIndex(SI->second)
363  .addImm(0)
364  .addImm(0);
365  return ResultReg;
366  }
367 
368  return 0;
369 }
370 
371 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
372  if (VT > MVT::i64)
373  return 0;
374 
375  if (!CI->isZero())
376  return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
377 
378  // Create a copy from the zero register to materialize a "0" value.
379  const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
380  : &AArch64::GPR32RegClass;
381  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
382  Register ResultReg = createResultReg(RC);
383  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
384  ResultReg).addReg(ZeroReg, getKillRegState(true));
385  return ResultReg;
386 }
387 
388 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
389  // Positive zero (+0.0) has to be materialized with a fmov from the zero
390  // register, because the immediate version of fmov cannot encode zero.
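  // The zero-register form produced by fastMaterializeFloatZero is e.g.
  // "fmov d0, xzr" for f64 or "fmov s0, wzr" for f32.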
391  if (CFP->isNullValue())
392  return fastMaterializeFloatZero(CFP);
393 
394  if (VT != MVT::f32 && VT != MVT::f64)
395  return 0;
396 
397  const APFloat Val = CFP->getValueAPF();
398  bool Is64Bit = (VT == MVT::f64);
399  // This checks to see if we can use FMOV instructions to materialize
400  // a constant, otherwise we have to materialize via the constant pool.
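  // FMOV (immediate) can only encode values of the form +/- n/16 * 2^e with
  // n in [16, 31] and e in [-3, 4], e.g. 1.0, -0.5 or 31.0; a value such as
  // 0.1 is not representable and falls through to the paths below.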
401  int Imm =
402  Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
403  if (Imm != -1) {
404  unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
405  return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
406  }
407 
408  // For the large code model materialize the FP constant in code.
409  if (TM.getCodeModel() == CodeModel::Large) {
410  unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
411  const TargetRegisterClass *RC = Is64Bit ?
412  &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
413 
414  Register TmpReg = createResultReg(RC);
415  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
416  .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
417 
418  Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
419  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
420  TII.get(TargetOpcode::COPY), ResultReg)
421  .addReg(TmpReg, getKillRegState(true));
422 
423  return ResultReg;
424  }
425 
426  // Materialize via constant pool. MachineConstantPool wants an explicit
427  // alignment.
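  // The emitted sequence is roughly (ELF syntax, label name illustrative):
  //   adrp x8, .LCPI0_0
  //   ldr  d0, [x8, :lo12:.LCPI0_0]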
428  Align Alignment = DL.getPrefTypeAlign(CFP->getType());
429 
430  unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
431  Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
432  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
433  ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
434 
435  unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
436  Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
437  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
438  .addReg(ADRPReg)
439  .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
440  return ResultReg;
441 }
442 
443 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
444  // We can't handle thread-local variables quickly yet.
445  if (GV->isThreadLocal())
446  return 0;
447 
448  // MachO still uses GOT for large code-model accesses, but ELF requires
449  // movz/movk sequences, which FastISel doesn't handle yet.
450  if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
451  return 0;
452 
453  unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
454 
455  EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
456  if (!DestEVT.isSimple())
457  return 0;
458 
459  Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
460  unsigned ResultReg;
461 
462  if (OpFlags & AArch64II::MO_GOT) {
463  // ADRP + LDRX
464  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
465  ADRPReg)
466  .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
467 
468  unsigned LdrOpc;
469  if (Subtarget->isTargetILP32()) {
470  ResultReg = createResultReg(&AArch64::GPR32RegClass);
471  LdrOpc = AArch64::LDRWui;
472  } else {
473  ResultReg = createResultReg(&AArch64::GPR64RegClass);
474  LdrOpc = AArch64::LDRXui;
475  }
476  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(LdrOpc),
477  ResultReg)
478  .addReg(ADRPReg)
479  .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
480  AArch64II::MO_NC | OpFlags);
481  if (!Subtarget->isTargetILP32())
482  return ResultReg;
483 
484  // LDRWui produces a 32-bit register, but pointers in-register are 64-bits
485  // so we must extend the result on ILP32.
486  Register Result64 = createResultReg(&AArch64::GPR64RegClass);
487  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
488  TII.get(TargetOpcode::SUBREG_TO_REG))
489  .addDef(Result64)
490  .addImm(0)
491  .addReg(ResultReg, RegState::Kill)
492  .addImm(AArch64::sub_32);
493  return Result64;
494  } else {
495  // ADRP + ADDX
496  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
497  ADRPReg)
498  .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
499 
500  ResultReg = createResultReg(&AArch64::GPR64spRegClass);
501  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
502  ResultReg)
503  .addReg(ADRPReg)
504  .addGlobalAddress(GV, 0,
505  AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
506  .addImm(0);
507  }
508  return ResultReg;
509 }
510 
511 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
512  EVT CEVT = TLI.getValueType(DL, C->getType(), true);
513 
514  // Only handle simple types.
515  if (!CEVT.isSimple())
516  return 0;
517  MVT VT = CEVT.getSimpleVT();
518  // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
519  // 'null' pointers need to have a somewhat special treatment.
520  if (isa<ConstantPointerNull>(C)) {
521  assert(VT == MVT::i64 && "Expected 64-bit pointers");
522  return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
523  }
524 
525  if (const auto *CI = dyn_cast<ConstantInt>(C))
526  return materializeInt(CI, VT);
527  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
528  return materializeFP(CFP, VT);
529  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
530  return materializeGV(GV);
531 
532  return 0;
533 }
534 
535 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
536  assert(CFP->isNullValue() &&
537  "Floating-point constant is not a positive zero.");
538  MVT VT;
539  if (!isTypeLegal(CFP->getType(), VT))
540  return 0;
541 
542  if (VT != MVT::f32 && VT != MVT::f64)
543  return 0;
544 
545  bool Is64Bit = (VT == MVT::f64);
546  unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
547  unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
548  return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);
549 }
550 
551 /// Check if the multiply is by a power-of-2 constant.
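/// Such multiplies can be folded into other instructions as a shift, e.g. a
/// multiply by 8 becomes an "lsl #3" operand of an add or of a register-offset
/// address.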
552 static bool isMulPowOf2(const Value *I) {
553  if (const auto *MI = dyn_cast<MulOperator>(I)) {
554  if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
555  if (C->getValue().isPowerOf2())
556  return true;
557  if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
558  if (C->getValue().isPowerOf2())
559  return true;
560  }
561  return false;
562 }
563 
564 // Computes the address to get to an object.
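// On success Addr describes one of the forms AArch64 loads/stores accept: a
// frame index plus immediate, a base register plus immediate, or a base
// register plus an (optionally extended and shifted) offset register, e.g.
// [x0, w1, sxtw #2].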
565 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
566 {
567  const User *U = nullptr;
568  unsigned Opcode = Instruction::UserOp1;
569  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
570  // Don't walk into other basic blocks unless the object is an alloca from
571  // another block, otherwise it may not have a virtual register assigned.
572  if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
573  FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
574  Opcode = I->getOpcode();
575  U = I;
576  }
577  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
578  Opcode = C->getOpcode();
579  U = C;
580  }
581 
582  if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
583  if (Ty->getAddressSpace() > 255)
584  // Fast instruction selection doesn't support the special
585  // address spaces.
586  return false;
587 
588  switch (Opcode) {
589  default:
590  break;
591  case Instruction::BitCast:
592  // Look through bitcasts.
593  return computeAddress(U->getOperand(0), Addr, Ty);
594 
595  case Instruction::IntToPtr:
596  // Look past no-op inttoptrs.
597  if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
598  TLI.getPointerTy(DL))
599  return computeAddress(U->getOperand(0), Addr, Ty);
600  break;
601 
602  case Instruction::PtrToInt:
603  // Look past no-op ptrtoints.
604  if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
605  return computeAddress(U->getOperand(0), Addr, Ty);
606  break;
607 
608  case Instruction::GetElementPtr: {
609  Address SavedAddr = Addr;
610  uint64_t TmpOffset = Addr.getOffset();
611 
612  // Iterate through the GEP folding the constants into offsets where
613  // we can.
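  // e.g. for "getelementptr i32, ptr %p, i64 3" the constant index contributes
  // 3 * 4 = 12 bytes to TmpOffset.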
614  for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
615  GTI != E; ++GTI) {
616  const Value *Op = GTI.getOperand();
617  if (StructType *STy = GTI.getStructTypeOrNull()) {
618  const StructLayout *SL = DL.getStructLayout(STy);
619  unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
620  TmpOffset += SL->getElementOffset(Idx);
621  } else {
622  uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
623  while (true) {
624  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
625  // Constant-offset addressing.
626  TmpOffset += CI->getSExtValue() * S;
627  break;
628  }
629  if (canFoldAddIntoGEP(U, Op)) {
630  // A compatible add with a constant operand. Fold the constant.
631  ConstantInt *CI =
632  cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
633  TmpOffset += CI->getSExtValue() * S;
634  // Iterate on the other operand.
635  Op = cast<AddOperator>(Op)->getOperand(0);
636  continue;
637  }
638  // Unsupported
639  goto unsupported_gep;
640  }
641  }
642  }
643 
644  // Try to grab the base operand now.
645  Addr.setOffset(TmpOffset);
646  if (computeAddress(U->getOperand(0), Addr, Ty))
647  return true;
648 
649  // We failed, restore everything and try the other options.
650  Addr = SavedAddr;
651 
652  unsupported_gep:
653  break;
654  }
655  case Instruction::Alloca: {
656  const AllocaInst *AI = cast<AllocaInst>(Obj);
657  DenseMap<const AllocaInst *, int>::iterator SI =
658  FuncInfo.StaticAllocaMap.find(AI);
659  if (SI != FuncInfo.StaticAllocaMap.end()) {
660  Addr.setKind(Address::FrameIndexBase);
661  Addr.setFI(SI->second);
662  return true;
663  }
664  break;
665  }
666  case Instruction::Add: {
667  // Adds of constants are common and easy enough.
668  const Value *LHS = U->getOperand(0);
669  const Value *RHS = U->getOperand(1);
670 
671  if (isa<ConstantInt>(LHS))
672  std::swap(LHS, RHS);
673 
674  if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
675  Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
676  return computeAddress(LHS, Addr, Ty);
677  }
678 
679  Address Backup = Addr;
680  if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
681  return true;
682  Addr = Backup;
683 
684  break;
685  }
686  case Instruction::Sub: {
687  // Subs of constants are common and easy enough.
688  const Value *LHS = U->getOperand(0);
689  const Value *RHS = U->getOperand(1);
690 
691  if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
692  Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
693  return computeAddress(LHS, Addr, Ty);
694  }
695  break;
696  }
697  case Instruction::Shl: {
698  if (Addr.getOffsetReg())
699  break;
700 
701  const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
702  if (!CI)
703  break;
704 
705  unsigned Val = CI->getZExtValue();
706  if (Val < 1 || Val > 3)
707  break;
708 
709  uint64_t NumBytes = 0;
710  if (Ty && Ty->isSized()) {
711  uint64_t NumBits = DL.getTypeSizeInBits(Ty);
712  NumBytes = NumBits / 8;
713  if (!isPowerOf2_64(NumBits))
714  NumBytes = 0;
715  }
716 
717  if (NumBytes != (1ULL << Val))
718  break;
719 
720  Addr.setShift(Val);
721  Addr.setExtendType(AArch64_AM::LSL);
722 
723  const Value *Src = U->getOperand(0);
724  if (const auto *I = dyn_cast<Instruction>(Src)) {
725  if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
726  // Fold the zext or sext when it won't become a noop.
727  if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
728  if (!isIntExtFree(ZE) &&
729  ZE->getOperand(0)->getType()->isIntegerTy(32)) {
730  Addr.setExtendType(AArch64_AM::UXTW);
731  Src = ZE->getOperand(0);
732  }
733  } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
734  if (!isIntExtFree(SE) &&
735  SE->getOperand(0)->getType()->isIntegerTy(32)) {
736  Addr.setExtendType(AArch64_AM::SXTW);
737  Src = SE->getOperand(0);
738  }
739  }
740  }
741  }
742 
743  if (const auto *AI = dyn_cast<BinaryOperator>(Src))
744  if (AI->getOpcode() == Instruction::And) {
745  const Value *LHS = AI->getOperand(0);
746  const Value *RHS = AI->getOperand(1);
747 
748  if (const auto *C = dyn_cast<ConstantInt>(LHS))
749  if (C->getValue() == 0xffffffff)
750  std::swap(LHS, RHS);
751 
752  if (const auto *C = dyn_cast<ConstantInt>(RHS))
753  if (C->getValue() == 0xffffffff) {
754  Addr.setExtendType(AArch64_AM::UXTW);
755  Register Reg = getRegForValue(LHS);
756  if (!Reg)
757  return false;
758  Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
759  Addr.setOffsetReg(Reg);
760  return true;
761  }
762  }
763 
764  Register Reg = getRegForValue(Src);
765  if (!Reg)
766  return false;
767  Addr.setOffsetReg(Reg);
768  return true;
769  }
770  case Instruction::Mul: {
771  if (Addr.getOffsetReg())
772  break;
773 
774  if (!isMulPowOf2(U))
775  break;
776 
777  const Value *LHS = U->getOperand(0);
778  const Value *RHS = U->getOperand(1);
779 
780  // Canonicalize power-of-2 value to the RHS.
781  if (const auto *C = dyn_cast<ConstantInt>(LHS))
782  if (C->getValue().isPowerOf2())
783  std::swap(LHS, RHS);
784 
785  assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
786  const auto *C = cast<ConstantInt>(RHS);
787  unsigned Val = C->getValue().logBase2();
788  if (Val < 1 || Val > 3)
789  break;
790 
791  uint64_t NumBytes = 0;
792  if (Ty && Ty->isSized()) {
793  uint64_t NumBits = DL.getTypeSizeInBits(Ty);
794  NumBytes = NumBits / 8;
795  if (!isPowerOf2_64(NumBits))
796  NumBytes = 0;
797  }
798 
799  if (NumBytes != (1ULL << Val))
800  break;
801 
802  Addr.setShift(Val);
803  Addr.setExtendType(AArch64_AM::LSL);
804 
805  const Value *Src = LHS;
806  if (const auto *I = dyn_cast<Instruction>(Src)) {
807  if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
808  // Fold the zext or sext when it won't become a noop.
809  if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
810  if (!isIntExtFree(ZE) &&
811  ZE->getOperand(0)->getType()->isIntegerTy(32)) {
812  Addr.setExtendType(AArch64_AM::UXTW);
813  Src = ZE->getOperand(0);
814  }
815  } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
816  if (!isIntExtFree(SE) &&
817  SE->getOperand(0)->getType()->isIntegerTy(32)) {
818  Addr.setExtendType(AArch64_AM::SXTW);
819  Src = SE->getOperand(0);
820  }
821  }
822  }
823  }
824 
825  Register Reg = getRegForValue(Src);
826  if (!Reg)
827  return false;
828  Addr.setOffsetReg(Reg);
829  return true;
830  }
831  case Instruction::And: {
832  if (Addr.getOffsetReg())
833  break;
834 
835  if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
836  break;
837 
838  const Value *LHS = U->getOperand(0);
839  const Value *RHS = U->getOperand(1);
840 
841  if (const auto *C = dyn_cast<ConstantInt>(LHS))
842  if (C->getValue() == 0xffffffff)
843  std::swap(LHS, RHS);
844 
845  if (const auto *C = dyn_cast<ConstantInt>(RHS))
846  if (C->getValue() == 0xffffffff) {
847  Addr.setShift(0);
848  Addr.setExtendType(AArch64_AM::LSL);
849  Addr.setExtendType(AArch64_AM::UXTW);
850 
851  Register Reg = getRegForValue(LHS);
852  if (!Reg)
853  return false;
854  Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
855  Addr.setOffsetReg(Reg);
856  return true;
857  }
858  break;
859  }
860  case Instruction::SExt:
861  case Instruction::ZExt: {
862  if (!Addr.getReg() || Addr.getOffsetReg())
863  break;
864 
865  const Value *Src = nullptr;
866  // Fold the zext or sext when it won't become a noop.
867  if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
868  if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
869  Addr.setExtendType(AArch64_AM::UXTW);
870  Src = ZE->getOperand(0);
871  }
872  } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
873  if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
874  Addr.setExtendType(AArch64_AM::SXTW);
875  Src = SE->getOperand(0);
876  }
877  }
878 
879  if (!Src)
880  break;
881 
882  Addr.setShift(0);
883  Register Reg = getRegForValue(Src);
884  if (!Reg)
885  return false;
886  Addr.setOffsetReg(Reg);
887  return true;
888  }
889  } // end switch
890 
891  if (Addr.isRegBase() && !Addr.getReg()) {
892  Register Reg = getRegForValue(Obj);
893  if (!Reg)
894  return false;
895  Addr.setReg(Reg);
896  return true;
897  }
898 
899  if (!Addr.getOffsetReg()) {
900  Register Reg = getRegForValue(Obj);
901  if (!Reg)
902  return false;
903  Addr.setOffsetReg(Reg);
904  return true;
905  }
906 
907  return false;
908 }
909 
910 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
911  const User *U = nullptr;
912  unsigned Opcode = Instruction::UserOp1;
913  bool InMBB = true;
914 
915  if (const auto *I = dyn_cast<Instruction>(V)) {
916  Opcode = I->getOpcode();
917  U = I;
918  InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
919  } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
920  Opcode = C->getOpcode();
921  U = C;
922  }
923 
924  switch (Opcode) {
925  default: break;
926  case Instruction::BitCast:
927  // Look past a bitcast if its operand is in the same BB.
928  if (InMBB)
929  return computeCallAddress(U->getOperand(0), Addr);
930  break;
931  case Instruction::IntToPtr:
932  // Look past a no-op inttoptr if its operand is in the same BB.
933  if (InMBB &&
934  TLI.getValueType(DL, U->getOperand(0)->getType()) ==
935  TLI.getPointerTy(DL))
936  return computeCallAddress(U->getOperand(0), Addr);
937  break;
938  case Instruction::PtrToInt:
939  // Look past a no-op ptrtoint if its operand is in the same BB.
940  if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
941  return computeCallAddress(U->getOperand(0), Addr);
942  break;
943  }
944 
945  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
946  Addr.setGlobalValue(GV);
947  return true;
948  }
949 
950  // If all else fails, try to materialize the value in a register.
951  if (!Addr.getGlobalValue()) {
952  Addr.setReg(getRegForValue(V));
953  return Addr.getReg() != 0;
954  }
955 
956  return false;
957 }
958 
959 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
960  EVT evt = TLI.getValueType(DL, Ty, true);
961 
962  if (Subtarget->isTargetILP32() && Ty->isPointerTy())
963  return false;
964 
965  // Only handle simple types.
966  if (evt == MVT::Other || !evt.isSimple())
967  return false;
968  VT = evt.getSimpleVT();
969 
970  // This is a legal type, but it's not something we handle in fast-isel.
971  if (VT == MVT::f128)
972  return false;
973 
974  // Handle all other legal types, i.e. a register that will directly hold this
975  // value.
976  return TLI.isTypeLegal(VT);
977 }
978 
979 /// Determine if the value type is supported by FastISel.
980 ///
981 /// FastISel for AArch64 can handle more value types than are legal. This adds
982 /// simple value types such as i1, i8, and i16.
983 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
984  if (Ty->isVectorTy() && !IsVectorAllowed)
985  return false;
986 
987  if (isTypeLegal(Ty, VT))
988  return true;
989 
990  // If this is a type that can be sign- or zero-extended to a basic operation,
991  // go ahead and accept it now.
992  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
993  return true;
994 
995  return false;
996 }
997 
998 bool AArch64FastISel::isValueAvailable(const Value *V) const {
999  if (!isa<Instruction>(V))
1000  return true;
1001 
1002  const auto *I = cast<Instruction>(V);
1003  return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
1004 }
1005 
1006 bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
1007  if (Subtarget->isTargetILP32())
1008  return false;
1009 
1010  unsigned ScaleFactor = getImplicitScaleFactor(VT);
1011  if (!ScaleFactor)
1012  return false;
1013 
1014  bool ImmediateOffsetNeedsLowering = false;
1015  bool RegisterOffsetNeedsLowering = false;
1016  int64_t Offset = Addr.getOffset();
1017  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
1018  ImmediateOffsetNeedsLowering = true;
1019  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1020  !isUInt<12>(Offset / ScaleFactor))
1021  ImmediateOffsetNeedsLowering = true;
1022 
1023  // Cannot encode an offset register and an immediate offset in the same
1024  // instruction. Fold the immediate offset into the load/store instruction and
1025  // emit an additional add to take care of the offset register.
1026  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1027  RegisterOffsetNeedsLowering = true;
1028 
1029  // Cannot encode zero register as base.
1030  if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1031  RegisterOffsetNeedsLowering = true;
1032 
1033  // If this is a stack pointer and the offset needs to be simplified then put
1034  // the alloca address into a register, set the base type back to register and
1035  // continue. This should almost never happen.
1036  if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1037  {
1038  Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1039  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
1040  ResultReg)
1041  .addFrameIndex(Addr.getFI())
1042  .addImm(0)
1043  .addImm(0);
1044  Addr.setKind(Address::RegBase);
1045  Addr.setReg(ResultReg);
1046  }
1047 
1048  if (RegisterOffsetNeedsLowering) {
1049  unsigned ResultReg = 0;
1050  if (Addr.getReg()) {
1051  if (Addr.getExtendType() == AArch64_AM::SXTW ||
1052  Addr.getExtendType() == AArch64_AM::UXTW )
1053  ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1054  Addr.getOffsetReg(), Addr.getExtendType(),
1055  Addr.getShift());
1056  else
1057  ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1058  Addr.getOffsetReg(), AArch64_AM::LSL,
1059  Addr.getShift());
1060  } else {
1061  if (Addr.getExtendType() == AArch64_AM::UXTW)
1062  ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1063  Addr.getShift(), /*IsZExt=*/true);
1064  else if (Addr.getExtendType() == AArch64_AM::SXTW)
1065  ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1066  Addr.getShift(), /*IsZExt=*/false);
1067  else
1068  ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1069  Addr.getShift());
1070  }
1071  if (!ResultReg)
1072  return false;
1073 
1074  Addr.setReg(ResultReg);
1075  Addr.setOffsetReg(0);
1076  Addr.setShift(0);
1077  Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1078  }
1079 
1080  // Since the offset is too large for the load/store instruction get the
1081  // reg+offset into a register.
1082  if (ImmediateOffsetNeedsLowering) {
1083  unsigned ResultReg;
1084  if (Addr.getReg())
1085  // Try to fold the immediate into the add instruction.
1086  ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
1087  else
1088  ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1089 
1090  if (!ResultReg)
1091  return false;
1092  Addr.setReg(ResultReg);
1093  Addr.setOffset(0);
1094  }
1095  return true;
1096 }
1097 
1098 void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1099  const MachineInstrBuilder &MIB,
1100  MachineMemOperand::Flags Flags,
1101  unsigned ScaleFactor,
1102  MachineMemOperand *MMO) {
1103  int64_t Offset = Addr.getOffset() / ScaleFactor;
1104  // Frame base works a bit differently. Handle it separately.
1105  if (Addr.isFIBase()) {
1106  int FI = Addr.getFI();
1107  // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
1108  // and alignment should be based on the VT.
1109  MMO = FuncInfo.MF->getMachineMemOperand(
1110  MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1111  MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
1112  // Now add the rest of the operands.
1113  MIB.addFrameIndex(FI).addImm(Offset);
1114  } else {
1115  assert(Addr.isRegBase() && "Unexpected address kind.");
1116  const MCInstrDesc &II = MIB->getDesc();
1117  unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1118  Addr.setReg(
1119  constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1120  Addr.setOffsetReg(
1121  constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1122  if (Addr.getOffsetReg()) {
1123  assert(Addr.getOffset() == 0 && "Unexpected offset");
1124  bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1125  Addr.getExtendType() == AArch64_AM::SXTX;
1126  MIB.addReg(Addr.getReg());
1127  MIB.addReg(Addr.getOffsetReg());
1128  MIB.addImm(IsSigned);
1129  MIB.addImm(Addr.getShift() != 0);
1130  } else
1131  MIB.addReg(Addr.getReg()).addImm(Offset);
1132  }
1133 
1134  if (MMO)
1135  MIB.addMemOperand(MMO);
1136 }
1137 
1138 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1139  const Value *RHS, bool SetFlags,
1140  bool WantResult, bool IsZExt) {
1141  AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1142  bool NeedExtend = false;
1143  switch (RetVT.SimpleTy) {
1144  default:
1145  return 0;
1146  case MVT::i1:
1147  NeedExtend = true;
1148  break;
1149  case MVT::i8:
1150  NeedExtend = true;
1151  ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1152  break;
1153  case MVT::i16:
1154  NeedExtend = true;
1155  ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1156  break;
1157  case MVT::i32: // fall-through
1158  case MVT::i64:
1159  break;
1160  }
1161  MVT SrcVT = RetVT;
1162  RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1163 
1164  // Canonicalize immediates to the RHS first.
1165  if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1166  std::swap(LHS, RHS);
1167 
1168  // Canonicalize mul by power of 2 to the RHS.
1169  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1170  if (isMulPowOf2(LHS))
1171  std::swap(LHS, RHS);
1172 
1173  // Canonicalize shift immediate to the RHS.
1174  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1175  if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1176  if (isa<ConstantInt>(SI->getOperand(1)))
1177  if (SI->getOpcode() == Instruction::Shl ||
1178  SI->getOpcode() == Instruction::LShr ||
1179  SI->getOpcode() == Instruction::AShr )
1180  std::swap(LHS, RHS);
1181 
1182  Register LHSReg = getRegForValue(LHS);
1183  if (!LHSReg)
1184  return 0;
1185 
1186  if (NeedExtend)
1187  LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1188 
1189  unsigned ResultReg = 0;
1190  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1191  uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1192  if (C->isNegative())
1193  ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
1194  WantResult);
1195  else
1196  ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
1197  WantResult);
1198  } else if (const auto *C = dyn_cast<Constant>(RHS))
1199  if (C->isNullValue())
1200  ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);
1201 
1202  if (ResultReg)
1203  return ResultReg;
1204 
1205  // Only extend the RHS within the instruction if there is a valid extend type.
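  // When it applies, this selects the extended-register form of add/sub, e.g.
  // "add w0, w1, w2, sxtb #1" for a sign-extended i8 RHS shifted left by one.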
1206  if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1207  isValueAvailable(RHS)) {
1208  if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
1209  if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
1210  if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
1211  Register RHSReg = getRegForValue(SI->getOperand(0));
1212  if (!RHSReg)
1213  return 0;
1214  return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType,
1215  C->getZExtValue(), SetFlags, WantResult);
1216  }
1217  Register RHSReg = getRegForValue(RHS);
1218  if (!RHSReg)
1219  return 0;
1220  return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,
1221  SetFlags, WantResult);
1222  }
1223 
1224  // Check if the mul can be folded into the instruction.
1225  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1226  if (isMulPowOf2(RHS)) {
1227  const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1228  const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1229 
1230  if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1231  if (C->getValue().isPowerOf2())
1232  std::swap(MulLHS, MulRHS);
1233 
1234  assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1235  uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1236  Register RHSReg = getRegForValue(MulLHS);
1237  if (!RHSReg)
1238  return 0;
1239  ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,
1240  ShiftVal, SetFlags, WantResult);
1241  if (ResultReg)
1242  return ResultReg;
1243  }
1244  }
1245 
1246  // Check if the shift can be folded into the instruction.
1247  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1248  if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1249  if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1250  AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1251  switch (SI->getOpcode()) {
1252  default: break;
1253  case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
1254  case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1255  case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1256  }
1257  uint64_t ShiftVal = C->getZExtValue();
1258  if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1259  Register RHSReg = getRegForValue(SI->getOperand(0));
1260  if (!RHSReg)
1261  return 0;
1262  ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
1263  ShiftVal, SetFlags, WantResult);
1264  if (ResultReg)
1265  return ResultReg;
1266  }
1267  }
1268  }
1269  }
1270 
1271  Register RHSReg = getRegForValue(RHS);
1272  if (!RHSReg)
1273  return 0;
1274 
1275  if (NeedExtend)
1276  RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1277 
1278  return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
1279 }
1280 
1281 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1282  unsigned RHSReg, bool SetFlags,
1283  bool WantResult) {
1284  assert(LHSReg && RHSReg && "Invalid register number.");
1285 
1286  if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1287  RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1288  return 0;
1289 
1290  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1291  return 0;
1292 
1293  static const unsigned OpcTable[2][2][2] = {
1294  { { AArch64::SUBWrr, AArch64::SUBXrr },
1295  { AArch64::ADDWrr, AArch64::ADDXrr } },
1296  { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1297  { AArch64::ADDSWrr, AArch64::ADDSXrr } }
1298  };
1299  bool Is64Bit = RetVT == MVT::i64;
1300  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1301  const TargetRegisterClass *RC =
1302  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1303  unsigned ResultReg;
1304  if (WantResult)
1305  ResultReg = createResultReg(RC);
1306  else
1307  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1308 
1309  const MCInstrDesc &II = TII.get(Opc);
1310  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1311  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1312  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1313  .addReg(LHSReg)
1314  .addReg(RHSReg);
1315  return ResultReg;
1316 }
1317 
1318 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1319  uint64_t Imm, bool SetFlags,
1320  bool WantResult) {
1321  assert(LHSReg && "Invalid register number.");
1322 
1323  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1324  return 0;
1325 
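  // The ADD/SUB (immediate) encoding holds a 12-bit unsigned value that may be
  // shifted left by 12, e.g. 0x5000 is encoded as imm = 5 with LSL #12, while
  // 0x5001 fits neither form and makes this helper return 0.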
1326  unsigned ShiftImm;
1327  if (isUInt<12>(Imm))
1328  ShiftImm = 0;
1329  else if ((Imm & 0xfff000) == Imm) {
1330  ShiftImm = 12;
1331  Imm >>= 12;
1332  } else
1333  return 0;
1334 
1335  static const unsigned OpcTable[2][2][2] = {
1336  { { AArch64::SUBWri, AArch64::SUBXri },
1337  { AArch64::ADDWri, AArch64::ADDXri } },
1338  { { AArch64::SUBSWri, AArch64::SUBSXri },
1339  { AArch64::ADDSWri, AArch64::ADDSXri } }
1340  };
1341  bool Is64Bit = RetVT == MVT::i64;
1342  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1343  const TargetRegisterClass *RC;
1344  if (SetFlags)
1345  RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1346  else
1347  RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1348  unsigned ResultReg;
1349  if (WantResult)
1350  ResultReg = createResultReg(RC);
1351  else
1352  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1353 
1354  const MCInstrDesc &II = TII.get(Opc);
1355  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1356  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1357  .addReg(LHSReg)
1358  .addImm(Imm)
1359  .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1360  return ResultReg;
1361 }
1362 
1363 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1364  unsigned RHSReg,
1365  AArch64_AM::ShiftExtendType ShiftType,
1366  uint64_t ShiftImm, bool SetFlags,
1367  bool WantResult) {
1368  assert(LHSReg && RHSReg && "Invalid register number.");
1369  assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1370  RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1371 
1372  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1373  return 0;
1374 
1375  // Don't deal with undefined shifts.
1376  if (ShiftImm >= RetVT.getSizeInBits())
1377  return 0;
1378 
1379  static const unsigned OpcTable[2][2][2] = {
1380  { { AArch64::SUBWrs, AArch64::SUBXrs },
1381  { AArch64::ADDWrs, AArch64::ADDXrs } },
1382  { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1383  { AArch64::ADDSWrs, AArch64::ADDSXrs } }
1384  };
1385  bool Is64Bit = RetVT == MVT::i64;
1386  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1387  const TargetRegisterClass *RC =
1388  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1389  unsigned ResultReg;
1390  if (WantResult)
1391  ResultReg = createResultReg(RC);
1392  else
1393  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1394 
1395  const MCInstrDesc &II = TII.get(Opc);
1396  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1397  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1398  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1399  .addReg(LHSReg)
1400  .addReg(RHSReg)
1401  .addImm(getShifterImm(ShiftType, ShiftImm));
1402  return ResultReg;
1403 }
1404 
1405 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1406  unsigned RHSReg,
1407  AArch64_AM::ShiftExtendType ExtType,
1408  uint64_t ShiftImm, bool SetFlags,
1409  bool WantResult) {
1410  assert(LHSReg && RHSReg && "Invalid register number.");
1411  assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1412  RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1413 
1414  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1415  return 0;
1416 
1417  if (ShiftImm >= 4)
1418  return 0;
1419 
1420  static const unsigned OpcTable[2][2][2] = {
1421  { { AArch64::SUBWrx, AArch64::SUBXrx },
1422  { AArch64::ADDWrx, AArch64::ADDXrx } },
1423  { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1424  { AArch64::ADDSWrx, AArch64::ADDSXrx } }
1425  };
1426  bool Is64Bit = RetVT == MVT::i64;
1427  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1428  const TargetRegisterClass *RC = nullptr;
1429  if (SetFlags)
1430  RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1431  else
1432  RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1433  unsigned ResultReg;
1434  if (WantResult)
1435  ResultReg = createResultReg(RC);
1436  else
1437  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1438 
1439  const MCInstrDesc &II = TII.get(Opc);
1440  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1441  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1442  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1443  .addReg(LHSReg)
1444  .addReg(RHSReg)
1445  .addImm(getArithExtendImm(ExtType, ShiftImm));
1446  return ResultReg;
1447 }
1448 
1449 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1450  Type *Ty = LHS->getType();
1451  EVT EVT = TLI.getValueType(DL, Ty, true);
1452  if (!EVT.isSimple())
1453  return false;
1454  MVT VT = EVT.getSimpleVT();
1455 
1456  switch (VT.SimpleTy) {
1457  default:
1458  return false;
1459  case MVT::i1:
1460  case MVT::i8:
1461  case MVT::i16:
1462  case MVT::i32:
1463  case MVT::i64:
1464  return emitICmp(VT, LHS, RHS, IsZExt);
1465  case MVT::f32:
1466  case MVT::f64:
1467  return emitFCmp(VT, LHS, RHS);
1468  }
1469 }
1470 
1471 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1472  bool IsZExt) {
1473  return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1474  IsZExt) != 0;
1475 }
1476 
1477 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) {
1478  return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,
1479  /*SetFlags=*/true, /*WantResult=*/false) != 0;
1480 }
1481 
1482 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1483  if (RetVT != MVT::f32 && RetVT != MVT::f64)
1484  return false;
1485 
1486  // Check to see if the 2nd operand is a constant that we can encode directly
1487  // in the compare.
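  // FCMP has a form that compares directly against +0.0, e.g. "fcmp d0, #0.0",
  // so no register needs to be materialized for that operand.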
1488  bool UseImm = false;
1489  if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1490  if (CFP->isZero() && !CFP->isNegative())
1491  UseImm = true;
1492 
1493  Register LHSReg = getRegForValue(LHS);
1494  if (!LHSReg)
1495  return false;
1496 
1497  if (UseImm) {
1498  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1499  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1500  .addReg(LHSReg);
1501  return true;
1502  }
1503 
1504  Register RHSReg = getRegForValue(RHS);
1505  if (!RHSReg)
1506  return false;
1507 
1508  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1509  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1510  .addReg(LHSReg)
1511  .addReg(RHSReg);
1512  return true;
1513 }
1514 
1515 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1516  bool SetFlags, bool WantResult, bool IsZExt) {
1517  return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1518  IsZExt);
1519 }
1520 
1521 /// This method is a wrapper to simplify add emission.
1522 ///
1523 /// First try to emit an add with an immediate operand using emitAddSub_ri. If
1524 /// that fails, then try to materialize the immediate into a register and use
1525 /// emitAddSub_rr instead.
1526 unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm) {
1527  unsigned ResultReg;
1528  if (Imm < 0)
1529  ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);
1530  else
1531  ResultReg = emitAddSub_ri(true, VT, Op0, Imm);
1532 
1533  if (ResultReg)
1534  return ResultReg;
1535 
1536  unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1537  if (!CReg)
1538  return 0;
1539 
1540  ResultReg = emitAddSub_rr(true, VT, Op0, CReg);
1541  return ResultReg;
1542 }
1543 
1544 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1545  bool SetFlags, bool WantResult, bool IsZExt) {
1546  return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1547  IsZExt);
1548 }
1549 
1550 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1551  unsigned RHSReg, bool WantResult) {
1552  return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg,
1553  /*SetFlags=*/true, WantResult);
1554 }
1555 
1556 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1557  unsigned RHSReg,
1558  AArch64_AM::ShiftExtendType ShiftType,
1559  uint64_t ShiftImm, bool WantResult) {
1560  return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType,
1561  ShiftImm, /*SetFlags=*/true, WantResult);
1562 }
1563 
1564 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1565  const Value *LHS, const Value *RHS) {
1566  // Canonicalize immediates to the RHS first.
1567  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1568  std::swap(LHS, RHS);
1569 
1570  // Canonicalize mul by power-of-2 to the RHS.
1571  if (LHS->hasOneUse() && isValueAvailable(LHS))
1572  if (isMulPowOf2(LHS))
1573  std::swap(LHS, RHS);
1574 
1575  // Canonicalize shift immediate to the RHS.
1576  if (LHS->hasOneUse() && isValueAvailable(LHS))
1577  if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1578  if (isa<ConstantInt>(SI->getOperand(1)))
1579  std::swap(LHS, RHS);
1580 
1581  Register LHSReg = getRegForValue(LHS);
1582  if (!LHSReg)
1583  return 0;
1584 
1585  unsigned ResultReg = 0;
1586  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1587  uint64_t Imm = C->getZExtValue();
1588  ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);
1589  }
1590  if (ResultReg)
1591  return ResultReg;
1592 
1593  // Check if the mul can be folded into the instruction.
1594  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1595  if (isMulPowOf2(RHS)) {
1596  const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1597  const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1598 
1599  if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1600  if (C->getValue().isPowerOf2())
1601  std::swap(MulLHS, MulRHS);
1602 
1603  assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1604  uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1605 
1606  Register RHSReg = getRegForValue(MulLHS);
1607  if (!RHSReg)
1608  return 0;
1609  ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1610  if (ResultReg)
1611  return ResultReg;
1612  }
1613  }
1614 
1615  // Check if the shift can be folded into the instruction.
1616  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1617  if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1618  if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1619  uint64_t ShiftVal = C->getZExtValue();
1620  Register RHSReg = getRegForValue(SI->getOperand(0));
1621  if (!RHSReg)
1622  return 0;
1623  ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1624  if (ResultReg)
1625  return ResultReg;
1626  }
1627  }
1628 
1629  Register RHSReg = getRegForValue(RHS);
1630  if (!RHSReg)
1631  return 0;
1632 
1633  MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1634  ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);
1635  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1636  uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1637  ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1638  }
1639  return ResultReg;
1640 }
1641 
1642 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1643  unsigned LHSReg, uint64_t Imm) {
1644  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1645  "ISD nodes are not consecutive!");
1646  static const unsigned OpcTable[3][2] = {
1647  { AArch64::ANDWri, AArch64::ANDXri },
1648  { AArch64::ORRWri, AArch64::ORRXri },
1649  { AArch64::EORWri, AArch64::EORXri }
1650  };
1651  const TargetRegisterClass *RC;
1652  unsigned Opc;
1653  unsigned RegSize;
1654  switch (RetVT.SimpleTy) {
1655  default:
1656  return 0;
1657  case MVT::i1:
1658  case MVT::i8:
1659  case MVT::i16:
1660  case MVT::i32: {
1661  unsigned Idx = ISDOpc - ISD::AND;
1662  Opc = OpcTable[Idx][0];
1663  RC = &AArch64::GPR32spRegClass;
1664  RegSize = 32;
1665  break;
1666  }
1667  case MVT::i64:
1668  Opc = OpcTable[ISDOpc - ISD::AND][1];
1669  RC = &AArch64::GPR64spRegClass;
1670  RegSize = 64;
1671  break;
1672  }
1673 
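  // The immediate forms (ANDri/ORRri/EORri) only accept "logical immediates",
  // i.e. values encodable as a repeating bit pattern; give up otherwise.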
1674  if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1675  return 0;
1676 
1677  Register ResultReg =
1678  fastEmitInst_ri(Opc, RC, LHSReg,
1679  AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1680  if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1681  uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1682  ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1683  }
1684  return ResultReg;
1685 }
1686 
1687 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1688  unsigned LHSReg, unsigned RHSReg,
1689  uint64_t ShiftImm) {
1690  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1691  "ISD nodes are not consecutive!");
1692  static const unsigned OpcTable[3][2] = {
1693  { AArch64::ANDWrs, AArch64::ANDXrs },
1694  { AArch64::ORRWrs, AArch64::ORRXrs },
1695  { AArch64::EORWrs, AArch64::EORXrs }
1696  };
1697 
1698  // Don't deal with undefined shifts.
1699  if (ShiftImm >= RetVT.getSizeInBits())
1700  return 0;
1701 
1702  const TargetRegisterClass *RC;
1703  unsigned Opc;
1704  switch (RetVT.SimpleTy) {
1705  default:
1706  return 0;
1707  case MVT::i1:
1708  case MVT::i8:
1709  case MVT::i16:
1710  case MVT::i32:
1711  Opc = OpcTable[ISDOpc - ISD::AND][0];
1712  RC = &AArch64::GPR32RegClass;
1713  break;
1714  case MVT::i64:
1715  Opc = OpcTable[ISDOpc - ISD::AND][1];
1716  RC = &AArch64::GPR64RegClass;
1717  break;
1718  }
1719  Register ResultReg =
1720  fastEmitInst_rri(Opc, RC, LHSReg, RHSReg,
1721  AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1722  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1723  uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1724  ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1725  }
1726  return ResultReg;
1727 }
1728 
1729 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg,
1730  uint64_t Imm) {
1731  return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm);
1732 }
1733 
1734 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1735  bool WantZExt, MachineMemOperand *MMO) {
1736  if (!TLI.allowsMisalignedMemoryAccesses(VT))
1737  return 0;
1738 
1739  // Simplify this down to something we can handle.
1740  if (!simplifyAddress(Addr, VT))
1741  return 0;
1742 
1743  unsigned ScaleFactor = getImplicitScaleFactor(VT);
1744  if (!ScaleFactor)
1745  llvm_unreachable("Unexpected value type.");
1746 
1747  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1748  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
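  // For example, with an i32 load ScaleFactor is 4: offset 8 fits the scaled
  // form as imm 2, but offset -4 (or any offset that is not a multiple of 4)
  // must use the unscaled form.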
1749  bool UseScaled = true;
1750  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1751  UseScaled = false;
1752  ScaleFactor = 1;
1753  }
1754 
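  // The table below is indexed as [WantZExt][2 * Idx + IsRet64Bit][size],
  // where Idx selects the addressing mode: unscaled immediate, scaled
  // immediate, register offset (X), or extended register offset (W).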
1755  static const unsigned GPOpcTable[2][8][4] = {
1756  // Sign-extend.
1757  { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
1758  AArch64::LDURXi },
1759  { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
1760  AArch64::LDURXi },
1761  { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
1762  AArch64::LDRXui },
1763  { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
1764  AArch64::LDRXui },
1765  { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1766  AArch64::LDRXroX },
1767  { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1768  AArch64::LDRXroX },
1769  { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1770  AArch64::LDRXroW },
1771  { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1772  AArch64::LDRXroW }
1773  },
1774  // Zero-extend.
1775  { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1776  AArch64::LDURXi },
1777  { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1778  AArch64::LDURXi },
1779  { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1780  AArch64::LDRXui },
1781  { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1782  AArch64::LDRXui },
1783  { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1784  AArch64::LDRXroX },
1785  { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1786  AArch64::LDRXroX },
1787  { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1788  AArch64::LDRXroW },
1789  { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1790  AArch64::LDRXroW }
1791  }
1792  };
1793 
1794  static const unsigned FPOpcTable[4][2] = {
1795  { AArch64::LDURSi, AArch64::LDURDi },
1796  { AArch64::LDRSui, AArch64::LDRDui },
1797  { AArch64::LDRSroX, AArch64::LDRDroX },
1798  { AArch64::LDRSroW, AArch64::LDRDroW }
1799  };
1800 
1801  unsigned Opc;
1802  const TargetRegisterClass *RC;
1803  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1804  Addr.getOffsetReg();
1805  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1806  if (Addr.getExtendType() == AArch64_AM::UXTW ||
1807  Addr.getExtendType() == AArch64_AM::SXTW)
1808  Idx++;
1809 
1810  bool IsRet64Bit = RetVT == MVT::i64;
1811  switch (VT.SimpleTy) {
1812  default:
1813  llvm_unreachable("Unexpected value type.");
1814  case MVT::i1: // Intentional fall-through.
1815  case MVT::i8:
1816  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1817  RC = (IsRet64Bit && !WantZExt) ?
1818  &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1819  break;
1820  case MVT::i16:
1821  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1822  RC = (IsRet64Bit && !WantZExt) ?
1823  &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1824  break;
1825  case MVT::i32:
1826  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1827  RC = (IsRet64Bit && !WantZExt) ?
1828  &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1829  break;
1830  case MVT::i64:
1831  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1832  RC = &AArch64::GPR64RegClass;
1833  break;
1834  case MVT::f32:
1835  Opc = FPOpcTable[Idx][0];
1836  RC = &AArch64::FPR32RegClass;
1837  break;
1838  case MVT::f64:
1839  Opc = FPOpcTable[Idx][1];
1840  RC = &AArch64::FPR64RegClass;
1841  break;
1842  }
1843 
1844  // Create the base instruction, then add the operands.
1845  Register ResultReg = createResultReg(RC);
1846  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1847  TII.get(Opc), ResultReg);
1848  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1849 
1850  // Loading an i1 requires special handling.
1851  if (VT == MVT::i1) {
1852  unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1);
1853  assert(ANDReg && "Unexpected AND instruction emission failure.");
1854  ResultReg = ANDReg;
1855  }
1856 
1857  // For zero-extending loads to 64bit we emit a 32bit load and then convert
1858  // the 32bit reg to a 64bit reg.
1859  if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1860  Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
1861  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1862  TII.get(AArch64::SUBREG_TO_REG), Reg64)
1863  .addImm(0)
1864  .addReg(ResultReg, getKillRegState(true))
1865  .addImm(AArch64::sub_32);
1866  ResultReg = Reg64;
1867  }
1868  return ResultReg;
1869 }
1870 
1871 bool AArch64FastISel::selectAddSub(const Instruction *I) {
1872  MVT VT;
1873  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1874  return false;
1875 
1876  if (VT.isVector())
1877  return selectOperator(I, I->getOpcode());
1878 
1879  unsigned ResultReg;
1880  switch (I->getOpcode()) {
1881  default:
1882  llvm_unreachable("Unexpected instruction.");
1883  case Instruction::Add:
1884  ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1885  break;
1886  case Instruction::Sub:
1887  ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1888  break;
1889  }
1890  if (!ResultReg)
1891  return false;
1892 
1893  updateValueMap(I, ResultReg);
1894  return true;
1895 }
1896 
1897 bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1898  MVT VT;
1899  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1900  return false;
1901 
1902  if (VT.isVector())
1903  return selectOperator(I, I->getOpcode());
1904 
1905  unsigned ResultReg;
1906  switch (I->getOpcode()) {
1907  default:
1908  llvm_unreachable("Unexpected instruction.");
1909  case Instruction::And:
1910  ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1911  break;
1912  case Instruction::Or:
1913  ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1914  break;
1915  case Instruction::Xor:
1916  ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1917  break;
1918  }
1919  if (!ResultReg)
1920  return false;
1921 
1922  updateValueMap(I, ResultReg);
1923  return true;
1924 }
1925 
1926 bool AArch64FastISel::selectLoad(const Instruction *I) {
1927  MVT VT;
1928  // Verify we have a legal type before going any further. Currently, we handle
1929  // simple types that will directly fit in a register (i32/f32/i64/f64) or
1930  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1931  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1932  cast<LoadInst>(I)->isAtomic())
1933  return false;
1934 
1935  const Value *SV = I->getOperand(0);
1936  if (TLI.supportSwiftError()) {
1937  // Swifterror values can come from either a function parameter with
1938  // swifterror attribute or an alloca with swifterror attribute.
1939  if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1940  if (Arg->hasSwiftErrorAttr())
1941  return false;
1942  }
1943 
1944  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1945  if (Alloca->isSwiftError())
1946  return false;
1947  }
1948  }
1949 
1950  // See if we can handle this address.
1951  Address Addr;
1952  if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1953  return false;
1954 
1955  // Fold the following sign-/zero-extend into the load instruction.
1956  bool WantZExt = true;
1957  MVT RetVT = VT;
1958  const Value *IntExtVal = nullptr;
1959  if (I->hasOneUse()) {
1960  if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1961  if (isTypeSupported(ZE->getType(), RetVT))
1962  IntExtVal = ZE;
1963  else
1964  RetVT = VT;
1965  } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1966  if (isTypeSupported(SE->getType(), RetVT))
1967  IntExtVal = SE;
1968  else
1969  RetVT = VT;
1970  WantZExt = false;
1971  }
1972  }
1973 
1974  unsigned ResultReg =
1975  emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1976  if (!ResultReg)
1977  return false;
1978 
1979  // There are a few different cases we have to handle, because the load or the
1980  // sign-/zero-extend might not be selected by FastISel if we fall-back to
1981  // SelectionDAG. There is also an ordering issue when both instructions are in
1982  // different basic blocks.
1983  // 1.) The load instruction is selected by FastISel, but the integer extend
1984  // not. This usually happens when the integer extend is in a different
1985  // basic block and SelectionDAG took over for that basic block.
1986  // 2.) The load instruction is selected before the integer extend. This only
1987  // happens when the integer extend is in a different basic block.
1988  // 3.) The load instruction is selected by SelectionDAG and the integer extend
1989  // by FastISel. This happens if there are instructions between the load
1990  // and the integer extend that couldn't be selected by FastISel.
1991  if (IntExtVal) {
1992  // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
1993  // could select it. Emit a copy to subreg if necessary. FastISel will remove
1994  // it when it selects the integer extend.
1995  Register Reg = lookUpRegForValue(IntExtVal);
1996  auto *MI = MRI.getUniqueVRegDef(Reg);
1997  if (!MI) {
1998  if (RetVT == MVT::i64 && VT <= MVT::i32) {
1999  if (WantZExt) {
2000  // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2001  MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
2002  ResultReg = std::prev(I)->getOperand(0).getReg();
2003  removeDeadCode(I, std::next(I));
2004  } else
2005  ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2006  AArch64::sub_32);
2007  }
2008  updateValueMap(I, ResultReg);
2009  return true;
2010  }
2011 
2012  // The integer extend has already been emitted - delete all the instructions
2013  // that have been emitted by the integer extend lowering code and use the
2014  // result from the load instruction directly.
2015  while (MI) {
2016  Reg = 0;
2017  for (auto &Opnd : MI->uses()) {
2018  if (Opnd.isReg()) {
2019  Reg = Opnd.getReg();
2020  break;
2021  }
2022  }
2023  MachineBasicBlock::iterator I(MI);
2024  removeDeadCode(I, std::next(I));
2025  MI = nullptr;
2026  if (Reg)
2027  MI = MRI.getUniqueVRegDef(Reg);
2028  }
2029  updateValueMap(IntExtVal, ResultReg);
2030  return true;
2031  }
2032 
2033  updateValueMap(I, ResultReg);
2034  return true;
2035 }
2036 
2037 bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2038  unsigned AddrReg,
2039  MachineMemOperand *MMO) {
2040  unsigned Opc;
2041  switch (VT.SimpleTy) {
2042  default: return false;
2043  case MVT::i8: Opc = AArch64::STLRB; break;
2044  case MVT::i16: Opc = AArch64::STLRH; break;
2045  case MVT::i32: Opc = AArch64::STLRW; break;
2046  case MVT::i64: Opc = AArch64::STLRX; break;
2047  }
2048 
2049  const MCInstrDesc &II = TII.get(Opc);
2050  SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2051  AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2052  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2053  .addReg(SrcReg)
2054  .addReg(AddrReg)
2055  .addMemOperand(MMO);
2056  return true;
2057 }
2058 
2059 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2060  MachineMemOperand *MMO) {
2061  if (!TLI.allowsMisalignedMemoryAccesses(VT))
2062  return false;
2063 
2064  // Simplify this down to something we can handle.
2065  if (!simplifyAddress(Addr, VT))
2066  return false;
2067 
2068  unsigned ScaleFactor = getImplicitScaleFactor(VT);
2069  if (!ScaleFactor)
2070  llvm_unreachable("Unexpected value type.");
2071 
2072  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2073  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2074  bool UseScaled = true;
2075  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2076  UseScaled = false;
2077  ScaleFactor = 1;
2078  }
2079 
2080  static const unsigned OpcTable[4][6] = {
2081  { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
2082  AArch64::STURSi, AArch64::STURDi },
2083  { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
2084  AArch64::STRSui, AArch64::STRDui },
2085  { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2086  AArch64::STRSroX, AArch64::STRDroX },
2087  { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2088  AArch64::STRSroW, AArch64::STRDroW }
2089  };
2090 
2091  unsigned Opc;
2092  bool VTIsi1 = false;
2093  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2094  Addr.getOffsetReg();
2095  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2096  if (Addr.getExtendType() == AArch64_AM::UXTW ||
2097  Addr.getExtendType() == AArch64_AM::SXTW)
2098  Idx++;
2099 
2100  switch (VT.SimpleTy) {
2101  default: llvm_unreachable("Unexpected value type.");
2102  case MVT::i1: VTIsi1 = true; LLVM_FALLTHROUGH;
2103  case MVT::i8: Opc = OpcTable[Idx][0]; break;
2104  case MVT::i16: Opc = OpcTable[Idx][1]; break;
2105  case MVT::i32: Opc = OpcTable[Idx][2]; break;
2106  case MVT::i64: Opc = OpcTable[Idx][3]; break;
2107  case MVT::f32: Opc = OpcTable[Idx][4]; break;
2108  case MVT::f64: Opc = OpcTable[Idx][5]; break;
2109  }
2110 
2111  // Storing an i1 requires special handling.
2112  if (VTIsi1 && SrcReg != AArch64::WZR) {
2113  unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1);
2114  assert(ANDReg && "Unexpected AND instruction emission failure.");
2115  SrcReg = ANDReg;
2116  }
2117  // Create the base instruction, then add the operands.
2118  const MCInstrDesc &II = TII.get(Opc);
2119  SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2120  MachineInstrBuilder MIB =
2121  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
2122  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2123 
2124  return true;
2125 }
2126 
2127 bool AArch64FastISel::selectStore(const Instruction *I) {
2128  MVT VT;
2129  const Value *Op0 = I->getOperand(0);
2130  // Verify we have a legal type before going any further. Currently, we handle
2131  // simple types that will directly fit in a register (i32/f32/i64/f64) or
2132  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2133  if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2134  return false;
2135 
2136  const Value *PtrV = I->getOperand(1);
2137  if (TLI.supportSwiftError()) {
2138  // Swifterror values can come from either a function parameter with
2139  // swifterror attribute or an alloca with swifterror attribute.
2140  if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2141  if (Arg->hasSwiftErrorAttr())
2142  return false;
2143  }
2144 
2145  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2146  if (Alloca->isSwiftError())
2147  return false;
2148  }
2149  }
2150 
2151  // Get the value to be stored into a register. Use the zero register directly
2152  // when possible to avoid an unnecessary copy and a wasted register.
2153  unsigned SrcReg = 0;
2154  if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2155  if (CI->isZero())
2156  SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2157  } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2158  if (CF->isZero() && !CF->isNegative()) {
2159  VT = MVT::getIntegerVT(VT.getSizeInBits());
2160  SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2161  }
2162  }
2163 
2164  if (!SrcReg)
2165  SrcReg = getRegForValue(Op0);
2166 
2167  if (!SrcReg)
2168  return false;
2169 
2170  auto *SI = cast<StoreInst>(I);
2171 
2172  // Try to emit a STLR for seq_cst/release.
2173  if (SI->isAtomic()) {
2174  AtomicOrdering Ord = SI->getOrdering();
2175  // The non-atomic instructions are sufficient for relaxed stores.
2176  if (isReleaseOrStronger(Ord)) {
2177  // The STLR addressing mode only supports a base reg; pass that directly.
2178  Register AddrReg = getRegForValue(PtrV);
2179  return emitStoreRelease(VT, SrcReg, AddrReg,
2180  createMachineMemOperandFor(I));
2181  }
2182  }
2183 
2184  // See if we can handle this address.
2185  Address Addr;
2186  if (!computeAddress(PtrV, Addr, Op0->getType()))
2187  return false;
2188 
2189  if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2190  return false;
2191  return true;
2192 }
2193 
2194 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2195  switch (Pred) {
2196  case CmpInst::FCMP_ONE:
2197  case CmpInst::FCMP_UEQ:
2198  default:
2199  // AL is our "false" for now. The other two need more compares.
2200  return AArch64CC::AL;
2201  case CmpInst::ICMP_EQ:
2202  case CmpInst::FCMP_OEQ:
2203  return AArch64CC::EQ;
2204  case CmpInst::ICMP_SGT:
2205  case CmpInst::FCMP_OGT:
2206  return AArch64CC::GT;
2207  case CmpInst::ICMP_SGE:
2208  case CmpInst::FCMP_OGE:
2209  return AArch64CC::GE;
2210  case CmpInst::ICMP_UGT:
2211  case CmpInst::FCMP_UGT:
2212  return AArch64CC::HI;
2213  case CmpInst::FCMP_OLT:
2214  return AArch64CC::MI;
2215  case CmpInst::ICMP_ULE:
2216  case CmpInst::FCMP_OLE:
2217  return AArch64CC::LS;
2218  case CmpInst::FCMP_ORD:
2219  return AArch64CC::VC;
2220  case CmpInst::FCMP_UNO:
2221  return AArch64CC::VS;
2222  case CmpInst::FCMP_UGE:
2223  return AArch64CC::PL;
2224  case CmpInst::ICMP_SLT:
2225  case CmpInst::FCMP_ULT:
2226  return AArch64CC::LT;
2227  case CmpInst::ICMP_SLE:
2228  case CmpInst::FCMP_ULE:
2229  return AArch64CC::LE;
2230  case CmpInst::FCMP_UNE:
2231  case CmpInst::ICMP_NE:
2232  return AArch64CC::NE;
2233  case CmpInst::ICMP_UGE:
2234  return AArch64CC::HS;
2235  case CmpInst::ICMP_ULT:
2236  return AArch64CC::LO;
2237  }
2238 }
2239 
2240 /// Try to emit a combined compare-and-branch instruction.
2241 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2242  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
2243  // will not be produced, as they are conditional branch instructions that do
2244  // not set flags.
2245  if (FuncInfo.MF->getFunction().hasFnAttribute(
2246  Attribute::SpeculativeLoadHardening))
2247  return false;
2248 
2249  assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2250  const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2251  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2252 
2253  const Value *LHS = CI->getOperand(0);
2254  const Value *RHS = CI->getOperand(1);
2255 
2256  MVT VT;
2257  if (!isTypeSupported(LHS->getType(), VT))
2258  return false;
2259 
2260  unsigned BW = VT.getSizeInBits();
2261  if (BW > 64)
2262  return false;
2263 
2264  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2265  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2266 
2267  // Try to take advantage of fallthrough opportunities.
2268  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2269  std::swap(TBB, FBB);
2270  Predicate = CmpInst::getInversePredicate(Predicate);
2271  }
2272 
2273  int TestBit = -1;
2274  bool IsCmpNE;
2275  switch (Predicate) {
2276  default:
2277  return false;
2278  case CmpInst::ICMP_EQ:
2279  case CmpInst::ICMP_NE:
2280  if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2281  std::swap(LHS, RHS);
2282 
2283  if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2284  return false;
2285 
2286  if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2287  if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2288  const Value *AndLHS = AI->getOperand(0);
2289  const Value *AndRHS = AI->getOperand(1);
2290 
2291  if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2292  if (C->getValue().isPowerOf2())
2293  std::swap(AndLHS, AndRHS);
2294 
2295  if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2296  if (C->getValue().isPowerOf2()) {
2297  TestBit = C->getValue().logBase2();
2298  LHS = AndLHS;
2299  }
2300  }
2301 
2302  if (VT == MVT::i1)
2303  TestBit = 0;
2304 
2305  IsCmpNE = Predicate == CmpInst::ICMP_NE;
2306  break;
2307  case CmpInst::ICMP_SLT:
2308  case CmpInst::ICMP_SGE:
2309  if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2310  return false;
2311 
2312  TestBit = BW - 1;
2313  IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2314  break;
2315  case CmpInst::ICMP_SGT:
2316  case CmpInst::ICMP_SLE:
2317  if (!isa<ConstantInt>(RHS))
2318  return false;
2319 
2320  if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2321  return false;
2322 
2323  TestBit = BW - 1;
2324  IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2325  break;
2326  } // end switch
2327 
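  // Indexed as [IsBitTest][IsCmpNE][Is64Bit]: CB(N)Z compares a whole register
  // against zero, TB(N)Z tests a single bit.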
2328  static const unsigned OpcTable[2][2][2] = {
2329  { {AArch64::CBZW, AArch64::CBZX },
2330  {AArch64::CBNZW, AArch64::CBNZX} },
2331  { {AArch64::TBZW, AArch64::TBZX },
2332  {AArch64::TBNZW, AArch64::TBNZX} }
2333  };
2334 
2335  bool IsBitTest = TestBit != -1;
2336  bool Is64Bit = BW == 64;
2337  if (TestBit < 32 && TestBit >= 0)
2338  Is64Bit = false;
2339 
2340  unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2341  const MCInstrDesc &II = TII.get(Opc);
2342 
2343  Register SrcReg = getRegForValue(LHS);
2344  if (!SrcReg)
2345  return false;
2346 
2347  if (BW == 64 && !Is64Bit)
2348  SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32);
2349 
2350  if ((BW < 32) && !IsBitTest)
2351  SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
2352 
2353  // Emit the combined compare and branch instruction.
2354  SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2355  MachineInstrBuilder MIB =
2356  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
2357  .addReg(SrcReg);
2358  if (IsBitTest)
2359  MIB.addImm(TestBit);
2360  MIB.addMBB(TBB);
2361 
2362  finishCondBranch(BI->getParent(), TBB, FBB);
2363  return true;
2364 }
2365 
2366 bool AArch64FastISel::selectBranch(const Instruction *I) {
2367  const BranchInst *BI = cast<BranchInst>(I);
2368  if (BI->isUnconditional()) {
2369  MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2370  fastEmitBranch(MSucc, BI->getDebugLoc());
2371  return true;
2372  }
2373 
2374  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2375  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2376 
2377  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2378  if (CI->hasOneUse() && isValueAvailable(CI)) {
2379  // Try to optimize or fold the cmp.
2380  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2381  switch (Predicate) {
2382  default:
2383  break;
2384  case CmpInst::FCMP_FALSE:
2385  fastEmitBranch(FBB, DbgLoc);
2386  return true;
2387  case CmpInst::FCMP_TRUE:
2388  fastEmitBranch(TBB, DbgLoc);
2389  return true;
2390  }
2391 
2392  // Try to emit a combined compare-and-branch first.
2393  if (emitCompareAndBranch(BI))
2394  return true;
2395 
2396  // Try to take advantage of fallthrough opportunities.
2397  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2398  std::swap(TBB, FBB);
2399  Predicate = CmpInst::getInversePredicate(Predicate);
2400  }
2401 
2402  // Emit the cmp.
2403  if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2404  return false;
2405 
2406  // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2407  // instruction.
2408  AArch64CC::CondCode CC = getCompareCC(Predicate);
2409  AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2410  switch (Predicate) {
2411  default:
2412  break;
2413  case CmpInst::FCMP_UEQ:
2414  ExtraCC = AArch64CC::EQ;
2415  CC = AArch64CC::VS;
2416  break;
2417  case CmpInst::FCMP_ONE:
2418  ExtraCC = AArch64CC::MI;
2419  CC = AArch64CC::GT;
2420  break;
2421  }
2422  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2423 
2424  // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2425  if (ExtraCC != AArch64CC::AL) {
2426  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2427  .addImm(ExtraCC)
2428  .addMBB(TBB);
2429  }
2430 
2431  // Emit the branch.
2432  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2433  .addImm(CC)
2434  .addMBB(TBB);
2435 
2436  finishCondBranch(BI->getParent(), TBB, FBB);
2437  return true;
2438  }
2439  } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2440  uint64_t Imm = CI->getZExtValue();
2441  MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2442  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
2443  .addMBB(Target);
2444 
2445  // Obtain the branch probability and add the target to the successor list.
2446  if (FuncInfo.BPI) {
2447  auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2448  BI->getParent(), Target->getBasicBlock());
2449  FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2450  } else
2451  FuncInfo.MBB->addSuccessorWithoutProb(Target);
2452  return true;
2453  } else {
2454  AArch64CC::CondCode CC = AArch64CC::AL;
2455  if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2456  // Fake request the condition, otherwise the intrinsic might be completely
2457  // optimized away.
2458  Register CondReg = getRegForValue(BI->getCondition());
2459  if (!CondReg)
2460  return false;
2461 
2462  // Emit the branch.
2463  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2464  .addImm(CC)
2465  .addMBB(TBB);
2466 
2467  finishCondBranch(BI->getParent(), TBB, FBB);
2468  return true;
2469  }
2470  }
2471 
2472  Register CondReg = getRegForValue(BI->getCondition());
2473  if (CondReg == 0)
2474  return false;
2475 
2476  // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2477  unsigned Opcode = AArch64::TBNZW;
2478  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2479  std::swap(TBB, FBB);
2480  Opcode = AArch64::TBZW;
2481  }
2482 
2483  const MCInstrDesc &II = TII.get(Opcode);
2484  Register ConstrainedCondReg
2485  = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2486  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2487  .addReg(ConstrainedCondReg)
2488  .addImm(0)
2489  .addMBB(TBB);
2490 
2491  finishCondBranch(BI->getParent(), TBB, FBB);
2492  return true;
2493 }
2494 
2495 bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2496  const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2497  Register AddrReg = getRegForValue(BI->getOperand(0));
2498  if (AddrReg == 0)
2499  return false;
2500 
2501  // Emit the indirect branch.
2502  const MCInstrDesc &II = TII.get(AArch64::BR);
2503  AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
2504  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
2505 
2506  // Make sure the CFG is up-to-date.
2507  for (auto *Succ : BI->successors())
2508  FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2509 
2510  return true;
2511 }
2512 
2513 bool AArch64FastISel::selectCmp(const Instruction *I) {
2514  const CmpInst *CI = cast<CmpInst>(I);
2515 
2516  // Vectors of i1 are weird: bail out.
2517  if (CI->getType()->isVectorTy())
2518  return false;
2519 
2520  // Try to optimize or fold the cmp.
2521  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2522  unsigned ResultReg = 0;
2523  switch (Predicate) {
2524  default:
2525  break;
2526  case CmpInst::FCMP_FALSE:
2527  ResultReg = createResultReg(&AArch64::GPR32RegClass);
2528  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2529  TII.get(TargetOpcode::COPY), ResultReg)
2530  .addReg(AArch64::WZR, getKillRegState(true));
2531  break;
2532  case CmpInst::FCMP_TRUE:
2533  ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2534  break;
2535  }
2536 
2537  if (ResultReg) {
2538  updateValueMap(I, ResultReg);
2539  return true;
2540  }
2541 
2542  // Emit the cmp.
2543  if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2544  return false;
2545 
2546  ResultReg = createResultReg(&AArch64::GPR32RegClass);
2547 
2548  // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2549  // condition codes are inverted, because they are used by CSINC.
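  // CSINC Wd, WZR, WZR, cc produces 0 when cc holds and 1 otherwise, so each
  // entry is the inverse of the condition actually being materialized.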
2550  static unsigned CondCodeTable[2][2] = {
2551  { AArch64CC::NE, AArch64CC::VC },
2552  { AArch64CC::PL, AArch64CC::LE }
2553  };
2554  unsigned *CondCodes = nullptr;
2555  switch (Predicate) {
2556  default:
2557  break;
2558  case CmpInst::FCMP_UEQ:
2559  CondCodes = &CondCodeTable[0][0];
2560  break;
2561  case CmpInst::FCMP_ONE:
2562  CondCodes = &CondCodeTable[1][0];
2563  break;
2564  }
2565 
2566  if (CondCodes) {
2567  Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2568  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2569  TmpReg1)
2570  .addReg(AArch64::WZR, getKillRegState(true))
2571  .addReg(AArch64::WZR, getKillRegState(true))
2572  .addImm(CondCodes[0]);
2573  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2574  ResultReg)
2575  .addReg(TmpReg1, getKillRegState(true))
2576  .addReg(AArch64::WZR, getKillRegState(true))
2577  .addImm(CondCodes[1]);
2578 
2579  updateValueMap(I, ResultReg);
2580  return true;
2581  }
2582 
2583  // Now set a register based on the comparison.
2584  AArch64CC::CondCode CC = getCompareCC(Predicate);
2585  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2586  AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2587  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2588  ResultReg)
2589  .addReg(AArch64::WZR, getKillRegState(true))
2590  .addReg(AArch64::WZR, getKillRegState(true))
2591  .addImm(invertedCC);
2592 
2593  updateValueMap(I, ResultReg);
2594  return true;
2595 }
2596 
2597 /// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2598 /// value.
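/// For example, 'select c, true, b' becomes 'orr c, b' and
/// 'select c, false, b' becomes 'bic b, c'.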
2599 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2600  if (!SI->getType()->isIntegerTy(1))
2601  return false;
2602 
2603  const Value *Src1Val, *Src2Val;
2604  unsigned Opc = 0;
2605  bool NeedExtraOp = false;
2606  if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2607  if (CI->isOne()) {
2608  Src1Val = SI->getCondition();
2609  Src2Val = SI->getFalseValue();
2610  Opc = AArch64::ORRWrr;
2611  } else {
2612  assert(CI->isZero());
2613  Src1Val = SI->getFalseValue();
2614  Src2Val = SI->getCondition();
2615  Opc = AArch64::BICWrr;
2616  }
2617  } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2618  if (CI->isOne()) {
2619  Src1Val = SI->getCondition();
2620  Src2Val = SI->getTrueValue();
2621  Opc = AArch64::ORRWrr;
2622  NeedExtraOp = true;
2623  } else {
2624  assert(CI->isZero());
2625  Src1Val = SI->getCondition();
2626  Src2Val = SI->getTrueValue();
2627  Opc = AArch64::ANDWrr;
2628  }
2629  }
2630 
2631  if (!Opc)
2632  return false;
2633 
2634  Register Src1Reg = getRegForValue(Src1Val);
2635  if (!Src1Reg)
2636  return false;
2637 
2638  Register Src2Reg = getRegForValue(Src2Val);
2639  if (!Src2Reg)
2640  return false;
2641 
2642  if (NeedExtraOp)
2643  Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1);
2644 
2645  Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2646  Src2Reg);
2647  updateValueMap(SI, ResultReg);
2648  return true;
2649 }
2650 
2651 bool AArch64FastISel::selectSelect(const Instruction *I) {
2652  assert(isa<SelectInst>(I) && "Expected a select instruction.");
2653  MVT VT;
2654  if (!isTypeSupported(I->getType(), VT))
2655  return false;
2656 
2657  unsigned Opc;
2658  const TargetRegisterClass *RC;
2659  switch (VT.SimpleTy) {
2660  default:
2661  return false;
2662  case MVT::i1:
2663  case MVT::i8:
2664  case MVT::i16:
2665  case MVT::i32:
2666  Opc = AArch64::CSELWr;
2667  RC = &AArch64::GPR32RegClass;
2668  break;
2669  case MVT::i64:
2670  Opc = AArch64::CSELXr;
2671  RC = &AArch64::GPR64RegClass;
2672  break;
2673  case MVT::f32:
2674  Opc = AArch64::FCSELSrrr;
2675  RC = &AArch64::FPR32RegClass;
2676  break;
2677  case MVT::f64:
2678  Opc = AArch64::FCSELDrrr;
2679  RC = &AArch64::FPR64RegClass;
2680  break;
2681  }
2682 
2683  const SelectInst *SI = cast<SelectInst>(I);
2684  const Value *Cond = SI->getCondition();
2685  AArch64CC::CondCode CC = AArch64CC::AL;
2686  AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2687 
2688  if (optimizeSelect(SI))
2689  return true;
2690 
2691  // Try to pickup the flags, so we don't have to emit another compare.
2692  if (foldXALUIntrinsic(CC, I, Cond)) {
2693  // Fake request the condition to force emission of the XALU intrinsic.
2694  Register CondReg = getRegForValue(Cond);
2695  if (!CondReg)
2696  return false;
2697  } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2698  isValueAvailable(Cond)) {
2699  const auto *Cmp = cast<CmpInst>(Cond);
2700  // Try to optimize or fold the cmp.
2701  CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2702  const Value *FoldSelect = nullptr;
2703  switch (Predicate) {
2704  default:
2705  break;
2706  case CmpInst::FCMP_FALSE:
2707  FoldSelect = SI->getFalseValue();
2708  break;
2709  case CmpInst::FCMP_TRUE:
2710  FoldSelect = SI->getTrueValue();
2711  break;
2712  }
2713 
2714  if (FoldSelect) {
2715  Register SrcReg = getRegForValue(FoldSelect);
2716  if (!SrcReg)
2717  return false;
2718 
2719  updateValueMap(I, SrcReg);
2720  return true;
2721  }
2722 
2723  // Emit the cmp.
2724  if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2725  return false;
2726 
2727  // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2728  CC = getCompareCC(Predicate);
2729  switch (Predicate) {
2730  default:
2731  break;
2732  case CmpInst::FCMP_UEQ:
2733  ExtraCC = AArch64CC::EQ;
2734  CC = AArch64CC::VS;
2735  break;
2736  case CmpInst::FCMP_ONE:
2737  ExtraCC = AArch64CC::MI;
2738  CC = AArch64CC::GT;
2739  break;
2740  }
2741  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2742  } else {
2743  Register CondReg = getRegForValue(Cond);
2744  if (!CondReg)
2745  return false;
2746 
2747  const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2748  CondReg = constrainOperandRegClass(II, CondReg, 1);
2749 
2750  // Emit a TST instruction (ANDS wzr, reg, #imm).
2751  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
2752  AArch64::WZR)
2753  .addReg(CondReg)
2754  .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2755  }
2756 
2757  Register Src1Reg = getRegForValue(SI->getTrueValue());
2758  Register Src2Reg = getRegForValue(SI->getFalseValue());
2759 
2760  if (!Src1Reg || !Src2Reg)
2761  return false;
2762 
2763  if (ExtraCC != AArch64CC::AL)
2764  Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC);
2765 
2766  Register ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC);
2767  updateValueMap(I, ResultReg);
2768  return true;
2769 }
2770 
2771 bool AArch64FastISel::selectFPExt(const Instruction *I) {
2772  Value *V = I->getOperand(0);
2773  if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2774  return false;
2775 
2776  Register Op = getRegForValue(V);
2777  if (Op == 0)
2778  return false;
2779 
2780  Register ResultReg = createResultReg(&AArch64::FPR64RegClass);
2781  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
2782  ResultReg).addReg(Op);
2783  updateValueMap(I, ResultReg);
2784  return true;
2785 }
2786 
2787 bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2788  Value *V = I->getOperand(0);
2789  if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2790  return false;
2791 
2792  Register Op = getRegForValue(V);
2793  if (Op == 0)
2794  return false;
2795 
2796  Register ResultReg = createResultReg(&AArch64::FPR32RegClass);
2797  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
2798  ResultReg).addReg(Op);
2799  updateValueMap(I, ResultReg);
2800  return true;
2801 }
2802 
2803 // FPToUI and FPToSI
2804 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2805  MVT DestVT;
2806  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2807  return false;
2808 
2809  Register SrcReg = getRegForValue(I->getOperand(0));
2810  if (SrcReg == 0)
2811  return false;
2812 
2813  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2814  if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
2815  return false;
2816 
2817  unsigned Opc;
2818  if (SrcVT == MVT::f64) {
2819  if (Signed)
2820  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2821  else
2822  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2823  } else {
2824  if (Signed)
2825  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2826  else
2827  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2828  }
2829  Register ResultReg = createResultReg(
2830  DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2831  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2832  .addReg(SrcReg);
2833  updateValueMap(I, ResultReg);
2834  return true;
2835 }
2836 
2837 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2838  MVT DestVT;
2839  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2840  return false;
2841  // Let regular ISEL handle FP16
2842  if (DestVT == MVT::f16)
2843  return false;
2844 
2845  assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2846  "Unexpected value type.");
2847 
2848  Register SrcReg = getRegForValue(I->getOperand(0));
2849  if (!SrcReg)
2850  return false;
2851 
2852  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2853 
2854  // Handle sign-extension.
2855  if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2856  SrcReg =
2857  emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2858  if (!SrcReg)
2859  return false;
2860  }
2861 
2862  unsigned Opc;
2863  if (SrcVT == MVT::i64) {
2864  if (Signed)
2865  Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2866  else
2867  Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2868  } else {
2869  if (Signed)
2870  Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2871  else
2872  Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2873  }
2874 
2875  Register ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg);
2876  updateValueMap(I, ResultReg);
2877  return true;
2878 }
2879 
2880 bool AArch64FastISel::fastLowerArguments() {
2881  if (!FuncInfo.CanLowerReturn)
2882  return false;
2883 
2884  const Function *F = FuncInfo.Fn;
2885  if (F->isVarArg())
2886  return false;
2887 
2888  CallingConv::ID CC = F->getCallingConv();
2889  if (CC != CallingConv::C && CC != CallingConv::Swift)
2890  return false;
2891 
2892  if (Subtarget->hasCustomCallingConv())
2893  return false;
2894 
2895  // Only handle simple cases of up to 8 GPR and FPR each.
2896  unsigned GPRCnt = 0;
2897  unsigned FPRCnt = 0;
2898  for (auto const &Arg : F->args()) {
2899  if (Arg.hasAttribute(Attribute::ByVal) ||
2900  Arg.hasAttribute(Attribute::InReg) ||
2901  Arg.hasAttribute(Attribute::StructRet) ||
2902  Arg.hasAttribute(Attribute::SwiftSelf) ||
2903  Arg.hasAttribute(Attribute::SwiftAsync) ||
2904  Arg.hasAttribute(Attribute::SwiftError) ||
2905  Arg.hasAttribute(Attribute::Nest))
2906  return false;
2907 
2908  Type *ArgTy = Arg.getType();
2909  if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2910  return false;
2911 
2912  EVT ArgVT = TLI.getValueType(DL, ArgTy);
2913  if (!ArgVT.isSimple())
2914  return false;
2915 
2916  MVT VT = ArgVT.getSimpleVT().SimpleTy;
2917  if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2918  return false;
2919 
2920  if (VT.isVector() &&
2921  (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2922  return false;
2923 
2924  if (VT >= MVT::i1 && VT <= MVT::i64)
2925  ++GPRCnt;
2926  else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2927  VT.is128BitVector())
2928  ++FPRCnt;
2929  else
2930  return false;
2931 
2932  if (GPRCnt > 8 || FPRCnt > 8)
2933  return false;
2934  }
2935 
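  // Argument registers, one row per register class: W/X for integer
  // arguments, H/S/D/Q for floating-point and vector arguments.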
2936  static const MCPhysReg Registers[6][8] = {
2937  { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2938  AArch64::W5, AArch64::W6, AArch64::W7 },
2939  { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2940  AArch64::X5, AArch64::X6, AArch64::X7 },
2941  { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2942  AArch64::H5, AArch64::H6, AArch64::H7 },
2943  { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2944  AArch64::S5, AArch64::S6, AArch64::S7 },
2945  { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2946  AArch64::D5, AArch64::D6, AArch64::D7 },
2947  { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2948  AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2949  };
2950 
2951  unsigned GPRIdx = 0;
2952  unsigned FPRIdx = 0;
2953  for (auto const &Arg : F->args()) {
2954  MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2955  unsigned SrcReg;
2956  const TargetRegisterClass *RC;
2957  if (VT >= MVT::i1 && VT <= MVT::i32) {
2958  SrcReg = Registers[0][GPRIdx++];
2959  RC = &AArch64::GPR32RegClass;
2960  VT = MVT::i32;
2961  } else if (VT == MVT::i64) {
2962  SrcReg = Registers[1][GPRIdx++];
2963  RC = &AArch64::GPR64RegClass;
2964  } else if (VT == MVT::f16) {
2965  SrcReg = Registers[2][FPRIdx++];
2966  RC = &AArch64::FPR16RegClass;
2967  } else if (VT == MVT::f32) {
2968  SrcReg = Registers[3][FPRIdx++];
2969  RC = &AArch64::FPR32RegClass;
2970  } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2971  SrcReg = Registers[4][FPRIdx++];
2972  RC = &AArch64::FPR64RegClass;
2973  } else if (VT.is128BitVector()) {
2974  SrcReg = Registers[5][FPRIdx++];
2975  RC = &AArch64::FPR128RegClass;
2976  } else
2977  llvm_unreachable("Unexpected value type.");
2978 
2979  Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
2980  // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
2981  // Without this, EmitLiveInCopies may eliminate the livein if its only
2982  // use is a bitcast (which isn't turned into an instruction).
2983  Register ResultReg = createResultReg(RC);
2984  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2985  TII.get(TargetOpcode::COPY), ResultReg)
2986  .addReg(DstReg, getKillRegState(true));
2987  updateValueMap(&Arg, ResultReg);
2988  }
2989  return true;
2990 }
2991 
2992 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
2993  SmallVectorImpl<MVT> &OutVTs,
2994  unsigned &NumBytes) {
2995  CallingConv::ID CC = CLI.CallConv;
2996  SmallVector<CCValAssign, 16> ArgLocs;
2997  CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
2998  CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
2999 
3000  // Get a count of how many bytes are to be pushed on the stack.
3001  NumBytes = CCInfo.getNextStackOffset();
3002 
3003  // Issue CALLSEQ_START
3004  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3005  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
3006  .addImm(NumBytes).addImm(0);
3007 
3008  // Process the args.
3009  for (CCValAssign &VA : ArgLocs) {
3010  const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3011  MVT ArgVT = OutVTs[VA.getValNo()];
3012 
3013  Register ArgReg = getRegForValue(ArgVal);
3014  if (!ArgReg)
3015  return false;
3016 
3017  // Handle arg promotion: SExt, ZExt, AExt.
3018  switch (VA.getLocInfo()) {
3019  case CCValAssign::Full:
3020  break;
3021  case CCValAssign::SExt: {
3022  MVT DestVT = VA.getLocVT();
3023  MVT SrcVT = ArgVT;
3024  ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3025  if (!ArgReg)
3026  return false;
3027  break;
3028  }
3029  case CCValAssign::AExt:
3030  // Intentional fall-through.
3031  case CCValAssign::ZExt: {
3032  MVT DestVT = VA.getLocVT();
3033  MVT SrcVT = ArgVT;
3034  ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3035  if (!ArgReg)
3036  return false;
3037  break;
3038  }
3039  default:
3040  llvm_unreachable("Unknown arg promotion!");
3041  }
3042 
3043  // Now copy/store arg to correct locations.
3044  if (VA.isRegLoc() && !VA.needsCustom()) {
3045  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3046  TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3047  CLI.OutRegs.push_back(VA.getLocReg());
3048  } else if (VA.needsCustom()) {
3049  // FIXME: Handle custom args.
3050  return false;
3051  } else {
3052  assert(VA.isMemLoc() && "Assuming store on stack.");
3053 
3054  // Don't emit stores for undef values.
3055  if (isa<UndefValue>(ArgVal))
3056  continue;
3057 
3058  // Need to store on the stack.
3059  unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3060 
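  // On big-endian targets an argument smaller than 8 bytes lives in the high
  // bytes of its stack slot, so pad the store offset accordingly.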
3061  unsigned BEAlign = 0;
3062  if (ArgSize < 8 && !Subtarget->isLittleEndian())
3063  BEAlign = 8 - ArgSize;
3064 
3065  Address Addr;
3066  Addr.setKind(Address::RegBase);
3067  Addr.setReg(AArch64::SP);
3068  Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3069 
3070  Align Alignment = DL.getABITypeAlign(ArgVal->getType());
3071  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3072  MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3073  MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3074 
3075  if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3076  return false;
3077  }
3078  }
3079  return true;
3080 }
3081 
3082 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
3083  unsigned NumBytes) {
3084  CallingConv::ID CC = CLI.CallConv;
3085 
3086  // Issue CALLSEQ_END
3087  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3088  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3089  .addImm(NumBytes).addImm(0);
3090 
3091  // Now the return value.
3092  if (RetVT != MVT::isVoid) {
3093  SmallVector<CCValAssign, 16> RVLocs;
3094  CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3095  CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
3096 
3097  // Only handle a single return value.
3098  if (RVLocs.size() != 1)
3099  return false;
3100 
3101  // Copy all of the result registers out of their specified physreg.
3102  MVT CopyVT = RVLocs[0].getValVT();
3103 
3104  // TODO: Handle big-endian results
3105  if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3106  return false;
3107 
3108  Register ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
3109  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3110  TII.get(TargetOpcode::COPY), ResultReg)
3111  .addReg(RVLocs[0].getLocReg());
3112  CLI.InRegs.push_back(RVLocs[0].getLocReg());
3113 
3114  CLI.ResultReg = ResultReg;
3115  CLI.NumResultRegs = 1;
3116  }
3117 
3118  return true;
3119 }
3120 
3121 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3122  CallingConv::ID CC = CLI.CallConv;
3123  bool IsTailCall = CLI.IsTailCall;
3124  bool IsVarArg = CLI.IsVarArg;
3125  const Value *Callee = CLI.Callee;
3126  MCSymbol *Symbol = CLI.Symbol;
3127 
3128  if (!Callee && !Symbol)
3129  return false;
3130 
3131  // Allow SelectionDAG isel to handle calls to functions like setjmp that need
3132  // a bti instruction following the call.
3133  if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
3134  !Subtarget->noBTIAtReturnTwice() &&
3135  MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
3136  return false;
3137 
3138  // Allow SelectionDAG isel to handle tail calls.
3139  if (IsTailCall)
3140  return false;
3141 
3142  // FIXME: we could and should support this, but for now correctness at -O0 is
3143  // more important.
3144  if (Subtarget->isTargetILP32())
3145  return false;
3146 
3147  CodeModel::Model CM = TM.getCodeModel();
3148  // Only support the small-addressing and large code models.
3149  if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3150  return false;
3151 
3152  // FIXME: Add large code model support for ELF.
3153  if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3154  return false;
3155 
3156  // Let SDISel handle vararg functions.
3157  if (IsVarArg)
3158  return false;
3159 
3160  // FIXME: Only handle *simple* calls for now.
3161  MVT RetVT;
3162  if (CLI.RetTy->isVoidTy())
3163  RetVT = MVT::isVoid;
3164  else if (!isTypeLegal(CLI.RetTy, RetVT))
3165  return false;
3166 
3167  for (auto Flag : CLI.OutFlags)
3168  if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3169  Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError())
3170  return false;
3171 
3172  // Set up the argument vectors.
3173  SmallVector<MVT, 16> OutVTs;
3174  OutVTs.reserve(CLI.OutVals.size());
3175 
3176  for (auto *Val : CLI.OutVals) {
3177  MVT VT;
3178  if (!isTypeLegal(Val->getType(), VT) &&
3179  !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3180  return false;
3181 
3182  // We don't handle vector parameters yet.
3183  if (VT.isVector() || VT.getSizeInBits() > 64)
3184  return false;
3185 
3186  OutVTs.push_back(VT);
3187  }
3188 
3189  Address Addr;
3190  if (Callee && !computeCallAddress(Callee, Addr))
3191  return false;
3192 
3193  // The weak function target may be zero; in that case we must use indirect
3194  // addressing via a stub on windows as it may be out of range for a
3195  // PC-relative jump.
3196  if (Subtarget->isTargetWindows() && Addr.getGlobalValue() &&
3197  Addr.getGlobalValue()->hasExternalWeakLinkage())
3198  return false;
3199 
3200  // Handle the arguments now that we've gotten them.
3201  unsigned NumBytes;
3202  if (!processCallArgs(CLI, OutVTs, NumBytes))
3203  return false;
3204 
3205  const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3206  if (RegInfo->isAnyArgRegReserved(*MF))
3207  RegInfo->emitReservedArgRegCallError(*MF);
3208 
3209  // Issue the call.
3210  MachineInstrBuilder MIB;
3211  if (Subtarget->useSmallAddressing()) {
3212  const MCInstrDesc &II =
3213  TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL);
3214  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
3215  if (Symbol)
3216  MIB.addSym(Symbol, 0);
3217  else if (Addr.getGlobalValue())
3218  MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3219  else if (Addr.getReg()) {
3220  Register Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3221  MIB.addReg(Reg);
3222  } else
3223  return false;
3224  } else {
3225  unsigned CallReg = 0;
3226  if (Symbol) {
3227  Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3228  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
3229  ADRPReg)
3230  .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3231 
3232  CallReg = createResultReg(&AArch64::GPR64RegClass);
3233  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3234  TII.get(AArch64::LDRXui), CallReg)
3235  .addReg(ADRPReg)
3236  .addSym(Symbol,
3237  AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3238  } else if (Addr.getGlobalValue())
3239  CallReg = materializeGV(Addr.getGlobalValue());
3240  else if (Addr.getReg())
3241  CallReg = Addr.getReg();
3242 
3243  if (!CallReg)
3244  return false;
3245 
3246  const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF));
3247  CallReg = constrainOperandRegClass(II, CallReg, 0);
3248  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
3249  }
3250 
3251  // Add implicit physical register uses to the call.
3252  for (auto Reg : CLI.OutRegs)
3253  MIB.addReg(Reg, RegState::Implicit);
3254 
3255  // Add a register mask with the call-preserved registers.
3256  // Proper defs for return values will be added by setPhysRegsDeadExcept().
3257  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3258 
3259  CLI.Call = MIB;
3260 
3261  // Finish off the call including any return values.
3262  return finishCall(CLI, RetVT, NumBytes);
3263 }
3264 
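// A memcpy is considered small enough to inline when it takes at most four
// aligned load/store pairs, or fewer than 32 bytes if the alignment is
// unknown.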
3265 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
3266  if (Alignment)
3267  return Len / Alignment <= 4;
3268  else
3269  return Len < 32;
3270 }
3271 
3272 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3273  uint64_t Len, unsigned Alignment) {
3274  // Make sure we don't bloat code by inlining very large memcpy's.
3275  if (!isMemCpySmall(Len, Alignment))
3276  return false;
3277 
3278  int64_t UnscaledOffset = 0;
3279  Address OrigDest = Dest;
3280  Address OrigSrc = Src;
3281 
3282  while (Len) {
3283  MVT VT;
3284  if (!Alignment || Alignment >= 8) {
3285  if (Len >= 8)
3286  VT = MVT::i64;
3287  else if (Len >= 4)
3288  VT = MVT::i32;
3289  else if (Len >= 2)
3290  VT = MVT::i16;
3291  else {
3292  VT = MVT::i8;
3293  }
3294  } else {
3295  // Bound based on alignment.
3296  if (Len >= 4 && Alignment == 4)
3297  VT = MVT::i32;
3298  else if (Len >= 2 && Alignment == 2)
3299  VT = MVT::i16;
3300  else {
3301  VT = MVT::i8;
3302  }
3303  }
3304 
3305  unsigned ResultReg = emitLoad(VT, VT, Src);
3306  if (!ResultReg)
3307  return false;
3308 
3309  if (!emitStore(VT, ResultReg, Dest))
3310  return false;
3311 
3312  int64_t Size = VT.getSizeInBits() / 8;
3313  Len -= Size;
3314  UnscaledOffset += Size;
3315 
3316  // We need to recompute the unscaled offset for each iteration.
3317  Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3318  Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3319  }
3320 
3321  return true;
3322 }
3323 
3324 /// Check if it is possible to fold the condition from the XALU intrinsic
3325 /// into the user. The condition code will only be updated on success.
3326 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3327  const Instruction *I,
3328  const Value *Cond) {
3329  if (!isa<ExtractValueInst>(Cond))
3330  return false;
3331 
3332  const auto *EV = cast<ExtractValueInst>(Cond);
3333  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3334  return false;
3335 
3336  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3337  MVT RetVT;
3338  const Function *Callee = II->getCalledFunction();
3339  Type *RetTy =
3340  cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3341  if (!isTypeLegal(RetTy, RetVT))
3342  return false;
3343 
3344  if (RetVT != MVT::i32 && RetVT != MVT::i64)
3345  return false;
3346 
3347  const Value *LHS = II->getArgOperand(0);
3348  const Value *RHS = II->getArgOperand(1);
3349 
3350  // Canonicalize immediate to the RHS.
3351  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3352  std::swap(LHS, RHS);
3353 
3354  // Simplify multiplies.
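  // A multiply by 2 is lowered as an add of the value to itself, so the
  // corresponding add-with-overflow condition code can be used instead.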
3355  Intrinsic::ID IID = II->getIntrinsicID();
3356  switch (IID) {
3357  default:
3358  break;
3359  case Intrinsic::smul_with_overflow:
3360  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3361  if (C->getValue() == 2)
3362  IID = Intrinsic::sadd_with_overflow;
3363  break;
3364  case Intrinsic::umul_with_overflow:
3365  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3366  if (C->getValue() == 2)
3367  IID = Intrinsic::uadd_with_overflow;
3368  break;
3369  }
3370 
3371  AArch64CC::CondCode TmpCC;
3372  switch (IID) {
3373  default:
3374  return false;
3375  case Intrinsic::sadd_with_overflow:
3376  case Intrinsic::ssub_with_overflow:
3377  TmpCC = AArch64CC::VS;
3378  break;
3379  case Intrinsic::uadd_with_overflow:
3380  TmpCC = AArch64CC::HS;
3381  break;
3382  case Intrinsic::usub_with_overflow:
3383  TmpCC = AArch64CC::LO;
3384  break;
3385  case Intrinsic::smul_with_overflow:
3386  case Intrinsic::umul_with_overflow:
3387  TmpCC = AArch64CC::NE;
3388  break;
3389  }
3390 
3391  // Check if both instructions are in the same basic block.
3392  if (!isValueAvailable(II))
3393  return false;
3394 
3395  // Make sure nothing is in the way
 3396  BasicBlock::const_iterator Start(I);
 3397  BasicBlock::const_iterator End(II);
 3398  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3399  // We only expect extractvalue instructions between the intrinsic and the
3400  // instruction to be selected.
3401  if (!isa<ExtractValueInst>(Itr))
3402  return false;
3403 
3404  // Check that the extractvalue operand comes from the intrinsic.
3405  const auto *EVI = cast<ExtractValueInst>(Itr);
3406  if (EVI->getAggregateOperand() != II)
3407  return false;
3408  }
3409 
3410  CC = TmpCC;
3411  return true;
3412 }
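// Illustrative annotation (not from the upstream source): the fold above
// recognizes IR of the form
//   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
//   %ovf = extractvalue { i32, i1 } %res, 1
//   br i1 %ovf, label %overflow, label %cont
// and lets the branch (or select) reuse the NZCV flags set by the ADDS that
// lowers the intrinsic, e.g. "b.vs .Loverflow", instead of first
// materializing the overflow bit into a register. Value names are
// hypothetical examples.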
3413 
3414 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3415  // FIXME: Handle more intrinsics.
3416  switch (II->getIntrinsicID()) {
3417  default: return false;
3418  case Intrinsic::frameaddress: {
3419  MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3420  MFI.setFrameAddressIsTaken(true);
3421 
3422  const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3423  Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3424  Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3425  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3426  TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3427  // Recursively load frame address
3428  // ldr x0, [fp]
3429  // ldr x0, [x0]
3430  // ldr x0, [x0]
3431  // ...
3432  unsigned DestReg;
3433  unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3434  while (Depth--) {
3435  DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3436  SrcReg, 0);
3437  assert(DestReg && "Unexpected LDR instruction emission failure.");
3438  SrcReg = DestReg;
3439  }
3440 
3441  updateValueMap(II, SrcReg);
3442  return true;
3443  }
3444  case Intrinsic::sponentry: {
3445  MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3446 
3447  // SP = FP + Fixed Object + 16
3448  int FI = MFI.CreateFixedObject(4, 0, false);
3449  Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
3450  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3451  TII.get(AArch64::ADDXri), ResultReg)
3452  .addFrameIndex(FI)
3453  .addImm(0)
3454  .addImm(0);
3455 
3456  updateValueMap(II, ResultReg);
3457  return true;
3458  }
3459  case Intrinsic::memcpy:
3460  case Intrinsic::memmove: {
3461  const auto *MTI = cast<MemTransferInst>(II);
3462  // Don't handle volatile.
3463  if (MTI->isVolatile())
3464  return false;
3465 
3466  // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
3467  // we would emit dead code because we don't currently handle memmoves.
3468  bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3469  if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3470  // Small memcpy's are common enough that we want to do them without a call
3471  // if possible.
3472  uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3473  unsigned Alignment = MinAlign(MTI->getDestAlignment(),
3474  MTI->getSourceAlignment());
3475  if (isMemCpySmall(Len, Alignment)) {
3476  Address Dest, Src;
3477  if (!computeAddress(MTI->getRawDest(), Dest) ||
3478  !computeAddress(MTI->getRawSource(), Src))
3479  return false;
3480  if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3481  return true;
3482  }
3483  }
3484 
3485  if (!MTI->getLength()->getType()->isIntegerTy(64))
3486  return false;
3487 
3488  if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3489  // Fast instruction selection doesn't support the special
3490  // address spaces.
3491  return false;
3492 
3493  const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3494  return lowerCallTo(II, IntrMemName, II->arg_size() - 1);
3495  }
3496  case Intrinsic::memset: {
3497  const MemSetInst *MSI = cast<MemSetInst>(II);
3498  // Don't handle volatile.
3499  if (MSI->isVolatile())
3500  return false;
3501 
3502  if (!MSI->getLength()->getType()->isIntegerTy(64))
3503  return false;
3504 
3505  if (MSI->getDestAddressSpace() > 255)
3506  // Fast instruction selection doesn't support the special
3507  // address spaces.
3508  return false;
3509 
3510  return lowerCallTo(II, "memset", II->arg_size() - 1);
3511  }
3512  case Intrinsic::sin:
3513  case Intrinsic::cos:
3514  case Intrinsic::pow: {
3515  MVT RetVT;
3516  if (!isTypeLegal(II->getType(), RetVT))
3517  return false;
3518 
3519  if (RetVT != MVT::f32 && RetVT != MVT::f64)
3520  return false;
3521 
3522  static const RTLIB::Libcall LibCallTable[3][2] = {
3523  { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3524  { RTLIB::COS_F32, RTLIB::COS_F64 },
3525  { RTLIB::POW_F32, RTLIB::POW_F64 }
3526  };
3527  RTLIB::Libcall LC;
3528  bool Is64Bit = RetVT == MVT::f64;
3529  switch (II->getIntrinsicID()) {
3530  default:
3531  llvm_unreachable("Unexpected intrinsic.");
3532  case Intrinsic::sin:
3533  LC = LibCallTable[0][Is64Bit];
3534  break;
3535  case Intrinsic::cos:
3536  LC = LibCallTable[1][Is64Bit];
3537  break;
3538  case Intrinsic::pow:
3539  LC = LibCallTable[2][Is64Bit];
3540  break;
3541  }
3542 
3543  ArgListTy Args;
3544  Args.reserve(II->arg_size());
3545 
3546  // Populate the argument list.
3547  for (auto &Arg : II->args()) {
3548  ArgListEntry Entry;
3549  Entry.Val = Arg;
3550  Entry.Ty = Arg->getType();
3551  Args.push_back(Entry);
3552  }
3553 
3554  CallLoweringInfo CLI;
3555  MCContext &Ctx = MF->getContext();
3556  CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3557  TLI.getLibcallName(LC), std::move(Args));
3558  if (!lowerCallTo(CLI))
3559  return false;
3560  updateValueMap(II, CLI.ResultReg);
3561  return true;
3562  }
3563  case Intrinsic::fabs: {
3564  MVT VT;
3565  if (!isTypeLegal(II->getType(), VT))
3566  return false;
3567 
3568  unsigned Opc;
3569  switch (VT.SimpleTy) {
3570  default:
3571  return false;
3572  case MVT::f32:
3573  Opc = AArch64::FABSSr;
3574  break;
3575  case MVT::f64:
3576  Opc = AArch64::FABSDr;
3577  break;
3578  }
3579  Register SrcReg = getRegForValue(II->getOperand(0));
3580  if (!SrcReg)
3581  return false;
3582  Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3583  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3584  .addReg(SrcReg);
3585  updateValueMap(II, ResultReg);
3586  return true;
3587  }
3588  case Intrinsic::trap:
3589  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3590  .addImm(1);
3591  return true;
3592  case Intrinsic::debugtrap:
3593  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3594  .addImm(0xF000);
3595  return true;
3596 
3597  case Intrinsic::sqrt: {
3598  Type *RetTy = II->getCalledFunction()->getReturnType();
3599 
3600  MVT VT;
3601  if (!isTypeLegal(RetTy, VT))
3602  return false;
3603 
3604  Register Op0Reg = getRegForValue(II->getOperand(0));
3605  if (!Op0Reg)
3606  return false;
3607 
3608  unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg);
3609  if (!ResultReg)
3610  return false;
3611 
3612  updateValueMap(II, ResultReg);
3613  return true;
3614  }
3615  case Intrinsic::sadd_with_overflow:
3616  case Intrinsic::uadd_with_overflow:
3617  case Intrinsic::ssub_with_overflow:
3618  case Intrinsic::usub_with_overflow:
3619  case Intrinsic::smul_with_overflow:
3620  case Intrinsic::umul_with_overflow: {
3621  // This implements the basic lowering of the xalu with overflow intrinsics.
3622  const Function *Callee = II->getCalledFunction();
3623  auto *Ty = cast<StructType>(Callee->getReturnType());
3624  Type *RetTy = Ty->getTypeAtIndex(0U);
3625 
3626  MVT VT;
3627  if (!isTypeLegal(RetTy, VT))
3628  return false;
3629 
3630  if (VT != MVT::i32 && VT != MVT::i64)
3631  return false;
3632 
3633  const Value *LHS = II->getArgOperand(0);
3634  const Value *RHS = II->getArgOperand(1);
3635  // Canonicalize immediate to the RHS.
3636  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3637  std::swap(LHS, RHS);
3638 
3639  // Simplify multiplies.
3640  Intrinsic::ID IID = II->getIntrinsicID();
3641  switch (IID) {
3642  default:
3643  break;
3644  case Intrinsic::smul_with_overflow:
3645  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3646  if (C->getValue() == 2) {
3647  IID = Intrinsic::sadd_with_overflow;
3648  RHS = LHS;
3649  }
3650  break;
3651  case Intrinsic::umul_with_overflow:
3652  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3653  if (C->getValue() == 2) {
3654  IID = Intrinsic::uadd_with_overflow;
3655  RHS = LHS;
3656  }
3657  break;
3658  }
3659 
3660  unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
 3661  AArch64CC::CondCode CC = AArch64CC::Invalid;
 3662  switch (IID) {
3663  default: llvm_unreachable("Unexpected intrinsic!");
3664  case Intrinsic::sadd_with_overflow:
3665  ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3666  CC = AArch64CC::VS;
3667  break;
3668  case Intrinsic::uadd_with_overflow:
3669  ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3670  CC = AArch64CC::HS;
3671  break;
3672  case Intrinsic::ssub_with_overflow:
3673  ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3674  CC = AArch64CC::VS;
3675  break;
3676  case Intrinsic::usub_with_overflow:
3677  ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3678  CC = AArch64CC::LO;
3679  break;
3680  case Intrinsic::smul_with_overflow: {
3681  CC = AArch64CC::NE;
3682  Register LHSReg = getRegForValue(LHS);
3683  if (!LHSReg)
3684  return false;
3685 
3686  Register RHSReg = getRegForValue(RHS);
3687  if (!RHSReg)
3688  return false;
3689 
3690  if (VT == MVT::i32) {
3691  MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg);
3692  Register MulSubReg =
3693  fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3694  // cmp xreg, wreg, sxtw
3695  emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg,
3696  AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true,
3697  /*WantResult=*/false);
3698  MulReg = MulSubReg;
3699  } else {
3700  assert(VT == MVT::i64 && "Unexpected value type.");
3701  // LHSReg and RHSReg cannot be killed by this Mul, since they are
3702  // reused in the next instruction.
3703  MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3704  unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg);
3705  emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63,
3706  /*WantResult=*/false);
3707  }
3708  break;
3709  }
3710  case Intrinsic::umul_with_overflow: {
3711  CC = AArch64CC::NE;
3712  Register LHSReg = getRegForValue(LHS);
3713  if (!LHSReg)
3714  return false;
3715 
3716  Register RHSReg = getRegForValue(RHS);
3717  if (!RHSReg)
3718  return false;
3719 
3720  if (VT == MVT::i32) {
3721  MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg);
3722  // tst xreg, #0xffffffff00000000
3723  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3724  TII.get(AArch64::ANDSXri), AArch64::XZR)
3725  .addReg(MulReg)
3726  .addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64));
3727  MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3728  } else {
3729  assert(VT == MVT::i64 && "Unexpected value type.");
3730  // LHSReg and RHSReg cannot be killed by this Mul, since they are
3731  // reused in the next instruction.
3732  MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3733  unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg);
3734  emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false);
3735  }
3736  break;
3737  }
3738  }
3739 
3740  if (MulReg) {
3741  ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3742  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3743  TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3744  }
3745 
3746  if (!ResultReg1)
3747  return false;
3748 
3749  ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3750  AArch64::WZR, AArch64::WZR,
3751  getInvertedCondCode(CC));
3752  (void)ResultReg2;
3753  assert((ResultReg1 + 1) == ResultReg2 &&
3754  "Nonconsecutive result registers.");
3755  updateValueMap(II, ResultReg1, 2);
3756  return true;
3757  }
3758  }
3759  return false;
3760 }
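// Illustrative annotation (not part of the upstream source): a 32-bit
// uadd.with.overflow handled by the code above lowers roughly to
//   adds  w8, wA, wB          ; result, and NZCV updated
//   csinc w9, wzr, wzr, lo    ; carry set (HS) => w9 = 1, otherwise 0
// where the CSINC condition is the inverse of the overflow condition (HS),
// matching the getInvertedCondCode(CC) call above. Register names are
// placeholders for the vregs FastISel creates.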
3761 
3762 bool AArch64FastISel::selectRet(const Instruction *I) {
3763  const ReturnInst *Ret = cast<ReturnInst>(I);
3764  const Function &F = *I->getParent()->getParent();
3765 
3766  if (!FuncInfo.CanLowerReturn)
3767  return false;
3768 
3769  if (F.isVarArg())
3770  return false;
3771 
3772  if (TLI.supportSwiftError() &&
3773  F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3774  return false;
3775 
3776  if (TLI.supportSplitCSR(FuncInfo.MF))
3777  return false;
3778 
3779  // Build a list of return value registers.
3780  SmallVector<unsigned, 4> RetRegs;
3781 
3782  if (Ret->getNumOperands() > 0) {
3783  CallingConv::ID CC = F.getCallingConv();
 3784  SmallVector<ISD::OutputArg, 4> Outs;
 3785  GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3786 
3787  // Analyze operands of the call, assigning locations to each operand.
 3788  SmallVector<CCValAssign, 16> ValLocs;
 3789  CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
 3790  CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
 3791  : RetCC_AArch64_AAPCS;
 3792  CCInfo.AnalyzeReturn(Outs, RetCC);
3793 
3794  // Only handle a single return value for now.
3795  if (ValLocs.size() != 1)
3796  return false;
3797 
3798  CCValAssign &VA = ValLocs[0];
3799  const Value *RV = Ret->getOperand(0);
3800 
3801  // Don't bother handling odd stuff for now.
3802  if ((VA.getLocInfo() != CCValAssign::Full) &&
3803  (VA.getLocInfo() != CCValAssign::BCvt))
3804  return false;
3805 
3806  // Only handle register returns for now.
3807  if (!VA.isRegLoc())
3808  return false;
3809 
3810  Register Reg = getRegForValue(RV);
3811  if (Reg == 0)
3812  return false;
3813 
3814  unsigned SrcReg = Reg + VA.getValNo();
3815  Register DestReg = VA.getLocReg();
3816  // Avoid a cross-class copy. This is very unlikely.
3817  if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3818  return false;
3819 
3820  EVT RVEVT = TLI.getValueType(DL, RV->getType());
3821  if (!RVEVT.isSimple())
3822  return false;
3823 
3824  // Vectors (of > 1 lane) in big endian need tricky handling.
3825  if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
3826  !Subtarget->isLittleEndian())
3827  return false;
3828 
3829  MVT RVVT = RVEVT.getSimpleVT();
3830  if (RVVT == MVT::f128)
3831  return false;
3832 
3833  MVT DestVT = VA.getValVT();
3834  // Special handling for extended integers.
3835  if (RVVT != DestVT) {
3836  if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3837  return false;
3838 
3839  if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3840  return false;
3841 
3842  bool IsZExt = Outs[0].Flags.isZExt();
3843  SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3844  if (SrcReg == 0)
3845  return false;
3846  }
3847 
3848  // "Callee" (i.e. value producer) zero extends pointers at function
3849  // boundary.
3850  if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
3851  SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff);
3852 
3853  // Make the copy.
3854  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3855  TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3856 
3857  // Add register to return instruction.
3858  RetRegs.push_back(VA.getLocReg());
3859  }
3860 
3861  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3862  TII.get(AArch64::RET_ReallyLR));
3863  for (unsigned RetReg : RetRegs)
3864  MIB.addReg(RetReg, RegState::Implicit);
3865  return true;
3866 }
3867 
3868 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3869  Type *DestTy = I->getType();
3870  Value *Op = I->getOperand(0);
3871  Type *SrcTy = Op->getType();
3872 
3873  EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3874  EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3875  if (!SrcEVT.isSimple())
3876  return false;
3877  if (!DestEVT.isSimple())
3878  return false;
3879 
3880  MVT SrcVT = SrcEVT.getSimpleVT();
3881  MVT DestVT = DestEVT.getSimpleVT();
3882 
3883  if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3884  SrcVT != MVT::i8)
3885  return false;
3886  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3887  DestVT != MVT::i1)
3888  return false;
3889 
3890  Register SrcReg = getRegForValue(Op);
3891  if (!SrcReg)
3892  return false;
3893 
3894  // If we're truncating from i64 to a smaller non-legal type then generate an
 3895  // AND. Otherwise, we know the high bits are undefined and a truncate only
 3896  // generates a COPY. We cannot also mark the source register as the result
 3897  // register, because this can incorrectly transfer the kill flag onto the
3898  // source register.
3899  unsigned ResultReg;
3900  if (SrcVT == MVT::i64) {
3901  uint64_t Mask = 0;
3902  switch (DestVT.SimpleTy) {
3903  default:
3904  // Trunc i64 to i32 is handled by the target-independent fast-isel.
3905  return false;
3906  case MVT::i1:
3907  Mask = 0x1;
3908  break;
3909  case MVT::i8:
3910  Mask = 0xff;
3911  break;
3912  case MVT::i16:
3913  Mask = 0xffff;
3914  break;
3915  }
3916  // Issue an extract_subreg to get the lower 32-bits.
3917  Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg,
3918  AArch64::sub_32);
3919  // Create the AND instruction which performs the actual truncation.
3920  ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask);
3921  assert(ResultReg && "Unexpected AND instruction emission failure.");
3922  } else {
3923  ResultReg = createResultReg(&AArch64::GPR32RegClass);
3924  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3925  TII.get(TargetOpcode::COPY), ResultReg)
3926  .addReg(SrcReg);
3927  }
3928 
3929  updateValueMap(I, ResultReg);
3930  return true;
3931 }
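// Illustrative annotation (not from the upstream source): for
//   %t = trunc i64 %x to i8
// the path above extracts the low 32 bits via the sub_32 subregister and then
// masks them, roughly
//   ; w8 = low half of the source X register (EXTRACT_SUBREG)
//   and w9, w8, #0xff
// whereas truncates from i32 (or narrower) sources become a plain COPY.
// %x and the register names are hypothetical.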
3932 
3933 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
3934  assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
3935  DestVT == MVT::i64) &&
3936  "Unexpected value type.");
3937  // Handle i8 and i16 as i32.
3938  if (DestVT == MVT::i8 || DestVT == MVT::i16)
3939  DestVT = MVT::i32;
3940 
3941  if (IsZExt) {
3942  unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1);
3943  assert(ResultReg && "Unexpected AND instruction emission failure.");
3944  if (DestVT == MVT::i64) {
3945  // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
3946  // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
3947  Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3948  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3949  TII.get(AArch64::SUBREG_TO_REG), Reg64)
3950  .addImm(0)
3951  .addReg(ResultReg)
3952  .addImm(AArch64::sub_32);
3953  ResultReg = Reg64;
3954  }
3955  return ResultReg;
3956  } else {
3957  if (DestVT == MVT::i64) {
3958  // FIXME: We're SExt i1 to i64.
3959  return 0;
3960  }
3961  return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
3962  0, 0);
3963  }
3964 }
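// Illustrative annotation (not part of the upstream source): extending an i1
// with the helper above produces
//   and  w8, wSrc, #0x1        ; zext i1 -> i32/i64 (plus SUBREG_TO_REG for
//                              ;   the 64-bit case)
//   sbfm w8, wSrc, #0, #0      ; sext i1 -> i32 (replicate bit 0)
// Register names are placeholders for the vregs FastISel creates.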
3965 
3966 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
3967  unsigned Opc, ZReg;
3968  switch (RetVT.SimpleTy) {
3969  default: return 0;
3970  case MVT::i8:
3971  case MVT::i16:
3972  case MVT::i32:
3973  RetVT = MVT::i32;
3974  Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
3975  case MVT::i64:
3976  Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
3977  }
3978 
3979  const TargetRegisterClass *RC =
3980  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3981  return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg);
3982 }
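// Illustrative annotation (not from the upstream source): there is no
// standalone MUL opcode used here; a multiply is emitted as a multiply-add
// with the zero register as the addend, e.g. for i32
//   madd w8, wA, wB, wzr       ; w8 = wA * wB + 0
// and MADDXrrr with XZR for the 64-bit case. Register names are placeholders.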
3983 
3984 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
3985  if (RetVT != MVT::i64)
3986  return 0;
3987 
3988  return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
3989  Op0, Op1, AArch64::XZR);
3990 }
3991 
3992 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
3993  if (RetVT != MVT::i64)
3994  return 0;
3995 
3996  return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
3997  Op0, Op1, AArch64::XZR);
3998 }
3999 
4000 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg,
4001  unsigned Op1Reg) {
4002  unsigned Opc = 0;
4003  bool NeedTrunc = false;
4004  uint64_t Mask = 0;
4005  switch (RetVT.SimpleTy) {
4006  default: return 0;
4007  case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
4008  case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4009  case MVT::i32: Opc = AArch64::LSLVWr; break;
4010  case MVT::i64: Opc = AArch64::LSLVXr; break;
4011  }
4012 
4013  const TargetRegisterClass *RC =
4014  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4015  if (NeedTrunc)
4016  Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4017 
4018  Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4019  if (NeedTrunc)
4020  ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4021  return ResultReg;
4022 }
4023 
4024 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4025  uint64_t Shift, bool IsZExt) {
4026  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4027  "Unexpected source/return type pair.");
4028  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4029  SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4030  "Unexpected source value type.");
4031  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4032  RetVT == MVT::i64) && "Unexpected return value type.");
4033 
4034  bool Is64Bit = (RetVT == MVT::i64);
4035  unsigned RegSize = Is64Bit ? 64 : 32;
4036  unsigned DstBits = RetVT.getSizeInBits();
4037  unsigned SrcBits = SrcVT.getSizeInBits();
4038  const TargetRegisterClass *RC =
4039  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4040 
4041  // Just emit a copy for "zero" shifts.
4042  if (Shift == 0) {
4043  if (RetVT == SrcVT) {
4044  Register ResultReg = createResultReg(RC);
4045  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4046  TII.get(TargetOpcode::COPY), ResultReg)
4047  .addReg(Op0);
4048  return ResultReg;
4049  } else
4050  return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4051  }
4052 
4053  // Don't deal with undefined shifts.
4054  if (Shift >= DstBits)
4055  return 0;
4056 
4057  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4058  // {S|U}BFM Wd, Wn, #r, #s
4059  // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4060 
4061  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4062  // %2 = shl i16 %1, 4
4063  // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4064  // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4065  // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4066  // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4067 
4068  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4069  // %2 = shl i16 %1, 8
4070  // Wd<32+7-24,32-24> = Wn<7:0>
4071  // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4072  // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4073  // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4074 
4075  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4076  // %2 = shl i16 %1, 12
4077  // Wd<32+3-20,32-20> = Wn<3:0>
4078  // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4079  // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4080  // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4081 
4082  unsigned ImmR = RegSize - Shift;
4083  // Limit the width to the length of the source type.
4084  unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4085  static const unsigned OpcTable[2][2] = {
4086  {AArch64::SBFMWri, AArch64::SBFMXri},
4087  {AArch64::UBFMWri, AArch64::UBFMXri}
4088  };
4089  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4090  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4091  Register TmpReg = MRI.createVirtualRegister(RC);
4092  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4093  TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4094  .addImm(0)
4095  .addReg(Op0)
4096  .addImm(AArch64::sub_32);
4097  Op0 = TmpReg;
4098  }
4099  return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4100 }
4101 
4102 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg,
4103  unsigned Op1Reg) {
4104  unsigned Opc = 0;
4105  bool NeedTrunc = false;
4106  uint64_t Mask = 0;
4107  switch (RetVT.SimpleTy) {
4108  default: return 0;
4109  case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
4110  case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4111  case MVT::i32: Opc = AArch64::LSRVWr; break;
4112  case MVT::i64: Opc = AArch64::LSRVXr; break;
4113  }
4114 
4115  const TargetRegisterClass *RC =
4116  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4117  if (NeedTrunc) {
4118  Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask);
4119  Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4120  }
4121  Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4122  if (NeedTrunc)
4123  ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4124  return ResultReg;
4125 }
4126 
4127 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4128  uint64_t Shift, bool IsZExt) {
4129  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4130  "Unexpected source/return type pair.");
4131  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4132  SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4133  "Unexpected source value type.");
4134  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4135  RetVT == MVT::i64) && "Unexpected return value type.");
4136 
4137  bool Is64Bit = (RetVT == MVT::i64);
4138  unsigned RegSize = Is64Bit ? 64 : 32;
4139  unsigned DstBits = RetVT.getSizeInBits();
4140  unsigned SrcBits = SrcVT.getSizeInBits();
4141  const TargetRegisterClass *RC =
4142  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4143 
4144  // Just emit a copy for "zero" shifts.
4145  if (Shift == 0) {
4146  if (RetVT == SrcVT) {
4147  Register ResultReg = createResultReg(RC);
4148  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4149  TII.get(TargetOpcode::COPY), ResultReg)
4150  .addReg(Op0);
4151  return ResultReg;
4152  } else
4153  return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4154  }
4155 
4156  // Don't deal with undefined shifts.
4157  if (Shift >= DstBits)
4158  return 0;
4159 
4160  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4161  // {S|U}BFM Wd, Wn, #r, #s
4162  // Wd<s-r:0> = Wn<s:r> when r <= s
4163 
4164  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4165  // %2 = lshr i16 %1, 4
4166  // Wd<7-4:0> = Wn<7:4>
4167  // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4168  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4169  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4170 
4171  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4172  // %2 = lshr i16 %1, 8
4173  // Wd<7-7,0> = Wn<7:7>
4174  // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4175  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4176  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4177 
4178  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4179  // %2 = lshr i16 %1, 12
4180  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4181  // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4182  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4183  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4184 
4185  if (Shift >= SrcBits && IsZExt)
4186  return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4187 
4188  // It is not possible to fold a sign-extend into the LShr instruction. In this
4189  // case emit a sign-extend.
4190  if (!IsZExt) {
4191  Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4192  if (!Op0)
4193  return 0;
4194  SrcVT = RetVT;
4195  SrcBits = SrcVT.getSizeInBits();
4196  IsZExt = true;
4197  }
4198 
4199  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4200  unsigned ImmS = SrcBits - 1;
4201  static const unsigned OpcTable[2][2] = {
4202  {AArch64::SBFMWri, AArch64::SBFMXri},
4203  {AArch64::UBFMWri, AArch64::UBFMXri}
4204  };
4205  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4206  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4207  Register TmpReg = MRI.createVirtualRegister(RC);
4208  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4209  TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4210  .addImm(0)
4211  .addReg(Op0)
4212  .addImm(AArch64::sub_32);
4213  Op0 = TmpReg;
4214  }
4215  return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4216 }
4217 
4218 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg,
4219  unsigned Op1Reg) {
4220  unsigned Opc = 0;
4221  bool NeedTrunc = false;
4222  uint64_t Mask = 0;
4223  switch (RetVT.SimpleTy) {
4224  default: return 0;
4225  case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
4226  case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4227  case MVT::i32: Opc = AArch64::ASRVWr; break;
4228  case MVT::i64: Opc = AArch64::ASRVXr; break;
4229  }
4230 
4231  const TargetRegisterClass *RC =
4232  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4233  if (NeedTrunc) {
4234  Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
4235  Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4236  }
4237  Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4238  if (NeedTrunc)
4239  ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4240  return ResultReg;
4241 }
4242 
4243 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4244  uint64_t Shift, bool IsZExt) {
4245  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4246  "Unexpected source/return type pair.");
4247  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4248  SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4249  "Unexpected source value type.");
4250  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4251  RetVT == MVT::i64) && "Unexpected return value type.");
4252 
4253  bool Is64Bit = (RetVT == MVT::i64);
4254  unsigned RegSize = Is64Bit ? 64 : 32;
4255  unsigned DstBits = RetVT.getSizeInBits();
4256  unsigned SrcBits = SrcVT.getSizeInBits();
4257  const TargetRegisterClass *RC =
4258  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4259 
4260  // Just emit a copy for "zero" shifts.
4261  if (Shift == 0) {
4262  if (RetVT == SrcVT) {
4263  Register ResultReg = createResultReg(RC);
4264  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4265  TII.get(TargetOpcode::COPY), ResultReg)
4266  .addReg(Op0);
4267  return ResultReg;
4268  } else
4269  return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4270  }
4271 
4272  // Don't deal with undefined shifts.
4273  if (Shift >= DstBits)
4274  return 0;
4275 
4276  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4277  // {S|U}BFM Wd, Wn, #r, #s
4278  // Wd<s-r:0> = Wn<s:r> when r <= s
4279 
4280  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4281  // %2 = ashr i16 %1, 4
4282  // Wd<7-4:0> = Wn<7:4>
4283  // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4284  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4285  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4286 
4287  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4288  // %2 = ashr i16 %1, 8
4289  // Wd<7-7,0> = Wn<7:7>
4290  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4291  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4292  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4293 
4294  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4295  // %2 = ashr i16 %1, 12
4296  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4297  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4298  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4299  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4300 
4301  if (Shift >= SrcBits && IsZExt)
4302  return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4303 
4304  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4305  unsigned ImmS = SrcBits - 1;
4306  static const unsigned OpcTable[2][2] = {
4307  {AArch64::SBFMWri, AArch64::SBFMXri},
4308  {AArch64::UBFMWri, AArch64::UBFMXri}
4309  };
4310  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4311  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4312  Register TmpReg = MRI.createVirtualRegister(RC);
4313  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4314  TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4315  .addImm(0)
4316  .addReg(Op0)
4317  .addImm(AArch64::sub_32);
4318  Op0 = TmpReg;
4319  }
4320  return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4321 }
4322 
4323 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4324  bool IsZExt) {
4325  assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4326 
4327  // FastISel does not have plumbing to deal with extensions where the SrcVT or
4328  // DestVT are odd things, so test to make sure that they are both types we can
4329  // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4330  // bail out to SelectionDAG.
4331  if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4332  (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4333  ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4334  (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4335  return 0;
4336 
4337  unsigned Opc;
4338  unsigned Imm = 0;
4339 
4340  switch (SrcVT.SimpleTy) {
4341  default:
4342  return 0;
4343  case MVT::i1:
4344  return emiti1Ext(SrcReg, DestVT, IsZExt);
4345  case MVT::i8:
4346  if (DestVT == MVT::i64)
4347  Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4348  else
4349  Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4350  Imm = 7;
4351  break;
4352  case MVT::i16:
4353  if (DestVT == MVT::i64)
4354  Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4355  else
4356  Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4357  Imm = 15;
4358  break;
4359  case MVT::i32:
4360  assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4361  Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4362  Imm = 31;
4363  break;
4364  }
4365 
4366  // Handle i8 and i16 as i32.
4367  if (DestVT == MVT::i8 || DestVT == MVT::i16)
4368  DestVT = MVT::i32;
4369  else if (DestVT == MVT::i64) {
4370  Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4371  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4372  TII.get(AArch64::SUBREG_TO_REG), Src64)
4373  .addImm(0)
4374  .addReg(SrcReg)
4375  .addImm(AArch64::sub_32);
4376  SrcReg = Src64;
4377  }
4378 
4379  const TargetRegisterClass *RC =
4380  (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4381  return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm);
4382 }
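// Illustrative annotation (not part of the upstream source): the extensions
// above are expressed as bitfield-move instructions, for example
//   ubfm w8, wSrc, #0, #7      ; zext i8  -> i32 (uxtb)
//   sbfm x8, xSrc, #0, #15     ; sext i16 -> i64 (sxth), after SUBREG_TO_REG
//   ubfm x8, xSrc, #0, #31     ; zext i32 -> i64
// Register names are placeholders for the vregs FastISel creates.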
4383 
4384 static bool isZExtLoad(const MachineInstr *LI) {
4385  switch (LI->getOpcode()) {
4386  default:
4387  return false;
4388  case AArch64::LDURBBi:
4389  case AArch64::LDURHHi:
4390  case AArch64::LDURWi:
4391  case AArch64::LDRBBui:
4392  case AArch64::LDRHHui:
4393  case AArch64::LDRWui:
4394  case AArch64::LDRBBroX:
4395  case AArch64::LDRHHroX:
4396  case AArch64::LDRWroX:
4397  case AArch64::LDRBBroW:
4398  case AArch64::LDRHHroW:
4399  case AArch64::LDRWroW:
4400  return true;
4401  }
4402 }
4403 
4404 static bool isSExtLoad(const MachineInstr *LI) {
4405  switch (LI->getOpcode()) {
4406  default:
4407  return false;
4408  case AArch64::LDURSBWi:
4409  case AArch64::LDURSHWi:
4410  case AArch64::LDURSBXi:
4411  case AArch64::LDURSHXi:
4412  case AArch64::LDURSWi:
4413  case AArch64::LDRSBWui:
4414  case AArch64::LDRSHWui:
4415  case AArch64::LDRSBXui:
4416  case AArch64::LDRSHXui:
4417  case AArch64::LDRSWui:
4418  case AArch64::LDRSBWroX:
4419  case AArch64::LDRSHWroX:
4420  case AArch64::LDRSBXroX:
4421  case AArch64::LDRSHXroX:
4422  case AArch64::LDRSWroX:
4423  case AArch64::LDRSBWroW:
4424  case AArch64::LDRSHWroW:
4425  case AArch64::LDRSBXroW:
4426  case AArch64::LDRSHXroW:
4427  case AArch64::LDRSWroW:
4428  return true;
4429  }
4430 }
4431 
4432 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4433  MVT SrcVT) {
4434  const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4435  if (!LI || !LI->hasOneUse())
4436  return false;
4437 
4438  // Check if the load instruction has already been selected.
4439  Register Reg = lookUpRegForValue(LI);
4440  if (!Reg)
4441  return false;
4442 
 4443  MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
 4444  if (!MI)
4445  return false;
4446 
4447  // Check if the correct load instruction has been emitted - SelectionDAG might
4448  // have emitted a zero-extending load, but we need a sign-extending load.
4449  bool IsZExt = isa<ZExtInst>(I);
4450  const auto *LoadMI = MI;
4451  if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4452  LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4453  Register LoadReg = MI->getOperand(1).getReg();
4454  LoadMI = MRI.getUniqueVRegDef(LoadReg);
4455  assert(LoadMI && "Expected valid instruction");
4456  }
4457  if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4458  return false;
4459 
4460  // Nothing to be done.
4461  if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4462  updateValueMap(I, Reg);
4463  return true;
4464  }
4465 
4466  if (IsZExt) {
4467  Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
4468  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4469  TII.get(AArch64::SUBREG_TO_REG), Reg64)
4470  .addImm(0)
4471  .addReg(Reg, getKillRegState(true))
4472  .addImm(AArch64::sub_32);
4473  Reg = Reg64;
4474  } else {
4475  assert((MI->getOpcode() == TargetOpcode::COPY &&
4476  MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4477  "Expected copy instruction");
4478  Reg = MI->getOperand(1).getReg();
 4479  MachineBasicBlock::iterator I(MI);
 4480  removeDeadCode(I, std::next(I));
4481  }
4482  updateValueMap(I, Reg);
4483  return true;
4484 }
4485 
4486 bool AArch64FastISel::selectIntExt(const Instruction *I) {
4487  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4488  "Unexpected integer extend instruction.");
4489  MVT RetVT;
4490  MVT SrcVT;
4491  if (!isTypeSupported(I->getType(), RetVT))
4492  return false;
4493 
4494  if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4495  return false;
4496 
4497  // Try to optimize already sign-/zero-extended values from load instructions.
4498  if (optimizeIntExtLoad(I, RetVT, SrcVT))
4499  return true;
4500 
4501  Register SrcReg = getRegForValue(I->getOperand(0));
4502  if (!SrcReg)
4503  return false;
4504 
4505  // Try to optimize already sign-/zero-extended values from function arguments.
4506  bool IsZExt = isa<ZExtInst>(I);
4507  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4508  if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4509  if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4510  Register ResultReg = createResultReg(&AArch64::GPR64RegClass);
4511  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4512  TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4513  .addImm(0)
4514  .addReg(SrcReg)
4515  .addImm(AArch64::sub_32);
4516  SrcReg = ResultReg;
4517  }
4518 
4519  updateValueMap(I, SrcReg);
4520  return true;
4521  }
4522  }
4523 
4524  unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4525  if (!ResultReg)
4526  return false;
4527 
4528  updateValueMap(I, ResultReg);
4529  return true;
4530 }
4531 
4532 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4533  EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4534  if (!DestEVT.isSimple())
4535  return false;
4536 
4537  MVT DestVT = DestEVT.getSimpleVT();
4538  if (DestVT != MVT::i64 && DestVT != MVT::i32)
4539  return false;
4540 
4541  unsigned DivOpc;
4542  bool Is64bit = (DestVT == MVT::i64);
4543  switch (ISDOpcode) {
4544  default:
4545  return false;
4546  case ISD::SREM:
4547  DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4548  break;
4549  case ISD::UREM:
4550  DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4551  break;
4552  }
4553  unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4554  Register Src0Reg = getRegForValue(I->getOperand(0));
4555  if (!Src0Reg)
4556  return false;
4557 
4558  Register Src1Reg = getRegForValue(I->getOperand(1));
4559  if (!Src1Reg)
4560  return false;
4561 
4562  const TargetRegisterClass *RC =
4563  (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4564  Register QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg);
4565  assert(QuotReg && "Unexpected DIV instruction emission failure.");
4566  // The remainder is computed as numerator - (quotient * denominator) using the
4567  // MSUB instruction.
4568  Register ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg);
4569  updateValueMap(I, ResultReg);
4570  return true;
4571 }
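// Illustrative annotation (not from the upstream source): for
//   %r = srem i32 %a, %b
// the sequence above is
//   sdiv w8, wA, wB            ; quotient
//   msub w9, w8, wB, wA        ; w9 = wA - w8 * wB, i.e. the remainder
// with UDIV for urem and the X-register forms for i64. Register names are
// placeholders.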
4572 
4573 bool AArch64FastISel::selectMul(const Instruction *I) {
4574  MVT VT;
4575  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4576  return false;
4577 
4578  if (VT.isVector())
4579  return selectBinaryOp(I, ISD::MUL);
4580 
4581  const Value *Src0 = I->getOperand(0);
4582  const Value *Src1 = I->getOperand(1);
4583  if (const auto *C = dyn_cast<ConstantInt>(Src0))
4584  if (C->getValue().isPowerOf2())
4585  std::swap(Src0, Src1);
4586 
4587  // Try to simplify to a shift instruction.
4588  if (const auto *C = dyn_cast<ConstantInt>(Src1))
4589  if (C->getValue().isPowerOf2()) {
4590  uint64_t ShiftVal = C->getValue().logBase2();
4591  MVT SrcVT = VT;
4592  bool IsZExt = true;
4593  if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4594  if (!isIntExtFree(ZExt)) {
4595  MVT VT;
4596  if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4597  SrcVT = VT;
4598  IsZExt = true;
4599  Src0 = ZExt->getOperand(0);
4600  }
4601  }
4602  } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4603  if (!isIntExtFree(SExt)) {
4604  MVT VT;
4605  if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4606  SrcVT = VT;
4607  IsZExt = false;
4608  Src0 = SExt->getOperand(0);
4609  }
4610  }
4611  }
4612 
4613  Register Src0Reg = getRegForValue(Src0);
4614  if (!Src0Reg)
4615  return false;
4616 
4617  unsigned ResultReg =
4618  emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt);
4619 
4620  if (ResultReg) {
4621  updateValueMap(I, ResultReg);
4622  return true;
4623  }
4624  }
4625 
4626  Register Src0Reg = getRegForValue(I->getOperand(0));
4627  if (!Src0Reg)
4628  return false;
4629 
4630  Register Src1Reg = getRegForValue(I->getOperand(1));
4631  if (!Src1Reg)
4632  return false;
4633 
4634  unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg);
4635 
4636  if (!ResultReg)
4637  return false;
4638 
4639  updateValueMap(I, ResultReg);
4640  return true;
4641 }
4642 
4643 bool AArch64FastISel::selectShift(const Instruction *I) {
4644  MVT RetVT;
4645  if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4646  return false;
4647 
4648  if (RetVT.isVector())
4649  return selectOperator(I, I->getOpcode());
4650 
4651  if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4652  unsigned ResultReg = 0;
4653  uint64_t ShiftVal = C->getZExtValue();
4654  MVT SrcVT = RetVT;
4655  bool IsZExt = I->getOpcode() != Instruction::AShr;
4656  const Value *Op0 = I->getOperand(0);
4657  if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4658  if (!isIntExtFree(ZExt)) {
4659  MVT TmpVT;
4660  if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4661  SrcVT = TmpVT;
4662  IsZExt = true;
4663  Op0 = ZExt->getOperand(0);
4664  }
4665  }
4666  } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4667  if (!isIntExtFree(SExt)) {
4668  MVT TmpVT;
4669  if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4670  SrcVT = TmpVT;
4671  IsZExt = false;
4672  Op0 = SExt->getOperand(0);
4673  }
4674  }
4675  }
4676 
4677  Register Op0Reg = getRegForValue(Op0);
4678  if (!Op0Reg)
4679  return false;
4680 
4681  switch (I->getOpcode()) {
4682  default: llvm_unreachable("Unexpected instruction.");
4683  case Instruction::Shl:
4684  ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4685  break;
4686  case Instruction::AShr:
4687  ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4688  break;
4689  case Instruction::LShr:
4690  ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4691  break;
4692  }
4693  if (!ResultReg)
4694  return false;
4695 
4696  updateValueMap(I, ResultReg);
4697  return true;
4698  }
4699 
4700  Register Op0Reg = getRegForValue(I->getOperand(0));
4701  if (!Op0Reg)
4702  return false;
4703 
4704  Register Op1Reg = getRegForValue(I->getOperand(1));
4705  if (!Op1Reg)
4706  return false;
4707 
4708  unsigned ResultReg = 0;
4709  switch (I->getOpcode()) {
4710  default: llvm_unreachable("Unexpected instruction.");
4711  case Instruction::Shl:
4712  ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg);
4713  break;
4714  case Instruction::AShr:
4715  ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg);
4716  break;
4717  case Instruction::LShr:
4718  ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg);
4719  break;
4720  }
4721 
4722  if (!ResultReg)
4723  return false;
4724 
4725  updateValueMap(I, ResultReg);
4726  return true;
4727 }
4728 
4729 bool AArch64FastISel::selectBitCast(const Instruction *I) {
4730  MVT RetVT, SrcVT;
4731 
4732  if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4733  return false;
4734  if (!isTypeLegal(I->getType(), RetVT))
4735  return false;
4736 
4737  unsigned Opc;
4738  if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4739  Opc = AArch64::FMOVWSr;
4740  else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4741  Opc = AArch64::FMOVXDr;
4742  else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4743  Opc = AArch64::FMOVSWr;
4744  else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4745  Opc = AArch64::FMOVDXr;
4746  else
4747  return false;
4748 
4749  const TargetRegisterClass *RC = nullptr;
4750  switch (RetVT.SimpleTy) {
4751  default: llvm_unreachable("Unexpected value type.");
4752  case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4753  case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4754  case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4755  case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4756  }
4757  Register Op0Reg = getRegForValue(I->getOperand(0));
4758  if (!Op0Reg)
4759  return false;
4760 
4761  Register ResultReg = fastEmitInst_r(Opc, RC, Op0Reg);
4762  if (!ResultReg)
4763  return false;
4764 
4765  updateValueMap(I, ResultReg);
4766  return true;
4767 }
4768 
4769 bool AArch64FastISel::selectFRem(const Instruction *I) {
4770  MVT RetVT;
4771  if (!isTypeLegal(I->getType(), RetVT))
4772  return false;
4773 
4774  RTLIB::Libcall LC;
4775  switch (RetVT.SimpleTy) {
4776  default:
4777  return false;
4778  case MVT::f32:
4779  LC = RTLIB::REM_F32;
4780  break;
4781  case MVT::f64:
4782  LC = RTLIB::REM_F64;
4783  break;
4784  }
4785 
4786  ArgListTy Args;
4787  Args.reserve(I->getNumOperands());
4788 
4789  // Populate the argument list.
4790  for (auto &Arg : I->operands()) {
4791  ArgListEntry Entry;
4792  Entry.Val = Arg;
4793  Entry.Ty = Arg->getType();
4794  Args.push_back(Entry);
4795  }
4796 
4797  CallLoweringInfo CLI;
4798  MCContext &Ctx = MF->getContext();
4799  CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4800  TLI.getLibcallName(LC), std::move(Args));
4801  if (!lowerCallTo(CLI))
4802  return false;
4803  updateValueMap(I, CLI.ResultReg);
4804  return true;
4805 }
4806 
4807 bool AArch64FastISel::selectSDiv(const Instruction *I) {
4808  MVT VT;
4809  if (!isTypeLegal(I->getType(), VT))
4810  return false;
4811 
4812  if (!isa<ConstantInt>(I->getOperand(1)))
4813  return selectBinaryOp(I, ISD::SDIV);
4814 
4815  const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4816  if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4817  !(C.isPowerOf2() || C.isNegatedPowerOf2()))
4818  return selectBinaryOp(I, ISD::SDIV);
4819 
4820  unsigned Lg2 = C.countTrailingZeros();
4821  Register Src0Reg = getRegForValue(I->getOperand(0));
4822  if (!Src0Reg)
4823  return false;
4824 
4825  if (cast<BinaryOperator>(I)->isExact()) {
4826  unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2);
4827  if (!ResultReg)
4828  return false;
4829  updateValueMap(I, ResultReg);
4830  return true;
4831  }
4832 
4833  int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4834  unsigned AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne);
4835  if (!AddReg)
4836  return false;
4837 
4838  // (Src0 < 0) ? Pow2 - 1 : 0;
4839  if (!emitICmp_ri(VT, Src0Reg, 0))
4840  return false;
4841 
4842  unsigned SelectOpc;
4843  const TargetRegisterClass *RC;
4844  if (VT == MVT::i64) {
4845  SelectOpc = AArch64::CSELXr;
4846  RC = &AArch64::GPR64RegClass;
4847  } else {
4848  SelectOpc = AArch64::CSELWr;
4849  RC = &AArch64::GPR32RegClass;
4850  }
4851  Register SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg,
4852  AArch64CC::LT);
4853  if (!SelectReg)
4854  return false;
4855 
4856  // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4857  // negate the result.
4858  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4859  unsigned ResultReg;
4860  if (C.isNegative())
4861  ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg,
4862  AArch64_AM::ASR, Lg2);
4863  else
4864  ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2);
4865 
4866  if (!ResultReg)
4867  return false;
4868 
4869  updateValueMap(I, ResultReg);
4870  return true;
4871 }
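// Illustrative annotation (not part of the upstream source): for a non-exact
//   %q = sdiv i32 %x, 8
// the code above emits roughly
//   add  w8, wX, #7            ; x + (2^3 - 1)
//   cmp  wX, #0
//   csel w9, w8, wX, lt        ; use the biased value only for negative x
//   asr  w10, w9, #3
// and, for a negative power-of-two divisor, a final
//   neg  w10, w9, asr #3
// instead of the plain ASR. %x and the registers are hypothetical.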
4872 
4873 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4874 /// have to duplicate it for AArch64, because otherwise we would fail during the
4875 /// sign-extend emission.
4876 unsigned AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4877  Register IdxN = getRegForValue(Idx);
4878  if (IdxN == 0)
4879  // Unhandled operand. Halt "fast" selection and bail.
4880  return 0;
4881 
4882  // If the index is smaller or larger than intptr_t, truncate or extend it.
4883  MVT PtrVT = TLI.getPointerTy(DL);
4884  EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4885  if (IdxVT.bitsLT(PtrVT)) {
4886  IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
4887  } else if (IdxVT.bitsGT(PtrVT))
4888  llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4889  return IdxN;
4890 }
4891 
4892 /// This is mostly a copy of the existing FastISel GEP code, but we have to
4893 /// duplicate it for AArch64, because otherwise we would bail out even for
4894 /// simple cases. This is because the standard fastEmit functions don't cover
 4895 /// MUL at all and ADD is lowered very inefficiently.
4896 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4897  if (Subtarget->isTargetILP32())
4898  return false;
4899 
4900  Register N = getRegForValue(I->getOperand(0));
4901  if (!N)
4902  return false;
4903 
4904  // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4905  // into a single N = N + TotalOffset.
4906  uint64_t TotalOffs = 0;
4907  MVT VT = TLI.getPointerTy(DL);
 4908  for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
 4909  GTI != E; ++GTI) {
4910  const Value *Idx = GTI.getOperand();
4911  if (auto *StTy = GTI.getStructTypeOrNull()) {
4912  unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4913  // N = N + Offset
4914  if (Field)
4915  TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4916  } else {
4917  Type *Ty = GTI.getIndexedType();
4918 
4919  // If this is a constant subscript, handle it quickly.
4920  if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4921  if (CI->isZero())
4922  continue;
4923  // N = N + Offset
4924  TotalOffs +=
4925  DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
4926  continue;
4927  }
4928  if (TotalOffs) {
4929  N = emitAdd_ri_(VT, N, TotalOffs);
4930  if (!N)
4931  return false;
4932  TotalOffs = 0;
4933  }
4934 
4935  // N = N + Idx * ElementSize;
4936  uint64_t ElementSize = DL.getTypeAllocSize(Ty);
4937  unsigned IdxN = getRegForGEPIndex(Idx);
4938  if (!IdxN)
4939  return false;
4940 
4941  if (ElementSize != 1) {
4942  unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
4943  if (!C)
4944  return false;
4945  IdxN = emitMul_rr(VT, IdxN, C);
4946  if (!IdxN)
4947  return false;
4948  }
4949  N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
4950  if (!N)
4951  return false;
4952  }
4953  }
4954  if (TotalOffs) {
4955  N = emitAdd_ri_(VT, N, TotalOffs);
4956  if (!N)
4957  return false;
4958  }
4959  updateValueMap(I, N);
4960  return true;
4961 }
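// Illustrative annotation (not from the upstream source): for
//   %q = getelementptr i32, i32* %p, i64 %i
// the loop above emits a multiply by the element size followed by an add,
// roughly
//   mov  x8, #4                ; sizeof(i32), materialized as a constant
//   madd x9, xI, x8, xzr       ; index * element size
//   add  x10, xP, x9
// while all-constant indices collapse into a single "add xN, xP, #TotalOffs".
// Value and register names are placeholders.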
4962 
4963 bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
4964  assert(TM.getOptLevel() == CodeGenOpt::None &&
4965  "cmpxchg survived AtomicExpand at optlevel > -O0");
4966 
4967  auto *RetPairTy = cast<StructType>(I->getType());
4968  Type *RetTy = RetPairTy->getTypeAtIndex(0U);
4969  assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
4970  "cmpxchg has a non-i1 status result");
4971 
4972  MVT VT;
4973  if (!isTypeLegal(RetTy, VT))
4974  return false;
4975 
4976  const TargetRegisterClass *ResRC;
4977  unsigned Opc, CmpOpc;
4978  // This only supports i32/i64, because i8/i16 aren't legal, and the generic
4979  // extractvalue selection doesn't support that.
4980  if (VT == MVT::i32) {
4981  Opc = AArch64::CMP_SWAP_32;
4982  CmpOpc = AArch64::SUBSWrs;
4983  ResRC = &AArch64::GPR32RegClass;
4984  } else if (VT == MVT::i64) {
4985  Opc = AArch64::CMP_SWAP_64;
4986  CmpOpc = AArch64::SUBSXrs;
4987  ResRC = &AArch64::GPR64RegClass;
4988  } else {
4989  return false;
4990  }
4991 
4992  const MCInstrDesc &II = TII.get(Opc);
4993 
4994  const Register AddrReg = constrainOperandRegClass(
4995  II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
4996  const Register DesiredReg = constrainOperandRegClass(
4997  II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
4998  const Register NewReg = constrainOperandRegClass(
4999  II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
5000 
5001  const Register ResultReg1 = createResultReg(ResRC);
5002  const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5003  const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5004 
5005  // FIXME: MachineMemOperand doesn't support cmpxchg yet.
5006  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
5007  .addDef(ResultReg1)
5008  .addDef(ScratchReg)
5009  .addUse(AddrReg)
5010  .addUse(DesiredReg)
5011  .addUse(NewReg);
5012 
5013  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
5014  .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5015  .addUse(ResultReg1)
5016  .addUse(DesiredReg)
5017  .addImm(0);
5018 
5019  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr))
5020  .addDef(ResultReg2)
5021  .addUse(AArch64::WZR)
5022  .addUse(AArch64::WZR)
 5023  .addImm(AArch64CC::NE);
 5024 
5025  assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5026  updateValueMap(I, ResultReg1, 2);
5027  return true;
5028 }
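// Illustrative annotation (not part of the upstream source): at -O0 a 32-bit
// cmpxchg selected above becomes a CMP_SWAP_32 pseudo (expanded later into an
// ldaxr/stlxr loop), followed by
//   subs  wzr, wOld, wDesired    ; compare loaded value with the expected one
//   csinc wStatus, wzr, wzr, ne  ; 1 on success, 0 otherwise
// which materializes the i1 status result. Register names are placeholders.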
5029 
5030 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5031  switch (I->getOpcode()) {
5032  default:
5033  break;
5034  case Instruction::Add:
5035  case Instruction::Sub:
5036  return selectAddSub(I);
5037  case Instruction::Mul:
5038  return selectMul(I);
5039  case Instruction::SDiv:
5040  return selectSDiv(I);
5041  case Instruction::SRem:
5042  if (!selectBinaryOp(I, ISD::SREM))
5043  return selectRem(I, ISD::SREM);
5044  return true;
5045  case Instruction::URem:
5046  if (!selectBinaryOp(I, ISD::UREM))
5047  return selectRem(I, ISD::UREM);
5048  return true;
5049  case Instruction::Shl:
5050  case Instruction::LShr:
5051  case Instruction::AShr:
5052  return selectShift(I);
5053  case Instruction::And:
5054  case Instruction::Or:
5055  case Instruction::Xor:
5056  return selectLogicalOp(I);
5057  case Instruction::Br:
5058  return selectBranch(I);
5059  case Instruction::IndirectBr:
5060  return selectIndirectBr(I);
5061  case Instruction::BitCast:
5062  if (!FastISel::selectBitCast(I))
5063  return selectBitCast(I);
5064  return true;
5065  case Instruction::FPToSI:
5066  if (!selectCast(I, ISD::FP_TO_SINT))
5067  return selectFPToInt(I, /*Signed=*/true);
5068  return true;
5069  case Instruction::FPToUI:
5070  return selectFPToInt(I, /*Signed=*/false);
5071  case Instruction::ZExt:
5072  case Instruction::SExt:
5073  return selectIntExt(I);
5074  case Instruction::Trunc:
5075  if (!selectCast(I, ISD::TRUNCATE))
5076  return selectTrunc(I);
5077  return true;
5078  case Instruction::FPExt:
5079  return selectFPExt(I);
5080  case Instruction::FPTrunc:
5081  return selectFPTrunc(I);
5082  case Instruction::SIToFP:
5083  if (!selectCast(I, ISD::SINT_TO_FP))
5084  return selectIntToFP(I, /*Signed=*/true);
5085  return true;
5086  case Instruction::UIToFP:
5087  return selectIntToFP(I, /*Signed=*/false);
5088  case Instruction::Load:
5089  return selectLoad(I);
5090  case Instruction::Store:
5091  return selectStore(I);
5092  case Instruction::FCmp:
5093  case Instruction::ICmp:
5094  return selectCmp(I);
5095  case Instruction::Select:
5096  return selectSelect(I);
5097  case Instruction::Ret:
5098  return selectRet(I);
5099  case Instruction::FRem:
5100  return selectFRem(I);
5101  case Instruction::GetElementPtr:
5102  return selectGetElementPtr(I);
5103  case Instruction::AtomicCmpXchg:
5104  return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5105  }
5106 
5107  // fall-back to target-independent instruction selection.
5108  return selectOperator(I, I->getOpcode());
5109 }
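// Editorial note (not part of the original source): the switch above follows
// two patterns. Most opcodes go straight to an AArch64-specific handler; a
// few (SRem/URem, BitCast, FPToSI, Trunc, SIToFP) first try the generic
// FastISel helpers (selectBinaryOp/selectCast/FastISel::selectBitCast) and
// only fall back to the target-specific routine when the generic path fails.
// Any opcode the switch does not handle ends up in selectOperator, the fully
// target-independent fallback.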
5110 
5111 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
5112  const TargetLibraryInfo *LibInfo) {
5113  return new AArch64FastISel(FuncInfo, LibInfo);
5114 }
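// Editorial note (not part of the original source): a minimal sketch of how
// this factory is typically reached. The AArch64 backend overrides
// TargetLowering::createFastISel (in AArch64ISelLowering.cpp) and forwards to
// the function above; SelectionDAGISel then uses the returned object when
// fast instruction selection is enabled (e.g. at -O0). Assumed shape of that
// override:
//
//   FastISel *
//   AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
//                                         const TargetLibraryInfo *libInfo) const {
//     return AArch64::createFastISel(funcInfo, libInfo);
//   }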