AArch64FastISel.cpp
1 //===- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the AArch64-specific support for the FastISel class. Some
10 // of the target-specific code is generated by tablegen in the file
11 // AArch64GenFastISel.inc, which is #included here.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AArch64.h"
16 #include "AArch64CallingConvention.h"
17 #include "AArch64RegisterInfo.h"
18 #include "AArch64Subtarget.h"
19 #include "MCTargetDesc/AArch64AddressingModes.h"
20 #include "Utils/AArch64BaseInfo.h"
21 #include "llvm/ADT/APFloat.h"
22 #include "llvm/ADT/APInt.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/Analysis/BranchProbabilityInfo.h"
26 #include "llvm/CodeGen/CallingConvLower.h"
27 #include "llvm/CodeGen/FastISel.h"
28 #include "llvm/CodeGen/FunctionLoweringInfo.h"
29 #include "llvm/CodeGen/ISDOpcodes.h"
30 #include "llvm/CodeGen/MachineBasicBlock.h"
31 #include "llvm/CodeGen/MachineConstantPool.h"
32 #include "llvm/CodeGen/MachineFrameInfo.h"
33 #include "llvm/CodeGen/MachineInstr.h"
34 #include "llvm/CodeGen/MachineInstrBuilder.h"
35 #include "llvm/CodeGen/MachineMemOperand.h"
36 #include "llvm/CodeGen/MachineRegisterInfo.h"
37 #include "llvm/CodeGen/RuntimeLibcalls.h"
38 #include "llvm/CodeGen/ValueTypes.h"
39 #include "llvm/IR/Argument.h"
40 #include "llvm/IR/Attributes.h"
41 #include "llvm/IR/BasicBlock.h"
42 #include "llvm/IR/CallingConv.h"
43 #include "llvm/IR/Constant.h"
44 #include "llvm/IR/Constants.h"
45 #include "llvm/IR/DataLayout.h"
46 #include "llvm/IR/DerivedTypes.h"
47 #include "llvm/IR/Function.h"
48 #include "llvm/IR/GetElementPtrTypeIterator.h"
49 #include "llvm/IR/GlobalValue.h"
50 #include "llvm/IR/InstrTypes.h"
51 #include "llvm/IR/Instruction.h"
52 #include "llvm/IR/Instructions.h"
53 #include "llvm/IR/IntrinsicInst.h"
54 #include "llvm/IR/Intrinsics.h"
55 #include "llvm/IR/Operator.h"
56 #include "llvm/IR/Type.h"
57 #include "llvm/IR/User.h"
58 #include "llvm/IR/Value.h"
59 #include "llvm/MC/MCInstrDesc.h"
60 #include "llvm/MC/MCRegisterInfo.h"
61 #include "llvm/MC/MCSymbol.h"
62 #include "llvm/Support/AtomicOrdering.h"
63 #include "llvm/Support/Casting.h"
64 #include "llvm/Support/CodeGen.h"
65 #include "llvm/Support/Compiler.h"
66 #include "llvm/Support/ErrorHandling.h"
67 #include "llvm/Support/MachineValueType.h"
68 #include "llvm/Support/MathExtras.h"
69 #include <algorithm>
70 #include <cassert>
71 #include <cstdint>
72 #include <iterator>
73 #include <utility>
74 
75 using namespace llvm;
76 
77 namespace {
78 
79 class AArch64FastISel final : public FastISel {
80  class Address {
81  public:
82  using BaseKind = enum {
83  RegBase,
84  FrameIndexBase
85  };
86 
87  private:
88  BaseKind Kind = RegBase;
89  AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
90  union {
91  unsigned Reg;
92  int FI;
93  } Base;
94  unsigned OffsetReg = 0;
95  unsigned Shift = 0;
96  int64_t Offset = 0;
97  const GlobalValue *GV = nullptr;
98 
99  public:
100  Address() { Base.Reg = 0; }
101 
102  void setKind(BaseKind K) { Kind = K; }
103  BaseKind getKind() const { return Kind; }
104  void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
105  AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
106  bool isRegBase() const { return Kind == RegBase; }
107  bool isFIBase() const { return Kind == FrameIndexBase; }
108 
109  void setReg(unsigned Reg) {
110  assert(isRegBase() && "Invalid base register access!");
111  Base.Reg = Reg;
112  }
113 
114  unsigned getReg() const {
115  assert(isRegBase() && "Invalid base register access!");
116  return Base.Reg;
117  }
118 
119  void setOffsetReg(unsigned Reg) {
120  OffsetReg = Reg;
121  }
122 
123  unsigned getOffsetReg() const {
124  return OffsetReg;
125  }
126 
127  void setFI(unsigned FI) {
128  assert(isFIBase() && "Invalid base frame index access!");
129  Base.FI = FI;
130  }
131 
132  unsigned getFI() const {
133  assert(isFIBase() && "Invalid base frame index access!");
134  return Base.FI;
135  }
136 
137  void setOffset(int64_t O) { Offset = O; }
138  int64_t getOffset() { return Offset; }
139  void setShift(unsigned S) { Shift = S; }
140  unsigned getShift() { return Shift; }
141 
142  void setGlobalValue(const GlobalValue *G) { GV = G; }
143  const GlobalValue *getGlobalValue() { return GV; }
144  };
145 
146  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
147  /// make the right decision when generating code for different targets.
148  const AArch64Subtarget *Subtarget;
149  LLVMContext *Context;
150 
151  bool fastLowerArguments() override;
152  bool fastLowerCall(CallLoweringInfo &CLI) override;
153  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
154 
155 private:
156  // Selection routines.
157  bool selectAddSub(const Instruction *I);
158  bool selectLogicalOp(const Instruction *I);
159  bool selectLoad(const Instruction *I);
160  bool selectStore(const Instruction *I);
161  bool selectBranch(const Instruction *I);
162  bool selectIndirectBr(const Instruction *I);
163  bool selectCmp(const Instruction *I);
164  bool selectSelect(const Instruction *I);
165  bool selectFPExt(const Instruction *I);
166  bool selectFPTrunc(const Instruction *I);
167  bool selectFPToInt(const Instruction *I, bool Signed);
168  bool selectIntToFP(const Instruction *I, bool Signed);
169  bool selectRem(const Instruction *I, unsigned ISDOpcode);
170  bool selectRet(const Instruction *I);
171  bool selectTrunc(const Instruction *I);
172  bool selectIntExt(const Instruction *I);
173  bool selectMul(const Instruction *I);
174  bool selectShift(const Instruction *I);
175  bool selectBitCast(const Instruction *I);
176  bool selectFRem(const Instruction *I);
177  bool selectSDiv(const Instruction *I);
178  bool selectGetElementPtr(const Instruction *I);
179  bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
180 
181  // Utility helper routines.
182  bool isTypeLegal(Type *Ty, MVT &VT);
183  bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
184  bool isValueAvailable(const Value *V) const;
185  bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
186  bool computeCallAddress(const Value *V, Address &Addr);
187  bool simplifyAddress(Address &Addr, MVT VT);
188  void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
189  MachineMemOperand::Flags Flags,
190  unsigned ScaleFactor, MachineMemOperand *MMO);
191  bool isMemCpySmall(uint64_t Len, unsigned Alignment);
192  bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
193  unsigned Alignment);
194  bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
195  const Value *Cond);
196  bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
197  bool optimizeSelect(const SelectInst *SI);
198  std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);
199 
200  // Emit helper routines.
201  unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
202  const Value *RHS, bool SetFlags = false,
203  bool WantResult = true, bool IsZExt = false);
204  unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
205  bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
206  bool SetFlags = false, bool WantResult = true);
207  unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
208  bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
209  bool WantResult = true);
210  unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
211  bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
212  AArch64_AM::ShiftExtendType ShiftType,
213  uint64_t ShiftImm, bool SetFlags = false,
214  bool WantResult = true);
215  unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
216  bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
217  AArch64_AM::ShiftExtendType ExtType,
218  uint64_t ShiftImm, bool SetFlags = false,
219  bool WantResult = true);
220 
221  // Emit functions.
222  bool emitCompareAndBranch(const BranchInst *BI);
223  bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
224  bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
225  bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
226  bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
227  unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
228  MachineMemOperand *MMO = nullptr);
229  bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
230  MachineMemOperand *MMO = nullptr);
231  bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
232  MachineMemOperand *MMO = nullptr);
233  unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
234  unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
235  unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
236  bool SetFlags = false, bool WantResult = true,
237  bool IsZExt = false);
238  unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
239  unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
240  bool SetFlags = false, bool WantResult = true,
241  bool IsZExt = false);
242  unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
243  unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
244  unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
245  unsigned RHSReg, bool RHSIsKill,
246  AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
247  bool WantResult = true);
248  unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
249  const Value *RHS);
250  unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
251  bool LHSIsKill, uint64_t Imm);
252  unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
253  bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
254  uint64_t ShiftImm);
255  unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
256  unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
257  unsigned Op1, bool Op1IsKill);
258  unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
259  unsigned Op1, bool Op1IsKill);
260  unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
261  unsigned Op1, bool Op1IsKill);
262  unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
263  unsigned Op1Reg, bool Op1IsKill);
264  unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
265  uint64_t Imm, bool IsZExt = true);
266  unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
267  unsigned Op1Reg, bool Op1IsKill);
268  unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
269  uint64_t Imm, bool IsZExt = true);
270  unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
271  unsigned Op1Reg, bool Op1IsKill);
272  unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
273  uint64_t Imm, bool IsZExt = false);
274 
275  unsigned materializeInt(const ConstantInt *CI, MVT VT);
276  unsigned materializeFP(const ConstantFP *CFP, MVT VT);
277  unsigned materializeGV(const GlobalValue *GV);
278 
279  // Call handling routines.
280 private:
281  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
282  bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
283  unsigned &NumBytes);
284  bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
285 
286 public:
287  // Backend specific FastISel code.
288  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
289  unsigned fastMaterializeConstant(const Constant *C) override;
290  unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
291 
292  explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
293  const TargetLibraryInfo *LibInfo)
294  : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
295  Subtarget =
296  &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
297  Context = &FuncInfo.Fn->getContext();
298  }
299 
300  bool fastSelectInstruction(const Instruction *I) override;
301 
302 #include "AArch64GenFastISel.inc"
303 };
304 
305 } // end anonymous namespace
306 
307 /// Check if the sign-/zero-extend will be a noop.
308 static bool isIntExtFree(const Instruction *I) {
309  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
310  "Unexpected integer extend instruction.");
311  assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
312  "Unexpected value type.");
313  bool IsZExt = isa<ZExtInst>(I);
314 
315  if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
316  if (LI->hasOneUse())
317  return true;
318 
319  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
320  if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
321  return true;
322 
323  return false;
324 }
325 
326 /// Determine the implicit scale factor that is applied by a memory
327 /// operation for a given value type.
328 static unsigned getImplicitScaleFactor(MVT VT) {
329  switch (VT.SimpleTy) {
330  default:
331  return 0; // invalid
332  case MVT::i1: // fall-through
333  case MVT::i8:
334  return 1;
335  case MVT::i16:
336  return 2;
337  case MVT::i32: // fall-through
338  case MVT::f32:
339  return 4;
340  case MVT::i64: // fall-through
341  case MVT::f64:
342  return 8;
343  }
344 }
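// For illustration: a hypothetical helper, not used elsewhere in this file,
// showing how the implicit scale factor feeds the scaled, unsigned 12-bit
// immediate form of AArch64 loads and stores (e.g. LDRWui). A byte offset is
// encodable in that form when it is non-negative, a multiple of the scale,
// and the scaled value fits in 12 bits -- the same conditions checked in
// simplifyAddress() further down.
LLVM_ATTRIBUTE_UNUSED static bool fitsScaledImmediate(int64_t Offset, MVT VT) {
  unsigned Scale = getImplicitScaleFactor(VT);
  if (!Scale)
    return false; // Unsupported value type.
  return Offset >= 0 && (Offset % Scale) == 0 && isUInt<12>(Offset / Scale);
}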
345 
346 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
347  if (CC == CallingConv::WebKit_JS)
348  return CC_AArch64_WebKit_JS;
349  if (CC == CallingConv::GHC)
350  return CC_AArch64_GHC;
351  return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
352 }
353 
354 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
355  assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
356  "Alloca should always return a pointer.");
357 
358  // Don't handle dynamic allocas.
359  if (!FuncInfo.StaticAllocaMap.count(AI))
360  return 0;
361 
362  DenseMap<const AllocaInst *, int>::iterator SI =
363  FuncInfo.StaticAllocaMap.find(AI);
364 
365  if (SI != FuncInfo.StaticAllocaMap.end()) {
366  unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
367  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
368  ResultReg)
369  .addFrameIndex(SI->second)
370  .addImm(0)
371  .addImm(0);
372  return ResultReg;
373  }
374 
375  return 0;
376 }
377 
378 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
379  if (VT > MVT::i64)
380  return 0;
381 
382  if (!CI->isZero())
383  return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
384 
385  // Create a copy from the zero register to materialize a "0" value.
386  const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
387  : &AArch64::GPR32RegClass;
388  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
389  unsigned ResultReg = createResultReg(RC);
390  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
391  ResultReg).addReg(ZeroReg, getKillRegState(true));
392  return ResultReg;
393 }
394 
395 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
396  // Positive zero (+0.0) has to be materialized with a fmov from the zero
397  // register, because the immediate version of fmov cannot encode zero.
398  if (CFP->isNullValue())
399  return fastMaterializeFloatZero(CFP);
400 
401  if (VT != MVT::f32 && VT != MVT::f64)
402  return 0;
403 
404  const APFloat Val = CFP->getValueAPF();
405  bool Is64Bit = (VT == MVT::f64);
406  // This checks to see if we can use FMOV instructions to materialize
407  // a constant, otherwise we have to materialize via the constant pool.
408  int Imm =
409  Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
410  if (Imm != -1) {
411  unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
412  return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
413  }
414 
415  // For the MachO large code model materialize the FP constant in code.
416  if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
417  unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
418  const TargetRegisterClass *RC = Is64Bit ?
419  &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
420 
421  unsigned TmpReg = createResultReg(RC);
422  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
423  .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
424 
425  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
426  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
427  TII.get(TargetOpcode::COPY), ResultReg)
428  .addReg(TmpReg, getKillRegState(true));
429 
430  return ResultReg;
431  }
432 
433  // Materialize via constant pool. MachineConstantPool wants an explicit
434  // alignment.
435  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
436  if (Align == 0)
437  Align = DL.getTypeAllocSize(CFP->getType());
438 
439  unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
440  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
441  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
442  ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
443 
444  unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
445  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
446  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
447  .addReg(ADRPReg)
448  .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
449  return ResultReg;
450 }
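// For illustration (assumed encoding, cf. AArch64_AM::getFP32Imm/getFP64Imm):
// the FMOV (immediate) form only covers values of the shape
//   (-1)^s * 2^e * (1 + m/16)   with e in [-3, 4] and m in [0, 15],
// e.g. 1.0, 0.5 or 3.875. Values such as 0.0 or 0.1 make getFP*Imm return -1,
// so they are materialized from the zero register or via the constant pool,
// as the code above does.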
451 
452 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
453  // We can't handle thread-local variables quickly yet.
454  if (GV->isThreadLocal())
455  return 0;
456 
457  // MachO still uses GOT for large code-model accesses, but ELF requires
458  // movz/movk sequences, which FastISel doesn't handle yet.
459  if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
460  return 0;
461 
462  unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
463 
464  EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
465  if (!DestEVT.isSimple())
466  return 0;
467 
468  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
469  unsigned ResultReg;
470 
471  if (OpFlags & AArch64II::MO_GOT) {
472  // ADRP + LDRX
473  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
474  ADRPReg)
475  .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
476 
477  ResultReg = createResultReg(&AArch64::GPR64RegClass);
478  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
479  ResultReg)
480  .addReg(ADRPReg)
481  .addGlobalAddress(GV, 0,
482  AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags);
483  } else {
484  // ADRP + ADDX
485  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
486  ADRPReg)
487  .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
488 
489  ResultReg = createResultReg(&AArch64::GPR64spRegClass);
490  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
491  ResultReg)
492  .addReg(ADRPReg)
493  .addGlobalAddress(GV, 0,
494  AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
495  .addImm(0);
496  }
497  return ResultReg;
498 }
499 
500 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
501  EVT CEVT = TLI.getValueType(DL, C->getType(), true);
502 
503  // Only handle simple types.
504  if (!CEVT.isSimple())
505  return 0;
506  MVT VT = CEVT.getSimpleVT();
507 
508  if (const auto *CI = dyn_cast<ConstantInt>(C))
509  return materializeInt(CI, VT);
510  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
511  return materializeFP(CFP, VT);
512  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
513  return materializeGV(GV);
514 
515  return 0;
516 }
517 
518 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
519  assert(CFP->isNullValue() &&
520  "Floating-point constant is not a positive zero.");
521  MVT VT;
522  if (!isTypeLegal(CFP->getType(), VT))
523  return 0;
524 
525  if (VT != MVT::f32 && VT != MVT::f64)
526  return 0;
527 
528  bool Is64Bit = (VT == MVT::f64);
529  unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
530  unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
531  return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
532 }
533 
534 /// Check if the multiply is by a power-of-2 constant.
535 static bool isMulPowOf2(const Value *I) {
536  if (const auto *MI = dyn_cast<MulOperator>(I)) {
537  if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
538  if (C->getValue().isPowerOf2())
539  return true;
540  if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
541  if (C->getValue().isPowerOf2())
542  return true;
543  }
544  return false;
545 }
546 
547 // Computes the address to get to an object.
548 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
549 {
550  const User *U = nullptr;
551  unsigned Opcode = Instruction::UserOp1;
552  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
553  // Don't walk into other basic blocks unless the object is an alloca from
554  // another block, otherwise it may not have a virtual register assigned.
555  if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
556  FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
557  Opcode = I->getOpcode();
558  U = I;
559  }
560  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
561  Opcode = C->getOpcode();
562  U = C;
563  }
564 
565  if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
566  if (Ty->getAddressSpace() > 255)
567  // Fast instruction selection doesn't support the special
568  // address spaces.
569  return false;
570 
571  switch (Opcode) {
572  default:
573  break;
574  case Instruction::BitCast:
575  // Look through bitcasts.
576  return computeAddress(U->getOperand(0), Addr, Ty);
577 
578  case Instruction::IntToPtr:
579  // Look past no-op inttoptrs.
580  if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
581  TLI.getPointerTy(DL))
582  return computeAddress(U->getOperand(0), Addr, Ty);
583  break;
584 
585  case Instruction::PtrToInt:
586  // Look past no-op ptrtoints.
587  if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
588  return computeAddress(U->getOperand(0), Addr, Ty);
589  break;
590 
591  case Instruction::GetElementPtr: {
592  Address SavedAddr = Addr;
593  uint64_t TmpOffset = Addr.getOffset();
594 
595  // Iterate through the GEP folding the constants into offsets where
596  // we can.
597  for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
598  GTI != E; ++GTI) {
599  const Value *Op = GTI.getOperand();
600  if (StructType *STy = GTI.getStructTypeOrNull()) {
601  const StructLayout *SL = DL.getStructLayout(STy);
602  unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
603  TmpOffset += SL->getElementOffset(Idx);
604  } else {
605  uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
606  while (true) {
607  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
608  // Constant-offset addressing.
609  TmpOffset += CI->getSExtValue() * S;
610  break;
611  }
612  if (canFoldAddIntoGEP(U, Op)) {
613  // A compatible add with a constant operand. Fold the constant.
614  ConstantInt *CI =
615  cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
616  TmpOffset += CI->getSExtValue() * S;
617  // Iterate on the other operand.
618  Op = cast<AddOperator>(Op)->getOperand(0);
619  continue;
620  }
621  // Unsupported
622  goto unsupported_gep;
623  }
624  }
625  }
626 
627  // Try to grab the base operand now.
628  Addr.setOffset(TmpOffset);
629  if (computeAddress(U->getOperand(0), Addr, Ty))
630  return true;
631 
632  // We failed, restore everything and try the other options.
633  Addr = SavedAddr;
634 
635  unsupported_gep:
636  break;
637  }
638  case Instruction::Alloca: {
639  const AllocaInst *AI = cast<AllocaInst>(Obj);
640  DenseMap<const AllocaInst *, int>::iterator SI =
641  FuncInfo.StaticAllocaMap.find(AI);
642  if (SI != FuncInfo.StaticAllocaMap.end()) {
643  Addr.setKind(Address::FrameIndexBase);
644  Addr.setFI(SI->second);
645  return true;
646  }
647  break;
648  }
649  case Instruction::Add: {
650  // Adds of constants are common and easy enough.
651  const Value *LHS = U->getOperand(0);
652  const Value *RHS = U->getOperand(1);
653 
654  if (isa<ConstantInt>(LHS))
655  std::swap(LHS, RHS);
656 
657  if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
658  Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
659  return computeAddress(LHS, Addr, Ty);
660  }
661 
662  Address Backup = Addr;
663  if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
664  return true;
665  Addr = Backup;
666 
667  break;
668  }
669  case Instruction::Sub: {
670  // Subs of constants are common and easy enough.
671  const Value *LHS = U->getOperand(0);
672  const Value *RHS = U->getOperand(1);
673 
674  if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
675  Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
676  return computeAddress(LHS, Addr, Ty);
677  }
678  break;
679  }
680  case Instruction::Shl: {
681  if (Addr.getOffsetReg())
682  break;
683 
684  const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
685  if (!CI)
686  break;
687 
688  unsigned Val = CI->getZExtValue();
689  if (Val < 1 || Val > 3)
690  break;
691 
692  uint64_t NumBytes = 0;
693  if (Ty && Ty->isSized()) {
694  uint64_t NumBits = DL.getTypeSizeInBits(Ty);
695  NumBytes = NumBits / 8;
696  if (!isPowerOf2_64(NumBits))
697  NumBytes = 0;
698  }
699 
700  if (NumBytes != (1ULL << Val))
701  break;
702 
703  Addr.setShift(Val);
704  Addr.setExtendType(AArch64_AM::LSL);
705 
706  const Value *Src = U->getOperand(0);
707  if (const auto *I = dyn_cast<Instruction>(Src)) {
708  if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
709  // Fold the zext or sext when it won't become a noop.
710  if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
711  if (!isIntExtFree(ZE) &&
712  ZE->getOperand(0)->getType()->isIntegerTy(32)) {
713  Addr.setExtendType(AArch64_AM::UXTW);
714  Src = ZE->getOperand(0);
715  }
716  } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
717  if (!isIntExtFree(SE) &&
718  SE->getOperand(0)->getType()->isIntegerTy(32)) {
719  Addr.setExtendType(AArch64_AM::SXTW);
720  Src = SE->getOperand(0);
721  }
722  }
723  }
724  }
725 
726  if (const auto *AI = dyn_cast<BinaryOperator>(Src))
727  if (AI->getOpcode() == Instruction::And) {
728  const Value *LHS = AI->getOperand(0);
729  const Value *RHS = AI->getOperand(1);
730 
731  if (const auto *C = dyn_cast<ConstantInt>(LHS))
732  if (C->getValue() == 0xffffffff)
733  std::swap(LHS, RHS);
734 
735  if (const auto *C = dyn_cast<ConstantInt>(RHS))
736  if (C->getValue() == 0xffffffff) {
737  Addr.setExtendType(AArch64_AM::UXTW);
738  unsigned Reg = getRegForValue(LHS);
739  if (!Reg)
740  return false;
741  bool RegIsKill = hasTrivialKill(LHS);
742  Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
743  AArch64::sub_32);
744  Addr.setOffsetReg(Reg);
745  return true;
746  }
747  }
748 
749  unsigned Reg = getRegForValue(Src);
750  if (!Reg)
751  return false;
752  Addr.setOffsetReg(Reg);
753  return true;
754  }
755  case Instruction::Mul: {
756  if (Addr.getOffsetReg())
757  break;
758 
759  if (!isMulPowOf2(U))
760  break;
761 
762  const Value *LHS = U->getOperand(0);
763  const Value *RHS = U->getOperand(1);
764 
765  // Canonicalize power-of-2 value to the RHS.
766  if (const auto *C = dyn_cast<ConstantInt>(LHS))
767  if (C->getValue().isPowerOf2())
768  std::swap(LHS, RHS);
769 
770  assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
771  const auto *C = cast<ConstantInt>(RHS);
772  unsigned Val = C->getValue().logBase2();
773  if (Val < 1 || Val > 3)
774  break;
775 
776  uint64_t NumBytes = 0;
777  if (Ty && Ty->isSized()) {
778  uint64_t NumBits = DL.getTypeSizeInBits(Ty);
779  NumBytes = NumBits / 8;
780  if (!isPowerOf2_64(NumBits))
781  NumBytes = 0;
782  }
783 
784  if (NumBytes != (1ULL << Val))
785  break;
786 
787  Addr.setShift(Val);
788  Addr.setExtendType(AArch64_AM::LSL);
789 
790  const Value *Src = LHS;
791  if (const auto *I = dyn_cast<Instruction>(Src)) {
792  if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
793  // Fold the zext or sext when it won't become a noop.
794  if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
795  if (!isIntExtFree(ZE) &&
796  ZE->getOperand(0)->getType()->isIntegerTy(32)) {
797  Addr.setExtendType(AArch64_AM::UXTW);
798  Src = ZE->getOperand(0);
799  }
800  } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
801  if (!isIntExtFree(SE) &&
802  SE->getOperand(0)->getType()->isIntegerTy(32)) {
803  Addr.setExtendType(AArch64_AM::SXTW);
804  Src = SE->getOperand(0);
805  }
806  }
807  }
808  }
809 
810  unsigned Reg = getRegForValue(Src);
811  if (!Reg)
812  return false;
813  Addr.setOffsetReg(Reg);
814  return true;
815  }
816  case Instruction::And: {
817  if (Addr.getOffsetReg())
818  break;
819 
820  if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
821  break;
822 
823  const Value *LHS = U->getOperand(0);
824  const Value *RHS = U->getOperand(1);
825 
826  if (const auto *C = dyn_cast<ConstantInt>(LHS))
827  if (C->getValue() == 0xffffffff)
828  std::swap(LHS, RHS);
829 
830  if (const auto *C = dyn_cast<ConstantInt>(RHS))
831  if (C->getValue() == 0xffffffff) {
832  Addr.setShift(0);
833  Addr.setExtendType(AArch64_AM::LSL);
834  Addr.setExtendType(AArch64_AM::UXTW);
835 
836  unsigned Reg = getRegForValue(LHS);
837  if (!Reg)
838  return false;
839  bool RegIsKill = hasTrivialKill(LHS);
840  Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
841  AArch64::sub_32);
842  Addr.setOffsetReg(Reg);
843  return true;
844  }
845  break;
846  }
847  case Instruction::SExt:
848  case Instruction::ZExt: {
849  if (!Addr.getReg() || Addr.getOffsetReg())
850  break;
851 
852  const Value *Src = nullptr;
853  // Fold the zext or sext when it won't become a noop.
854  if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
855  if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
856  Addr.setExtendType(AArch64_AM::UXTW);
857  Src = ZE->getOperand(0);
858  }
859  } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
860  if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
861  Addr.setExtendType(AArch64_AM::SXTW);
862  Src = SE->getOperand(0);
863  }
864  }
865 
866  if (!Src)
867  break;
868 
869  Addr.setShift(0);
870  unsigned Reg = getRegForValue(Src);
871  if (!Reg)
872  return false;
873  Addr.setOffsetReg(Reg);
874  return true;
875  }
876  } // end switch
877 
878  if (Addr.isRegBase() && !Addr.getReg()) {
879  unsigned Reg = getRegForValue(Obj);
880  if (!Reg)
881  return false;
882  Addr.setReg(Reg);
883  return true;
884  }
885 
886  if (!Addr.getOffsetReg()) {
887  unsigned Reg = getRegForValue(Obj);
888  if (!Reg)
889  return false;
890  Addr.setOffsetReg(Reg);
891  return true;
892  }
893 
894  return false;
895 }
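// For illustration: for an access such as "p[idx]" with 4-byte elements and a
// 32-bit index, computeAddress() typically ends up with
//   Addr = { Base.Reg = vreg(p), OffsetReg = vreg(idx),
//            ExtType = SXTW or UXTW, Shift = 2, Offset = 0 },
// which simplifyAddress() and addLoadStoreOperands() below turn into a single
// register-offset access along the lines of "ldr w0, [xBase, wIdx, sxtw #2]".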
896 
897 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
898  const User *U = nullptr;
899  unsigned Opcode = Instruction::UserOp1;
900  bool InMBB = true;
901 
902  if (const auto *I = dyn_cast<Instruction>(V)) {
903  Opcode = I->getOpcode();
904  U = I;
905  InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
906  } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
907  Opcode = C->getOpcode();
908  U = C;
909  }
910 
911  switch (Opcode) {
912  default: break;
913  case Instruction::BitCast:
914  // Look past bitcasts if its operand is in the same BB.
915  if (InMBB)
916  return computeCallAddress(U->getOperand(0), Addr);
917  break;
918  case Instruction::IntToPtr:
919  // Look past no-op inttoptrs if its operand is in the same BB.
920  if (InMBB &&
921  TLI.getValueType(DL, U->getOperand(0)->getType()) ==
922  TLI.getPointerTy(DL))
923  return computeCallAddress(U->getOperand(0), Addr);
924  break;
925  case Instruction::PtrToInt:
926  // Look past no-op ptrtoints if its operand is in the same BB.
927  if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
928  return computeCallAddress(U->getOperand(0), Addr);
929  break;
930  }
931 
932  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
933  Addr.setGlobalValue(GV);
934  return true;
935  }
936 
937  // If all else fails, try to materialize the value in a register.
938  if (!Addr.getGlobalValue()) {
939  Addr.setReg(getRegForValue(V));
940  return Addr.getReg() != 0;
941  }
942 
943  return false;
944 }
945 
946 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
947  EVT evt = TLI.getValueType(DL, Ty, true);
948 
949  // Only handle simple types.
950  if (evt == MVT::Other || !evt.isSimple())
951  return false;
952  VT = evt.getSimpleVT();
953 
954  // This is a legal type, but it's not something we handle in fast-isel.
955  if (VT == MVT::f128)
956  return false;
957 
958  // Handle all other legal types, i.e. a register that will directly hold this
959  // value.
960  return TLI.isTypeLegal(VT);
961 }
962 
963 /// Determine if the value type is supported by FastISel.
964 ///
965 /// FastISel for AArch64 can handle more value types than are legal. This adds
966 /// simple value types such as i1, i8, and i16.
967 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
968  if (Ty->isVectorTy() && !IsVectorAllowed)
969  return false;
970 
971  if (isTypeLegal(Ty, VT))
972  return true;
973 
974 // If this is a type that can be sign- or zero-extended to a basic operation,
975  // go ahead and accept it now.
976  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
977  return true;
978 
979  return false;
980 }
981 
982 bool AArch64FastISel::isValueAvailable(const Value *V) const {
983  if (!isa<Instruction>(V))
984  return true;
985 
986  const auto *I = cast<Instruction>(V);
987  return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
988 }
989 
990 bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
991  unsigned ScaleFactor = getImplicitScaleFactor(VT);
992  if (!ScaleFactor)
993  return false;
994 
995  bool ImmediateOffsetNeedsLowering = false;
996  bool RegisterOffsetNeedsLowering = false;
997  int64_t Offset = Addr.getOffset();
998  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
999  ImmediateOffsetNeedsLowering = true;
1000  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1001  !isUInt<12>(Offset / ScaleFactor))
1002  ImmediateOffsetNeedsLowering = true;
1003 
1004  // Cannot encode an offset register and an immediate offset in the same
1005  // instruction. Fold the immediate offset into the load/store instruction and
1006  // emit an additional add to take care of the offset register.
1007  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1008  RegisterOffsetNeedsLowering = true;
1009 
1010  // Cannot encode zero register as base.
1011  if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1012  RegisterOffsetNeedsLowering = true;
1013 
1014  // If this is a stack pointer and the offset needs to be simplified then put
1015  // the alloca address into a register, set the base type back to register and
1016  // continue. This should almost never happen.
1017  if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1018  {
1019  unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1020  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
1021  ResultReg)
1022  .addFrameIndex(Addr.getFI())
1023  .addImm(0)
1024  .addImm(0);
1025  Addr.setKind(Address::RegBase);
1026  Addr.setReg(ResultReg);
1027  }
1028 
1029  if (RegisterOffsetNeedsLowering) {
1030  unsigned ResultReg = 0;
1031  if (Addr.getReg()) {
1032  if (Addr.getExtendType() == AArch64_AM::SXTW ||
1033  Addr.getExtendType() == AArch64_AM::UXTW )
1034  ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1035  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1036  /*TODO:IsKill=*/false, Addr.getExtendType(),
1037  Addr.getShift());
1038  else
1039  ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1040  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1041  /*TODO:IsKill=*/false, AArch64_AM::LSL,
1042  Addr.getShift());
1043  } else {
1044  if (Addr.getExtendType() == AArch64_AM::UXTW)
1045  ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1046  /*Op0IsKill=*/false, Addr.getShift(),
1047  /*IsZExt=*/true);
1048  else if (Addr.getExtendType() == AArch64_AM::SXTW)
1049  ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1050  /*Op0IsKill=*/false, Addr.getShift(),
1051  /*IsZExt=*/false);
1052  else
1053  ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1054  /*Op0IsKill=*/false, Addr.getShift());
1055  }
1056  if (!ResultReg)
1057  return false;
1058 
1059  Addr.setReg(ResultReg);
1060  Addr.setOffsetReg(0);
1061  Addr.setShift(0);
1062  Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1063  }
1064 
1065  // Since the offset is too large for the load/store instruction get the
1066  // reg+offset into a register.
1067  if (ImmediateOffsetNeedsLowering) {
1068  unsigned ResultReg;
1069  if (Addr.getReg())
1070  // Try to fold the immediate into the add instruction.
1071  ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset);
1072  else
1073  ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1074 
1075  if (!ResultReg)
1076  return false;
1077  Addr.setReg(ResultReg);
1078  Addr.setOffset(0);
1079  }
1080  return true;
1081 }
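// For illustration: a single memory access cannot take both a register offset
// and an immediate offset, so for an address like "base + (sext i32 idx) * 4 + 32"
// the register part is folded into the base first (emitAddSub_rx above) and
// only the immediate is left on the access, roughly:
//   add x8, xBase, wIdx, sxtw #2
//   ldr w0, [x8, #32]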
1082 
1083 void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1084  const MachineInstrBuilder &MIB,
1085  MachineMemOperand::Flags Flags,
1086  unsigned ScaleFactor,
1087  MachineMemOperand *MMO) {
1088  int64_t Offset = Addr.getOffset() / ScaleFactor;
1089  // Frame base works a bit differently. Handle it separately.
1090  if (Addr.isFIBase()) {
1091  int FI = Addr.getFI();
1092  // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
1093  // and alignment should be based on the VT.
1094  MMO = FuncInfo.MF->getMachineMemOperand(
1095  MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1096  MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
1097  // Now add the rest of the operands.
1098  MIB.addFrameIndex(FI).addImm(Offset);
1099  } else {
1100  assert(Addr.isRegBase() && "Unexpected address kind.");
1101  const MCInstrDesc &II = MIB->getDesc();
1102  unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1103  Addr.setReg(
1104  constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1105  Addr.setOffsetReg(
1106  constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1107  if (Addr.getOffsetReg()) {
1108  assert(Addr.getOffset() == 0 && "Unexpected offset");
1109  bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1110  Addr.getExtendType() == AArch64_AM::SXTX;
1111  MIB.addReg(Addr.getReg());
1112  MIB.addReg(Addr.getOffsetReg());
1113  MIB.addImm(IsSigned);
1114  MIB.addImm(Addr.getShift() != 0);
1115  } else
1116  MIB.addReg(Addr.getReg()).addImm(Offset);
1117  }
1118 
1119  if (MMO)
1120  MIB.addMemOperand(MMO);
1121 }
1122 
1123 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1124  const Value *RHS, bool SetFlags,
1125  bool WantResult, bool IsZExt) {
1126  AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1127  bool NeedExtend = false;
1128  switch (RetVT.SimpleTy) {
1129  default:
1130  return 0;
1131  case MVT::i1:
1132  NeedExtend = true;
1133  break;
1134  case MVT::i8:
1135  NeedExtend = true;
1136  ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1137  break;
1138  case MVT::i16:
1139  NeedExtend = true;
1140  ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1141  break;
1142  case MVT::i32: // fall-through
1143  case MVT::i64:
1144  break;
1145  }
1146  MVT SrcVT = RetVT;
1147  RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1148 
1149  // Canonicalize immediates to the RHS first.
1150  if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1151  std::swap(LHS, RHS);
1152 
1153  // Canonicalize mul by power of 2 to the RHS.
1154  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1155  if (isMulPowOf2(LHS))
1156  std::swap(LHS, RHS);
1157 
1158  // Canonicalize shift immediate to the RHS.
1159  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1160  if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1161  if (isa<ConstantInt>(SI->getOperand(1)))
1162  if (SI->getOpcode() == Instruction::Shl ||
1163  SI->getOpcode() == Instruction::LShr ||
1164  SI->getOpcode() == Instruction::AShr )
1165  std::swap(LHS, RHS);
1166 
1167  unsigned LHSReg = getRegForValue(LHS);
1168  if (!LHSReg)
1169  return 0;
1170  bool LHSIsKill = hasTrivialKill(LHS);
1171 
1172  if (NeedExtend)
1173  LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1174 
1175  unsigned ResultReg = 0;
1176  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1177  uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1178  if (C->isNegative())
1179  ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
1180  SetFlags, WantResult);
1181  else
1182  ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
1183  WantResult);
1184  } else if (const auto *C = dyn_cast<Constant>(RHS))
1185  if (C->isNullValue())
1186  ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags,
1187  WantResult);
1188 
1189  if (ResultReg)
1190  return ResultReg;
1191 
1192  // Only extend the RHS within the instruction if there is a valid extend type.
1193  if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1194  isValueAvailable(RHS)) {
1195  if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
1196  if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
1197  if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
1198  unsigned RHSReg = getRegForValue(SI->getOperand(0));
1199  if (!RHSReg)
1200  return 0;
1201  bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1202  return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1203  RHSIsKill, ExtendType, C->getZExtValue(),
1204  SetFlags, WantResult);
1205  }
1206  unsigned RHSReg = getRegForValue(RHS);
1207  if (!RHSReg)
1208  return 0;
1209  bool RHSIsKill = hasTrivialKill(RHS);
1210  return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1211  ExtendType, 0, SetFlags, WantResult);
1212  }
1213 
1214  // Check if the mul can be folded into the instruction.
1215  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1216  if (isMulPowOf2(RHS)) {
1217  const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1218  const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1219 
1220  if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1221  if (C->getValue().isPowerOf2())
1222  std::swap(MulLHS, MulRHS);
1223 
1224  assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1225  uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1226  unsigned RHSReg = getRegForValue(MulLHS);
1227  if (!RHSReg)
1228  return 0;
1229  bool RHSIsKill = hasTrivialKill(MulLHS);
1230  ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1231  RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags,
1232  WantResult);
1233  if (ResultReg)
1234  return ResultReg;
1235  }
1236  }
1237 
1238  // Check if the shift can be folded into the instruction.
1239  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1240  if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1241  if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1242  AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1243  switch (SI->getOpcode()) {
1244  default: break;
1245  case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
1246  case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1247  case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1248  }
1249  uint64_t ShiftVal = C->getZExtValue();
1250  if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1251  unsigned RHSReg = getRegForValue(SI->getOperand(0));
1252  if (!RHSReg)
1253  return 0;
1254  bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1255  ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1256  RHSIsKill, ShiftType, ShiftVal, SetFlags,
1257  WantResult);
1258  if (ResultReg)
1259  return ResultReg;
1260  }
1261  }
1262  }
1263  }
1264 
1265  unsigned RHSReg = getRegForValue(RHS);
1266  if (!RHSReg)
1267  return 0;
1268  bool RHSIsKill = hasTrivialKill(RHS);
1269 
1270  if (NeedExtend)
1271  RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1272 
1273  return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1274  SetFlags, WantResult);
1275 }
1276 
1277 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1278  bool LHSIsKill, unsigned RHSReg,
1279  bool RHSIsKill, bool SetFlags,
1280  bool WantResult) {
1281  assert(LHSReg && RHSReg && "Invalid register number.");
1282 
1283  if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1284  RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1285  return 0;
1286 
1287  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1288  return 0;
1289 
1290  static const unsigned OpcTable[2][2][2] = {
1291  { { AArch64::SUBWrr, AArch64::SUBXrr },
1292  { AArch64::ADDWrr, AArch64::ADDXrr } },
1293  { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1294  { AArch64::ADDSWrr, AArch64::ADDSXrr } }
1295  };
1296  bool Is64Bit = RetVT == MVT::i64;
1297  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1298  const TargetRegisterClass *RC =
1299  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1300  unsigned ResultReg;
1301  if (WantResult)
1302  ResultReg = createResultReg(RC);
1303  else
1304  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1305 
1306  const MCInstrDesc &II = TII.get(Opc);
1307  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1308  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1309  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1310  .addReg(LHSReg, getKillRegState(LHSIsKill))
1311  .addReg(RHSReg, getKillRegState(RHSIsKill));
1312  return ResultReg;
1313 }
1314 
1315 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1316  bool LHSIsKill, uint64_t Imm,
1317  bool SetFlags, bool WantResult) {
1318  assert(LHSReg && "Invalid register number.");
1319 
1320  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1321  return 0;
1322 
1323  unsigned ShiftImm;
1324  if (isUInt<12>(Imm))
1325  ShiftImm = 0;
1326  else if ((Imm & 0xfff000) == Imm) {
1327  ShiftImm = 12;
1328  Imm >>= 12;
1329  } else
1330  return 0;
1331 
1332  static const unsigned OpcTable[2][2][2] = {
1333  { { AArch64::SUBWri, AArch64::SUBXri },
1334  { AArch64::ADDWri, AArch64::ADDXri } },
1335  { { AArch64::SUBSWri, AArch64::SUBSXri },
1336  { AArch64::ADDSWri, AArch64::ADDSXri } }
1337  };
1338  bool Is64Bit = RetVT == MVT::i64;
1339  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1340  const TargetRegisterClass *RC;
1341  if (SetFlags)
1342  RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1343  else
1344  RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1345  unsigned ResultReg;
1346  if (WantResult)
1347  ResultReg = createResultReg(RC);
1348  else
1349  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1350 
1351  const MCInstrDesc &II = TII.get(Opc);
1352  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1353  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1354  .addReg(LHSReg, getKillRegState(LHSIsKill))
1355  .addImm(Imm)
1356  .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1357  return ResultReg;
1358 }
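// For illustration: ADD/SUB (immediate) accepts a 12-bit unsigned immediate,
// optionally shifted left by 12. So Imm = 0x123000 is emitted as
// "add x0, x1, #0x123, lsl #12", while Imm = 0x1234 fits neither form and
// emitAddSub_ri() returns 0, letting callers such as emitAdd_ri_() below fall
// back to materializing the constant into a register.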
1359 
1360 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1361  bool LHSIsKill, unsigned RHSReg,
1362  bool RHSIsKill,
1363  AArch64_AM::ShiftExtendType ShiftType,
1364  uint64_t ShiftImm, bool SetFlags,
1365  bool WantResult) {
1366  assert(LHSReg && RHSReg && "Invalid register number.");
1367  assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1368  RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1369 
1370  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1371  return 0;
1372 
1373  // Don't deal with undefined shifts.
1374  if (ShiftImm >= RetVT.getSizeInBits())
1375  return 0;
1376 
1377  static const unsigned OpcTable[2][2][2] = {
1378  { { AArch64::SUBWrs, AArch64::SUBXrs },
1379  { AArch64::ADDWrs, AArch64::ADDXrs } },
1380  { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1381  { AArch64::ADDSWrs, AArch64::ADDSXrs } }
1382  };
1383  bool Is64Bit = RetVT == MVT::i64;
1384  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1385  const TargetRegisterClass *RC =
1386  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1387  unsigned ResultReg;
1388  if (WantResult)
1389  ResultReg = createResultReg(RC);
1390  else
1391  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1392 
1393  const MCInstrDesc &II = TII.get(Opc);
1394  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1395  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1396  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1397  .addReg(LHSReg, getKillRegState(LHSIsKill))
1398  .addReg(RHSReg, getKillRegState(RHSIsKill))
1399  .addImm(getShifterImm(ShiftType, ShiftImm));
1400  return ResultReg;
1401 }
1402 
1403 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1404  bool LHSIsKill, unsigned RHSReg,
1405  bool RHSIsKill,
1406  AArch64_AM::ShiftExtendType ExtType,
1407  uint64_t ShiftImm, bool SetFlags,
1408  bool WantResult) {
1409  assert(LHSReg && RHSReg && "Invalid register number.");
1410  assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1411  RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1412 
1413  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1414  return 0;
1415 
1416  if (ShiftImm >= 4)
1417  return 0;
1418 
1419  static const unsigned OpcTable[2][2][2] = {
1420  { { AArch64::SUBWrx, AArch64::SUBXrx },
1421  { AArch64::ADDWrx, AArch64::ADDXrx } },
1422  { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1423  { AArch64::ADDSWrx, AArch64::ADDSXrx } }
1424  };
1425  bool Is64Bit = RetVT == MVT::i64;
1426  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1427  const TargetRegisterClass *RC = nullptr;
1428  if (SetFlags)
1429  RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1430  else
1431  RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1432  unsigned ResultReg;
1433  if (WantResult)
1434  ResultReg = createResultReg(RC);
1435  else
1436  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1437 
1438  const MCInstrDesc &II = TII.get(Opc);
1439  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1440  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1441  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1442  .addReg(LHSReg, getKillRegState(LHSIsKill))
1443  .addReg(RHSReg, getKillRegState(RHSIsKill))
1444  .addImm(getArithExtendImm(ExtType, ShiftImm));
1445  return ResultReg;
1446 }
1447 
1448 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1449  Type *Ty = LHS->getType();
1450  EVT EVT = TLI.getValueType(DL, Ty, true);
1451  if (!EVT.isSimple())
1452  return false;
1453  MVT VT = EVT.getSimpleVT();
1454 
1455  switch (VT.SimpleTy) {
1456  default:
1457  return false;
1458  case MVT::i1:
1459  case MVT::i8:
1460  case MVT::i16:
1461  case MVT::i32:
1462  case MVT::i64:
1463  return emitICmp(VT, LHS, RHS, IsZExt);
1464  case MVT::f32:
1465  case MVT::f64:
1466  return emitFCmp(VT, LHS, RHS);
1467  }
1468 }
1469 
1470 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1471  bool IsZExt) {
1472  return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1473  IsZExt) != 0;
1474 }
1475 
1476 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1477  uint64_t Imm) {
1478  return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
1479  /*SetFlags=*/true, /*WantResult=*/false) != 0;
1480 }
1481 
1482 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1483  if (RetVT != MVT::f32 && RetVT != MVT::f64)
1484  return false;
1485 
1486  // Check to see if the 2nd operand is a constant that we can encode directly
1487  // in the compare.
1488  bool UseImm = false;
1489  if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1490  if (CFP->isZero() && !CFP->isNegative())
1491  UseImm = true;
1492 
1493  unsigned LHSReg = getRegForValue(LHS);
1494  if (!LHSReg)
1495  return false;
1496  bool LHSIsKill = hasTrivialKill(LHS);
1497 
1498  if (UseImm) {
1499  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1500  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1501  .addReg(LHSReg, getKillRegState(LHSIsKill));
1502  return true;
1503  }
1504 
1505  unsigned RHSReg = getRegForValue(RHS);
1506  if (!RHSReg)
1507  return false;
1508  bool RHSIsKill = hasTrivialKill(RHS);
1509 
1510  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1511  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1512  .addReg(LHSReg, getKillRegState(LHSIsKill))
1513  .addReg(RHSReg, getKillRegState(RHSIsKill));
1514  return true;
1515 }
1516 
1517 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1518  bool SetFlags, bool WantResult, bool IsZExt) {
1519  return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1520  IsZExt);
1521 }
1522 
1523 /// This method is a wrapper to simplify add emission.
1524 ///
1525 /// First try to emit an add with an immediate operand using emitAddSub_ri. If
1526 /// that fails, then try to materialize the immediate into a register and use
1527 /// emitAddSub_rr instead.
1528 unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
1529  int64_t Imm) {
1530  unsigned ResultReg;
1531  if (Imm < 0)
1532  ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm);
1533  else
1534  ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm);
1535 
1536  if (ResultReg)
1537  return ResultReg;
1538 
1539  unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1540  if (!CReg)
1541  return 0;
1542 
1543  ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true);
1544  return ResultReg;
1545 }
1546 
1547 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1548  bool SetFlags, bool WantResult, bool IsZExt) {
1549  return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1550  IsZExt);
1551 }
1552 
1553 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1554  bool LHSIsKill, unsigned RHSReg,
1555  bool RHSIsKill, bool WantResult) {
1556  return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1557  RHSIsKill, /*SetFlags=*/true, WantResult);
1558 }
1559 
1560 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1561  bool LHSIsKill, unsigned RHSReg,
1562  bool RHSIsKill,
1563  AArch64_AM::ShiftExtendType ShiftType,
1564  uint64_t ShiftImm, bool WantResult) {
1565  return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1566  RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
1567  WantResult);
1568 }
1569 
1570 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1571  const Value *LHS, const Value *RHS) {
1572  // Canonicalize immediates to the RHS first.
1573  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1574  std::swap(LHS, RHS);
1575 
1576  // Canonicalize mul by power-of-2 to the RHS.
1577  if (LHS->hasOneUse() && isValueAvailable(LHS))
1578  if (isMulPowOf2(LHS))
1579  std::swap(LHS, RHS);
1580 
1581  // Canonicalize shift immediate to the RHS.
1582  if (LHS->hasOneUse() && isValueAvailable(LHS))
1583  if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1584  if (isa<ConstantInt>(SI->getOperand(1)))
1585  std::swap(LHS, RHS);
1586 
1587  unsigned LHSReg = getRegForValue(LHS);
1588  if (!LHSReg)
1589  return 0;
1590  bool LHSIsKill = hasTrivialKill(LHS);
1591 
1592  unsigned ResultReg = 0;
1593  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1594  uint64_t Imm = C->getZExtValue();
1595  ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
1596  }
1597  if (ResultReg)
1598  return ResultReg;
1599 
1600  // Check if the mul can be folded into the instruction.
1601  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1602  if (isMulPowOf2(RHS)) {
1603  const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1604  const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1605 
1606  if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1607  if (C->getValue().isPowerOf2())
1608  std::swap(MulLHS, MulRHS);
1609 
1610  assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1611  uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1612 
1613  unsigned RHSReg = getRegForValue(MulLHS);
1614  if (!RHSReg)
1615  return 0;
1616  bool RHSIsKill = hasTrivialKill(MulLHS);
1617  ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1618  RHSIsKill, ShiftVal);
1619  if (ResultReg)
1620  return ResultReg;
1621  }
1622  }
1623 
1624  // Check if the shift can be folded into the instruction.
1625  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1626  if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1627  if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1628  uint64_t ShiftVal = C->getZExtValue();
1629  unsigned RHSReg = getRegForValue(SI->getOperand(0));
1630  if (!RHSReg)
1631  return 0;
1632  bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1633  ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1634  RHSIsKill, ShiftVal);
1635  if (ResultReg)
1636  return ResultReg;
1637  }
1638  }
1639 
1640  unsigned RHSReg = getRegForValue(RHS);
1641  if (!RHSReg)
1642  return 0;
1643  bool RHSIsKill = hasTrivialKill(RHS);
1644 
1645  MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1646  ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1647  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1648  uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1649  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1650  }
1651  return ResultReg;
1652 }
1653 
1654 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1655  unsigned LHSReg, bool LHSIsKill,
1656  uint64_t Imm) {
1657  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1658  "ISD nodes are not consecutive!");
1659  static const unsigned OpcTable[3][2] = {
1660  { AArch64::ANDWri, AArch64::ANDXri },
1661  { AArch64::ORRWri, AArch64::ORRXri },
1662  { AArch64::EORWri, AArch64::EORXri }
1663  };
1664  const TargetRegisterClass *RC;
1665  unsigned Opc;
1666  unsigned RegSize;
1667  switch (RetVT.SimpleTy) {
1668  default:
1669  return 0;
1670  case MVT::i1:
1671  case MVT::i8:
1672  case MVT::i16:
1673  case MVT::i32: {
1674  unsigned Idx = ISDOpc - ISD::AND;
1675  Opc = OpcTable[Idx][0];
1676  RC = &AArch64::GPR32spRegClass;
1677  RegSize = 32;
1678  break;
1679  }
1680  case MVT::i64:
1681  Opc = OpcTable[ISDOpc - ISD::AND][1];
1682  RC = &AArch64::GPR64spRegClass;
1683  RegSize = 64;
1684  break;
1685  }
1686 
1687  if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1688  return 0;
1689 
1690  unsigned ResultReg =
1691  fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
1692  AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1693  if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1694  uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1695  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1696  }
1697  return ResultReg;
1698 }
1699 
1700 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1701  unsigned LHSReg, bool LHSIsKill,
1702  unsigned RHSReg, bool RHSIsKill,
1703  uint64_t ShiftImm) {
1704  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1705  "ISD nodes are not consecutive!");
1706  static const unsigned OpcTable[3][2] = {
1707  { AArch64::ANDWrs, AArch64::ANDXrs },
1708  { AArch64::ORRWrs, AArch64::ORRXrs },
1709  { AArch64::EORWrs, AArch64::EORXrs }
1710  };
1711 
1712  // Don't deal with undefined shifts.
1713  if (ShiftImm >= RetVT.getSizeInBits())
1714  return 0;
1715 
1716  const TargetRegisterClass *RC;
1717  unsigned Opc;
1718  switch (RetVT.SimpleTy) {
1719  default:
1720  return 0;
1721  case MVT::i1:
1722  case MVT::i8:
1723  case MVT::i16:
1724  case MVT::i32:
1725  Opc = OpcTable[ISDOpc - ISD::AND][0];
1726  RC = &AArch64::GPR32RegClass;
1727  break;
1728  case MVT::i64:
1729  Opc = OpcTable[ISDOpc - ISD::AND][1];
1730  RC = &AArch64::GPR64RegClass;
1731  break;
1732  }
1733  unsigned ResultReg =
1734  fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1735  AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1736  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1737  uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1738  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1739  }
1740  return ResultReg;
1741 }
1742 
1743 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1744  uint64_t Imm) {
1745  return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
1746 }
1747 
1748 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1749  bool WantZExt, MachineMemOperand *MMO) {
1750  if (!TLI.allowsMisalignedMemoryAccesses(VT))
1751  return 0;
1752 
1753  // Simplify this down to something we can handle.
1754  if (!simplifyAddress(Addr, VT))
1755  return 0;
1756 
1757  unsigned ScaleFactor = getImplicitScaleFactor(VT);
1758  if (!ScaleFactor)
1759  llvm_unreachable("Unexpected value type.");
1760 
1761  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1762  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1763  bool UseScaled = true;
1764  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1765  UseScaled = false;
1766  ScaleFactor = 1;
1767  }
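  // Illustrative example of the offset handling above (register names are
  // placeholders, not taken from the source): an i32 load at base+8 satisfies
  // the scaled, unsigned form and becomes roughly "ldr w0, [x1, #8]" (encoded
  // offset 8/4 = 2), whereas an i32 load at base-4 falls back to the unscaled
  // form, roughly "ldur w0, [x1, #-4]".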
1768 
1769  static const unsigned GPOpcTable[2][8][4] = {
1770  // Sign-extend.
1771  { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
1772  AArch64::LDURXi },
1773  { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
1774  AArch64::LDURXi },
1775  { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
1776  AArch64::LDRXui },
1777  { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
1778  AArch64::LDRXui },
1779  { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1780  AArch64::LDRXroX },
1781  { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1782  AArch64::LDRXroX },
1783  { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1784  AArch64::LDRXroW },
1785  { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1786  AArch64::LDRXroW }
1787  },
1788  // Zero-extend.
1789  { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1790  AArch64::LDURXi },
1791  { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1792  AArch64::LDURXi },
1793  { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1794  AArch64::LDRXui },
1795  { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1796  AArch64::LDRXui },
1797  { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1798  AArch64::LDRXroX },
1799  { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1800  AArch64::LDRXroX },
1801  { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1802  AArch64::LDRXroW },
1803  { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1804  AArch64::LDRXroW }
1805  }
1806  };
1807 
1808  static const unsigned FPOpcTable[4][2] = {
1809  { AArch64::LDURSi, AArch64::LDURDi },
1810  { AArch64::LDRSui, AArch64::LDRDui },
1811  { AArch64::LDRSroX, AArch64::LDRDroX },
1812  { AArch64::LDRSroW, AArch64::LDRDroW }
1813  };
1814 
1815  unsigned Opc;
1816  const TargetRegisterClass *RC;
1817  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1818  Addr.getOffsetReg();
1819  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1820  if (Addr.getExtendType() == AArch64_AM::UXTW ||
1821  Addr.getExtendType() == AArch64_AM::SXTW)
1822  Idx++;
1823 
1824  bool IsRet64Bit = RetVT == MVT::i64;
1825  switch (VT.SimpleTy) {
1826  default:
1827  llvm_unreachable("Unexpected value type.");
1828  case MVT::i1: // Intentional fall-through.
1829  case MVT::i8:
1830  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1831  RC = (IsRet64Bit && !WantZExt) ?
1832  &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1833  break;
1834  case MVT::i16:
1835  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1836  RC = (IsRet64Bit && !WantZExt) ?
1837  &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1838  break;
1839  case MVT::i32:
1840  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1841  RC = (IsRet64Bit && !WantZExt) ?
1842  &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1843  break;
1844  case MVT::i64:
1845  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1846  RC = &AArch64::GPR64RegClass;
1847  break;
1848  case MVT::f32:
1849  Opc = FPOpcTable[Idx][0];
1850  RC = &AArch64::FPR32RegClass;
1851  break;
1852  case MVT::f64:
1853  Opc = FPOpcTable[Idx][1];
1854  RC = &AArch64::FPR64RegClass;
1855  break;
1856  }
1857 
1858  // Create the base instruction, then add the operands.
1859  unsigned ResultReg = createResultReg(RC);
1860  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1861  TII.get(Opc), ResultReg);
1862  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1863 
1864  // Loading an i1 requires special handling.
1865  if (VT == MVT::i1) {
1866  unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
1867  assert(ANDReg && "Unexpected AND instruction emission failure.");
1868  ResultReg = ANDReg;
1869  }
1870 
1871  // For zero-extending loads to 64bit we emit a 32bit load and then convert
1872  // the 32bit reg to a 64bit reg.
1873  if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1874  unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
1875  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1876  TII.get(AArch64::SUBREG_TO_REG), Reg64)
1877  .addImm(0)
1878  .addReg(ResultReg, getKillRegState(true))
1879  .addImm(AArch64::sub_32);
1880  ResultReg = Reg64;
1881  }
1882  return ResultReg;
1883 }
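// A rough sketch of what emitLoad produces for a zero-extending i8 load whose
// result is wanted as i64 (register names are illustrative only): first
// "ldrb w0, [x1]" via LDRBBui, then a SUBREG_TO_REG that places the 32-bit
// result into the low half of a fresh 64-bit virtual register, per the
// WantZExt/RetVT == MVT::i64 case above.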
1884 
1885 bool AArch64FastISel::selectAddSub(const Instruction *I) {
1886  MVT VT;
1887  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1888  return false;
1889 
1890  if (VT.isVector())
1891  return selectOperator(I, I->getOpcode());
1892 
1893  unsigned ResultReg;
1894  switch (I->getOpcode()) {
1895  default:
1896  llvm_unreachable("Unexpected instruction.");
1897  case Instruction::Add:
1898  ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1899  break;
1900  case Instruction::Sub:
1901  ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1902  break;
1903  }
1904  if (!ResultReg)
1905  return false;
1906 
1907  updateValueMap(I, ResultReg);
1908  return true;
1909 }
1910 
1911 bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1912  MVT VT;
1913  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1914  return false;
1915 
1916  if (VT.isVector())
1917  return selectOperator(I, I->getOpcode());
1918 
1919  unsigned ResultReg;
1920  switch (I->getOpcode()) {
1921  default:
1922  llvm_unreachable("Unexpected instruction.");
1923  case Instruction::And:
1924  ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1925  break;
1926  case Instruction::Or:
1927  ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1928  break;
1929  case Instruction::Xor:
1930  ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1931  break;
1932  }
1933  if (!ResultReg)
1934  return false;
1935 
1936  updateValueMap(I, ResultReg);
1937  return true;
1938 }
1939 
1940 bool AArch64FastISel::selectLoad(const Instruction *I) {
1941  MVT VT;
1942  // Verify we have a legal type before going any further. Currently, we handle
1943  // simple types that will directly fit in a register (i32/f32/i64/f64) or
1944  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1945  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1946  cast<LoadInst>(I)->isAtomic())
1947  return false;
1948 
1949  const Value *SV = I->getOperand(0);
1950  if (TLI.supportSwiftError()) {
1951  // Swifterror values can come from either a function parameter with
1952  // swifterror attribute or an alloca with swifterror attribute.
1953  if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1954  if (Arg->hasSwiftErrorAttr())
1955  return false;
1956  }
1957 
1958  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1959  if (Alloca->isSwiftError())
1960  return false;
1961  }
1962  }
1963 
1964  // See if we can handle this address.
1965  Address Addr;
1966  if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1967  return false;
1968 
1969  // Fold the following sign-/zero-extend into the load instruction.
1970  bool WantZExt = true;
1971  MVT RetVT = VT;
1972  const Value *IntExtVal = nullptr;
1973  if (I->hasOneUse()) {
1974  if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1975  if (isTypeSupported(ZE->getType(), RetVT))
1976  IntExtVal = ZE;
1977  else
1978  RetVT = VT;
1979  } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1980  if (isTypeSupported(SE->getType(), RetVT))
1981  IntExtVal = SE;
1982  else
1983  RetVT = VT;
1984  WantZExt = false;
1985  }
1986  }
1987 
1988  unsigned ResultReg =
1989  emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1990  if (!ResultReg)
1991  return false;
1992 
1993  // There are a few different cases we have to handle, because the load or the
1994  // sign-/zero-extend might not be selected by FastISel if we fall-back to
1995  // SelectionDAG. There is also an ordering issue when both instructions are in
1996  // different basic blocks.
1997  // 1.) The load instruction is selected by FastISel, but the integer extend
1998  // not. This usually happens when the integer extend is in a different
1999  // basic block and SelectionDAG took over for that basic block.
2000  // 2.) The load instruction is selected before the integer extend. This only
2001  // happens when the integer extend is in a different basic block.
2002  // 3.) The load instruction is selected by SelectionDAG and the integer extend
2003  // by FastISel. This happens if there are instructions between the load
2004  // and the integer extend that couldn't be selected by FastISel.
2005  if (IntExtVal) {
2006  // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2007  // could select it. Emit a copy to subreg if necessary. FastISel will remove
2008  // it when it selects the integer extend.
2009  unsigned Reg = lookUpRegForValue(IntExtVal);
2010  auto *MI = MRI.getUniqueVRegDef(Reg);
2011  if (!MI) {
2012  if (RetVT == MVT::i64 && VT <= MVT::i32) {
2013  if (WantZExt) {
2014  // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2015  MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
2016  ResultReg = std::prev(I)->getOperand(0).getReg();
2017  removeDeadCode(I, std::next(I));
2018  } else
2019  ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2020  /*IsKill=*/true,
2021  AArch64::sub_32);
2022  }
2023  updateValueMap(I, ResultReg);
2024  return true;
2025  }
2026 
2027  // The integer extend has already been emitted - delete all the instructions
2028  // that have been emitted by the integer extend lowering code and use the
2029  // result from the load instruction directly.
2030  while (MI) {
2031  Reg = 0;
2032  for (auto &Opnd : MI->uses()) {
2033  if (Opnd.isReg()) {
2034  Reg = Opnd.getReg();
2035  break;
2036  }
2037  }
2038  MachineBasicBlock::iterator I(MI);
2039  removeDeadCode(I, std::next(I));
2040  MI = nullptr;
2041  if (Reg)
2042  MI = MRI.getUniqueVRegDef(Reg);
2043  }
2044  updateValueMap(IntExtVal, ResultReg);
2045  return true;
2046  }
2047 
2048  updateValueMap(I, ResultReg);
2049  return true;
2050 }
2051 
2052 bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2053  unsigned AddrReg,
2054  MachineMemOperand *MMO) {
2055  unsigned Opc;
2056  switch (VT.SimpleTy) {
2057  default: return false;
2058  case MVT::i8: Opc = AArch64::STLRB; break;
2059  case MVT::i16: Opc = AArch64::STLRH; break;
2060  case MVT::i32: Opc = AArch64::STLRW; break;
2061  case MVT::i64: Opc = AArch64::STLRX; break;
2062  }
2063 
2064  const MCInstrDesc &II = TII.get(Opc);
2065  SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2066  AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2067  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2068  .addReg(SrcReg)
2069  .addReg(AddrReg)
2070  .addMemOperand(MMO);
2071  return true;
2072 }
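// For illustration (placeholder registers): a release or seq_cst store of an
// i32 value is emitted by emitStoreRelease as a store-release instruction,
// roughly "stlr w0, [x1]" (STLRW).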
2073 
2074 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2075  MachineMemOperand *MMO) {
2076  if (!TLI.allowsMisalignedMemoryAccesses(VT))
2077  return false;
2078 
2079  // Simplify this down to something we can handle.
2080  if (!simplifyAddress(Addr, VT))
2081  return false;
2082 
2083  unsigned ScaleFactor = getImplicitScaleFactor(VT);
2084  if (!ScaleFactor)
2085  llvm_unreachable("Unexpected value type.");
2086 
2087  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2088  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2089  bool UseScaled = true;
2090  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2091  UseScaled = false;
2092  ScaleFactor = 1;
2093  }
2094 
2095  static const unsigned OpcTable[4][6] = {
2096  { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
2097  AArch64::STURSi, AArch64::STURDi },
2098  { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
2099  AArch64::STRSui, AArch64::STRDui },
2100  { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2101  AArch64::STRSroX, AArch64::STRDroX },
2102  { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2103  AArch64::STRSroW, AArch64::STRDroW }
2104  };
2105 
2106  unsigned Opc;
2107  bool VTIsi1 = false;
2108  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2109  Addr.getOffsetReg();
2110  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2111  if (Addr.getExtendType() == AArch64_AM::UXTW ||
2112  Addr.getExtendType() == AArch64_AM::SXTW)
2113  Idx++;
2114 
2115  switch (VT.SimpleTy) {
2116  default: llvm_unreachable("Unexpected value type.");
2117  case MVT::i1: VTIsi1 = true; LLVM_FALLTHROUGH;
2118  case MVT::i8: Opc = OpcTable[Idx][0]; break;
2119  case MVT::i16: Opc = OpcTable[Idx][1]; break;
2120  case MVT::i32: Opc = OpcTable[Idx][2]; break;
2121  case MVT::i64: Opc = OpcTable[Idx][3]; break;
2122  case MVT::f32: Opc = OpcTable[Idx][4]; break;
2123  case MVT::f64: Opc = OpcTable[Idx][5]; break;
2124  }
2125 
2126  // Storing an i1 requires special handling.
2127  if (VTIsi1 && SrcReg != AArch64::WZR) {
2128  unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
2129  assert(ANDReg && "Unexpected AND instruction emission failure.");
2130  SrcReg = ANDReg;
2131  }
2132  // Create the base instruction, then add the operands.
2133  const MCInstrDesc &II = TII.get(Opc);
2134  SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2135  MachineInstrBuilder MIB =
2136  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
2137  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2138 
2139  return true;
2140 }
2141 
2142 bool AArch64FastISel::selectStore(const Instruction *I) {
2143  MVT VT;
2144  const Value *Op0 = I->getOperand(0);
2145  // Verify we have a legal type before going any further. Currently, we handle
2146  // simple types that will directly fit in a register (i32/f32/i64/f64) or
2147  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2148  if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2149  return false;
2150 
2151  const Value *PtrV = I->getOperand(1);
2152  if (TLI.supportSwiftError()) {
2153  // Swifterror values can come from either a function parameter with
2154  // swifterror attribute or an alloca with swifterror attribute.
2155  if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2156  if (Arg->hasSwiftErrorAttr())
2157  return false;
2158  }
2159 
2160  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2161  if (Alloca->isSwiftError())
2162  return false;
2163  }
2164  }
2165 
2166  // Get the value to be stored into a register. Use the zero register directly
2167  // when possible to avoid an unnecessary copy and a wasted register.
2168  unsigned SrcReg = 0;
2169  if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2170  if (CI->isZero())
2171  SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2172  } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2173  if (CF->isZero() && !CF->isNegative()) {
2174  VT = MVT::getIntegerVT(VT.getSizeInBits());
2175  SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2176  }
2177  }
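  // Example of the zero-register shortcut above (purely illustrative): for
  // "store i64 0, i64* %p" the source operand becomes XZR directly, so the
  // store is emitted as roughly "str xzr, [x0]" without materializing the
  // constant; a +0.0 f32 store is handled the same way through WZR.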
2178 
2179  if (!SrcReg)
2180  SrcReg = getRegForValue(Op0);
2181 
2182  if (!SrcReg)
2183  return false;
2184 
2185  auto *SI = cast<StoreInst>(I);
2186 
2187  // Try to emit a STLR for seq_cst/release.
2188  if (SI->isAtomic()) {
2189  AtomicOrdering Ord = SI->getOrdering();
2190  // The non-atomic instructions are sufficient for relaxed stores.
2191  if (isReleaseOrStronger(Ord)) {
2192  // The STLR addressing mode only supports a base reg; pass that directly.
2193  unsigned AddrReg = getRegForValue(PtrV);
2194  return emitStoreRelease(VT, SrcReg, AddrReg,
2195  createMachineMemOperandFor(I));
2196  }
2197  }
2198 
2199  // See if we can handle this address.
2200  Address Addr;
2201  if (!computeAddress(PtrV, Addr, Op0->getType()))
2202  return false;
2203 
2204  if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2205  return false;
2206  return true;
2207 }
2208 
2209 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2210  switch (Pred) {
2211  case CmpInst::FCMP_ONE:
2212  case CmpInst::FCMP_UEQ:
2213  default:
2214  // AL is our "false" for now. The other two need more compares.
2215  return AArch64CC::AL;
2216  case CmpInst::ICMP_EQ:
2217  case CmpInst::FCMP_OEQ:
2218  return AArch64CC::EQ;
2219  case CmpInst::ICMP_SGT:
2220  case CmpInst::FCMP_OGT:
2221  return AArch64CC::GT;
2222  case CmpInst::ICMP_SGE:
2223  case CmpInst::FCMP_OGE:
2224  return AArch64CC::GE;
2225  case CmpInst::ICMP_UGT:
2226  case CmpInst::FCMP_UGT:
2227  return AArch64CC::HI;
2228  case CmpInst::FCMP_OLT:
2229  return AArch64CC::MI;
2230  case CmpInst::ICMP_ULE:
2231  case CmpInst::FCMP_OLE:
2232  return AArch64CC::LS;
2233  case CmpInst::FCMP_ORD:
2234  return AArch64CC::VC;
2235  case CmpInst::FCMP_UNO:
2236  return AArch64CC::VS;
2237  case CmpInst::FCMP_UGE:
2238  return AArch64CC::PL;
2239  case CmpInst::ICMP_SLT:
2240  case CmpInst::FCMP_ULT:
2241  return AArch64CC::LT;
2242  case CmpInst::ICMP_SLE:
2243  case CmpInst::FCMP_ULE:
2244  return AArch64CC::LE;
2245  case CmpInst::FCMP_UNE:
2246  case CmpInst::ICMP_NE:
2247  return AArch64CC::NE;
2248  case CmpInst::ICMP_UGE:
2249  return AArch64CC::HS;
2250  case CmpInst::ICMP_ULT:
2251  return AArch64CC::LO;
2252  }
2253 }
2254 
2255 /// Try to emit a combined compare-and-branch instruction.
2256 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2257  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
2258  // will not be produced, as they are conditional branch instructions that do
2259  // not set flags.
2260  if (FuncInfo.MF->getFunction().hasFnAttribute(
2261  Attribute::SpeculativeLoadHardening))
2262  return false;
2263 
2264  assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2265  const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2266  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2267 
2268  const Value *LHS = CI->getOperand(0);
2269  const Value *RHS = CI->getOperand(1);
2270 
2271  MVT VT;
2272  if (!isTypeSupported(LHS->getType(), VT))
2273  return false;
2274 
2275  unsigned BW = VT.getSizeInBits();
2276  if (BW > 64)
2277  return false;
2278 
2279  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2280  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2281 
2282  // Try to take advantage of fallthrough opportunities.
2283  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2284  std::swap(TBB, FBB);
2285  Predicate = CmpInst::getInversePredicate(Predicate);
2286  }
2287 
2288  int TestBit = -1;
2289  bool IsCmpNE;
2290  switch (Predicate) {
2291  default:
2292  return false;
2293  case CmpInst::ICMP_EQ:
2294  case CmpInst::ICMP_NE:
2295  if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2296  std::swap(LHS, RHS);
2297 
2298  if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2299  return false;
2300 
2301  if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2302  if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2303  const Value *AndLHS = AI->getOperand(0);
2304  const Value *AndRHS = AI->getOperand(1);
2305 
2306  if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2307  if (C->getValue().isPowerOf2())
2308  std::swap(AndLHS, AndRHS);
2309 
2310  if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2311  if (C->getValue().isPowerOf2()) {
2312  TestBit = C->getValue().logBase2();
2313  LHS = AndLHS;
2314  }
2315  }
2316 
2317  if (VT == MVT::i1)
2318  TestBit = 0;
2319 
2320  IsCmpNE = Predicate == CmpInst::ICMP_NE;
2321  break;
2322  case CmpInst::ICMP_SLT:
2323  case CmpInst::ICMP_SGE:
2324  if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2325  return false;
2326 
2327  TestBit = BW - 1;
2328  IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2329  break;
2330  case CmpInst::ICMP_SGT:
2331  case CmpInst::ICMP_SLE:
2332  if (!isa<ConstantInt>(RHS))
2333  return false;
2334 
2335  if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2336  return false;
2337 
2338  TestBit = BW - 1;
2339  IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2340  break;
2341  } // end switch
2342 
2343  static const unsigned OpcTable[2][2][2] = {
2344  { {AArch64::CBZW, AArch64::CBZX },
2345  {AArch64::CBNZW, AArch64::CBNZX} },
2346  { {AArch64::TBZW, AArch64::TBZX },
2347  {AArch64::TBNZW, AArch64::TBNZX} }
2348  };
2349 
2350  bool IsBitTest = TestBit != -1;
2351  bool Is64Bit = BW == 64;
2352  if (TestBit < 32 && TestBit >= 0)
2353  Is64Bit = false;
2354 
2355  unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2356  const MCInstrDesc &II = TII.get(Opc);
2357 
2358  unsigned SrcReg = getRegForValue(LHS);
2359  if (!SrcReg)
2360  return false;
2361  bool SrcIsKill = hasTrivialKill(LHS);
2362 
2363  if (BW == 64 && !Is64Bit)
2364  SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
2365  AArch64::sub_32);
2366 
2367  if ((BW < 32) && !IsBitTest)
2368  SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
2369 
2370  // Emit the combined compare and branch instruction.
2371  SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2372  MachineInstrBuilder MIB =
2373  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
2374  .addReg(SrcReg, getKillRegState(SrcIsKill));
2375  if (IsBitTest)
2376  MIB.addImm(TestBit);
2377  MIB.addMBB(TBB);
2378 
2379  finishCondBranch(BI->getParent(), TBB, FBB);
2380  return true;
2381 }
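// A sketch of what emitCompareAndBranch can fold (virtual register names are
// illustrative):
//   %a = and i32 %x, 4
//   %c = icmp ne i32 %a, 0
//   br i1 %c, label %tbb, label %fbb
// The power-of-two mask selects TestBit = 2 and ICMP_NE selects the TBNZ form,
// so compare and branch collapse into roughly "tbnz w0, #2, tbb".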
2382 
2383 bool AArch64FastISel::selectBranch(const Instruction *I) {
2384  const BranchInst *BI = cast<BranchInst>(I);
2385  if (BI->isUnconditional()) {
2386  MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2387  fastEmitBranch(MSucc, BI->getDebugLoc());
2388  return true;
2389  }
2390 
2391  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2392  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2393 
2394  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2395  if (CI->hasOneUse() && isValueAvailable(CI)) {
2396  // Try to optimize or fold the cmp.
2397  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2398  switch (Predicate) {
2399  default:
2400  break;
2401  case CmpInst::FCMP_FALSE:
2402  fastEmitBranch(FBB, DbgLoc);
2403  return true;
2404  case CmpInst::FCMP_TRUE:
2405  fastEmitBranch(TBB, DbgLoc);
2406  return true;
2407  }
2408 
2409  // Try to emit a combined compare-and-branch first.
2410  if (emitCompareAndBranch(BI))
2411  return true;
2412 
2413  // Try to take advantage of fallthrough opportunities.
2414  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2415  std::swap(TBB, FBB);
2416  Predicate = CmpInst::getInversePredicate(Predicate);
2417  }
2418 
2419  // Emit the cmp.
2420  if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2421  return false;
2422 
2423  // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2424  // instruction.
2425  AArch64CC::CondCode CC = getCompareCC(Predicate);
2426  AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2427  switch (Predicate) {
2428  default:
2429  break;
2430  case CmpInst::FCMP_UEQ:
2431  ExtraCC = AArch64CC::EQ;
2432  CC = AArch64CC::VS;
2433  break;
2434  case CmpInst::FCMP_ONE:
2435  ExtraCC = AArch64CC::MI;
2436  CC = AArch64CC::GT;
2437  break;
2438  }
2439  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2440 
2441  // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2442  if (ExtraCC != AArch64CC::AL) {
2443  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2444  .addImm(ExtraCC)
2445  .addMBB(TBB);
2446  }
2447 
2448  // Emit the branch.
2449  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2450  .addImm(CC)
2451  .addMBB(TBB);
2452 
2453  finishCondBranch(BI->getParent(), TBB, FBB);
2454  return true;
2455  }
2456  } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2457  uint64_t Imm = CI->getZExtValue();
2458  MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2459  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
2460  .addMBB(Target);
2461 
2462  // Obtain the branch probability and add the target to the successor list.
2463  if (FuncInfo.BPI) {
2464  auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2465  BI->getParent(), Target->getBasicBlock());
2466  FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2467  } else
2468  FuncInfo.MBB->addSuccessorWithoutProb(Target);
2469  return true;
2470  } else {
2471  AArch64CC::CondCode CC = AArch64CC::AL;
2472  if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2473  // Fake request the condition, otherwise the intrinsic might be completely
2474  // optimized away.
2475  unsigned CondReg = getRegForValue(BI->getCondition());
2476  if (!CondReg)
2477  return false;
2478 
2479  // Emit the branch.
2480  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2481  .addImm(CC)
2482  .addMBB(TBB);
2483 
2484  finishCondBranch(BI->getParent(), TBB, FBB);
2485  return true;
2486  }
2487  }
2488 
2489  unsigned CondReg = getRegForValue(BI->getCondition());
2490  if (CondReg == 0)
2491  return false;
2492  bool CondRegIsKill = hasTrivialKill(BI->getCondition());
2493 
2494  // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2495  unsigned Opcode = AArch64::TBNZW;
2496  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2497  std::swap(TBB, FBB);
2498  Opcode = AArch64::TBZW;
2499  }
2500 
2501  const MCInstrDesc &II = TII.get(Opcode);
2502  unsigned ConstrainedCondReg
2503  = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2504  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2505  .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill))
2506  .addImm(0)
2507  .addMBB(TBB);
2508 
2509  finishCondBranch(BI->getParent(), TBB, FBB);
2510  return true;
2511 }
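// When none of the special cases above apply, the generic path tests bit 0 of
// the boolean condition register, e.g. roughly "tbnz w0, #0, tbb" (or TBZW
// with the successors swapped when the true block is the layout successor).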
2512 
2513 bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2514  const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2515  unsigned AddrReg = getRegForValue(BI->getOperand(0));
2516  if (AddrReg == 0)
2517  return false;
2518 
2519  // Emit the indirect branch.
2520  const MCInstrDesc &II = TII.get(AArch64::BR);
2521  AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
2522  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
2523 
2524  // Make sure the CFG is up-to-date.
2525  for (auto *Succ : BI->successors())
2526  FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2527 
2528  return true;
2529 }
2530 
2531 bool AArch64FastISel::selectCmp(const Instruction *I) {
2532  const CmpInst *CI = cast<CmpInst>(I);
2533 
2534  // Vectors of i1 are weird: bail out.
2535  if (CI->getType()->isVectorTy())
2536  return false;
2537 
2538  // Try to optimize or fold the cmp.
2539  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2540  unsigned ResultReg = 0;
2541  switch (Predicate) {
2542  default:
2543  break;
2544  case CmpInst::FCMP_FALSE:
2545  ResultReg = createResultReg(&AArch64::GPR32RegClass);
2546  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2547  TII.get(TargetOpcode::COPY), ResultReg)
2548  .addReg(AArch64::WZR, getKillRegState(true));
2549  break;
2550  case CmpInst::FCMP_TRUE:
2551  ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2552  break;
2553  }
2554 
2555  if (ResultReg) {
2556  updateValueMap(I, ResultReg);
2557  return true;
2558  }
2559 
2560  // Emit the cmp.
2561  if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2562  return false;
2563 
2564  ResultReg = createResultReg(&AArch64::GPR32RegClass);
2565 
2566  // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2567  // condition codes are inverted, because they are used by CSINC.
2568  static unsigned CondCodeTable[2][2] = {
2569  { AArch64CC::NE, AArch64CC::VC },
2570  { AArch64CC::PL, AArch64CC::LE }
2571  };
2572  unsigned *CondCodes = nullptr;
2573  switch (Predicate) {
2574  default:
2575  break;
2576  case CmpInst::FCMP_UEQ:
2577  CondCodes = &CondCodeTable[0][0];
2578  break;
2579  case CmpInst::FCMP_ONE:
2580  CondCodes = &CondCodeTable[1][0];
2581  break;
2582  }
2583 
2584  if (CondCodes) {
2585  unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2586  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2587  TmpReg1)
2588  .addReg(AArch64::WZR, getKillRegState(true))
2589  .addReg(AArch64::WZR, getKillRegState(true))
2590  .addImm(CondCodes[0]);
2591  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2592  ResultReg)
2593  .addReg(TmpReg1, getKillRegState(true))
2594  .addReg(AArch64::WZR, getKillRegState(true))
2595  .addImm(CondCodes[1]);
2596 
2597  updateValueMap(I, ResultReg);
2598  return true;
2599  }
2600 
2601  // Now set a register based on the comparison.
2602  AArch64CC::CondCode CC = getCompareCC(Predicate);
2603  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2604  AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2605  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2606  ResultReg)
2607  .addReg(AArch64::WZR, getKillRegState(true))
2608  .addReg(AArch64::WZR, getKillRegState(true))
2609  .addImm(invertedCC);
2610 
2611  updateValueMap(I, ResultReg);
2612  return true;
2613 }
2614 
2615 /// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2616 /// value.
2617 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2618  if (!SI->getType()->isIntegerTy(1))
2619  return false;
2620 
2621  const Value *Src1Val, *Src2Val;
2622  unsigned Opc = 0;
2623  bool NeedExtraOp = false;
2624  if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2625  if (CI->isOne()) {
2626  Src1Val = SI->getCondition();
2627  Src2Val = SI->getFalseValue();
2628  Opc = AArch64::ORRWrr;
2629  } else {
2630  assert(CI->isZero());
2631  Src1Val = SI->getFalseValue();
2632  Src2Val = SI->getCondition();
2633  Opc = AArch64::BICWrr;
2634  }
2635  } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2636  if (CI->isOne()) {
2637  Src1Val = SI->getCondition();
2638  Src2Val = SI->getTrueValue();
2639  Opc = AArch64::ORRWrr;
2640  NeedExtraOp = true;
2641  } else {
2642  assert(CI->isZero());
2643  Src1Val = SI->getCondition();
2644  Src2Val = SI->getTrueValue();
2645  Opc = AArch64::ANDWrr;
2646  }
2647  }
2648 
2649  if (!Opc)
2650  return false;
2651 
2652  unsigned Src1Reg = getRegForValue(Src1Val);
2653  if (!Src1Reg)
2654  return false;
2655  bool Src1IsKill = hasTrivialKill(Src1Val);
2656 
2657  unsigned Src2Reg = getRegForValue(Src2Val);
2658  if (!Src2Reg)
2659  return false;
2660  bool Src2IsKill = hasTrivialKill(Src2Val);
2661 
2662  if (NeedExtraOp) {
2663  Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1);
2664  Src1IsKill = true;
2665  }
2666  unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2667  Src1IsKill, Src2Reg, Src2IsKill);
2668  updateValueMap(SI, ResultReg);
2669  return true;
2670 }
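// Example of the i1 select folding above (illustrative registers): for
// "select i1 %c, i1 true, i1 %b" the true operand is the constant 1, so the
// select is lowered to a single "orr w0, wC, wB" (ORRWrr) of the condition
// and the false operand, with no conditional select needed.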
2671 
2672 bool AArch64FastISel::selectSelect(const Instruction *I) {
2673  assert(isa<SelectInst>(I) && "Expected a select instruction.");
2674  MVT VT;
2675  if (!isTypeSupported(I->getType(), VT))
2676  return false;
2677 
2678  unsigned Opc;
2679  const TargetRegisterClass *RC;
2680  switch (VT.SimpleTy) {
2681  default:
2682  return false;
2683  case MVT::i1:
2684  case MVT::i8:
2685  case MVT::i16:
2686  case MVT::i32:
2687  Opc = AArch64::CSELWr;
2688  RC = &AArch64::GPR32RegClass;
2689  break;
2690  case MVT::i64:
2691  Opc = AArch64::CSELXr;
2692  RC = &AArch64::GPR64RegClass;
2693  break;
2694  case MVT::f32:
2695  Opc = AArch64::FCSELSrrr;
2696  RC = &AArch64::FPR32RegClass;
2697  break;
2698  case MVT::f64:
2699  Opc = AArch64::FCSELDrrr;
2700  RC = &AArch64::FPR64RegClass;
2701  break;
2702  }
2703 
2704  const SelectInst *SI = cast<SelectInst>(I);
2705  const Value *Cond = SI->getCondition();
2706  AArch64CC::CondCode CC = AArch64CC::NE;
2707  AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2708 
2709  if (optimizeSelect(SI))
2710  return true;
2711 
2712  // Try to pick up the flags, so we don't have to emit another compare.
2713  if (foldXALUIntrinsic(CC, I, Cond)) {
2714  // Fake request the condition to force emission of the XALU intrinsic.
2715  unsigned CondReg = getRegForValue(Cond);
2716  if (!CondReg)
2717  return false;
2718  } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2719  isValueAvailable(Cond)) {
2720  const auto *Cmp = cast<CmpInst>(Cond);
2721  // Try to optimize or fold the cmp.
2722  CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2723  const Value *FoldSelect = nullptr;
2724  switch (Predicate) {
2725  default:
2726  break;
2727  case CmpInst::FCMP_FALSE:
2728  FoldSelect = SI->getFalseValue();
2729  break;
2730  case CmpInst::FCMP_TRUE:
2731  FoldSelect = SI->getTrueValue();
2732  break;
2733  }
2734 
2735  if (FoldSelect) {
2736  unsigned SrcReg = getRegForValue(FoldSelect);
2737  if (!SrcReg)
2738  return false;
2739  unsigned UseReg = lookUpRegForValue(SI);
2740  if (UseReg)
2741  MRI.clearKillFlags(UseReg);
2742 
2743  updateValueMap(I, SrcReg);
2744  return true;
2745  }
2746 
2747  // Emit the cmp.
2748  if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2749  return false;
2750 
2751  // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2752  CC = getCompareCC(Predicate);
2753  switch (Predicate) {
2754  default:
2755  break;
2756  case CmpInst::FCMP_UEQ:
2757  ExtraCC = AArch64CC::EQ;
2758  CC = AArch64CC::VS;
2759  break;
2760  case CmpInst::FCMP_ONE:
2761  ExtraCC = AArch64CC::MI;
2762  CC = AArch64CC::GT;
2763  break;
2764  }
2765  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2766  } else {
2767  unsigned CondReg = getRegForValue(Cond);
2768  if (!CondReg)
2769  return false;
2770  bool CondIsKill = hasTrivialKill(Cond);
2771 
2772  const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2773  CondReg = constrainOperandRegClass(II, CondReg, 1);
2774 
2775  // Emit a TST instruction (ANDS wzr, reg, #imm).
2776  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
2777  AArch64::WZR)
2778  .addReg(CondReg, getKillRegState(CondIsKill))
2779  .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2780  }
2781 
2782  unsigned Src1Reg = getRegForValue(SI->getTrueValue());
2783  bool Src1IsKill = hasTrivialKill(SI->getTrueValue());
2784 
2785  unsigned Src2Reg = getRegForValue(SI->getFalseValue());
2786  bool Src2IsKill = hasTrivialKill(SI->getFalseValue());
2787 
2788  if (!Src1Reg || !Src2Reg)
2789  return false;
2790 
2791  if (ExtraCC != AArch64CC::AL) {
2792  Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2793  Src2IsKill, ExtraCC);
2794  Src2IsKill = true;
2795  }
2796  unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2797  Src2IsKill, CC);
2798  updateValueMap(I, ResultReg);
2799  return true;
2800 }
2801 
2802 bool AArch64FastISel::selectFPExt(const Instruction *I) {
2803  Value *V = I->getOperand(0);
2804  if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2805  return false;
2806 
2807  unsigned Op = getRegForValue(V);
2808  if (Op == 0)
2809  return false;
2810 
2811  unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
2812  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
2813  ResultReg).addReg(Op);
2814  updateValueMap(I, ResultReg);
2815  return true;
2816 }
2817 
2818 bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2819  Value *V = I->getOperand(0);
2820  if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2821  return false;
2822 
2823  unsigned Op = getRegForValue(V);
2824  if (Op == 0)
2825  return false;
2826 
2827  unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
2828  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
2829  ResultReg).addReg(Op);
2830  updateValueMap(I, ResultReg);
2831  return true;
2832 }
2833 
2834 // FPToUI and FPToSI
2835 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2836  MVT DestVT;
2837  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2838  return false;
2839 
2840  unsigned SrcReg = getRegForValue(I->getOperand(0));
2841  if (SrcReg == 0)
2842  return false;
2843 
2844  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2845  if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
2846  return false;
2847 
2848  unsigned Opc;
2849  if (SrcVT == MVT::f64) {
2850  if (Signed)
2851  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2852  else
2853  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2854  } else {
2855  if (Signed)
2856  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2857  else
2858  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2859  }
2860  unsigned ResultReg = createResultReg(
2861  DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2862  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2863  .addReg(SrcReg);
2864  updateValueMap(I, ResultReg);
2865  return true;
2866 }
2867 
2868 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2869  MVT DestVT;
2870  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2871  return false;
2872  // Let regular ISEL handle FP16
2873  if (DestVT == MVT::f16)
2874  return false;
2875 
2876  assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2877  "Unexpected value type.");
2878 
2879  unsigned SrcReg = getRegForValue(I->getOperand(0));
2880  if (!SrcReg)
2881  return false;
2882  bool SrcIsKill = hasTrivialKill(I->getOperand(0));
2883 
2884  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2885 
2886  // Handle sign-extension.
2887  if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2888  SrcReg =
2889  emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2890  if (!SrcReg)
2891  return false;
2892  SrcIsKill = true;
2893  }
2894 
2895  unsigned Opc;
2896  if (SrcVT == MVT::i64) {
2897  if (Signed)
2898  Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2899  else
2900  Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2901  } else {
2902  if (Signed)
2903  Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2904  else
2905  Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2906  }
2907 
2908  unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
2909  SrcIsKill);
2910  updateValueMap(I, ResultReg);
2911  return true;
2912 }
2913 
2914 bool AArch64FastISel::fastLowerArguments() {
2915  if (!FuncInfo.CanLowerReturn)
2916  return false;
2917 
2918  const Function *F = FuncInfo.Fn;
2919  if (F->isVarArg())
2920  return false;
2921 
2922  CallingConv::ID CC = F->getCallingConv();
2923  if (CC != CallingConv::C && CC != CallingConv::Swift)
2924  return false;
2925 
2926  if (Subtarget->hasCustomCallingConv())
2927  return false;
2928 
2929  // Only handle simple cases of up to 8 GPR and FPR each.
2930  unsigned GPRCnt = 0;
2931  unsigned FPRCnt = 0;
2932  for (auto const &Arg : F->args()) {
2933  if (Arg.hasAttribute(Attribute::ByVal) ||
2934  Arg.hasAttribute(Attribute::InReg) ||
2935  Arg.hasAttribute(Attribute::StructRet) ||
2936  Arg.hasAttribute(Attribute::SwiftSelf) ||
2937  Arg.hasAttribute(Attribute::SwiftError) ||
2938  Arg.hasAttribute(Attribute::Nest))
2939  return false;
2940 
2941  Type *ArgTy = Arg.getType();
2942  if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2943  return false;
2944 
2945  EVT ArgVT = TLI.getValueType(DL, ArgTy);
2946  if (!ArgVT.isSimple())
2947  return false;
2948 
2949  MVT VT = ArgVT.getSimpleVT().SimpleTy;
2950  if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2951  return false;
2952 
2953  if (VT.isVector() &&
2954  (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2955  return false;
2956 
2957  if (VT >= MVT::i1 && VT <= MVT::i64)
2958  ++GPRCnt;
2959  else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2960  VT.is128BitVector())
2961  ++FPRCnt;
2962  else
2963  return false;
2964 
2965  if (GPRCnt > 8 || FPRCnt > 8)
2966  return false;
2967  }
2968 
2969  static const MCPhysReg Registers[6][8] = {
2970  { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2971  AArch64::W5, AArch64::W6, AArch64::W7 },
2972  { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2973  AArch64::X5, AArch64::X6, AArch64::X7 },
2974  { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2975  AArch64::H5, AArch64::H6, AArch64::H7 },
2976  { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2977  AArch64::S5, AArch64::S6, AArch64::S7 },
2978  { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2979  AArch64::D5, AArch64::D6, AArch64::D7 },
2980  { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2981  AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2982  };
2983 
2984  unsigned GPRIdx = 0;
2985  unsigned FPRIdx = 0;
2986  for (auto const &Arg : F->args()) {
2987  MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2988  unsigned SrcReg;
2989  const TargetRegisterClass *RC;
2990  if (VT >= MVT::i1 && VT <= MVT::i32) {
2991  SrcReg = Registers[0][GPRIdx++];
2992  RC = &AArch64::GPR32RegClass;
2993  VT = MVT::i32;
2994  } else if (VT == MVT::i64) {
2995  SrcReg = Registers[1][GPRIdx++];
2996  RC = &AArch64::GPR64RegClass;
2997  } else if (VT == MVT::f16) {
2998  SrcReg = Registers[2][FPRIdx++];
2999  RC = &AArch64::FPR16RegClass;
3000  } else if (VT == MVT::f32) {
3001  SrcReg = Registers[3][FPRIdx++];
3002  RC = &AArch64::FPR32RegClass;
3003  } else if ((VT == MVT::f64) || VT.is64BitVector()) {
3004  SrcReg = Registers[4][FPRIdx++];
3005  RC = &AArch64::FPR64RegClass;
3006  } else if (VT.is128BitVector()) {
3007  SrcReg = Registers[5][FPRIdx++];
3008  RC = &AArch64::FPR128RegClass;
3009  } else
3010  llvm_unreachable("Unexpected value type.");
3011 
3012  unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3013  // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3014  // Without this, EmitLiveInCopies may eliminate the livein if its only
3015  // use is a bitcast (which isn't turned into an instruction).
3016  unsigned ResultReg = createResultReg(RC);
3017  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3018  TII.get(TargetOpcode::COPY), ResultReg)
3019  .addReg(DstReg, getKillRegState(true));
3020  updateValueMap(&Arg, ResultReg);
3021  }
3022  return true;
3023 }
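// As a sketch of the register assignment above, a function such as
// "define i32 @f(i32 %a, i64 %b, float %c)" receives %a in W0, %b in X1 and
// %c in S0; each livein is then copied into a fresh virtual register as noted
// in the FIXME above.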
3024 
3025 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3026  SmallVectorImpl<MVT> &OutVTs,
3027  unsigned &NumBytes) {
3028  CallingConv::ID CC = CLI.CallConv;
3029  SmallVector<CCValAssign, 16> ArgLocs;
3030  CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3031  CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
3032 
3033  // Get a count of how many bytes are to be pushed on the stack.
3034  NumBytes = CCInfo.getNextStackOffset();
3035 
3036  // Issue CALLSEQ_START
3037  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3038  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
3039  .addImm(NumBytes).addImm(0);
3040 
3041  // Process the args.
3042  for (CCValAssign &VA : ArgLocs) {
3043  const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3044  MVT ArgVT = OutVTs[VA.getValNo()];
3045 
3046  unsigned ArgReg = getRegForValue(ArgVal);
3047  if (!ArgReg)
3048  return false;
3049 
3050  // Handle arg promotion: SExt, ZExt, AExt.
3051  switch (VA.getLocInfo()) {
3052  case CCValAssign::Full:
3053  break;
3054  case CCValAssign::SExt: {
3055  MVT DestVT = VA.getLocVT();
3056  MVT SrcVT = ArgVT;
3057  ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3058  if (!ArgReg)
3059  return false;
3060  break;
3061  }
3062  case CCValAssign::AExt:
3063  // Intentional fall-through.
3064  case CCValAssign::ZExt: {
3065  MVT DestVT = VA.getLocVT();
3066  MVT SrcVT = ArgVT;
3067  ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3068  if (!ArgReg)
3069  return false;
3070  break;
3071  }
3072  default:
3073  llvm_unreachable("Unknown arg promotion!");
3074  }
3075 
3076  // Now copy/store arg to correct locations.
3077  if (VA.isRegLoc() && !VA.needsCustom()) {
3078  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3079  TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3080  CLI.OutRegs.push_back(VA.getLocReg());
3081  } else if (VA.needsCustom()) {
3082  // FIXME: Handle custom args.
3083  return false;
3084  } else {
3085  assert(VA.isMemLoc() && "Assuming store on stack.");
3086 
3087  // Don't emit stores for undef values.
3088  if (isa<UndefValue>(ArgVal))
3089  continue;
3090 
3091  // Need to store on the stack.
3092  unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3093 
3094  unsigned BEAlign = 0;
3095  if (ArgSize < 8 && !Subtarget->isLittleEndian())
3096  BEAlign = 8 - ArgSize;
3097 
3098  Address Addr;
3099  Addr.setKind(Address::RegBase);
3100  Addr.setReg(AArch64::SP);
3101  Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3102 
3103  unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
3104  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3105  MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3106  MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3107 
3108  if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3109  return false;
3110  }
3111  }
3112  return true;
3113 }
3114 
3115 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
3116  unsigned NumBytes) {
3117  CallingConv::ID CC = CLI.CallConv;
3118 
3119  // Issue CALLSEQ_END
3120  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3121  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3122  .addImm(NumBytes).addImm(0);
3123 
3124  // Now the return value.
3125  if (RetVT != MVT::isVoid) {
3126  SmallVector<CCValAssign, 16> RVLocs;
3127  CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3128  CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
3129 
3130  // Only handle a single return value.
3131  if (RVLocs.size() != 1)
3132  return false;
3133 
3134  // Copy all of the result registers out of their specified physreg.
3135  MVT CopyVT = RVLocs[0].getValVT();
3136 
3137  // TODO: Handle big-endian results
3138  if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3139  return false;
3140 
3141  unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
3142  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3143  TII.get(TargetOpcode::COPY), ResultReg)
3144  .addReg(RVLocs[0].getLocReg());
3145  CLI.InRegs.push_back(RVLocs[0].getLocReg());
3146 
3147  CLI.ResultReg = ResultReg;
3148  CLI.NumResultRegs = 1;
3149  }
3150 
3151  return true;
3152 }
3153 
3154 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3155  CallingConv::ID CC = CLI.CallConv;
3156  bool IsTailCall = CLI.IsTailCall;
3157  bool IsVarArg = CLI.IsVarArg;
3158  const Value *Callee = CLI.Callee;
3159  MCSymbol *Symbol = CLI.Symbol;
3160 
3161  if (!Callee && !Symbol)
3162  return false;
3163 
3164  // Allow SelectionDAG isel to handle tail calls.
3165  if (IsTailCall)
3166  return false;
3167 
3168  CodeModel::Model CM = TM.getCodeModel();
3169  // Only support the small-addressing and large code models.
3170  if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3171  return false;
3172 
3173  // FIXME: Add large code model support for ELF.
3174  if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3175  return false;
3176 
3177  // Let SDISel handle vararg functions.
3178  if (IsVarArg)
3179  return false;
3180 
3181  // FIXME: Only handle *simple* calls for now.
3182  MVT RetVT;
3183  if (CLI.RetTy->isVoidTy())
3184  RetVT = MVT::isVoid;
3185  else if (!isTypeLegal(CLI.RetTy, RetVT))
3186  return false;
3187 
3188  for (auto Flag : CLI.OutFlags)
3189  if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3190  Flag.isSwiftSelf() || Flag.isSwiftError())
3191  return false;
3192 
3193  // Set up the argument vectors.
3194  SmallVector<MVT, 16> OutVTs;
3195  OutVTs.reserve(CLI.OutVals.size());
3196 
3197  for (auto *Val : CLI.OutVals) {
3198  MVT VT;
3199  if (!isTypeLegal(Val->getType(), VT) &&
3200  !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3201  return false;
3202 
3203  // We don't handle vector parameters yet.
3204  if (VT.isVector() || VT.getSizeInBits() > 64)
3205  return false;
3206 
3207  OutVTs.push_back(VT);
3208  }
3209 
3210  Address Addr;
3211  if (Callee && !computeCallAddress(Callee, Addr))
3212  return false;
3213 
3214  // Handle the arguments now that we've gotten them.
3215  unsigned NumBytes;
3216  if (!processCallArgs(CLI, OutVTs, NumBytes))
3217  return false;
3218 
3219  const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3220  if (RegInfo->isAnyArgRegReserved(*MF))
3221  RegInfo->emitReservedArgRegCallError(*MF);
3222 
3223  // Issue the call.
3224  MachineInstrBuilder MIB;
3225  if (Subtarget->useSmallAddressing()) {
3226  const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
3227  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
3228  if (Symbol)
3229  MIB.addSym(Symbol, 0);
3230  else if (Addr.getGlobalValue())
3231  MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3232  else if (Addr.getReg()) {
3233  unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3234  MIB.addReg(Reg);
3235  } else
3236  return false;
3237  } else {
3238  unsigned CallReg = 0;
3239  if (Symbol) {
3240  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3241  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
3242  ADRPReg)
3243  .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3244 
3245  CallReg = createResultReg(&AArch64::GPR64RegClass);
3246  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3247  TII.get(AArch64::LDRXui), CallReg)
3248  .addReg(ADRPReg)
3249  .addSym(Symbol,
3250  AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3251  } else if (Addr.getGlobalValue())
3252  CallReg = materializeGV(Addr.getGlobalValue());
3253  else if (Addr.getReg())
3254  CallReg = Addr.getReg();
3255 
3256  if (!CallReg)
3257  return false;
3258 
3259  const MCInstrDesc &II = TII.get(AArch64::BLR);
3260  CallReg = constrainOperandRegClass(II, CallReg, 0);
3261  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
3262  }
3263 
3264  // Add implicit physical register uses to the call.
3265  for (auto Reg : CLI.OutRegs)
3266  MIB.addReg(Reg, RegState::Implicit);
3267 
3268  // Add a register mask with the call-preserved registers.
3269  // Proper defs for return values will be added by setPhysRegsDeadExcept().
3270  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3271 
3272  CLI.Call = MIB;
3273 
3274  // Finish off the call including any return values.
3275  return finishCall(CLI, RetVT, NumBytes);
3276 }
3277 
3278 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
3279  if (Alignment)
3280  return Len / Alignment <= 4;
3281  else
3282  return Len < 32;
3283 }
3284 
3285 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3286  uint64_t Len, unsigned Alignment) {
3287  // Make sure we don't bloat code by inlining very large memcpy's.
3288  if (!isMemCpySmall(Len, Alignment))
3289  return false;
3290 
3291  int64_t UnscaledOffset = 0;
3292  Address OrigDest = Dest;
3293  Address OrigSrc = Src;
3294 
3295  while (Len) {
3296  MVT VT;
3297  if (!Alignment || Alignment >= 8) {
3298  if (Len >= 8)
3299  VT = MVT::i64;
3300  else if (Len >= 4)
3301  VT = MVT::i32;
3302  else if (Len >= 2)
3303  VT = MVT::i16;
3304  else {
3305  VT = MVT::i8;
3306  }
3307  } else {
3308  // Bound based on alignment.
3309  if (Len >= 4 && Alignment == 4)
3310  VT = MVT::i32;
3311  else if (Len >= 2 && Alignment == 2)
3312  VT = MVT::i16;
3313  else {
3314  VT = MVT::i8;
3315  }
3316  }
3317 
3318  unsigned ResultReg = emitLoad(VT, VT, Src);
3319  if (!ResultReg)
3320  return false;
3321 
3322  if (!emitStore(VT, ResultReg, Dest))
3323  return false;
3324 
3325  int64_t Size = VT.getSizeInBits() / 8;
3326  Len -= Size;
3327  UnscaledOffset += Size;
3328 
3329  // We need to recompute the unscaled offset for each iteration.
3330  Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3331  Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3332  }
3333 
3334  return true;
3335 }
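// For illustration: a 16-byte memcpy with 8-byte alignment passes
// isMemCpySmall (16 / 8 <= 4) and is expanded by tryEmitSmallMemCpy into two
// i64 load/store pairs at offsets 0 and 8 from the original source and
// destination addresses.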
3336 
3337 /// Check if it is possible to fold the condition from the XALU intrinsic
3338 /// into the user. The condition code will only be updated on success.
3339 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3340  const Instruction *I,
3341  const Value *Cond) {
3342  if (!isa<ExtractValueInst>(Cond))
3343  return false;
3344 
3345  const auto *EV = cast<ExtractValueInst>(Cond);
3346  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3347  return false;
3348 
3349  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3350  MVT RetVT;
3351  const Function *Callee = II->getCalledFunction();
3352  Type *RetTy =
3353  cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3354  if (!isTypeLegal(RetTy, RetVT))
3355  return false;
3356 
3357  if (RetVT != MVT::i32 && RetVT != MVT::i64)
3358  return false;
3359 
3360  const Value *LHS = II->getArgOperand(0);
3361  const Value *RHS = II->getArgOperand(1);
3362 
3363  // Canonicalize immediate to the RHS.
3364  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3365  isCommutativeIntrinsic(II))
3366  std::swap(LHS, RHS);
3367 
3368  // Simplify multiplies.
3369  Intrinsic::ID IID = II->getIntrinsicID();
3370  switch (IID) {
3371  default:
3372  break;
3373  case Intrinsic::smul_with_overflow:
3374  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3375  if (C->getValue() == 2)
3376  IID = Intrinsic::sadd_with_overflow;
3377  break;
3378  case Intrinsic::umul_with_overflow:
3379  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3380  if (C->getValue() == 2)
3381  IID = Intrinsic::uadd_with_overflow;
3382  break;
3383  }
3384 
3385  AArch64CC::CondCode TmpCC;
3386  switch (IID) {
3387  default:
3388  return false;
3389  case Intrinsic::sadd_with_overflow:
3390  case Intrinsic::ssub_with_overflow:
3391  TmpCC = AArch64CC::VS;
3392  break;
3393  case Intrinsic::uadd_with_overflow:
3394  TmpCC = AArch64CC::HS;
3395  break;
3396  case Intrinsic::usub_with_overflow:
3397  TmpCC = AArch64CC::LO;
3398  break;
3399  case Intrinsic::smul_with_overflow:
3400  case Intrinsic::umul_with_overflow:
3401  TmpCC = AArch64CC::NE;
3402  break;
3403  }
3404 
3405  // Check if both instructions are in the same basic block.
3406  if (!isValueAvailable(II))
3407  return false;
3408 
3409  // Make sure nothing is in the way.
3410  BasicBlock::const_iterator Start(I);
3411  BasicBlock::const_iterator End(II);
3412  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3413  // We only expect extractvalue instructions between the intrinsic and the
3414  // instruction to be selected.
3415  if (!isa<ExtractValueInst>(Itr))
3416  return false;
3417 
3418  // Check that the extractvalue operand comes from the intrinsic.
3419  const auto *EVI = cast<ExtractValueInst>(Itr);
3420  if (EVI->getAggregateOperand() != II)
3421  return false;
3422  }
3423 
3424  CC = TmpCC;
3425  return true;
3426 }
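// Shape of IR the folding above targets (an illustrative example): the
// overflow bit of a *.with.overflow intrinsic reaching the user through an
// extractvalue, with only extractvalues in between:
//   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
//   %ovf = extractvalue { i32, i1 } %res, 1
//   br i1 %ovf, label %overflow, label %cont
// Here CC is set to AArch64CC::VS, so the user can branch on the flags set by
// the flag-setting add instead of re-evaluating the condition.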
3427 
3428 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3429  // FIXME: Handle more intrinsics.
3430  switch (II->getIntrinsicID()) {
3431  default: return false;
3432  case Intrinsic::frameaddress: {
3433  MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3434  MFI.setFrameAddressIsTaken(true);
3435 
3436  const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3437  unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3438  unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3439  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3440  TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3441  // Recursively load frame address
3442  // ldr x0, [fp]
3443  // ldr x0, [x0]
3444  // ldr x0, [x0]
3445  // ...
3446  unsigned DestReg;
3447  unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3448  while (Depth--) {
3449  DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3450  SrcReg, /*IsKill=*/true, 0);
3451  assert(DestReg && "Unexpected LDR instruction emission failure.");
3452  SrcReg = DestReg;
3453  }
3454 
3455  updateValueMap(II, SrcReg);
3456  return true;
3457  }
3458  case Intrinsic::sponentry: {
3459  MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3460 
3461  // SP = FP + Fixed Object + 16
3462  int FI = MFI.CreateFixedObject(4, 0, false);
3463  unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
3464  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3465  TII.get(AArch64::ADDXri), ResultReg)
3466  .addFrameIndex(FI)
3467  .addImm(0)
3468  .addImm(0);
3469 
3470  updateValueMap(II, ResultReg);
3471  return true;
3472  }
3473  case Intrinsic::memcpy:
3474  case Intrinsic::memmove: {
3475  const auto *MTI = cast<MemTransferInst>(II);
3476  // Don't handle volatile.
3477  if (MTI->isVolatile())
3478  return false;
3479 
3480  // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
3481  // we would emit dead code because we don't currently handle memmoves.
3482  bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3483  if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3484  // Small memcpy's are common enough that we want to do them without a call
3485  // if possible.
3486  uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3487  unsigned Alignment = MinAlign(MTI->getDestAlignment(),
3488  MTI->getSourceAlignment());
3489  if (isMemCpySmall(Len, Alignment)) {
3490  Address Dest, Src;
3491  if (!computeAddress(MTI->getRawDest(), Dest) ||
3492  !computeAddress(MTI->getRawSource(), Src))
3493  return false;
3494  if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3495  return true;
3496  }
3497  }
3498 
3499  if (!MTI->getLength()->getType()->isIntegerTy(64))
3500  return false;
3501 
3502  if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3503  // Fast instruction selection doesn't support the special
3504  // address spaces.
3505  return false;
3506 
3507  const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3508  return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 1);
3509  }
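// Illustrative behaviour of the case above (a sketch): a call such as
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 16, i1 false)
// with sufficient alignment is expanded inline by tryEmitSmallMemCpy, while
// non-constant or large lengths fall back to a plain libcall; passing
// getNumArgOperands() - 1 to lowerCallTo drops the trailing i1 volatile flag.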
3510  case Intrinsic::memset: {
3511  const MemSetInst *MSI = cast<MemSetInst>(II);
3512  // Don't handle volatile.
3513  if (MSI->isVolatile())
3514  return false;
3515 
3516  if (!MSI->getLength()->getType()->isIntegerTy(64))
3517  return false;
3518 
3519  if (MSI->getDestAddressSpace() > 255)
3520  // Fast instruction selection doesn't support the special
3521  // address spaces.
3522  return false;
3523 
3524  return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
3525  }
3526  case Intrinsic::sin:
3527  case Intrinsic::cos:
3528  case Intrinsic::pow: {
3529  MVT RetVT;
3530  if (!isTypeLegal(II->getType(), RetVT))
3531  return false;
3532 
3533  if (RetVT != MVT::f32 && RetVT != MVT::f64)
3534  return false;
3535 
3536  static const RTLIB::Libcall LibCallTable[3][2] = {
3537  { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3538  { RTLIB::COS_F32, RTLIB::COS_F64 },
3539  { RTLIB::POW_F32, RTLIB::POW_F64 }
3540  };
3541  RTLIB::Libcall LC;
3542  bool Is64Bit = RetVT == MVT::f64;
3543  switch (II->getIntrinsicID()) {
3544  default:
3545  llvm_unreachable("Unexpected intrinsic.");
3546  case Intrinsic::sin:
3547  LC = LibCallTable[0][Is64Bit];
3548  break;
3549  case Intrinsic::cos:
3550  LC = LibCallTable[1][Is64Bit];
3551  break;
3552  case Intrinsic::pow:
3553  LC = LibCallTable[2][Is64Bit];
3554  break;
3555  }
3556 
3557  ArgListTy Args;
3558  Args.reserve(II->getNumArgOperands());
3559 
3560  // Populate the argument list.
3561  for (auto &Arg : II->arg_operands()) {
3562  ArgListEntry Entry;
3563  Entry.Val = Arg;
3564  Entry.Ty = Arg->getType();
3565  Args.push_back(Entry);
3566  }
3567 
3568  CallLoweringInfo CLI;
3569  MCContext &Ctx = MF->getContext();
3570  CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3571  TLI.getLibcallName(LC), std::move(Args));
3572  if (!lowerCallTo(CLI))
3573  return false;
3574  updateValueMap(II, CLI.ResultReg);
3575  return true;
3576  }
3577  case Intrinsic::fabs: {
3578  MVT VT;
3579  if (!isTypeLegal(II->getType(), VT))
3580  return false;
3581 
3582  unsigned Opc;
3583  switch (VT.SimpleTy) {
3584  default:
3585  return false;
3586  case MVT::f32:
3587  Opc = AArch64::FABSSr;
3588  break;
3589  case MVT::f64:
3590  Opc = AArch64::FABSDr;
3591  break;
3592  }
3593  unsigned SrcReg = getRegForValue(II->getOperand(0));
3594  if (!SrcReg)
3595  return false;
3596  bool SrcRegIsKill = hasTrivialKill(II->getOperand(0));
3597  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3598  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3599  .addReg(SrcReg, getKillRegState(SrcRegIsKill));
3600  updateValueMap(II, ResultReg);
3601  return true;
3602  }
3603  case Intrinsic::trap:
3604  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3605  .addImm(1);
3606  return true;
3607  case Intrinsic::debugtrap: {
3608  if (Subtarget->isTargetWindows()) {
3609  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3610  .addImm(0xF000);
3611  return true;
3612  }
3613  break;
3614  }
3615 
3616  case Intrinsic::sqrt: {
3617  Type *RetTy = II->getCalledFunction()->getReturnType();
3618 
3619  MVT VT;
3620  if (!isTypeLegal(RetTy, VT))
3621  return false;
3622 
3623  unsigned Op0Reg = getRegForValue(II->getOperand(0));
3624  if (!Op0Reg)
3625  return false;
3626  bool Op0IsKill = hasTrivialKill(II->getOperand(0));
3627 
3628  unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
3629  if (!ResultReg)
3630  return false;
3631 
3632  updateValueMap(II, ResultReg);
3633  return true;
3634  }
3635  case Intrinsic::sadd_with_overflow:
3636  case Intrinsic::uadd_with_overflow:
3637  case Intrinsic::ssub_with_overflow:
3638  case Intrinsic::usub_with_overflow:
3639  case Intrinsic::smul_with_overflow:
3640  case Intrinsic::umul_with_overflow: {
3641  // This implements the basic lowering of the xalu with overflow intrinsics.
3642  const Function *Callee = II->getCalledFunction();
3643  auto *Ty = cast<StructType>(Callee->getReturnType());
3644  Type *RetTy = Ty->getTypeAtIndex(0U);
3645 
3646  MVT VT;
3647  if (!isTypeLegal(RetTy, VT))
3648  return false;
3649 
3650  if (VT != MVT::i32 && VT != MVT::i64)
3651  return false;
3652 
3653  const Value *LHS = II->getArgOperand(0);
3654  const Value *RHS = II->getArgOperand(1);
3655  // Canonicalize immediate to the RHS.
3656  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3657  isCommutativeIntrinsic(II))
3658  std::swap(LHS, RHS);
3659 
3660  // Simplify multiplies.
3661  Intrinsic::ID IID = II->getIntrinsicID();
3662  switch (IID) {
3663  default:
3664  break;
3665  case Intrinsic::smul_with_overflow:
3666  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3667  if (C->getValue() == 2) {
3668  IID = Intrinsic::sadd_with_overflow;
3669  RHS = LHS;
3670  }
3671  break;
3672  case Intrinsic::umul_with_overflow:
3673  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3674  if (C->getValue() == 2) {
3675  IID = Intrinsic::uadd_with_overflow;
3676  RHS = LHS;
3677  }
3678  break;
3679  }
3680 
3681  unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3682  AArch64CC::CondCode CC = AArch64CC::Invalid;
3683  switch (IID) {
3684  default: llvm_unreachable("Unexpected intrinsic!");
3685  case Intrinsic::sadd_with_overflow:
3686  ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3687  CC = AArch64CC::VS;
3688  break;
3689  case Intrinsic::uadd_with_overflow:
3690  ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3691  CC = AArch64CC::HS;
3692  break;
3693  case Intrinsic::ssub_with_overflow:
3694  ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3695  CC = AArch64CC::VS;
3696  break;
3697  case Intrinsic::usub_with_overflow:
3698  ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3699  CC = AArch64CC::LO;
3700  break;
3701  case Intrinsic::smul_with_overflow: {
3702  CC = AArch64CC::NE;
3703  unsigned LHSReg = getRegForValue(LHS);
3704  if (!LHSReg)
3705  return false;
3706  bool LHSIsKill = hasTrivialKill(LHS);
3707 
3708  unsigned RHSReg = getRegForValue(RHS);
3709  if (!RHSReg)
3710  return false;
3711  bool RHSIsKill = hasTrivialKill(RHS);
3712 
3713  if (VT == MVT::i32) {
3714  MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3715  unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
3716  /*IsKill=*/false, 32);
3717  MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3718  AArch64::sub_32);
3719  ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
3720  AArch64::sub_32);
3721  emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3722  AArch64_AM::ASR, 31, /*WantResult=*/false);
3723  } else {
3724  assert(VT == MVT::i64 && "Unexpected value type.");
3725  // LHSReg and RHSReg cannot be killed by this Mul, since they are
3726  // reused in the next instruction.
3727  MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3728  /*IsKill=*/false);
3729  unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
3730  RHSReg, RHSIsKill);
3731  emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3732  AArch64_AM::ASR, 63, /*WantResult=*/false);
3733  }
3734  break;
3735  }
3736  case Intrinsic::umul_with_overflow: {
3737  CC = AArch64CC::NE;
3738  unsigned LHSReg = getRegForValue(LHS);
3739  if (!LHSReg)
3740  return false;
3741  bool LHSIsKill = hasTrivialKill(LHS);
3742 
3743  unsigned RHSReg = getRegForValue(RHS);
3744  if (!RHSReg)
3745  return false;
3746  bool RHSIsKill = hasTrivialKill(RHS);
3747 
3748  if (VT == MVT::i32) {
3749  MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3750  emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
3751  /*IsKill=*/false, AArch64_AM::LSR, 32,
3752  /*WantResult=*/false);
3753  MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3754  AArch64::sub_32);
3755  } else {
3756  assert(VT == MVT::i64 && "Unexpected value type.");
3757  // LHSReg and RHSReg cannot be killed by this Mul, since they are
3758  // reused in the next instruction.
3759  MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3760  /*IsKill=*/false);
3761  unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
3762  RHSReg, RHSIsKill);
3763  emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
3764  /*IsKill=*/false, /*WantResult=*/false);
3765  }
3766  break;
3767  }
3768  }
3769 
3770  if (MulReg) {
3771  ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3772  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3773  TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3774  }
3775 
3776  if (!ResultReg1)
3777  return false;
3778 
3779  ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3780  AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
3781  /*IsKill=*/true, getInvertedCondCode(CC));
3782  (void)ResultReg2;
3783  assert((ResultReg1 + 1) == ResultReg2 &&
3784  "Nonconsecutive result registers.");
3785  updateValueMap(II, ResultReg1, 2);
3786  return true;
3787  }
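// Rough sketch of the sequence emitted for an i32 smul.with.overflow above
// (illustrative register names; see the MVT::i32 branch):
//   smull  x8, w0, w1       ; 64-bit product
//   lsr    x9, x8, #32      ; high half
//   cmp    w9, w8, asr #31  ; overflow iff high half != sign of low half
//   cset   w10, ne          ; overflow bit via the CSINCWr after the switch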
3788  }
3789  return false;
3790 }
3791 
3792 bool AArch64FastISel::selectRet(const Instruction *I) {
3793  const ReturnInst *Ret = cast<ReturnInst>(I);
3794  const Function &F = *I->getParent()->getParent();
3795 
3796  if (!FuncInfo.CanLowerReturn)
3797  return false;
3798 
3799  if (F.isVarArg())
3800  return false;
3801 
3802  if (TLI.supportSwiftError() &&
3803  F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3804  return false;
3805 
3806  if (TLI.supportSplitCSR(FuncInfo.MF))
3807  return false;
3808 
3809  // Build a list of return value registers.
3810  SmallVector<unsigned, 4> RetRegs;
3811 
3812  if (Ret->getNumOperands() > 0) {
3813  CallingConv::ID CC = F.getCallingConv();
3814  SmallVector<ISD::OutputArg, 4> Outs;
3815  GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3816 
3817  // Analyze operands of the call, assigning locations to each operand.
3818  SmallVector<CCValAssign, 16> ValLocs;
3819  CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3820  CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3821  : RetCC_AArch64_AAPCS;
3822  CCInfo.AnalyzeReturn(Outs, RetCC);
3823 
3824  // Only handle a single return value for now.
3825  if (ValLocs.size() != 1)
3826  return false;
3827 
3828  CCValAssign &VA = ValLocs[0];
3829  const Value *RV = Ret->getOperand(0);
3830 
3831  // Don't bother handling odd stuff for now.
3832  if ((VA.getLocInfo() != CCValAssign::Full) &&
3833  (VA.getLocInfo() != CCValAssign::BCvt))
3834  return false;
3835 
3836  // Only handle register returns for now.
3837  if (!VA.isRegLoc())
3838  return false;
3839 
3840  unsigned Reg = getRegForValue(RV);
3841  if (Reg == 0)
3842  return false;
3843 
3844  unsigned SrcReg = Reg + VA.getValNo();
3845  unsigned DestReg = VA.getLocReg();
3846  // Avoid a cross-class copy. This is very unlikely.
3847  if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3848  return false;
3849 
3850  EVT RVEVT = TLI.getValueType(DL, RV->getType());
3851  if (!RVEVT.isSimple())
3852  return false;
3853 
3854  // Vectors (of > 1 lane) in big endian need tricky handling.
3855  if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
3856  !Subtarget->isLittleEndian())
3857  return false;
3858 
3859  MVT RVVT = RVEVT.getSimpleVT();
3860  if (RVVT == MVT::f128)
3861  return false;
3862 
3863  MVT DestVT = VA.getValVT();
3864  // Special handling for extended integers.
3865  if (RVVT != DestVT) {
3866  if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3867  return false;
3868 
3869  if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3870  return false;
3871 
3872  bool IsZExt = Outs[0].Flags.isZExt();
3873  SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3874  if (SrcReg == 0)
3875  return false;
3876  }
3877 
3878  // Make the copy.
3879  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3880  TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3881 
3882  // Add register to return instruction.
3883  RetRegs.push_back(VA.getLocReg());
3884  }
3885 
3886  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3887  TII.get(AArch64::RET_ReallyLR));
3888  for (unsigned RetReg : RetRegs)
3889  MIB.addReg(RetReg, RegState::Implicit);
3890  return true;
3891 }
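// Example handled above (a sketch): returning an i8/i16 value from a function
// whose return is marked zeroext/signext is first widened with emitIntExt to
// the location type, then copied into the return register, which is attached
// to RET_ReallyLR as an implicit use.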
3892 
3893 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3894  Type *DestTy = I->getType();
3895  Value *Op = I->getOperand(0);
3896  Type *SrcTy = Op->getType();
3897 
3898  EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3899  EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3900  if (!SrcEVT.isSimple())
3901  return false;
3902  if (!DestEVT.isSimple())
3903  return false;
3904 
3905  MVT SrcVT = SrcEVT.getSimpleVT();
3906  MVT DestVT = DestEVT.getSimpleVT();
3907 
3908  if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3909  SrcVT != MVT::i8)
3910  return false;
3911  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3912  DestVT != MVT::i1)
3913  return false;
3914 
3915  unsigned SrcReg = getRegForValue(Op);
3916  if (!SrcReg)
3917  return false;
3918  bool SrcIsKill = hasTrivialKill(Op);
3919 
3920  // If we're truncating from i64 to a smaller non-legal type then generate an
3921  // AND. Otherwise, we know the high bits are undefined and a truncate only
3922  // generates a COPY. We cannot mark the source register also as the result
3923  // register, because this can incorrectly transfer the kill flag onto the
3924  // source register.
3925  unsigned ResultReg;
3926  if (SrcVT == MVT::i64) {
3927  uint64_t Mask = 0;
3928  switch (DestVT.SimpleTy) {
3929  default:
3930  // Trunc i64 to i32 is handled by the target-independent fast-isel.
3931  return false;
3932  case MVT::i1:
3933  Mask = 0x1;
3934  break;
3935  case MVT::i8:
3936  Mask = 0xff;
3937  break;
3938  case MVT::i16:
3939  Mask = 0xffff;
3940  break;
3941  }
3942  // Issue an extract_subreg to get the lower 32-bits.
3943  unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
3944  AArch64::sub_32);
3945  // Create the AND instruction which performs the actual truncation.
3946  ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
3947  assert(ResultReg && "Unexpected AND instruction emission failure.");
3948  } else {
3949  ResultReg = createResultReg(&AArch64::GPR32RegClass);
3950  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3951  TII.get(TargetOpcode::COPY), ResultReg)
3952  .addReg(SrcReg, getKillRegState(SrcIsKill));
3953  }
3954 
3955  updateValueMap(I, ResultReg);
3956  return true;
3957 }
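// Sketch of the i64 path above for "%t = trunc i64 %x to i8" (illustrative):
// the low 32 bits are taken with an EXTRACT_SUBREG of sub_32 and the actual
// truncation becomes roughly "and w8, w9, #0xff" via emitAnd_ri.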
3958 
3959 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
3960  assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
3961  DestVT == MVT::i64) &&
3962  "Unexpected value type.");
3963  // Handle i8 and i16 as i32.
3964  if (DestVT == MVT::i8 || DestVT == MVT::i16)
3965  DestVT = MVT::i32;
3966 
3967  if (IsZExt) {
3968  unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
3969  assert(ResultReg && "Unexpected AND instruction emission failure.");
3970  if (DestVT == MVT::i64) {
3971  // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
3972  // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
3973  unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3974  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3975  TII.get(AArch64::SUBREG_TO_REG), Reg64)
3976  .addImm(0)
3977  .addReg(ResultReg)
3978  .addImm(AArch64::sub_32);
3979  ResultReg = Reg64;
3980  }
3981  return ResultReg;
3982  } else {
3983  if (DestVT == MVT::i64) {
3984  // FIXME: We're SExt i1 to i64.
3985  return 0;
3986  }
3987  return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
3988  /*TODO:IsKill=*/false, 0, 0);
3989  }
3990 }
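// Illustrative results of emiti1Ext (placeholder registers):
//   zext i1 -> i32:  and  w8, w9, #0x1
//   sext i1 -> i32:  sbfx w8, w9, #0, #1   (SBFMWri with ImmR = ImmS = 0)
//   zext i1 -> i64:  and  w8, w9, #0x1, then SUBREG_TO_REG into x8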
3991 
3992 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3993  unsigned Op1, bool Op1IsKill) {
3994  unsigned Opc, ZReg;
3995  switch (RetVT.SimpleTy) {
3996  default: return 0;
3997  case MVT::i8:
3998  case MVT::i16:
3999  case MVT::i32:
4000  RetVT = MVT::i32;
4001  Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
4002  case MVT::i64:
4003  Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
4004  }
4005 
4006  const TargetRegisterClass *RC =
4007  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4008  return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
4009  ZReg, /*IsKill=*/true);
4010 }
4011 
4012 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
4013  unsigned Op1, bool Op1IsKill) {
4014  if (RetVT != MVT::i64)
4015  return 0;
4016 
4017  return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
4018  Op0, Op0IsKill, Op1, Op1IsKill,
4019  AArch64::XZR, /*IsKill=*/true);
4020 }
4021 
4022 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
4023  unsigned Op1, bool Op1IsKill) {
4024  if (RetVT != MVT::i64)
4025  return 0;
4026 
4027  return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
4028  Op0, Op0IsKill, Op1, Op1IsKill,
4029  AArch64::XZR, /*IsKill=*/true);
4030 }
4031 
4032 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4033  unsigned Op1Reg, bool Op1IsKill) {
4034  unsigned Opc = 0;
4035  bool NeedTrunc = false;
4036  uint64_t Mask = 0;
4037  switch (RetVT.SimpleTy) {
4038  default: return 0;
4039  case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
4040  case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4041  case MVT::i32: Opc = AArch64::LSLVWr; break;
4042  case MVT::i64: Opc = AArch64::LSLVXr; break;
4043  }
4044 
4045  const TargetRegisterClass *RC =
4046  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4047  if (NeedTrunc) {
4048  Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4049  Op1IsKill = true;
4050  }
4051  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4052  Op1IsKill);
4053  if (NeedTrunc)
4054  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4055  return ResultReg;
4056 }
4057 
4058 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4059  bool Op0IsKill, uint64_t Shift,
4060  bool IsZExt) {
4061  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4062  "Unexpected source/return type pair.");
4063  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4064  SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4065  "Unexpected source value type.");
4066  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4067  RetVT == MVT::i64) && "Unexpected return value type.");
4068 
4069  bool Is64Bit = (RetVT == MVT::i64);
4070  unsigned RegSize = Is64Bit ? 64 : 32;
4071  unsigned DstBits = RetVT.getSizeInBits();
4072  unsigned SrcBits = SrcVT.getSizeInBits();
4073  const TargetRegisterClass *RC =
4074  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4075 
4076  // Just emit a copy for "zero" shifts.
4077  if (Shift == 0) {
4078  if (RetVT == SrcVT) {
4079  unsigned ResultReg = createResultReg(RC);
4080  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4081  TII.get(TargetOpcode::COPY), ResultReg)
4082  .addReg(Op0, getKillRegState(Op0IsKill));
4083  return ResultReg;
4084  } else
4085  return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4086  }
4087 
4088  // Don't deal with undefined shifts.
4089  if (Shift >= DstBits)
4090  return 0;
4091 
4092  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4093  // {S|U}BFM Wd, Wn, #r, #s
4094  // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4095 
4096  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4097  // %2 = shl i16 %1, 4
4098  // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4099  // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4100  // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4101  // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4102 
4103  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4104  // %2 = shl i16 %1, 8
4105  // Wd<32+7-24,32-24> = Wn<7:0>
4106  // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4107  // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4108  // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4109 
4110  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4111  // %2 = shl i16 %1, 12
4112  // Wd<32+3-20,32-20> = Wn<3:0>
4113  // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4114  // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4115  // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4116 
4117  unsigned ImmR = RegSize - Shift;
4118  // Limit the width to the length of the source type.
4119  unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4120  static const unsigned OpcTable[2][2] = {
4121  {AArch64::SBFMWri, AArch64::SBFMXri},
4122  {AArch64::UBFMWri, AArch64::UBFMXri}
4123  };
4124  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4125  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4126  unsigned TmpReg = MRI.createVirtualRegister(RC);
4127  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4128  TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4129  .addImm(0)
4130  .addReg(Op0, getKillRegState(Op0IsKill))
4131  .addImm(AArch64::sub_32);
4132  Op0 = TmpReg;
4133  Op0IsKill = true;
4134  }
4135  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4136 }
4137 
4138 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4139  unsigned Op1Reg, bool Op1IsKill) {
4140  unsigned Opc = 0;
4141  bool NeedTrunc = false;
4142  uint64_t Mask = 0;
4143  switch (RetVT.SimpleTy) {
4144  default: return 0;
4145  case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
4146  case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4147  case MVT::i32: Opc = AArch64::LSRVWr; break;
4148  case MVT::i64: Opc = AArch64::LSRVXr; break;
4149  }
4150 
4151  const TargetRegisterClass *RC =
4152  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4153  if (NeedTrunc) {
4154  Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
4155  Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4156  Op0IsKill = Op1IsKill = true;
4157  }
4158  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4159  Op1IsKill);
4160  if (NeedTrunc)
4161  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4162  return ResultReg;
4163 }
4164 
4165 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4166  bool Op0IsKill, uint64_t Shift,
4167  bool IsZExt) {
4168  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4169  "Unexpected source/return type pair.");
4170  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4171  SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4172  "Unexpected source value type.");
4173  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4174  RetVT == MVT::i64) && "Unexpected return value type.");
4175 
4176  bool Is64Bit = (RetVT == MVT::i64);
4177  unsigned RegSize = Is64Bit ? 64 : 32;
4178  unsigned DstBits = RetVT.getSizeInBits();
4179  unsigned SrcBits = SrcVT.getSizeInBits();
4180  const TargetRegisterClass *RC =
4181  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4182 
4183  // Just emit a copy for "zero" shifts.
4184  if (Shift == 0) {
4185  if (RetVT == SrcVT) {
4186  unsigned ResultReg = createResultReg(RC);
4187  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4188  TII.get(TargetOpcode::COPY), ResultReg)
4189  .addReg(Op0, getKillRegState(Op0IsKill));
4190  return ResultReg;
4191  } else
4192  return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4193  }
4194 
4195  // Don't deal with undefined shifts.
4196  if (Shift >= DstBits)
4197  return 0;
4198 
4199  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4200  // {S|U}BFM Wd, Wn, #r, #s
4201  // Wd<s-r:0> = Wn<s:r> when r <= s
4202 
4203  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4204  // %2 = lshr i16 %1, 4
4205  // Wd<7-4:0> = Wn<7:4>
4206  // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4207  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4208  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4209 
4210  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4211  // %2 = lshr i16 %1, 8
4212  // Wd<7-7,0> = Wn<7:7>
4213  // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4214  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4215  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4216 
4217  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4218  // %2 = lshr i16 %1, 12
4219  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4220  // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4221  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4222  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4223 
4224  if (Shift >= SrcBits && IsZExt)
4225  return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4226 
4227  // It is not possible to fold a sign-extend into the LShr instruction. In this
4228  // case emit a sign-extend.
4229  if (!IsZExt) {
4230  Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4231  if (!Op0)
4232  return 0;
4233  Op0IsKill = true;
4234  SrcVT = RetVT;
4235  SrcBits = SrcVT.getSizeInBits();
4236  IsZExt = true;
4237  }
4238 
4239  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4240  unsigned ImmS = SrcBits - 1;
4241  static const unsigned OpcTable[2][2] = {
4242  {AArch64::SBFMWri, AArch64::SBFMXri},
4243  {AArch64::UBFMWri, AArch64::UBFMXri}
4244  };
4245  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4246  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4247  unsigned TmpReg = MRI.createVirtualRegister(RC);
4248  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4249  TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4250  .addImm(0)
4251  .addReg(Op0, getKillRegState(Op0IsKill))
4252  .addImm(AArch64::sub_32);
4253  Op0 = TmpReg;
4254  Op0IsKill = true;
4255  }
4256  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4257 }
4258 
4259 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4260  unsigned Op1Reg, bool Op1IsKill) {
4261  unsigned Opc = 0;
4262  bool NeedTrunc = false;
4263  uint64_t Mask = 0;
4264  switch (RetVT.SimpleTy) {
4265  default: return 0;
4266  case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
4267  case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4268  case MVT::i32: Opc = AArch64::ASRVWr; break;
4269  case MVT::i64: Opc = AArch64::ASRVXr; break;
4270  }
4271 
4272  const TargetRegisterClass *RC =
4273  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4274  if (NeedTrunc) {
4275  Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
4276  Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4277  Op0IsKill = Op1IsKill = true;
4278  }
4279  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4280  Op1IsKill);
4281  if (NeedTrunc)
4282  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4283  return ResultReg;
4284 }
4285 
4286 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4287  bool Op0IsKill, uint64_t Shift,
4288  bool IsZExt) {
4289  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4290  "Unexpected source/return type pair.");
4291  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4292  SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4293  "Unexpected source value type.");
4294  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4295  RetVT == MVT::i64) && "Unexpected return value type.");
4296 
4297  bool Is64Bit = (RetVT == MVT::i64);
4298  unsigned RegSize = Is64Bit ? 64 : 32;
4299  unsigned DstBits = RetVT.getSizeInBits();
4300  unsigned SrcBits = SrcVT.getSizeInBits();
4301  const TargetRegisterClass *RC =
4302  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4303 
4304  // Just emit a copy for "zero" shifts.
4305  if (Shift == 0) {
4306  if (RetVT == SrcVT) {
4307  unsigned ResultReg = createResultReg(RC);
4308  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4309  TII.get(TargetOpcode::COPY), ResultReg)
4310  .addReg(Op0, getKillRegState(Op0IsKill));
4311  return ResultReg;
4312  } else
4313  return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4314  }
4315 
4316  // Don't deal with undefined shifts.
4317  if (Shift >= DstBits)
4318  return 0;
4319 
4320  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4321  // {S|U}BFM Wd, Wn, #r, #s
4322  // Wd<s-r:0> = Wn<s:r> when r <= s
4323 
4324  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4325  // %2 = ashr i16 %1, 4
4326  // Wd<7-4:0> = Wn<7:4>
4327  // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4328  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4329  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4330 
4331  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4332  // %2 = ashr i16 %1, 8
4333  // Wd<7-7,0> = Wn<7:7>
4334  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4335  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4336  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4337 
4338  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4339  // %2 = ashr i16 %1, 12
4340  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4341  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4342  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4343  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4344 
4345  if (Shift >= SrcBits && IsZExt)
4346  return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4347 
4348  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4349  unsigned ImmS = SrcBits - 1;
4350  static const unsigned OpcTable[2][2] = {
4351  {AArch64::SBFMWri, AArch64::SBFMXri},
4352  {AArch64::UBFMWri, AArch64::UBFMXri}
4353  };
4354  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4355  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4356  unsigned TmpReg = MRI.createVirtualRegister(RC);
4357  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4358  TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4359  .addImm(0)
4360  .addReg(Op0, getKillRegState(Op0IsKill))
4361  .addImm(AArch64::sub_32);
4362  Op0 = TmpReg;
4363  Op0IsKill = true;
4364  }
4365  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4366 }
4367 
4368 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4369  bool IsZExt) {
4370  assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4371 
4372  // FastISel does not have plumbing to deal with extensions where the SrcVT or
4373  // DestVT are odd things, so test to make sure that they are both types we can
4374  // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4375  // bail out to SelectionDAG.
4376  if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4377  (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4378  ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4379  (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4380  return 0;
4381 
4382  unsigned Opc;
4383  unsigned Imm = 0;
4384 
4385  switch (SrcVT.SimpleTy) {
4386  default:
4387  return 0;
4388  case MVT::i1:
4389  return emiti1Ext(SrcReg, DestVT, IsZExt);
4390  case MVT::i8:
4391  if (DestVT == MVT::i64)
4392  Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4393  else
4394  Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4395  Imm = 7;
4396  break;
4397  case MVT::i16:
4398  if (DestVT == MVT::i64)
4399  Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4400  else
4401  Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4402  Imm = 15;
4403  break;
4404  case MVT::i32:
4405  assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4406  Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4407  Imm = 31;
4408  break;
4409  }
4410 
4411  // Handle i8 and i16 as i32.
4412  if (DestVT == MVT::i8 || DestVT == MVT::i16)
4413  DestVT = MVT::i32;
4414  else if (DestVT == MVT::i64) {
4415  unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4416  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4417  TII.get(AArch64::SUBREG_TO_REG), Src64)
4418  .addImm(0)
4419  .addReg(SrcReg)
4420  .addImm(AArch64::sub_32);
4421  SrcReg = Src64;
4422  }
4423 
4424  const TargetRegisterClass *RC =
4425  (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4426  return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
4427 }
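// Illustrative mappings of emitIntExt (placeholder registers):
//   zext i8  -> i32:  ubfx w8, w9, #0, #8   (UBFMWri, Imm = 7)
//   sext i16 -> i32:  sbfx w8, w9, #0, #16  (SBFMWri, Imm = 15)
//   zext i32 -> i64:  SUBREG_TO_REG into an X register, then UBFMXri 0, 31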
4428 
4429 static bool isZExtLoad(const MachineInstr *LI) {
4430  switch (LI->getOpcode()) {
4431  default:
4432  return false;
4433  case AArch64::LDURBBi:
4434  case AArch64::LDURHHi:
4435  case AArch64::LDURWi:
4436  case AArch64::LDRBBui:
4437  case AArch64::LDRHHui:
4438  case AArch64::LDRWui:
4439  case AArch64::LDRBBroX:
4440  case AArch64::LDRHHroX:
4441  case AArch64::LDRWroX:
4442  case AArch64::LDRBBroW:
4443  case AArch64::LDRHHroW:
4444  case AArch64::LDRWroW:
4445  return true;
4446  }
4447 }
4448 
4449 static bool isSExtLoad(const MachineInstr *LI) {
4450  switch (LI->getOpcode()) {
4451  default:
4452  return false;
4453  case AArch64::LDURSBWi:
4454  case AArch64::LDURSHWi:
4455  case AArch64::LDURSBXi:
4456  case AArch64::LDURSHXi:
4457  case AArch64::LDURSWi:
4458  case AArch64::LDRSBWui:
4459  case AArch64::LDRSHWui:
4460  case AArch64::LDRSBXui:
4461  case AArch64::LDRSHXui:
4462  case AArch64::LDRSWui:
4463  case AArch64::LDRSBWroX:
4464  case AArch64::LDRSHWroX:
4465  case AArch64::LDRSBXroX:
4466  case AArch64::LDRSHXroX:
4467  case AArch64::LDRSWroX:
4468  case AArch64::LDRSBWroW:
4469  case AArch64::LDRSHWroW:
4470  case AArch64::LDRSBXroW:
4471  case AArch64::LDRSHXroW:
4472  case AArch64::LDRSWroW:
4473  return true;
4474  }
4475 }
4476 
4477 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4478  MVT SrcVT) {
4479  const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4480  if (!LI || !LI->hasOneUse())
4481  return false;
4482 
4483  // Check if the load instruction has already been selected.
4484  unsigned Reg = lookUpRegForValue(LI);
4485  if (!Reg)
4486  return false;
4487 
4488  MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4489  if (!MI)
4490  return false;
4491 
4492  // Check if the correct load instruction has been emitted - SelectionDAG might
4493  // have emitted a zero-extending load, but we need a sign-extending load.
4494  bool IsZExt = isa<ZExtInst>(I);
4495  const auto *LoadMI = MI;
4496  if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4497  LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4498  unsigned LoadReg = MI->getOperand(1).getReg();
4499  LoadMI = MRI.getUniqueVRegDef(LoadReg);
4500  assert(LoadMI && "Expected valid instruction");
4501  }
4502  if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4503  return false;
4504 
4505  // Nothing to be done.
4506  if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4507  updateValueMap(I, Reg);
4508  return true;
4509  }
4510 
4511  if (IsZExt) {
4512  unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
4513  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4514  TII.get(AArch64::SUBREG_TO_REG), Reg64)
4515  .addImm(0)
4516  .addReg(Reg, getKillRegState(true))
4517  .addImm(AArch64::sub_32);
4518  Reg = Reg64;
4519  } else {
4520  assert((MI->getOpcode() == TargetOpcode::COPY &&
4521  MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4522  "Expected copy instruction");
4523  Reg = MI->getOperand(1).getReg();
4524  MachineBasicBlock::iterator I(MI);
4525  removeDeadCode(I, std::next(I));
4526  }
4527  updateValueMap(I, Reg);
4528  return true;
4529 }
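// IR pattern the optimization above targets (a sketch): a single-use load
// whose extension is already implied by the selected load instruction, e.g.
//   %v = load i32, i32* %p
//   %e = zext i32 %v to i64
// If a zero-extending LDRWui was emitted for %v, the zext reduces to a
// SUBREG_TO_REG and no separate extend instruction is emitted.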
4530 
4531 bool AArch64FastISel::selectIntExt(const Instruction *I) {
4532  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4533  "Unexpected integer extend instruction.");
4534  MVT RetVT;
4535  MVT SrcVT;
4536  if (!isTypeSupported(I->getType(), RetVT))
4537  return false;
4538 
4539  if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4540  return false;
4541 
4542  // Try to optimize already sign-/zero-extended values from load instructions.
4543  if (optimizeIntExtLoad(I, RetVT, SrcVT))
4544  return true;
4545 
4546  unsigned SrcReg = getRegForValue(I->getOperand(0));
4547  if (!SrcReg)
4548  return false;
4549  bool SrcIsKill = hasTrivialKill(I->getOperand(0));
4550 
4551  // Try to optimize already sign-/zero-extended values from function arguments.
4552  bool IsZExt = isa<ZExtInst>(I);
4553  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4554  if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4555  if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4556  unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
4557  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4558  TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4559  .addImm(0)
4560  .addReg(SrcReg, getKillRegState(SrcIsKill))
4561  .addImm(AArch64::sub_32);
4562  SrcReg = ResultReg;
4563  }
4564  // Conservatively clear all kill flags from all uses, because we are
4565  // replacing a sign-/zero-extend instruction at IR level with a nop at MI
4566  // level. The result of the instruction at IR level might have been
4567  // trivially dead, which is no longer true.
4568  unsigned UseReg = lookUpRegForValue(I);
4569  if (UseReg)
4570  MRI.clearKillFlags(UseReg);
4571 
4572  updateValueMap(I, SrcReg);
4573  return true;
4574  }
4575  }
4576 
4577  unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4578  if (!ResultReg)
4579  return false;
4580 
4581  updateValueMap(I, ResultReg);
4582  return true;
4583 }
4584 
4585 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4586  EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4587  if (!DestEVT.isSimple())
4588  return false;
4589 
4590  MVT DestVT = DestEVT.getSimpleVT();
4591  if (DestVT != MVT::i64 && DestVT != MVT::i32)
4592  return false;
4593 
4594  unsigned DivOpc;
4595  bool Is64bit = (DestVT == MVT::i64);
4596  switch (ISDOpcode) {
4597  default:
4598  return false;
4599  case ISD::SREM:
4600  DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4601  break;
4602  case ISD::UREM:
4603  DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4604  break;
4605  }
4606  unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4607  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4608  if (!Src0Reg)
4609  return false;
4610  bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4611 
4612  unsigned Src1Reg = getRegForValue(I->getOperand(1));
4613  if (!Src1Reg)
4614  return false;
4615  bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4616 
4617  const TargetRegisterClass *RC =
4618  (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4619  unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
4620  Src1Reg, /*IsKill=*/false);
4621  assert(QuotReg && "Unexpected DIV instruction emission failure.");
4622  // The remainder is computed as numerator - (quotient * denominator) using the
4623  // MSUB instruction.
4624  unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
4625  Src1Reg, Src1IsKill, Src0Reg,
4626  Src0IsKill);
4627  updateValueMap(I, ResultReg);
4628  return true;
4629 }
4630 
4631 bool AArch64FastISel::selectMul(const Instruction *I) {
4632  MVT VT;
4633  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4634  return false;
4635 
4636  if (VT.isVector())
4637  return selectBinaryOp(I, ISD::MUL);
4638 
4639  const Value *Src0 = I->getOperand(0);
4640  const Value *Src1 = I->getOperand(1);
4641  if (const auto *C = dyn_cast<ConstantInt>(Src0))
4642  if (C->getValue().isPowerOf2())
4643  std::swap(Src0, Src1);
4644 
4645  // Try to simplify to a shift instruction.
4646  if (const auto *C = dyn_cast<ConstantInt>(Src1))
4647  if (C->getValue().isPowerOf2()) {
4648  uint64_t ShiftVal = C->getValue().logBase2();
4649  MVT SrcVT = VT;
4650  bool IsZExt = true;
4651  if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4652  if (!isIntExtFree(ZExt)) {
4653  MVT VT;
4654  if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4655  SrcVT = VT;
4656  IsZExt = true;
4657  Src0 = ZExt->getOperand(0);
4658  }
4659  }
4660  } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4661  if (!isIntExtFree(SExt)) {
4662  MVT VT;
4663  if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4664  SrcVT = VT;
4665  IsZExt = false;
4666  Src0 = SExt->getOperand(0);
4667  }
4668  }
4669  }
4670 
4671  unsigned Src0Reg = getRegForValue(Src0);
4672  if (!Src0Reg)
4673  return false;
4674  bool Src0IsKill = hasTrivialKill(Src0);
4675 
4676  unsigned ResultReg =
4677  emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt);
4678 
4679  if (ResultReg) {
4680  updateValueMap(I, ResultReg);
4681  return true;
4682  }
4683  }
4684 
4685  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4686  if (!Src0Reg)
4687  return false;
4688  bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4689 
4690  unsigned Src1Reg = getRegForValue(I->getOperand(1));
4691  if (!Src1Reg)
4692  return false;
4693  bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4694 
4695  unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
4696 
4697  if (!ResultReg)
4698  return false;
4699 
4700  updateValueMap(I, ResultReg);
4701  return true;
4702 }
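// Example of the power-of-two shortcut above (illustrative): "mul i32 %a, 8"
// is selected as a left shift, roughly "lsl w8, w9, #3" via emitLSL_ri,
// instead of a MADD-based multiply.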
4703 
4704 bool AArch64FastISel::selectShift(const Instruction *I) {
4705  MVT RetVT;
4706  if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4707  return false;
4708 
4709  if (RetVT.isVector())
4710  return selectOperator(I, I->getOpcode());
4711 
4712  if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4713  unsigned ResultReg = 0;
4714  uint64_t ShiftVal = C->getZExtValue();
4715  MVT SrcVT = RetVT;
4716  bool IsZExt = I->getOpcode() != Instruction::AShr;
4717  const Value *Op0 = I->getOperand(0);
4718  if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4719  if (!isIntExtFree(ZExt)) {
4720  MVT TmpVT;
4721  if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4722  SrcVT = TmpVT;
4723  IsZExt = true;
4724  Op0 = ZExt->getOperand(0);
4725  }
4726  }
4727  } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4728  if (!isIntExtFree(SExt)) {
4729  MVT TmpVT;
4730  if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4731  SrcVT = TmpVT;
4732  IsZExt = false;
4733  Op0 = SExt->getOperand(0);
4734  }
4735  }
4736  }
4737 
4738  unsigned Op0Reg = getRegForValue(Op0);
4739  if (!Op0Reg)
4740  return false;
4741  bool Op0IsKill = hasTrivialKill(Op0);
4742 
4743  switch (I->getOpcode()) {
4744  default: llvm_unreachable("Unexpected instruction.");
4745  case Instruction::Shl:
4746  ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4747  break;
4748  case Instruction::AShr:
4749  ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4750  break;
4751  case Instruction::LShr:
4752  ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4753  break;
4754  }
4755  if (!ResultReg)
4756  return false;
4757 
4758  updateValueMap(I, ResultReg);
4759  return true;
4760  }
4761 
4762  unsigned Op0Reg = getRegForValue(I->getOperand(0));
4763  if (!Op0Reg)
4764  return false;
4765  bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4766 
4767  unsigned Op1Reg = getRegForValue(I->getOperand(1));
4768  if (!Op1Reg)
4769  return false;
4770  bool Op1IsKill = hasTrivialKill(I->getOperand(1));
4771 
4772  unsigned ResultReg = 0;
4773  switch (I->getOpcode()) {
4774  default: llvm_unreachable("Unexpected instruction.");
4775  case Instruction::Shl:
4776  ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4777  break;
4778  case Instruction::AShr:
4779  ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4780  break;
4781  case Instruction::LShr:
4782  ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4783  break;
4784  }
4785 
4786  if (!ResultReg)
4787  return false;
4788 
4789  updateValueMap(I, ResultReg);
4790  return true;
4791 }
4792 
4793 bool AArch64FastISel::selectBitCast(const Instruction *I) {
4794  MVT RetVT, SrcVT;
4795 
4796  if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4797  return false;
4798  if (!isTypeLegal(I->getType(), RetVT))
4799  return false;
4800 
4801  unsigned Opc;
4802  if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4803  Opc = AArch64::FMOVWSr;
4804  else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4805  Opc = AArch64::FMOVXDr;
4806  else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4807  Opc = AArch64::FMOVSWr;
4808  else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4809  Opc = AArch64::FMOVDXr;
4810  else
4811  return false;
4812 
4813  const TargetRegisterClass *RC = nullptr;
4814  switch (RetVT.SimpleTy) {
4815  default: llvm_unreachable("Unexpected value type.");
4816  case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4817  case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4818  case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4819  case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4820  }
4821  unsigned Op0Reg = getRegForValue(I->getOperand(0));
4822  if (!Op0Reg)
4823  return false;
4824  bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4825  unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
4826 
4827  if (!ResultReg)
4828  return false;
4829 
4830  updateValueMap(I, ResultReg);
4831  return true;
4832 }
4833 
4834 bool AArch64FastISel::selectFRem(const Instruction *I) {
4835  MVT RetVT;
4836  if (!isTypeLegal(I->getType(), RetVT))
4837  return false;
4838 
4839  RTLIB::Libcall LC;
4840  switch (RetVT.SimpleTy) {
4841  default:
4842  return false;
4843  case MVT::f32:
4844  LC = RTLIB::REM_F32;
4845  break;
4846  case MVT::f64:
4847  LC = RTLIB::REM_F64;
4848  break;
4849  }
4850 
4851  ArgListTy Args;
4852  Args.reserve(I->getNumOperands());
4853 
4854  // Populate the argument list.
4855  for (auto &Arg : I->operands()) {
4856  ArgListEntry Entry;
4857  Entry.Val = Arg;
4858  Entry.Ty = Arg->getType();
4859  Args.push_back(Entry);
4860  }
4861 
4862  CallLoweringInfo CLI;
4863  MCContext &Ctx = MF->getContext();
4864  CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4865  TLI.getLibcallName(LC), std::move(Args));
4866  if (!lowerCallTo(CLI))
4867  return false;
4868  updateValueMap(I, CLI.ResultReg);
4869  return true;
4870 }
4871 
4872 bool AArch64FastISel::selectSDiv(const Instruction *I) {
4873  MVT VT;
4874  if (!isTypeLegal(I->getType(), VT))
4875  return false;
4876 
4877  if (!isa<ConstantInt>(I->getOperand(1)))
4878  return selectBinaryOp(I, ISD::SDIV);
4879 
4880  const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4881  if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4882  !(C.isPowerOf2() || (-C).isPowerOf2()))
4883  return selectBinaryOp(I, ISD::SDIV);
4884 
4885  unsigned Lg2 = C.countTrailingZeros();
4886  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4887  if (!Src0Reg)
4888  return false;
4889  bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4890 
4891  if (cast<BinaryOperator>(I)->isExact()) {
4892  unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
4893  if (!ResultReg)
4894  return false;
4895  updateValueMap(I, ResultReg);
4896  return true;
4897  }
4898 
4899  int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4900  unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne);
4901  if (!AddReg)
4902  return false;
4903 
4904  // (Src0 < 0) ? Pow2 - 1 : 0;
4905  if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
4906  return false;
4907 
4908  unsigned SelectOpc;
4909  const TargetRegisterClass *RC;
4910  if (VT == MVT::i64) {
4911  SelectOpc = AArch64::CSELXr;
4912  RC = &AArch64::GPR64RegClass;
4913  } else {
4914  SelectOpc = AArch64::CSELWr;
4915  RC = &AArch64::GPR32RegClass;
4916  }
4917  unsigned SelectReg =
4918  fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
4919  Src0IsKill, AArch64CC::LT);
4920  if (!SelectReg)
4921  return false;
4922 
4923  // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4924  // negate the result.
4925  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4926  unsigned ResultReg;
4927  if (C.isNegative())
4928  ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
4929  SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
4930  else
4931  ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);
4932 
4933  if (!ResultReg)
4934  return false;
4935 
4936  updateValueMap(I, ResultReg);
4937  return true;
4938 }
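// Rough sketch of the sequence for "sdiv i32 %a, 8" (illustrative registers):
// round towards zero by adding divisor-1 only for negative inputs, then shift:
//   add  w8, w0, #7
//   cmp  w0, #0
//   csel w8, w8, w0, lt
//   asr  w8, w8, #3
// For a negative power-of-two divisor the final shift is folded into a
// negation (SUB from the zero register with an ASR-shifted operand).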
4939 
4940 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4941 /// have to duplicate it for AArch64, because otherwise we would fail during the
4942 /// sign-extend emission.
4943 std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4944  unsigned IdxN = getRegForValue(Idx);
4945  if (IdxN == 0)
4946  // Unhandled operand. Halt "fast" selection and bail.
4947  return std::pair<unsigned, bool>(0, false);
4948 
4949  bool IdxNIsKill = hasTrivialKill(Idx);
4950 
4951  // If the index is smaller or larger than intptr_t, truncate or extend it.
4952  MVT PtrVT = TLI.getPointerTy(DL);
4953  EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4954  if (IdxVT.bitsLT(PtrVT)) {
4955  IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
4956  IdxNIsKill = true;
4957  } else if (IdxVT.bitsGT(PtrVT))
4958  llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4959  return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
4960 }
4961 
4962 /// This is mostly a copy of the existing FastISel GEP code, but we have to
4963 /// duplicate it for AArch64, because otherwise we would bail out even for
4964 /// simple cases. This is because the standard fastEmit functions don't cover
4965  /// MUL at all and ADD is lowered very inefficiently.
4966 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4967  unsigned N = getRegForValue(I->getOperand(0));
4968  if (!N)
4969  return false;
4970  bool NIsKill = hasTrivialKill(I->getOperand(0));
4971 
4972  // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4973  // into a single N = N + TotalOffset.
4974  uint64_t TotalOffs = 0;
4975  MVT VT = TLI.getPointerTy(DL);
4976  for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
4977  GTI != E; ++GTI) {
4978  const Value *Idx = GTI.getOperand();
4979  if (auto *StTy = GTI.getStructTypeOrNull()) {
4980  unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4981  // N = N + Offset
4982  if (Field)
4983  TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4984  } else {
4985  Type *Ty = GTI.getIndexedType();
4986 
4987  // If this is a constant subscript, handle it quickly.
4988  if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4989  if (CI->isZero())
4990  continue;
4991  // N = N + Offset
4992  TotalOffs +=
4993  DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
4994  continue;
4995  }
4996  if (TotalOffs) {
4997  N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
4998  if (!N)
4999  return false;
5000  NIsKill = true;
5001  TotalOffs = 0;
5002  }
5003 
5004  // N = N + Idx * ElementSize;
5005  uint64_t ElementSize = DL.getTypeAllocSize(Ty);
5006  std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
5007  unsigned IdxN = Pair.first;
5008  bool IdxNIsKill = Pair.second;
5009  if (!IdxN)
5010  return false;
5011 
5012  if (ElementSize != 1) {
5013  unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
5014  if (!C)
5015  return false;
5016  IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true);
5017  if (!IdxN)
5018  return false;
5019  IdxNIsKill = true;
5020  }
5021  N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
5022  if (!N)
5023  return false;
5024  }
5025  }
5026  if (TotalOffs) {
5027  N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
5028  if (!N)
5029  return false;
5030  }
5031  updateValueMap(I, N);
5032  return true;
5033 }
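// Illustrative sketch (hypothetical helper, not part of the FastISel
// implementation above): the loop just emitted folds a GEP into
//   Addr = Base + (sum of constant offsets) + (sum of Idx_i * ElementSize_i),
// accumulating the constant parts in TotalOffs and emitting one MUL + ADD per
// variable index. As plain integer arithmetic:
static uint64_t gepAddressSketch(uint64_t Base, uint64_t ConstantOffs,
                                 const int64_t *Indices,
                                 const uint64_t *ElemSizes, unsigned NumIdx) {
  uint64_t Addr = Base + ConstantOffs;            // one emitAdd_ri_ for TotalOffs
  for (unsigned I = 0; I < NumIdx; ++I)
    Addr += uint64_t(Indices[I]) * ElemSizes[I];  // emitMul_rr + fastEmit_rr ADD
  return Addr;
}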
5034 
5035 bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
5036  assert(TM.getOptLevel() == CodeGenOpt::None &&
5037  "cmpxchg survived AtomicExpand at optlevel > -O0");
5038 
5039  auto *RetPairTy = cast<StructType>(I->getType());
5040  Type *RetTy = RetPairTy->getTypeAtIndex(0U);
5041  assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
5042  "cmpxchg has a non-i1 status result");
5043 
5044  MVT VT;
5045  if (!isTypeLegal(RetTy, VT))
5046  return false;
5047 
5048  const TargetRegisterClass *ResRC;
5049  unsigned Opc, CmpOpc;
5050  // This only supports i32/i64, because i8/i16 aren't legal, and the generic
5051  // extractvalue selection doesn't support that.
5052  if (VT == MVT::i32) {
5053  Opc = AArch64::CMP_SWAP_32;
5054  CmpOpc = AArch64::SUBSWrs;
5055  ResRC = &AArch64::GPR32RegClass;
5056  } else if (VT == MVT::i64) {
5057  Opc = AArch64::CMP_SWAP_64;
5058  CmpOpc = AArch64::SUBSXrs;
5059  ResRC = &AArch64::GPR64RegClass;
5060  } else {
5061  return false;
5062  }
5063 
5064  const MCInstrDesc &II = TII.get(Opc);
5065 
5066  const unsigned AddrReg = constrainOperandRegClass(
5067  II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
5068  const unsigned DesiredReg = constrainOperandRegClass(
5069  II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
5070  const unsigned NewReg = constrainOperandRegClass(
5071  II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
5072 
5073  const unsigned ResultReg1 = createResultReg(ResRC);
5074  const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5075  const unsigned ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5076 
5077  // FIXME: MachineMemOperand doesn't support cmpxchg yet.
5078  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
5079  .addDef(ResultReg1)
5080  .addDef(ScratchReg)
5081  .addUse(AddrReg)
5082  .addUse(DesiredReg)
5083  .addUse(NewReg);
5084 
5085  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
5086  .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5087  .addUse(ResultReg1)
5088  .addUse(DesiredReg)
5089  .addImm(0);
5090 
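 // The SUBS above compared the value returned by the swap with the expected
 // value; the CSINC below turns that comparison into the i1 success result,
 // yielding 1 when the two were equal and 0 otherwise.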
5091  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr))
5092  .addDef(ResultReg2)
5093  .addUse(AArch64::WZR)
5094  .addUse(AArch64::WZR)
5095  .addImm(AArch64CC::NE);
5096 
5097  assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5098  updateValueMap(I, ResultReg1, 2);
5099  return true;
5100 }
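// Illustrative sketch (hypothetical helper, not part of the FastISel
// implementation above): ignoring the load-linked/store-conditional loop
// hidden inside the CMP_SWAP pseudo, the three instructions built above
// produce the same {old value, success} pair that the IR-level cmpxchg
// returns:
static std::pair<uint64_t, bool> cmpXchgSketch(uint64_t &Memory,
                                               uint64_t Desired,
                                               uint64_t NewVal) {
  uint64_t Old = Memory;           // CMP_SWAP_32/64 yields the previous value
  bool Success = (Old == Desired); // SUBS + CSINC materialize this bit
  if (Success)
    Memory = NewVal;               // the store only happens on success
  return {Old, Success};
}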
5101 
5102 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5103  switch (I->getOpcode()) {
5104  default:
5105  break;
5106  case Instruction::Add:
5107  case Instruction::Sub:
5108  return selectAddSub(I);
5109  case Instruction::Mul:
5110  return selectMul(I);
5111  case Instruction::SDiv:
5112  return selectSDiv(I);
5113  case Instruction::SRem:
5114  if (!selectBinaryOp(I, ISD::SREM))
5115  return selectRem(I, ISD::SREM);
5116  return true;
5117  case Instruction::URem:
5118  if (!selectBinaryOp(I, ISD::UREM))
5119  return selectRem(I, ISD::UREM);
5120  return true;
5121  case Instruction::Shl:
5122  case Instruction::LShr:
5123  case Instruction::AShr:
5124  return selectShift(I);
5125  case Instruction::And:
5126  case Instruction::Or:
5127  case Instruction::Xor:
5128  return selectLogicalOp(I);
5129  case Instruction::Br:
5130  return selectBranch(I);
5131  case Instruction::IndirectBr:
5132  return selectIndirectBr(I);
5133  case Instruction::BitCast:
5134  if (!FastISel::selectBitCast(I))
5135  return selectBitCast(I);
5136  return true;
5137  case Instruction::FPToSI:
5138  if (!selectCast(I, ISD::FP_TO_SINT))
5139  return selectFPToInt(I, /*Signed=*/true);
5140  return true;
5141  case Instruction::FPToUI:
5142  return selectFPToInt(I, /*Signed=*/false);
5143  case Instruction::ZExt:
5144  case Instruction::SExt:
5145  return selectIntExt(I);
5146  case Instruction::Trunc:
5147  if (!selectCast(I, ISD::TRUNCATE))
5148  return selectTrunc(I);
5149  return true;
5150  case Instruction::FPExt:
5151  return selectFPExt(I);
5152  case Instruction::FPTrunc:
5153  return selectFPTrunc(I);
5154  case Instruction::SIToFP:
5155  if (!selectCast(I, ISD::SINT_TO_FP))
5156  return selectIntToFP(I, /*Signed=*/true);
5157  return true;
5158  case Instruction::UIToFP:
5159  return selectIntToFP(I, /*Signed=*/false);
5160  case Instruction::Load:
5161  return selectLoad(I);
5162  case Instruction::Store:
5163  return selectStore(I);
5164  case Instruction::FCmp:
5165  case Instruction::ICmp:
5166  return selectCmp(I);
5167  case Instruction::Select:
5168  return selectSelect(I);
5169  case Instruction::Ret:
5170  return selectRet(I);
5171  case Instruction::FRem:
5172  return selectFRem(I);
5173  case Instruction::GetElementPtr:
5174  return selectGetElementPtr(I);
5175  case Instruction::AtomicCmpXchg:
5176  return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5177  }
5178 
5179  // Fall back to target-independent instruction selection.
5180  return selectOperator(I, I->getOpcode());
5181 }
5182 
5183 namespace llvm {
5184 
5185 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
5186  const TargetLibraryInfo *LibInfo) {
5187  return new AArch64FastISel(FuncInfo, LibInfo);
5188 }
5189 
5190 } // end namespace llvm