1 //===- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the AArch64-specific support for the FastISel class. Some
11 // of the target-specific code is generated by tablegen in the file
12 // AArch64GenFastISel.inc, which is #included here.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "AArch64.h"
18 #include "AArch64RegisterInfo.h"
19 #include "AArch64Subtarget.h"
21 #include "Utils/AArch64BaseInfo.h"
22 #include "llvm/ADT/APFloat.h"
23 #include "llvm/ADT/APInt.h"
24 #include "llvm/ADT/DenseMap.h"
25 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/CodeGen/FastISel.h"
40 #include "llvm/IR/Argument.h"
41 #include "llvm/IR/Attributes.h"
42 #include "llvm/IR/BasicBlock.h"
43 #include "llvm/IR/CallingConv.h"
44 #include "llvm/IR/Constant.h"
45 #include "llvm/IR/Constants.h"
46 #include "llvm/IR/DataLayout.h"
47 #include "llvm/IR/DerivedTypes.h"
48 #include "llvm/IR/Function.h"
50 #include "llvm/IR/GlobalValue.h"
51 #include "llvm/IR/InstrTypes.h"
52 #include "llvm/IR/Instruction.h"
53 #include "llvm/IR/Instructions.h"
54 #include "llvm/IR/IntrinsicInst.h"
55 #include "llvm/IR/Intrinsics.h"
56 #include "llvm/IR/Operator.h"
57 #include "llvm/IR/Type.h"
58 #include "llvm/IR/User.h"
59 #include "llvm/IR/Value.h"
60 #include "llvm/MC/MCInstrDesc.h"
61 #include "llvm/MC/MCRegisterInfo.h"
62 #include "llvm/MC/MCSymbol.h"
64 #include "llvm/Support/Casting.h"
65 #include "llvm/Support/CodeGen.h"
66 #include "llvm/Support/Compiler.h"
70 #include <algorithm>
71 #include <cassert>
72 #include <cstdint>
73 #include <iterator>
74 #include <utility>
75 
76 using namespace llvm;
77 
78 namespace {
79 
80 class AArch64FastISel final : public FastISel {
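  // Address - a small helper used by the load/store selection code below. It
  // abstracts the address forms that can be folded into a memory operand: a
  // register or frame-index base, an optional (possibly extended and shifted)
  // offset register, an immediate offset, and a global value.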
81  class Address {
82  public:
83  using BaseKind = enum {
84  RegBase,
85  FrameIndexBase
86  };
87 
88  private:
89  BaseKind Kind = RegBase;
90  AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
91  union {
92  unsigned Reg;
93  int FI;
94  } Base;
95  unsigned OffsetReg = 0;
96  unsigned Shift = 0;
97  int64_t Offset = 0;
98  const GlobalValue *GV = nullptr;
99 
100  public:
101  Address() { Base.Reg = 0; }
102 
103  void setKind(BaseKind K) { Kind = K; }
104  BaseKind getKind() const { return Kind; }
105  void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
106  AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
107  bool isRegBase() const { return Kind == RegBase; }
108  bool isFIBase() const { return Kind == FrameIndexBase; }
109 
110  void setReg(unsigned Reg) {
111  assert(isRegBase() && "Invalid base register access!");
112  Base.Reg = Reg;
113  }
114 
115  unsigned getReg() const {
116  assert(isRegBase() && "Invalid base register access!");
117  return Base.Reg;
118  }
119 
120  void setOffsetReg(unsigned Reg) {
121  OffsetReg = Reg;
122  }
123 
124  unsigned getOffsetReg() const {
125  return OffsetReg;
126  }
127 
128  void setFI(unsigned FI) {
129  assert(isFIBase() && "Invalid base frame index access!");
130  Base.FI = FI;
131  }
132 
133  unsigned getFI() const {
134  assert(isFIBase() && "Invalid base frame index access!");
135  return Base.FI;
136  }
137 
138  void setOffset(int64_t O) { Offset = O; }
139  int64_t getOffset() { return Offset; }
140  void setShift(unsigned S) { Shift = S; }
141  unsigned getShift() { return Shift; }
142 
143  void setGlobalValue(const GlobalValue *G) { GV = G; }
144  const GlobalValue *getGlobalValue() { return GV; }
145  };
146 
147  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
148  /// make the right decision when generating code for different targets.
149  const AArch64Subtarget *Subtarget;
150  LLVMContext *Context;
151 
152  bool fastLowerArguments() override;
153  bool fastLowerCall(CallLoweringInfo &CLI) override;
154  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
155 
156 private:
157  // Selection routines.
158  bool selectAddSub(const Instruction *I);
159  bool selectLogicalOp(const Instruction *I);
160  bool selectLoad(const Instruction *I);
161  bool selectStore(const Instruction *I);
162  bool selectBranch(const Instruction *I);
163  bool selectIndirectBr(const Instruction *I);
164  bool selectCmp(const Instruction *I);
165  bool selectSelect(const Instruction *I);
166  bool selectFPExt(const Instruction *I);
167  bool selectFPTrunc(const Instruction *I);
168  bool selectFPToInt(const Instruction *I, bool Signed);
169  bool selectIntToFP(const Instruction *I, bool Signed);
170  bool selectRem(const Instruction *I, unsigned ISDOpcode);
171  bool selectRet(const Instruction *I);
172  bool selectTrunc(const Instruction *I);
173  bool selectIntExt(const Instruction *I);
174  bool selectMul(const Instruction *I);
175  bool selectShift(const Instruction *I);
176  bool selectBitCast(const Instruction *I);
177  bool selectFRem(const Instruction *I);
178  bool selectSDiv(const Instruction *I);
179  bool selectGetElementPtr(const Instruction *I);
180  bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
181 
182  // Utility helper routines.
183  bool isTypeLegal(Type *Ty, MVT &VT);
184  bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
185  bool isValueAvailable(const Value *V) const;
186  bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
187  bool computeCallAddress(const Value *V, Address &Addr);
188  bool simplifyAddress(Address &Addr, MVT VT);
189  void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
190  MachineMemOperand::Flags Flags,
191  unsigned ScaleFactor, MachineMemOperand *MMO);
192  bool isMemCpySmall(uint64_t Len, unsigned Alignment);
193  bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
194  unsigned Alignment);
195  bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
196  const Value *Cond);
197  bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
198  bool optimizeSelect(const SelectInst *SI);
199  std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);
200 
201  // Emit helper routines.
202  unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
203  const Value *RHS, bool SetFlags = false,
204  bool WantResult = true, bool IsZExt = false);
205  unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
206  bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
207  bool SetFlags = false, bool WantResult = true);
208  unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
209  bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
210  bool WantResult = true);
211  unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
212  bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
213  AArch64_AM::ShiftExtendType ShiftType,
214  uint64_t ShiftImm, bool SetFlags = false,
215  bool WantResult = true);
216  unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
217  bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
218  AArch64_AM::ShiftExtendType ExtType,
219  uint64_t ShiftImm, bool SetFlags = false,
220  bool WantResult = true);
221 
222  // Emit functions.
223  bool emitCompareAndBranch(const BranchInst *BI);
224  bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
225  bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
226  bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
227  bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
228  unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
229  MachineMemOperand *MMO = nullptr);
230  bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
231  MachineMemOperand *MMO = nullptr);
232  bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
233  MachineMemOperand *MMO = nullptr);
234  unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
235  unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
236  unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
237  bool SetFlags = false, bool WantResult = true,
238  bool IsZExt = false);
239  unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
240  unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
241  bool SetFlags = false, bool WantResult = true,
242  bool IsZExt = false);
243  unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
244  unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
245  unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
246  unsigned RHSReg, bool RHSIsKill,
247  AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
248  bool WantResult = true);
249  unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
250  const Value *RHS);
251  unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
252  bool LHSIsKill, uint64_t Imm);
253  unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
254  bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
255  uint64_t ShiftImm);
256  unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
257  unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
258  unsigned Op1, bool Op1IsKill);
259  unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
260  unsigned Op1, bool Op1IsKill);
261  unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
262  unsigned Op1, bool Op1IsKill);
263  unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
264  unsigned Op1Reg, bool Op1IsKill);
265  unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
266  uint64_t Imm, bool IsZExt = true);
267  unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
268  unsigned Op1Reg, bool Op1IsKill);
269  unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
270  uint64_t Imm, bool IsZExt = true);
271  unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
272  unsigned Op1Reg, bool Op1IsKill);
273  unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
274  uint64_t Imm, bool IsZExt = false);
275 
276  unsigned materializeInt(const ConstantInt *CI, MVT VT);
277  unsigned materializeFP(const ConstantFP *CFP, MVT VT);
278  unsigned materializeGV(const GlobalValue *GV);
279 
280  // Call handling routines.
281 private:
282  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
283  bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
284  unsigned &NumBytes);
285  bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
286 
287 public:
288  // Backend specific FastISel code.
289  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
290  unsigned fastMaterializeConstant(const Constant *C) override;
291  unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
292 
293  explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
294  const TargetLibraryInfo *LibInfo)
295  : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
296  Subtarget =
297  &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
298  Context = &FuncInfo.Fn->getContext();
299  }
300 
301  bool fastSelectInstruction(const Instruction *I) override;
302 
303 #include "AArch64GenFastISel.inc"
304 };
305 
306 } // end anonymous namespace
307 
308 #include "AArch64GenCallingConv.inc"
309 
310 /// Check if the sign-/zero-extend will be a noop.
311 static bool isIntExtFree(const Instruction *I) {
312  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
313  "Unexpected integer extend instruction.");
314  assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
315  "Unexpected value type.");
316  bool IsZExt = isa<ZExtInst>(I);
317 
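  // The extend is free if its operand is a load with a single use (the extend
  // is folded into an extending load), or an argument that already carries a
  // matching zeroext/signext attribute from the caller.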
318  if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
319  if (LI->hasOneUse())
320  return true;
321 
322  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
323  if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
324  return true;
325 
326  return false;
327 }
328 
329 /// Determine the implicit scale factor that is applied by a memory
330 /// operation for a given value type.
331 static unsigned getImplicitScaleFactor(MVT VT) {
332  switch (VT.SimpleTy) {
333  default:
334  return 0; // invalid
335  case MVT::i1: // fall-through
336  case MVT::i8:
337  return 1;
338  case MVT::i16:
339  return 2;
340  case MVT::i32: // fall-through
341  case MVT::f32:
342  return 4;
343  case MVT::i64: // fall-through
344  case MVT::f64:
345  return 8;
346  }
347 }
348 
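// Select the calling-convention assignment function used when lowering calls:
// WebKit_JS and GHC get their dedicated conventions; everything else uses the
// Darwin PCS on Darwin targets and the standard AAPCS otherwise.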
349 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
350  if (CC == CallingConv::WebKit_JS)
351  return CC_AArch64_WebKit_JS;
352  if (CC == CallingConv::GHC)
353  return CC_AArch64_GHC;
354  return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
355 }
356 
357 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
358  assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
359  "Alloca should always return a pointer.");
360 
361  // Don't handle dynamic allocas.
362  if (!FuncInfo.StaticAllocaMap.count(AI))
363  return 0;
364 
365  DenseMap<const AllocaInst *, int>::iterator SI =
366  FuncInfo.StaticAllocaMap.find(AI);
367 
368  if (SI != FuncInfo.StaticAllocaMap.end()) {
369  unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
370  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
371  ResultReg)
372  .addFrameIndex(SI->second)
373  .addImm(0)
374  .addImm(0);
375  return ResultReg;
376  }
377 
378  return 0;
379 }
380 
381 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
382  if (VT > MVT::i64)
383  return 0;
384 
385  if (!CI->isZero())
386  return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
387 
388  // Create a copy from the zero register to materialize a "0" value.
389  const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
390  : &AArch64::GPR32RegClass;
391  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
392  unsigned ResultReg = createResultReg(RC);
393  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
394  ResultReg).addReg(ZeroReg, getKillRegState(true));
395  return ResultReg;
396 }
397 
398 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
399  // Positive zero (+0.0) has to be materialized with a fmov from the zero
400  // register, because the immediate version of fmov cannot encode zero.
401  if (CFP->isNullValue())
402  return fastMaterializeFloatZero(CFP);
403 
404  if (VT != MVT::f32 && VT != MVT::f64)
405  return 0;
406 
407  const APFloat Val = CFP->getValueAPF();
408  bool Is64Bit = (VT == MVT::f64);
409  // This checks to see if we can use FMOV instructions to materialize
410  // a constant, otherwise we have to materialize via the constant pool.
411  if (TLI.isFPImmLegal(Val, VT)) {
412  int Imm =
413  Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
414  assert((Imm != -1) && "Cannot encode floating-point constant.");
415  unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
416  return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
417  }
418 
419  // For the MachO large code model materialize the FP constant in code.
420  if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
421  unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
422  const TargetRegisterClass *RC = Is64Bit ?
423  &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
424 
425  unsigned TmpReg = createResultReg(RC);
426  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
427  .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
428 
429  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
430  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
431  TII.get(TargetOpcode::COPY), ResultReg)
432  .addReg(TmpReg, getKillRegState(true));
433 
434  return ResultReg;
435  }
436 
437  // Materialize via constant pool. MachineConstantPool wants an explicit
438  // alignment.
439  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
440  if (Align == 0)
441  Align = DL.getTypeAllocSize(CFP->getType());
442 
443  unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
444  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
445  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
446  ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
447 
448  unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
449  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
450  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
451  .addReg(ADRPReg)
452  .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
453  return ResultReg;
454 }
455 
456 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
457  // We can't handle thread-local variables quickly yet.
458  if (GV->isThreadLocal())
459  return 0;
460 
461  // MachO still uses GOT for large code-model accesses, but ELF requires
462  // movz/movk sequences, which FastISel doesn't handle yet.
463  if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
464  return 0;
465 
466  unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
467 
468  EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
469  if (!DestEVT.isSimple())
470  return 0;
471 
472  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
473  unsigned ResultReg;
474 
475  if (OpFlags & AArch64II::MO_GOT) {
476  // ADRP + LDRX
477  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
478  ADRPReg)
479  .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
480 
481  ResultReg = createResultReg(&AArch64::GPR64RegClass);
482  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
483  ResultReg)
484  .addReg(ADRPReg)
485  .addGlobalAddress(GV, 0,
486  AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags);
487  } else {
488  // ADRP + ADDX
489  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
490  ADRPReg)
491  .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
492 
493  ResultReg = createResultReg(&AArch64::GPR64spRegClass);
494  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
495  ResultReg)
496  .addReg(ADRPReg)
497  .addGlobalAddress(GV, 0,
498  AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
499  .addImm(0);
500  }
501  return ResultReg;
502 }
503 
504 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
505  EVT CEVT = TLI.getValueType(DL, C->getType(), true);
506 
507  // Only handle simple types.
508  if (!CEVT.isSimple())
509  return 0;
510  MVT VT = CEVT.getSimpleVT();
511 
512  if (const auto *CI = dyn_cast<ConstantInt>(C))
513  return materializeInt(CI, VT);
514  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
515  return materializeFP(CFP, VT);
516  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
517  return materializeGV(GV);
518 
519  return 0;
520 }
521 
522 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
523  assert(CFP->isNullValue() &&
524  "Floating-point constant is not a positive zero.");
525  MVT VT;
526  if (!isTypeLegal(CFP->getType(), VT))
527  return 0;
528 
529  if (VT != MVT::f32 && VT != MVT::f64)
530  return 0;
531 
532  bool Is64Bit = (VT == MVT::f64);
533  unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
534  unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
535  return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
536 }
537 
538 /// Check if the multiply is by a power-of-2 constant.
539 static bool isMulPowOf2(const Value *I) {
540  if (const auto *MI = dyn_cast<MulOperator>(I)) {
541  if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
542  if (C->getValue().isPowerOf2())
543  return true;
544  if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
545  if (C->getValue().isPowerOf2())
546  return true;
547  }
548  return false;
549 }
550 
551 // Computes the address to get to an object.
552 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
553 {
554  const User *U = nullptr;
555  unsigned Opcode = Instruction::UserOp1;
556  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
557  // Don't walk into other basic blocks unless the object is an alloca from
558  // another block, otherwise it may not have a virtual register assigned.
559  if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
560  FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
561  Opcode = I->getOpcode();
562  U = I;
563  }
564  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
565  Opcode = C->getOpcode();
566  U = C;
567  }
568 
569  if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
570  if (Ty->getAddressSpace() > 255)
571  // Fast instruction selection doesn't support the special
572  // address spaces.
573  return false;
574 
575  switch (Opcode) {
576  default:
577  break;
578  case Instruction::BitCast:
579  // Look through bitcasts.
580  return computeAddress(U->getOperand(0), Addr, Ty);
581 
582  case Instruction::IntToPtr:
583  // Look past no-op inttoptrs.
584  if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
585  TLI.getPointerTy(DL))
586  return computeAddress(U->getOperand(0), Addr, Ty);
587  break;
588 
589  case Instruction::PtrToInt:
590  // Look past no-op ptrtoints.
591  if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
592  return computeAddress(U->getOperand(0), Addr, Ty);
593  break;
594 
595  case Instruction::GetElementPtr: {
596  Address SavedAddr = Addr;
597  uint64_t TmpOffset = Addr.getOffset();
598 
599  // Iterate through the GEP folding the constants into offsets where
600  // we can.
601  for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
602  GTI != E; ++GTI) {
603  const Value *Op = GTI.getOperand();
604  if (StructType *STy = GTI.getStructTypeOrNull()) {
605  const StructLayout *SL = DL.getStructLayout(STy);
606  unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
607  TmpOffset += SL->getElementOffset(Idx);
608  } else {
609  uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
610  while (true) {
611  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
612  // Constant-offset addressing.
613  TmpOffset += CI->getSExtValue() * S;
614  break;
615  }
616  if (canFoldAddIntoGEP(U, Op)) {
617  // A compatible add with a constant operand. Fold the constant.
618  ConstantInt *CI =
619  cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
620  TmpOffset += CI->getSExtValue() * S;
621  // Iterate on the other operand.
622  Op = cast<AddOperator>(Op)->getOperand(0);
623  continue;
624  }
625  // Unsupported
626  goto unsupported_gep;
627  }
628  }
629  }
630 
631  // Try to grab the base operand now.
632  Addr.setOffset(TmpOffset);
633  if (computeAddress(U->getOperand(0), Addr, Ty))
634  return true;
635 
636  // We failed, restore everything and try the other options.
637  Addr = SavedAddr;
638 
639  unsupported_gep:
640  break;
641  }
642  case Instruction::Alloca: {
643  const AllocaInst *AI = cast<AllocaInst>(Obj);
644  DenseMap<const AllocaInst *, int>::iterator SI =
645  FuncInfo.StaticAllocaMap.find(AI);
646  if (SI != FuncInfo.StaticAllocaMap.end()) {
647  Addr.setKind(Address::FrameIndexBase);
648  Addr.setFI(SI->second);
649  return true;
650  }
651  break;
652  }
653  case Instruction::Add: {
654  // Adds of constants are common and easy enough.
655  const Value *LHS = U->getOperand(0);
656  const Value *RHS = U->getOperand(1);
657 
658  if (isa<ConstantInt>(LHS))
659  std::swap(LHS, RHS);
660 
661  if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
662  Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
663  return computeAddress(LHS, Addr, Ty);
664  }
665 
666  Address Backup = Addr;
667  if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
668  return true;
669  Addr = Backup;
670 
671  break;
672  }
673  case Instruction::Sub: {
674  // Subs of constants are common and easy enough.
675  const Value *LHS = U->getOperand(0);
676  const Value *RHS = U->getOperand(1);
677 
678  if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
679  Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
680  return computeAddress(LHS, Addr, Ty);
681  }
682  break;
683  }
684  case Instruction::Shl: {
685  if (Addr.getOffsetReg())
686  break;
687 
688  const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
689  if (!CI)
690  break;
691 
692  unsigned Val = CI->getZExtValue();
693  if (Val < 1 || Val > 3)
694  break;
695 
696  uint64_t NumBytes = 0;
697  if (Ty && Ty->isSized()) {
698  uint64_t NumBits = DL.getTypeSizeInBits(Ty);
699  NumBytes = NumBits / 8;
700  if (!isPowerOf2_64(NumBits))
701  NumBytes = 0;
702  }
703 
704  if (NumBytes != (1ULL << Val))
705  break;
706 
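  // The shift matches the width of the access, so it can be folded into the
  // addressing mode as a scaled register offset, e.g. an i32 load becomes
  // "ldr w0, [xBase, xOffset, lsl #2]".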
707  Addr.setShift(Val);
708  Addr.setExtendType(AArch64_AM::LSL);
709 
710  const Value *Src = U->getOperand(0);
711  if (const auto *I = dyn_cast<Instruction>(Src)) {
712  if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
713  // Fold the zext or sext when it won't become a noop.
714  if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
715  if (!isIntExtFree(ZE) &&
716  ZE->getOperand(0)->getType()->isIntegerTy(32)) {
717  Addr.setExtendType(AArch64_AM::UXTW);
718  Src = ZE->getOperand(0);
719  }
720  } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
721  if (!isIntExtFree(SE) &&
722  SE->getOperand(0)->getType()->isIntegerTy(32)) {
723  Addr.setExtendType(AArch64_AM::SXTW);
724  Src = SE->getOperand(0);
725  }
726  }
727  }
728  }
729 
730  if (const auto *AI = dyn_cast<BinaryOperator>(Src))
731  if (AI->getOpcode() == Instruction::And) {
732  const Value *LHS = AI->getOperand(0);
733  const Value *RHS = AI->getOperand(1);
734 
735  if (const auto *C = dyn_cast<ConstantInt>(LHS))
736  if (C->getValue() == 0xffffffff)
737  std::swap(LHS, RHS);
738 
739  if (const auto *C = dyn_cast<ConstantInt>(RHS))
740  if (C->getValue() == 0xffffffff) {
741  Addr.setExtendType(AArch64_AM::UXTW);
742  unsigned Reg = getRegForValue(LHS);
743  if (!Reg)
744  return false;
745  bool RegIsKill = hasTrivialKill(LHS);
746  Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
747  AArch64::sub_32);
748  Addr.setOffsetReg(Reg);
749  return true;
750  }
751  }
752 
753  unsigned Reg = getRegForValue(Src);
754  if (!Reg)
755  return false;
756  Addr.setOffsetReg(Reg);
757  return true;
758  }
759  case Instruction::Mul: {
760  if (Addr.getOffsetReg())
761  break;
762 
763  if (!isMulPowOf2(U))
764  break;
765 
766  const Value *LHS = U->getOperand(0);
767  const Value *RHS = U->getOperand(1);
768 
769  // Canonicalize power-of-2 value to the RHS.
770  if (const auto *C = dyn_cast<ConstantInt>(LHS))
771  if (C->getValue().isPowerOf2())
772  std::swap(LHS, RHS);
773 
774  assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
775  const auto *C = cast<ConstantInt>(RHS);
776  unsigned Val = C->getValue().logBase2();
777  if (Val < 1 || Val > 3)
778  break;
779 
780  uint64_t NumBytes = 0;
781  if (Ty && Ty->isSized()) {
782  uint64_t NumBits = DL.getTypeSizeInBits(Ty);
783  NumBytes = NumBits / 8;
784  if (!isPowerOf2_64(NumBits))
785  NumBytes = 0;
786  }
787 
788  if (NumBytes != (1ULL << Val))
789  break;
790 
791  Addr.setShift(Val);
792  Addr.setExtendType(AArch64_AM::LSL);
793 
794  const Value *Src = LHS;
795  if (const auto *I = dyn_cast<Instruction>(Src)) {
796  if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
797  // Fold the zext or sext when it won't become a noop.
798  if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
799  if (!isIntExtFree(ZE) &&
800  ZE->getOperand(0)->getType()->isIntegerTy(32)) {
801  Addr.setExtendType(AArch64_AM::UXTW);
802  Src = ZE->getOperand(0);
803  }
804  } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
805  if (!isIntExtFree(SE) &&
806  SE->getOperand(0)->getType()->isIntegerTy(32)) {
807  Addr.setExtendType(AArch64_AM::SXTW);
808  Src = SE->getOperand(0);
809  }
810  }
811  }
812  }
813 
814  unsigned Reg = getRegForValue(Src);
815  if (!Reg)
816  return false;
817  Addr.setOffsetReg(Reg);
818  return true;
819  }
820  case Instruction::And: {
821  if (Addr.getOffsetReg())
822  break;
823 
824  if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
825  break;
826 
827  const Value *LHS = U->getOperand(0);
828  const Value *RHS = U->getOperand(1);
829 
830  if (const auto *C = dyn_cast<ConstantInt>(LHS))
831  if (C->getValue() == 0xffffffff)
832  std::swap(LHS, RHS);
833 
834  if (const auto *C = dyn_cast<ConstantInt>(RHS))
835  if (C->getValue() == 0xffffffff) {
836  Addr.setShift(0);
837  Addr.setExtendType(AArch64_AM::LSL);
838  Addr.setExtendType(AArch64_AM::UXTW);
839 
840  unsigned Reg = getRegForValue(LHS);
841  if (!Reg)
842  return false;
843  bool RegIsKill = hasTrivialKill(LHS);
844  Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
845  AArch64::sub_32);
846  Addr.setOffsetReg(Reg);
847  return true;
848  }
849  break;
850  }
851  case Instruction::SExt:
852  case Instruction::ZExt: {
853  if (!Addr.getReg() || Addr.getOffsetReg())
854  break;
855 
856  const Value *Src = nullptr;
857  // Fold the zext or sext when it won't become a noop.
858  if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
859  if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
860  Addr.setExtendType(AArch64_AM::UXTW);
861  Src = ZE->getOperand(0);
862  }
863  } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
864  if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
865  Addr.setExtendType(AArch64_AM::SXTW);
866  Src = SE->getOperand(0);
867  }
868  }
869 
870  if (!Src)
871  break;
872 
873  Addr.setShift(0);
874  unsigned Reg = getRegForValue(Src);
875  if (!Reg)
876  return false;
877  Addr.setOffsetReg(Reg);
878  return true;
879  }
880  } // end switch
881 
882  if (Addr.isRegBase() && !Addr.getReg()) {
883  unsigned Reg = getRegForValue(Obj);
884  if (!Reg)
885  return false;
886  Addr.setReg(Reg);
887  return true;
888  }
889 
890  if (!Addr.getOffsetReg()) {
891  unsigned Reg = getRegForValue(Obj);
892  if (!Reg)
893  return false;
894  Addr.setOffsetReg(Reg);
895  return true;
896  }
897 
898  return false;
899 }
900 
901 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
902  const User *U = nullptr;
903  unsigned Opcode = Instruction::UserOp1;
904  bool InMBB = true;
905 
906  if (const auto *I = dyn_cast<Instruction>(V)) {
907  Opcode = I->getOpcode();
908  U = I;
909  InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
910  } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
911  Opcode = C->getOpcode();
912  U = C;
913  }
914 
915  switch (Opcode) {
916  default: break;
917  case Instruction::BitCast:
918  // Look past bitcasts if its operand is in the same BB.
919  if (InMBB)
920  return computeCallAddress(U->getOperand(0), Addr);
921  break;
922  case Instruction::IntToPtr:
923  // Look past no-op inttoptrs if its operand is in the same BB.
924  if (InMBB &&
925  TLI.getValueType(DL, U->getOperand(0)->getType()) ==
926  TLI.getPointerTy(DL))
927  return computeCallAddress(U->getOperand(0), Addr);
928  break;
929  case Instruction::PtrToInt:
930  // Look past no-op ptrtoints if its operand is in the same BB.
931  if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
932  return computeCallAddress(U->getOperand(0), Addr);
933  break;
934  }
935 
936  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
937  Addr.setGlobalValue(GV);
938  return true;
939  }
940 
941  // If all else fails, try to materialize the value in a register.
942  if (!Addr.getGlobalValue()) {
943  Addr.setReg(getRegForValue(V));
944  return Addr.getReg() != 0;
945  }
946 
947  return false;
948 }
949 
950 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
951  EVT evt = TLI.getValueType(DL, Ty, true);
952 
953  // Only handle simple types.
954  if (evt == MVT::Other || !evt.isSimple())
955  return false;
956  VT = evt.getSimpleVT();
957 
958  // This is a legal type, but it's not something we handle in fast-isel.
959  if (VT == MVT::f128)
960  return false;
961 
962  // Handle all other legal types, i.e. a register that will directly hold this
963  // value.
964  return TLI.isTypeLegal(VT);
965 }
966 
967 /// Determine if the value type is supported by FastISel.
968 ///
969 /// FastISel for AArch64 can handle more value types than are legal. This adds
970  /// simple value types such as i1, i8, and i16.
971 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
972  if (Ty->isVectorTy() && !IsVectorAllowed)
973  return false;
974 
975  if (isTypeLegal(Ty, VT))
976  return true;
977 
978  // If this is a type that can be sign- or zero-extended to a basic operation,
979  // go ahead and accept it now.
980  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
981  return true;
982 
983  return false;
984 }
985 
986 bool AArch64FastISel::isValueAvailable(const Value *V) const {
987  if (!isa<Instruction>(V))
988  return true;
989 
990  const auto *I = cast<Instruction>(V);
991  return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
992 }
993 
994 bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
995  unsigned ScaleFactor = getImplicitScaleFactor(VT);
996  if (!ScaleFactor)
997  return false;
998 
999  bool ImmediateOffsetNeedsLowering = false;
1000  bool RegisterOffsetNeedsLowering = false;
1001  int64_t Offset = Addr.getOffset();
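  // A negative or unaligned offset can only use the 9-bit signed unscaled
  // form (LDUR/STUR); an aligned, positive offset must fit the scaled 12-bit
  // unsigned form. Everything else has to be lowered into a separate add.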
1002  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
1003  ImmediateOffsetNeedsLowering = true;
1004  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1005  !isUInt<12>(Offset / ScaleFactor))
1006  ImmediateOffsetNeedsLowering = true;
1007 
1008  // Cannot encode an offset register and an immediate offset in the same
1009  // instruction. Fold the immediate offset into the load/store instruction and
1010  // emit an additional add to take care of the offset register.
1011  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1012  RegisterOffsetNeedsLowering = true;
1013 
1014  // Cannot encode zero register as base.
1015  if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1016  RegisterOffsetNeedsLowering = true;
1017 
1018  // If this is a stack pointer and the offset needs to be simplified then put
1019  // the alloca address into a register, set the base type back to register and
1020  // continue. This should almost never happen.
1021  if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1022  {
1023  unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1024  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
1025  ResultReg)
1026  .addFrameIndex(Addr.getFI())
1027  .addImm(0)
1028  .addImm(0);
1029  Addr.setKind(Address::RegBase);
1030  Addr.setReg(ResultReg);
1031  }
1032 
1033  if (RegisterOffsetNeedsLowering) {
1034  unsigned ResultReg = 0;
1035  if (Addr.getReg()) {
1036  if (Addr.getExtendType() == AArch64_AM::SXTW ||
1037  Addr.getExtendType() == AArch64_AM::UXTW )
1038  ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1039  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1040  /*TODO:IsKill=*/false, Addr.getExtendType(),
1041  Addr.getShift());
1042  else
1043  ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1044  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1045  /*TODO:IsKill=*/false, AArch64_AM::LSL,
1046  Addr.getShift());
1047  } else {
1048  if (Addr.getExtendType() == AArch64_AM::UXTW)
1049  ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1050  /*Op0IsKill=*/false, Addr.getShift(),
1051  /*IsZExt=*/true);
1052  else if (Addr.getExtendType() == AArch64_AM::SXTW)
1053  ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1054  /*Op0IsKill=*/false, Addr.getShift(),
1055  /*IsZExt=*/false);
1056  else
1057  ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1058  /*Op0IsKill=*/false, Addr.getShift());
1059  }
1060  if (!ResultReg)
1061  return false;
1062 
1063  Addr.setReg(ResultReg);
1064  Addr.setOffsetReg(0);
1065  Addr.setShift(0);
1066  Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1067  }
1068 
1069  // Since the offset is too large for the load/store instruction get the
1070  // reg+offset into a register.
1071  if (ImmediateOffsetNeedsLowering) {
1072  unsigned ResultReg;
1073  if (Addr.getReg())
1074  // Try to fold the immediate into the add instruction.
1075  ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset);
1076  else
1077  ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1078 
1079  if (!ResultReg)
1080  return false;
1081  Addr.setReg(ResultReg);
1082  Addr.setOffset(0);
1083  }
1084  return true;
1085 }
1086 
1087 void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1088  const MachineInstrBuilder &MIB,
1089  MachineMemOperand::Flags Flags,
1090  unsigned ScaleFactor,
1091  MachineMemOperand *MMO) {
1092  int64_t Offset = Addr.getOffset() / ScaleFactor;
1093  // Frame base works a bit differently. Handle it separately.
1094  if (Addr.isFIBase()) {
1095  int FI = Addr.getFI();
1096  // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
1097  // and alignment should be based on the VT.
1098  MMO = FuncInfo.MF->getMachineMemOperand(
1099  MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1100  MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
1101  // Now add the rest of the operands.
1102  MIB.addFrameIndex(FI).addImm(Offset);
1103  } else {
1104  assert(Addr.isRegBase() && "Unexpected address kind.");
1105  const MCInstrDesc &II = MIB->getDesc();
1106  unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1107  Addr.setReg(
1108  constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1109  Addr.setOffsetReg(
1110  constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1111  if (Addr.getOffsetReg()) {
1112  assert(Addr.getOffset() == 0 && "Unexpected offset");
1113  bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1114  Addr.getExtendType() == AArch64_AM::SXTX;
1115  MIB.addReg(Addr.getReg());
1116  MIB.addReg(Addr.getOffsetReg());
1117  MIB.addImm(IsSigned);
1118  MIB.addImm(Addr.getShift() != 0);
1119  } else
1120  MIB.addReg(Addr.getReg()).addImm(Offset);
1121  }
1122 
1123  if (MMO)
1124  MIB.addMemOperand(MMO);
1125 }
1126 
1127 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1128  const Value *RHS, bool SetFlags,
1129  bool WantResult, bool IsZExt) {
1130  AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1131  bool NeedExtend = false;
1132  switch (RetVT.SimpleTy) {
1133  default:
1134  return 0;
1135  case MVT::i1:
1136  NeedExtend = true;
1137  break;
1138  case MVT::i8:
1139  NeedExtend = true;
1140  ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1141  break;
1142  case MVT::i16:
1143  NeedExtend = true;
1144  ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1145  break;
1146  case MVT::i32: // fall-through
1147  case MVT::i64:
1148  break;
1149  }
1150  MVT SrcVT = RetVT;
1151  RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1152 
1153  // Canonicalize immediates to the RHS first.
1154  if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1155  std::swap(LHS, RHS);
1156 
1157  // Canonicalize mul by power of 2 to the RHS.
1158  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1159  if (isMulPowOf2(LHS))
1160  std::swap(LHS, RHS);
1161 
1162  // Canonicalize shift immediate to the RHS.
1163  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1164  if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1165  if (isa<ConstantInt>(SI->getOperand(1)))
1166  if (SI->getOpcode() == Instruction::Shl ||
1167  SI->getOpcode() == Instruction::LShr ||
1168  SI->getOpcode() == Instruction::AShr )
1169  std::swap(LHS, RHS);
1170 
1171  unsigned LHSReg = getRegForValue(LHS);
1172  if (!LHSReg)
1173  return 0;
1174  bool LHSIsKill = hasTrivialKill(LHS);
1175 
1176  if (NeedExtend)
1177  LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1178 
1179  unsigned ResultReg = 0;
1180  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1181  uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1182  if (C->isNegative())
1183  ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
1184  SetFlags, WantResult);
1185  else
1186  ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
1187  WantResult);
1188  } else if (const auto *C = dyn_cast<Constant>(RHS))
1189  if (C->isNullValue())
1190  ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags,
1191  WantResult);
1192 
1193  if (ResultReg)
1194  return ResultReg;
1195 
1196  // Only extend the RHS within the instruction if there is a valid extend type.
1197  if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1198  isValueAvailable(RHS)) {
1199  if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
1200  if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
1201  if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
1202  unsigned RHSReg = getRegForValue(SI->getOperand(0));
1203  if (!RHSReg)
1204  return 0;
1205  bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1206  return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1207  RHSIsKill, ExtendType, C->getZExtValue(),
1208  SetFlags, WantResult);
1209  }
1210  unsigned RHSReg = getRegForValue(RHS);
1211  if (!RHSReg)
1212  return 0;
1213  bool RHSIsKill = hasTrivialKill(RHS);
1214  return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1215  ExtendType, 0, SetFlags, WantResult);
1216  }
1217 
1218  // Check if the mul can be folded into the instruction.
1219  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1220  if (isMulPowOf2(RHS)) {
1221  const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1222  const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1223 
1224  if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1225  if (C->getValue().isPowerOf2())
1226  std::swap(MulLHS, MulRHS);
1227 
1228  assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1229  uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1230  unsigned RHSReg = getRegForValue(MulLHS);
1231  if (!RHSReg)
1232  return 0;
1233  bool RHSIsKill = hasTrivialKill(MulLHS);
1234  ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1235  RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags,
1236  WantResult);
1237  if (ResultReg)
1238  return ResultReg;
1239  }
1240  }
1241 
1242  // Check if the shift can be folded into the instruction.
1243  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1244  if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1245  if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1246  AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1247  switch (SI->getOpcode()) {
1248  default: break;
1249  case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
1250  case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1251  case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1252  }
1253  uint64_t ShiftVal = C->getZExtValue();
1254  if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1255  unsigned RHSReg = getRegForValue(SI->getOperand(0));
1256  if (!RHSReg)
1257  return 0;
1258  bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1259  ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1260  RHSIsKill, ShiftType, ShiftVal, SetFlags,
1261  WantResult);
1262  if (ResultReg)
1263  return ResultReg;
1264  }
1265  }
1266  }
1267  }
1268 
1269  unsigned RHSReg = getRegForValue(RHS);
1270  if (!RHSReg)
1271  return 0;
1272  bool RHSIsKill = hasTrivialKill(RHS);
1273 
1274  if (NeedExtend)
1275  RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1276 
1277  return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1278  SetFlags, WantResult);
1279 }
1280 
1281 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1282  bool LHSIsKill, unsigned RHSReg,
1283  bool RHSIsKill, bool SetFlags,
1284  bool WantResult) {
1285  assert(LHSReg && RHSReg && "Invalid register number.");
1286 
1287  if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1288  RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1289  return 0;
1290 
1291  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1292  return 0;
1293 
1294  static const unsigned OpcTable[2][2][2] = {
1295  { { AArch64::SUBWrr, AArch64::SUBXrr },
1296  { AArch64::ADDWrr, AArch64::ADDXrr } },
1297  { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1298  { AArch64::ADDSWrr, AArch64::ADDSXrr } }
1299  };
1300  bool Is64Bit = RetVT == MVT::i64;
1301  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1302  const TargetRegisterClass *RC =
1303  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1304  unsigned ResultReg;
1305  if (WantResult)
1306  ResultReg = createResultReg(RC);
1307  else
1308  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1309 
1310  const MCInstrDesc &II = TII.get(Opc);
1311  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1312  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1313  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1314  .addReg(LHSReg, getKillRegState(LHSIsKill))
1315  .addReg(RHSReg, getKillRegState(RHSIsKill));
1316  return ResultReg;
1317 }
1318 
1319 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1320  bool LHSIsKill, uint64_t Imm,
1321  bool SetFlags, bool WantResult) {
1322  assert(LHSReg && "Invalid register number.");
1323 
1324  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1325  return 0;
1326 
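  // The ADD/SUB immediate is 12 bits wide and may optionally be shifted left
  // by 12, so either the value fits in 0..4095 directly or its low 12 bits
  // are zero and the rest fits in the shifted field; otherwise give up here.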
1327  unsigned ShiftImm;
1328  if (isUInt<12>(Imm))
1329  ShiftImm = 0;
1330  else if ((Imm & 0xfff000) == Imm) {
1331  ShiftImm = 12;
1332  Imm >>= 12;
1333  } else
1334  return 0;
1335 
1336  static const unsigned OpcTable[2][2][2] = {
1337  { { AArch64::SUBWri, AArch64::SUBXri },
1338  { AArch64::ADDWri, AArch64::ADDXri } },
1339  { { AArch64::SUBSWri, AArch64::SUBSXri },
1340  { AArch64::ADDSWri, AArch64::ADDSXri } }
1341  };
1342  bool Is64Bit = RetVT == MVT::i64;
1343  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1344  const TargetRegisterClass *RC;
1345  if (SetFlags)
1346  RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1347  else
1348  RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1349  unsigned ResultReg;
1350  if (WantResult)
1351  ResultReg = createResultReg(RC);
1352  else
1353  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1354 
1355  const MCInstrDesc &II = TII.get(Opc);
1356  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1357  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1358  .addReg(LHSReg, getKillRegState(LHSIsKill))
1359  .addImm(Imm)
1360  .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1361  return ResultReg;
1362 }
1363 
1364 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1365  bool LHSIsKill, unsigned RHSReg,
1366  bool RHSIsKill,
1367  AArch64_AM::ShiftExtendType ShiftType,
1368  uint64_t ShiftImm, bool SetFlags,
1369  bool WantResult) {
1370  assert(LHSReg && RHSReg && "Invalid register number.");
1371  assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1372  RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1373 
1374  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1375  return 0;
1376 
1377  // Don't deal with undefined shifts.
1378  if (ShiftImm >= RetVT.getSizeInBits())
1379  return 0;
1380 
1381  static const unsigned OpcTable[2][2][2] = {
1382  { { AArch64::SUBWrs, AArch64::SUBXrs },
1383  { AArch64::ADDWrs, AArch64::ADDXrs } },
1384  { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1385  { AArch64::ADDSWrs, AArch64::ADDSXrs } }
1386  };
1387  bool Is64Bit = RetVT == MVT::i64;
1388  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1389  const TargetRegisterClass *RC =
1390  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1391  unsigned ResultReg;
1392  if (WantResult)
1393  ResultReg = createResultReg(RC);
1394  else
1395  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1396 
1397  const MCInstrDesc &II = TII.get(Opc);
1398  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1399  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1400  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1401  .addReg(LHSReg, getKillRegState(LHSIsKill))
1402  .addReg(RHSReg, getKillRegState(RHSIsKill))
1403  .addImm(getShifterImm(ShiftType, ShiftImm));
1404  return ResultReg;
1405 }
1406 
1407 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1408  bool LHSIsKill, unsigned RHSReg,
1409  bool RHSIsKill,
1410  AArch64_AM::ShiftExtendType ExtType,
1411  uint64_t ShiftImm, bool SetFlags,
1412  bool WantResult) {
1413  assert(LHSReg && RHSReg && "Invalid register number.");
1414  assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1415  RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1416 
1417  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1418  return 0;
1419 
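  // The extended-register form only allows a small left shift on the extended
  // operand; reject shift amounts this code does not handle.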
1420  if (ShiftImm >= 4)
1421  return 0;
1422 
1423  static const unsigned OpcTable[2][2][2] = {
1424  { { AArch64::SUBWrx, AArch64::SUBXrx },
1425  { AArch64::ADDWrx, AArch64::ADDXrx } },
1426  { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1427  { AArch64::ADDSWrx, AArch64::ADDSXrx } }
1428  };
1429  bool Is64Bit = RetVT == MVT::i64;
1430  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1431  const TargetRegisterClass *RC = nullptr;
1432  if (SetFlags)
1433  RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1434  else
1435  RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1436  unsigned ResultReg;
1437  if (WantResult)
1438  ResultReg = createResultReg(RC);
1439  else
1440  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1441 
1442  const MCInstrDesc &II = TII.get(Opc);
1443  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1444  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1445  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1446  .addReg(LHSReg, getKillRegState(LHSIsKill))
1447  .addReg(RHSReg, getKillRegState(RHSIsKill))
1448  .addImm(getArithExtendImm(ExtType, ShiftImm));
1449  return ResultReg;
1450 }
1451 
1452 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1453  Type *Ty = LHS->getType();
1454  EVT EVT = TLI.getValueType(DL, Ty, true);
1455  if (!EVT.isSimple())
1456  return false;
1457  MVT VT = EVT.getSimpleVT();
1458 
1459  switch (VT.SimpleTy) {
1460  default:
1461  return false;
1462  case MVT::i1:
1463  case MVT::i8:
1464  case MVT::i16:
1465  case MVT::i32:
1466  case MVT::i64:
1467  return emitICmp(VT, LHS, RHS, IsZExt);
1468  case MVT::f32:
1469  case MVT::f64:
1470  return emitFCmp(VT, LHS, RHS);
1471  }
1472 }
1473 
1474 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1475  bool IsZExt) {
1476  return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1477  IsZExt) != 0;
1478 }
1479 
1480 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1481  uint64_t Imm) {
1482  return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
1483  /*SetFlags=*/true, /*WantResult=*/false) != 0;
1484 }
1485 
1486 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1487  if (RetVT != MVT::f32 && RetVT != MVT::f64)
1488  return false;
1489 
1490  // Check to see if the 2nd operand is a constant that we can encode directly
1491  // in the compare.
1492  bool UseImm = false;
1493  if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1494  if (CFP->isZero() && !CFP->isNegative())
1495  UseImm = true;
1496 
1497  unsigned LHSReg = getRegForValue(LHS);
1498  if (!LHSReg)
1499  return false;
1500  bool LHSIsKill = hasTrivialKill(LHS);
1501 
1502  if (UseImm) {
1503  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1504  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1505  .addReg(LHSReg, getKillRegState(LHSIsKill));
1506  return true;
1507  }
1508 
1509  unsigned RHSReg = getRegForValue(RHS);
1510  if (!RHSReg)
1511  return false;
1512  bool RHSIsKill = hasTrivialKill(RHS);
1513 
1514  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1515  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1516  .addReg(LHSReg, getKillRegState(LHSIsKill))
1517  .addReg(RHSReg, getKillRegState(RHSIsKill));
1518  return true;
1519 }
1520 
1521 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1522  bool SetFlags, bool WantResult, bool IsZExt) {
1523  return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1524  IsZExt);
1525 }
1526 
1527 /// This method is a wrapper to simplify add emission.
1528 ///
1529 /// First try to emit an add with an immediate operand using emitAddSub_ri. If
1530 /// that fails, then try to materialize the immediate into a register and use
1531 /// emitAddSub_rr instead.
1532 unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
1533  int64_t Imm) {
1534  unsigned ResultReg;
1535  if (Imm < 0)
1536  ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm);
1537  else
1538  ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm);
1539 
1540  if (ResultReg)
1541  return ResultReg;
1542 
1543  unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1544  if (!CReg)
1545  return 0;
1546 
1547  ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true);
1548  return ResultReg;
1549 }
1550 
1551 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1552  bool SetFlags, bool WantResult, bool IsZExt) {
1553  return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1554  IsZExt);
1555 }
1556 
1557 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1558  bool LHSIsKill, unsigned RHSReg,
1559  bool RHSIsKill, bool WantResult) {
1560  return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1561  RHSIsKill, /*SetFlags=*/true, WantResult);
1562 }
1563 
1564 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1565  bool LHSIsKill, unsigned RHSReg,
1566  bool RHSIsKill,
1567  AArch64_AM::ShiftExtendType ShiftType,
1568  uint64_t ShiftImm, bool WantResult) {
1569  return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1570  RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
1571  WantResult);
1572 }
1573 
1574 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1575  const Value *LHS, const Value *RHS) {
1576  // Canonicalize immediates to the RHS first.
1577  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1578  std::swap(LHS, RHS);
1579 
1580  // Canonicalize mul by power-of-2 to the RHS.
1581  if (LHS->hasOneUse() && isValueAvailable(LHS))
1582  if (isMulPowOf2(LHS))
1583  std::swap(LHS, RHS);
1584 
1585  // Canonicalize shift immediate to the RHS.
1586  if (LHS->hasOneUse() && isValueAvailable(LHS))
1587  if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1588  if (isa<ConstantInt>(SI->getOperand(1)))
1589  std::swap(LHS, RHS);
1590 
1591  unsigned LHSReg = getRegForValue(LHS);
1592  if (!LHSReg)
1593  return 0;
1594  bool LHSIsKill = hasTrivialKill(LHS);
1595 
1596  unsigned ResultReg = 0;
1597  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1598  uint64_t Imm = C->getZExtValue();
1599  ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
1600  }
1601  if (ResultReg)
1602  return ResultReg;
1603 
1604  // Check if the mul can be folded into the instruction.
1605  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1606  if (isMulPowOf2(RHS)) {
1607  const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1608  const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1609 
1610  if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1611  if (C->getValue().isPowerOf2())
1612  std::swap(MulLHS, MulRHS);
1613 
1614  assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1615  uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1616 
1617  unsigned RHSReg = getRegForValue(MulLHS);
1618  if (!RHSReg)
1619  return 0;
1620  bool RHSIsKill = hasTrivialKill(MulLHS);
1621  ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1622  RHSIsKill, ShiftVal);
1623  if (ResultReg)
1624  return ResultReg;
1625  }
1626  }
1627 
1628  // Check if the shift can be folded into the instruction.
1629  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1630  if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1631  if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1632  uint64_t ShiftVal = C->getZExtValue();
1633  unsigned RHSReg = getRegForValue(SI->getOperand(0));
1634  if (!RHSReg)
1635  return 0;
1636  bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1637  ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1638  RHSIsKill, ShiftVal);
1639  if (ResultReg)
1640  return ResultReg;
1641  }
1642  }
1643 
1644  unsigned RHSReg = getRegForValue(RHS);
1645  if (!RHSReg)
1646  return 0;
1647  bool RHSIsKill = hasTrivialKill(RHS);
1648 
1649  MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1650  ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1651  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1652  uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1653  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1654  }
1655  return ResultReg;
1656 }
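// Illustrative sketch for emitLogicalOp above (hypothetical registers w0/w1):
// the mul-by-power-of-2 and shl canonicalizations let the shift be folded into
// the logical instruction's shifted-register form. For example, IR such as
//   %s = shl i32 %b, 2
//   %r = and i32 %a, %s
// can be selected through emitLogicalOp_rs as a single
//   and w0, w0, w1, lsl #2
// instead of a separate LSL followed by an AND.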
1657 
1658 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1659  unsigned LHSReg, bool LHSIsKill,
1660  uint64_t Imm) {
1661  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1662  "ISD nodes are not consecutive!");
1663  static const unsigned OpcTable[3][2] = {
1664  { AArch64::ANDWri, AArch64::ANDXri },
1665  { AArch64::ORRWri, AArch64::ORRXri },
1666  { AArch64::EORWri, AArch64::EORXri }
1667  };
1668  const TargetRegisterClass *RC;
1669  unsigned Opc;
1670  unsigned RegSize;
1671  switch (RetVT.SimpleTy) {
1672  default:
1673  return 0;
1674  case MVT::i1:
1675  case MVT::i8:
1676  case MVT::i16:
1677  case MVT::i32: {
1678  unsigned Idx = ISDOpc - ISD::AND;
1679  Opc = OpcTable[Idx][0];
1680  RC = &AArch64::GPR32spRegClass;
1681  RegSize = 32;
1682  break;
1683  }
1684  case MVT::i64:
1685  Opc = OpcTable[ISDOpc - ISD::AND][1];
1686  RC = &AArch64::GPR64spRegClass;
1687  RegSize = 64;
1688  break;
1689  }
1690 
1691  if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1692  return 0;
1693 
1694  unsigned ResultReg =
1695  fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
1696  AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1697  if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1698  uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1699  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1700  }
1701  return ResultReg;
1702 }
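// Illustrative note for emitLogicalOp_ri above: AArch64 logical instructions
// only encode "logical immediates" (rotated, contiguous runs of ones), which
// is why the isLogicalImmediate check can reject a constant. Assuming the
// value lives in w0:
//   and i32 %x, 255        -> and w0, w0, #0xff   (encodable)
//   and i32 %x, 0x12345678 -> not encodable; emitLogicalOp_ri returns 0 and
//                             the caller falls back to a register-register AND.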
1703 
1704 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1705  unsigned LHSReg, bool LHSIsKill,
1706  unsigned RHSReg, bool RHSIsKill,
1707  uint64_t ShiftImm) {
1708  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1709  "ISD nodes are not consecutive!");
1710  static const unsigned OpcTable[3][2] = {
1711  { AArch64::ANDWrs, AArch64::ANDXrs },
1712  { AArch64::ORRWrs, AArch64::ORRXrs },
1713  { AArch64::EORWrs, AArch64::EORXrs }
1714  };
1715 
1716  // Don't deal with undefined shifts.
1717  if (ShiftImm >= RetVT.getSizeInBits())
1718  return 0;
1719 
1720  const TargetRegisterClass *RC;
1721  unsigned Opc;
1722  switch (RetVT.SimpleTy) {
1723  default:
1724  return 0;
1725  case MVT::i1:
1726  case MVT::i8:
1727  case MVT::i16:
1728  case MVT::i32:
1729  Opc = OpcTable[ISDOpc - ISD::AND][0];
1730  RC = &AArch64::GPR32RegClass;
1731  break;
1732  case MVT::i64:
1733  Opc = OpcTable[ISDOpc - ISD::AND][1];
1734  RC = &AArch64::GPR64RegClass;
1735  break;
1736  }
1737  unsigned ResultReg =
1738  fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1739  AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1740  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1741  uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1742  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1743  }
1744  return ResultReg;
1745 }
1746 
1747 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1748  uint64_t Imm) {
1749  return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
1750 }
1751 
1752 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1753  bool WantZExt, MachineMemOperand *MMO) {
1754  if (!TLI.allowsMisalignedMemoryAccesses(VT))
1755  return 0;
1756 
1757  // Simplify this down to something we can handle.
1758  if (!simplifyAddress(Addr, VT))
1759  return 0;
1760 
1761  unsigned ScaleFactor = getImplicitScaleFactor(VT);
1762  if (!ScaleFactor)
1763  llvm_unreachable("Unexpected value type.");
1764 
1765  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1766  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1767  bool UseScaled = true;
1768  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1769  UseScaled = false;
1770  ScaleFactor = 1;
1771  }
1772 
1773  static const unsigned GPOpcTable[2][8][4] = {
1774  // Sign-extend.
1775  { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
1776  AArch64::LDURXi },
1777  { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
1778  AArch64::LDURXi },
1779  { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
1780  AArch64::LDRXui },
1781  { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
1782  AArch64::LDRXui },
1783  { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1784  AArch64::LDRXroX },
1785  { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1786  AArch64::LDRXroX },
1787  { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1788  AArch64::LDRXroW },
1789  { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1790  AArch64::LDRXroW }
1791  },
1792  // Zero-extend.
1793  { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1794  AArch64::LDURXi },
1795  { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1796  AArch64::LDURXi },
1797  { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1798  AArch64::LDRXui },
1799  { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1800  AArch64::LDRXui },
1801  { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1802  AArch64::LDRXroX },
1803  { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1804  AArch64::LDRXroX },
1805  { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1806  AArch64::LDRXroW },
1807  { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1808  AArch64::LDRXroW }
1809  }
1810  };
1811 
1812  static const unsigned FPOpcTable[4][2] = {
1813  { AArch64::LDURSi, AArch64::LDURDi },
1814  { AArch64::LDRSui, AArch64::LDRDui },
1815  { AArch64::LDRSroX, AArch64::LDRDroX },
1816  { AArch64::LDRSroW, AArch64::LDRDroW }
1817  };
1818 
1819  unsigned Opc;
1820  const TargetRegisterClass *RC;
1821  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1822  Addr.getOffsetReg();
1823  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1824  if (Addr.getExtendType() == AArch64_AM::UXTW ||
1825  Addr.getExtendType() == AArch64_AM::SXTW)
1826  Idx++;
1827 
1828  bool IsRet64Bit = RetVT == MVT::i64;
1829  switch (VT.SimpleTy) {
1830  default:
1831  llvm_unreachable("Unexpected value type.");
1832  case MVT::i1: // Intentional fall-through.
1833  case MVT::i8:
1834  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1835  RC = (IsRet64Bit && !WantZExt) ?
1836  &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1837  break;
1838  case MVT::i16:
1839  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1840  RC = (IsRet64Bit && !WantZExt) ?
1841  &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1842  break;
1843  case MVT::i32:
1844  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1845  RC = (IsRet64Bit && !WantZExt) ?
1846  &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1847  break;
1848  case MVT::i64:
1849  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1850  RC = &AArch64::GPR64RegClass;
1851  break;
1852  case MVT::f32:
1853  Opc = FPOpcTable[Idx][0];
1854  RC = &AArch64::FPR32RegClass;
1855  break;
1856  case MVT::f64:
1857  Opc = FPOpcTable[Idx][1];
1858  RC = &AArch64::FPR64RegClass;
1859  break;
1860  }
1861 
1862  // Create the base instruction, then add the operands.
1863  unsigned ResultReg = createResultReg(RC);
1864  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1865  TII.get(Opc), ResultReg);
1866  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1867 
1868  // Loading an i1 requires special handling.
1869  if (VT == MVT::i1) {
1870  unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
1871  assert(ANDReg && "Unexpected AND instruction emission failure.");
1872  ResultReg = ANDReg;
1873  }
1874 
1875  // For zero-extending loads to 64bit we emit a 32bit load and then convert
1876  // the 32bit reg to a 64bit reg.
1877  if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1878  unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
1879  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1880  TII.get(AArch64::SUBREG_TO_REG), Reg64)
1881  .addImm(0)
1882  .addReg(ResultReg, getKillRegState(true))
1883  .addImm(AArch64::sub_32);
1884  ResultReg = Reg64;
1885  }
1886  return ResultReg;
1887 }
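// Illustrative sketch of the addressing-mode choice in emitLoad above,
// assuming the base pointer is in x0:
//   i32 load, offset +8 -> ldr  w1, [x0, #8]   (scaled, 12-bit unsigned; 8/4 = imm 2)
//   i32 load, offset -4 -> ldur w1, [x0, #-4]  (unscaled, 9-bit signed)
// A zero-extending load whose result feeds an i64 zext keeps the 32-bit load
// (e.g. ldr w1, [x0]) and places it in the low half of an x register via the
// SUBREG_TO_REG step; writing a w register already zeroes the upper 32 bits.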
1888 
1889 bool AArch64FastISel::selectAddSub(const Instruction *I) {
1890  MVT VT;
1891  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1892  return false;
1893 
1894  if (VT.isVector())
1895  return selectOperator(I, I->getOpcode());
1896 
1897  unsigned ResultReg;
1898  switch (I->getOpcode()) {
1899  default:
1900  llvm_unreachable("Unexpected instruction.");
1901  case Instruction::Add:
1902  ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1903  break;
1904  case Instruction::Sub:
1905  ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1906  break;
1907  }
1908  if (!ResultReg)
1909  return false;
1910 
1911  updateValueMap(I, ResultReg);
1912  return true;
1913 }
1914 
1915 bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1916  MVT VT;
1917  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1918  return false;
1919 
1920  if (VT.isVector())
1921  return selectOperator(I, I->getOpcode());
1922 
1923  unsigned ResultReg;
1924  switch (I->getOpcode()) {
1925  default:
1926  llvm_unreachable("Unexpected instruction.");
1927  case Instruction::And:
1928  ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1929  break;
1930  case Instruction::Or:
1931  ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1932  break;
1933  case Instruction::Xor:
1934  ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1935  break;
1936  }
1937  if (!ResultReg)
1938  return false;
1939 
1940  updateValueMap(I, ResultReg);
1941  return true;
1942 }
1943 
1944 bool AArch64FastISel::selectLoad(const Instruction *I) {
1945  MVT VT;
1946  // Verify we have a legal type before going any further. Currently, we handle
1947  // simple types that will directly fit in a register (i32/f32/i64/f64) or
1948  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1949  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1950  cast<LoadInst>(I)->isAtomic())
1951  return false;
1952 
1953  const Value *SV = I->getOperand(0);
1954  if (TLI.supportSwiftError()) {
1955  // Swifterror values can come from either a function parameter with
1956  // swifterror attribute or an alloca with swifterror attribute.
1957  if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1958  if (Arg->hasSwiftErrorAttr())
1959  return false;
1960  }
1961 
1962  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1963  if (Alloca->isSwiftError())
1964  return false;
1965  }
1966  }
1967 
1968  // See if we can handle this address.
1969  Address Addr;
1970  if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1971  return false;
1972 
1973  // Fold the following sign-/zero-extend into the load instruction.
1974  bool WantZExt = true;
1975  MVT RetVT = VT;
1976  const Value *IntExtVal = nullptr;
1977  if (I->hasOneUse()) {
1978  if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1979  if (isTypeSupported(ZE->getType(), RetVT))
1980  IntExtVal = ZE;
1981  else
1982  RetVT = VT;
1983  } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1984  if (isTypeSupported(SE->getType(), RetVT))
1985  IntExtVal = SE;
1986  else
1987  RetVT = VT;
1988  WantZExt = false;
1989  }
1990  }
1991 
1992  unsigned ResultReg =
1993  emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1994  if (!ResultReg)
1995  return false;
1996 
1997  // There are a few different cases we have to handle, because the load or the
1998  // sign-/zero-extend might not be selected by FastISel if we fall back to
1999  // SelectionDAG. There is also an ordering issue when both instructions are in
2000  // different basic blocks.
2001  // 1.) The load instruction is selected by FastISel, but the integer extend is
2002  //     not. This usually happens when the integer extend is in a different
2003  // basic block and SelectionDAG took over for that basic block.
2004  // 2.) The load instruction is selected before the integer extend. This only
2005  // happens when the integer extend is in a different basic block.
2006  // 3.) The load instruction is selected by SelectionDAG and the integer extend
2007  // by FastISel. This happens if there are instructions between the load
2008  // and the integer extend that couldn't be selected by FastISel.
2009  if (IntExtVal) {
2010  // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2011  // could select it. Emit a copy to subreg if necessary. FastISel will remove
2012  // it when it selects the integer extend.
2013  unsigned Reg = lookUpRegForValue(IntExtVal);
2014  auto *MI = MRI.getUniqueVRegDef(Reg);
2015  if (!MI) {
2016  if (RetVT == MVT::i64 && VT <= MVT::i32) {
2017  if (WantZExt) {
2018  // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2019  std::prev(FuncInfo.InsertPt)->eraseFromParent();
2020  ResultReg = std::prev(FuncInfo.InsertPt)->getOperand(0).getReg();
2021  } else
2022  ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2023  /*IsKill=*/true,
2024  AArch64::sub_32);
2025  }
2026  updateValueMap(I, ResultReg);
2027  return true;
2028  }
2029 
2030  // The integer extend has already been emitted - delete all the instructions
2031  // that have been emitted by the integer extend lowering code and use the
2032  // result from the load instruction directly.
2033  while (MI) {
2034  Reg = 0;
2035  for (auto &Opnd : MI->uses()) {
2036  if (Opnd.isReg()) {
2037  Reg = Opnd.getReg();
2038  break;
2039  }
2040  }
2041  MI->eraseFromParent();
2042  MI = nullptr;
2043  if (Reg)
2044  MI = MRI.getUniqueVRegDef(Reg);
2045  }
2046  updateValueMap(IntExtVal, ResultReg);
2047  return true;
2048  }
2049 
2050  updateValueMap(I, ResultReg);
2051  return true;
2052 }
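// Illustrative sketch for the extend-folding logic in selectLoad above,
// assuming a hypothetical pointer %p in x0:
//   %v = load i8, i8* %p
//   %e = zext i8 %v to i64
// is meant to become a single ldrb w1, [x0] (the zero-extension is implicit
// in the w-register write, plus the SUBREG_TO_REG widening), with the zext
// mapped to the load's result. The case analysis above handles the situations
// where the extend was already lowered separately, deleting the now-redundant
// extend instructions and reusing the load's register instead.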
2053 
2054 bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2055  unsigned AddrReg,
2056  MachineMemOperand *MMO) {
2057  unsigned Opc;
2058  switch (VT.SimpleTy) {
2059  default: return false;
2060  case MVT::i8: Opc = AArch64::STLRB; break;
2061  case MVT::i16: Opc = AArch64::STLRH; break;
2062  case MVT::i32: Opc = AArch64::STLRW; break;
2063  case MVT::i64: Opc = AArch64::STLRX; break;
2064  }
2065 
2066  const MCInstrDesc &II = TII.get(Opc);
2067  SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2068  AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2069  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2070  .addReg(SrcReg)
2071  .addReg(AddrReg)
2072  .addMemOperand(MMO);
2073  return true;
2074 }
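// Illustrative sketch for emitStoreRelease above: STLR* only supports a plain
// base-register address, so e.g. (assuming %v in w0 and %p in x1)
//   store atomic i32 %v, i32* %p release, align 4
// becomes
//   stlr w0, [x1]
// with no immediate- or register-offset form available.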
2075 
2076 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2077  MachineMemOperand *MMO) {
2078  if (!TLI.allowsMisalignedMemoryAccesses(VT))
2079  return false;
2080 
2081  // Simplify this down to something we can handle.
2082  if (!simplifyAddress(Addr, VT))
2083  return false;
2084 
2085  unsigned ScaleFactor = getImplicitScaleFactor(VT);
2086  if (!ScaleFactor)
2087  llvm_unreachable("Unexpected value type.");
2088 
2089  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2090  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2091  bool UseScaled = true;
2092  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2093  UseScaled = false;
2094  ScaleFactor = 1;
2095  }
2096 
2097  static const unsigned OpcTable[4][6] = {
2098  { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
2099  AArch64::STURSi, AArch64::STURDi },
2100  { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
2101  AArch64::STRSui, AArch64::STRDui },
2102  { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2103  AArch64::STRSroX, AArch64::STRDroX },
2104  { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2105  AArch64::STRSroW, AArch64::STRDroW }
2106  };
2107 
2108  unsigned Opc;
2109  bool VTIsi1 = false;
2110  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2111  Addr.getOffsetReg();
2112  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2113  if (Addr.getExtendType() == AArch64_AM::UXTW ||
2114  Addr.getExtendType() == AArch64_AM::SXTW)
2115  Idx++;
2116 
2117  switch (VT.SimpleTy) {
2118  default: llvm_unreachable("Unexpected value type.");
2119  case MVT::i1: VTIsi1 = true; LLVM_FALLTHROUGH;
2120  case MVT::i8: Opc = OpcTable[Idx][0]; break;
2121  case MVT::i16: Opc = OpcTable[Idx][1]; break;
2122  case MVT::i32: Opc = OpcTable[Idx][2]; break;
2123  case MVT::i64: Opc = OpcTable[Idx][3]; break;
2124  case MVT::f32: Opc = OpcTable[Idx][4]; break;
2125  case MVT::f64: Opc = OpcTable[Idx][5]; break;
2126  }
2127 
2128  // Storing an i1 requires special handling.
2129  if (VTIsi1 && SrcReg != AArch64::WZR) {
2130  unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
2131  assert(ANDReg && "Unexpected AND instruction emission failure.");
2132  SrcReg = ANDReg;
2133  }
2134  // Create the base instruction, then add the operands.
2135  const MCInstrDesc &II = TII.get(Opc);
2136  SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2137  MachineInstrBuilder MIB =
2138  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
2139  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2140 
2141  return true;
2142 }
2143 
2144 bool AArch64FastISel::selectStore(const Instruction *I) {
2145  MVT VT;
2146  const Value *Op0 = I->getOperand(0);
2147  // Verify we have a legal type before going any further. Currently, we handle
2148  // simple types that will directly fit in a register (i32/f32/i64/f64) or
2149  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2150  if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2151  return false;
2152 
2153  const Value *PtrV = I->getOperand(1);
2154  if (TLI.supportSwiftError()) {
2155  // Swifterror values can come from either a function parameter with
2156  // swifterror attribute or an alloca with swifterror attribute.
2157  if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2158  if (Arg->hasSwiftErrorAttr())
2159  return false;
2160  }
2161 
2162  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2163  if (Alloca->isSwiftError())
2164  return false;
2165  }
2166  }
2167 
2168  // Get the value to be stored into a register. Use the zero register directly
2169  // when possible to avoid an unnecessary copy and a wasted register.
2170  unsigned SrcReg = 0;
2171  if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2172  if (CI->isZero())
2173  SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2174  } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2175  if (CF->isZero() && !CF->isNegative()) {
2176  VT = MVT::getIntegerVT(VT.getSizeInBits());
2177  SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2178  }
2179  }
2180 
2181  if (!SrcReg)
2182  SrcReg = getRegForValue(Op0);
2183 
2184  if (!SrcReg)
2185  return false;
2186 
2187  auto *SI = cast<StoreInst>(I);
2188 
2189  // Try to emit a STLR for seq_cst/release.
2190  if (SI->isAtomic()) {
2191  AtomicOrdering Ord = SI->getOrdering();
2192  // The non-atomic instructions are sufficient for relaxed stores.
2193  if (isReleaseOrStronger(Ord)) {
2194  // The STLR addressing mode only supports a base reg; pass that directly.
2195  unsigned AddrReg = getRegForValue(PtrV);
2196  return emitStoreRelease(VT, SrcReg, AddrReg,
2197  createMachineMemOperandFor(I));
2198  }
2199  }
2200 
2201  // See if we can handle this address.
2202  Address Addr;
2203  if (!computeAddress(PtrV, Addr, Op0->getType()))
2204  return false;
2205 
2206  if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2207  return false;
2208  return true;
2209 }
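// Illustrative note for selectStore above: zero stores reuse the zero
// register instead of materializing a constant, e.g. (base pointer in x0)
//   store i32 0, i32* %p       -> str wzr, [x0]
//   store i64 0, i64* %p       -> str xzr, [x0]
//   store float 0.0, float* %p -> str wzr, [x0]  (+0.0 has an all-zero bit
//                                                 pattern, so the GPR zero
//                                                 register can be stored)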
2210 
2211 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2212  switch (Pred) {
2213  case CmpInst::FCMP_ONE:
2214  case CmpInst::FCMP_UEQ:
2215  default:
2216  // AL is our "false" for now. The other two need more compares.
2217  return AArch64CC::AL;
2218  case CmpInst::ICMP_EQ:
2219  case CmpInst::FCMP_OEQ:
2220  return AArch64CC::EQ;
2221  case CmpInst::ICMP_SGT:
2222  case CmpInst::FCMP_OGT:
2223  return AArch64CC::GT;
2224  case CmpInst::ICMP_SGE:
2225  case CmpInst::FCMP_OGE:
2226  return AArch64CC::GE;
2227  case CmpInst::ICMP_UGT:
2228  case CmpInst::FCMP_UGT:
2229  return AArch64CC::HI;
2230  case CmpInst::FCMP_OLT:
2231  return AArch64CC::MI;
2232  case CmpInst::ICMP_ULE:
2233  case CmpInst::FCMP_OLE:
2234  return AArch64CC::LS;
2235  case CmpInst::FCMP_ORD:
2236  return AArch64CC::VC;
2237  case CmpInst::FCMP_UNO:
2238  return AArch64CC::VS;
2239  case CmpInst::FCMP_UGE:
2240  return AArch64CC::PL;
2241  case CmpInst::ICMP_SLT:
2242  case CmpInst::FCMP_ULT:
2243  return AArch64CC::LT;
2244  case CmpInst::ICMP_SLE:
2245  case CmpInst::FCMP_ULE:
2246  return AArch64CC::LE;
2247  case CmpInst::FCMP_UNE:
2248  case CmpInst::ICMP_NE:
2249  return AArch64CC::NE;
2250  case CmpInst::ICMP_UGE:
2251  return AArch64CC::HS;
2252  case CmpInst::ICMP_ULT:
2253  return AArch64CC::LO;
2254  }
2255 }
2256 
2257 /// Try to emit a combined compare-and-branch instruction.
2258 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2259  assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2260  const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2261  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2262 
2263  const Value *LHS = CI->getOperand(0);
2264  const Value *RHS = CI->getOperand(1);
2265 
2266  MVT VT;
2267  if (!isTypeSupported(LHS->getType(), VT))
2268  return false;
2269 
2270  unsigned BW = VT.getSizeInBits();
2271  if (BW > 64)
2272  return false;
2273 
2274  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2275  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2276 
2277  // Try to take advantage of fallthrough opportunities.
2278  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2279  std::swap(TBB, FBB);
2280  Predicate = CmpInst::getInversePredicate(Predicate);
2281  }
2282 
2283  int TestBit = -1;
2284  bool IsCmpNE;
2285  switch (Predicate) {
2286  default:
2287  return false;
2288  case CmpInst::ICMP_EQ:
2289  case CmpInst::ICMP_NE:
2290  if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2291  std::swap(LHS, RHS);
2292 
2293  if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2294  return false;
2295 
2296  if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2297  if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2298  const Value *AndLHS = AI->getOperand(0);
2299  const Value *AndRHS = AI->getOperand(1);
2300 
2301  if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2302  if (C->getValue().isPowerOf2())
2303  std::swap(AndLHS, AndRHS);
2304 
2305  if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2306  if (C->getValue().isPowerOf2()) {
2307  TestBit = C->getValue().logBase2();
2308  LHS = AndLHS;
2309  }
2310  }
2311 
2312  if (VT == MVT::i1)
2313  TestBit = 0;
2314 
2315  IsCmpNE = Predicate == CmpInst::ICMP_NE;
2316  break;
2317  case CmpInst::ICMP_SLT:
2318  case CmpInst::ICMP_SGE:
2319  if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2320  return false;
2321 
2322  TestBit = BW - 1;
2323  IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2324  break;
2325  case CmpInst::ICMP_SGT:
2326  case CmpInst::ICMP_SLE:
2327  if (!isa<ConstantInt>(RHS))
2328  return false;
2329 
2330  if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2331  return false;
2332 
2333  TestBit = BW - 1;
2334  IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2335  break;
2336  } // end switch
2337 
2338  static const unsigned OpcTable[2][2][2] = {
2339  { {AArch64::CBZW, AArch64::CBZX },
2340  {AArch64::CBNZW, AArch64::CBNZX} },
2341  { {AArch64::TBZW, AArch64::TBZX },
2342  {AArch64::TBNZW, AArch64::TBNZX} }
2343  };
2344 
2345  bool IsBitTest = TestBit != -1;
2346  bool Is64Bit = BW == 64;
2347  if (TestBit < 32 && TestBit >= 0)
2348  Is64Bit = false;
2349 
2350  unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2351  const MCInstrDesc &II = TII.get(Opc);
2352 
2353  unsigned SrcReg = getRegForValue(LHS);
2354  if (!SrcReg)
2355  return false;
2356  bool SrcIsKill = hasTrivialKill(LHS);
2357 
2358  if (BW == 64 && !Is64Bit)
2359  SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
2360  AArch64::sub_32);
2361 
2362  if ((BW < 32) && !IsBitTest)
2363  SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*IsZExt=*/true);
2364 
2365  // Emit the combined compare and branch instruction.
2366  SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2367  MachineInstrBuilder MIB =
2368  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
2369  .addReg(SrcReg, getKillRegState(SrcIsKill));
2370  if (IsBitTest)
2371  MIB.addImm(TestBit);
2372  MIB.addMBB(TBB);
2373 
2374  finishCondBranch(BI->getParent(), TBB, FBB);
2375  return true;
2376 }
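// Illustrative sketch for emitCompareAndBranch above, with hypothetical
// values in w0/x0:
//   icmp eq i32 %x, 0; br                 -> cbz  w0, <target>
//   and i64 %a, 16; icmp ne i64 ..., 0    -> tbnz x0, #4, <target>
//   icmp slt i64 %x, 0; br                -> tbnz x0, #63, <target>  (sign bit)
// i.e. the compare is folded into a single CB(N)Z or TB(N)Z instead of a
// SUBS/ANDS followed by a conditional branch.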
2377 
2378 bool AArch64FastISel::selectBranch(const Instruction *I) {
2379  const BranchInst *BI = cast<BranchInst>(I);
2380  if (BI->isUnconditional()) {
2381  MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2382  fastEmitBranch(MSucc, BI->getDebugLoc());
2383  return true;
2384  }
2385 
2386  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2387  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2388 
2389  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2390  if (CI->hasOneUse() && isValueAvailable(CI)) {
2391  // Try to optimize or fold the cmp.
2392  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2393  switch (Predicate) {
2394  default:
2395  break;
2396  case CmpInst::FCMP_FALSE:
2397  fastEmitBranch(FBB, DbgLoc);
2398  return true;
2399  case CmpInst::FCMP_TRUE:
2400  fastEmitBranch(TBB, DbgLoc);
2401  return true;
2402  }
2403 
2404  // Try to emit a combined compare-and-branch first.
2405  if (emitCompareAndBranch(BI))
2406  return true;
2407 
2408  // Try to take advantage of fallthrough opportunities.
2409  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2410  std::swap(TBB, FBB);
2411  Predicate = CmpInst::getInversePredicate(Predicate);
2412  }
2413 
2414  // Emit the cmp.
2415  if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2416  return false;
2417 
2418  // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2419  // instruction.
2420  AArch64CC::CondCode CC = getCompareCC(Predicate);
2421  AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2422  switch (Predicate) {
2423  default:
2424  break;
2425  case CmpInst::FCMP_UEQ:
2426  ExtraCC = AArch64CC::EQ;
2427  CC = AArch64CC::VS;
2428  break;
2429  case CmpInst::FCMP_ONE:
2430  ExtraCC = AArch64CC::MI;
2431  CC = AArch64CC::GT;
2432  break;
2433  }
2434  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2435 
2436  // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2437  if (ExtraCC != AArch64CC::AL) {
2438  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2439  .addImm(ExtraCC)
2440  .addMBB(TBB);
2441  }
2442 
2443  // Emit the branch.
2444  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2445  .addImm(CC)
2446  .addMBB(TBB);
2447 
2448  finishCondBranch(BI->getParent(), TBB, FBB);
2449  return true;
2450  }
2451  } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2452  uint64_t Imm = CI->getZExtValue();
2453  MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2454  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
2455  .addMBB(Target);
2456 
2457  // Obtain the branch probability and add the target to the successor list.
2458  if (FuncInfo.BPI) {
2459  auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2460  BI->getParent(), Target->getBasicBlock());
2461  FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2462  } else
2463  FuncInfo.MBB->addSuccessorWithoutProb(Target);
2464  return true;
2465  } else {
2466  AArch64CC::CondCode CC = AArch64CC::AL;
2467  if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2468  // Fake-request the condition; otherwise the intrinsic might be completely
2469  // optimized away.
2470  unsigned CondReg = getRegForValue(BI->getCondition());
2471  if (!CondReg)
2472  return false;
2473 
2474  // Emit the branch.
2475  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2476  .addImm(CC)
2477  .addMBB(TBB);
2478 
2479  finishCondBranch(BI->getParent(), TBB, FBB);
2480  return true;
2481  }
2482  }
2483 
2484  unsigned CondReg = getRegForValue(BI->getCondition());
2485  if (CondReg == 0)
2486  return false;
2487  bool CondRegIsKill = hasTrivialKill(BI->getCondition());
2488 
2489  // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2490  unsigned Opcode = AArch64::TBNZW;
2491  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2492  std::swap(TBB, FBB);
2493  Opcode = AArch64::TBZW;
2494  }
2495 
2496  const MCInstrDesc &II = TII.get(Opcode);
2497  unsigned ConstrainedCondReg
2498  = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2499  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2500  .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill))
2501  .addImm(0)
2502  .addMBB(TBB);
2503 
2504  finishCondBranch(BI->getParent(), TBB, FBB);
2505  return true;
2506 }
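// Illustrative note for the generic fallback at the end of selectBranch
// above: an i1 condition arrives as the low bit of a 32-bit register, so
// (assuming the condition in w0)
//   br i1 %c, label %true, label %false
// is emitted as
//   tbnz w0, #0, <true-block>
// or as tbz with the successors swapped when the true block is the layout
// successor, so the original true block is reached by fall-through.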
2507 
2508 bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2509  const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2510  unsigned AddrReg = getRegForValue(BI->getOperand(0));
2511  if (AddrReg == 0)
2512  return false;
2513 
2514  // Emit the indirect branch.
2515  const MCInstrDesc &II = TII.get(AArch64::BR);
2516  AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
2517  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
2518 
2519  // Make sure the CFG is up-to-date.
2520  for (auto *Succ : BI->successors())
2521  FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2522 
2523  return true;
2524 }
2525 
2526 bool AArch64FastISel::selectCmp(const Instruction *I) {
2527  const CmpInst *CI = cast<CmpInst>(I);
2528 
2529  // Vectors of i1 are weird: bail out.
2530  if (CI->getType()->isVectorTy())
2531  return false;
2532 
2533  // Try to optimize or fold the cmp.
2534  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2535  unsigned ResultReg = 0;
2536  switch (Predicate) {
2537  default:
2538  break;
2539  case CmpInst::FCMP_FALSE:
2540  ResultReg = createResultReg(&AArch64::GPR32RegClass);
2541  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2542  TII.get(TargetOpcode::COPY), ResultReg)
2543  .addReg(AArch64::WZR, getKillRegState(true));
2544  break;
2545  case CmpInst::FCMP_TRUE:
2546  ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2547  break;
2548  }
2549 
2550  if (ResultReg) {
2551  updateValueMap(I, ResultReg);
2552  return true;
2553  }
2554 
2555  // Emit the cmp.
2556  if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2557  return false;
2558 
2559  ResultReg = createResultReg(&AArch64::GPR32RegClass);
2560 
2561  // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2562  // condition codes are inverted, because they are used by CSINC.
2563  static unsigned CondCodeTable[2][2] = {
2564  { AArch64CC::NE, AArch64CC::VC },
2565  { AArch64CC::PL, AArch64CC::LE }
2566  };
2567  unsigned *CondCodes = nullptr;
2568  switch (Predicate) {
2569  default:
2570  break;
2571  case CmpInst::FCMP_UEQ:
2572  CondCodes = &CondCodeTable[0][0];
2573  break;
2574  case CmpInst::FCMP_ONE:
2575  CondCodes = &CondCodeTable[1][0];
2576  break;
2577  }
2578 
2579  if (CondCodes) {
2580  unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2581  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2582  TmpReg1)
2583  .addReg(AArch64::WZR, getKillRegState(true))
2584  .addReg(AArch64::WZR, getKillRegState(true))
2585  .addImm(CondCodes[0]);
2586  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2587  ResultReg)
2588  .addReg(TmpReg1, getKillRegState(true))
2589  .addReg(AArch64::WZR, getKillRegState(true))
2590  .addImm(CondCodes[1]);
2591 
2592  updateValueMap(I, ResultReg);
2593  return true;
2594  }
2595 
2596  // Now set a register based on the comparison.
2597  AArch64CC::CondCode CC = getCompareCC(Predicate);
2598  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2599  AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2600  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2601  ResultReg)
2602  .addReg(AArch64::WZR, getKillRegState(true))
2603  .addReg(AArch64::WZR, getKillRegState(true))
2604  .addImm(invertedCC);
2605 
2606  updateValueMap(I, ResultReg);
2607  return true;
2608 }
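// Illustrative sketch of the CSINC idiom used in selectCmp above: with both
// sources tied to WZR,
//   csinc w0, wzr, wzr, <inverted cc>
// yields 1 when <cc> holds and 0 otherwise (CSINC selects WZR when the
// inverted condition is true, and WZR+1 when it is false). FCMP_UEQ/FCMP_ONE
// need the two chained CSINCs because each is the OR of two condition codes.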
2609 
2610 /// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2611 /// value.
2612 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2613  if (!SI->getType()->isIntegerTy(1))
2614  return false;
2615 
2616  const Value *Src1Val, *Src2Val;
2617  unsigned Opc = 0;
2618  bool NeedExtraOp = false;
2619  if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2620  if (CI->isOne()) {
2621  Src1Val = SI->getCondition();
2622  Src2Val = SI->getFalseValue();
2623  Opc = AArch64::ORRWrr;
2624  } else {
2625  assert(CI->isZero());
2626  Src1Val = SI->getFalseValue();
2627  Src2Val = SI->getCondition();
2628  Opc = AArch64::BICWrr;
2629  }
2630  } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2631  if (CI->isOne()) {
2632  Src1Val = SI->getCondition();
2633  Src2Val = SI->getTrueValue();
2634  Opc = AArch64::ORRWrr;
2635  NeedExtraOp = true;
2636  } else {
2637  assert(CI->isZero());
2638  Src1Val = SI->getCondition();
2639  Src2Val = SI->getTrueValue();
2640  Opc = AArch64::ANDWrr;
2641  }
2642  }
2643 
2644  if (!Opc)
2645  return false;
2646 
2647  unsigned Src1Reg = getRegForValue(Src1Val);
2648  if (!Src1Reg)
2649  return false;
2650  bool Src1IsKill = hasTrivialKill(Src1Val);
2651 
2652  unsigned Src2Reg = getRegForValue(Src2Val);
2653  if (!Src2Reg)
2654  return false;
2655  bool Src2IsKill = hasTrivialKill(Src2Val);
2656 
2657  if (NeedExtraOp) {
2658  Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1);
2659  Src1IsKill = true;
2660  }
2661  unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2662  Src1IsKill, Src2Reg, Src2IsKill);
2663  updateValueMap(SI, ResultReg);
2664  return true;
2665 }
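// Illustrative note for optimizeSelect above (i1 selects with a constant
// arm), assuming the condition in w0 and the other operand in w1:
//   select i1 %c, i1 true,  i1 %b  -> orr w2, w0, w1          (%c | %b)
//   select i1 %c, i1 %b, i1 false  -> and w2, w0, w1          (%c & %b)
//   select i1 %c, i1 false, i1 %b  -> bic w2, w1, w0          (%b & ~%c)
//   select i1 %c, i1 %b, i1 true   -> eor w3, w0, #1; orr w2, w3, w1
// so the select collapses to one or two logical instructions instead of a
// compare plus CSEL.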
2666 
2667 bool AArch64FastISel::selectSelect(const Instruction *I) {
2668  assert(isa<SelectInst>(I) && "Expected a select instruction.");
2669  MVT VT;
2670  if (!isTypeSupported(I->getType(), VT))
2671  return false;
2672 
2673  unsigned Opc;
2674  const TargetRegisterClass *RC;
2675  switch (VT.SimpleTy) {
2676  default:
2677  return false;
2678  case MVT::i1:
2679  case MVT::i8:
2680  case MVT::i16:
2681  case MVT::i32:
2682  Opc = AArch64::CSELWr;
2683  RC = &AArch64::GPR32RegClass;
2684  break;
2685  case MVT::i64:
2686  Opc = AArch64::CSELXr;
2687  RC = &AArch64::GPR64RegClass;
2688  break;
2689  case MVT::f32:
2690  Opc = AArch64::FCSELSrrr;
2691  RC = &AArch64::FPR32RegClass;
2692  break;
2693  case MVT::f64:
2694  Opc = AArch64::FCSELDrrr;
2695  RC = &AArch64::FPR64RegClass;
2696  break;
2697  }
2698 
2699  const SelectInst *SI = cast<SelectInst>(I);
2700  const Value *Cond = SI->getCondition();
2701  AArch64CC::CondCode CC = AArch64CC::AL;
2702  AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2703 
2704  if (optimizeSelect(SI))
2705  return true;
2706 
2707  // Try to pick up the flags, so we don't have to emit another compare.
2708  if (foldXALUIntrinsic(CC, I, Cond)) {
2709  // Fake request the condition to force emission of the XALU intrinsic.
2710  unsigned CondReg = getRegForValue(Cond);
2711  if (!CondReg)
2712  return false;
2713  } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2714  isValueAvailable(Cond)) {
2715  const auto *Cmp = cast<CmpInst>(Cond);
2716  // Try to optimize or fold the cmp.
2717  CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2718  const Value *FoldSelect = nullptr;
2719  switch (Predicate) {
2720  default:
2721  break;
2722  case CmpInst::FCMP_FALSE:
2723  FoldSelect = SI->getFalseValue();
2724  break;
2725  case CmpInst::FCMP_TRUE:
2726  FoldSelect = SI->getTrueValue();
2727  break;
2728  }
2729 
2730  if (FoldSelect) {
2731  unsigned SrcReg = getRegForValue(FoldSelect);
2732  if (!SrcReg)
2733  return false;
2734  unsigned UseReg = lookUpRegForValue(SI);
2735  if (UseReg)
2736  MRI.clearKillFlags(UseReg);
2737 
2738  updateValueMap(I, SrcReg);
2739  return true;
2740  }
2741 
2742  // Emit the cmp.
2743  if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2744  return false;
2745 
2746  // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2747  CC = getCompareCC(Predicate);
2748  switch (Predicate) {
2749  default:
2750  break;
2751  case CmpInst::FCMP_UEQ:
2752  ExtraCC = AArch64CC::EQ;
2753  CC = AArch64CC::VS;
2754  break;
2755  case CmpInst::FCMP_ONE:
2756  ExtraCC = AArch64CC::MI;
2757  CC = AArch64CC::GT;
2758  break;
2759  }
2760  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2761  } else {
2762  unsigned CondReg = getRegForValue(Cond);
2763  if (!CondReg)
2764  return false;
2765  bool CondIsKill = hasTrivialKill(Cond);
2766 
2767  const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2768  CondReg = constrainOperandRegClass(II, CondReg, 1);
2769 
2770  // Emit a TST instruction (ANDS wzr, reg, #imm).
2771  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
2772  AArch64::WZR)
2773  .addReg(CondReg, getKillRegState(CondIsKill))
2774  .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2775  }
2776 
2777  unsigned Src1Reg = getRegForValue(SI->getTrueValue());
2778  bool Src1IsKill = hasTrivialKill(SI->getTrueValue());
2779 
2780  unsigned Src2Reg = getRegForValue(SI->getFalseValue());
2781  bool Src2IsKill = hasTrivialKill(SI->getFalseValue());
2782 
2783  if (!Src1Reg || !Src2Reg)
2784  return false;
2785 
2786  if (ExtraCC != AArch64CC::AL) {
2787  Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2788  Src2IsKill, ExtraCC);
2789  Src2IsKill = true;
2790  }
2791  unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2792  Src2IsKill, CC);
2793  updateValueMap(I, ResultReg);
2794  return true;
2795 }
2796 
2797 bool AArch64FastISel::selectFPExt(const Instruction *I) {
2798  Value *V = I->getOperand(0);
2799  if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2800  return false;
2801 
2802  unsigned Op = getRegForValue(V);
2803  if (Op == 0)
2804  return false;
2805 
2806  unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
2807  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
2808  ResultReg).addReg(Op);
2809  updateValueMap(I, ResultReg);
2810  return true;
2811 }
2812 
2813 bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2814  Value *V = I->getOperand(0);
2815  if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2816  return false;
2817 
2818  unsigned Op = getRegForValue(V);
2819  if (Op == 0)
2820  return false;
2821 
2822  unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
2823  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
2824  ResultReg).addReg(Op);
2825  updateValueMap(I, ResultReg);
2826  return true;
2827 }
2828 
2829 // FPToUI and FPToSI
2830 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2831  MVT DestVT;
2832  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2833  return false;
2834 
2835  unsigned SrcReg = getRegForValue(I->getOperand(0));
2836  if (SrcReg == 0)
2837  return false;
2838 
2839  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2840  if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
2841  return false;
2842 
2843  unsigned Opc;
2844  if (SrcVT == MVT::f64) {
2845  if (Signed)
2846  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2847  else
2848  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2849  } else {
2850  if (Signed)
2851  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2852  else
2853  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2854  }
2855  unsigned ResultReg = createResultReg(
2856  DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2857  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2858  .addReg(SrcReg);
2859  updateValueMap(I, ResultReg);
2860  return true;
2861 }
2862 
2863 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2864  MVT DestVT;
2865  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2866  return false;
2867  // Let regular ISEL handle FP16
2868  if (DestVT == MVT::f16)
2869  return false;
2870 
2871  assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2872  "Unexpected value type.");
2873 
2874  unsigned SrcReg = getRegForValue(I->getOperand(0));
2875  if (!SrcReg)
2876  return false;
2877  bool SrcIsKill = hasTrivialKill(I->getOperand(0));
2878 
2879  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2880 
2881  // Handle sign-extension.
2882  if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2883  SrcReg =
2884  emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2885  if (!SrcReg)
2886  return false;
2887  SrcIsKill = true;
2888  }
2889 
2890  unsigned Opc;
2891  if (SrcVT == MVT::i64) {
2892  if (Signed)
2893  Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2894  else
2895  Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2896  } else {
2897  if (Signed)
2898  Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2899  else
2900  Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2901  }
2902 
2903  unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
2904  SrcIsKill);
2905  updateValueMap(I, ResultReg);
2906  return true;
2907 }
2908 
2909 bool AArch64FastISel::fastLowerArguments() {
2910  if (!FuncInfo.CanLowerReturn)
2911  return false;
2912 
2913  const Function *F = FuncInfo.Fn;
2914  if (F->isVarArg())
2915  return false;
2916 
2917  CallingConv::ID CC = F->getCallingConv();
2918  if (CC != CallingConv::C && CC != CallingConv::Swift)
2919  return false;
2920 
2921  if (Subtarget->hasCustomCallingConv())
2922  return false;
2923 
2924  // Only handle simple cases of up to 8 GPR and FPR each.
2925  unsigned GPRCnt = 0;
2926  unsigned FPRCnt = 0;
2927  for (auto const &Arg : F->args()) {
2928  if (Arg.hasAttribute(Attribute::ByVal) ||
2929  Arg.hasAttribute(Attribute::InReg) ||
2930  Arg.hasAttribute(Attribute::StructRet) ||
2931  Arg.hasAttribute(Attribute::SwiftSelf) ||
2932  Arg.hasAttribute(Attribute::SwiftError) ||
2933  Arg.hasAttribute(Attribute::Nest))
2934  return false;
2935 
2936  Type *ArgTy = Arg.getType();
2937  if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2938  return false;
2939 
2940  EVT ArgVT = TLI.getValueType(DL, ArgTy);
2941  if (!ArgVT.isSimple())
2942  return false;
2943 
2944  MVT VT = ArgVT.getSimpleVT().SimpleTy;
2945  if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2946  return false;
2947 
2948  if (VT.isVector() &&
2949  (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2950  return false;
2951 
2952  if (VT >= MVT::i1 && VT <= MVT::i64)
2953  ++GPRCnt;
2954  else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2955  VT.is128BitVector())
2956  ++FPRCnt;
2957  else
2958  return false;
2959 
2960  if (GPRCnt > 8 || FPRCnt > 8)
2961  return false;
2962  }
2963 
2964  static const MCPhysReg Registers[6][8] = {
2965  { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2966  AArch64::W5, AArch64::W6, AArch64::W7 },
2967  { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2968  AArch64::X5, AArch64::X6, AArch64::X7 },
2969  { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2970  AArch64::H5, AArch64::H6, AArch64::H7 },
2971  { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2972  AArch64::S5, AArch64::S6, AArch64::S7 },
2973  { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2974  AArch64::D5, AArch64::D6, AArch64::D7 },
2975  { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2976  AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2977  };
2978 
2979  unsigned GPRIdx = 0;
2980  unsigned FPRIdx = 0;
2981  for (auto const &Arg : F->args()) {
2982  MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2983  unsigned SrcReg;
2984  const TargetRegisterClass *RC;
2985  if (VT >= MVT::i1 && VT <= MVT::i32) {
2986  SrcReg = Registers[0][GPRIdx++];
2987  RC = &AArch64::GPR32RegClass;
2988  VT = MVT::i32;
2989  } else if (VT == MVT::i64) {
2990  SrcReg = Registers[1][GPRIdx++];
2991  RC = &AArch64::GPR64RegClass;
2992  } else if (VT == MVT::f16) {
2993  SrcReg = Registers[2][FPRIdx++];
2994  RC = &AArch64::FPR16RegClass;
2995  } else if (VT == MVT::f32) {
2996  SrcReg = Registers[3][FPRIdx++];
2997  RC = &AArch64::FPR32RegClass;
2998  } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2999  SrcReg = Registers[4][FPRIdx++];
3000  RC = &AArch64::FPR64RegClass;
3001  } else if (VT.is128BitVector()) {
3002  SrcReg = Registers[5][FPRIdx++];
3003  RC = &AArch64::FPR128RegClass;
3004  } else
3005  llvm_unreachable("Unexpected value type.");
3006 
3007  unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3008  // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3009  // Without this, EmitLiveInCopies may eliminate the livein if its only
3010  // use is a bitcast (which isn't turned into an instruction).
3011  unsigned ResultReg = createResultReg(RC);
3012  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3013  TII.get(TargetOpcode::COPY), ResultReg)
3014  .addReg(DstReg, getKillRegState(true));
3015  updateValueMap(&Arg, ResultReg);
3016  }
3017  return true;
3018 }
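// Illustrative note for fastLowerArguments above: with the C calling
// convention, a signature such as
//   define i32 @f(i32 %a, i64 %b, float %c, double %d)
// maps %a -> W0, %b -> X1, %c -> S0 and %d -> D1; anything beyond eight GPR
// or eight FPR/SIMD arguments (or any argument that would go on the stack)
// makes the routine bail out to the default argument lowering.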
3019 
3020 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3021  SmallVectorImpl<MVT> &OutVTs,
3022  unsigned &NumBytes) {
3023  CallingConv::ID CC = CLI.CallConv;
3024  SmallVector<CCValAssign, 16> ArgLocs;
3025  CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3026  CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
3027 
3028  // Get a count of how many bytes are to be pushed on the stack.
3029  NumBytes = CCInfo.getNextStackOffset();
3030 
3031  // Issue CALLSEQ_START
3032  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3033  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
3034  .addImm(NumBytes).addImm(0);
3035 
3036  // Process the args.
3037  for (CCValAssign &VA : ArgLocs) {
3038  const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3039  MVT ArgVT = OutVTs[VA.getValNo()];
3040 
3041  unsigned ArgReg = getRegForValue(ArgVal);
3042  if (!ArgReg)
3043  return false;
3044 
3045  // Handle arg promotion: SExt, ZExt, AExt.
3046  switch (VA.getLocInfo()) {
3047  case CCValAssign::Full:
3048  break;
3049  case CCValAssign::SExt: {
3050  MVT DestVT = VA.getLocVT();
3051  MVT SrcVT = ArgVT;
3052  ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3053  if (!ArgReg)
3054  return false;
3055  break;
3056  }
3057  case CCValAssign::AExt:
3058  // Intentional fall-through.
3059  case CCValAssign::ZExt: {
3060  MVT DestVT = VA.getLocVT();
3061  MVT SrcVT = ArgVT;
3062  ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3063  if (!ArgReg)
3064  return false;
3065  break;
3066  }
3067  default:
3068  llvm_unreachable("Unknown arg promotion!");
3069  }
3070 
3071  // Now copy/store arg to correct locations.
3072  if (VA.isRegLoc() && !VA.needsCustom()) {
3073  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3074  TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3075  CLI.OutRegs.push_back(VA.getLocReg());
3076  } else if (VA.needsCustom()) {
3077  // FIXME: Handle custom args.
3078  return false;
3079  } else {
3080  assert(VA.isMemLoc() && "Assuming store on stack.");
3081 
3082  // Don't emit stores for undef values.
3083  if (isa<UndefValue>(ArgVal))
3084  continue;
3085 
3086  // Need to store on the stack.
3087  unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3088 
3089  unsigned BEAlign = 0;
3090  if (ArgSize < 8 && !Subtarget->isLittleEndian())
3091  BEAlign = 8 - ArgSize;
3092 
3093  Address Addr;
3094  Addr.setKind(Address::RegBase);
3095  Addr.setReg(AArch64::SP);
3096  Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3097 
3098  unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
3099  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3100  MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3101  MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3102 
3103  if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3104  return false;
3105  }
3106  }
3107  return true;
3108 }
3109 
3110 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
3111  unsigned NumBytes) {
3112  CallingConv::ID CC = CLI.CallConv;
3113 
3114  // Issue CALLSEQ_END
3115  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3116  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3117  .addImm(NumBytes).addImm(0);
3118 
3119  // Now the return value.
3120  if (RetVT != MVT::isVoid) {
3121  SmallVector<CCValAssign, 16> RVLocs;
3122  CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3123  CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
3124 
3125  // Only handle a single return value.
3126  if (RVLocs.size() != 1)
3127  return false;
3128 
3129  // Copy all of the result registers out of their specified physreg.
3130  MVT CopyVT = RVLocs[0].getValVT();
3131 
3132  // TODO: Handle big-endian results
3133  if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3134  return false;
3135 
3136  unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
3137  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3138  TII.get(TargetOpcode::COPY), ResultReg)
3139  .addReg(RVLocs[0].getLocReg());
3140  CLI.InRegs.push_back(RVLocs[0].getLocReg());
3141 
3142  CLI.ResultReg = ResultReg;
3143  CLI.NumResultRegs = 1;
3144  }
3145 
3146  return true;
3147 }
3148 
3149 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3150  CallingConv::ID CC = CLI.CallConv;
3151  bool IsTailCall = CLI.IsTailCall;
3152  bool IsVarArg = CLI.IsVarArg;
3153  const Value *Callee = CLI.Callee;
3154  MCSymbol *Symbol = CLI.Symbol;
3155 
3156  if (!Callee && !Symbol)
3157  return false;
3158 
3159  // Allow SelectionDAG isel to handle tail calls.
3160  if (IsTailCall)
3161  return false;
3162 
3163  CodeModel::Model CM = TM.getCodeModel();
3164  // Only support the small-addressing and large code models.
3165  if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3166  return false;
3167 
3168  // FIXME: Add large code model support for ELF.
3169  if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3170  return false;
3171 
3172  // Let SDISel handle vararg functions.
3173  if (IsVarArg)
3174  return false;
3175 
3176  // FIXME: Only handle *simple* calls for now.
3177  MVT RetVT;
3178  if (CLI.RetTy->isVoidTy())
3179  RetVT = MVT::isVoid;
3180  else if (!isTypeLegal(CLI.RetTy, RetVT))
3181  return false;
3182 
3183  for (auto Flag : CLI.OutFlags)
3184  if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3185  Flag.isSwiftSelf() || Flag.isSwiftError())
3186  return false;
3187 
3188  // Set up the argument vectors.
3189  SmallVector<MVT, 16> OutVTs;
3190  OutVTs.reserve(CLI.OutVals.size());
3191 
3192  for (auto *Val : CLI.OutVals) {
3193  MVT VT;
3194  if (!isTypeLegal(Val->getType(), VT) &&
3195  !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3196  return false;
3197 
3198  // We don't handle vector parameters yet.
3199  if (VT.isVector() || VT.getSizeInBits() > 64)
3200  return false;
3201 
3202  OutVTs.push_back(VT);
3203  }
3204 
3205  Address Addr;
3206  if (Callee && !computeCallAddress(Callee, Addr))
3207  return false;
3208 
3209  // Handle the arguments now that we've gotten them.
3210  unsigned NumBytes;
3211  if (!processCallArgs(CLI, OutVTs, NumBytes))
3212  return false;
3213 
3214  const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3215  if (RegInfo->isAnyArgRegReserved(*MF))
3216  RegInfo->emitReservedArgRegCallError(*MF);
3217 
3218  // Issue the call.
3219  MachineInstrBuilder MIB;
3220  if (Subtarget->useSmallAddressing()) {
3221  const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
3222  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
3223  if (Symbol)
3224  MIB.addSym(Symbol, 0);
3225  else if (Addr.getGlobalValue())
3226  MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3227  else if (Addr.getReg()) {
3228  unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3229  MIB.addReg(Reg);
3230  } else
3231  return false;
3232  } else {
3233  unsigned CallReg = 0;
3234  if (Symbol) {
3235  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3236  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
3237  ADRPReg)
3238  .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3239 
3240  CallReg = createResultReg(&AArch64::GPR64RegClass);
3241  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3242  TII.get(AArch64::LDRXui), CallReg)
3243  .addReg(ADRPReg)
3244  .addSym(Symbol,
3245  AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3246  } else if (Addr.getGlobalValue())
3247  CallReg = materializeGV(Addr.getGlobalValue());
3248  else if (Addr.getReg())
3249  CallReg = Addr.getReg();
3250 
3251  if (!CallReg)
3252  return false;
3253 
3254  const MCInstrDesc &II = TII.get(AArch64::BLR);
3255  CallReg = constrainOperandRegClass(II, CallReg, 0);
3256  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
3257  }
3258 
3259  // Add implicit physical register uses to the call.
3260  for (auto Reg : CLI.OutRegs)
3261  MIB.addReg(Reg, RegState::Implicit);
3262 
3263  // Add a register mask with the call-preserved registers.
3264  // Proper defs for return values will be added by setPhysRegsDeadExcept().
3265  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3266 
3267  CLI.Call = MIB;
3268 
3269  // Finish off the call including any return values.
3270  return finishCall(CLI, RetVT, NumBytes);
3271 }
3272 
3273 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
3274  if (Alignment)
3275  return Len / Alignment <= 4;
3276  else
3277  return Len < 32;
3278 }
3279 
3280 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3281  uint64_t Len, unsigned Alignment) {
3282  // Make sure we don't bloat code by inlining very large memcpy's.
3283  if (!isMemCpySmall(Len, Alignment))
3284  return false;
3285 
3286  int64_t UnscaledOffset = 0;
3287  Address OrigDest = Dest;
3288  Address OrigSrc = Src;
3289 
3290  while (Len) {
3291  MVT VT;
3292  if (!Alignment || Alignment >= 8) {
3293  if (Len >= 8)
3294  VT = MVT::i64;
3295  else if (Len >= 4)
3296  VT = MVT::i32;
3297  else if (Len >= 2)
3298  VT = MVT::i16;
3299  else {
3300  VT = MVT::i8;
3301  }
3302  } else {
3303  // Bound based on alignment.
3304  if (Len >= 4 && Alignment == 4)
3305  VT = MVT::i32;
3306  else if (Len >= 2 && Alignment == 2)
3307  VT = MVT::i16;
3308  else {
3309  VT = MVT::i8;
3310  }
3311  }
3312 
3313  unsigned ResultReg = emitLoad(VT, VT, Src);
3314  if (!ResultReg)
3315  return false;
3316 
3317  if (!emitStore(VT, ResultReg, Dest))
3318  return false;
3319 
3320  int64_t Size = VT.getSizeInBits() / 8;
3321  Len -= Size;
3322  UnscaledOffset += Size;
3323 
3324  // We need to recompute the unscaled offset for each iteration.
3325  Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3326  Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3327  }
3328 
3329  return true;
3330 }
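// Illustrative sketch for isMemCpySmall/tryEmitSmallMemCpy above: a
// hypothetical 16-byte memcpy with 8-byte alignment is inlined as two i64
// load/store pairs, and a 12-byte copy with 4-byte alignment as three i32
// pairs. The small-copy test (Len / Alignment <= 4, or Len < 32 when the
// alignment is unknown) keeps the expansion to a handful of such pairs;
// larger copies are left for the regular memcpy lowering instead.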
3331 
3332 /// Check if it is possible to fold the condition from the XALU intrinsic
3333 /// into the user. The condition code will only be updated on success.
3334 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3335  const Instruction *I,
3336  const Value *Cond) {
3337  if (!isa<ExtractValueInst>(Cond))
3338  return false;
3339 
3340  const auto *EV = cast<ExtractValueInst>(Cond);
3341  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3342  return false;
3343 
3344  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3345  MVT RetVT;
3346  const Function *Callee = II->getCalledFunction();
3347  Type *RetTy =
3348  cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3349  if (!isTypeLegal(RetTy, RetVT))
3350  return false;
3351 
3352  if (RetVT != MVT::i32 && RetVT != MVT::i64)
3353  return false;
3354 
3355  const Value *LHS = II->getArgOperand(0);
3356  const Value *RHS = II->getArgOperand(1);
3357 
3358  // Canonicalize immediate to the RHS.
3359  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3360  isCommutativeIntrinsic(II))
3361  std::swap(LHS, RHS);
3362 
3363  // Simplify multiplies.
3364  Intrinsic::ID IID = II->getIntrinsicID();
3365  switch (IID) {
3366  default:
3367  break;
3368  case Intrinsic::smul_with_overflow:
3369  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3370  if (C->getValue() == 2)
3371  IID = Intrinsic::sadd_with_overflow;
3372  break;
3373  case Intrinsic::umul_with_overflow:
3374  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3375  if (C->getValue() == 2)
3376  IID = Intrinsic::uadd_with_overflow;
3377  break;
3378  }
3379 
3380  AArch64CC::CondCode TmpCC;
3381  switch (IID) {
3382  default:
3383  return false;
3384  case Intrinsic::sadd_with_overflow:
3385  case Intrinsic::ssub_with_overflow:
3386  TmpCC = AArch64CC::VS;
3387  break;
3388  case Intrinsic::uadd_with_overflow:
3389  TmpCC = AArch64CC::HS;
3390  break;
3391  case Intrinsic::usub_with_overflow:
3392  TmpCC = AArch64CC::LO;
3393  break;
3394  case Intrinsic::smul_with_overflow:
3395  case Intrinsic::umul_with_overflow:
3396  TmpCC = AArch64CC::NE;
3397  break;
3398  }
3399 
3400  // Check if both instructions are in the same basic block.
3401  if (!isValueAvailable(II))
3402  return false;
3403 
3404  // Make sure nothing is in the way
3405  BasicBlock::const_iterator Start(I);
3406  BasicBlock::const_iterator End(II);
3407  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3408  // We only expect extractvalue instructions between the intrinsic and the
3409  // instruction to be selected.
3410  if (!isa<ExtractValueInst>(Itr))
3411  return false;
3412 
3413  // Check that the extractvalue operand comes from the intrinsic.
3414  const auto *EVI = cast<ExtractValueInst>(Itr);
3415  if (EVI->getAggregateOperand() != II)
3416  return false;
3417  }
3418 
3419  CC = TmpCC;
3420  return true;
3421 }
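// Illustrative sketch (not part of the original source): this fold lets a
// pattern such as
//   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
//   %ovf = extractvalue { i32, i1 } %res, 1
//   br i1 %ovf, label %overflow, label %cont
// be selected as an ADDS followed by a conditional branch on the reported
// condition code (AArch64CC::VS here), instead of first materializing the i1
// overflow bit into a register and comparing it again.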
3422 
3423 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3424  // FIXME: Handle more intrinsics.
3425  switch (II->getIntrinsicID()) {
3426  default: return false;
3427  case Intrinsic::frameaddress: {
3428  MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3429  MFI.setFrameAddressIsTaken(true);
3430 
3431  const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3432  unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3433  unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3434  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3435  TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3436  // Recursively load frame address
3437  // ldr x0, [fp]
3438  // ldr x0, [x0]
3439  // ldr x0, [x0]
3440  // ...
3441  unsigned DestReg;
3442  unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3443  while (Depth--) {
3444  DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3445  SrcReg, /*IsKill=*/true, 0);
3446  assert(DestReg && "Unexpected LDR instruction emission failure.");
3447  SrcReg = DestReg;
3448  }
3449 
3450  updateValueMap(II, SrcReg);
3451  return true;
3452  }
3453  case Intrinsic::sponentry: {
3454  MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3455 
3456  // SP = FP + Fixed Object + 16
3457  int FI = MFI.CreateFixedObject(4, 0, false);
3458  unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
3459  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3460  TII.get(AArch64::ADDXri), ResultReg)
3461  .addFrameIndex(FI)
3462  .addImm(0)
3463  .addImm(0);
3464 
3465  updateValueMap(II, ResultReg);
3466  return true;
3467  }
3468  case Intrinsic::memcpy:
3469  case Intrinsic::memmove: {
3470  const auto *MTI = cast<MemTransferInst>(II);
3471  // Don't handle volatile.
3472  if (MTI->isVolatile())
3473  return false;
3474 
3475  // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
3476  // we would emit dead code because we don't currently handle memmoves.
3477  bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3478  if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3479  // Small memcpy's are common enough that we want to do them without a call
3480  // if possible.
3481  uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3482  unsigned Alignment = MinAlign(MTI->getDestAlignment(),
3483  MTI->getSourceAlignment());
3484  if (isMemCpySmall(Len, Alignment)) {
3485  Address Dest, Src;
3486  if (!computeAddress(MTI->getRawDest(), Dest) ||
3487  !computeAddress(MTI->getRawSource(), Src))
3488  return false;
3489  if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3490  return true;
3491  }
3492  }
3493 
3494  if (!MTI->getLength()->getType()->isIntegerTy(64))
3495  return false;
3496 
3497  if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3498  // Fast instruction selection doesn't support the special
3499  // address spaces.
3500  return false;
3501 
3502  const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3503  return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 1);
3504  }
3505  case Intrinsic::memset: {
3506  const MemSetInst *MSI = cast<MemSetInst>(II);
3507  // Don't handle volatile.
3508  if (MSI->isVolatile())
3509  return false;
3510 
3511  if (!MSI->getLength()->getType()->isIntegerTy(64))
3512  return false;
3513 
3514  if (MSI->getDestAddressSpace() > 255)
3515  // Fast instruction selection doesn't support the special
3516  // address spaces.
3517  return false;
3518 
3519  return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
3520  }
3521  case Intrinsic::sin:
3522  case Intrinsic::cos:
3523  case Intrinsic::pow: {
3524  MVT RetVT;
3525  if (!isTypeLegal(II->getType(), RetVT))
3526  return false;
3527 
3528  if (RetVT != MVT::f32 && RetVT != MVT::f64)
3529  return false;
3530 
3531  static const RTLIB::Libcall LibCallTable[3][2] = {
3532  { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3533  { RTLIB::COS_F32, RTLIB::COS_F64 },
3534  { RTLIB::POW_F32, RTLIB::POW_F64 }
3535  };
3536  RTLIB::Libcall LC;
3537  bool Is64Bit = RetVT == MVT::f64;
3538  switch (II->getIntrinsicID()) {
3539  default:
3540  llvm_unreachable("Unexpected intrinsic.");
3541  case Intrinsic::sin:
3542  LC = LibCallTable[0][Is64Bit];
3543  break;
3544  case Intrinsic::cos:
3545  LC = LibCallTable[1][Is64Bit];
3546  break;
3547  case Intrinsic::pow:
3548  LC = LibCallTable[2][Is64Bit];
3549  break;
3550  }
3551 
3552  ArgListTy Args;
3553  Args.reserve(II->getNumArgOperands());
3554 
3555  // Populate the argument list.
3556  for (auto &Arg : II->arg_operands()) {
3557  ArgListEntry Entry;
3558  Entry.Val = Arg;
3559  Entry.Ty = Arg->getType();
3560  Args.push_back(Entry);
3561  }
3562 
3563  CallLoweringInfo CLI;
3564  MCContext &Ctx = MF->getContext();
3565  CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3566  TLI.getLibcallName(LC), std::move(Args));
3567  if (!lowerCallTo(CLI))
3568  return false;
3569  updateValueMap(II, CLI.ResultReg);
3570  return true;
3571  }
3572  case Intrinsic::fabs: {
3573  MVT VT;
3574  if (!isTypeLegal(II->getType(), VT))
3575  return false;
3576 
3577  unsigned Opc;
3578  switch (VT.SimpleTy) {
3579  default:
3580  return false;
3581  case MVT::f32:
3582  Opc = AArch64::FABSSr;
3583  break;
3584  case MVT::f64:
3585  Opc = AArch64::FABSDr;
3586  break;
3587  }
3588  unsigned SrcReg = getRegForValue(II->getOperand(0));
3589  if (!SrcReg)
3590  return false;
3591  bool SrcRegIsKill = hasTrivialKill(II->getOperand(0));
3592  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3593  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3594  .addReg(SrcReg, getKillRegState(SrcRegIsKill));
3595  updateValueMap(II, ResultReg);
3596  return true;
3597  }
3598  case Intrinsic::trap:
3599  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3600  .addImm(1);
3601  return true;
3602 
3603  case Intrinsic::sqrt: {
3604  Type *RetTy = II->getCalledFunction()->getReturnType();
3605 
3606  MVT VT;
3607  if (!isTypeLegal(RetTy, VT))
3608  return false;
3609 
3610  unsigned Op0Reg = getRegForValue(II->getOperand(0));
3611  if (!Op0Reg)
3612  return false;
3613  bool Op0IsKill = hasTrivialKill(II->getOperand(0));
3614 
3615  unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
3616  if (!ResultReg)
3617  return false;
3618 
3619  updateValueMap(II, ResultReg);
3620  return true;
3621  }
3622  case Intrinsic::sadd_with_overflow:
3623  case Intrinsic::uadd_with_overflow:
3624  case Intrinsic::ssub_with_overflow:
3625  case Intrinsic::usub_with_overflow:
3626  case Intrinsic::smul_with_overflow:
3627  case Intrinsic::umul_with_overflow: {
3628  // This implements the basic lowering of the xalu with overflow intrinsics.
3629  const Function *Callee = II->getCalledFunction();
3630  auto *Ty = cast<StructType>(Callee->getReturnType());
3631  Type *RetTy = Ty->getTypeAtIndex(0U);
3632 
3633  MVT VT;
3634  if (!isTypeLegal(RetTy, VT))
3635  return false;
3636 
3637  if (VT != MVT::i32 && VT != MVT::i64)
3638  return false;
3639 
3640  const Value *LHS = II->getArgOperand(0);
3641  const Value *RHS = II->getArgOperand(1);
3642  // Canonicalize immediate to the RHS.
3643  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3644  isCommutativeIntrinsic(II))
3645  std::swap(LHS, RHS);
3646 
3647  // Simplify multiplies.
3648  Intrinsic::ID IID = II->getIntrinsicID();
3649  switch (IID) {
3650  default:
3651  break;
3652  case Intrinsic::smul_with_overflow:
3653  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3654  if (C->getValue() == 2) {
3655  IID = Intrinsic::sadd_with_overflow;
3656  RHS = LHS;
3657  }
3658  break;
3659  case Intrinsic::umul_with_overflow:
3660  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3661  if (C->getValue() == 2) {
3662  IID = Intrinsic::uadd_with_overflow;
3663  RHS = LHS;
3664  }
3665  break;
3666  }
3667 
3668  unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3669  AArch64CC::CondCode CC = AArch64CC::Invalid;
3670  switch (IID) {
3671  default: llvm_unreachable("Unexpected intrinsic!");
3672  case Intrinsic::sadd_with_overflow:
3673  ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3674  CC = AArch64CC::VS;
3675  break;
3676  case Intrinsic::uadd_with_overflow:
3677  ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3678  CC = AArch64CC::HS;
3679  break;
3680  case Intrinsic::ssub_with_overflow:
3681  ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3682  CC = AArch64CC::VS;
3683  break;
3684  case Intrinsic::usub_with_overflow:
3685  ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3686  CC = AArch64CC::LO;
3687  break;
3688  case Intrinsic::smul_with_overflow: {
3689  CC = AArch64CC::NE;
3690  unsigned LHSReg = getRegForValue(LHS);
3691  if (!LHSReg)
3692  return false;
3693  bool LHSIsKill = hasTrivialKill(LHS);
3694 
3695  unsigned RHSReg = getRegForValue(RHS);
3696  if (!RHSReg)
3697  return false;
3698  bool RHSIsKill = hasTrivialKill(RHS);
3699 
3700  if (VT == MVT::i32) {
3701  MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3702  unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
3703  /*IsKill=*/false, 32);
3704  MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3705  AArch64::sub_32);
3706  ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
3707  AArch64::sub_32);
3708  emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3709  AArch64_AM::ASR, 31, /*WantResult=*/false);
3710  } else {
3711  assert(VT == MVT::i64 && "Unexpected value type.");
3712  // LHSReg and RHSReg cannot be killed by this Mul, since they are
3713  // reused in the next instruction.
3714  MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3715  /*IsKill=*/false);
3716  unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
3717  RHSReg, RHSIsKill);
3718  emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3719  AArch64_AM::ASR, 63, /*WantResult=*/false);
3720  }
3721  break;
3722  }
3723  case Intrinsic::umul_with_overflow: {
3724  CC = AArch64CC::NE;
3725  unsigned LHSReg = getRegForValue(LHS);
3726  if (!LHSReg)
3727  return false;
3728  bool LHSIsKill = hasTrivialKill(LHS);
3729 
3730  unsigned RHSReg = getRegForValue(RHS);
3731  if (!RHSReg)
3732  return false;
3733  bool RHSIsKill = hasTrivialKill(RHS);
3734 
3735  if (VT == MVT::i32) {
3736  MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3737  emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
3738  /*IsKill=*/false, AArch64_AM::LSR, 32,
3739  /*WantResult=*/false);
3740  MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3741  AArch64::sub_32);
3742  } else {
3743  assert(VT == MVT::i64 && "Unexpected value type.");
3744  // LHSReg and RHSReg cannot be killed by this Mul, since they are
3745  // reused in the next instruction.
3746  MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3747  /*IsKill=*/false);
3748  unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
3749  RHSReg, RHSIsKill);
3750  emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
3751  /*IsKill=*/false, /*WantResult=*/false);
3752  }
3753  break;
3754  }
3755  }
3756 
3757  if (MulReg) {
3758  ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3759  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3760  TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3761  }
3762 
3763  if (!ResultReg1)
3764  return false;
3765 
3766  ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3767  AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
3768  /*IsKill=*/true, getInvertedCondCode(CC));
3769  (void)ResultReg2;
3770  assert((ResultReg1 + 1) == ResultReg2 &&
3771  "Nonconsecutive result registers.");
3772  updateValueMap(II, ResultReg1, 2);
3773  return true;
3774  }
3775  }
3776  return false;
3777 }
3778 
3779 bool AArch64FastISel::selectRet(const Instruction *I) {
3780  const ReturnInst *Ret = cast<ReturnInst>(I);
3781  const Function &F = *I->getParent()->getParent();
3782 
3783  if (!FuncInfo.CanLowerReturn)
3784  return false;
3785 
3786  if (F.isVarArg())
3787  return false;
3788 
3789  if (TLI.supportSwiftError() &&
3790  F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3791  return false;
3792 
3793  if (TLI.supportSplitCSR(FuncInfo.MF))
3794  return false;
3795 
3796  // Build a list of return value registers.
3797  SmallVector<unsigned, 4> RetRegs;
3798 
3799  if (Ret->getNumOperands() > 0) {
3800  CallingConv::ID CC = F.getCallingConv();
3801  SmallVector<ISD::OutputArg, 4> Outs;
3802  GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3803 
3804  // Analyze operands of the call, assigning locations to each operand.
3805  SmallVector<CCValAssign, 16> ValLocs;
3806  CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3807  CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3808  : RetCC_AArch64_AAPCS;
3809  CCInfo.AnalyzeReturn(Outs, RetCC);
3810 
3811  // Only handle a single return value for now.
3812  if (ValLocs.size() != 1)
3813  return false;
3814 
3815  CCValAssign &VA = ValLocs[0];
3816  const Value *RV = Ret->getOperand(0);
3817 
3818  // Don't bother handling odd stuff for now.
3819  if ((VA.getLocInfo() != CCValAssign::Full) &&
3820  (VA.getLocInfo() != CCValAssign::BCvt))
3821  return false;
3822 
3823  // Only handle register returns for now.
3824  if (!VA.isRegLoc())
3825  return false;
3826 
3827  unsigned Reg = getRegForValue(RV);
3828  if (Reg == 0)
3829  return false;
3830 
3831  unsigned SrcReg = Reg + VA.getValNo();
3832  unsigned DestReg = VA.getLocReg();
3833  // Avoid a cross-class copy. This is very unlikely.
3834  if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3835  return false;
3836 
3837  EVT RVEVT = TLI.getValueType(DL, RV->getType());
3838  if (!RVEVT.isSimple())
3839  return false;
3840 
3841  // Vectors (of > 1 lane) in big endian need tricky handling.
3842  if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
3843  !Subtarget->isLittleEndian())
3844  return false;
3845 
3846  MVT RVVT = RVEVT.getSimpleVT();
3847  if (RVVT == MVT::f128)
3848  return false;
3849 
3850  MVT DestVT = VA.getValVT();
3851  // Special handling for extended integers.
3852  if (RVVT != DestVT) {
3853  if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3854  return false;
3855 
3856  if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3857  return false;
3858 
3859  bool IsZExt = Outs[0].Flags.isZExt();
3860  SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3861  if (SrcReg == 0)
3862  return false;
3863  }
3864 
3865  // Make the copy.
3866  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3867  TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3868 
3869  // Add register to return instruction.
3870  RetRegs.push_back(VA.getLocReg());
3871  }
3872 
3873  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3874  TII.get(AArch64::RET_ReallyLR));
3875  for (unsigned RetReg : RetRegs)
3876  MIB.addReg(RetReg, RegState::Implicit);
3877  return true;
3878 }
3879 
3880 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3881  Type *DestTy = I->getType();
3882  Value *Op = I->getOperand(0);
3883  Type *SrcTy = Op->getType();
3884 
3885  EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3886  EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3887  if (!SrcEVT.isSimple())
3888  return false;
3889  if (!DestEVT.isSimple())
3890  return false;
3891 
3892  MVT SrcVT = SrcEVT.getSimpleVT();
3893  MVT DestVT = DestEVT.getSimpleVT();
3894 
3895  if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3896  SrcVT != MVT::i8)
3897  return false;
3898  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3899  DestVT != MVT::i1)
3900  return false;
3901 
3902  unsigned SrcReg = getRegForValue(Op);
3903  if (!SrcReg)
3904  return false;
3905  bool SrcIsKill = hasTrivialKill(Op);
3906 
3907  // If we're truncating from i64 to a smaller non-legal type then generate an
3908  // AND. Otherwise, we know the high bits are undefined and a truncate only
3909  // generates a COPY. We cannot also mark the source register as the result
3910  // register, because this can incorrectly transfer the kill flag onto the
3911  // source register.
3912  unsigned ResultReg;
3913  if (SrcVT == MVT::i64) {
3914  uint64_t Mask = 0;
3915  switch (DestVT.SimpleTy) {
3916  default:
3917  // Trunc i64 to i32 is handled by the target-independent fast-isel.
3918  return false;
3919  case MVT::i1:
3920  Mask = 0x1;
3921  break;
3922  case MVT::i8:
3923  Mask = 0xff;
3924  break;
3925  case MVT::i16:
3926  Mask = 0xffff;
3927  break;
3928  }
3929  // Issue an extract_subreg to get the lower 32-bits.
3930  unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
3931  AArch64::sub_32);
3932  // Create the AND instruction which performs the actual truncation.
3933  ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
3934  assert(ResultReg && "Unexpected AND instruction emission failure.");
3935  } else {
3936  ResultReg = createResultReg(&AArch64::GPR32RegClass);
3937  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3938  TII.get(TargetOpcode::COPY), ResultReg)
3939  .addReg(SrcReg, getKillRegState(SrcIsKill));
3940  }
3941 
3942  updateValueMap(I, ResultReg);
3943  return true;
3944 }
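// Illustrative sketch (not part of the original source; register numbers are
// arbitrary): for
//   %t = trunc i64 %x to i8
// the code above extracts the low 32 bits of %x via sub_32 and then emits
//   and w8, w9, #0xff
// so users of the i8 value see the masked bits; a trunc from an i32 (or
// smaller) source takes the else-branch and only needs the COPY, since the
// upper bits are left undefined anyway.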
3945 
3946 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
3947  assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
3948  DestVT == MVT::i64) &&
3949  "Unexpected value type.");
3950  // Handle i8 and i16 as i32.
3951  if (DestVT == MVT::i8 || DestVT == MVT::i16)
3952  DestVT = MVT::i32;
3953 
3954  if (IsZExt) {
3955  unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
3956  assert(ResultReg && "Unexpected AND instruction emission failure.");
3957  if (DestVT == MVT::i64) {
3958  // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
3959  // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
3960  unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3961  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3962  TII.get(AArch64::SUBREG_TO_REG), Reg64)
3963  .addImm(0)
3964  .addReg(ResultReg)
3965  .addImm(AArch64::sub_32);
3966  ResultReg = Reg64;
3967  }
3968  return ResultReg;
3969  } else {
3970  if (DestVT == MVT::i64) {
3971  // FIXME: We're SExt i1 to i64.
3972  return 0;
3973  }
3974  return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
3975  /*TODO:IsKill=*/false, 0, 0);
3976  }
3977 }
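// Illustrative sketch (not part of the original source; register numbers are
// arbitrary): zero-extending an i1 to i64 with emiti1Ext produces roughly
//   and w8, w9, #0x1     ; the 32-bit AND also clears bits 63:32
// followed by a SUBREG_TO_REG that re-wraps w8 as an x register for the i64
// user. Sign-extension uses SBFMWri with immr = imms = 0, which replicates
// bit 0 across the register, and the i1 -> i64 sign-extension case is left to
// SelectionDAG (the FIXME above).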
3978 
3979 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3980  unsigned Op1, bool Op1IsKill) {
3981  unsigned Opc, ZReg;
3982  switch (RetVT.SimpleTy) {
3983  default: return 0;
3984  case MVT::i8:
3985  case MVT::i16:
3986  case MVT::i32:
3987  RetVT = MVT::i32;
3988  Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
3989  case MVT::i64:
3990  Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
3991  }
3992 
3993  const TargetRegisterClass *RC =
3994  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3995  return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
3996  ZReg, /*IsKill=*/true);
3997 }
3998 
3999 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
4000  unsigned Op1, bool Op1IsKill) {
4001  if (RetVT != MVT::i64)
4002  return 0;
4003 
4004  return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
4005  Op0, Op0IsKill, Op1, Op1IsKill,
4006  AArch64::XZR, /*IsKill=*/true);
4007 }
4008 
4009 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
4010  unsigned Op1, bool Op1IsKill) {
4011  if (RetVT != MVT::i64)
4012  return 0;
4013 
4014  return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
4015  Op0, Op0IsKill, Op1, Op1IsKill,
4016  AArch64::XZR, /*IsKill=*/true);
4017 }
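// Illustrative sketch (not part of the original source): both helpers use the
// widening multiply-add forms with XZR as the addend, e.g.
//   smaddl x8, w9, w10, xzr   ; x8 = sext(w9) * sext(w10) + 0
//   umaddl x8, w9, w10, xzr   ; x8 = zext(w9) * zext(w10) + 0
// which is how the 32 x 32 -> 64 bit products needed by the
// [su]mul.with.overflow lowering above are formed.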
4018 
4019 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4020  unsigned Op1Reg, bool Op1IsKill) {
4021  unsigned Opc = 0;
4022  bool NeedTrunc = false;
4023  uint64_t Mask = 0;
4024  switch (RetVT.SimpleTy) {
4025  default: return 0;
4026  case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
4027  case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4028  case MVT::i32: Opc = AArch64::LSLVWr; break;
4029  case MVT::i64: Opc = AArch64::LSLVXr; break;
4030  }
4031 
4032  const TargetRegisterClass *RC =
4033  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4034  if (NeedTrunc) {
4035  Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4036  Op1IsKill = true;
4037  }
4038  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4039  Op1IsKill);
4040  if (NeedTrunc)
4041  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4042  return ResultReg;
4043 }
4044 
4045 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4046  bool Op0IsKill, uint64_t Shift,
4047  bool IsZExt) {
4048  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4049  "Unexpected source/return type pair.");
4050  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4051  SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4052  "Unexpected source value type.");
4053  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4054  RetVT == MVT::i64) && "Unexpected return value type.");
4055 
4056  bool Is64Bit = (RetVT == MVT::i64);
4057  unsigned RegSize = Is64Bit ? 64 : 32;
4058  unsigned DstBits = RetVT.getSizeInBits();
4059  unsigned SrcBits = SrcVT.getSizeInBits();
4060  const TargetRegisterClass *RC =
4061  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4062 
4063  // Just emit a copy for "zero" shifts.
4064  if (Shift == 0) {
4065  if (RetVT == SrcVT) {
4066  unsigned ResultReg = createResultReg(RC);
4067  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4068  TII.get(TargetOpcode::COPY), ResultReg)
4069  .addReg(Op0, getKillRegState(Op0IsKill));
4070  return ResultReg;
4071  } else
4072  return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4073  }
4074 
4075  // Don't deal with undefined shifts.
4076  if (Shift >= DstBits)
4077  return 0;
4078 
4079  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4080  // {S|U}BFM Wd, Wn, #r, #s
4081  // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4082 
4083  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4084  // %2 = shl i16 %1, 4
4085  // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4086  // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4087  // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4088  // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4089 
4090  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4091  // %2 = shl i16 %1, 8
4092  // Wd<32+7-24,32-24> = Wn<7:0>
4093  // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4094  // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4095  // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4096 
4097  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4098  // %2 = shl i16 %1, 12
4099  // Wd<32+3-20,32-20> = Wn<3:0>
4100  // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4101  // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4102  // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4103 
4104  unsigned ImmR = RegSize - Shift;
4105  // Limit the width to the length of the source type.
4106  unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4107  static const unsigned OpcTable[2][2] = {
4108  {AArch64::SBFMWri, AArch64::SBFMXri},
4109  {AArch64::UBFMWri, AArch64::UBFMXri}
4110  };
4111  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4112  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4113  unsigned TmpReg = MRI.createVirtualRegister(RC);
4114  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4115  TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4116  .addImm(0)
4117  .addReg(Op0, getKillRegState(Op0IsKill))
4118  .addImm(AArch64::sub_32);
4119  Op0 = TmpReg;
4120  Op0IsKill = true;
4121  }
4122  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4123 }
4124 
4125 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4126  unsigned Op1Reg, bool Op1IsKill) {
4127  unsigned Opc = 0;
4128  bool NeedTrunc = false;
4129  uint64_t Mask = 0;
4130  switch (RetVT.SimpleTy) {
4131  default: return 0;
4132  case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
4133  case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4134  case MVT::i32: Opc = AArch64::LSRVWr; break;
4135  case MVT::i64: Opc = AArch64::LSRVXr; break;
4136  }
4137 
4138  const TargetRegisterClass *RC =
4139  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4140  if (NeedTrunc) {
4141  Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
4142  Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4143  Op0IsKill = Op1IsKill = true;
4144  }
4145  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4146  Op1IsKill);
4147  if (NeedTrunc)
4148  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4149  return ResultReg;
4150 }
4151 
4152 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4153  bool Op0IsKill, uint64_t Shift,
4154  bool IsZExt) {
4155  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4156  "Unexpected source/return type pair.");
4157  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4158  SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4159  "Unexpected source value type.");
4160  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4161  RetVT == MVT::i64) && "Unexpected return value type.");
4162 
4163  bool Is64Bit = (RetVT == MVT::i64);
4164  unsigned RegSize = Is64Bit ? 64 : 32;
4165  unsigned DstBits = RetVT.getSizeInBits();
4166  unsigned SrcBits = SrcVT.getSizeInBits();
4167  const TargetRegisterClass *RC =
4168  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4169 
4170  // Just emit a copy for "zero" shifts.
4171  if (Shift == 0) {
4172  if (RetVT == SrcVT) {
4173  unsigned ResultReg = createResultReg(RC);
4174  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4175  TII.get(TargetOpcode::COPY), ResultReg)
4176  .addReg(Op0, getKillRegState(Op0IsKill));
4177  return ResultReg;
4178  } else
4179  return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4180  }
4181 
4182  // Don't deal with undefined shifts.
4183  if (Shift >= DstBits)
4184  return 0;
4185 
4186  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4187  // {S|U}BFM Wd, Wn, #r, #s
4188  // Wd<s-r:0> = Wn<s:r> when r <= s
4189 
4190  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4191  // %2 = lshr i16 %1, 4
4192  // Wd<7-4:0> = Wn<7:4>
4193  // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4194  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4195  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4196 
4197  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4198  // %2 = lshr i16 %1, 8
4199  // Wd<7-7,0> = Wn<7:7>
4200  // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4201  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4202  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4203 
4204  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4205  // %2 = lshr i16 %1, 12
4206  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4207  // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4208  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4209  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4210 
4211  if (Shift >= SrcBits && IsZExt)
4212  return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4213 
4214  // It is not possible to fold a sign-extend into the LShr instruction. In this
4215  // case emit a sign-extend.
4216  if (!IsZExt) {
4217  Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4218  if (!Op0)
4219  return 0;
4220  Op0IsKill = true;
4221  SrcVT = RetVT;
4222  SrcBits = SrcVT.getSizeInBits();
4223  IsZExt = true;
4224  }
4225 
4226  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4227  unsigned ImmS = SrcBits - 1;
4228  static const unsigned OpcTable[2][2] = {
4229  {AArch64::SBFMWri, AArch64::SBFMXri},
4230  {AArch64::UBFMWri, AArch64::UBFMXri}
4231  };
4232  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4233  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4234  unsigned TmpReg = MRI.createVirtualRegister(RC);
4235  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4236  TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4237  .addImm(0)
4238  .addReg(Op0, getKillRegState(Op0IsKill))
4239  .addImm(AArch64::sub_32);
4240  Op0 = TmpReg;
4241  Op0IsKill = true;
4242  }
4243  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4244 }
4245 
4246 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4247  unsigned Op1Reg, bool Op1IsKill) {
4248  unsigned Opc = 0;
4249  bool NeedTrunc = false;
4250  uint64_t Mask = 0;
4251  switch (RetVT.SimpleTy) {
4252  default: return 0;
4253  case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
4254  case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4255  case MVT::i32: Opc = AArch64::ASRVWr; break;
4256  case MVT::i64: Opc = AArch64::ASRVXr; break;
4257  }
4258 
4259  const TargetRegisterClass *RC =
4260  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4261  if (NeedTrunc) {
4262  Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
4263  Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4264  Op0IsKill = Op1IsKill = true;
4265  }
4266  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4267  Op1IsKill);
4268  if (NeedTrunc)
4269  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4270  return ResultReg;
4271 }
4272 
4273 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4274  bool Op0IsKill, uint64_t Shift,
4275  bool IsZExt) {
4276  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4277  "Unexpected source/return type pair.");
4278  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4279  SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4280  "Unexpected source value type.");
4281  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4282  RetVT == MVT::i64) && "Unexpected return value type.");
4283 
4284  bool Is64Bit = (RetVT == MVT::i64);
4285  unsigned RegSize = Is64Bit ? 64 : 32;
4286  unsigned DstBits = RetVT.getSizeInBits();
4287  unsigned SrcBits = SrcVT.getSizeInBits();
4288  const TargetRegisterClass *RC =
4289  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4290 
4291  // Just emit a copy for "zero" shifts.
4292  if (Shift == 0) {
4293  if (RetVT == SrcVT) {
4294  unsigned ResultReg = createResultReg(RC);
4295  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4296  TII.get(TargetOpcode::COPY), ResultReg)
4297  .addReg(Op0, getKillRegState(Op0IsKill));
4298  return ResultReg;
4299  } else
4300  return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4301  }
4302 
4303  // Don't deal with undefined shifts.
4304  if (Shift >= DstBits)
4305  return 0;
4306 
4307  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4308  // {S|U}BFM Wd, Wn, #r, #s
4309  // Wd<s-r:0> = Wn<s:r> when r <= s
4310 
4311  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4312  // %2 = ashr i16 %1, 4
4313  // Wd<7-4:0> = Wn<7:4>
4314  // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4315  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4316  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4317 
4318  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4319  // %2 = ashr i16 %1, 8
4320  // Wd<7-7,0> = Wn<7:7>
4321  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4322  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4323  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4324 
4325  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4326  // %2 = ashr i16 %1, 12
4327  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4328  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4329  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4330  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4331 
4332  if (Shift >= SrcBits && IsZExt)
4333  return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4334 
4335  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4336  unsigned ImmS = SrcBits - 1;
4337  static const unsigned OpcTable[2][2] = {
4338  {AArch64::SBFMWri, AArch64::SBFMXri},
4339  {AArch64::UBFMWri, AArch64::UBFMXri}
4340  };
4341  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4342  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4343  unsigned TmpReg = MRI.createVirtualRegister(RC);
4344  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4345  TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4346  .addImm(0)
4347  .addReg(Op0, getKillRegState(Op0IsKill))
4348  .addImm(AArch64::sub_32);
4349  Op0 = TmpReg;
4350  Op0IsKill = true;
4351  }
4352  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4353 }
4354 
4355 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4356  bool IsZExt) {
4357  assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4358 
4359  // FastISel does not have plumbing to deal with extensions where the SrcVT or
4360  // DestVT are odd things, so test to make sure that they are both types we can
4361  // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4362  // bail out to SelectionDAG.
4363  if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4364  (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4365  ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4366  (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4367  return 0;
4368 
4369  unsigned Opc;
4370  unsigned Imm = 0;
4371 
4372  switch (SrcVT.SimpleTy) {
4373  default:
4374  return 0;
4375  case MVT::i1:
4376  return emiti1Ext(SrcReg, DestVT, IsZExt);
4377  case MVT::i8:
4378  if (DestVT == MVT::i64)
4379  Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4380  else
4381  Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4382  Imm = 7;
4383  break;
4384  case MVT::i16:
4385  if (DestVT == MVT::i64)
4386  Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4387  else
4388  Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4389  Imm = 15;
4390  break;
4391  case MVT::i32:
4392  assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4393  Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4394  Imm = 31;
4395  break;
4396  }
4397 
4398  // Handle i8 and i16 as i32.
4399  if (DestVT == MVT::i8 || DestVT == MVT::i16)
4400  DestVT = MVT::i32;
4401  else if (DestVT == MVT::i64) {
4402  unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4403  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4404  TII.get(AArch64::SUBREG_TO_REG), Src64)
4405  .addImm(0)
4406  .addReg(SrcReg)
4407  .addImm(AArch64::sub_32);
4408  SrcReg = Src64;
4409  }
4410 
4411  const TargetRegisterClass *RC =
4412  (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4413  return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
4414 }
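// Illustrative sketch (not part of the original source; register numbers are
// arbitrary): a sign-extension such as
//   %e = sext i16 %x to i64
// takes the MVT::i16 case above (Imm = 15), wraps the 32-bit source in a
// SUBREG_TO_REG so it can be used as a 64-bit operand, and emits
//   sbfm x8, x9, #0, #15      ; the sxth x8, w9 alias
// The zero-extending variants pick UBFM instead of SBFM.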
4415 
4416 static bool isZExtLoad(const MachineInstr *LI) {
4417  switch (LI->getOpcode()) {
4418  default:
4419  return false;
4420  case AArch64::LDURBBi:
4421  case AArch64::LDURHHi:
4422  case AArch64::LDURWi:
4423  case AArch64::LDRBBui:
4424  case AArch64::LDRHHui:
4425  case AArch64::LDRWui:
4426  case AArch64::LDRBBroX:
4427  case AArch64::LDRHHroX:
4428  case AArch64::LDRWroX:
4429  case AArch64::LDRBBroW:
4430  case AArch64::LDRHHroW:
4431  case AArch64::LDRWroW:
4432  return true;
4433  }
4434 }
4435 
4436 static bool isSExtLoad(const MachineInstr *LI) {
4437  switch (LI->getOpcode()) {
4438  default:
4439  return false;
4440  case AArch64::LDURSBWi:
4441  case AArch64::LDURSHWi:
4442  case AArch64::LDURSBXi:
4443  case AArch64::LDURSHXi:
4444  case AArch64::LDURSWi:
4445  case AArch64::LDRSBWui:
4446  case AArch64::LDRSHWui:
4447  case AArch64::LDRSBXui:
4448  case AArch64::LDRSHXui:
4449  case AArch64::LDRSWui:
4450  case AArch64::LDRSBWroX:
4451  case AArch64::LDRSHWroX:
4452  case AArch64::LDRSBXroX:
4453  case AArch64::LDRSHXroX:
4454  case AArch64::LDRSWroX:
4455  case AArch64::LDRSBWroW:
4456  case AArch64::LDRSHWroW:
4457  case AArch64::LDRSBXroW:
4458  case AArch64::LDRSHXroW:
4459  case AArch64::LDRSWroW:
4460  return true;
4461  }
4462 }
4463 
4464 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4465  MVT SrcVT) {
4466  const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4467  if (!LI || !LI->hasOneUse())
4468  return false;
4469 
4470  // Check if the load instruction has already been selected.
4471  unsigned Reg = lookUpRegForValue(LI);
4472  if (!Reg)
4473  return false;
4474 
4475  MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4476  if (!MI)
4477  return false;
4478 
4479  // Check if the correct load instruction has been emitted - SelectionDAG might
4480  // have emitted a zero-extending load, but we need a sign-extending load.
4481  bool IsZExt = isa<ZExtInst>(I);
4482  const auto *LoadMI = MI;
4483  if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4484  LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4485  unsigned LoadReg = MI->getOperand(1).getReg();
4486  LoadMI = MRI.getUniqueVRegDef(LoadReg);
4487  assert(LoadMI && "Expected valid instruction");
4488  }
4489  if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4490  return false;
4491 
4492  // Nothing to be done.
4493  if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4494  updateValueMap(I, Reg);
4495  return true;
4496  }
4497 
4498  if (IsZExt) {
4499  unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
4500  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4501  TII.get(AArch64::SUBREG_TO_REG), Reg64)
4502  .addImm(0)
4503  .addReg(Reg, getKillRegState(true))
4504  .addImm(AArch64::sub_32);
4505  Reg = Reg64;
4506  } else {
4507  assert((MI->getOpcode() == TargetOpcode::COPY &&
4508  MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4509  "Expected copy instruction");
4510  Reg = MI->getOperand(1).getReg();
4511  MI->eraseFromParent();
4512  }
4513  updateValueMap(I, Reg);
4514  return true;
4515 }
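// Illustrative sketch (not part of the original source): for
//   %v = load i32, i32* %p
//   %e = zext i32 %v to i64
// the load has already been selected as the zero-extending LDRWui, so instead
// of emitting a second extend the code above only wraps the existing 32-bit
// result in a SUBREG_TO_REG; in the sign-extending case the LDRSW-style load
// already produces a 64-bit result, so its register is reused directly and
// the now-redundant sub_32 COPY is erased.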
4516 
4517 bool AArch64FastISel::selectIntExt(const Instruction *I) {
4518  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4519  "Unexpected integer extend instruction.");
4520  MVT RetVT;
4521  MVT SrcVT;
4522  if (!isTypeSupported(I->getType(), RetVT))
4523  return false;
4524 
4525  if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4526  return false;
4527 
4528  // Try to optimize already sign-/zero-extended values from load instructions.
4529  if (optimizeIntExtLoad(I, RetVT, SrcVT))
4530  return true;
4531 
4532  unsigned SrcReg = getRegForValue(I->getOperand(0));
4533  if (!SrcReg)
4534  return false;
4535  bool SrcIsKill = hasTrivialKill(I->getOperand(0));
4536 
4537  // Try to optimize already sign-/zero-extended values from function arguments.
4538  bool IsZExt = isa<ZExtInst>(I);
4539  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4540  if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4541  if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4542  unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
4543  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4544  TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4545  .addImm(0)
4546  .addReg(SrcReg, getKillRegState(SrcIsKill))
4547  .addImm(AArch64::sub_32);
4548  SrcReg = ResultReg;
4549  }
4550  // Conservatively clear all kill flags from all uses, because we are
4551  // replacing a sign-/zero-extend instruction at IR level with a nop at MI
4552  // level. The result of the instruction at IR level might have been
4553  // trivially dead, which is now no longer true.
4554  unsigned UseReg = lookUpRegForValue(I);
4555  if (UseReg)
4556  MRI.clearKillFlags(UseReg);
4557 
4558  updateValueMap(I, SrcReg);
4559  return true;
4560  }
4561  }
4562 
4563  unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4564  if (!ResultReg)
4565  return false;
4566 
4567  updateValueMap(I, ResultReg);
4568  return true;
4569 }
4570 
4571 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4572  EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4573  if (!DestEVT.isSimple())
4574  return false;
4575 
4576  MVT DestVT = DestEVT.getSimpleVT();
4577  if (DestVT != MVT::i64 && DestVT != MVT::i32)
4578  return false;
4579 
4580  unsigned DivOpc;
4581  bool Is64bit = (DestVT == MVT::i64);
4582  switch (ISDOpcode) {
4583  default:
4584  return false;
4585  case ISD::SREM:
4586  DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4587  break;
4588  case ISD::UREM:
4589  DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4590  break;
4591  }
4592  unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4593  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4594  if (!Src0Reg)
4595  return false;
4596  bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4597 
4598  unsigned Src1Reg = getRegForValue(I->getOperand(1));
4599  if (!Src1Reg)
4600  return false;
4601  bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4602 
4603  const TargetRegisterClass *RC =
4604  (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4605  unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
4606  Src1Reg, /*IsKill=*/false);
4607  assert(QuotReg && "Unexpected DIV instruction emission failure.");
4608  // The remainder is computed as numerator - (quotient * denominator) using the
4609  // MSUB instruction.
4610  unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
4611  Src1Reg, Src1IsKill, Src0Reg,
4612  Src0IsKill);
4613  updateValueMap(I, ResultReg);
4614  return true;
4615 }
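// Illustrative sketch (not part of the original source; register numbers are
// arbitrary): for %r = srem i32 7, 3 the SDIV/MSUB pair above computes
//   sdiv w8, w9, w10          ; w8 = 7 / 3 = 2
//   msub w11, w8, w10, w9     ; w11 = 7 - 2 * 3 = 1
// i.e. remainder = numerator - quotient * denominator; urem uses the same
// shape with UDIV.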
4616 
4617 bool AArch64FastISel::selectMul(const Instruction *I) {
4618  MVT VT;
4619  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4620  return false;
4621 
4622  if (VT.isVector())
4623  return selectBinaryOp(I, ISD::MUL);
4624 
4625  const Value *Src0 = I->getOperand(0);
4626  const Value *Src1 = I->getOperand(1);
4627  if (const auto *C = dyn_cast<ConstantInt>(Src0))
4628  if (C->getValue().isPowerOf2())
4629  std::swap(Src0, Src1);
4630 
4631  // Try to simplify to a shift instruction.
4632  if (const auto *C = dyn_cast<ConstantInt>(Src1))
4633  if (C->getValue().isPowerOf2()) {
4634  uint64_t ShiftVal = C->getValue().logBase2();
4635  MVT SrcVT = VT;
4636  bool IsZExt = true;
4637  if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4638  if (!isIntExtFree(ZExt)) {
4639  MVT VT;
4640  if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4641  SrcVT = VT;
4642  IsZExt = true;
4643  Src0 = ZExt->getOperand(0);
4644  }
4645  }
4646  } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4647  if (!isIntExtFree(SExt)) {
4648  MVT VT;
4649  if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4650  SrcVT = VT;
4651  IsZExt = false;
4652  Src0 = SExt->getOperand(0);
4653  }
4654  }
4655  }
4656 
4657  unsigned Src0Reg = getRegForValue(Src0);
4658  if (!Src0Reg)
4659  return false;
4660  bool Src0IsKill = hasTrivialKill(Src0);
4661 
4662  unsigned ResultReg =
4663  emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt);
4664 
4665  if (ResultReg) {
4666  updateValueMap(I, ResultReg);
4667  return true;
4668  }
4669  }
4670 
4671  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4672  if (!Src0Reg)
4673  return false;
4674  bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4675 
4676  unsigned Src1Reg = getRegForValue(I->getOperand(1));
4677  if (!Src1Reg)
4678  return false;
4679  bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4680 
4681  unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
4682 
4683  if (!ResultReg)
4684  return false;
4685 
4686  updateValueMap(I, ResultReg);
4687  return true;
4688 }
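// Illustrative sketch (not part of the original source): a multiply by a
// power of two such as
//   %r = mul i32 %x, 8
// is rewritten as a left shift by 3 through emitLSL_ri (a UBFM/SBFM-encoded
// shift), and a free zero-/sign-extend feeding the multiply is folded into
// that shift; only non-power-of-two multiplies fall through to the MADD-based
// emitMul_rr at the end of the function.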
4689 
4690 bool AArch64FastISel::selectShift(const Instruction *I) {
4691  MVT RetVT;
4692  if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4693  return false;
4694 
4695  if (RetVT.isVector())
4696  return selectOperator(I, I->getOpcode());
4697 
4698  if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4699  unsigned ResultReg = 0;
4700  uint64_t ShiftVal = C->getZExtValue();
4701  MVT SrcVT = RetVT;
4702  bool IsZExt = I->getOpcode() != Instruction::AShr;
4703  const Value *Op0 = I->getOperand(0);
4704  if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4705  if (!isIntExtFree(ZExt)) {
4706  MVT TmpVT;
4707  if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4708  SrcVT = TmpVT;
4709  IsZExt = true;
4710  Op0 = ZExt->getOperand(0);
4711  }
4712  }
4713  } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4714  if (!isIntExtFree(SExt)) {
4715  MVT TmpVT;
4716  if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4717  SrcVT = TmpVT;
4718  IsZExt = false;
4719  Op0 = SExt->getOperand(0);
4720  }
4721  }
4722  }
4723 
4724  unsigned Op0Reg = getRegForValue(Op0);
4725  if (!Op0Reg)
4726  return false;
4727  bool Op0IsKill = hasTrivialKill(Op0);
4728 
4729  switch (I->getOpcode()) {
4730  default: llvm_unreachable("Unexpected instruction.");
4731  case Instruction::Shl:
4732  ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4733  break;
4734  case Instruction::AShr:
4735  ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4736  break;
4737  case Instruction::LShr:
4738  ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4739  break;
4740  }
4741  if (!ResultReg)
4742  return false;
4743 
4744  updateValueMap(I, ResultReg);
4745  return true;
4746  }
4747 
4748  unsigned Op0Reg = getRegForValue(I->getOperand(0));
4749  if (!Op0Reg)
4750  return false;
4751  bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4752 
4753  unsigned Op1Reg = getRegForValue(I->getOperand(1));
4754  if (!Op1Reg)
4755  return false;
4756  bool Op1IsKill = hasTrivialKill(I->getOperand(1));
4757 
4758  unsigned ResultReg = 0;
4759  switch (I->getOpcode()) {
4760  default: llvm_unreachable("Unexpected instruction.");
4761  case Instruction::Shl:
4762  ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4763  break;
4764  case Instruction::AShr:
4765  ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4766  break;
4767  case Instruction::LShr:
4768  ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4769  break;
4770  }
4771 
4772  if (!ResultReg)
4773  return false;
4774 
4775  updateValueMap(I, ResultReg);
4776  return true;
4777 }
4778 
4779 bool AArch64FastISel::selectBitCast(const Instruction *I) {
4780  MVT RetVT, SrcVT;
4781 
4782  if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4783  return false;
4784  if (!isTypeLegal(I->getType(), RetVT))
4785  return false;
4786 
4787  unsigned Opc;
4788  if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4789  Opc = AArch64::FMOVWSr;
4790  else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4791  Opc = AArch64::FMOVXDr;
4792  else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4793  Opc = AArch64::FMOVSWr;
4794  else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4795  Opc = AArch64::FMOVDXr;
4796  else
4797  return false;
4798 
4799  const TargetRegisterClass *RC = nullptr;
4800  switch (RetVT.SimpleTy) {
4801  default: llvm_unreachable("Unexpected value type.");
4802  case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4803  case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4804  case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4805  case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4806  }
4807  unsigned Op0Reg = getRegForValue(I->getOperand(0));
4808  if (!Op0Reg)
4809  return false;
4810  bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4811  unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
4812 
4813  if (!ResultReg)
4814  return false;
4815 
4816  updateValueMap(I, ResultReg);
4817  return true;
4818 }
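// Illustrative sketch (not part of the original source; register numbers are
// arbitrary): a bitcast between same-sized integer and floating-point types
// is just a register-file move, e.g.
//   %f = bitcast i32 %x to float    -->  fmov s0, w8
//   %i = bitcast double %d to i64   -->  fmov x8, d0
// which is why only the four FMOV opcodes above are handled here.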
4819 
4820 bool AArch64FastISel::selectFRem(const Instruction *I) {
4821  MVT RetVT;
4822  if (!isTypeLegal(I->getType(), RetVT))
4823  return false;
4824 
4825  RTLIB::Libcall LC;
4826  switch (RetVT.SimpleTy) {
4827  default:
4828  return false;
4829  case MVT::f32:
4830  LC = RTLIB::REM_F32;
4831  break;
4832  case MVT::f64:
4833  LC = RTLIB::REM_F64;
4834  break;
4835  }
4836 
4837  ArgListTy Args;
4838  Args.reserve(I->getNumOperands());
4839 
4840  // Populate the argument list.
4841  for (auto &Arg : I->operands()) {
4842  ArgListEntry Entry;
4843  Entry.Val = Arg;
4844  Entry.Ty = Arg->getType();
4845  Args.push_back(Entry);
4846  }
4847 
4848  CallLoweringInfo CLI;
4849  MCContext &Ctx = MF->getContext();
4850  CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4851  TLI.getLibcallName(LC), std::move(Args));
4852  if (!lowerCallTo(CLI))
4853  return false;
4854  updateValueMap(I, CLI.ResultReg);
4855  return true;
4856 }
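// Illustrative sketch (not part of the original source): AArch64 has no frem
// instruction, so
//   %r = frem float %a, %b
// is lowered by the code above as a libcall to fmodf (fmod for double) using
// the ordinary call-lowering path, and the call's result register becomes the
// value of the frem.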
4857 
4858 bool AArch64FastISel::selectSDiv(const Instruction *I) {
4859  MVT VT;
4860  if (!isTypeLegal(I->getType(), VT))
4861  return false;
4862 
4863  if (!isa<ConstantInt>(I->getOperand(1)))
4864  return selectBinaryOp(I, ISD::SDIV);
4865 
4866  const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4867  if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4868  !(C.isPowerOf2() || (-C).isPowerOf2()))
4869  return selectBinaryOp(I, ISD::SDIV);
4870 
4871  unsigned Lg2 = C.countTrailingZeros();
4872  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4873  if (!Src0Reg)
4874  return false;
4875  bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4876 
4877  if (cast<BinaryOperator>(I)->isExact()) {
4878  unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
4879  if (!ResultReg)
4880  return false;
4881  updateValueMap(I, ResultReg);
4882  return true;
4883  }
4884 
4885  int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4886  unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne);
4887  if (!AddReg)
4888  return false;
4889 
4890  // (Src0 < 0) ? Pow2 - 1 : 0;
4891  if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
4892  return false;
4893 
4894  unsigned SelectOpc;
4895  const TargetRegisterClass *RC;
4896  if (VT == MVT::i64) {
4897  SelectOpc = AArch64::CSELXr;
4898  RC = &AArch64::GPR64RegClass;
4899  } else {
4900  SelectOpc = AArch64::CSELWr;
4901  RC = &AArch64::GPR32RegClass;
4902  }
4903  unsigned SelectReg =
4904  fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
4905  Src0IsKill, AArch64CC::LT);
4906  if (!SelectReg)
4907  return false;
4908 
4909  // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4910  // negate the result.
4911  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4912  unsigned ResultReg;
4913  if (C.isNegative())
4914  ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
4915  SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
4916  else
4917  ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);
4918 
4919  if (!ResultReg)
4920  return false;
4921 
4922  updateValueMap(I, ResultReg);
4923  return true;
4924 }
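// Illustrative sketch (not part of the original source; register numbers are
// arbitrary): for a non-exact
//   %q = sdiv i32 %x, 4
// Lg2 is 2 and the sequence above biases negative dividends so the shift
// rounds toward zero:
//   add  w8, w9, #3           ; x + (4 - 1)
//   cmp  w9, #0
//   csel w8, w8, w9, lt       ; use the biased value only when x < 0
//   asr  w8, w8, #2
// plus a subtract from the zero register when the divisor is a negative
// power of two.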
4925 
4926 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4927 /// have to duplicate it for AArch64, because otherwise we would fail during the
4928 /// sign-extend emission.
4929 std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4930  unsigned IdxN = getRegForValue(Idx);
4931  if (IdxN == 0)
4932  // Unhandled operand. Halt "fast" selection and bail.
4933  return std::pair<unsigned, bool>(0, false);
4934 
4935  bool IdxNIsKill = hasTrivialKill(Idx);
4936 
4937  // If the index is smaller or larger than intptr_t, truncate or extend it.
4938  MVT PtrVT = TLI.getPointerTy(DL);
4939  EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4940  if (IdxVT.bitsLT(PtrVT)) {
4941  IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*IsZExt=*/false);
4942  IdxNIsKill = true;
4943  } else if (IdxVT.bitsGT(PtrVT))
4944  llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4945  return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
4946 }
4947 
4948 /// This is mostly a copy of the existing FastISel GEP code, but we have to
4949 /// duplicate it for AArch64, because otherwise we would bail out even for
4950 /// simple cases. This is because the standard fastEmit functions don't cover
4951  /// MUL at all and ADD is lowered very inefficiently.
4952 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4953  unsigned N = getRegForValue(I->getOperand(0));
4954  if (!N)
4955  return false;
4956  bool NIsKill = hasTrivialKill(I->getOperand(0));
4957 
4958  // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4959  // into a single N = N + TotalOffset.
4960  uint64_t TotalOffs = 0;
4961  MVT VT = TLI.getPointerTy(DL);
4962  for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
4963  GTI != E; ++GTI) {
4964  const Value *Idx = GTI.getOperand();
4965  if (auto *StTy = GTI.getStructTypeOrNull()) {
4966  unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4967  // N = N + Offset
4968  if (Field)
4969  TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4970  } else {
4971  Type *Ty = GTI.getIndexedType();
4972 
4973  // If this is a constant subscript, handle it quickly.
4974  if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4975  if (CI->isZero())
4976  continue;
4977  // N = N + Offset
4978  TotalOffs +=
4979  DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
4980  continue;
4981  }
4982  if (TotalOffs) {
4983  N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
4984  if (!N)
4985  return false;
4986  NIsKill = true;
4987  TotalOffs = 0;
4988  }
4989 
4990  // N = N + Idx * ElementSize;
4991  uint64_t ElementSize = DL.getTypeAllocSize(Ty);
4992  std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
4993  unsigned IdxN = Pair.first;
4994  bool IdxNIsKill = Pair.second;
4995  if (!IdxN)
4996  return false;
4997 
4998  if (ElementSize != 1) {
4999  unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
5000  if (!C)
5001  return false;
5002  IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true);
5003  if (!IdxN)
5004  return false;
5005  IdxNIsKill = true;
5006  }
5007  N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
5008  if (!N)
5009  return false;
5010  }
5011  }
5012  if (TotalOffs) {
5013  N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
5014  if (!N)
5015  return false;
5016  }
5017  updateValueMap(I, N);
5018  return true;
5019 }
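// Two minimal sketches of what the loop above produces (offsets and register
// numbers are illustrative assumptions, not taken from the source):
//   %f = getelementptr %struct.S, %struct.S* %p, i64 0, i32 2
//     -> all-constant indices fold into TotalOffs, e.g. for a field at
//        offset 8 a single
//        add x1, x0, #8        ; emitAdd_ri_ with the accumulated offset
//   %e = getelementptr i32, i32* %p, i64 %i
//     -> the variable index is scaled and added:
//        mov x2, #4            ; fastEmit_i materializes ElementSize
//        mul x3, x1, x2        ; emitMul_rr (MADD with the zero register)
//        add x0, x0, x3        ; fastEmit_rr ISD::ADD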
5020 
5021 bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
5022  assert(TM.getOptLevel() == CodeGenOpt::None &&
5023  "cmpxchg survived AtomicExpand at optlevel > -O0");
5024 
5025  auto *RetPairTy = cast<StructType>(I->getType());
5026  Type *RetTy = RetPairTy->getTypeAtIndex(0U);
5027  assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
5028  "cmpxchg has a non-i1 status result");
5029 
5030  MVT VT;
5031  if (!isTypeLegal(RetTy, VT))
5032  return false;
5033 
5034  const TargetRegisterClass *ResRC;
5035  unsigned Opc, CmpOpc;
5036  // This only supports i32/i64, because i8/i16 aren't legal, and the generic
5037  // extractvalue selection doesn't support that.
5038  if (VT == MVT::i32) {
5039  Opc = AArch64::CMP_SWAP_32;
5040  CmpOpc = AArch64::SUBSWrs;
5041  ResRC = &AArch64::GPR32RegClass;
5042  } else if (VT == MVT::i64) {
5043  Opc = AArch64::CMP_SWAP_64;
5044  CmpOpc = AArch64::SUBSXrs;
5045  ResRC = &AArch64::GPR64RegClass;
5046  } else {
5047  return false;
5048  }
5049 
5050  const MCInstrDesc &II = TII.get(Opc);
5051 
5052  const unsigned AddrReg = constrainOperandRegClass(
5053  II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
5054  const unsigned DesiredReg = constrainOperandRegClass(
5055  II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
5056  const unsigned NewReg = constrainOperandRegClass(
5057  II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
5058 
5059  const unsigned ResultReg1 = createResultReg(ResRC);
5060  const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5061  const unsigned ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5062 
5063  // FIXME: MachineMemOperand doesn't support cmpxchg yet.
5064  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
5065  .addDef(ResultReg1)
5066  .addDef(ScratchReg)
5067  .addUse(AddrReg)
5068  .addUse(DesiredReg)
5069  .addUse(NewReg);
5070 
5071  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
5072  .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5073  .addUse(ResultReg1)
5074  .addUse(DesiredReg)
5075  .addImm(0);
5076 
5077  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr))
5078  .addDef(ResultReg2)
5079  .addUse(AArch64::WZR)
5080  .addUse(AArch64::WZR)
5081  .addImm(AArch64CC::NE);
5082 
5083  assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5084  updateValueMap(I, ResultReg1, 2);
5085  return true;
5086 }
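// A minimal sketch of the result handling above, assuming
// "%r = cmpxchg i32* %p, i32 %old, i32 %new seq_cst seq_cst":
//   CMP_SWAP_32 %loaded, %scratch, %p, %old, %new  ; pseudo, later expanded to
//                                                  ; an ldaxr/stlxr loop
//   subs wzr, %loaded, %old                        ; compare loaded vs. desired
//   csinc %success, wzr, wzr, ne                   ; 1 iff loaded == desired
// updateValueMap(I, ResultReg1, 2) then maps the {i32, i1} aggregate onto the
// two consecutive result registers.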
5087 
5088 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5089  switch (I->getOpcode()) {
5090  default:
5091  break;
5092  case Instruction::Add:
5093  case Instruction::Sub:
5094  return selectAddSub(I);
5095  case Instruction::Mul:
5096  return selectMul(I);
5097  case Instruction::SDiv:
5098  return selectSDiv(I);
5099  case Instruction::SRem:
5100  if (!selectBinaryOp(I, ISD::SREM))
5101  return selectRem(I, ISD::SREM);
5102  return true;
5103  case Instruction::URem:
5104  if (!selectBinaryOp(I, ISD::UREM))
5105  return selectRem(I, ISD::UREM);
5106  return true;
5107  case Instruction::Shl:
5108  case Instruction::LShr:
5109  case Instruction::AShr:
5110  return selectShift(I);
5111  case Instruction::And:
5112  case Instruction::Or:
5113  case Instruction::Xor:
5114  return selectLogicalOp(I);
5115  case Instruction::Br:
5116  return selectBranch(I);
5117  case Instruction::IndirectBr:
5118  return selectIndirectBr(I);
5119  case Instruction::BitCast:
5120  if (!FastISel::selectBitCast(I))
5121  return selectBitCast(I);
5122  return true;
5123  case Instruction::FPToSI:
5124  if (!selectCast(I, ISD::FP_TO_SINT))
5125  return selectFPToInt(I, /*Signed=*/true);
5126  return true;
5127  case Instruction::FPToUI:
5128  return selectFPToInt(I, /*Signed=*/false);
5129  case Instruction::ZExt:
5130  case Instruction::SExt:
5131  return selectIntExt(I);
5132  case Instruction::Trunc:
5133  if (!selectCast(I, ISD::TRUNCATE))
5134  return selectTrunc(I);
5135  return true;
5136  case Instruction::FPExt:
5137  return selectFPExt(I);
5138  case Instruction::FPTrunc:
5139  return selectFPTrunc(I);
5140  case Instruction::SIToFP:
5141  if (!selectCast(I, ISD::SINT_TO_FP))
5142  return selectIntToFP(I, /*Signed=*/true);
5143  return true;
5144  case Instruction::UIToFP:
5145  return selectIntToFP(I, /*Signed=*/false);
5146  case Instruction::Load:
5147  return selectLoad(I);
5148  case Instruction::Store:
5149  return selectStore(I);
5150  case Instruction::FCmp:
5151  case Instruction::ICmp:
5152  return selectCmp(I);
5153  case Instruction::Select:
5154  return selectSelect(I);
5155  case Instruction::Ret:
5156  return selectRet(I);
5157  case Instruction::FRem:
5158  return selectFRem(I);
5159  case Instruction::GetElementPtr:
5160  return selectGetElementPtr(I);
5161  case Instruction::AtomicCmpXchg:
5162  return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5163  }
5164 
5165  // Silence warnings.
5166  (void)&CC_AArch64_DarwinPCS_VarArg;
5167  (void)&CC_AArch64_Win64_VarArg;
5168 
5169  // Fall back to target-independent instruction selection.
5170  return selectOperator(I, I->getOpcode());
5171 }
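// Note on the dispatch pattern above: for opcodes such as SRem, URem, BitCast,
// FPToSI, Trunc, and SIToFP the generic FastISel path (selectBinaryOp,
// selectCast, ...) is tried first and the AArch64-specific handler runs only
// when it fails; anything not listed falls through to selectOperator. A rough
// example of the fallback, assuming "%r = srem i32 %a, %b" (registers
// illustrative): if the tablegen'd tables cannot handle ISD::SREM, selectRem
// emits a divide followed by a multiply-subtract:
//   sdiv w2, w0, w1
//   msub w3, w2, w1, w0      ; r = a - (a / b) * b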
5172 
5173 namespace llvm {
5174 
5175 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
5176  const TargetLibraryInfo *LibInfo) {
5177  return new AArch64FastISel(FuncInfo, LibInfo);
5178 }
5179 
5180 } // end namespace llvm
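// For reference, this factory is what the target's lowering hook forwards to;
// a minimal sketch of the usual wiring (simplified from the override in
// AArch64ISelLowering.cpp, parameter names are illustrative):
//   FastISel *AArch64TargetLowering::createFastISel(
//       FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const {
//     return AArch64::createFastISel(FuncInfo, LibInfo);
//   }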