LLVM  9.0.0svn
AArch64FastISel.cpp
1 //===- AArch64FastISel.cpp - AArch64 FastISel implementation --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the AArch64-specific support for the FastISel class. Some
10 // of the target-specific code is generated by tablegen in the file
11 // AArch64GenFastISel.inc, which is #included here.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AArch64.h"
17 #include "AArch64RegisterInfo.h"
18 #include "AArch64Subtarget.h"
20 #include "Utils/AArch64BaseInfo.h"
21 #include "llvm/ADT/APFloat.h"
22 #include "llvm/ADT/APInt.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/SmallVector.h"
27 #include "llvm/CodeGen/FastISel.h"
39 #include "llvm/IR/Argument.h"
40 #include "llvm/IR/Attributes.h"
41 #include "llvm/IR/BasicBlock.h"
42 #include "llvm/IR/CallingConv.h"
43 #include "llvm/IR/Constant.h"
44 #include "llvm/IR/Constants.h"
45 #include "llvm/IR/DataLayout.h"
46 #include "llvm/IR/DerivedTypes.h"
47 #include "llvm/IR/Function.h"
49 #include "llvm/IR/GlobalValue.h"
50 #include "llvm/IR/InstrTypes.h"
51 #include "llvm/IR/Instruction.h"
52 #include "llvm/IR/Instructions.h"
53 #include "llvm/IR/IntrinsicInst.h"
54 #include "llvm/IR/Intrinsics.h"
55 #include "llvm/IR/Operator.h"
56 #include "llvm/IR/Type.h"
57 #include "llvm/IR/User.h"
58 #include "llvm/IR/Value.h"
59 #include "llvm/MC/MCInstrDesc.h"
60 #include "llvm/MC/MCRegisterInfo.h"
61 #include "llvm/MC/MCSymbol.h"
63 #include "llvm/Support/Casting.h"
64 #include "llvm/Support/CodeGen.h"
65 #include "llvm/Support/Compiler.h"
69 #include <algorithm>
70 #include <cassert>
71 #include <cstdint>
72 #include <iterator>
73 #include <utility>
74 
75 using namespace llvm;
76 
77 namespace {
78 
79 class AArch64FastISel final : public FastISel {
80  class Address {
81  public:
82  using BaseKind = enum {
83  RegBase,
84  FrameIndexBase
85  };
86 
87  private:
88  BaseKind Kind = RegBase;
 89  AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
 90  union {
91  unsigned Reg;
92  int FI;
93  } Base;
94  unsigned OffsetReg = 0;
95  unsigned Shift = 0;
96  int64_t Offset = 0;
97  const GlobalValue *GV = nullptr;
98 
99  public:
100  Address() { Base.Reg = 0; }
101 
102  void setKind(BaseKind K) { Kind = K; }
103  BaseKind getKind() const { return Kind; }
104  void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
 105  AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
 106  bool isRegBase() const { return Kind == RegBase; }
107  bool isFIBase() const { return Kind == FrameIndexBase; }
108 
109  void setReg(unsigned Reg) {
110  assert(isRegBase() && "Invalid base register access!");
111  Base.Reg = Reg;
112  }
113 
114  unsigned getReg() const {
115  assert(isRegBase() && "Invalid base register access!");
116  return Base.Reg;
117  }
118 
119  void setOffsetReg(unsigned Reg) {
120  OffsetReg = Reg;
121  }
122 
123  unsigned getOffsetReg() const {
124  return OffsetReg;
125  }
126 
127  void setFI(unsigned FI) {
128  assert(isFIBase() && "Invalid base frame index access!");
129  Base.FI = FI;
130  }
131 
132  unsigned getFI() const {
133  assert(isFIBase() && "Invalid base frame index access!");
134  return Base.FI;
135  }
136 
137  void setOffset(int64_t O) { Offset = O; }
138  int64_t getOffset() { return Offset; }
139  void setShift(unsigned S) { Shift = S; }
140  unsigned getShift() { return Shift; }
141 
142  void setGlobalValue(const GlobalValue *G) { GV = G; }
143  const GlobalValue *getGlobalValue() { return GV; }
144  };
145 
146  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
147  /// make the right decision when generating code for different targets.
148  const AArch64Subtarget *Subtarget;
 149  LLVMContext *Context;
 150 
151  bool fastLowerArguments() override;
152  bool fastLowerCall(CallLoweringInfo &CLI) override;
153  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
154 
155 private:
156  // Selection routines.
157  bool selectAddSub(const Instruction *I);
158  bool selectLogicalOp(const Instruction *I);
159  bool selectLoad(const Instruction *I);
160  bool selectStore(const Instruction *I);
161  bool selectBranch(const Instruction *I);
162  bool selectIndirectBr(const Instruction *I);
163  bool selectCmp(const Instruction *I);
164  bool selectSelect(const Instruction *I);
165  bool selectFPExt(const Instruction *I);
166  bool selectFPTrunc(const Instruction *I);
167  bool selectFPToInt(const Instruction *I, bool Signed);
168  bool selectIntToFP(const Instruction *I, bool Signed);
169  bool selectRem(const Instruction *I, unsigned ISDOpcode);
170  bool selectRet(const Instruction *I);
171  bool selectTrunc(const Instruction *I);
172  bool selectIntExt(const Instruction *I);
173  bool selectMul(const Instruction *I);
174  bool selectShift(const Instruction *I);
175  bool selectBitCast(const Instruction *I);
176  bool selectFRem(const Instruction *I);
177  bool selectSDiv(const Instruction *I);
178  bool selectGetElementPtr(const Instruction *I);
179  bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
180 
181  // Utility helper routines.
182  bool isTypeLegal(Type *Ty, MVT &VT);
183  bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
184  bool isValueAvailable(const Value *V) const;
185  bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
186  bool computeCallAddress(const Value *V, Address &Addr);
187  bool simplifyAddress(Address &Addr, MVT VT);
188  void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
 189  MachineMemOperand::Flags Flags,
 190  unsigned ScaleFactor, MachineMemOperand *MMO);
191  bool isMemCpySmall(uint64_t Len, unsigned Alignment);
192  bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
193  unsigned Alignment);
194  bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
195  const Value *Cond);
196  bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
197  bool optimizeSelect(const SelectInst *SI);
198  std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);
199 
200  // Emit helper routines.
201  unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
202  const Value *RHS, bool SetFlags = false,
203  bool WantResult = true, bool IsZExt = false);
204  unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
205  bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
206  bool SetFlags = false, bool WantResult = true);
207  unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
208  bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
209  bool WantResult = true);
210  unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
211  bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
212  AArch64_AM::ShiftExtendType ShiftType,
213  uint64_t ShiftImm, bool SetFlags = false,
214  bool WantResult = true);
215  unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
216  bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 217  AArch64_AM::ShiftExtendType ExtType,
 218  uint64_t ShiftImm, bool SetFlags = false,
219  bool WantResult = true);
220 
221  // Emit functions.
222  bool emitCompareAndBranch(const BranchInst *BI);
223  bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
224  bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
225  bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
226  bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
227  unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
228  MachineMemOperand *MMO = nullptr);
229  bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
230  MachineMemOperand *MMO = nullptr);
231  bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
232  MachineMemOperand *MMO = nullptr);
233  unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
234  unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
235  unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
236  bool SetFlags = false, bool WantResult = true,
237  bool IsZExt = false);
238  unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
239  unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
240  bool SetFlags = false, bool WantResult = true,
241  bool IsZExt = false);
242  unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
243  unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
244  unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
245  unsigned RHSReg, bool RHSIsKill,
246  AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
247  bool WantResult = true);
248  unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
249  const Value *RHS);
250  unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
251  bool LHSIsKill, uint64_t Imm);
252  unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
253  bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
254  uint64_t ShiftImm);
255  unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
256  unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
257  unsigned Op1, bool Op1IsKill);
258  unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
259  unsigned Op1, bool Op1IsKill);
260  unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
261  unsigned Op1, bool Op1IsKill);
262  unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
263  unsigned Op1Reg, bool Op1IsKill);
264  unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
265  uint64_t Imm, bool IsZExt = true);
266  unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
267  unsigned Op1Reg, bool Op1IsKill);
268  unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
269  uint64_t Imm, bool IsZExt = true);
270  unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
271  unsigned Op1Reg, bool Op1IsKill);
272  unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
273  uint64_t Imm, bool IsZExt = false);
274 
275  unsigned materializeInt(const ConstantInt *CI, MVT VT);
276  unsigned materializeFP(const ConstantFP *CFP, MVT VT);
277  unsigned materializeGV(const GlobalValue *GV);
278 
279  // Call handling routines.
280 private:
281  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
282  bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
283  unsigned &NumBytes);
284  bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
285 
286 public:
287  // Backend specific FastISel code.
288  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
289  unsigned fastMaterializeConstant(const Constant *C) override;
290  unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
291 
292  explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
293  const TargetLibraryInfo *LibInfo)
294  : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
295  Subtarget =
296  &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
297  Context = &FuncInfo.Fn->getContext();
298  }
299 
300  bool fastSelectInstruction(const Instruction *I) override;
301 
302 #include "AArch64GenFastISel.inc"
303 };
304 
305 } // end anonymous namespace
306 
307 /// Check if the sign-/zero-extend will be a noop.
308 static bool isIntExtFree(const Instruction *I) {
309  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
310  "Unexpected integer extend instruction.");
311  assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
312  "Unexpected value type.");
313  bool IsZExt = isa<ZExtInst>(I);
314 
315  if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
316  if (LI->hasOneUse())
317  return true;
318 
319  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
320  if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
321  return true;
322 
323  return false;
324 }
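// In other words, the extend is "free" when it will be folded away later:
// a zext/sext of a single-use load becomes an extending load (e.g. LDRB or
// LDRSB for i8), and an argument that already carries a matching
// zeroext/signext attribute arrives extended in its register, so no extra
// UBFM/SBFM is needed.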
325 
326 /// Determine the implicit scale factor that is applied by a memory
327 /// operation for a given value type.
328 static unsigned getImplicitScaleFactor(MVT VT) {
329  switch (VT.SimpleTy) {
330  default:
331  return 0; // invalid
332  case MVT::i1: // fall-through
333  case MVT::i8:
334  return 1;
335  case MVT::i16:
336  return 2;
337  case MVT::i32: // fall-through
338  case MVT::f32:
339  return 4;
340  case MVT::i64: // fall-through
341  case MVT::f64:
342  return 8;
343  }
344 }
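// E.g. for an i32 access the scale factor is 4, so the scaled-immediate
// form "ldr w0, [x1, #40]" stores 40/4 = 10 in its unsigned 12-bit offset
// field; unsupported types return 0 so callers bail out of this fast-path
// addressing code.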
345 
346 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
347  if (CC == CallingConv::WebKit_JS)
348  return CC_AArch64_WebKit_JS;
349  if (CC == CallingConv::GHC)
350  return CC_AArch64_GHC;
351  return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
352 }
353 
354 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
355  assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
356  "Alloca should always return a pointer.");
357 
358  // Don't handle dynamic allocas.
359  if (!FuncInfo.StaticAllocaMap.count(AI))
360  return 0;
361 
 362  DenseMap<const AllocaInst *, int>::iterator SI =
 363  FuncInfo.StaticAllocaMap.find(AI);
364 
365  if (SI != FuncInfo.StaticAllocaMap.end()) {
366  unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
367  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
368  ResultReg)
369  .addFrameIndex(SI->second)
370  .addImm(0)
371  .addImm(0);
372  return ResultReg;
373  }
374 
375  return 0;
376 }
377 
378 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
379  if (VT > MVT::i64)
380  return 0;
381 
382  if (!CI->isZero())
383  return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
384 
385  // Create a copy from the zero register to materialize a "0" value.
386  const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
387  : &AArch64::GPR32RegClass;
388  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
389  unsigned ResultReg = createResultReg(RC);
390  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
391  ResultReg).addReg(ZeroReg, getKillRegState(true));
392  return ResultReg;
393 }
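// E.g. "i64 0" is materialized as a plain COPY from XZR into a fresh GPR64
// virtual register; any non-zero constant goes through the
// tablegen-generated fastEmit_i path instead (typically a MOVi32imm or
// MOVi64imm pseudo that later expands to a MOVZ/MOVK sequence).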
394 
395 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
396  // Positive zero (+0.0) has to be materialized with a fmov from the zero
397  // register, because the immediate version of fmov cannot encode zero.
398  if (CFP->isNullValue())
399  return fastMaterializeFloatZero(CFP);
400 
401  if (VT != MVT::f32 && VT != MVT::f64)
402  return 0;
403 
404  const APFloat Val = CFP->getValueAPF();
405  bool Is64Bit = (VT == MVT::f64);
406  // This checks to see if we can use FMOV instructions to materialize
 407  // a constant; otherwise we have to materialize it via the constant pool.
408  int Imm =
409  Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
410  if (Imm != -1) {
411  unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
412  return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
413  }
414 
415  // For the MachO large code model materialize the FP constant in code.
416  if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
417  unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
418  const TargetRegisterClass *RC = Is64Bit ?
419  &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
420 
421  unsigned TmpReg = createResultReg(RC);
422  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
423  .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
424 
425  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
426  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
427  TII.get(TargetOpcode::COPY), ResultReg)
428  .addReg(TmpReg, getKillRegState(true));
429 
430  return ResultReg;
431  }
432 
433  // Materialize via constant pool. MachineConstantPool wants an explicit
434  // alignment.
435  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
436  if (Align == 0)
437  Align = DL.getTypeAllocSize(CFP->getType());
438 
439  unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
440  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
441  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
442  ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
443 
444  unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
445  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
446  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
447  .addReg(ADRPReg)
449  return ResultReg;
450 }
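// E.g. 1.0 is representable as an 8-bit FP immediate and becomes a single
// "fmov d0, #1.0", while a value such as 0.1 is not; outside the MachO
// large-code-model special case above, such constants are placed in the
// constant pool and loaded via an ADRP (page address) plus LDR (low 12
// bits) pair.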
451 
452 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
453  // We can't handle thread-local variables quickly yet.
454  if (GV->isThreadLocal())
455  return 0;
456 
457  // MachO still uses GOT for large code-model accesses, but ELF requires
458  // movz/movk sequences, which FastISel doesn't handle yet.
459  if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
460  return 0;
461 
462  unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
463 
464  EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
465  if (!DestEVT.isSimple())
466  return 0;
467 
468  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
469  unsigned ResultReg;
470 
471  if (OpFlags & AArch64II::MO_GOT) {
472  // ADRP + LDRX
473  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
474  ADRPReg)
475  .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
476 
477  ResultReg = createResultReg(&AArch64::GPR64RegClass);
478  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
479  ResultReg)
480  .addReg(ADRPReg)
481  .addGlobalAddress(GV, 0,
483  } else {
484  // ADRP + ADDX
485  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
486  ADRPReg)
487  .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
488 
489  ResultReg = createResultReg(&AArch64::GPR64spRegClass);
490  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
491  ResultReg)
492  .addReg(ADRPReg)
493  .addGlobalAddress(GV, 0,
495  .addImm(0);
496  }
497  return ResultReg;
498 }
499 
500 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
501  EVT CEVT = TLI.getValueType(DL, C->getType(), true);
502 
503  // Only handle simple types.
504  if (!CEVT.isSimple())
505  return 0;
506  MVT VT = CEVT.getSimpleVT();
507 
508  if (const auto *CI = dyn_cast<ConstantInt>(C))
509  return materializeInt(CI, VT);
510  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
511  return materializeFP(CFP, VT);
512  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
513  return materializeGV(GV);
514 
515  return 0;
516 }
517 
518 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
519  assert(CFP->isNullValue() &&
520  "Floating-point constant is not a positive zero.");
521  MVT VT;
522  if (!isTypeLegal(CFP->getType(), VT))
523  return 0;
524 
525  if (VT != MVT::f32 && VT != MVT::f64)
526  return 0;
527 
528  bool Is64Bit = (VT == MVT::f64);
529  unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
530  unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
531  return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
532 }
533 
534 /// Check if the multiply is by a power-of-2 constant.
535 static bool isMulPowOf2(const Value *I) {
536  if (const auto *MI = dyn_cast<MulOperator>(I)) {
537  if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
538  if (C->getValue().isPowerOf2())
539  return true;
540  if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
541  if (C->getValue().isPowerOf2())
542  return true;
543  }
544  return false;
545 }
546 
547 // Computes the address to get to an object.
548 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
549 {
550  const User *U = nullptr;
551  unsigned Opcode = Instruction::UserOp1;
552  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
553  // Don't walk into other basic blocks unless the object is an alloca from
554  // another block, otherwise it may not have a virtual register assigned.
555  if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
556  FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
557  Opcode = I->getOpcode();
558  U = I;
559  }
560  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
561  Opcode = C->getOpcode();
562  U = C;
563  }
564 
565  if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
566  if (Ty->getAddressSpace() > 255)
567  // Fast instruction selection doesn't support the special
568  // address spaces.
569  return false;
570 
571  switch (Opcode) {
572  default:
573  break;
574  case Instruction::BitCast:
575  // Look through bitcasts.
576  return computeAddress(U->getOperand(0), Addr, Ty);
577 
578  case Instruction::IntToPtr:
579  // Look past no-op inttoptrs.
580  if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
581  TLI.getPointerTy(DL))
582  return computeAddress(U->getOperand(0), Addr, Ty);
583  break;
584 
585  case Instruction::PtrToInt:
586  // Look past no-op ptrtoints.
587  if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
588  return computeAddress(U->getOperand(0), Addr, Ty);
589  break;
590 
591  case Instruction::GetElementPtr: {
592  Address SavedAddr = Addr;
593  uint64_t TmpOffset = Addr.getOffset();
594 
595  // Iterate through the GEP folding the constants into offsets where
596  // we can.
597  for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
598  GTI != E; ++GTI) {
599  const Value *Op = GTI.getOperand();
600  if (StructType *STy = GTI.getStructTypeOrNull()) {
601  const StructLayout *SL = DL.getStructLayout(STy);
602  unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
603  TmpOffset += SL->getElementOffset(Idx);
604  } else {
605  uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
606  while (true) {
607  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
608  // Constant-offset addressing.
609  TmpOffset += CI->getSExtValue() * S;
610  break;
611  }
612  if (canFoldAddIntoGEP(U, Op)) {
613  // A compatible add with a constant operand. Fold the constant.
614  ConstantInt *CI =
615  cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
616  TmpOffset += CI->getSExtValue() * S;
617  // Iterate on the other operand.
618  Op = cast<AddOperator>(Op)->getOperand(0);
619  continue;
620  }
621  // Unsupported
622  goto unsupported_gep;
623  }
624  }
625  }
626 
627  // Try to grab the base operand now.
628  Addr.setOffset(TmpOffset);
629  if (computeAddress(U->getOperand(0), Addr, Ty))
630  return true;
631 
632  // We failed, restore everything and try the other options.
633  Addr = SavedAddr;
634 
635  unsupported_gep:
636  break;
637  }
638  case Instruction::Alloca: {
639  const AllocaInst *AI = cast<AllocaInst>(Obj);
 640  DenseMap<const AllocaInst *, int>::iterator SI =
 641  FuncInfo.StaticAllocaMap.find(AI);
642  if (SI != FuncInfo.StaticAllocaMap.end()) {
643  Addr.setKind(Address::FrameIndexBase);
644  Addr.setFI(SI->second);
645  return true;
646  }
647  break;
648  }
649  case Instruction::Add: {
650  // Adds of constants are common and easy enough.
651  const Value *LHS = U->getOperand(0);
652  const Value *RHS = U->getOperand(1);
653 
654  if (isa<ConstantInt>(LHS))
655  std::swap(LHS, RHS);
656 
657  if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
658  Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
659  return computeAddress(LHS, Addr, Ty);
660  }
661 
662  Address Backup = Addr;
663  if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
664  return true;
665  Addr = Backup;
666 
667  break;
668  }
669  case Instruction::Sub: {
670  // Subs of constants are common and easy enough.
671  const Value *LHS = U->getOperand(0);
672  const Value *RHS = U->getOperand(1);
673 
674  if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
675  Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
676  return computeAddress(LHS, Addr, Ty);
677  }
678  break;
679  }
680  case Instruction::Shl: {
681  if (Addr.getOffsetReg())
682  break;
683 
684  const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
685  if (!CI)
686  break;
687 
688  unsigned Val = CI->getZExtValue();
689  if (Val < 1 || Val > 3)
690  break;
691 
692  uint64_t NumBytes = 0;
693  if (Ty && Ty->isSized()) {
694  uint64_t NumBits = DL.getTypeSizeInBits(Ty);
695  NumBytes = NumBits / 8;
696  if (!isPowerOf2_64(NumBits))
697  NumBytes = 0;
698  }
699 
700  if (NumBytes != (1ULL << Val))
701  break;
702 
703  Addr.setShift(Val);
704  Addr.setExtendType(AArch64_AM::LSL);
705 
706  const Value *Src = U->getOperand(0);
707  if (const auto *I = dyn_cast<Instruction>(Src)) {
708  if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
709  // Fold the zext or sext when it won't become a noop.
710  if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
711  if (!isIntExtFree(ZE) &&
712  ZE->getOperand(0)->getType()->isIntegerTy(32)) {
713  Addr.setExtendType(AArch64_AM::UXTW);
714  Src = ZE->getOperand(0);
715  }
716  } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
717  if (!isIntExtFree(SE) &&
718  SE->getOperand(0)->getType()->isIntegerTy(32)) {
719  Addr.setExtendType(AArch64_AM::SXTW);
720  Src = SE->getOperand(0);
721  }
722  }
723  }
724  }
725 
726  if (const auto *AI = dyn_cast<BinaryOperator>(Src))
727  if (AI->getOpcode() == Instruction::And) {
728  const Value *LHS = AI->getOperand(0);
729  const Value *RHS = AI->getOperand(1);
730 
731  if (const auto *C = dyn_cast<ConstantInt>(LHS))
732  if (C->getValue() == 0xffffffff)
733  std::swap(LHS, RHS);
734 
735  if (const auto *C = dyn_cast<ConstantInt>(RHS))
736  if (C->getValue() == 0xffffffff) {
737  Addr.setExtendType(AArch64_AM::UXTW);
738  unsigned Reg = getRegForValue(LHS);
739  if (!Reg)
740  return false;
741  bool RegIsKill = hasTrivialKill(LHS);
742  Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
743  AArch64::sub_32);
744  Addr.setOffsetReg(Reg);
745  return true;
746  }
747  }
748 
749  unsigned Reg = getRegForValue(Src);
750  if (!Reg)
751  return false;
752  Addr.setOffsetReg(Reg);
753  return true;
754  }
755  case Instruction::Mul: {
756  if (Addr.getOffsetReg())
757  break;
758 
759  if (!isMulPowOf2(U))
760  break;
761 
762  const Value *LHS = U->getOperand(0);
763  const Value *RHS = U->getOperand(1);
764 
765  // Canonicalize power-of-2 value to the RHS.
766  if (const auto *C = dyn_cast<ConstantInt>(LHS))
767  if (C->getValue().isPowerOf2())
768  std::swap(LHS, RHS);
769 
 770  assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
771  const auto *C = cast<ConstantInt>(RHS);
772  unsigned Val = C->getValue().logBase2();
773  if (Val < 1 || Val > 3)
774  break;
775 
776  uint64_t NumBytes = 0;
777  if (Ty && Ty->isSized()) {
778  uint64_t NumBits = DL.getTypeSizeInBits(Ty);
779  NumBytes = NumBits / 8;
780  if (!isPowerOf2_64(NumBits))
781  NumBytes = 0;
782  }
783 
784  if (NumBytes != (1ULL << Val))
785  break;
786 
787  Addr.setShift(Val);
788  Addr.setExtendType(AArch64_AM::LSL);
789 
790  const Value *Src = LHS;
791  if (const auto *I = dyn_cast<Instruction>(Src)) {
792  if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
793  // Fold the zext or sext when it won't become a noop.
794  if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
795  if (!isIntExtFree(ZE) &&
796  ZE->getOperand(0)->getType()->isIntegerTy(32)) {
797  Addr.setExtendType(AArch64_AM::UXTW);
798  Src = ZE->getOperand(0);
799  }
800  } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
801  if (!isIntExtFree(SE) &&
802  SE->getOperand(0)->getType()->isIntegerTy(32)) {
803  Addr.setExtendType(AArch64_AM::SXTW);
804  Src = SE->getOperand(0);
805  }
806  }
807  }
808  }
809 
810  unsigned Reg = getRegForValue(Src);
811  if (!Reg)
812  return false;
813  Addr.setOffsetReg(Reg);
814  return true;
815  }
816  case Instruction::And: {
817  if (Addr.getOffsetReg())
818  break;
819 
820  if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
821  break;
822 
823  const Value *LHS = U->getOperand(0);
824  const Value *RHS = U->getOperand(1);
825 
826  if (const auto *C = dyn_cast<ConstantInt>(LHS))
827  if (C->getValue() == 0xffffffff)
828  std::swap(LHS, RHS);
829 
830  if (const auto *C = dyn_cast<ConstantInt>(RHS))
831  if (C->getValue() == 0xffffffff) {
832  Addr.setShift(0);
833  Addr.setExtendType(AArch64_AM::LSL);
834  Addr.setExtendType(AArch64_AM::UXTW);
835 
836  unsigned Reg = getRegForValue(LHS);
837  if (!Reg)
838  return false;
839  bool RegIsKill = hasTrivialKill(LHS);
840  Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
841  AArch64::sub_32);
842  Addr.setOffsetReg(Reg);
843  return true;
844  }
845  break;
846  }
847  case Instruction::SExt:
848  case Instruction::ZExt: {
849  if (!Addr.getReg() || Addr.getOffsetReg())
850  break;
851 
852  const Value *Src = nullptr;
853  // Fold the zext or sext when it won't become a noop.
854  if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
855  if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
856  Addr.setExtendType(AArch64_AM::UXTW);
857  Src = ZE->getOperand(0);
858  }
859  } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
860  if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
861  Addr.setExtendType(AArch64_AM::SXTW);
862  Src = SE->getOperand(0);
863  }
864  }
865 
866  if (!Src)
867  break;
868 
869  Addr.setShift(0);
870  unsigned Reg = getRegForValue(Src);
871  if (!Reg)
872  return false;
873  Addr.setOffsetReg(Reg);
874  return true;
875  }
876  } // end switch
877 
878  if (Addr.isRegBase() && !Addr.getReg()) {
879  unsigned Reg = getRegForValue(Obj);
880  if (!Reg)
881  return false;
882  Addr.setReg(Reg);
883  return true;
884  }
885 
886  if (!Addr.getOffsetReg()) {
887  unsigned Reg = getRegForValue(Obj);
888  if (!Reg)
889  return false;
890  Addr.setOffsetReg(Reg);
891  return true;
892  }
893 
894  return false;
895 }
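// Roughly, for IR such as
//   %p = getelementptr i32, i32* %base, i64 4
//   %v = load i32, i32* %p
// the constant index is folded into Addr.Offset (16 bytes here), while a
// variable index that is shifted/extended by the access size (e.g. a
// zext'd i32 index on an i32 load) is folded into the register-offset
// addressing mode (base + offset register, UXTW/SXTW/LSL #2) instead of
// emitting a separate add/shift.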
896 
897 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
898  const User *U = nullptr;
899  unsigned Opcode = Instruction::UserOp1;
900  bool InMBB = true;
901 
902  if (const auto *I = dyn_cast<Instruction>(V)) {
903  Opcode = I->getOpcode();
904  U = I;
905  InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
906  } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
907  Opcode = C->getOpcode();
908  U = C;
909  }
910 
911  switch (Opcode) {
912  default: break;
913  case Instruction::BitCast:
914  // Look past bitcasts if its operand is in the same BB.
915  if (InMBB)
916  return computeCallAddress(U->getOperand(0), Addr);
917  break;
918  case Instruction::IntToPtr:
919  // Look past no-op inttoptrs if its operand is in the same BB.
920  if (InMBB &&
921  TLI.getValueType(DL, U->getOperand(0)->getType()) ==
922  TLI.getPointerTy(DL))
923  return computeCallAddress(U->getOperand(0), Addr);
924  break;
925  case Instruction::PtrToInt:
926  // Look past no-op ptrtoints if its operand is in the same BB.
927  if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
928  return computeCallAddress(U->getOperand(0), Addr);
929  break;
930  }
931 
932  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
933  Addr.setGlobalValue(GV);
934  return true;
935  }
936 
937  // If all else fails, try to materialize the value in a register.
938  if (!Addr.getGlobalValue()) {
939  Addr.setReg(getRegForValue(V));
940  return Addr.getReg() != 0;
941  }
942 
943  return false;
944 }
945 
946 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
947  EVT evt = TLI.getValueType(DL, Ty, true);
948 
949  // Only handle simple types.
950  if (evt == MVT::Other || !evt.isSimple())
951  return false;
952  VT = evt.getSimpleVT();
953 
954  // This is a legal type, but it's not something we handle in fast-isel.
955  if (VT == MVT::f128)
956  return false;
957 
958  // Handle all other legal types, i.e. a register that will directly hold this
959  // value.
960  return TLI.isTypeLegal(VT);
961 }
962 
963 /// Determine if the value type is supported by FastISel.
964 ///
965 /// FastISel for AArch64 can handle more value types than are legal. This adds
 966 /// simple value types such as i1, i8, and i16.
967 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
968  if (Ty->isVectorTy() && !IsVectorAllowed)
969  return false;
970 
971  if (isTypeLegal(Ty, VT))
972  return true;
973 
 974  // If this is a type that can be sign- or zero-extended to a basic operation,
975  // go ahead and accept it now.
976  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
977  return true;
978 
979  return false;
980 }
981 
982 bool AArch64FastISel::isValueAvailable(const Value *V) const {
983  if (!isa<Instruction>(V))
984  return true;
985 
986  const auto *I = cast<Instruction>(V);
987  return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
988 }
989 
990 bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
991  unsigned ScaleFactor = getImplicitScaleFactor(VT);
992  if (!ScaleFactor)
993  return false;
994 
995  bool ImmediateOffsetNeedsLowering = false;
996  bool RegisterOffsetNeedsLowering = false;
997  int64_t Offset = Addr.getOffset();
998  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
999  ImmediateOffsetNeedsLowering = true;
1000  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1001  !isUInt<12>(Offset / ScaleFactor))
1002  ImmediateOffsetNeedsLowering = true;
1003 
1004  // Cannot encode an offset register and an immediate offset in the same
1005  // instruction. Fold the immediate offset into the load/store instruction and
1006  // emit an additional add to take care of the offset register.
1007  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1008  RegisterOffsetNeedsLowering = true;
1009 
1010  // Cannot encode zero register as base.
1011  if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1012  RegisterOffsetNeedsLowering = true;
1013 
1014  // If this is a stack pointer and the offset needs to be simplified then put
1015  // the alloca address into a register, set the base type back to register and
1016  // continue. This should almost never happen.
1017  if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1018  {
1019  unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1020  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
1021  ResultReg)
1022  .addFrameIndex(Addr.getFI())
1023  .addImm(0)
1024  .addImm(0);
1025  Addr.setKind(Address::RegBase);
1026  Addr.setReg(ResultReg);
1027  }
1028 
1029  if (RegisterOffsetNeedsLowering) {
1030  unsigned ResultReg = 0;
1031  if (Addr.getReg()) {
1032  if (Addr.getExtendType() == AArch64_AM::SXTW ||
1033  Addr.getExtendType() == AArch64_AM::UXTW )
1034  ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1035  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1036  /*TODO:IsKill=*/false, Addr.getExtendType(),
1037  Addr.getShift());
1038  else
1039  ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1040  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1041  /*TODO:IsKill=*/false, AArch64_AM::LSL,
1042  Addr.getShift());
1043  } else {
1044  if (Addr.getExtendType() == AArch64_AM::UXTW)
1045  ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1046  /*Op0IsKill=*/false, Addr.getShift(),
1047  /*IsZExt=*/true);
1048  else if (Addr.getExtendType() == AArch64_AM::SXTW)
1049  ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1050  /*Op0IsKill=*/false, Addr.getShift(),
1051  /*IsZExt=*/false);
1052  else
1053  ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1054  /*Op0IsKill=*/false, Addr.getShift());
1055  }
1056  if (!ResultReg)
1057  return false;
1058 
1059  Addr.setReg(ResultReg);
1060  Addr.setOffsetReg(0);
1061  Addr.setShift(0);
1062  Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1063  }
1064 
 1065  // Since the offset is too large for the load/store instruction, get the
1066  // reg+offset into a register.
1067  if (ImmediateOffsetNeedsLowering) {
1068  unsigned ResultReg;
1069  if (Addr.getReg())
1070  // Try to fold the immediate into the add instruction.
1071  ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset);
1072  else
1073  ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1074 
1075  if (!ResultReg)
1076  return false;
1077  Addr.setReg(ResultReg);
1078  Addr.setOffset(0);
1079  }
1080  return true;
1081 }
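// The offsets that survive without lowering are exactly the ones the
// load/store encodings accept: a negative or unaligned offset must fit the
// signed 9-bit unscaled form (LDUR/STUR), and a positive, aligned offset
// must fit the unsigned, scale-factor-divided 12-bit form; everything else
// is folded into an explicit ADD (or constant materialization) before the
// memory access.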
1082 
1083 void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1084  const MachineInstrBuilder &MIB,
 1085  MachineMemOperand::Flags Flags,
 1086  unsigned ScaleFactor,
1087  MachineMemOperand *MMO) {
1088  int64_t Offset = Addr.getOffset() / ScaleFactor;
1089  // Frame base works a bit differently. Handle it separately.
1090  if (Addr.isFIBase()) {
1091  int FI = Addr.getFI();
1092  // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
1093  // and alignment should be based on the VT.
1094  MMO = FuncInfo.MF->getMachineMemOperand(
1095  MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1096  MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
1097  // Now add the rest of the operands.
1098  MIB.addFrameIndex(FI).addImm(Offset);
1099  } else {
1100  assert(Addr.isRegBase() && "Unexpected address kind.");
1101  const MCInstrDesc &II = MIB->getDesc();
1102  unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1103  Addr.setReg(
1104  constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1105  Addr.setOffsetReg(
1106  constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1107  if (Addr.getOffsetReg()) {
1108  assert(Addr.getOffset() == 0 && "Unexpected offset");
1109  bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1110  Addr.getExtendType() == AArch64_AM::SXTX;
1111  MIB.addReg(Addr.getReg());
1112  MIB.addReg(Addr.getOffsetReg());
1113  MIB.addImm(IsSigned);
1114  MIB.addImm(Addr.getShift() != 0);
1115  } else
1116  MIB.addReg(Addr.getReg()).addImm(Offset);
1117  }
1118 
1119  if (MMO)
1120  MIB.addMemOperand(MMO);
1121 }
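// For the register-offset form the operands appended above are
// (base, offset register, sign-extend flag, shift flag), matching e.g.
// "ldr w0, [x1, w2, sxtw #2]"; the frame-index form instead gets a frame
// index plus the scale-factor-divided immediate.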
1122 
1123 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1124  const Value *RHS, bool SetFlags,
1125  bool WantResult, bool IsZExt) {
 1126  AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
 1127  bool NeedExtend = false;
1128  switch (RetVT.SimpleTy) {
1129  default:
1130  return 0;
1131  case MVT::i1:
1132  NeedExtend = true;
1133  break;
1134  case MVT::i8:
1135  NeedExtend = true;
1136  ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1137  break;
1138  case MVT::i16:
1139  NeedExtend = true;
1140  ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1141  break;
1142  case MVT::i32: // fall-through
1143  case MVT::i64:
1144  break;
1145  }
1146  MVT SrcVT = RetVT;
1147  RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1148 
1149  // Canonicalize immediates to the RHS first.
1150  if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1151  std::swap(LHS, RHS);
1152 
1153  // Canonicalize mul by power of 2 to the RHS.
1154  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1155  if (isMulPowOf2(LHS))
1156  std::swap(LHS, RHS);
1157 
1158  // Canonicalize shift immediate to the RHS.
1159  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1160  if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1161  if (isa<ConstantInt>(SI->getOperand(1)))
1162  if (SI->getOpcode() == Instruction::Shl ||
1163  SI->getOpcode() == Instruction::LShr ||
1164  SI->getOpcode() == Instruction::AShr )
1165  std::swap(LHS, RHS);
1166 
1167  unsigned LHSReg = getRegForValue(LHS);
1168  if (!LHSReg)
1169  return 0;
1170  bool LHSIsKill = hasTrivialKill(LHS);
1171 
1172  if (NeedExtend)
1173  LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1174 
1175  unsigned ResultReg = 0;
1176  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1177  uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1178  if (C->isNegative())
1179  ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
1180  SetFlags, WantResult);
1181  else
1182  ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
1183  WantResult);
1184  } else if (const auto *C = dyn_cast<Constant>(RHS))
1185  if (C->isNullValue())
1186  ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags,
1187  WantResult);
1188 
1189  if (ResultReg)
1190  return ResultReg;
1191 
1192  // Only extend the RHS within the instruction if there is a valid extend type.
1193  if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1194  isValueAvailable(RHS)) {
1195  if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
1196  if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
1197  if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
1198  unsigned RHSReg = getRegForValue(SI->getOperand(0));
1199  if (!RHSReg)
1200  return 0;
1201  bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1202  return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1203  RHSIsKill, ExtendType, C->getZExtValue(),
1204  SetFlags, WantResult);
1205  }
1206  unsigned RHSReg = getRegForValue(RHS);
1207  if (!RHSReg)
1208  return 0;
1209  bool RHSIsKill = hasTrivialKill(RHS);
1210  return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1211  ExtendType, 0, SetFlags, WantResult);
1212  }
1213 
1214  // Check if the mul can be folded into the instruction.
1215  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1216  if (isMulPowOf2(RHS)) {
1217  const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1218  const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1219 
1220  if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1221  if (C->getValue().isPowerOf2())
1222  std::swap(MulLHS, MulRHS);
1223 
1224  assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1225  uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1226  unsigned RHSReg = getRegForValue(MulLHS);
1227  if (!RHSReg)
1228  return 0;
1229  bool RHSIsKill = hasTrivialKill(MulLHS);
1230  ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1231  RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags,
1232  WantResult);
1233  if (ResultReg)
1234  return ResultReg;
1235  }
1236  }
1237 
1238  // Check if the shift can be folded into the instruction.
1239  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1240  if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1241  if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
 1242  AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
 1243  switch (SI->getOpcode()) {
1244  default: break;
1245  case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
1246  case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1247  case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1248  }
1249  uint64_t ShiftVal = C->getZExtValue();
1250  if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1251  unsigned RHSReg = getRegForValue(SI->getOperand(0));
1252  if (!RHSReg)
1253  return 0;
1254  bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1255  ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1256  RHSIsKill, ShiftType, ShiftVal, SetFlags,
1257  WantResult);
1258  if (ResultReg)
1259  return ResultReg;
1260  }
1261  }
1262  }
1263  }
1264 
1265  unsigned RHSReg = getRegForValue(RHS);
1266  if (!RHSReg)
1267  return 0;
1268  bool RHSIsKill = hasTrivialKill(RHS);
1269 
1270  if (NeedExtend)
1271  RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1272 
1273  return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1274  SetFlags, WantResult);
1275 }
1276 
1277 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1278  bool LHSIsKill, unsigned RHSReg,
1279  bool RHSIsKill, bool SetFlags,
1280  bool WantResult) {
1281  assert(LHSReg && RHSReg && "Invalid register number.");
1282 
1283  if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1284  RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1285  return 0;
1286 
1287  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1288  return 0;
1289 
1290  static const unsigned OpcTable[2][2][2] = {
1291  { { AArch64::SUBWrr, AArch64::SUBXrr },
1292  { AArch64::ADDWrr, AArch64::ADDXrr } },
1293  { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1294  { AArch64::ADDSWrr, AArch64::ADDSXrr } }
1295  };
1296  bool Is64Bit = RetVT == MVT::i64;
1297  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1298  const TargetRegisterClass *RC =
1299  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1300  unsigned ResultReg;
1301  if (WantResult)
1302  ResultReg = createResultReg(RC);
1303  else
1304  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1305 
1306  const MCInstrDesc &II = TII.get(Opc);
1307  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1308  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1309  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1310  .addReg(LHSReg, getKillRegState(LHSIsKill))
1311  .addReg(RHSReg, getKillRegState(RHSIsKill));
1312  return ResultReg;
1313 }
1314 
1315 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1316  bool LHSIsKill, uint64_t Imm,
1317  bool SetFlags, bool WantResult) {
1318  assert(LHSReg && "Invalid register number.");
1319 
1320  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1321  return 0;
1322 
1323  unsigned ShiftImm;
1324  if (isUInt<12>(Imm))
1325  ShiftImm = 0;
1326  else if ((Imm & 0xfff000) == Imm) {
1327  ShiftImm = 12;
1328  Imm >>= 12;
1329  } else
1330  return 0;
1331 
1332  static const unsigned OpcTable[2][2][2] = {
1333  { { AArch64::SUBWri, AArch64::SUBXri },
1334  { AArch64::ADDWri, AArch64::ADDXri } },
1335  { { AArch64::SUBSWri, AArch64::SUBSXri },
1336  { AArch64::ADDSWri, AArch64::ADDSXri } }
1337  };
1338  bool Is64Bit = RetVT == MVT::i64;
1339  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1340  const TargetRegisterClass *RC;
1341  if (SetFlags)
1342  RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1343  else
1344  RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1345  unsigned ResultReg;
1346  if (WantResult)
1347  ResultReg = createResultReg(RC);
1348  else
1349  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1350 
1351  const MCInstrDesc &II = TII.get(Opc);
1352  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1353  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1354  .addReg(LHSReg, getKillRegState(LHSIsKill))
1355  .addImm(Imm)
1356  .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1357  return ResultReg;
1358 }
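// E.g. an immediate of 0x123 is used as-is, while 0x123000 is encoded as
// the same 12-bit value with the LSL #12 shifter ("add x0, x1, #0x123,
// lsl #12"); immediates that fit neither form make this return 0 so the
// caller materializes them into a register first.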
1359 
1360 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1361  bool LHSIsKill, unsigned RHSReg,
1362  bool RHSIsKill,
1363  AArch64_AM::ShiftExtendType ShiftType,
1364  uint64_t ShiftImm, bool SetFlags,
1365  bool WantResult) {
1366  assert(LHSReg && RHSReg && "Invalid register number.");
1367  assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1368  RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1369 
1370  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1371  return 0;
1372 
1373  // Don't deal with undefined shifts.
1374  if (ShiftImm >= RetVT.getSizeInBits())
1375  return 0;
1376 
1377  static const unsigned OpcTable[2][2][2] = {
1378  { { AArch64::SUBWrs, AArch64::SUBXrs },
1379  { AArch64::ADDWrs, AArch64::ADDXrs } },
1380  { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1381  { AArch64::ADDSWrs, AArch64::ADDSXrs } }
1382  };
1383  bool Is64Bit = RetVT == MVT::i64;
1384  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1385  const TargetRegisterClass *RC =
1386  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1387  unsigned ResultReg;
1388  if (WantResult)
1389  ResultReg = createResultReg(RC);
1390  else
1391  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1392 
1393  const MCInstrDesc &II = TII.get(Opc);
1394  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1395  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1396  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1397  .addReg(LHSReg, getKillRegState(LHSIsKill))
1398  .addReg(RHSReg, getKillRegState(RHSIsKill))
1399  .addImm(getShifterImm(ShiftType, ShiftImm));
1400  return ResultReg;
1401 }
1402 
1403 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1404  bool LHSIsKill, unsigned RHSReg,
1405  bool RHSIsKill,
 1406  AArch64_AM::ShiftExtendType ExtType,
 1407  uint64_t ShiftImm, bool SetFlags,
1408  bool WantResult) {
1409  assert(LHSReg && RHSReg && "Invalid register number.");
1410  assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1411  RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1412 
1413  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1414  return 0;
1415 
1416  if (ShiftImm >= 4)
1417  return 0;
1418 
1419  static const unsigned OpcTable[2][2][2] = {
1420  { { AArch64::SUBWrx, AArch64::SUBXrx },
1421  { AArch64::ADDWrx, AArch64::ADDXrx } },
1422  { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1423  { AArch64::ADDSWrx, AArch64::ADDSXrx } }
1424  };
1425  bool Is64Bit = RetVT == MVT::i64;
1426  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1427  const TargetRegisterClass *RC = nullptr;
1428  if (SetFlags)
1429  RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1430  else
1431  RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1432  unsigned ResultReg;
1433  if (WantResult)
1434  ResultReg = createResultReg(RC);
1435  else
1436  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1437 
1438  const MCInstrDesc &II = TII.get(Opc);
1439  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1440  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1441  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1442  .addReg(LHSReg, getKillRegState(LHSIsKill))
1443  .addReg(RHSReg, getKillRegState(RHSIsKill))
1444  .addImm(getArithExtendImm(ExtType, ShiftImm));
1445  return ResultReg;
1446 }
1447 
1448 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1449  Type *Ty = LHS->getType();
1450  EVT EVT = TLI.getValueType(DL, Ty, true);
1451  if (!EVT.isSimple())
1452  return false;
1453  MVT VT = EVT.getSimpleVT();
1454 
1455  switch (VT.SimpleTy) {
1456  default:
1457  return false;
1458  case MVT::i1:
1459  case MVT::i8:
1460  case MVT::i16:
1461  case MVT::i32:
1462  case MVT::i64:
1463  return emitICmp(VT, LHS, RHS, IsZExt);
1464  case MVT::f32:
1465  case MVT::f64:
1466  return emitFCmp(VT, LHS, RHS);
1467  }
1468 }
1469 
1470 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1471  bool IsZExt) {
1472  return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1473  IsZExt) != 0;
1474 }
1475 
1476 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1477  uint64_t Imm) {
1478  return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
1479  /*SetFlags=*/true, /*WantResult=*/false) != 0;
1480 }
1481 
1482 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1483  if (RetVT != MVT::f32 && RetVT != MVT::f64)
1484  return false;
1485 
1486  // Check to see if the 2nd operand is a constant that we can encode directly
1487  // in the compare.
1488  bool UseImm = false;
1489  if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1490  if (CFP->isZero() && !CFP->isNegative())
1491  UseImm = true;
1492 
1493  unsigned LHSReg = getRegForValue(LHS);
1494  if (!LHSReg)
1495  return false;
1496  bool LHSIsKill = hasTrivialKill(LHS);
1497 
1498  if (UseImm) {
1499  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1500  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1501  .addReg(LHSReg, getKillRegState(LHSIsKill));
1502  return true;
1503  }
1504 
1505  unsigned RHSReg = getRegForValue(RHS);
1506  if (!RHSReg)
1507  return false;
1508  bool RHSIsKill = hasTrivialKill(RHS);
1509 
1510  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1511  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1512  .addReg(LHSReg, getKillRegState(LHSIsKill))
1513  .addReg(RHSReg, getKillRegState(RHSIsKill));
1514  return true;
1515 }
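// The immediate form is only usable against +0.0 ("fcmp s0, #0.0"); every
// other constant, including -0.0, must be materialized into an FP register
// and compared with the register-register form.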
1516 
1517 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1518  bool SetFlags, bool WantResult, bool IsZExt) {
1519  return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1520  IsZExt);
1521 }
1522 
1523 /// This method is a wrapper to simplify add emission.
1524 ///
1525 /// First try to emit an add with an immediate operand using emitAddSub_ri. If
1526 /// that fails, then try to materialize the immediate into a register and use
1527 /// emitAddSub_rr instead.
1528 unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
1529  int64_t Imm) {
1530  unsigned ResultReg;
1531  if (Imm < 0)
1532  ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm);
1533  else
1534  ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm);
1535 
1536  if (ResultReg)
1537  return ResultReg;
1538 
1539  unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1540  if (!CReg)
1541  return 0;
1542 
1543  ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true);
1544  return ResultReg;
1545 }
1546 
1547 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1548  bool SetFlags, bool WantResult, bool IsZExt) {
1549  return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1550  IsZExt);
1551 }
1552 
1553 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1554  bool LHSIsKill, unsigned RHSReg,
1555  bool RHSIsKill, bool WantResult) {
1556  return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1557  RHSIsKill, /*SetFlags=*/true, WantResult);
1558 }
1559 
1560 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1561  bool LHSIsKill, unsigned RHSReg,
1562  bool RHSIsKill,
1563  AArch64_AM::ShiftExtendType ShiftType,
1564  uint64_t ShiftImm, bool WantResult) {
1565  return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1566  RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
1567  WantResult);
1568 }
1569 
1570 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1571  const Value *LHS, const Value *RHS) {
1572  // Canonicalize immediates to the RHS first.
1573  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1574  std::swap(LHS, RHS);
1575 
1576  // Canonicalize mul by power-of-2 to the RHS.
1577  if (LHS->hasOneUse() && isValueAvailable(LHS))
1578  if (isMulPowOf2(LHS))
1579  std::swap(LHS, RHS);
1580 
1581  // Canonicalize shift immediate to the RHS.
1582  if (LHS->hasOneUse() && isValueAvailable(LHS))
1583  if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1584  if (isa<ConstantInt>(SI->getOperand(1)))
1585  std::swap(LHS, RHS);
1586 
1587  unsigned LHSReg = getRegForValue(LHS);
1588  if (!LHSReg)
1589  return 0;
1590  bool LHSIsKill = hasTrivialKill(LHS);
1591 
1592  unsigned ResultReg = 0;
1593  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1594  uint64_t Imm = C->getZExtValue();
1595  ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
1596  }
1597  if (ResultReg)
1598  return ResultReg;
1599 
1600  // Check if the mul can be folded into the instruction.
1601  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1602  if (isMulPowOf2(RHS)) {
1603  const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1604  const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1605 
1606  if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1607  if (C->getValue().isPowerOf2())
1608  std::swap(MulLHS, MulRHS);
1609 
1610  assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1611  uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1612 
1613  unsigned RHSReg = getRegForValue(MulLHS);
1614  if (!RHSReg)
1615  return 0;
1616  bool RHSIsKill = hasTrivialKill(MulLHS);
1617  ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1618  RHSIsKill, ShiftVal);
1619  if (ResultReg)
1620  return ResultReg;
1621  }
1622  }
1623 
1624  // Check if the shift can be folded into the instruction.
1625  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1626  if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1627  if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1628  uint64_t ShiftVal = C->getZExtValue();
1629  unsigned RHSReg = getRegForValue(SI->getOperand(0));
1630  if (!RHSReg)
1631  return 0;
1632  bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1633  ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1634  RHSIsKill, ShiftVal);
1635  if (ResultReg)
1636  return ResultReg;
1637  }
1638  }
1639 
1640  unsigned RHSReg = getRegForValue(RHS);
1641  if (!RHSReg)
1642  return 0;
1643  bool RHSIsKill = hasTrivialKill(RHS);
1644 
1645  MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1646  ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1647  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1648  uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1649  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1650  }
1651  return ResultReg;
1652 }
1653 
1654 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1655  unsigned LHSReg, bool LHSIsKill,
1656  uint64_t Imm) {
1657  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1658  "ISD nodes are not consecutive!");
1659  static const unsigned OpcTable[3][2] = {
1660  { AArch64::ANDWri, AArch64::ANDXri },
1661  { AArch64::ORRWri, AArch64::ORRXri },
1662  { AArch64::EORWri, AArch64::EORXri }
1663  };
1664  const TargetRegisterClass *RC;
1665  unsigned Opc;
1666  unsigned RegSize;
1667  switch (RetVT.SimpleTy) {
1668  default:
1669  return 0;
1670  case MVT::i1:
1671  case MVT::i8:
1672  case MVT::i16:
1673  case MVT::i32: {
1674  unsigned Idx = ISDOpc - ISD::AND;
1675  Opc = OpcTable[Idx][0];
1676  RC = &AArch64::GPR32spRegClass;
1677  RegSize = 32;
1678  break;
1679  }
1680  case MVT::i64:
1681  Opc = OpcTable[ISDOpc - ISD::AND][1];
1682  RC = &AArch64::GPR64spRegClass;
1683  RegSize = 64;
1684  break;
1685  }
1686 
1687  if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1688  return 0;
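 // Added note: AArch64 logical immediates are encoded as replicated,
 // rotated runs of set bits; masks such as 0xff and 0xffff qualify, which
 // is what lets emitAnd_ri mask i8/i16 results elsewhere in this file.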
1689 
1690  unsigned ResultReg =
1691  fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
1692  AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1693  if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1694  uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1695  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1696  }
1697  return ResultReg;
1698 }
1699 
1700 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1701  unsigned LHSReg, bool LHSIsKill,
1702  unsigned RHSReg, bool RHSIsKill,
1703  uint64_t ShiftImm) {
1704  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1705  "ISD nodes are not consecutive!");
1706  static const unsigned OpcTable[3][2] = {
1707  { AArch64::ANDWrs, AArch64::ANDXrs },
1708  { AArch64::ORRWrs, AArch64::ORRXrs },
1709  { AArch64::EORWrs, AArch64::EORXrs }
1710  };
1711 
1712  // Don't deal with undefined shifts.
1713  if (ShiftImm >= RetVT.getSizeInBits())
1714  return 0;
1715 
1716  const TargetRegisterClass *RC;
1717  unsigned Opc;
1718  switch (RetVT.SimpleTy) {
1719  default:
1720  return 0;
1721  case MVT::i1:
1722  case MVT::i8:
1723  case MVT::i16:
1724  case MVT::i32:
1725  Opc = OpcTable[ISDOpc - ISD::AND][0];
1726  RC = &AArch64::GPR32RegClass;
1727  break;
1728  case MVT::i64:
1729  Opc = OpcTable[ISDOpc - ISD::AND][1];
1730  RC = &AArch64::GPR64RegClass;
1731  break;
1732  }
1733  unsigned ResultReg =
1734  fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1735  AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1736  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1737  uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1738  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1739  }
1740  return ResultReg;
1741 }
1742 
1743 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1744  uint64_t Imm) {
1745  return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
1746 }
1747 
1748 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1749  bool WantZExt, MachineMemOperand *MMO) {
1750  if (!TLI.allowsMisalignedMemoryAccesses(VT))
1751  return 0;
1752 
1753  // Simplify this down to something we can handle.
1754  if (!simplifyAddress(Addr, VT))
1755  return 0;
1756 
1757  unsigned ScaleFactor = getImplicitScaleFactor(VT);
1758  if (!ScaleFactor)
1759  llvm_unreachable("Unexpected value type.");
1760 
1761  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1762  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1763  bool UseScaled = true;
1764  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1765  UseScaled = false;
1766  ScaleFactor = 1;
1767  }
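 // Added note: for example, with VT == MVT::i32 the scale factor is 4, so
 // a negative offset or one that is not a multiple of 4 takes the unscaled
 // LDUR form with a 9-bit signed immediate, while an offset such as 8 stays
 // on the scaled LDR path and is later encoded as offset / ScaleFactor.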
1768 
1769  static const unsigned GPOpcTable[2][8][4] = {
1770  // Sign-extend.
1771  { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
1772  AArch64::LDURXi },
1773  { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
1774  AArch64::LDURXi },
1775  { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
1776  AArch64::LDRXui },
1777  { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
1778  AArch64::LDRXui },
1779  { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1780  AArch64::LDRXroX },
1781  { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1782  AArch64::LDRXroX },
1783  { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1784  AArch64::LDRXroW },
1785  { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1786  AArch64::LDRXroW }
1787  },
1788  // Zero-extend.
1789  { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1790  AArch64::LDURXi },
1791  { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1792  AArch64::LDURXi },
1793  { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1794  AArch64::LDRXui },
1795  { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1796  AArch64::LDRXui },
1797  { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1798  AArch64::LDRXroX },
1799  { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1800  AArch64::LDRXroX },
1801  { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1802  AArch64::LDRXroW },
1803  { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1804  AArch64::LDRXroW }
1805  }
1806  };
1807 
1808  static const unsigned FPOpcTable[4][2] = {
1809  { AArch64::LDURSi, AArch64::LDURDi },
1810  { AArch64::LDRSui, AArch64::LDRDui },
1811  { AArch64::LDRSroX, AArch64::LDRDroX },
1812  { AArch64::LDRSroW, AArch64::LDRDroW }
1813  };
1814 
1815  unsigned Opc;
1816  const TargetRegisterClass *RC;
1817  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1818  Addr.getOffsetReg();
1819  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1820  if (Addr.getExtendType() == AArch64_AM::UXTW ||
1821  Addr.getExtendType() == AArch64_AM::SXTW)
1822  Idx++;
1823 
1824  bool IsRet64Bit = RetVT == MVT::i64;
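 // Added note: GPOpcTable rows come in pairs (32-bit result, 64-bit result)
 // for each addressing form -- unscaled, scaled, register-offset (X),
 // register-offset (W) -- so '2 * Idx + IsRet64Bit' picks the row and the
 // column is the access width (byte/half/word/doubleword).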
1825  switch (VT.SimpleTy) {
1826  default:
1827  llvm_unreachable("Unexpected value type.");
1828  case MVT::i1: // Intentional fall-through.
1829  case MVT::i8:
1830  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1831  RC = (IsRet64Bit && !WantZExt) ?
1832  &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1833  break;
1834  case MVT::i16:
1835  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1836  RC = (IsRet64Bit && !WantZExt) ?
1837  &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1838  break;
1839  case MVT::i32:
1840  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1841  RC = (IsRet64Bit && !WantZExt) ?
1842  &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1843  break;
1844  case MVT::i64:
1845  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1846  RC = &AArch64::GPR64RegClass;
1847  break;
1848  case MVT::f32:
1849  Opc = FPOpcTable[Idx][0];
1850  RC = &AArch64::FPR32RegClass;
1851  break;
1852  case MVT::f64:
1853  Opc = FPOpcTable[Idx][1];
1854  RC = &AArch64::FPR64RegClass;
1855  break;
1856  }
1857 
1858  // Create the base instruction, then add the operands.
1859  unsigned ResultReg = createResultReg(RC);
1860  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1861  TII.get(Opc), ResultReg);
1862  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1863 
1864  // Loading an i1 requires special handling.
1865  if (VT == MVT::i1) {
1866  unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
1867  assert(ANDReg && "Unexpected AND instruction emission failure.");
1868  ResultReg = ANDReg;
1869  }
1870 
1871  // For zero-extending loads to 64 bits, we emit a 32-bit load and then
1872  // convert the 32-bit register to a 64-bit register.
1873  if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1874  unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
1875  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1876  TII.get(AArch64::SUBREG_TO_REG), Reg64)
1877  .addImm(0)
1878  .addReg(ResultReg, getKillRegState(true))
1879  .addImm(AArch64::sub_32);
1880  ResultReg = Reg64;
1881  }
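 // Added note: this relies on the architectural guarantee that a 32-bit
 // load already zeroes the upper 32 bits of the X register, so the
 // SUBREG_TO_REG above is only register-class bookkeeping and emits no
 // additional zero-extension code.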
1882  return ResultReg;
1883 }
1884 
1885 bool AArch64FastISel::selectAddSub(const Instruction *I) {
1886  MVT VT;
1887  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1888  return false;
1889 
1890  if (VT.isVector())
1891  return selectOperator(I, I->getOpcode());
1892 
1893  unsigned ResultReg;
1894  switch (I->getOpcode()) {
1895  default:
1896  llvm_unreachable("Unexpected instruction.");
1897  case Instruction::Add:
1898  ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1899  break;
1900  case Instruction::Sub:
1901  ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1902  break;
1903  }
1904  if (!ResultReg)
1905  return false;
1906 
1907  updateValueMap(I, ResultReg);
1908  return true;
1909 }
1910 
1911 bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1912  MVT VT;
1913  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1914  return false;
1915 
1916  if (VT.isVector())
1917  return selectOperator(I, I->getOpcode());
1918 
1919  unsigned ResultReg;
1920  switch (I->getOpcode()) {
1921  default:
1922  llvm_unreachable("Unexpected instruction.");
1923  case Instruction::And:
1924  ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1925  break;
1926  case Instruction::Or:
1927  ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1928  break;
1929  case Instruction::Xor:
1930  ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1931  break;
1932  }
1933  if (!ResultReg)
1934  return false;
1935 
1936  updateValueMap(I, ResultReg);
1937  return true;
1938 }
1939 
1940 bool AArch64FastISel::selectLoad(const Instruction *I) {
1941  MVT VT;
1942  // Verify we have a legal type before going any further. Currently, we handle
1943  // simple types that will directly fit in a register (i32/f32/i64/f64) or
1944  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1945  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1946  cast<LoadInst>(I)->isAtomic())
1947  return false;
1948 
1949  const Value *SV = I->getOperand(0);
1950  if (TLI.supportSwiftError()) {
1951  // Swifterror values can come from either a function parameter with
1952  // swifterror attribute or an alloca with swifterror attribute.
1953  if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1954  if (Arg->hasSwiftErrorAttr())
1955  return false;
1956  }
1957 
1958  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1959  if (Alloca->isSwiftError())
1960  return false;
1961  }
1962  }
1963 
1964  // See if we can handle this address.
1965  Address Addr;
1966  if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1967  return false;
1968 
1969  // Fold the following sign-/zero-extend into the load instruction.
1970  bool WantZExt = true;
1971  MVT RetVT = VT;
1972  const Value *IntExtVal = nullptr;
1973  if (I->hasOneUse()) {
1974  if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1975  if (isTypeSupported(ZE->getType(), RetVT))
1976  IntExtVal = ZE;
1977  else
1978  RetVT = VT;
1979  } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1980  if (isTypeSupported(SE->getType(), RetVT))
1981  IntExtVal = SE;
1982  else
1983  RetVT = VT;
1984  WantZExt = false;
1985  }
1986  }
1987 
1988  unsigned ResultReg =
1989  emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1990  if (!ResultReg)
1991  return false;
1992 
1993  // There are a few different cases we have to handle, because the load or the
1994  // sign-/zero-extend might not be selected by FastISel if we fall back to
1995  // SelectionDAG. There is also an ordering issue when both instructions are in
1996  // different basic blocks.
1997  // 1.) The load instruction is selected by FastISel, but the integer extend
1998  // not. This usually happens when the integer extend is in a different
1999  // basic block and SelectionDAG took over for that basic block.
2000  // 2.) The load instruction is selected before the integer extend. This only
2001  // happens when the integer extend is in a different basic block.
2002  // 3.) The load instruction is selected by SelectionDAG and the integer extend
2003  // by FastISel. This happens if there are instructions between the load
2004  // and the integer extend that couldn't be selected by FastISel.
2005  if (IntExtVal) {
2006  // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2007  // could select it. Emit a copy to subreg if necessary. FastISel will remove
2008  // it when it selects the integer extend.
2009  unsigned Reg = lookUpRegForValue(IntExtVal);
2010  auto *MI = MRI.getUniqueVRegDef(Reg);
2011  if (!MI) {
2012  if (RetVT == MVT::i64 && VT <= MVT::i32) {
2013  if (WantZExt) {
2014  // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2015  MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
2016  ResultReg = std::prev(I)->getOperand(0).getReg();
2017  removeDeadCode(I, std::next(I));
2018  } else
2019  ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2020  /*IsKill=*/true,
2021  AArch64::sub_32);
2022  }
2023  updateValueMap(I, ResultReg);
2024  return true;
2025  }
2026 
2027  // The integer extend has already been emitted - delete all the instructions
2028  // that have been emitted by the integer extend lowering code and use the
2029  // result from the load instruction directly.
2030  while (MI) {
2031  Reg = 0;
2032  for (auto &Opnd : MI->uses()) {
2033  if (Opnd.isReg()) {
2034  Reg = Opnd.getReg();
2035  break;
2036  }
2037  }
2038  MachineBasicBlock::iterator I(MI);
2039  removeDeadCode(I, std::next(I));
2040  MI = nullptr;
2041  if (Reg)
2042  MI = MRI.getUniqueVRegDef(Reg);
2043  }
2044  updateValueMap(IntExtVal, ResultReg);
2045  return true;
2046  }
2047 
2048  updateValueMap(I, ResultReg);
2049  return true;
2050 }
2051 
2052 bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2053  unsigned AddrReg,
2054  MachineMemOperand *MMO) {
2055  unsigned Opc;
2056  switch (VT.SimpleTy) {
2057  default: return false;
2058  case MVT::i8: Opc = AArch64::STLRB; break;
2059  case MVT::i16: Opc = AArch64::STLRH; break;
2060  case MVT::i32: Opc = AArch64::STLRW; break;
2061  case MVT::i64: Opc = AArch64::STLRX; break;
2062  }
2063 
2064  const MCInstrDesc &II = TII.get(Opc);
2065  SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2066  AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2067  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2068  .addReg(SrcReg)
2069  .addReg(AddrReg)
2070  .addMemOperand(MMO);
2071  return true;
2072 }
2073 
2074 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2075  MachineMemOperand *MMO) {
2076  if (!TLI.allowsMisalignedMemoryAccesses(VT))
2077  return false;
2078 
2079  // Simplify this down to something we can handle.
2080  if (!simplifyAddress(Addr, VT))
2081  return false;
2082 
2083  unsigned ScaleFactor = getImplicitScaleFactor(VT);
2084  if (!ScaleFactor)
2085  llvm_unreachable("Unexpected value type.");
2086 
2087  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2088  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2089  bool UseScaled = true;
2090  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2091  UseScaled = false;
2092  ScaleFactor = 1;
2093  }
2094 
2095  static const unsigned OpcTable[4][6] = {
2096  { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
2097  AArch64::STURSi, AArch64::STURDi },
2098  { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
2099  AArch64::STRSui, AArch64::STRDui },
2100  { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2101  AArch64::STRSroX, AArch64::STRDroX },
2102  { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2103  AArch64::STRSroW, AArch64::STRDroW }
2104  };
2105 
2106  unsigned Opc;
2107  bool VTIsi1 = false;
2108  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2109  Addr.getOffsetReg();
2110  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2111  if (Addr.getExtendType() == AArch64_AM::UXTW ||
2112  Addr.getExtendType() == AArch64_AM::SXTW)
2113  Idx++;
2114 
2115  switch (VT.SimpleTy) {
2116  default: llvm_unreachable("Unexpected value type.");
2117  case MVT::i1: VTIsi1 = true; LLVM_FALLTHROUGH;
2118  case MVT::i8: Opc = OpcTable[Idx][0]; break;
2119  case MVT::i16: Opc = OpcTable[Idx][1]; break;
2120  case MVT::i32: Opc = OpcTable[Idx][2]; break;
2121  case MVT::i64: Opc = OpcTable[Idx][3]; break;
2122  case MVT::f32: Opc = OpcTable[Idx][4]; break;
2123  case MVT::f64: Opc = OpcTable[Idx][5]; break;
2124  }
2125 
2126  // Storing an i1 requires special handling.
2127  if (VTIsi1 && SrcReg != AArch64::WZR) {
2128  unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
2129  assert(ANDReg && "Unexpected AND instruction emission failure.");
2130  SrcReg = ANDReg;
2131  }
2132  // Create the base instruction, then add the operands.
2133  const MCInstrDesc &II = TII.get(Opc);
2134  SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2135  MachineInstrBuilder MIB =
2136  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
2137  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2138 
2139  return true;
2140 }
2141 
2142 bool AArch64FastISel::selectStore(const Instruction *I) {
2143  MVT VT;
2144  const Value *Op0 = I->getOperand(0);
2145  // Verify we have a legal type before going any further. Currently, we handle
2146  // simple types that will directly fit in a register (i32/f32/i64/f64) or
2147  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2148  if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2149  return false;
2150 
2151  const Value *PtrV = I->getOperand(1);
2152  if (TLI.supportSwiftError()) {
2153  // Swifterror values can come from either a function parameter with
2154  // swifterror attribute or an alloca with swifterror attribute.
2155  if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2156  if (Arg->hasSwiftErrorAttr())
2157  return false;
2158  }
2159 
2160  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2161  if (Alloca->isSwiftError())
2162  return false;
2163  }
2164  }
2165 
2166  // Get the value to be stored into a register. Use the zero register directly
2167  // when possible to avoid an unnecessary copy and a wasted register.
2168  unsigned SrcReg = 0;
2169  if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2170  if (CI->isZero())
2171  SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2172  } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2173  if (CF->isZero() && !CF->isNegative()) {
2174  VT = MVT::getIntegerVT(VT.getSizeInBits());
2175  SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2176  }
2177  }
2178 
2179  if (!SrcReg)
2180  SrcReg = getRegForValue(Op0);
2181 
2182  if (!SrcReg)
2183  return false;
2184 
2185  auto *SI = cast<StoreInst>(I);
2186 
2187  // Try to emit a STLR for seq_cst/release.
2188  if (SI->isAtomic()) {
2189  AtomicOrdering Ord = SI->getOrdering();
2190  // The non-atomic instructions are sufficient for relaxed stores.
2191  if (isReleaseOrStronger(Ord)) {
2192  // The STLR addressing mode only supports a base reg; pass that directly.
2193  unsigned AddrReg = getRegForValue(PtrV);
2194  return emitStoreRelease(VT, SrcReg, AddrReg,
2195  createMachineMemOperandFor(I));
2196  }
2197  }
2198 
2199  // See if we can handle this address.
2200  Address Addr;
2201  if (!computeAddress(PtrV, Addr, Op0->getType()))
2202  return false;
2203 
2204  if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2205  return false;
2206  return true;
2207 }
2208 
2209 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2210  switch (Pred) {
2211  case CmpInst::FCMP_ONE:
2212  case CmpInst::FCMP_UEQ:
2213  default:
2214  // AL is our "false" for now. The other two need more compares.
2215  return AArch64CC::AL;
2216  case CmpInst::ICMP_EQ:
2217  case CmpInst::FCMP_OEQ:
2218  return AArch64CC::EQ;
2219  case CmpInst::ICMP_SGT:
2220  case CmpInst::FCMP_OGT:
2221  return AArch64CC::GT;
2222  case CmpInst::ICMP_SGE:
2223  case CmpInst::FCMP_OGE:
2224  return AArch64CC::GE;
2225  case CmpInst::ICMP_UGT:
2226  case CmpInst::FCMP_UGT:
2227  return AArch64CC::HI;
2228  case CmpInst::FCMP_OLT:
2229  return AArch64CC::MI;
2230  case CmpInst::ICMP_ULE:
2231  case CmpInst::FCMP_OLE:
2232  return AArch64CC::LS;
2233  case CmpInst::FCMP_ORD:
2234  return AArch64CC::VC;
2235  case CmpInst::FCMP_UNO:
2236  return AArch64CC::VS;
2237  case CmpInst::FCMP_UGE:
2238  return AArch64CC::PL;
2239  case CmpInst::ICMP_SLT:
2240  case CmpInst::FCMP_ULT:
2241  return AArch64CC::LT;
2242  case CmpInst::ICMP_SLE:
2243  case CmpInst::FCMP_ULE:
2244  return AArch64CC::LE;
2245  case CmpInst::FCMP_UNE:
2246  case CmpInst::ICMP_NE:
2247  return AArch64CC::NE;
2248  case CmpInst::ICMP_UGE:
2249  return AArch64CC::HS;
2250  case CmpInst::ICMP_ULT:
2251  return AArch64CC::LO;
2252  }
2253 }
2254 
2255 /// Try to emit a combined compare-and-branch instruction.
2256 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2257  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
2258  // will not be produced, as they are conditional branch instructions that do
2259  // not set flags.
2260  if (FuncInfo.MF->getFunction().hasFnAttribute(
2261  Attribute::SpeculativeLoadHardening))
2262  return false;
2263 
2264  assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2265  const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2266  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2267 
2268  const Value *LHS = CI->getOperand(0);
2269  const Value *RHS = CI->getOperand(1);
2270 
2271  MVT VT;
2272  if (!isTypeSupported(LHS->getType(), VT))
2273  return false;
2274 
2275  unsigned BW = VT.getSizeInBits();
2276  if (BW > 64)
2277  return false;
2278 
2279  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2280  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2281 
2282  // Try to take advantage of fallthrough opportunities.
2283  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2284  std::swap(TBB, FBB);
2285  Predicate = CmpInst::getInversePredicate(Predicate);
2286  }
2287 
2288  int TestBit = -1;
2289  bool IsCmpNE;
2290  switch (Predicate) {
2291  default:
2292  return false;
2293  case CmpInst::ICMP_EQ:
2294  case CmpInst::ICMP_NE:
2295  if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2296  std::swap(LHS, RHS);
2297 
2298  if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2299  return false;
2300 
2301  if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2302  if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2303  const Value *AndLHS = AI->getOperand(0);
2304  const Value *AndRHS = AI->getOperand(1);
2305 
2306  if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2307  if (C->getValue().isPowerOf2())
2308  std::swap(AndLHS, AndRHS);
2309 
2310  if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2311  if (C->getValue().isPowerOf2()) {
2312  TestBit = C->getValue().logBase2();
2313  LHS = AndLHS;
2314  }
2315  }
2316 
2317  if (VT == MVT::i1)
2318  TestBit = 0;
2319 
2320  IsCmpNE = Predicate == CmpInst::ICMP_NE;
2321  break;
2322  case CmpInst::ICMP_SLT:
2323  case CmpInst::ICMP_SGE:
2324  if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2325  return false;
2326 
2327  TestBit = BW - 1;
2328  IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2329  break;
2330  case CmpInst::ICMP_SGT:
2331  case CmpInst::ICMP_SLE:
2332  if (!isa<ConstantInt>(RHS))
2333  return false;
2334 
2335  if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2336  return false;
2337 
2338  TestBit = BW - 1;
2339  IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2340  break;
2341  } // end switch
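 // Added examples (illustrative, assuming no fallthrough-driven inversion):
 //   %a = and i32 %x, 8 ; icmp ne %a, 0 ; br   ->  TBNZ wN, #3, <true-bb>
 //   icmp slt i32 %x, 0 ; br                   ->  TBNZ wN, #31, <true-bb>
 //   icmp eq i64 %x, 0 ; br                    ->  CBZ  xN, <true-bb>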
2342 
2343  static const unsigned OpcTable[2][2][2] = {
2344  { {AArch64::CBZW, AArch64::CBZX },
2345  {AArch64::CBNZW, AArch64::CBNZX} },
2346  { {AArch64::TBZW, AArch64::TBZX },
2347  {AArch64::TBNZW, AArch64::TBNZX} }
2348  };
2349 
2350  bool IsBitTest = TestBit != -1;
2351  bool Is64Bit = BW == 64;
2352  if (TestBit < 32 && TestBit >= 0)
2353  Is64Bit = false;
2354 
2355  unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2356  const MCInstrDesc &II = TII.get(Opc);
2357 
2358  unsigned SrcReg = getRegForValue(LHS);
2359  if (!SrcReg)
2360  return false;
2361  bool SrcIsKill = hasTrivialKill(LHS);
2362 
2363  if (BW == 64 && !Is64Bit)
2364  SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
2365  AArch64::sub_32);
2366 
2367  if ((BW < 32) && !IsBitTest)
2368  SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*IsZExt=*/true);
2369 
2370  // Emit the combined compare and branch instruction.
2371  SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2372  MachineInstrBuilder MIB =
2373  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
2374  .addReg(SrcReg, getKillRegState(SrcIsKill));
2375  if (IsBitTest)
2376  MIB.addImm(TestBit);
2377  MIB.addMBB(TBB);
2378 
2379  finishCondBranch(BI->getParent(), TBB, FBB);
2380  return true;
2381 }
2382 
2383 bool AArch64FastISel::selectBranch(const Instruction *I) {
2384  const BranchInst *BI = cast<BranchInst>(I);
2385  if (BI->isUnconditional()) {
2386  MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2387  fastEmitBranch(MSucc, BI->getDebugLoc());
2388  return true;
2389  }
2390 
2391  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2392  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2393 
2394  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2395  if (CI->hasOneUse() && isValueAvailable(CI)) {
2396  // Try to optimize or fold the cmp.
2397  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2398  switch (Predicate) {
2399  default:
2400  break;
2401  case CmpInst::FCMP_FALSE:
2402  fastEmitBranch(FBB, DbgLoc);
2403  return true;
2404  case CmpInst::FCMP_TRUE:
2405  fastEmitBranch(TBB, DbgLoc);
2406  return true;
2407  }
2408 
2409  // Try to emit a combined compare-and-branch first.
2410  if (emitCompareAndBranch(BI))
2411  return true;
2412 
2413  // Try to take advantage of fallthrough opportunities.
2414  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2415  std::swap(TBB, FBB);
2416  Predicate = CmpInst::getInversePredicate(Predicate);
2417  }
2418 
2419  // Emit the cmp.
2420  if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2421  return false;
2422 
2423  // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2424  // instruction.
2425  AArch64CC::CondCode CC = getCompareCC(Predicate);
2426  AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2427  switch (Predicate) {
2428  default:
2429  break;
2430  case CmpInst::FCMP_UEQ:
2431  ExtraCC = AArch64CC::EQ;
2432  CC = AArch64CC::VS;
2433  break;
2434  case CmpInst::FCMP_ONE:
2435  ExtraCC = AArch64CC::MI;
2436  CC = AArch64CC::GT;
2437  break;
2438  }
2439  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2440 
2441  // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2442  if (ExtraCC != AArch64CC::AL) {
2443  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2444  .addImm(ExtraCC)
2445  .addMBB(TBB);
2446  }
2447 
2448  // Emit the branch.
2449  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2450  .addImm(CC)
2451  .addMBB(TBB);
2452 
2453  finishCondBranch(BI->getParent(), TBB, FBB);
2454  return true;
2455  }
2456  } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2457  uint64_t Imm = CI->getZExtValue();
2458  MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2459  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
2460  .addMBB(Target);
2461 
2462  // Obtain the branch probability and add the target to the successor list.
2463  if (FuncInfo.BPI) {
2464  auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2465  BI->getParent(), Target->getBasicBlock());
2466  FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2467  } else
2468  FuncInfo.MBB->addSuccessorWithoutProb(Target);
2469  return true;
2470  } else {
2471  AArch64CC::CondCode CC = AArch64CC::AL;
2472  if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2473  // Fake-request the condition; otherwise the intrinsic might be completely
2474  // optimized away.
2475  unsigned CondReg = getRegForValue(BI->getCondition());
2476  if (!CondReg)
2477  return false;
2478 
2479  // Emit the branch.
2480  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2481  .addImm(CC)
2482  .addMBB(TBB);
2483 
2484  finishCondBranch(BI->getParent(), TBB, FBB);
2485  return true;
2486  }
2487  }
2488 
2489  unsigned CondReg = getRegForValue(BI->getCondition());
2490  if (CondReg == 0)
2491  return false;
2492  bool CondRegIsKill = hasTrivialKill(BI->getCondition());
2493 
2494  // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2495  unsigned Opcode = AArch64::TBNZW;
2496  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2497  std::swap(TBB, FBB);
2498  Opcode = AArch64::TBZW;
2499  }
2500 
2501  const MCInstrDesc &II = TII.get(Opcode);
2502  unsigned ConstrainedCondReg
2503  = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2504  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2505  .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill))
2506  .addImm(0)
2507  .addMBB(TBB);
2508 
2509  finishCondBranch(BI->getParent(), TBB, FBB);
2510  return true;
2511 }
2512 
2513 bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2514  const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2515  unsigned AddrReg = getRegForValue(BI->getOperand(0));
2516  if (AddrReg == 0)
2517  return false;
2518 
2519  // Emit the indirect branch.
2520  const MCInstrDesc &II = TII.get(AArch64::BR);
2521  AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
2522  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
2523 
2524  // Make sure the CFG is up-to-date.
2525  for (auto *Succ : BI->successors())
2526  FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2527 
2528  return true;
2529 }
2530 
2531 bool AArch64FastISel::selectCmp(const Instruction *I) {
2532  const CmpInst *CI = cast<CmpInst>(I);
2533 
2534  // Vectors of i1 are weird: bail out.
2535  if (CI->getType()->isVectorTy())
2536  return false;
2537 
2538  // Try to optimize or fold the cmp.
2539  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2540  unsigned ResultReg = 0;
2541  switch (Predicate) {
2542  default:
2543  break;
2544  case CmpInst::FCMP_FALSE:
2545  ResultReg = createResultReg(&AArch64::GPR32RegClass);
2546  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2547  TII.get(TargetOpcode::COPY), ResultReg)
2548  .addReg(AArch64::WZR, getKillRegState(true));
2549  break;
2550  case CmpInst::FCMP_TRUE:
2551  ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2552  break;
2553  }
2554 
2555  if (ResultReg) {
2556  updateValueMap(I, ResultReg);
2557  return true;
2558  }
2559 
2560  // Emit the cmp.
2561  if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2562  return false;
2563 
2564  ResultReg = createResultReg(&AArch64::GPR32RegClass);
2565 
2566  // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2567  // condition codes are inverted, because they are used by CSINC.
2568  static unsigned CondCodeTable[2][2] = {
2569  { AArch64CC::NE, AArch64CC::VC },
2570  { AArch64CC::PL, AArch64CC::LE }
2571  };
2572  unsigned *CondCodes = nullptr;
2573  switch (Predicate) {
2574  default:
2575  break;
2576  case CmpInst::FCMP_UEQ:
2577  CondCodes = &CondCodeTable[0][0];
2578  break;
2579  case CmpInst::FCMP_ONE:
2580  CondCodes = &CondCodeTable[1][0];
2581  break;
2582  }
2583 
2584  if (CondCodes) {
2585  unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2586  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2587  TmpReg1)
2588  .addReg(AArch64::WZR, getKillRegState(true))
2589  .addReg(AArch64::WZR, getKillRegState(true))
2590  .addImm(CondCodes[0]);
2591  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2592  ResultReg)
2593  .addReg(TmpReg1, getKillRegState(true))
2594  .addReg(AArch64::WZR, getKillRegState(true))
2595  .addImm(CondCodes[1]);
2596 
2597  updateValueMap(I, ResultReg);
2598  return true;
2599  }
2600 
2601  // Now set a register based on the comparison.
2602  AArch64CC::CondCode CC = getCompareCC(Predicate);
2603  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2604  AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2605  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2606  ResultReg)
2607  .addReg(AArch64::WZR, getKillRegState(true))
2608  .addReg(AArch64::WZR, getKillRegState(true))
2609  .addImm(invertedCC);
2610 
2611  updateValueMap(I, ResultReg);
2612  return true;
2613 }
2614 
2615 /// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2616 /// value.
2617 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2618  if (!SI->getType()->isIntegerTy(1))
2619  return false;
2620 
2621  const Value *Src1Val, *Src2Val;
2622  unsigned Opc = 0;
2623  bool NeedExtraOp = false;
2624  if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2625  if (CI->isOne()) {
2626  Src1Val = SI->getCondition();
2627  Src2Val = SI->getFalseValue();
2628  Opc = AArch64::ORRWrr;
2629  } else {
2630  assert(CI->isZero());
2631  Src1Val = SI->getFalseValue();
2632  Src2Val = SI->getCondition();
2633  Opc = AArch64::BICWrr;
2634  }
2635  } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2636  if (CI->isOne()) {
2637  Src1Val = SI->getCondition();
2638  Src2Val = SI->getTrueValue();
2639  Opc = AArch64::ORRWrr;
2640  NeedExtraOp = true;
2641  } else {
2642  assert(CI->isZero());
2643  Src1Val = SI->getCondition();
2644  Src2Val = SI->getTrueValue();
2645  Opc = AArch64::ANDWrr;
2646  }
2647  }
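 // Added examples of the patterns matched above (roughly):
 //   select i1 %c, i1 true,  i1 %b  ->  ORRWrr %c, %b
 //   select i1 %c, i1 false, i1 %b  ->  BICWrr %b, %c        (%b & ~%c)
 //   select i1 %c, i1 %a, i1 true   ->  EOR #1 on %c, then ORRWrr with %a
 //   select i1 %c, i1 %a, i1 false  ->  ANDWrr %c, %a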
2648 
2649  if (!Opc)
2650  return false;
2651 
2652  unsigned Src1Reg = getRegForValue(Src1Val);
2653  if (!Src1Reg)
2654  return false;
2655  bool Src1IsKill = hasTrivialKill(Src1Val);
2656 
2657  unsigned Src2Reg = getRegForValue(Src2Val);
2658  if (!Src2Reg)
2659  return false;
2660  bool Src2IsKill = hasTrivialKill(Src2Val);
2661 
2662  if (NeedExtraOp) {
2663  Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1);
2664  Src1IsKill = true;
2665  }
2666  unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2667  Src1IsKill, Src2Reg, Src2IsKill);
2668  updateValueMap(SI, ResultReg);
2669  return true;
2670 }
2671 
2672 bool AArch64FastISel::selectSelect(const Instruction *I) {
2673  assert(isa<SelectInst>(I) && "Expected a select instruction.");
2674  MVT VT;
2675  if (!isTypeSupported(I->getType(), VT))
2676  return false;
2677 
2678  unsigned Opc;
2679  const TargetRegisterClass *RC;
2680  switch (VT.SimpleTy) {
2681  default:
2682  return false;
2683  case MVT::i1:
2684  case MVT::i8:
2685  case MVT::i16:
2686  case MVT::i32:
2687  Opc = AArch64::CSELWr;
2688  RC = &AArch64::GPR32RegClass;
2689  break;
2690  case MVT::i64:
2691  Opc = AArch64::CSELXr;
2692  RC = &AArch64::GPR64RegClass;
2693  break;
2694  case MVT::f32:
2695  Opc = AArch64::FCSELSrrr;
2696  RC = &AArch64::FPR32RegClass;
2697  break;
2698  case MVT::f64:
2699  Opc = AArch64::FCSELDrrr;
2700  RC = &AArch64::FPR64RegClass;
2701  break;
2702  }
2703 
2704  const SelectInst *SI = cast<SelectInst>(I);
2705  const Value *Cond = SI->getCondition();
2706  AArch64CC::CondCode CC = AArch64CC::NE;
2707  AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2708 
2709  if (optimizeSelect(SI))
2710  return true;
2711 
2712  // Try to pickup the flags, so we don't have to emit another compare.
2713  if (foldXALUIntrinsic(CC, I, Cond)) {
2714  // Fake-request the condition to force emission of the XALU intrinsic.
2715  unsigned CondReg = getRegForValue(Cond);
2716  if (!CondReg)
2717  return false;
2718  } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2719  isValueAvailable(Cond)) {
2720  const auto *Cmp = cast<CmpInst>(Cond);
2721  // Try to optimize or fold the cmp.
2722  CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2723  const Value *FoldSelect = nullptr;
2724  switch (Predicate) {
2725  default:
2726  break;
2727  case CmpInst::FCMP_FALSE:
2728  FoldSelect = SI->getFalseValue();
2729  break;
2730  case CmpInst::FCMP_TRUE:
2731  FoldSelect = SI->getTrueValue();
2732  break;
2733  }
2734 
2735  if (FoldSelect) {
2736  unsigned SrcReg = getRegForValue(FoldSelect);
2737  if (!SrcReg)
2738  return false;
2739  unsigned UseReg = lookUpRegForValue(SI);
2740  if (UseReg)
2741  MRI.clearKillFlags(UseReg);
2742 
2743  updateValueMap(I, SrcReg);
2744  return true;
2745  }
2746 
2747  // Emit the cmp.
2748  if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2749  return false;
2750 
2751  // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2752  CC = getCompareCC(Predicate);
2753  switch (Predicate) {
2754  default:
2755  break;
2756  case CmpInst::FCMP_UEQ:
2757  ExtraCC = AArch64CC::EQ;
2758  CC = AArch64CC::VS;
2759  break;
2760  case CmpInst::FCMP_ONE:
2761  ExtraCC = AArch64CC::MI;
2762  CC = AArch64CC::GT;
2763  break;
2764  }
2765  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2766  } else {
2767  unsigned CondReg = getRegForValue(Cond);
2768  if (!CondReg)
2769  return false;
2770  bool CondIsKill = hasTrivialKill(Cond);
2771 
2772  const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2773  CondReg = constrainOperandRegClass(II, CondReg, 1);
2774 
2775  // Emit a TST instruction (ANDS wzr, reg, #imm).
2776  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
2777  AArch64::WZR)
2778  .addReg(CondReg, getKillRegState(CondIsKill))
2779  .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2780  }
2781 
2782  unsigned Src1Reg = getRegForValue(SI->getTrueValue());
2783  bool Src1IsKill = hasTrivialKill(SI->getTrueValue());
2784 
2785  unsigned Src2Reg = getRegForValue(SI->getFalseValue());
2786  bool Src2IsKill = hasTrivialKill(SI->getFalseValue());
2787 
2788  if (!Src1Reg || !Src2Reg)
2789  return false;
2790 
2791  if (ExtraCC != AArch64CC::AL) {
2792  Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2793  Src2IsKill, ExtraCC);
2794  Src2IsKill = true;
2795  }
2796  unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2797  Src2IsKill, CC);
2798  updateValueMap(I, ResultReg);
2799  return true;
2800 }
2801 
2802 bool AArch64FastISel::selectFPExt(const Instruction *I) {
2803  Value *V = I->getOperand(0);
2804  if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2805  return false;
2806 
2807  unsigned Op = getRegForValue(V);
2808  if (Op == 0)
2809  return false;
2810 
2811  unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
2812  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
2813  ResultReg).addReg(Op);
2814  updateValueMap(I, ResultReg);
2815  return true;
2816 }
2817 
2818 bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2819  Value *V = I->getOperand(0);
2820  if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2821  return false;
2822 
2823  unsigned Op = getRegForValue(V);
2824  if (Op == 0)
2825  return false;
2826 
2827  unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
2828  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
2829  ResultReg).addReg(Op);
2830  updateValueMap(I, ResultReg);
2831  return true;
2832 }
2833 
2834 // FPToUI and FPToSI
2835 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2836  MVT DestVT;
2837  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2838  return false;
2839 
2840  unsigned SrcReg = getRegForValue(I->getOperand(0));
2841  if (SrcReg == 0)
2842  return false;
2843 
2844  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2845  if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
2846  return false;
2847 
2848  unsigned Opc;
2849  if (SrcVT == MVT::f64) {
2850  if (Signed)
2851  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2852  else
2853  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2854  } else {
2855  if (Signed)
2856  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2857  else
2858  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2859  }
2860  unsigned ResultReg = createResultReg(
2861  DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2862  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2863  .addReg(SrcReg);
2864  updateValueMap(I, ResultReg);
2865  return true;
2866 }
2867 
2868 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2869  MVT DestVT;
2870  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2871  return false;
2872  // Let regular ISEL handle FP16
2873  if (DestVT == MVT::f16)
2874  return false;
2875 
2876  assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2877  "Unexpected value type.");
2878 
2879  unsigned SrcReg = getRegForValue(I->getOperand(0));
2880  if (!SrcReg)
2881  return false;
2882  bool SrcIsKill = hasTrivialKill(I->getOperand(0));
2883 
2884  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2885 
2886  // Handle sign-extension.
2887  if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2888  SrcReg =
2889  emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2890  if (!SrcReg)
2891  return false;
2892  SrcIsKill = true;
2893  }
2894 
2895  unsigned Opc;
2896  if (SrcVT == MVT::i64) {
2897  if (Signed)
2898  Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2899  else
2900  Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2901  } else {
2902  if (Signed)
2903  Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2904  else
2905  Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2906  }
2907 
2908  unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
2909  SrcIsKill);
2910  updateValueMap(I, ResultReg);
2911  return true;
2912 }
2913 
2914 bool AArch64FastISel::fastLowerArguments() {
2915  if (!FuncInfo.CanLowerReturn)
2916  return false;
2917 
2918  const Function *F = FuncInfo.Fn;
2919  if (F->isVarArg())
2920  return false;
2921 
2922  CallingConv::ID CC = F->getCallingConv();
2923  if (CC != CallingConv::C && CC != CallingConv::Swift)
2924  return false;
2925 
2926  if (Subtarget->hasCustomCallingConv())
2927  return false;
2928 
2929  // Only handle simple cases of up to 8 GPR and FPR each.
2930  unsigned GPRCnt = 0;
2931  unsigned FPRCnt = 0;
2932  for (auto const &Arg : F->args()) {
2933  if (Arg.hasAttribute(Attribute::ByVal) ||
2934  Arg.hasAttribute(Attribute::InReg) ||
2935  Arg.hasAttribute(Attribute::StructRet) ||
2936  Arg.hasAttribute(Attribute::SwiftSelf) ||
2937  Arg.hasAttribute(Attribute::SwiftError) ||
2938  Arg.hasAttribute(Attribute::Nest))
2939  return false;
2940 
2941  Type *ArgTy = Arg.getType();
2942  if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2943  return false;
2944 
2945  EVT ArgVT = TLI.getValueType(DL, ArgTy);
2946  if (!ArgVT.isSimple())
2947  return false;
2948 
2949  MVT VT = ArgVT.getSimpleVT().SimpleTy;
2950  if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2951  return false;
2952 
2953  if (VT.isVector() &&
2954  (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2955  return false;
2956 
2957  if (VT >= MVT::i1 && VT <= MVT::i64)
2958  ++GPRCnt;
2959  else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2960  VT.is128BitVector())
2961  ++FPRCnt;
2962  else
2963  return false;
2964 
2965  if (GPRCnt > 8 || FPRCnt > 8)
2966  return false;
2967  }
2968 
2969  static const MCPhysReg Registers[6][8] = {
2970  { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2971  AArch64::W5, AArch64::W6, AArch64::W7 },
2972  { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2973  AArch64::X5, AArch64::X6, AArch64::X7 },
2974  { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2975  AArch64::H5, AArch64::H6, AArch64::H7 },
2976  { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2977  AArch64::S5, AArch64::S6, AArch64::S7 },
2978  { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2979  AArch64::D5, AArch64::D6, AArch64::D7 },
2980  { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2981  AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2982  };
2983 
2984  unsigned GPRIdx = 0;
2985  unsigned FPRIdx = 0;
2986  for (auto const &Arg : F->args()) {
2987  MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2988  unsigned SrcReg;
2989  const TargetRegisterClass *RC;
2990  if (VT >= MVT::i1 && VT <= MVT::i32) {
2991  SrcReg = Registers[0][GPRIdx++];
2992  RC = &AArch64::GPR32RegClass;
2993  VT = MVT::i32;
2994  } else if (VT == MVT::i64) {
2995  SrcReg = Registers[1][GPRIdx++];
2996  RC = &AArch64::GPR64RegClass;
2997  } else if (VT == MVT::f16) {
2998  SrcReg = Registers[2][FPRIdx++];
2999  RC = &AArch64::FPR16RegClass;
3000  } else if (VT == MVT::f32) {
3001  SrcReg = Registers[3][FPRIdx++];
3002  RC = &AArch64::FPR32RegClass;
3003  } else if ((VT == MVT::f64) || VT.is64BitVector()) {
3004  SrcReg = Registers[4][FPRIdx++];
3005  RC = &AArch64::FPR64RegClass;
3006  } else if (VT.is128BitVector()) {
3007  SrcReg = Registers[5][FPRIdx++];
3008  RC = &AArch64::FPR128RegClass;
3009  } else
3010  llvm_unreachable("Unexpected value type.");
3011 
3012  unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3013  // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3014  // Without this, EmitLiveInCopies may eliminate the livein if its only
3015  // use is a bitcast (which isn't turned into an instruction).
3016  unsigned ResultReg = createResultReg(RC);
3017  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3018  TII.get(TargetOpcode::COPY), ResultReg)
3019  .addReg(DstReg, getKillRegState(true));
3020  updateValueMap(&Arg, ResultReg);
3021  }
3022  return true;
3023 }
3024 
3025 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3026  SmallVectorImpl<MVT> &OutVTs,
3027  unsigned &NumBytes) {
3028  CallingConv::ID CC = CLI.CallConv;
3029  SmallVector<CCValAssign, 16> ArgLocs;
3030  CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3031  CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
3032 
3033  // Get a count of how many bytes are to be pushed on the stack.
3034  NumBytes = CCInfo.getNextStackOffset();
3035 
3036  // Issue CALLSEQ_START
3037  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3038  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
3039  .addImm(NumBytes).addImm(0);
3040 
3041  // Process the args.
3042  for (CCValAssign &VA : ArgLocs) {
3043  const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3044  MVT ArgVT = OutVTs[VA.getValNo()];
3045 
3046  unsigned ArgReg = getRegForValue(ArgVal);
3047  if (!ArgReg)
3048  return false;
3049 
3050  // Handle arg promotion: SExt, ZExt, AExt.
3051  switch (VA.getLocInfo()) {
3052  case CCValAssign::Full:
3053  break;
3054  case CCValAssign::SExt: {
3055  MVT DestVT = VA.getLocVT();
3056  MVT SrcVT = ArgVT;
3057  ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3058  if (!ArgReg)
3059  return false;
3060  break;
3061  }
3062  case CCValAssign::AExt:
3063  // Intentional fall-through.
3064  case CCValAssign::ZExt: {
3065  MVT DestVT = VA.getLocVT();
3066  MVT SrcVT = ArgVT;
3067  ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3068  if (!ArgReg)
3069  return false;
3070  break;
3071  }
3072  default:
3073  llvm_unreachable("Unknown arg promotion!");
3074  }
3075 
3076  // Now copy/store arg to correct locations.
3077  if (VA.isRegLoc() && !VA.needsCustom()) {
3078  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3079  TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3080  CLI.OutRegs.push_back(VA.getLocReg());
3081  } else if (VA.needsCustom()) {
3082  // FIXME: Handle custom args.
3083  return false;
3084  } else {
3085  assert(VA.isMemLoc() && "Assuming store on stack.");
3086 
3087  // Don't emit stores for undef values.
3088  if (isa<UndefValue>(ArgVal))
3089  continue;
3090 
3091  // Need to store on the stack.
3092  unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3093 
3094  unsigned BEAlign = 0;
3095  if (ArgSize < 8 && !Subtarget->isLittleEndian())
3096  BEAlign = 8 - ArgSize;
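 // Added note: e.g. a 4-byte argument on a big-endian target gets
 // BEAlign == 4, so it lands in the high half of its 8-byte stack slot.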
3097 
3098  Address Addr;
3099  Addr.setKind(Address::RegBase);
3100  Addr.setReg(AArch64::SP);
3101  Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3102 
3103  unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
3104  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3105  MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3106  MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3107 
3108  if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3109  return false;
3110  }
3111  }
3112  return true;
3113 }
3114 
3115 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
3116  unsigned NumBytes) {
3117  CallingConv::ID CC = CLI.CallConv;
3118 
3119  // Issue CALLSEQ_END
3120  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3121  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3122  .addImm(NumBytes).addImm(0);
3123 
3124  // Now the return value.
3125  if (RetVT != MVT::isVoid) {
3126  SmallVector<CCValAssign, 16> RVLocs;
3127  CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3128  CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
3129 
3130  // Only handle a single return value.
3131  if (RVLocs.size() != 1)
3132  return false;
3133 
3134  // Copy all of the result registers out of their specified physreg.
3135  MVT CopyVT = RVLocs[0].getValVT();
3136 
3137  // TODO: Handle big-endian results
3138  if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3139  return false;
3140 
3141  unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
3142  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3143  TII.get(TargetOpcode::COPY), ResultReg)
3144  .addReg(RVLocs[0].getLocReg());
3145  CLI.InRegs.push_back(RVLocs[0].getLocReg());
3146 
3147  CLI.ResultReg = ResultReg;
3148  CLI.NumResultRegs = 1;
3149  }
3150 
3151  return true;
3152 }
3153 
3154 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3155  CallingConv::ID CC = CLI.CallConv;
3156  bool IsTailCall = CLI.IsTailCall;
3157  bool IsVarArg = CLI.IsVarArg;
3158  const Value *Callee = CLI.Callee;
3159  MCSymbol *Symbol = CLI.Symbol;
3160 
3161  if (!Callee && !Symbol)
3162  return false;
3163 
3164  // Allow SelectionDAG isel to handle tail calls.
3165  if (IsTailCall)
3166  return false;
3167 
3168  CodeModel::Model CM = TM.getCodeModel();
3169  // Only support the small-addressing and large code models.
3170  if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3171  return false;
3172 
3173  // FIXME: Add large code model support for ELF.
3174  if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3175  return false;
3176 
3177  // Let SDISel handle vararg functions.
3178  if (IsVarArg)
3179  return false;
3180 
3181  // FIXME: Only handle *simple* calls for now.
3182  MVT RetVT;
3183  if (CLI.RetTy->isVoidTy())
3184  RetVT = MVT::isVoid;
3185  else if (!isTypeLegal(CLI.RetTy, RetVT))
3186  return false;
3187 
3188  for (auto Flag : CLI.OutFlags)
3189  if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3190  Flag.isSwiftSelf() || Flag.isSwiftError())
3191  return false;
3192 
3193  // Set up the argument vectors.
3194  SmallVector<MVT, 16> OutVTs;
3195  OutVTs.reserve(CLI.OutVals.size());
3196 
3197  for (auto *Val : CLI.OutVals) {
3198  MVT VT;
3199  if (!isTypeLegal(Val->getType(), VT) &&
3200  !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3201  return false;
3202 
3203  // We don't handle vector parameters yet.
3204  if (VT.isVector() || VT.getSizeInBits() > 64)
3205  return false;
3206 
3207  OutVTs.push_back(VT);
3208  }
3209 
3210  Address Addr;
3211  if (Callee && !computeCallAddress(Callee, Addr))
3212  return false;
3213 
3214  // Handle the arguments now that we've gotten them.
3215  unsigned NumBytes;
3216  if (!processCallArgs(CLI, OutVTs, NumBytes))
3217  return false;
3218 
3219  const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3220  if (RegInfo->isAnyArgRegReserved(*MF))
3221  RegInfo->emitReservedArgRegCallError(*MF);
3222 
3223  // Issue the call.
3224  MachineInstrBuilder MIB;
3225  if (Subtarget->useSmallAddressing()) {
3226  const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
3227  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
3228  if (Symbol)
3229  MIB.addSym(Symbol, 0);
3230  else if (Addr.getGlobalValue())
3231  MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3232  else if (Addr.getReg()) {
3233  unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3234  MIB.addReg(Reg);
3235  } else
3236  return false;
3237  } else {
3238  unsigned CallReg = 0;
3239  if (Symbol) {
3240  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3241  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
3242  ADRPReg)
3243  .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3244 
3245  CallReg = createResultReg(&AArch64::GPR64RegClass);
3246  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3247  TII.get(AArch64::LDRXui), CallReg)
3248  .addReg(ADRPReg)
3249  .addSym(Symbol,
3250  AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3251  } else if (Addr.getGlobalValue())
3252  CallReg = materializeGV(Addr.getGlobalValue());
3253  else if (Addr.getReg())
3254  CallReg = Addr.getReg();
3255 
3256  if (!CallReg)
3257  return false;
3258 
3259  const MCInstrDesc &II = TII.get(AArch64::BLR);
3260  CallReg = constrainOperandRegClass(II, CallReg, 0);
3261  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
3262  }
3263 
3264  // Add implicit physical register uses to the call.
3265  for (auto Reg : CLI.OutRegs)
3266  MIB.addReg(Reg, RegState::Implicit);
3267 
3268  // Add a register mask with the call-preserved registers.
3269  // Proper defs for return values will be added by setPhysRegsDeadExcept().
3270  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3271 
3272  CLI.Call = MIB;
3273 
3274  // Finish off the call including any return values.
3275  return finishCall(CLI, RetVT, NumBytes);
3276 }
3277 
3278 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
3279  if (Alignment)
3280  return Len / Alignment <= 4;
3281  else
3282  return Len < 32;
3283 }
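 // Added note: the check above uses integer division, so with a known
 // alignment the copy is inlined when it needs at most about four aligned
 // accesses (e.g. Len <= 19 still passes at 4-byte alignment); unaligned
 // copies are inlined only below 32 bytes.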
3284 
3285 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3286  uint64_t Len, unsigned Alignment) {
3287  // Make sure we don't bloat code by inlining very large memcpy's.
3288  if (!isMemCpySmall(Len, Alignment))
3289  return false;
3290 
3291  int64_t UnscaledOffset = 0;
3292  Address OrigDest = Dest;
3293  Address OrigSrc = Src;
3294 
3295  while (Len) {
3296  MVT VT;
3297  if (!Alignment || Alignment >= 8) {
3298  if (Len >= 8)
3299  VT = MVT::i64;
3300  else if (Len >= 4)
3301  VT = MVT::i32;
3302  else if (Len >= 2)
3303  VT = MVT::i16;
3304  else {
3305  VT = MVT::i8;
3306  }
3307  } else {
3308  // Bound based on alignment.
3309  if (Len >= 4 && Alignment == 4)
3310  VT = MVT::i32;
3311  else if (Len >= 2 && Alignment == 2)
3312  VT = MVT::i16;
3313  else {
3314  VT = MVT::i8;
3315  }
3316  }
3317 
3318  unsigned ResultReg = emitLoad(VT, VT, Src);
3319  if (!ResultReg)
3320  return false;
3321 
3322  if (!emitStore(VT, ResultReg, Dest))
3323  return false;
3324 
3325  int64_t Size = VT.getSizeInBits() / 8;
3326  Len -= Size;
3327  UnscaledOffset += Size;
3328 
3329  // We need to recompute the unscaled offset for each iteration.
3330  Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3331  Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3332  }
3333 
3334  return true;
3335 }
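 // Added example: a 12-byte copy with 8-byte (or unspecified) alignment
 // expands above to an i64 load/store at offset 0 followed by an i32
 // load/store at offset 8.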
3336 
3337 /// Check if it is possible to fold the condition from the XALU intrinsic
3338 /// into the user. The condition code will only be updated on success.
3339 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3340  const Instruction *I,
3341  const Value *Cond) {
3342  if (!isa<ExtractValueInst>(Cond))
3343  return false;
3344 
3345  const auto *EV = cast<ExtractValueInst>(Cond);
3346  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3347  return false;
3348 
3349  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3350  MVT RetVT;
3351  const Function *Callee = II->getCalledFunction();
3352  Type *RetTy =
3353  cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3354  if (!isTypeLegal(RetTy, RetVT))
3355  return false;
3356 
3357  if (RetVT != MVT::i32 && RetVT != MVT::i64)
3358  return false;
3359 
3360  const Value *LHS = II->getArgOperand(0);
3361  const Value *RHS = II->getArgOperand(1);
3362 
3363  // Canonicalize immediate to the RHS.
3364  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3365  isCommutativeIntrinsic(II))
3366  std::swap(LHS, RHS);
3367 
3368  // Simplify multiplies.
3369  Intrinsic::ID IID = II->getIntrinsicID();
3370  switch (IID) {
3371  default:
3372  break;
3373  case Intrinsic::smul_with_overflow:
3374  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3375  if (C->getValue() == 2)
3376  IID = Intrinsic::sadd_with_overflow;
3377  break;
3378  case Intrinsic::umul_with_overflow:
3379  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3380  if (C->getValue() == 2)
3381  IID = Intrinsic::uadd_with_overflow;
3382  break;
3383  }
3384 
3385  AArch64CC::CondCode TmpCC;
3386  switch (IID) {
3387  default:
3388  return false;
3389  case Intrinsic::sadd_with_overflow:
3390  case Intrinsic::ssub_with_overflow:
3391  TmpCC = AArch64CC::VS;
3392  break;
3393  case Intrinsic::uadd_with_overflow:
3394  TmpCC = AArch64CC::HS;
3395  break;
3396  case Intrinsic::usub_with_overflow:
3397  TmpCC = AArch64CC::LO;
3398  break;
3399  case Intrinsic::smul_with_overflow:
3400  case Intrinsic::umul_with_overflow:
3401  TmpCC = AArch64CC::NE;
3402  break;
3403  }
3404 
3405  // Check if both instructions are in the same basic block.
3406  if (!isValueAvailable(II))
3407  return false;
3408 
3409  // Make sure nothing is in the way
3410  BasicBlock::const_iterator Start(I);
3411  BasicBlock::const_iterator End(II);
3412  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3413  // We only expect extractvalue instructions between the intrinsic and the
3414  // instruction to be selected.
3415  if (!isa<ExtractValueInst>(Itr))
3416  return false;
3417 
3418  // Check that the extractvalue operand comes from the intrinsic.
3419  const auto *EVI = cast<ExtractValueInst>(Itr);
3420  if (EVI->getAggregateOperand() != II)
3421  return false;
3422  }
3423 
3424  CC = TmpCC;
3425  return true;
3426 }
3427 
3428 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3429  // FIXME: Handle more intrinsics.
3430  switch (II->getIntrinsicID()) {
3431  default: return false;
3432  case Intrinsic::frameaddress: {
3433  MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3434  MFI.setFrameAddressIsTaken(true);
3435 
3436  const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3437  unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3438  unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3439  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3440  TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3441  // Recursively load frame address
3442  // ldr x0, [fp]
3443  // ldr x0, [x0]
3444  // ldr x0, [x0]
3445  // ...
3446  unsigned DestReg;
3447  unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3448  while (Depth--) {
3449  DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3450  SrcReg, /*IsKill=*/true, 0);
3451  assert(DestReg && "Unexpected LDR instruction emission failure.");
3452  SrcReg = DestReg;
3453  }
3454 
3455  updateValueMap(II, SrcReg);
3456  return true;
3457  }
3458  case Intrinsic::sponentry: {
3459  MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3460 
3461  // SP = FP + Fixed Object + 16
3462  int FI = MFI.CreateFixedObject(4, 0, false);
3463  unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
3464  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3465  TII.get(AArch64::ADDXri), ResultReg)
3466  .addFrameIndex(FI)
3467  .addImm(0)
3468  .addImm(0);
3469 
3470  updateValueMap(II, ResultReg);
3471  return true;
3472  }
3473  case Intrinsic::memcpy:
3474  case Intrinsic::memmove: {
3475  const auto *MTI = cast<MemTransferInst>(II);
3476  // Don't handle volatile.
3477  if (MTI->isVolatile())
3478  return false;
3479 
3480  // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
3481  // we would emit dead code because we don't currently handle memmoves.
3482  bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3483  if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3484  // Small memcpy's are common enough that we want to do them without a call
3485  // if possible.
3486  uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3487  unsigned Alignment = MinAlign(MTI->getDestAlignment(),
3488  MTI->getSourceAlignment());
3489  if (isMemCpySmall(Len, Alignment)) {
3490  Address Dest, Src;
3491  if (!computeAddress(MTI->getRawDest(), Dest) ||
3492  !computeAddress(MTI->getRawSource(), Src))
3493  return false;
3494  if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3495  return true;
3496  }
3497  }
3498 
3499  if (!MTI->getLength()->getType()->isIntegerTy(64))
3500  return false;
3501 
3502  if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3503  // Fast instruction selection doesn't support the special
3504  // address spaces.
3505  return false;
3506 
3507  const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3508  return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 1);
3509  }
3510  case Intrinsic::memset: {
3511  const MemSetInst *MSI = cast<MemSetInst>(II);
3512  // Don't handle volatile.
3513  if (MSI->isVolatile())
3514  return false;
3515 
3516  if (!MSI->getLength()->getType()->isIntegerTy(64))
3517  return false;
3518 
3519  if (MSI->getDestAddressSpace() > 255)
3520  // Fast instruction selection doesn't support the special
3521  // address spaces.
3522  return false;
3523 
3524  return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
3525  }
3526  case Intrinsic::sin:
3527  case Intrinsic::cos:
3528  case Intrinsic::pow: {
3529  MVT RetVT;
3530  if (!isTypeLegal(II->getType(), RetVT))
3531  return false;
3532 
3533  if (RetVT != MVT::f32 && RetVT != MVT::f64)
3534  return false;
3535 
3536  static const RTLIB::Libcall LibCallTable[3][2] = {
3537  { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3538  { RTLIB::COS_F32, RTLIB::COS_F64 },
3539  { RTLIB::POW_F32, RTLIB::POW_F64 }
3540  };
3541  RTLIB::Libcall LC;
3542  bool Is64Bit = RetVT == MVT::f64;
3543  switch (II->getIntrinsicID()) {
3544  default:
3545  llvm_unreachable("Unexpected intrinsic.");
3546  case Intrinsic::sin:
3547  LC = LibCallTable[0][Is64Bit];
3548  break;
3549  case Intrinsic::cos:
3550  LC = LibCallTable[1][Is64Bit];
3551  break;
3552  case Intrinsic::pow:
3553  LC = LibCallTable[2][Is64Bit];
3554  break;
3555  }
3556 
3557  ArgListTy Args;
3558  Args.reserve(II->getNumArgOperands());
3559 
3560  // Populate the argument list.
3561  for (auto &Arg : II->arg_operands()) {
3562  ArgListEntry Entry;
3563  Entry.Val = Arg;
3564  Entry.Ty = Arg->getType();
3565  Args.push_back(Entry);
3566  }
3567 
3568  CallLoweringInfo CLI;
3569  MCContext &Ctx = MF->getContext();
3570  CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3571  TLI.getLibcallName(LC), std::move(Args));
3572  if (!lowerCallTo(CLI))
3573  return false;
3574  updateValueMap(II, CLI.ResultReg);
3575  return true;
3576  }
3577  case Intrinsic::fabs: {
3578  MVT VT;
3579  if (!isTypeLegal(II->getType(), VT))
3580  return false;
3581 
3582  unsigned Opc;
3583  switch (VT.SimpleTy) {
3584  default:
3585  return false;
3586  case MVT::f32:
3587  Opc = AArch64::FABSSr;
3588  break;
3589  case MVT::f64:
3590  Opc = AArch64::FABSDr;
3591  break;
3592  }
3593  unsigned SrcReg = getRegForValue(II->getOperand(0));
3594  if (!SrcReg)
3595  return false;
3596  bool SrcRegIsKill = hasTrivialKill(II->getOperand(0));
3597  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3598  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3599  .addReg(SrcReg, getKillRegState(SrcRegIsKill));
3600  updateValueMap(II, ResultReg);
3601  return true;
3602  }
3603  case Intrinsic::trap:
3604  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3605  .addImm(1);
3606  return true;
3607 
3608  case Intrinsic::sqrt: {
3609  Type *RetTy = II->getCalledFunction()->getReturnType();
3610 
3611  MVT VT;
3612  if (!isTypeLegal(RetTy, VT))
3613  return false;
3614 
3615  unsigned Op0Reg = getRegForValue(II->getOperand(0));
3616  if (!Op0Reg)
3617  return false;
3618  bool Op0IsKill = hasTrivialKill(II->getOperand(0));
3619 
3620  unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
3621  if (!ResultReg)
3622  return false;
3623 
3624  updateValueMap(II, ResultReg);
3625  return true;
3626  }
3627  case Intrinsic::sadd_with_overflow:
3628  case Intrinsic::uadd_with_overflow:
3629  case Intrinsic::ssub_with_overflow:
3630  case Intrinsic::usub_with_overflow:
3631  case Intrinsic::smul_with_overflow:
3632  case Intrinsic::umul_with_overflow: {
3633  // This implements the basic lowering of the xalu with overflow intrinsics.
3634  const Function *Callee = II->getCalledFunction();
3635  auto *Ty = cast<StructType>(Callee->getReturnType());
3636  Type *RetTy = Ty->getTypeAtIndex(0U);
3637 
3638  MVT VT;
3639  if (!isTypeLegal(RetTy, VT))
3640  return false;
3641 
3642  if (VT != MVT::i32 && VT != MVT::i64)
3643  return false;
3644 
3645  const Value *LHS = II->getArgOperand(0);
3646  const Value *RHS = II->getArgOperand(1);
3647  // Canonicalize immediate to the RHS.
3648  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3649  isCommutativeIntrinsic(II))
3650  std::swap(LHS, RHS);
3651 
3652  // Simplify multiplies.
3653  Intrinsic::ID IID = II->getIntrinsicID();
3654  switch (IID) {
3655  default:
3656  break;
3657  case Intrinsic::smul_with_overflow:
3658  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3659  if (C->getValue() == 2) {
3660  IID = Intrinsic::sadd_with_overflow;
3661  RHS = LHS;
3662  }
3663  break;
3664  case Intrinsic::umul_with_overflow:
3665  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3666  if (C->getValue() == 2) {
3667  IID = Intrinsic::uadd_with_overflow;
3668  RHS = LHS;
3669  }
3670  break;
3671  }
3672 
3673  unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3674  AArch64CC::CondCode CC = AArch64CC::Invalid;
3675  switch (IID) {
3676  default: llvm_unreachable("Unexpected intrinsic!");
3677  case Intrinsic::sadd_with_overflow:
3678  ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3679  CC = AArch64CC::VS;
3680  break;
3681  case Intrinsic::uadd_with_overflow:
3682  ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3683  CC = AArch64CC::HS;
3684  break;
3685  case Intrinsic::ssub_with_overflow:
3686  ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3687  CC = AArch64CC::VS;
3688  break;
3689  case Intrinsic::usub_with_overflow:
3690  ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3691  CC = AArch64CC::LO;
3692  break;
3693  case Intrinsic::smul_with_overflow: {
3694  CC = AArch64CC::NE;
3695  unsigned LHSReg = getRegForValue(LHS);
3696  if (!LHSReg)
3697  return false;
3698  bool LHSIsKill = hasTrivialKill(LHS);
3699 
3700  unsigned RHSReg = getRegForValue(RHS);
3701  if (!RHSReg)
3702  return false;
3703  bool RHSIsKill = hasTrivialKill(RHS);
3704 
3705  if (VT == MVT::i32) {
3706  MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3707  unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
3708  /*IsKill=*/false, 32);
3709  MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3710  AArch64::sub_32);
3711  ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
3712  AArch64::sub_32);
3713  emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3714  AArch64_AM::ASR, 31, /*WantResult=*/false);
3715  } else {
3716  assert(VT == MVT::i64 && "Unexpected value type.");
3717  // LHSReg and RHSReg cannot be killed by this Mul, since they are
3718  // reused in the next instruction.
3719  MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3720  /*IsKill=*/false);
3721  unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
3722  RHSReg, RHSIsKill);
3723  emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3724  AArch64_AM::ASR, 63, /*WantResult=*/false);
3725  }
3726  break;
3727  }
3728  case Intrinsic::umul_with_overflow: {
3729  CC = AArch64CC::NE;
3730  unsigned LHSReg = getRegForValue(LHS);
3731  if (!LHSReg)
3732  return false;
3733  bool LHSIsKill = hasTrivialKill(LHS);
3734 
3735  unsigned RHSReg = getRegForValue(RHS);
3736  if (!RHSReg)
3737  return false;
3738  bool RHSIsKill = hasTrivialKill(RHS);
3739 
3740  if (VT == MVT::i32) {
3741  MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3742  emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
3743  /*IsKill=*/false, AArch64_AM::LSR, 32,
3744  /*WantResult=*/false);
3745  MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3746  AArch64::sub_32);
3747  } else {
3748  assert(VT == MVT::i64 && "Unexpected value type.");
3749  // LHSReg and RHSReg cannot be killed by this Mul, since they are
3750  // reused in the next instruction.
3751  MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3752  /*IsKill=*/false);
3753  unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
3754  RHSReg, RHSIsKill);
3755  emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
3756  /*IsKill=*/false, /*WantResult=*/false);
3757  }
3758  break;
3759  }
3760  }
3761 
3762  if (MulReg) {
3763  ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3764  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3765  TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3766  }
3767 
3768  if (!ResultReg1)
3769  return false;
3770 
3771  ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3772  AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
3773  /*IsKill=*/true, getInvertedCondCode(CC));
3774  (void)ResultReg2;
3775  assert((ResultReg1 + 1) == ResultReg2 &&
3776  "Nonconsecutive result registers.");
3777  updateValueMap(II, ResultReg1, 2);
3778  return true;
3779  }
3780  }
3781  return false;
3782 }
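// As a rough sketch (illustrative registers, not taken from the source), the
// smul_with_overflow i32 path above produces code along the lines of:
//   smull   x8, w0, w1          // 64-bit product of the 32-bit operands
//   lsr     x9, x8, #32         // high half of the product
//   cmp     w9, w8, asr #31     // overflow iff high half != sign of low half
//   cset    w2, ne              // overflow bit via CSINC wzr, wzr, inverted cc
// The i64 signed variant compares MULHS against the low product shifted
// arithmetically by 63, and the unsigned variants simply check that the high
// half (or MULHU) is zero.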
3783 
3784 bool AArch64FastISel::selectRet(const Instruction *I) {
3785  const ReturnInst *Ret = cast<ReturnInst>(I);
3786  const Function &F = *I->getParent()->getParent();
3787 
3788  if (!FuncInfo.CanLowerReturn)
3789  return false;
3790 
3791  if (F.isVarArg())
3792  return false;
3793 
3794  if (TLI.supportSwiftError() &&
3795  F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3796  return false;
3797 
3798  if (TLI.supportSplitCSR(FuncInfo.MF))
3799  return false;
3800 
3801  // Build a list of return value registers.
3802  SmallVector<unsigned, 4> RetRegs;
3803 
3804  if (Ret->getNumOperands() > 0) {
3805  CallingConv::ID CC = F.getCallingConv();
3806  SmallVector<ISD::OutputArg, 4> Outs;
3807  GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3808 
3809  // Analyze operands of the call, assigning locations to each operand.
3810  SmallVector<CCValAssign, 16> ValLocs;
3811  CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3812  CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3813  : RetCC_AArch64_AAPCS;
3814  CCInfo.AnalyzeReturn(Outs, RetCC);
3815 
3816  // Only handle a single return value for now.
3817  if (ValLocs.size() != 1)
3818  return false;
3819 
3820  CCValAssign &VA = ValLocs[0];
3821  const Value *RV = Ret->getOperand(0);
3822 
3823  // Don't bother handling odd stuff for now.
3824  if ((VA.getLocInfo() != CCValAssign::Full) &&
3825  (VA.getLocInfo() != CCValAssign::BCvt))
3826  return false;
3827 
3828  // Only handle register returns for now.
3829  if (!VA.isRegLoc())
3830  return false;
3831 
3832  unsigned Reg = getRegForValue(RV);
3833  if (Reg == 0)
3834  return false;
3835 
3836  unsigned SrcReg = Reg + VA.getValNo();
3837  unsigned DestReg = VA.getLocReg();
3838  // Avoid a cross-class copy. This is very unlikely.
3839  if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3840  return false;
3841 
3842  EVT RVEVT = TLI.getValueType(DL, RV->getType());
3843  if (!RVEVT.isSimple())
3844  return false;
3845 
3846  // Vectors (of > 1 lane) in big endian need tricky handling.
3847  if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
3848  !Subtarget->isLittleEndian())
3849  return false;
3850 
3851  MVT RVVT = RVEVT.getSimpleVT();
3852  if (RVVT == MVT::f128)
3853  return false;
3854 
3855  MVT DestVT = VA.getValVT();
3856  // Special handling for extended integers.
3857  if (RVVT != DestVT) {
3858  if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3859  return false;
3860 
3861  if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3862  return false;
3863 
3864  bool IsZExt = Outs[0].Flags.isZExt();
3865  SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3866  if (SrcReg == 0)
3867  return false;
3868  }
3869 
3870  // Make the copy.
3871  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3872  TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3873 
3874  // Add register to return instruction.
3875  RetRegs.push_back(VA.getLocReg());
3876  }
3877 
3878  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3879  TII.get(AArch64::RET_ReallyLR));
3880  for (unsigned RetReg : RetRegs)
3881  MIB.addReg(RetReg, RegState::Implicit);
3882  return true;
3883 }
3884 
3885 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3886  Type *DestTy = I->getType();
3887  Value *Op = I->getOperand(0);
3888  Type *SrcTy = Op->getType();
3889 
3890  EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3891  EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3892  if (!SrcEVT.isSimple())
3893  return false;
3894  if (!DestEVT.isSimple())
3895  return false;
3896 
3897  MVT SrcVT = SrcEVT.getSimpleVT();
3898  MVT DestVT = DestEVT.getSimpleVT();
3899 
3900  if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3901  SrcVT != MVT::i8)
3902  return false;
3903  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3904  DestVT != MVT::i1)
3905  return false;
3906 
3907  unsigned SrcReg = getRegForValue(Op);
3908  if (!SrcReg)
3909  return false;
3910  bool SrcIsKill = hasTrivialKill(Op);
3911 
3912  // If we're truncating from i64 to a smaller non-legal type then generate an
3913  // AND. Otherwise, we know the high bits are undefined and a truncate only
3914  // generates a COPY. We cannot also mark the source register as the result
3915  // register, because this can incorrectly transfer the kill flag onto the
3916  // source register.
3917  unsigned ResultReg;
3918  if (SrcVT == MVT::i64) {
3919  uint64_t Mask = 0;
3920  switch (DestVT.SimpleTy) {
3921  default:
3922  // Trunc i64 to i32 is handled by the target-independent fast-isel.
3923  return false;
3924  case MVT::i1:
3925  Mask = 0x1;
3926  break;
3927  case MVT::i8:
3928  Mask = 0xff;
3929  break;
3930  case MVT::i16:
3931  Mask = 0xffff;
3932  break;
3933  }
3934  // Issue an extract_subreg to get the lower 32-bits.
3935  unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
3936  AArch64::sub_32);
3937  // Create the AND instruction which performs the actual truncation.
3938  ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
3939  assert(ResultReg && "Unexpected AND instruction emission failure.");
3940  } else {
3941  ResultReg = createResultReg(&AArch64::GPR32RegClass);
3942  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3943  TII.get(TargetOpcode::COPY), ResultReg)
3944  .addReg(SrcReg, getKillRegState(SrcIsKill));
3945  }
3946 
3947  updateValueMap(I, ResultReg);
3948  return true;
3949 }
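// For example (a sketch with illustrative virtual registers), a
// "trunc i64 %x to i8" handled here becomes:
//   %w = EXTRACT_SUBREG %x, sub_32    // take the low 32 bits
//   %r = ANDWri %w, 0xff              // mask down to the i8 bits
// whereas truncation from i32 or narrower is just a COPY, since the high
// bits are already undefined.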
3950 
3951 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
3952  assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
3953  DestVT == MVT::i64) &&
3954  "Unexpected value type.");
3955  // Handle i8 and i16 as i32.
3956  if (DestVT == MVT::i8 || DestVT == MVT::i16)
3957  DestVT = MVT::i32;
3958 
3959  if (IsZExt) {
3960  unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
3961  assert(ResultReg && "Unexpected AND instruction emission failure.");
3962  if (DestVT == MVT::i64) {
3963  // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
3964  // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
3965  unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3966  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3967  TII.get(AArch64::SUBREG_TO_REG), Reg64)
3968  .addImm(0)
3969  .addReg(ResultReg)
3970  .addImm(AArch64::sub_32);
3971  ResultReg = Reg64;
3972  }
3973  return ResultReg;
3974  } else {
3975  if (DestVT == MVT::i64) {
3976  // FIXME: We're SExt i1 to i64.
3977  return 0;
3978  }
3979  return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
3980  /*TODO:IsKill=*/false, 0, 0);
3981  }
3982 }
3983 
3984 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3985  unsigned Op1, bool Op1IsKill) {
3986  unsigned Opc, ZReg;
3987  switch (RetVT.SimpleTy) {
3988  default: return 0;
3989  case MVT::i8:
3990  case MVT::i16:
3991  case MVT::i32:
3992  RetVT = MVT::i32;
3993  Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
3994  case MVT::i64:
3995  Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
3996  }
3997 
3998  const TargetRegisterClass *RC =
3999  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4000  return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
4001  ZReg, /*IsKill=*/true);
4002 }
4003 
4004 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
4005  unsigned Op1, bool Op1IsKill) {
4006  if (RetVT != MVT::i64)
4007  return 0;
4008 
4009  return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
4010  Op0, Op0IsKill, Op1, Op1IsKill,
4011  AArch64::XZR, /*IsKill=*/true);
4012 }
4013 
4014 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
4015  unsigned Op1, bool Op1IsKill) {
4016  if (RetVT != MVT::i64)
4017  return 0;
4018 
4019  return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
4020  Op0, Op0IsKill, Op1, Op1IsKill,
4021  AArch64::XZR, /*IsKill=*/true);
4022 }
4023 
4024 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4025  unsigned Op1Reg, bool Op1IsKill) {
4026  unsigned Opc = 0;
4027  bool NeedTrunc = false;
4028  uint64_t Mask = 0;
4029  switch (RetVT.SimpleTy) {
4030  default: return 0;
4031  case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
4032  case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4033  case MVT::i32: Opc = AArch64::LSLVWr; break;
4034  case MVT::i64: Opc = AArch64::LSLVXr; break;
4035  }
4036 
4037  const TargetRegisterClass *RC =
4038  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4039  if (NeedTrunc) {
4040  Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4041  Op1IsKill = true;
4042  }
4043  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4044  Op1IsKill);
4045  if (NeedTrunc)
4046  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4047  return ResultReg;
4048 }
4049 
4050 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4051  bool Op0IsKill, uint64_t Shift,
4052  bool IsZExt) {
4053  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4054  "Unexpected source/return type pair.");
4055  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4056  SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4057  "Unexpected source value type.");
4058  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4059  RetVT == MVT::i64) && "Unexpected return value type.");
4060 
4061  bool Is64Bit = (RetVT == MVT::i64);
4062  unsigned RegSize = Is64Bit ? 64 : 32;
4063  unsigned DstBits = RetVT.getSizeInBits();
4064  unsigned SrcBits = SrcVT.getSizeInBits();
4065  const TargetRegisterClass *RC =
4066  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4067 
4068  // Just emit a copy for "zero" shifts.
4069  if (Shift == 0) {
4070  if (RetVT == SrcVT) {
4071  unsigned ResultReg = createResultReg(RC);
4072  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4073  TII.get(TargetOpcode::COPY), ResultReg)
4074  .addReg(Op0, getKillRegState(Op0IsKill));
4075  return ResultReg;
4076  } else
4077  return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4078  }
4079 
4080  // Don't deal with undefined shifts.
4081  if (Shift >= DstBits)
4082  return 0;
4083 
4084  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4085  // {S|U}BFM Wd, Wn, #r, #s
4086  // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4087 
4088  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4089  // %2 = shl i16 %1, 4
4090  // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4091  // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4092  // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4093  // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4094 
4095  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4096  // %2 = shl i16 %1, 8
4097  // Wd<32+7-24,32-24> = Wn<7:0>
4098  // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4099  // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4100  // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4101 
4102  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4103  // %2 = shl i16 %1, 12
4104  // Wd<32+3-20,32-20> = Wn<3:0>
4105  // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4106  // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4107  // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4108 
4109  unsigned ImmR = RegSize - Shift;
4110  // Limit the width to the length of the source type.
4111  unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4112  static const unsigned OpcTable[2][2] = {
4113  {AArch64::SBFMWri, AArch64::SBFMXri},
4114  {AArch64::UBFMWri, AArch64::UBFMXri}
4115  };
4116  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4117  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4118  unsigned TmpReg = MRI.createVirtualRegister(RC);
4119  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4120  TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4121  .addImm(0)
4122  .addReg(Op0, getKillRegState(Op0IsKill))
4123  .addImm(AArch64::sub_32);
4124  Op0 = TmpReg;
4125  Op0IsKill = true;
4126  }
4127  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4128 }
4129 
4130 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4131  unsigned Op1Reg, bool Op1IsKill) {
4132  unsigned Opc = 0;
4133  bool NeedTrunc = false;
4134  uint64_t Mask = 0;
4135  switch (RetVT.SimpleTy) {
4136  default: return 0;
4137  case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
4138  case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4139  case MVT::i32: Opc = AArch64::LSRVWr; break;
4140  case MVT::i64: Opc = AArch64::LSRVXr; break;
4141  }
4142 
4143  const TargetRegisterClass *RC =
4144  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4145  if (NeedTrunc) {
4146  Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
4147  Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4148  Op0IsKill = Op1IsKill = true;
4149  }
4150  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4151  Op1IsKill);
4152  if (NeedTrunc)
4153  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4154  return ResultReg;
4155 }
4156 
4157 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4158  bool Op0IsKill, uint64_t Shift,
4159  bool IsZExt) {
4160  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4161  "Unexpected source/return type pair.");
4162  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4163  SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4164  "Unexpected source value type.");
4165  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4166  RetVT == MVT::i64) && "Unexpected return value type.");
4167 
4168  bool Is64Bit = (RetVT == MVT::i64);
4169  unsigned RegSize = Is64Bit ? 64 : 32;
4170  unsigned DstBits = RetVT.getSizeInBits();
4171  unsigned SrcBits = SrcVT.getSizeInBits();
4172  const TargetRegisterClass *RC =
4173  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4174 
4175  // Just emit a copy for "zero" shifts.
4176  if (Shift == 0) {
4177  if (RetVT == SrcVT) {
4178  unsigned ResultReg = createResultReg(RC);
4179  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4180  TII.get(TargetOpcode::COPY), ResultReg)
4181  .addReg(Op0, getKillRegState(Op0IsKill));
4182  return ResultReg;
4183  } else
4184  return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4185  }
4186 
4187  // Don't deal with undefined shifts.
4188  if (Shift >= DstBits)
4189  return 0;
4190 
4191  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4192  // {S|U}BFM Wd, Wn, #r, #s
4193  // Wd<s-r:0> = Wn<s:r> when r <= s
4194 
4195  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4196  // %2 = lshr i16 %1, 4
4197  // Wd<7-4:0> = Wn<7:4>
4198  // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4199  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4200  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4201 
4202  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4203  // %2 = lshr i16 %1, 8
4204  // Wd<7-7,0> = Wn<7:7>
4205  // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4206  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4207  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4208 
4209  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4210  // %2 = lshr i16 %1, 12
4211  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4212  // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4213  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4214  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4215 
4216  if (Shift >= SrcBits && IsZExt)
4217  return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4218 
4219  // It is not possible to fold a sign-extend into the LShr instruction. In this
4220  // case emit a sign-extend.
4221  if (!IsZExt) {
4222  Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4223  if (!Op0)
4224  return 0;
4225  Op0IsKill = true;
4226  SrcVT = RetVT;
4227  SrcBits = SrcVT.getSizeInBits();
4228  IsZExt = true;
4229  }
4230 
4231  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4232  unsigned ImmS = SrcBits - 1;
4233  static const unsigned OpcTable[2][2] = {
4234  {AArch64::SBFMWri, AArch64::SBFMXri},
4235  {AArch64::UBFMWri, AArch64::UBFMXri}
4236  };
4237  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4238  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4239  unsigned TmpReg = MRI.createVirtualRegister(RC);
4240  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4241  TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4242  .addImm(0)
4243  .addReg(Op0, getKillRegState(Op0IsKill))
4244  .addImm(AArch64::sub_32);
4245  Op0 = TmpReg;
4246  Op0IsKill = true;
4247  }
4248  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4249 }
4250 
4251 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4252  unsigned Op1Reg, bool Op1IsKill) {
4253  unsigned Opc = 0;
4254  bool NeedTrunc = false;
4255  uint64_t Mask = 0;
4256  switch (RetVT.SimpleTy) {
4257  default: return 0;
4258  case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
4259  case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4260  case MVT::i32: Opc = AArch64::ASRVWr; break;
4261  case MVT::i64: Opc = AArch64::ASRVXr; break;
4262  }
4263 
4264  const TargetRegisterClass *RC =
4265  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4266  if (NeedTrunc) {
4267  Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
4268  Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4269  Op0IsKill = Op1IsKill = true;
4270  }
4271  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4272  Op1IsKill);
4273  if (NeedTrunc)
4274  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4275  return ResultReg;
4276 }
4277 
4278 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4279  bool Op0IsKill, uint64_t Shift,
4280  bool IsZExt) {
4281  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4282  "Unexpected source/return type pair.");
4283  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4284  SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4285  "Unexpected source value type.");
4286  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4287  RetVT == MVT::i64) && "Unexpected return value type.");
4288 
4289  bool Is64Bit = (RetVT == MVT::i64);
4290  unsigned RegSize = Is64Bit ? 64 : 32;
4291  unsigned DstBits = RetVT.getSizeInBits();
4292  unsigned SrcBits = SrcVT.getSizeInBits();
4293  const TargetRegisterClass *RC =
4294  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4295 
4296  // Just emit a copy for "zero" shifts.
4297  if (Shift == 0) {
4298  if (RetVT == SrcVT) {
4299  unsigned ResultReg = createResultReg(RC);
4300  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4301  TII.get(TargetOpcode::COPY), ResultReg)
4302  .addReg(Op0, getKillRegState(Op0IsKill));
4303  return ResultReg;
4304  } else
4305  return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4306  }
4307 
4308  // Don't deal with undefined shifts.
4309  if (Shift >= DstBits)
4310  return 0;
4311 
4312  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4313  // {S|U}BFM Wd, Wn, #r, #s
4314  // Wd<s-r:0> = Wn<s:r> when r <= s
4315 
4316  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4317  // %2 = ashr i16 %1, 4
4318  // Wd<7-4:0> = Wn<7:4>
4319  // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4320  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4321  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4322 
4323  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4324  // %2 = ashr i16 %1, 8
4325  // Wd<7-7,0> = Wn<7:7>
4326  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4327  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4328  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4329 
4330  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4331  // %2 = ashr i16 %1, 12
4332  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4333  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4334  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4335  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4336 
4337  if (Shift >= SrcBits && IsZExt)
4338  return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4339 
4340  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4341  unsigned ImmS = SrcBits - 1;
4342  static const unsigned OpcTable[2][2] = {
4343  {AArch64::SBFMWri, AArch64::SBFMXri},
4344  {AArch64::UBFMWri, AArch64::UBFMXri}
4345  };
4346  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4347  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4348  unsigned TmpReg = MRI.createVirtualRegister(RC);
4349  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4350  TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4351  .addImm(0)
4352  .addReg(Op0, getKillRegState(Op0IsKill))
4353  .addImm(AArch64::sub_32);
4354  Op0 = TmpReg;
4355  Op0IsKill = true;
4356  }
4357  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4358 }
4359 
4360 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4361  bool IsZExt) {
4362  assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4363 
4364  // FastISel does not have plumbing to deal with extensions where the SrcVT or
4365  // DestVT are odd things, so test to make sure that they are both types we can
4366  // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4367  // bail out to SelectionDAG.
4368  if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4369  (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4370  ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4371  (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4372  return 0;
4373 
4374  unsigned Opc;
4375  unsigned Imm = 0;
4376 
4377  switch (SrcVT.SimpleTy) {
4378  default:
4379  return 0;
4380  case MVT::i1:
4381  return emiti1Ext(SrcReg, DestVT, IsZExt);
4382  case MVT::i8:
4383  if (DestVT == MVT::i64)
4384  Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4385  else
4386  Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4387  Imm = 7;
4388  break;
4389  case MVT::i16:
4390  if (DestVT == MVT::i64)
4391  Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4392  else
4393  Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4394  Imm = 15;
4395  break;
4396  case MVT::i32:
4397  assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4398  Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4399  Imm = 31;
4400  break;
4401  }
4402 
4403  // Handle i8 and i16 as i32.
4404  if (DestVT == MVT::i8 || DestVT == MVT::i16)
4405  DestVT = MVT::i32;
4406  else if (DestVT == MVT::i64) {
4407  unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4408  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4409  TII.get(AArch64::SUBREG_TO_REG), Src64)
4410  .addImm(0)
4411  .addReg(SrcReg)
4412  .addImm(AArch64::sub_32);
4413  SrcReg = Src64;
4414  }
4415 
4416  const TargetRegisterClass *RC =
4417  (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4418  return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
4419 }
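// Note: with immr = 0 and imms = width - 1, the UBFM/SBFM instructions used
// above are the same operations as the familiar UXTB/UXTH/SXTB/SXTH/SXTW
// aliases (e.g. "ubfm w0, w0, #0, #7" is "uxtb w0, w0"); the SUBREG_TO_REG
// merely widens a 32-bit source register so the 64-bit X-form can be used.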
4420 
4421 static bool isZExtLoad(const MachineInstr *LI) {
4422  switch (LI->getOpcode()) {
4423  default:
4424  return false;
4425  case AArch64::LDURBBi:
4426  case AArch64::LDURHHi:
4427  case AArch64::LDURWi:
4428  case AArch64::LDRBBui:
4429  case AArch64::LDRHHui:
4430  case AArch64::LDRWui:
4431  case AArch64::LDRBBroX:
4432  case AArch64::LDRHHroX:
4433  case AArch64::LDRWroX:
4434  case AArch64::LDRBBroW:
4435  case AArch64::LDRHHroW:
4436  case AArch64::LDRWroW:
4437  return true;
4438  }
4439 }
4440 
4441 static bool isSExtLoad(const MachineInstr *LI) {
4442  switch (LI->getOpcode()) {
4443  default:
4444  return false;
4445  case AArch64::LDURSBWi:
4446  case AArch64::LDURSHWi:
4447  case AArch64::LDURSBXi:
4448  case AArch64::LDURSHXi:
4449  case AArch64::LDURSWi:
4450  case AArch64::LDRSBWui:
4451  case AArch64::LDRSHWui:
4452  case AArch64::LDRSBXui:
4453  case AArch64::LDRSHXui:
4454  case AArch64::LDRSWui:
4455  case AArch64::LDRSBWroX:
4456  case AArch64::LDRSHWroX:
4457  case AArch64::LDRSBXroX:
4458  case AArch64::LDRSHXroX:
4459  case AArch64::LDRSWroX:
4460  case AArch64::LDRSBWroW:
4461  case AArch64::LDRSHWroW:
4462  case AArch64::LDRSBXroW:
4463  case AArch64::LDRSHXroW:
4464  case AArch64::LDRSWroW:
4465  return true;
4466  }
4467 }
4468 
4469 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4470  MVT SrcVT) {
4471  const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4472  if (!LI || !LI->hasOneUse())
4473  return false;
4474 
4475  // Check if the load instruction has already been selected.
4476  unsigned Reg = lookUpRegForValue(LI);
4477  if (!Reg)
4478  return false;
4479 
4480  MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4481  if (!MI)
4482  return false;
4483 
4484  // Check if the correct load instruction has been emitted - SelectionDAG might
4485  // have emitted a zero-extending load, but we need a sign-extending load.
4486  bool IsZExt = isa<ZExtInst>(I);
4487  const auto *LoadMI = MI;
4488  if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4489  LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4490  unsigned LoadReg = MI->getOperand(1).getReg();
4491  LoadMI = MRI.getUniqueVRegDef(LoadReg);
4492  assert(LoadMI && "Expected valid instruction");
4493  }
4494  if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4495  return false;
4496 
4497  // Nothing to be done.
4498  if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4499  updateValueMap(I, Reg);
4500  return true;
4501  }
4502 
4503  if (IsZExt) {
4504  unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
4505  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4506  TII.get(AArch64::SUBREG_TO_REG), Reg64)
4507  .addImm(0)
4508  .addReg(Reg, getKillRegState(true))
4509  .addImm(AArch64::sub_32);
4510  Reg = Reg64;
4511  } else {
4512  assert((MI->getOpcode() == TargetOpcode::COPY &&
4513  MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4514  "Expected copy instruction");
4515  Reg = MI->getOperand(1).getReg();
4516  MachineBasicBlock::iterator I(MI);
4517  removeDeadCode(I, std::next(I));
4518  }
4519  updateValueMap(I, Reg);
4520  return true;
4521 }
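// The reason this is safe: the load forms accepted by isZExtLoad/isSExtLoad
// already produce the extended value. For example, LDRBBui zero-extends the
// byte into a W register (and any write to a W register implicitly zeroes
// the upper 32 bits of the X register), while LDRSBXui sign-extends straight
// into an X register. The zext/sext can therefore reuse the load's result,
// at most adding a SUBREG_TO_REG when a 32-bit zero-extended value is needed
// as an i64.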
4522 
4523 bool AArch64FastISel::selectIntExt(const Instruction *I) {
4524  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4525  "Unexpected integer extend instruction.");
4526  MVT RetVT;
4527  MVT SrcVT;
4528  if (!isTypeSupported(I->getType(), RetVT))
4529  return false;
4530 
4531  if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4532  return false;
4533 
4534  // Try to optimize already sign-/zero-extended values from load instructions.
4535  if (optimizeIntExtLoad(I, RetVT, SrcVT))
4536  return true;
4537 
4538  unsigned SrcReg = getRegForValue(I->getOperand(0));
4539  if (!SrcReg)
4540  return false;
4541  bool SrcIsKill = hasTrivialKill(I->getOperand(0));
4542 
4543  // Try to optimize already sign-/zero-extended values from function arguments.
4544  bool IsZExt = isa<ZExtInst>(I);
4545  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4546  if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4547  if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4548  unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
4549  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4550  TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4551  .addImm(0)
4552  .addReg(SrcReg, getKillRegState(SrcIsKill))
4553  .addImm(AArch64::sub_32);
4554  SrcReg = ResultReg;
4555  }
4556  // Conservatively clear all kill flags from all uses, because we are
4557  // replacing a sign-/zero-extend instruction at IR level with a nop at MI
4558  // level. The result of the instruction at IR level might have been
4559  // trivially dead, which is no longer true.
4560  unsigned UseReg = lookUpRegForValue(I);
4561  if (UseReg)
4562  MRI.clearKillFlags(UseReg);
4563 
4564  updateValueMap(I, SrcReg);
4565  return true;
4566  }
4567  }
4568 
4569  unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4570  if (!ResultReg)
4571  return false;
4572 
4573  updateValueMap(I, ResultReg);
4574  return true;
4575 }
4576 
4577 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4578  EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4579  if (!DestEVT.isSimple())
4580  return false;
4581 
4582  MVT DestVT = DestEVT.getSimpleVT();
4583  if (DestVT != MVT::i64 && DestVT != MVT::i32)
4584  return false;
4585 
4586  unsigned DivOpc;
4587  bool Is64bit = (DestVT == MVT::i64);
4588  switch (ISDOpcode) {
4589  default:
4590  return false;
4591  case ISD::SREM:
4592  DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4593  break;
4594  case ISD::UREM:
4595  DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4596  break;
4597  }
4598  unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4599  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4600  if (!Src0Reg)
4601  return false;
4602  bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4603 
4604  unsigned Src1Reg = getRegForValue(I->getOperand(1));
4605  if (!Src1Reg)
4606  return false;
4607  bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4608 
4609  const TargetRegisterClass *RC =
4610  (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4611  unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
4612  Src1Reg, /*IsKill=*/false);
4613  assert(QuotReg && "Unexpected DIV instruction emission failure.");
4614  // The remainder is computed as numerator - (quotient * denominator) using the
4615  // MSUB instruction.
4616  unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
4617  Src1Reg, Src1IsKill, Src0Reg,
4618  Src0IsKill);
4619  updateValueMap(I, ResultReg);
4620  return true;
4621 }
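// A sketch of the emitted sequence for a 32-bit srem (illustrative regs):
//   sdiv w8, w0, w1        // quotient
//   msub w0, w8, w1, w0    // w0 - (w8 * w1), i.e. the remainder
// The urem path is identical except that udiv replaces sdiv.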
4622 
4623 bool AArch64FastISel::selectMul(const Instruction *I) {
4624  MVT VT;
4625  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4626  return false;
4627 
4628  if (VT.isVector())
4629  return selectBinaryOp(I, ISD::MUL);
4630 
4631  const Value *Src0 = I->getOperand(0);
4632  const Value *Src1 = I->getOperand(1);
4633  if (const auto *C = dyn_cast<ConstantInt>(Src0))
4634  if (C->getValue().isPowerOf2())
4635  std::swap(Src0, Src1);
4636 
4637  // Try to simplify to a shift instruction.
4638  if (const auto *C = dyn_cast<ConstantInt>(Src1))
4639  if (C->getValue().isPowerOf2()) {
4640  uint64_t ShiftVal = C->getValue().logBase2();
4641  MVT SrcVT = VT;
4642  bool IsZExt = true;
4643  if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4644  if (!isIntExtFree(ZExt)) {
4645  MVT VT;
4646  if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4647  SrcVT = VT;
4648  IsZExt = true;
4649  Src0 = ZExt->getOperand(0);
4650  }
4651  }
4652  } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4653  if (!isIntExtFree(SExt)) {
4654  MVT VT;
4655  if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4656  SrcVT = VT;
4657  IsZExt = false;
4658  Src0 = SExt->getOperand(0);
4659  }
4660  }
4661  }
4662 
4663  unsigned Src0Reg = getRegForValue(Src0);
4664  if (!Src0Reg)
4665  return false;
4666  bool Src0IsKill = hasTrivialKill(Src0);
4667 
4668  unsigned ResultReg =
4669  emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt);
4670 
4671  if (ResultReg) {
4672  updateValueMap(I, ResultReg);
4673  return true;
4674  }
4675  }
4676 
4677  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4678  if (!Src0Reg)
4679  return false;
4680  bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4681 
4682  unsigned Src1Reg = getRegForValue(I->getOperand(1));
4683  if (!Src1Reg)
4684  return false;
4685  bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4686 
4687  unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
4688 
4689  if (!ResultReg)
4690  return false;
4691 
4692  updateValueMap(I, ResultReg);
4693  return true;
4694 }
4695 
4696 bool AArch64FastISel::selectShift(const Instruction *I) {
4697  MVT RetVT;
4698  if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4699  return false;
4700 
4701  if (RetVT.isVector())
4702  return selectOperator(I, I->getOpcode());
4703 
4704  if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4705  unsigned ResultReg = 0;
4706  uint64_t ShiftVal = C->getZExtValue();
4707  MVT SrcVT = RetVT;
4708  bool IsZExt = I->getOpcode() != Instruction::AShr;
4709  const Value *Op0 = I->getOperand(0);
4710  if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4711  if (!isIntExtFree(ZExt)) {
4712  MVT TmpVT;
4713  if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4714  SrcVT = TmpVT;
4715  IsZExt = true;
4716  Op0 = ZExt->getOperand(0);
4717  }
4718  }
4719  } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4720  if (!isIntExtFree(SExt)) {
4721  MVT TmpVT;
4722  if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4723  SrcVT = TmpVT;
4724  IsZExt = false;
4725  Op0 = SExt->getOperand(0);
4726  }
4727  }
4728  }
4729 
4730  unsigned Op0Reg = getRegForValue(Op0);
4731  if (!Op0Reg)
4732  return false;
4733  bool Op0IsKill = hasTrivialKill(Op0);
4734 
4735  switch (I->getOpcode()) {
4736  default: llvm_unreachable("Unexpected instruction.");
4737  case Instruction::Shl:
4738  ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4739  break;
4740  case Instruction::AShr:
4741  ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4742  break;
4743  case Instruction::LShr:
4744  ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4745  break;
4746  }
4747  if (!ResultReg)
4748  return false;
4749 
4750  updateValueMap(I, ResultReg);
4751  return true;
4752  }
4753 
4754  unsigned Op0Reg = getRegForValue(I->getOperand(0));
4755  if (!Op0Reg)
4756  return false;
4757  bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4758 
4759  unsigned Op1Reg = getRegForValue(I->getOperand(1));
4760  if (!Op1Reg)
4761  return false;
4762  bool Op1IsKill = hasTrivialKill(I->getOperand(1));
4763 
4764  unsigned ResultReg = 0;
4765  switch (I->getOpcode()) {
4766  default: llvm_unreachable("Unexpected instruction.");
4767  case Instruction::Shl:
4768  ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4769  break;
4770  case Instruction::AShr:
4771  ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4772  break;
4773  case Instruction::LShr:
4774  ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4775  break;
4776  }
4777 
4778  if (!ResultReg)
4779  return false;
4780 
4781  updateValueMap(I, ResultReg);
4782  return true;
4783 }
4784 
4785 bool AArch64FastISel::selectBitCast(const Instruction *I) {
4786  MVT RetVT, SrcVT;
4787 
4788  if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4789  return false;
4790  if (!isTypeLegal(I->getType(), RetVT))
4791  return false;
4792 
4793  unsigned Opc;
4794  if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4795  Opc = AArch64::FMOVWSr;
4796  else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4797  Opc = AArch64::FMOVXDr;
4798  else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4799  Opc = AArch64::FMOVSWr;
4800  else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4801  Opc = AArch64::FMOVDXr;
4802  else
4803  return false;
4804 
4805  const TargetRegisterClass *RC = nullptr;
4806  switch (RetVT.SimpleTy) {
4807  default: llvm_unreachable("Unexpected value type.");
4808  case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4809  case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4810  case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4811  case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4812  }
4813  unsigned Op0Reg = getRegForValue(I->getOperand(0));
4814  if (!Op0Reg)
4815  return false;
4816  bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4817  unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
4818 
4819  if (!ResultReg)
4820  return false;
4821 
4822  updateValueMap(I, ResultReg);
4823  return true;
4824 }
4825 
4826 bool AArch64FastISel::selectFRem(const Instruction *I) {
4827  MVT RetVT;
4828  if (!isTypeLegal(I->getType(), RetVT))
4829  return false;
4830 
4831  RTLIB::Libcall LC;
4832  switch (RetVT.SimpleTy) {
4833  default:
4834  return false;
4835  case MVT::f32:
4836  LC = RTLIB::REM_F32;
4837  break;
4838  case MVT::f64:
4839  LC = RTLIB::REM_F64;
4840  break;
4841  }
4842 
4843  ArgListTy Args;
4844  Args.reserve(I->getNumOperands());
4845 
4846  // Populate the argument list.
4847  for (auto &Arg : I->operands()) {
4848  ArgListEntry Entry;
4849  Entry.Val = Arg;
4850  Entry.Ty = Arg->getType();
4851  Args.push_back(Entry);
4852  }
4853 
4854  CallLoweringInfo CLI;
4855  MCContext &Ctx = MF->getContext();
4856  CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4857  TLI.getLibcallName(LC), std::move(Args));
4858  if (!lowerCallTo(CLI))
4859  return false;
4860  updateValueMap(I, CLI.ResultReg);
4861  return true;
4862 }
4863 
4864 bool AArch64FastISel::selectSDiv(const Instruction *I) {
4865  MVT VT;
4866  if (!isTypeLegal(I->getType(), VT))
4867  return false;
4868 
4869  if (!isa<ConstantInt>(I->getOperand(1)))
4870  return selectBinaryOp(I, ISD::SDIV);
4871 
4872  const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4873  if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4874  !(C.isPowerOf2() || (-C).isPowerOf2()))
4875  return selectBinaryOp(I, ISD::SDIV);
4876 
4877  unsigned Lg2 = C.countTrailingZeros();
4878  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4879  if (!Src0Reg)
4880  return false;
4881  bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4882 
4883  if (cast<BinaryOperator>(I)->isExact()) {
4884  unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
4885  if (!ResultReg)
4886  return false;
4887  updateValueMap(I, ResultReg);
4888  return true;
4889  }
4890 
4891  int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4892  unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne);
4893  if (!AddReg)
4894  return false;
4895 
4896  // (Src0 < 0) ? Pow2 - 1 : 0;
4897  if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
4898  return false;
4899 
4900  unsigned SelectOpc;
4901  const TargetRegisterClass *RC;
4902  if (VT == MVT::i64) {
4903  SelectOpc = AArch64::CSELXr;
4904  RC = &AArch64::GPR64RegClass;
4905  } else {
4906  SelectOpc = AArch64::CSELWr;
4907  RC = &AArch64::GPR32RegClass;
4908  }
4909  unsigned SelectReg =
4910  fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
4911  Src0IsKill, AArch64CC::LT);
4912  if (!SelectReg)
4913  return false;
4914 
4915  // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4916  // negate the result.
4917  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4918  unsigned ResultReg;
4919  if (C.isNegative())
4920  ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
4921  SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
4922  else
4923  ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);
4924 
4925  if (!ResultReg)
4926  return false;
4927 
4928  updateValueMap(I, ResultReg);
4929  return true;
4930 }
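// A sketch of the power-of-two path above for "sdiv i32 %x, 8" (illustrative
// registers; the 'exact' case degenerates to a single asr):
//   add  w8, w0, #7        // bias by 2^3 - 1
//   cmp  w0, #0
//   csel w8, w8, w0, lt    // use the biased value only for negative inputs
//   asr  w0, w8, #3        // round-toward-zero quotient
// For a negative power-of-two divisor the last step becomes
// "neg w0, w8, asr #3" instead.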
4931 
4932 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4933 /// have to duplicate it for AArch64, because otherwise we would fail during the
4934 /// sign-extend emission.
4935 std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4936  unsigned IdxN = getRegForValue(Idx);
4937  if (IdxN == 0)
4938  // Unhandled operand. Halt "fast" selection and bail.
4939  return std::pair<unsigned, bool>(0, false);
4940 
4941  bool IdxNIsKill = hasTrivialKill(Idx);
4942 
4943  // If the index is smaller or larger than intptr_t, truncate or extend it.
4944  MVT PtrVT = TLI.getPointerTy(DL);
4945  EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4946  if (IdxVT.bitsLT(PtrVT)) {
4947  IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*IsZExt=*/false);
4948  IdxNIsKill = true;
4949  } else if (IdxVT.bitsGT(PtrVT))
4950  llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4951  return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
4952 }
4953 
4954 /// This is mostly a copy of the existing FastISel GEP code, but we have to
4955 /// duplicate it for AArch64, because otherwise we would bail out even for
4956 /// simple cases. This is because the standard fastEmit functions don't cover
4957 /// MUL at all and ADD is lowered very inefficiently.
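/// For example, "getelementptr i32, i32* %p, i64 %i" would be selected here
/// roughly as (illustrative registers):
///   mov x9, #4             // sizeof(i32)
///   mul x9, x8, x9         // scale the index
///   add x0, x0, x9         // add to the base pointer
/// with all constant indices folded into a single trailing add of TotalOffs.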
4958 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4959  unsigned N = getRegForValue(I->getOperand(0));
4960  if (!N)
4961  return false;
4962  bool NIsKill = hasTrivialKill(I->getOperand(0));
4963 
4964  // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4965  // into a single N = N + TotalOffset.
4966  uint64_t TotalOffs = 0;
4967  MVT VT = TLI.getPointerTy(DL);
4968  for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
4969  GTI != E; ++GTI) {
4970  const Value *Idx = GTI.getOperand();
4971  if (auto *StTy = GTI.getStructTypeOrNull()) {
4972  unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4973  // N = N + Offset
4974  if (Field)
4975  TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4976  } else {
4977  Type *Ty = GTI.getIndexedType();
4978 
4979  // If this is a constant subscript, handle it quickly.
4980  if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4981  if (CI->isZero())
4982  continue;
4983  // N = N + Offset
4984  TotalOffs +=
4985  DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
4986  continue;
4987  }
4988  if (TotalOffs) {
4989  N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
4990  if (!N)
4991  return false;
4992  NIsKill = true;
4993  TotalOffs = 0;
4994  }
4995 
4996  // N = N + Idx * ElementSize;
4997  uint64_t ElementSize = DL.getTypeAllocSize(Ty);
4998  std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
4999  unsigned IdxN = Pair.first;
5000  bool IdxNIsKill = Pair.second;
5001  if (!IdxN)
5002  return false;
5003 
5004  if (ElementSize != 1) {
5005  unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
5006  if (!C)
5007  return false;
5008  IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true);
5009  if (!IdxN)
5010  return false;
5011  IdxNIsKill = true;
5012  }
5013  N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
5014  if (!N)
5015  return false;
5016  }
5017  }
5018  if (TotalOffs) {
5019  N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
5020  if (!N)
5021  return false;
5022  }
5023  updateValueMap(I, N);
5024  return true;
5025 }
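
// Illustrative sketch (struct layout and registers are assumptions): for
// "getelementptr {i32, i64}, {i32, i64}* %p, i64 1, i32 1" both indices are
// constants, so TotalOffs accumulates 16 (one whole struct, padded to 16
// bytes) + 8 (offset of field 1) = 24 and the GEP collapses into
//
//   add x0, x0, #24
//
// A non-constant array index instead goes through getRegForGEPIndex, a
// multiply by the element size, and the final register-register ADD above.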
5026 
5027 bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
5028  assert(TM.getOptLevel() == CodeGenOpt::None &&
5029  "cmpxchg survived AtomicExpand at optlevel > -O0");
5030 
5031  auto *RetPairTy = cast<StructType>(I->getType());
5032  Type *RetTy = RetPairTy->getTypeAtIndex(0U);
5033  assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
5034  "cmpxchg has a non-i1 status result");
5035 
5036  MVT VT;
5037  if (!isTypeLegal(RetTy, VT))
5038  return false;
5039 
5040  const TargetRegisterClass *ResRC;
5041  unsigned Opc, CmpOpc;
5042  // This only supports i32/i64, because i8/i16 aren't legal, and the generic
5043  // extractvalue selection doesn't support that.
5044  if (VT == MVT::i32) {
5045  Opc = AArch64::CMP_SWAP_32;
5046  CmpOpc = AArch64::SUBSWrs;
5047  ResRC = &AArch64::GPR32RegClass;
5048  } else if (VT == MVT::i64) {
5049  Opc = AArch64::CMP_SWAP_64;
5050  CmpOpc = AArch64::SUBSXrs;
5051  ResRC = &AArch64::GPR64RegClass;
5052  } else {
5053  return false;
5054  }
5055 
5056  const MCInstrDesc &II = TII.get(Opc);
5057 
5058  const unsigned AddrReg = constrainOperandRegClass(
5059  II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
5060  const unsigned DesiredReg = constrainOperandRegClass(
5061  II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
5062  const unsigned NewReg = constrainOperandRegClass(
5063  II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
5064 
5065  const unsigned ResultReg1 = createResultReg(ResRC);
5066  const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5067  const unsigned ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5068 
5069  // FIXME: MachineMemOperand doesn't support cmpxchg yet.
5070  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
5071  .addDef(ResultReg1)
5072  .addDef(ScratchReg)
5073  .addUse(AddrReg)
5074  .addUse(DesiredReg)
5075  .addUse(NewReg);
5076 
5077  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
5078  .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5079  .addUse(ResultReg1)
5080  .addUse(DesiredReg)
5081  .addImm(0);
5082 
5083  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr))
5084  .addDef(ResultReg2)
5085  .addUse(AArch64::WZR)
5086  .addUse(AArch64::WZR)
 5087  .addImm(AArch64CC::NE);
 5088 
5089  assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5090  updateValueMap(I, ResultReg1, 2);
5091  return true;
5092 }
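
// Illustrative sketch (register names are assumptions): for
// "%res = cmpxchg i32* %p, i32 %old, i32 %new seq_cst seq_cst" the three
// BuildMI calls above emit roughly
//
//   CMP_SWAP_32 %loaded, %scratch, %p, %old, %new  // pseudo, expanded later
//   subs wzr, %loaded, %old                        // did we see the expected value?
//   csinc %success, wzr, wzr, ne                   // 1 on success, 0 otherwise
//
// and the two struct fields of the cmpxchg result are mapped onto the
// consecutive registers by updateValueMap(I, ResultReg1, 2).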
5093 
5094 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5095  switch (I->getOpcode()) {
5096  default:
5097  break;
5098  case Instruction::Add:
5099  case Instruction::Sub:
5100  return selectAddSub(I);
5101  case Instruction::Mul:
5102  return selectMul(I);
5103  case Instruction::SDiv:
5104  return selectSDiv(I);
5105  case Instruction::SRem:
5106  if (!selectBinaryOp(I, ISD::SREM))
5107  return selectRem(I, ISD::SREM);
5108  return true;
5109  case Instruction::URem:
5110  if (!selectBinaryOp(I, ISD::UREM))
5111  return selectRem(I, ISD::UREM);
5112  return true;
5113  case Instruction::Shl:
5114  case Instruction::LShr:
5115  case Instruction::AShr:
5116  return selectShift(I);
5117  case Instruction::And:
5118  case Instruction::Or:
5119  case Instruction::Xor:
5120  return selectLogicalOp(I);
5121  case Instruction::Br:
5122  return selectBranch(I);
5123  case Instruction::IndirectBr:
5124  return selectIndirectBr(I);
5125  case Instruction::BitCast:
5126  if (!FastISel::selectBitCast(I))
5127  return selectBitCast(I);
5128  return true;
5129  case Instruction::FPToSI:
5130  if (!selectCast(I, ISD::FP_TO_SINT))
5131  return selectFPToInt(I, /*Signed=*/true);
5132  return true;
5133  case Instruction::FPToUI:
5134  return selectFPToInt(I, /*Signed=*/false);
5135  case Instruction::ZExt:
5136  case Instruction::SExt:
5137  return selectIntExt(I);
5138  case Instruction::Trunc:
5139  if (!selectCast(I, ISD::TRUNCATE))
5140  return selectTrunc(I);
5141  return true;
5142  case Instruction::FPExt:
5143  return selectFPExt(I);
5144  case Instruction::FPTrunc:
5145  return selectFPTrunc(I);
5146  case Instruction::SIToFP:
5147  if (!selectCast(I, ISD::SINT_TO_FP))
5148  return selectIntToFP(I, /*Signed=*/true);
5149  return true;
5150  case Instruction::UIToFP:
5151  return selectIntToFP(I, /*Signed=*/false);
5152  case Instruction::Load:
5153  return selectLoad(I);
5154  case Instruction::Store:
5155  return selectStore(I);
5156  case Instruction::FCmp:
5157  case Instruction::ICmp:
5158  return selectCmp(I);
5159  case Instruction::Select:
5160  return selectSelect(I);
5161  case Instruction::Ret:
5162  return selectRet(I);
5163  case Instruction::FRem:
5164  return selectFRem(I);
5165  case Instruction::GetElementPtr:
5166  return selectGetElementPtr(I);
5167  case Instruction::AtomicCmpXchg:
5168  return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5169  }
5170 
 5171  // Fall back to target-independent instruction selection.
5172  return selectOperator(I, I->getOpcode());
5173 }
5174 
5175 namespace llvm {
5176 
 5177 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
 5178  const TargetLibraryInfo *LibInfo) {
5179  return new AArch64FastISel(FuncInfo, LibInfo);
5180 }
5181 
5182 } // end namespace llvm
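
// A minimal sketch of how this factory is reached, assumed to mirror the hook
// in AArch64ISelLowering.cpp (not part of this file): the target lowering's
// createFastISel override simply forwards here.
//
//   FastISel *
//   AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
//                                         const TargetLibraryInfo *libInfo) const {
//     return AArch64::createFastISel(funcInfo, libInfo);
//   }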