1 //===- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the AArch64-specific support for the FastISel class. Some
10 // of the target-specific code is generated by tablegen in the file
11 // AArch64GenFastISel.inc, which is #included here.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AArch64.h"
18 #include "AArch64RegisterInfo.h"
19 #include "AArch64Subtarget.h"
21 #include "Utils/AArch64BaseInfo.h"
22 #include "llvm/ADT/APFloat.h"
23 #include "llvm/ADT/APInt.h"
24 #include "llvm/ADT/DenseMap.h"
25 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/CodeGen/FastISel.h"
40 #include "llvm/IR/Argument.h"
41 #include "llvm/IR/Attributes.h"
42 #include "llvm/IR/BasicBlock.h"
43 #include "llvm/IR/CallingConv.h"
44 #include "llvm/IR/Constant.h"
45 #include "llvm/IR/Constants.h"
46 #include "llvm/IR/DataLayout.h"
47 #include "llvm/IR/DerivedTypes.h"
48 #include "llvm/IR/Function.h"
50 #include "llvm/IR/GlobalValue.h"
51 #include "llvm/IR/InstrTypes.h"
52 #include "llvm/IR/Instruction.h"
53 #include "llvm/IR/Instructions.h"
54 #include "llvm/IR/IntrinsicInst.h"
55 #include "llvm/IR/Intrinsics.h"
56 #include "llvm/IR/Operator.h"
57 #include "llvm/IR/Type.h"
58 #include "llvm/IR/User.h"
59 #include "llvm/IR/Value.h"
60 #include "llvm/MC/MCInstrDesc.h"
61 #include "llvm/MC/MCRegisterInfo.h"
62 #include "llvm/MC/MCSymbol.h"
64 #include "llvm/Support/Casting.h"
65 #include "llvm/Support/CodeGen.h"
66 #include "llvm/Support/Compiler.h"
70 #include <algorithm>
71 #include <cassert>
72 #include <cstdint>
73 #include <iterator>
74 #include <utility>
75 
76 using namespace llvm;
77 
78 namespace {
79 
80 class AArch64FastISel final : public FastISel {
81  class Address {
82  public:
83  using BaseKind = enum {
84  RegBase,
85  FrameIndexBase
86  };
87 
88  private:
89  BaseKind Kind = RegBase;
90  AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
91  union {
92  unsigned Reg;
93  int FI;
94  } Base;
95  unsigned OffsetReg = 0;
96  unsigned Shift = 0;
97  int64_t Offset = 0;
98  const GlobalValue *GV = nullptr;
99 
100  public:
101  Address() { Base.Reg = 0; }
102 
103  void setKind(BaseKind K) { Kind = K; }
104  BaseKind getKind() const { return Kind; }
105  void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
106  AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
107  bool isRegBase() const { return Kind == RegBase; }
108  bool isFIBase() const { return Kind == FrameIndexBase; }
109 
110  void setReg(unsigned Reg) {
111  assert(isRegBase() && "Invalid base register access!");
112  Base.Reg = Reg;
113  }
114 
115  unsigned getReg() const {
116  assert(isRegBase() && "Invalid base register access!");
117  return Base.Reg;
118  }
119 
120  void setOffsetReg(unsigned Reg) {
121  OffsetReg = Reg;
122  }
123 
124  unsigned getOffsetReg() const {
125  return OffsetReg;
126  }
127 
128  void setFI(unsigned FI) {
129  assert(isFIBase() && "Invalid base frame index access!");
130  Base.FI = FI;
131  }
132 
133  unsigned getFI() const {
134  assert(isFIBase() && "Invalid base frame index access!");
135  return Base.FI;
136  }
137 
138  void setOffset(int64_t O) { Offset = O; }
139  int64_t getOffset() { return Offset; }
140  void setShift(unsigned S) { Shift = S; }
141  unsigned getShift() { return Shift; }
142 
143  void setGlobalValue(const GlobalValue *G) { GV = G; }
144  const GlobalValue *getGlobalValue() { return GV; }
145  };
146 
147  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
148  /// make the right decision when generating code for different targets.
149  const AArch64Subtarget *Subtarget;
150  LLVMContext *Context;
151 
152  bool fastLowerArguments() override;
153  bool fastLowerCall(CallLoweringInfo &CLI) override;
154  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
155 
156 private:
157  // Selection routines.
158  bool selectAddSub(const Instruction *I);
159  bool selectLogicalOp(const Instruction *I);
160  bool selectLoad(const Instruction *I);
161  bool selectStore(const Instruction *I);
162  bool selectBranch(const Instruction *I);
163  bool selectIndirectBr(const Instruction *I);
164  bool selectCmp(const Instruction *I);
165  bool selectSelect(const Instruction *I);
166  bool selectFPExt(const Instruction *I);
167  bool selectFPTrunc(const Instruction *I);
168  bool selectFPToInt(const Instruction *I, bool Signed);
169  bool selectIntToFP(const Instruction *I, bool Signed);
170  bool selectRem(const Instruction *I, unsigned ISDOpcode);
171  bool selectRet(const Instruction *I);
172  bool selectTrunc(const Instruction *I);
173  bool selectIntExt(const Instruction *I);
174  bool selectMul(const Instruction *I);
175  bool selectShift(const Instruction *I);
176  bool selectBitCast(const Instruction *I);
177  bool selectFRem(const Instruction *I);
178  bool selectSDiv(const Instruction *I);
179  bool selectGetElementPtr(const Instruction *I);
180  bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
181 
182  // Utility helper routines.
183  bool isTypeLegal(Type *Ty, MVT &VT);
184  bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
185  bool isValueAvailable(const Value *V) const;
186  bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
187  bool computeCallAddress(const Value *V, Address &Addr);
188  bool simplifyAddress(Address &Addr, MVT VT);
189  void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
190  MachineMemOperand::Flags Flags,
191  unsigned ScaleFactor, MachineMemOperand *MMO);
192  bool isMemCpySmall(uint64_t Len, unsigned Alignment);
193  bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
194  unsigned Alignment);
195  bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
196  const Value *Cond);
197  bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
198  bool optimizeSelect(const SelectInst *SI);
199  unsigned getRegForGEPIndex(const Value *Idx);
200 
201  // Emit helper routines.
202  unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
203  const Value *RHS, bool SetFlags = false,
204  bool WantResult = true, bool IsZExt = false);
205  unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
206  unsigned RHSReg, bool SetFlags = false,
207  bool WantResult = true);
208  unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
209  uint64_t Imm, bool SetFlags = false,
210  bool WantResult = true);
211  unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
212  unsigned RHSReg, AArch64_AM::ShiftExtendType ShiftType,
213  uint64_t ShiftImm, bool SetFlags = false,
214  bool WantResult = true);
215  unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
216  unsigned RHSReg, AArch64_AM::ShiftExtendType ExtType,
217  uint64_t ShiftImm, bool SetFlags = false,
218  bool WantResult = true);
219 
220  // Emit functions.
221  bool emitCompareAndBranch(const BranchInst *BI);
222  bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
223  bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
224  bool emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
225  bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
226  unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
227  MachineMemOperand *MMO = nullptr);
228  bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
229  MachineMemOperand *MMO = nullptr);
230  bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
231  MachineMemOperand *MMO = nullptr);
232  unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
233  unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
234  unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
235  bool SetFlags = false, bool WantResult = true,
236  bool IsZExt = false);
237  unsigned emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm);
238  unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
239  bool SetFlags = false, bool WantResult = true,
240  bool IsZExt = false);
241  unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
242  bool WantResult = true);
243  unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
244  AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
245  bool WantResult = true);
246  unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
247  const Value *RHS);
248  unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
249  uint64_t Imm);
250  unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
251  unsigned RHSReg, uint64_t ShiftImm);
252  unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
253  unsigned emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1);
254  unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
255  unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
256  unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
257  unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
258  bool IsZExt = true);
259  unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
260  unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
261  bool IsZExt = true);
262  unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
263  unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
264  bool IsZExt = false);
265 
266  unsigned materializeInt(const ConstantInt *CI, MVT VT);
267  unsigned materializeFP(const ConstantFP *CFP, MVT VT);
268  unsigned materializeGV(const GlobalValue *GV);
269 
270  // Call handling routines.
271 private:
272  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
273  bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
274  unsigned &NumBytes);
275  bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
276 
277 public:
278  // Backend specific FastISel code.
279  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
280  unsigned fastMaterializeConstant(const Constant *C) override;
281  unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
282 
283  explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
284  const TargetLibraryInfo *LibInfo)
285  : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
286  Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>();
287  Context = &FuncInfo.Fn->getContext();
288  }
289 
290  bool fastSelectInstruction(const Instruction *I) override;
291 
292 #include "AArch64GenFastISel.inc"
293 };
294 
295 } // end anonymous namespace
296 
297 /// Check if the sign-/zero-extend will be a noop.
298 static bool isIntExtFree(const Instruction *I) {
299  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
300  "Unexpected integer extend instruction.");
301  assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
302  "Unexpected value type.");
303  bool IsZExt = isa<ZExtInst>(I);
304 
305  if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
306  if (LI->hasOneUse())
307  return true;
308 
309  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
310  if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
311  return true;
312 
313  return false;
314 }
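// Worked example (illustrative): for IR such as
//   %v = load i8, ptr %p
//   %z = zext i8 %v to i32
// the zext is free because the single-use load can be selected as LDRB, which
// already zero-extends into the full register (LDRSB/LDRSH cover the signed
// case). Likewise an argument marked zeroext/signext has, per the attribute's
// contract, already been extended by the caller.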
315 
316 /// Determine the implicit scale factor that is applied by a memory
317 /// operation for a given value type.
318 static unsigned getImplicitScaleFactor(MVT VT) {
319  switch (VT.SimpleTy) {
320  default:
321  return 0; // invalid
322  case MVT::i1: // fall-through
323  case MVT::i8:
324  return 1;
325  case MVT::i16:
326  return 2;
327  case MVT::i32: // fall-through
328  case MVT::f32:
329  return 4;
330  case MVT::i64: // fall-through
331  case MVT::f64:
332  return 8;
333  }
334 }
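// Worked example (illustrative): the scale factor is the access size in bytes
// and is also the unit of the scaled, unsigned 12-bit offset of LDR/STR. An
// i32 load at byte offset 16 becomes "ldr w0, [x1, #16]" with an encoded
// immediate of 16/4 = 4, while negative or unaligned offsets fall back to the
// unscaled LDUR/STUR forms with a signed 9-bit byte offset.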
335 
336 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
337  if (CC == CallingConv::WebKit_JS)
338  return CC_AArch64_WebKit_JS;
339  if (CC == CallingConv::GHC)
340  return CC_AArch64_GHC;
343  return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
344 }
345 
346 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
347  assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
348  "Alloca should always return a pointer.");
349 
350  // Don't handle dynamic allocas.
351  if (!FuncInfo.StaticAllocaMap.count(AI))
352  return 0;
353 
354  DenseMap<const AllocaInst *, int>::iterator SI =
355  FuncInfo.StaticAllocaMap.find(AI);
356 
357  if (SI != FuncInfo.StaticAllocaMap.end()) {
358  Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
359  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
360  ResultReg)
361  .addFrameIndex(SI->second)
362  .addImm(0)
363  .addImm(0);
364  return ResultReg;
365  }
366 
367  return 0;
368 }
369 
370 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
371  if (VT > MVT::i64)
372  return 0;
373 
374  if (!CI->isZero())
375  return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
376 
377  // Create a copy from the zero register to materialize a "0" value.
378  const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
379  : &AArch64::GPR32RegClass;
380  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
381  Register ResultReg = createResultReg(RC);
382  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
383  ResultReg).addReg(ZeroReg, getKillRegState(true));
384  return ResultReg;
385 }
386 
387 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
388  // Positive zero (+0.0) has to be materialized with a fmov from the zero
389  // register, because the immediate version of fmov cannot encode zero.
390  if (CFP->isNullValue())
391  return fastMaterializeFloatZero(CFP);
392 
393  if (VT != MVT::f32 && VT != MVT::f64)
394  return 0;
395 
396  const APFloat Val = CFP->getValueAPF();
397  bool Is64Bit = (VT == MVT::f64);
398  // This checks to see if we can use FMOV instructions to materialize
399  // a constant, otherwise we have to materialize via the constant pool.
400  int Imm =
401  Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
402  if (Imm != -1) {
403  unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
404  return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
405  }
406 
407  // For the large code model materialize the FP constant in code.
408  if (TM.getCodeModel() == CodeModel::Large) {
409  unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
410  const TargetRegisterClass *RC = Is64Bit ?
411  &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
412 
413  Register TmpReg = createResultReg(RC);
414  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc1), TmpReg)
415  .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
416 
417  Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
418  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
419  TII.get(TargetOpcode::COPY), ResultReg)
420  .addReg(TmpReg, getKillRegState(true));
421 
422  return ResultReg;
423  }
424 
425  // Materialize via constant pool. MachineConstantPool wants an explicit
426  // alignment.
427  Align Alignment = DL.getPrefTypeAlign(CFP->getType());
428 
429  unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
430  Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
431  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
432  ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
433 
434  unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
435  Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
436  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
437  .addReg(ADRPReg)
438  .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
439  return ResultReg;
440 }
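// Worked example (illustrative): FMOV (immediate) only encodes values of
// roughly the form +/-(n/16) * 2^e with n in [16,31] and e in [-3,4], so
// constants such as 1.0, 0.5 or 2.0 become a single "fmov d0, #1.0"-style
// instruction, +0.0 is copied from the zero register, and anything else
// (e.g. 100.0) is loaded from the constant pool via the ADRP + LDR sequence.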
441 
442 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
443  // We can't handle thread-local variables quickly yet.
444  if (GV->isThreadLocal())
445  return 0;
446 
447  // MachO still uses GOT for large code-model accesses, but ELF requires
448  // movz/movk sequences, which FastISel doesn't handle yet.
449  if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
450  return 0;
451 
452  unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
453 
454  EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
455  if (!DestEVT.isSimple())
456  return 0;
457 
458  Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
459  unsigned ResultReg;
460 
461  if (OpFlags & AArch64II::MO_GOT) {
462  // ADRP + LDRX
463  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
464  ADRPReg)
465  .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
466 
467  unsigned LdrOpc;
468  if (Subtarget->isTargetILP32()) {
469  ResultReg = createResultReg(&AArch64::GPR32RegClass);
470  LdrOpc = AArch64::LDRWui;
471  } else {
472  ResultReg = createResultReg(&AArch64::GPR64RegClass);
473  LdrOpc = AArch64::LDRXui;
474  }
475  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(LdrOpc),
476  ResultReg)
477  .addReg(ADRPReg)
478  .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
479  AArch64II::MO_NC | OpFlags);
480  if (!Subtarget->isTargetILP32())
481  return ResultReg;
482 
483  // LDRWui produces a 32-bit register, but pointers in registers are 64 bits,
484  // so we must extend the result on ILP32.
485  Register Result64 = createResultReg(&AArch64::GPR64RegClass);
486  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
487  TII.get(TargetOpcode::SUBREG_TO_REG))
488  .addDef(Result64)
489  .addImm(0)
490  .addReg(ResultReg, RegState::Kill)
491  .addImm(AArch64::sub_32);
492  return Result64;
493  } else {
494  // ADRP + ADDX
495  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
496  ADRPReg)
497  .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
498 
499  ResultReg = createResultReg(&AArch64::GPR64spRegClass);
500  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
501  ResultReg)
502  .addReg(ADRPReg)
503  .addGlobalAddress(GV, 0,
504  AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
505  .addImm(0);
506  }
507  return ResultReg;
508 }
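// The two sequences built above roughly correspond to (illustrative):
//   adrp x0, sym
//   add  x0, x0, :lo12:sym        ; direct, small code model
// and, for GOT-based access:
//   adrp x0, :got:sym
//   ldr  x0, [x0, :got_lo12:sym]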
509 
510 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
511  EVT CEVT = TLI.getValueType(DL, C->getType(), true);
512 
513  // Only handle simple types.
514  if (!CEVT.isSimple())
515  return 0;
516  MVT VT = CEVT.getSimpleVT();
517  // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
518  // 'null' pointers need to have a somewhat special treatment.
519  if (isa<ConstantPointerNull>(C)) {
520  assert(VT == MVT::i64 && "Expected 64-bit pointers");
521  return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
522  }
523 
524  if (const auto *CI = dyn_cast<ConstantInt>(C))
525  return materializeInt(CI, VT);
526  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
527  return materializeFP(CFP, VT);
528  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
529  return materializeGV(GV);
530 
531  return 0;
532 }
533 
534 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
535  assert(CFP->isNullValue() &&
536  "Floating-point constant is not a positive zero.");
537  MVT VT;
538  if (!isTypeLegal(CFP->getType(), VT))
539  return 0;
540 
541  if (VT != MVT::f32 && VT != MVT::f64)
542  return 0;
543 
544  bool Is64Bit = (VT == MVT::f64);
545  unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
546  unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
547  return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);
548 }
549 
550 /// Check if the multiply is by a power-of-2 constant.
551 static bool isMulPowOf2(const Value *I) {
552  if (const auto *MI = dyn_cast<MulOperator>(I)) {
553  if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
554  if (C->getValue().isPowerOf2())
555  return true;
556  if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
557  if (C->getValue().isPowerOf2())
558  return true;
559  }
560  return false;
561 }
562 
563 // Computes the address to get to an object.
564 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
565 {
566  const User *U = nullptr;
567  unsigned Opcode = Instruction::UserOp1;
568  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
569  // Don't walk into other basic blocks unless the object is an alloca from
570  // another block, otherwise it may not have a virtual register assigned.
571  if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
572  FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
573  Opcode = I->getOpcode();
574  U = I;
575  }
576  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
577  Opcode = C->getOpcode();
578  U = C;
579  }
580 
581  if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
582  if (Ty->getAddressSpace() > 255)
583  // Fast instruction selection doesn't support the special
584  // address spaces.
585  return false;
586 
587  switch (Opcode) {
588  default:
589  break;
590  case Instruction::BitCast:
591  // Look through bitcasts.
592  return computeAddress(U->getOperand(0), Addr, Ty);
593 
594  case Instruction::IntToPtr:
595  // Look past no-op inttoptrs.
596  if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
597  TLI.getPointerTy(DL))
598  return computeAddress(U->getOperand(0), Addr, Ty);
599  break;
600 
601  case Instruction::PtrToInt:
602  // Look past no-op ptrtoints.
603  if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
604  return computeAddress(U->getOperand(0), Addr, Ty);
605  break;
606 
607  case Instruction::GetElementPtr: {
608  Address SavedAddr = Addr;
609  uint64_t TmpOffset = Addr.getOffset();
610 
611  // Iterate through the GEP folding the constants into offsets where
612  // we can.
613  for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
614  GTI != E; ++GTI) {
615  const Value *Op = GTI.getOperand();
616  if (StructType *STy = GTI.getStructTypeOrNull()) {
617  const StructLayout *SL = DL.getStructLayout(STy);
618  unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
619  TmpOffset += SL->getElementOffset(Idx);
620  } else {
621  uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
622  while (true) {
623  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
624  // Constant-offset addressing.
625  TmpOffset += CI->getSExtValue() * S;
626  break;
627  }
628  if (canFoldAddIntoGEP(U, Op)) {
629  // A compatible add with a constant operand. Fold the constant.
630  ConstantInt *CI =
631  cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
632  TmpOffset += CI->getSExtValue() * S;
633  // Iterate on the other operand.
634  Op = cast<AddOperator>(Op)->getOperand(0);
635  continue;
636  }
637  // Unsupported
638  goto unsupported_gep;
639  }
640  }
641  }
642 
643  // Try to grab the base operand now.
644  Addr.setOffset(TmpOffset);
645  if (computeAddress(U->getOperand(0), Addr, Ty))
646  return true;
647 
648  // We failed, restore everything and try the other options.
649  Addr = SavedAddr;
650 
651  unsupported_gep:
652  break;
653  }
654  case Instruction::Alloca: {
655  const AllocaInst *AI = cast<AllocaInst>(Obj);
656  DenseMap<const AllocaInst *, int>::iterator SI =
657  FuncInfo.StaticAllocaMap.find(AI);
658  if (SI != FuncInfo.StaticAllocaMap.end()) {
659  Addr.setKind(Address::FrameIndexBase);
660  Addr.setFI(SI->second);
661  return true;
662  }
663  break;
664  }
665  case Instruction::Add: {
666  // Adds of constants are common and easy enough.
667  const Value *LHS = U->getOperand(0);
668  const Value *RHS = U->getOperand(1);
669 
670  if (isa<ConstantInt>(LHS))
671  std::swap(LHS, RHS);
672 
673  if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
674  Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
675  return computeAddress(LHS, Addr, Ty);
676  }
677 
678  Address Backup = Addr;
679  if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
680  return true;
681  Addr = Backup;
682 
683  break;
684  }
685  case Instruction::Sub: {
686  // Subs of constants are common and easy enough.
687  const Value *LHS = U->getOperand(0);
688  const Value *RHS = U->getOperand(1);
689 
690  if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
691  Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
692  return computeAddress(LHS, Addr, Ty);
693  }
694  break;
695  }
696  case Instruction::Shl: {
697  if (Addr.getOffsetReg())
698  break;
699 
700  const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
701  if (!CI)
702  break;
703 
704  unsigned Val = CI->getZExtValue();
705  if (Val < 1 || Val > 3)
706  break;
707 
708  uint64_t NumBytes = 0;
709  if (Ty && Ty->isSized()) {
710  uint64_t NumBits = DL.getTypeSizeInBits(Ty);
711  NumBytes = NumBits / 8;
712  if (!isPowerOf2_64(NumBits))
713  NumBytes = 0;
714  }
715 
716  if (NumBytes != (1ULL << Val))
717  break;
718 
719  Addr.setShift(Val);
720  Addr.setExtendType(AArch64_AM::LSL);
721 
722  const Value *Src = U->getOperand(0);
723  if (const auto *I = dyn_cast<Instruction>(Src)) {
724  if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
725  // Fold the zext or sext when it won't become a noop.
726  if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
727  if (!isIntExtFree(ZE) &&
728  ZE->getOperand(0)->getType()->isIntegerTy(32)) {
729  Addr.setExtendType(AArch64_AM::UXTW);
730  Src = ZE->getOperand(0);
731  }
732  } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
733  if (!isIntExtFree(SE) &&
734  SE->getOperand(0)->getType()->isIntegerTy(32)) {
735  Addr.setExtendType(AArch64_AM::SXTW);
736  Src = SE->getOperand(0);
737  }
738  }
739  }
740  }
741 
742  if (const auto *AI = dyn_cast<BinaryOperator>(Src))
743  if (AI->getOpcode() == Instruction::And) {
744  const Value *LHS = AI->getOperand(0);
745  const Value *RHS = AI->getOperand(1);
746 
747  if (const auto *C = dyn_cast<ConstantInt>(LHS))
748  if (C->getValue() == 0xffffffff)
749  std::swap(LHS, RHS);
750 
751  if (const auto *C = dyn_cast<ConstantInt>(RHS))
752  if (C->getValue() == 0xffffffff) {
753  Addr.setExtendType(AArch64_AM::UXTW);
754  Register Reg = getRegForValue(LHS);
755  if (!Reg)
756  return false;
757  Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
758  Addr.setOffsetReg(Reg);
759  return true;
760  }
761  }
762 
763  Register Reg = getRegForValue(Src);
764  if (!Reg)
765  return false;
766  Addr.setOffsetReg(Reg);
767  return true;
768  }
769  case Instruction::Mul: {
770  if (Addr.getOffsetReg())
771  break;
772 
773  if (!isMulPowOf2(U))
774  break;
775 
776  const Value *LHS = U->getOperand(0);
777  const Value *RHS = U->getOperand(1);
778 
779  // Canonicalize power-of-2 value to the RHS.
780  if (const auto *C = dyn_cast<ConstantInt>(LHS))
781  if (C->getValue().isPowerOf2())
782  std::swap(LHS, RHS);
783 
784  assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
785  const auto *C = cast<ConstantInt>(RHS);
786  unsigned Val = C->getValue().logBase2();
787  if (Val < 1 || Val > 3)
788  break;
789 
790  uint64_t NumBytes = 0;
791  if (Ty && Ty->isSized()) {
792  uint64_t NumBits = DL.getTypeSizeInBits(Ty);
793  NumBytes = NumBits / 8;
794  if (!isPowerOf2_64(NumBits))
795  NumBytes = 0;
796  }
797 
798  if (NumBytes != (1ULL << Val))
799  break;
800 
801  Addr.setShift(Val);
802  Addr.setExtendType(AArch64_AM::LSL);
803 
804  const Value *Src = LHS;
805  if (const auto *I = dyn_cast<Instruction>(Src)) {
806  if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
807  // Fold the zext or sext when it won't become a noop.
808  if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
809  if (!isIntExtFree(ZE) &&
810  ZE->getOperand(0)->getType()->isIntegerTy(32)) {
811  Addr.setExtendType(AArch64_AM::UXTW);
812  Src = ZE->getOperand(0);
813  }
814  } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
815  if (!isIntExtFree(SE) &&
816  SE->getOperand(0)->getType()->isIntegerTy(32)) {
817  Addr.setExtendType(AArch64_AM::SXTW);
818  Src = SE->getOperand(0);
819  }
820  }
821  }
822  }
823 
824  Register Reg = getRegForValue(Src);
825  if (!Reg)
826  return false;
827  Addr.setOffsetReg(Reg);
828  return true;
829  }
830  case Instruction::And: {
831  if (Addr.getOffsetReg())
832  break;
833 
834  if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
835  break;
836 
837  const Value *LHS = U->getOperand(0);
838  const Value *RHS = U->getOperand(1);
839 
840  if (const auto *C = dyn_cast<ConstantInt>(LHS))
841  if (C->getValue() == 0xffffffff)
842  std::swap(LHS, RHS);
843 
844  if (const auto *C = dyn_cast<ConstantInt>(RHS))
845  if (C->getValue() == 0xffffffff) {
846  Addr.setShift(0);
847  Addr.setExtendType(AArch64_AM::LSL);
848  Addr.setExtendType(AArch64_AM::UXTW);
849 
850  Register Reg = getRegForValue(LHS);
851  if (!Reg)
852  return false;
853  Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
854  Addr.setOffsetReg(Reg);
855  return true;
856  }
857  break;
858  }
859  case Instruction::SExt:
860  case Instruction::ZExt: {
861  if (!Addr.getReg() || Addr.getOffsetReg())
862  break;
863 
864  const Value *Src = nullptr;
865  // Fold the zext or sext when it won't become a noop.
866  if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
867  if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
868  Addr.setExtendType(AArch64_AM::UXTW);
869  Src = ZE->getOperand(0);
870  }
871  } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
872  if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
873  Addr.setExtendType(AArch64_AM::SXTW);
874  Src = SE->getOperand(0);
875  }
876  }
877 
878  if (!Src)
879  break;
880 
881  Addr.setShift(0);
882  Register Reg = getRegForValue(Src);
883  if (!Reg)
884  return false;
885  Addr.setOffsetReg(Reg);
886  return true;
887  }
888  } // end switch
889 
890  if (Addr.isRegBase() && !Addr.getReg()) {
891  Register Reg = getRegForValue(Obj);
892  if (!Reg)
893  return false;
894  Addr.setReg(Reg);
895  return true;
896  }
897 
898  if (!Addr.getOffsetReg()) {
899  Register Reg = getRegForValue(Obj);
900  if (!Reg)
901  return false;
902  Addr.setOffsetReg(Reg);
903  return true;
904  }
905 
906  return false;
907 }
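// Worked example (illustrative): an address computed as
//   %ext = zext i32 %i to i64
//   %off = shl i64 %ext, 2        ; element size 4
//   %p   = add i64 %base, %off
// is folded into a register-offset access such as "ldr w0, [x0, w1, uxtw #2]",
// while constant GEP indices are accumulated into the immediate offset instead.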
908 
909 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
910  const User *U = nullptr;
911  unsigned Opcode = Instruction::UserOp1;
912  bool InMBB = true;
913 
914  if (const auto *I = dyn_cast<Instruction>(V)) {
915  Opcode = I->getOpcode();
916  U = I;
917  InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
918  } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
919  Opcode = C->getOpcode();
920  U = C;
921  }
922 
923  switch (Opcode) {
924  default: break;
925  case Instruction::BitCast:
926  // Look past bitcasts if its operand is in the same BB.
927  if (InMBB)
928  return computeCallAddress(U->getOperand(0), Addr);
929  break;
930  case Instruction::IntToPtr:
931  // Look past no-op inttoptrs if its operand is in the same BB.
932  if (InMBB &&
933  TLI.getValueType(DL, U->getOperand(0)->getType()) ==
934  TLI.getPointerTy(DL))
935  return computeCallAddress(U->getOperand(0), Addr);
936  break;
937  case Instruction::PtrToInt:
938  // Look past no-op ptrtoints if its operand is in the same BB.
939  if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
940  return computeCallAddress(U->getOperand(0), Addr);
941  break;
942  }
943 
944  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
945  Addr.setGlobalValue(GV);
946  return true;
947  }
948 
949  // If all else fails, try to materialize the value in a register.
950  if (!Addr.getGlobalValue()) {
951  Addr.setReg(getRegForValue(V));
952  return Addr.getReg() != 0;
953  }
954 
955  return false;
956 }
957 
958 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
959  EVT evt = TLI.getValueType(DL, Ty, true);
960 
961  if (Subtarget->isTargetILP32() && Ty->isPointerTy())
962  return false;
963 
964  // Only handle simple types.
965  if (evt == MVT::Other || !evt.isSimple())
966  return false;
967  VT = evt.getSimpleVT();
968 
969  // This is a legal type, but it's not something we handle in fast-isel.
970  if (VT == MVT::f128)
971  return false;
972 
973  // Handle all other legal types, i.e. a register that will directly hold this
974  // value.
975  return TLI.isTypeLegal(VT);
976 }
977 
978 /// Determine if the value type is supported by FastISel.
979 ///
980 /// FastISel for AArch64 can handle more value types than are legal. This adds
981 /// simple value types such as i1, i8, and i16.
982 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
983  if (Ty->isVectorTy() && !IsVectorAllowed)
984  return false;
985 
986  if (isTypeLegal(Ty, VT))
987  return true;
988 
989  // If this is a type that can be sign- or zero-extended to a basic operation,
990  // go ahead and accept it now.
991  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
992  return true;
993 
994  return false;
995 }
996 
997 bool AArch64FastISel::isValueAvailable(const Value *V) const {
998  if (!isa<Instruction>(V))
999  return true;
1000 
1001  const auto *I = cast<Instruction>(V);
1002  return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
1003 }
1004 
1005 bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
1006  if (Subtarget->isTargetILP32())
1007  return false;
1008 
1009  unsigned ScaleFactor = getImplicitScaleFactor(VT);
1010  if (!ScaleFactor)
1011  return false;
1012 
1013  bool ImmediateOffsetNeedsLowering = false;
1014  bool RegisterOffsetNeedsLowering = false;
1015  int64_t Offset = Addr.getOffset();
1016  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
1017  ImmediateOffsetNeedsLowering = true;
1018  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1019  !isUInt<12>(Offset / ScaleFactor))
1020  ImmediateOffsetNeedsLowering = true;
1021 
1022  // Cannot encode an offset register and an immediate offset in the same
1023  // instruction. Fold the immediate offset into the load/store instruction and
1024  // emit an additional add to take care of the offset register.
1025  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1026  RegisterOffsetNeedsLowering = true;
1027 
1028  // Cannot encode zero register as base.
1029  if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1030  RegisterOffsetNeedsLowering = true;
1031 
1032  // If this is a stack pointer and the offset needs to be simplified then put
1033  // the alloca address into a register, set the base type back to register and
1034  // continue. This should almost never happen.
1035  if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1036  {
1037  Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1038  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
1039  ResultReg)
1040  .addFrameIndex(Addr.getFI())
1041  .addImm(0)
1042  .addImm(0);
1043  Addr.setKind(Address::RegBase);
1044  Addr.setReg(ResultReg);
1045  }
1046 
1047  if (RegisterOffsetNeedsLowering) {
1048  unsigned ResultReg = 0;
1049  if (Addr.getReg()) {
1050  if (Addr.getExtendType() == AArch64_AM::SXTW ||
1051  Addr.getExtendType() == AArch64_AM::UXTW )
1052  ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1053  Addr.getOffsetReg(), Addr.getExtendType(),
1054  Addr.getShift());
1055  else
1056  ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1057  Addr.getOffsetReg(), AArch64_AM::LSL,
1058  Addr.getShift());
1059  } else {
1060  if (Addr.getExtendType() == AArch64_AM::UXTW)
1061  ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1062  Addr.getShift(), /*IsZExt=*/true);
1063  else if (Addr.getExtendType() == AArch64_AM::SXTW)
1064  ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1065  Addr.getShift(), /*IsZExt=*/false);
1066  else
1067  ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1068  Addr.getShift());
1069  }
1070  if (!ResultReg)
1071  return false;
1072 
1073  Addr.setReg(ResultReg);
1074  Addr.setOffsetReg(0);
1075  Addr.setShift(0);
1076  Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1077  }
1078 
1079  // Since the offset is too large for the load/store instruction, get the
1080  // reg+offset into a register.
1081  if (ImmediateOffsetNeedsLowering) {
1082  unsigned ResultReg;
1083  if (Addr.getReg())
1084  // Try to fold the immediate into the add instruction.
1085  ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
1086  else
1087  ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1088 
1089  if (!ResultReg)
1090  return false;
1091  Addr.setReg(ResultReg);
1092  Addr.setOffset(0);
1093  }
1094  return true;
1095 }
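// Worked example (illustrative): an i32 access at offset 16384 fits neither
// the scaled 12-bit field (16384/4 = 4096 > 4095) nor the signed 9-bit
// unscaled form, so the code above folds the offset with an explicit add
// (e.g. "add x1, x0, #16384") and issues the load/store with a zero offset.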
1096 
1097 void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1098  const MachineInstrBuilder &MIB,
1099  MachineMemOperand::Flags Flags,
1100  unsigned ScaleFactor,
1101  MachineMemOperand *MMO) {
1102  int64_t Offset = Addr.getOffset() / ScaleFactor;
1103  // Frame base works a bit differently. Handle it separately.
1104  if (Addr.isFIBase()) {
1105  int FI = Addr.getFI();
1106  // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
1107  // and alignment should be based on the VT.
1108  MMO = FuncInfo.MF->getMachineMemOperand(
1109  MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1110  MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
1111  // Now add the rest of the operands.
1112  MIB.addFrameIndex(FI).addImm(Offset);
1113  } else {
1114  assert(Addr.isRegBase() && "Unexpected address kind.");
1115  const MCInstrDesc &II = MIB->getDesc();
1116  unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1117  Addr.setReg(
1118  constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1119  Addr.setOffsetReg(
1120  constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1121  if (Addr.getOffsetReg()) {
1122  assert(Addr.getOffset() == 0 && "Unexpected offset");
1123  bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1124  Addr.getExtendType() == AArch64_AM::SXTX;
1125  MIB.addReg(Addr.getReg());
1126  MIB.addReg(Addr.getOffsetReg());
1127  MIB.addImm(IsSigned);
1128  MIB.addImm(Addr.getShift() != 0);
1129  } else
1130  MIB.addReg(Addr.getReg()).addImm(Offset);
1131  }
1132 
1133  if (MMO)
1134  MIB.addMemOperand(MMO);
1135 }
1136 
1137 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1138  const Value *RHS, bool SetFlags,
1139  bool WantResult, bool IsZExt) {
1140  AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1141  bool NeedExtend = false;
1142  switch (RetVT.SimpleTy) {
1143  default:
1144  return 0;
1145  case MVT::i1:
1146  NeedExtend = true;
1147  break;
1148  case MVT::i8:
1149  NeedExtend = true;
1150  ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1151  break;
1152  case MVT::i16:
1153  NeedExtend = true;
1154  ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1155  break;
1156  case MVT::i32: // fall-through
1157  case MVT::i64:
1158  break;
1159  }
1160  MVT SrcVT = RetVT;
1161  RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1162 
1163  // Canonicalize immediates to the RHS first.
1164  if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1165  std::swap(LHS, RHS);
1166 
1167  // Canonicalize mul by power of 2 to the RHS.
1168  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1169  if (isMulPowOf2(LHS))
1170  std::swap(LHS, RHS);
1171 
1172  // Canonicalize shift immediate to the RHS.
1173  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1174  if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1175  if (isa<ConstantInt>(SI->getOperand(1)))
1176  if (SI->getOpcode() == Instruction::Shl ||
1177  SI->getOpcode() == Instruction::LShr ||
1178  SI->getOpcode() == Instruction::AShr )
1179  std::swap(LHS, RHS);
1180 
1181  Register LHSReg = getRegForValue(LHS);
1182  if (!LHSReg)
1183  return 0;
1184 
1185  if (NeedExtend)
1186  LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1187 
1188  unsigned ResultReg = 0;
1189  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1190  uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1191  if (C->isNegative())
1192  ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
1193  WantResult);
1194  else
1195  ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
1196  WantResult);
1197  } else if (const auto *C = dyn_cast<Constant>(RHS))
1198  if (C->isNullValue())
1199  ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);
1200 
1201  if (ResultReg)
1202  return ResultReg;
1203 
1204  // Only extend the RHS within the instruction if there is a valid extend type.
1205  if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1206  isValueAvailable(RHS)) {
1207  if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
1208  if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
1209  if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
1210  Register RHSReg = getRegForValue(SI->getOperand(0));
1211  if (!RHSReg)
1212  return 0;
1213  return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType,
1214  C->getZExtValue(), SetFlags, WantResult);
1215  }
1216  Register RHSReg = getRegForValue(RHS);
1217  if (!RHSReg)
1218  return 0;
1219  return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,
1220  SetFlags, WantResult);
1221  }
1222 
1223  // Check if the mul can be folded into the instruction.
1224  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1225  if (isMulPowOf2(RHS)) {
1226  const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1227  const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1228 
1229  if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1230  if (C->getValue().isPowerOf2())
1231  std::swap(MulLHS, MulRHS);
1232 
1233  assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1234  uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1235  Register RHSReg = getRegForValue(MulLHS);
1236  if (!RHSReg)
1237  return 0;
1238  ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,
1239  ShiftVal, SetFlags, WantResult);
1240  if (ResultReg)
1241  return ResultReg;
1242  }
1243  }
1244 
1245  // Check if the shift can be folded into the instruction.
1246  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1247  if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1248  if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1249  AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1250  switch (SI->getOpcode()) {
1251  default: break;
1252  case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
1253  case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1254  case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1255  }
1256  uint64_t ShiftVal = C->getZExtValue();
1257  if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1258  Register RHSReg = getRegForValue(SI->getOperand(0));
1259  if (!RHSReg)
1260  return 0;
1261  ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
1262  ShiftVal, SetFlags, WantResult);
1263  if (ResultReg)
1264  return ResultReg;
1265  }
1266  }
1267  }
1268  }
1269 
1270  Register RHSReg = getRegForValue(RHS);
1271  if (!RHSReg)
1272  return 0;
1273 
1274  if (NeedExtend)
1275  RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1276 
1277  return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
1278 }
1279 
1280 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1281  unsigned RHSReg, bool SetFlags,
1282  bool WantResult) {
1283  assert(LHSReg && RHSReg && "Invalid register number.");
1284 
1285  if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1286  RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1287  return 0;
1288 
1289  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1290  return 0;
1291 
1292  static const unsigned OpcTable[2][2][2] = {
1293  { { AArch64::SUBWrr, AArch64::SUBXrr },
1294  { AArch64::ADDWrr, AArch64::ADDXrr } },
1295  { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1296  { AArch64::ADDSWrr, AArch64::ADDSXrr } }
1297  };
1298  bool Is64Bit = RetVT == MVT::i64;
1299  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1300  const TargetRegisterClass *RC =
1301  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1302  unsigned ResultReg;
1303  if (WantResult)
1304  ResultReg = createResultReg(RC);
1305  else
1306  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1307 
1308  const MCInstrDesc &II = TII.get(Opc);
1309  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1310  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1311  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1312  .addReg(LHSReg)
1313  .addReg(RHSReg);
1314  return ResultReg;
1315 }
1316 
1317 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1318  uint64_t Imm, bool SetFlags,
1319  bool WantResult) {
1320  assert(LHSReg && "Invalid register number.");
1321 
1322  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1323  return 0;
1324 
1325  unsigned ShiftImm;
1326  if (isUInt<12>(Imm))
1327  ShiftImm = 0;
1328  else if ((Imm & 0xfff000) == Imm) {
1329  ShiftImm = 12;
1330  Imm >>= 12;
1331  } else
1332  return 0;
1333 
1334  static const unsigned OpcTable[2][2][2] = {
1335  { { AArch64::SUBWri, AArch64::SUBXri },
1336  { AArch64::ADDWri, AArch64::ADDXri } },
1337  { { AArch64::SUBSWri, AArch64::SUBSXri },
1338  { AArch64::ADDSWri, AArch64::ADDSXri } }
1339  };
1340  bool Is64Bit = RetVT == MVT::i64;
1341  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1342  const TargetRegisterClass *RC;
1343  if (SetFlags)
1344  RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1345  else
1346  RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1347  unsigned ResultReg;
1348  if (WantResult)
1349  ResultReg = createResultReg(RC);
1350  else
1351  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1352 
1353  const MCInstrDesc &II = TII.get(Opc);
1354  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1355  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1356  .addReg(LHSReg)
1357  .addImm(Imm)
1358  .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1359  return ResultReg;
1360 }
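// The arithmetic immediate is a 12-bit unsigned value, optionally shifted left
// by 12, so (illustrative) "add x0, x1, #4095" and "add x0, x1, #0x123, lsl #12"
// are single instructions, whereas an immediate like 0x1001 sets bits in both
// halves, is rejected here, and gets materialized into a register by callers
// such as emitAdd_ri_.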
1361 
1362 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1363  unsigned RHSReg,
1364  AArch64_AM::ShiftExtendType ShiftType,
1365  uint64_t ShiftImm, bool SetFlags,
1366  bool WantResult) {
1367  assert(LHSReg && RHSReg && "Invalid register number.");
1368  assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1369  RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1370 
1371  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1372  return 0;
1373 
1374  // Don't deal with undefined shifts.
1375  if (ShiftImm >= RetVT.getSizeInBits())
1376  return 0;
1377 
1378  static const unsigned OpcTable[2][2][2] = {
1379  { { AArch64::SUBWrs, AArch64::SUBXrs },
1380  { AArch64::ADDWrs, AArch64::ADDXrs } },
1381  { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1382  { AArch64::ADDSWrs, AArch64::ADDSXrs } }
1383  };
1384  bool Is64Bit = RetVT == MVT::i64;
1385  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1386  const TargetRegisterClass *RC =
1387  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1388  unsigned ResultReg;
1389  if (WantResult)
1390  ResultReg = createResultReg(RC);
1391  else
1392  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1393 
1394  const MCInstrDesc &II = TII.get(Opc);
1395  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1396  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1397  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1398  .addReg(LHSReg)
1399  .addReg(RHSReg)
1400  .addImm(getShifterImm(ShiftType, ShiftImm));
1401  return ResultReg;
1402 }
1403 
1404 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1405  unsigned RHSReg,
1406  AArch64_AM::ShiftExtendType ExtType,
1407  uint64_t ShiftImm, bool SetFlags,
1408  bool WantResult) {
1409  assert(LHSReg && RHSReg && "Invalid register number.");
1410  assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1411  RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1412 
1413  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1414  return 0;
1415 
1416  if (ShiftImm >= 4)
1417  return 0;
1418 
1419  static const unsigned OpcTable[2][2][2] = {
1420  { { AArch64::SUBWrx, AArch64::SUBXrx },
1421  { AArch64::ADDWrx, AArch64::ADDXrx } },
1422  { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1423  { AArch64::ADDSWrx, AArch64::ADDSXrx } }
1424  };
1425  bool Is64Bit = RetVT == MVT::i64;
1426  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1427  const TargetRegisterClass *RC = nullptr;
1428  if (SetFlags)
1429  RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1430  else
1431  RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1432  unsigned ResultReg;
1433  if (WantResult)
1434  ResultReg = createResultReg(RC);
1435  else
1436  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1437 
1438  const MCInstrDesc &II = TII.get(Opc);
1439  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1440  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1441  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1442  .addReg(LHSReg)
1443  .addReg(RHSReg)
1444  .addImm(getArithExtendImm(ExtType, ShiftImm));
1445  return ResultReg;
1446 }
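// The extended-register form folds a sub-register extend plus a small left
// shift into the instruction, e.g. (illustrative) "add x0, x1, w2, uxtw #2"
// adds (zero-extended w2) << 2 to x1; this is what lets emitAddSub select a
// zext/shl feeding an add as a single instruction.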
1447 
1448 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1449  Type *Ty = LHS->getType();
1450  EVT EVT = TLI.getValueType(DL, Ty, true);
1451  if (!EVT.isSimple())
1452  return false;
1453  MVT VT = EVT.getSimpleVT();
1454 
1455  switch (VT.SimpleTy) {
1456  default:
1457  return false;
1458  case MVT::i1:
1459  case MVT::i8:
1460  case MVT::i16:
1461  case MVT::i32:
1462  case MVT::i64:
1463  return emitICmp(VT, LHS, RHS, IsZExt);
1464  case MVT::f32:
1465  case MVT::f64:
1466  return emitFCmp(VT, LHS, RHS);
1467  }
1468 }
1469 
1470 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1471  bool IsZExt) {
1472  return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1473  IsZExt) != 0;
1474 }
1475 
1476 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) {
1477  return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,
1478  /*SetFlags=*/true, /*WantResult=*/false) != 0;
1479 }
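// Note (illustrative): an integer compare is simply a SUBS whose result is
// discarded, e.g. "cmp w0, w1" is an alias of "subs wzr, w0, w1", which is why
// these helpers call emitSub/emitAddSub_ri with SetFlags=true and
// WantResult=false.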
1480 
1481 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1482  if (RetVT != MVT::f32 && RetVT != MVT::f64)
1483  return false;
1484 
1485  // Check to see if the 2nd operand is a constant that we can encode directly
1486  // in the compare.
1487  bool UseImm = false;
1488  if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1489  if (CFP->isZero() && !CFP->isNegative())
1490  UseImm = true;
1491 
1492  Register LHSReg = getRegForValue(LHS);
1493  if (!LHSReg)
1494  return false;
1495 
1496  if (UseImm) {
1497  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1498  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1499  .addReg(LHSReg);
1500  return true;
1501  }
1502 
1503  Register RHSReg = getRegForValue(RHS);
1504  if (!RHSReg)
1505  return false;
1506 
1507  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1508  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1509  .addReg(LHSReg)
1510  .addReg(RHSReg);
1511  return true;
1512 }
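// Note (illustrative): FCMP only has an immediate form for comparing against
// +0.0 ("fcmp s0, #0.0"), which is why UseImm is set only for a non-negative
// zero constant; every other RHS is materialized into a register first.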
1513 
1514 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1515  bool SetFlags, bool WantResult, bool IsZExt) {
1516  return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1517  IsZExt);
1518 }
1519 
1520 /// This method is a wrapper to simplify add emission.
1521 ///
1522 /// First try to emit an add with an immediate operand using emitAddSub_ri. If
1523 /// that fails, then try to materialize the immediate into a register and use
1524 /// emitAddSub_rr instead.
1525 unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm) {
1526  unsigned ResultReg;
1527  if (Imm < 0)
1528  ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);
1529  else
1530  ResultReg = emitAddSub_ri(true, VT, Op0, Imm);
1531 
1532  if (ResultReg)
1533  return ResultReg;
1534 
1535  unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1536  if (!CReg)
1537  return 0;
1538 
1539  ResultReg = emitAddSub_rr(true, VT, Op0, CReg);
1540  return ResultReg;
1541 }
1542 
1543 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1544  bool SetFlags, bool WantResult, bool IsZExt) {
1545  return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1546  IsZExt);
1547 }
1548 
1549 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1550  unsigned RHSReg, bool WantResult) {
1551  return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg,
1552  /*SetFlags=*/true, WantResult);
1553 }
1554 
1555 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1556  unsigned RHSReg,
1557  AArch64_AM::ShiftExtendType ShiftType,
1558  uint64_t ShiftImm, bool WantResult) {
1559  return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType,
1560  ShiftImm, /*SetFlags=*/true, WantResult);
1561 }
1562 
1563 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1564  const Value *LHS, const Value *RHS) {
1565  // Canonicalize immediates to the RHS first.
1566  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1567  std::swap(LHS, RHS);
1568 
1569  // Canonicalize mul by power-of-2 to the RHS.
1570  if (LHS->hasOneUse() && isValueAvailable(LHS))
1571  if (isMulPowOf2(LHS))
1572  std::swap(LHS, RHS);
1573 
1574  // Canonicalize shift immediate to the RHS.
1575  if (LHS->hasOneUse() && isValueAvailable(LHS))
1576  if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1577  if (isa<ConstantInt>(SI->getOperand(1)))
1578  std::swap(LHS, RHS);
1579 
1580  Register LHSReg = getRegForValue(LHS);
1581  if (!LHSReg)
1582  return 0;
1583 
1584  unsigned ResultReg = 0;
1585  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1586  uint64_t Imm = C->getZExtValue();
1587  ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);
1588  }
1589  if (ResultReg)
1590  return ResultReg;
1591 
1592  // Check if the mul can be folded into the instruction.
1593  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1594  if (isMulPowOf2(RHS)) {
1595  const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1596  const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1597 
1598  if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1599  if (C->getValue().isPowerOf2())
1600  std::swap(MulLHS, MulRHS);
1601 
1602  assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1603  uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1604 
1605  Register RHSReg = getRegForValue(MulLHS);
1606  if (!RHSReg)
1607  return 0;
1608  ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1609  if (ResultReg)
1610  return ResultReg;
1611  }
1612  }
1613 
1614  // Check if the shift can be folded into the instruction.
1615  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1616  if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1617  if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1618  uint64_t ShiftVal = C->getZExtValue();
1619  Register RHSReg = getRegForValue(SI->getOperand(0));
1620  if (!RHSReg)
1621  return 0;
1622  ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1623  if (ResultReg)
1624  return ResultReg;
1625  }
1626  }
1627 
1628  Register RHSReg = getRegForValue(RHS);
1629  if (!RHSReg)
1630  return 0;
1631 
1632  MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1633  ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);
1634  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1635  uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1636  ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1637  }
1638  return ResultReg;
1639 }
1640 
1641 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1642  unsigned LHSReg, uint64_t Imm) {
1643  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1644  "ISD nodes are not consecutive!");
1645  static const unsigned OpcTable[3][2] = {
1646  { AArch64::ANDWri, AArch64::ANDXri },
1647  { AArch64::ORRWri, AArch64::ORRXri },
1648  { AArch64::EORWri, AArch64::EORXri }
1649  };
1650  const TargetRegisterClass *RC;
1651  unsigned Opc;
1652  unsigned RegSize;
1653  switch (RetVT.SimpleTy) {
1654  default:
1655  return 0;
1656  case MVT::i1:
1657  case MVT::i8:
1658  case MVT::i16:
1659  case MVT::i32: {
1660  unsigned Idx = ISDOpc - ISD::AND;
1661  Opc = OpcTable[Idx][0];
1662  RC = &AArch64::GPR32spRegClass;
1663  RegSize = 32;
1664  break;
1665  }
1666  case MVT::i64:
1667  Opc = OpcTable[ISDOpc - ISD::AND][1];
1668  RC = &AArch64::GPR64spRegClass;
1669  RegSize = 64;
1670  break;
1671  }
1672 
1673  if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1674  return 0;
1675 
1676  Register ResultReg =
1677  fastEmitInst_ri(Opc, RC, LHSReg,
1678  AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1679  if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1680  uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1681  ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1682  }
1683  return ResultReg;
1684 }
1685 
1686 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1687  unsigned LHSReg, unsigned RHSReg,
1688  uint64_t ShiftImm) {
1689  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1690  "ISD nodes are not consecutive!");
1691  static const unsigned OpcTable[3][2] = {
1692  { AArch64::ANDWrs, AArch64::ANDXrs },
1693  { AArch64::ORRWrs, AArch64::ORRXrs },
1694  { AArch64::EORWrs, AArch64::EORXrs }
1695  };
1696 
1697  // Don't deal with undefined shifts.
1698  if (ShiftImm >= RetVT.getSizeInBits())
1699  return 0;
1700 
1701  const TargetRegisterClass *RC;
1702  unsigned Opc;
1703  switch (RetVT.SimpleTy) {
1704  default:
1705  return 0;
1706  case MVT::i1:
1707  case MVT::i8:
1708  case MVT::i16:
1709  case MVT::i32:
1710  Opc = OpcTable[ISDOpc - ISD::AND][0];
1711  RC = &AArch64::GPR32RegClass;
1712  break;
1713  case MVT::i64:
1714  Opc = OpcTable[ISDOpc - ISD::AND][1];
1715  RC = &AArch64::GPR64RegClass;
1716  break;
1717  }
1718  Register ResultReg =
1719  fastEmitInst_rri(Opc, RC, LHSReg, RHSReg,
1720  AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1721  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1722  uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1723  ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1724  }
1725  return ResultReg;
1726 }
1727 
1728 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg,
1729  uint64_t Imm) {
1730  return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm);
1731 }
1732 
1733 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1734  bool WantZExt, MachineMemOperand *MMO) {
1735  if (!TLI.allowsMisalignedMemoryAccesses(VT))
1736  return 0;
1737 
1738  // Simplify this down to something we can handle.
1739  if (!simplifyAddress(Addr, VT))
1740  return 0;
1741 
1742  unsigned ScaleFactor = getImplicitScaleFactor(VT);
1743  if (!ScaleFactor)
1744  llvm_unreachable("Unexpected value type.");
1745 
1746  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1747  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1748  bool UseScaled = true;
1749  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1750  UseScaled = false;
1751  ScaleFactor = 1;
1752  }
1753 
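  // GPOpcTable is indexed as [WantZExt][2 * Idx + IsRet64Bit][size]: rows come
  // in (32-bit result, 64-bit result) pairs for the unscaled-immediate,
  // scaled-immediate, register-offset (X) and extended register-offset (W)
  // addressing forms; the column selects the i8/i16/i32/i64 access.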
1754  static const unsigned GPOpcTable[2][8][4] = {
1755  // Sign-extend.
1756  { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
1757  AArch64::LDURXi },
1758  { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
1759  AArch64::LDURXi },
1760  { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
1761  AArch64::LDRXui },
1762  { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
1763  AArch64::LDRXui },
1764  { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1765  AArch64::LDRXroX },
1766  { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1767  AArch64::LDRXroX },
1768  { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1769  AArch64::LDRXroW },
1770  { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1771  AArch64::LDRXroW }
1772  },
1773  // Zero-extend.
1774  { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1775  AArch64::LDURXi },
1776  { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1777  AArch64::LDURXi },
1778  { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1779  AArch64::LDRXui },
1780  { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1781  AArch64::LDRXui },
1782  { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1783  AArch64::LDRXroX },
1784  { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1785  AArch64::LDRXroX },
1786  { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1787  AArch64::LDRXroW },
1788  { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1789  AArch64::LDRXroW }
1790  }
1791  };
1792 
1793  static const unsigned FPOpcTable[4][2] = {
1794  { AArch64::LDURSi, AArch64::LDURDi },
1795  { AArch64::LDRSui, AArch64::LDRDui },
1796  { AArch64::LDRSroX, AArch64::LDRDroX },
1797  { AArch64::LDRSroW, AArch64::LDRDroW }
1798  };
1799 
1800  unsigned Opc;
1801  const TargetRegisterClass *RC;
1802  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1803  Addr.getOffsetReg();
1804  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1805  if (Addr.getExtendType() == AArch64_AM::UXTW ||
1806  Addr.getExtendType() == AArch64_AM::SXTW)
1807  Idx++;
1808 
1809  bool IsRet64Bit = RetVT == MVT::i64;
1810  switch (VT.SimpleTy) {
1811  default:
1812  llvm_unreachable("Unexpected value type.");
1813  case MVT::i1: // Intentional fall-through.
1814  case MVT::i8:
1815  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1816  RC = (IsRet64Bit && !WantZExt) ?
1817  &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1818  break;
1819  case MVT::i16:
1820  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1821  RC = (IsRet64Bit && !WantZExt) ?
1822  &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1823  break;
1824  case MVT::i32:
1825  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1826  RC = (IsRet64Bit && !WantZExt) ?
1827  &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1828  break;
1829  case MVT::i64:
1830  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1831  RC = &AArch64::GPR64RegClass;
1832  break;
1833  case MVT::f32:
1834  Opc = FPOpcTable[Idx][0];
1835  RC = &AArch64::FPR32RegClass;
1836  break;
1837  case MVT::f64:
1838  Opc = FPOpcTable[Idx][1];
1839  RC = &AArch64::FPR64RegClass;
1840  break;
1841  }
1842 
1843  // Create the base instruction, then add the operands.
1844  Register ResultReg = createResultReg(RC);
1845  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1846  TII.get(Opc), ResultReg);
1847  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1848 
1849  // Loading an i1 requires special handling.
1850  if (VT == MVT::i1) {
1851  unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1);
1852  assert(ANDReg && "Unexpected AND instruction emission failure.");
1853  ResultReg = ANDReg;
1854  }
1855 
1856  // For zero-extending loads to 64bit we emit a 32bit load and then convert
1857  // the 32bit reg to a 64bit reg.
1858  if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1859  Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
1860  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1861  TII.get(AArch64::SUBREG_TO_REG), Reg64)
1862  .addImm(0)
1863  .addReg(ResultReg, getKillRegState(true))
1864  .addImm(AArch64::sub_32);
1865  ResultReg = Reg64;
1866  }
1867  return ResultReg;
1868 }
1869 
1870 bool AArch64FastISel::selectAddSub(const Instruction *I) {
1871  MVT VT;
1872  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1873  return false;
1874 
1875  if (VT.isVector())
1876  return selectOperator(I, I->getOpcode());
1877 
1878  unsigned ResultReg;
1879  switch (I->getOpcode()) {
1880  default:
1881  llvm_unreachable("Unexpected instruction.");
1882  case Instruction::Add:
1883  ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1884  break;
1885  case Instruction::Sub:
1886  ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1887  break;
1888  }
1889  if (!ResultReg)
1890  return false;
1891 
1892  updateValueMap(I, ResultReg);
1893  return true;
1894 }
1895 
1896 bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1897  MVT VT;
1898  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1899  return false;
1900 
1901  if (VT.isVector())
1902  return selectOperator(I, I->getOpcode());
1903 
1904  unsigned ResultReg;
1905  switch (I->getOpcode()) {
1906  default:
1907  llvm_unreachable("Unexpected instruction.");
1908  case Instruction::And:
1909  ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1910  break;
1911  case Instruction::Or:
1912  ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1913  break;
1914  case Instruction::Xor:
1915  ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1916  break;
1917  }
1918  if (!ResultReg)
1919  return false;
1920 
1921  updateValueMap(I, ResultReg);
1922  return true;
1923 }
1924 
1925 bool AArch64FastISel::selectLoad(const Instruction *I) {
1926  MVT VT;
1927  // Verify we have a legal type before going any further. Currently, we handle
1928  // simple types that will directly fit in a register (i32/f32/i64/f64) or
1929  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1930  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1931  cast<LoadInst>(I)->isAtomic())
1932  return false;
1933 
1934  const Value *SV = I->getOperand(0);
1935  if (TLI.supportSwiftError()) {
1936  // Swifterror values can come from either a function parameter with
1937  // swifterror attribute or an alloca with swifterror attribute.
1938  if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1939  if (Arg->hasSwiftErrorAttr())
1940  return false;
1941  }
1942 
1943  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1944  if (Alloca->isSwiftError())
1945  return false;
1946  }
1947  }
1948 
1949  // See if we can handle this address.
1950  Address Addr;
1951  if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1952  return false;
1953 
1954  // Fold the following sign-/zero-extend into the load instruction.
1955  bool WantZExt = true;
1956  MVT RetVT = VT;
1957  const Value *IntExtVal = nullptr;
1958  if (I->hasOneUse()) {
1959  if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1960  if (isTypeSupported(ZE->getType(), RetVT))
1961  IntExtVal = ZE;
1962  else
1963  RetVT = VT;
1964  } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1965  if (isTypeSupported(SE->getType(), RetVT))
1966  IntExtVal = SE;
1967  else
1968  RetVT = VT;
1969  WantZExt = false;
1970  }
1971  }
1972 
1973  unsigned ResultReg =
1974  emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1975  if (!ResultReg)
1976  return false;
1977 
1978  // There are a few different cases we have to handle, because the load or the
1979  // sign-/zero-extend might not be selected by FastISel if we fall-back to
1980  // SelectionDAG. There is also an ordering issue when both instructions are in
1981  // different basic blocks.
1982  // 1.) The load instruction is selected by FastISel, but the integer extend
1983  // not. This usually happens when the integer extend is in a different
1984  // basic block and SelectionDAG took over for that basic block.
1985  // 2.) The load instruction is selected before the integer extend. This only
1986  // happens when the integer extend is in a different basic block.
1987  // 3.) The load instruction is selected by SelectionDAG and the integer extend
1988  // by FastISel. This happens if there are instructions between the load
1989  // and the integer extend that couldn't be selected by FastISel.
1990  if (IntExtVal) {
1991  // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
1992  // could select it. Emit a copy to subreg if necessary. FastISel will remove
1993  // it when it selects the integer extend.
1994  Register Reg = lookUpRegForValue(IntExtVal);
1995  auto *MI = MRI.getUniqueVRegDef(Reg);
1996  if (!MI) {
1997  if (RetVT == MVT::i64 && VT <= MVT::i32) {
1998  if (WantZExt) {
1999  // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2000  MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
2001  ResultReg = std::prev(I)->getOperand(0).getReg();
2002  removeDeadCode(I, std::next(I));
2003  } else
2004  ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2005  AArch64::sub_32);
2006  }
2007  updateValueMap(I, ResultReg);
2008  return true;
2009  }
2010 
2011  // The integer extend has already been emitted - delete all the instructions
2012  // that have been emitted by the integer extend lowering code and use the
2013  // result from the load instruction directly.
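  // Walk backwards from the extend's definition through its first register
  // operand, deleting each now-dead instruction along the way.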
2014  while (MI) {
2015  Reg = 0;
2016  for (auto &Opnd : MI->uses()) {
2017  if (Opnd.isReg()) {
2018  Reg = Opnd.getReg();
2019  break;
2020  }
2021  }
2022  MachineBasicBlock::iterator I(MI);
2023  removeDeadCode(I, std::next(I));
2024  MI = nullptr;
2025  if (Reg)
2026  MI = MRI.getUniqueVRegDef(Reg);
2027  }
2028  updateValueMap(IntExtVal, ResultReg);
2029  return true;
2030  }
2031 
2032  updateValueMap(I, ResultReg);
2033  return true;
2034 }
2035 
2036 bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2037  unsigned AddrReg,
2038  MachineMemOperand *MMO) {
2039  unsigned Opc;
2040  switch (VT.SimpleTy) {
2041  default: return false;
2042  case MVT::i8: Opc = AArch64::STLRB; break;
2043  case MVT::i16: Opc = AArch64::STLRH; break;
2044  case MVT::i32: Opc = AArch64::STLRW; break;
2045  case MVT::i64: Opc = AArch64::STLRX; break;
2046  }
2047 
2048  const MCInstrDesc &II = TII.get(Opc);
2049  SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2050  AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2051  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2052  .addReg(SrcReg)
2053  .addReg(AddrReg)
2054  .addMemOperand(MMO);
2055  return true;
2056 }
2057 
2058 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2059  MachineMemOperand *MMO) {
2060  if (!TLI.allowsMisalignedMemoryAccesses(VT))
2061  return false;
2062 
2063  // Simplify this down to something we can handle.
2064  if (!simplifyAddress(Addr, VT))
2065  return false;
2066 
2067  unsigned ScaleFactor = getImplicitScaleFactor(VT);
2068  if (!ScaleFactor)
2069  llvm_unreachable("Unexpected value type.");
2070 
2071  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2072  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2073  bool UseScaled = true;
2074  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2075  UseScaled = false;
2076  ScaleFactor = 1;
2077  }
2078 
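  // Rows: unscaled-immediate, scaled-immediate, register-offset (X),
  // extended register-offset (W); columns: i8, i16, i32, i64, f32, f64.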
2079  static const unsigned OpcTable[4][6] = {
2080  { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
2081  AArch64::STURSi, AArch64::STURDi },
2082  { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
2083  AArch64::STRSui, AArch64::STRDui },
2084  { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2085  AArch64::STRSroX, AArch64::STRDroX },
2086  { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2087  AArch64::STRSroW, AArch64::STRDroW }
2088  };
2089 
2090  unsigned Opc;
2091  bool VTIsi1 = false;
2092  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2093  Addr.getOffsetReg();
2094  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2095  if (Addr.getExtendType() == AArch64_AM::UXTW ||
2096  Addr.getExtendType() == AArch64_AM::SXTW)
2097  Idx++;
2098 
2099  switch (VT.SimpleTy) {
2100  default: llvm_unreachable("Unexpected value type.");
2101  case MVT::i1: VTIsi1 = true; [[fallthrough]];
2102  case MVT::i8: Opc = OpcTable[Idx][0]; break;
2103  case MVT::i16: Opc = OpcTable[Idx][1]; break;
2104  case MVT::i32: Opc = OpcTable[Idx][2]; break;
2105  case MVT::i64: Opc = OpcTable[Idx][3]; break;
2106  case MVT::f32: Opc = OpcTable[Idx][4]; break;
2107  case MVT::f64: Opc = OpcTable[Idx][5]; break;
2108  }
2109 
2110  // Storing an i1 requires special handling.
2111  if (VTIsi1 && SrcReg != AArch64::WZR) {
2112  unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1);
2113  assert(ANDReg && "Unexpected AND instruction emission failure.");
2114  SrcReg = ANDReg;
2115  }
2116  // Create the base instruction, then add the operands.
2117  const MCInstrDesc &II = TII.get(Opc);
2118  SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2119  MachineInstrBuilder MIB =
2120  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(SrcReg);
2121  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2122 
2123  return true;
2124 }
2125 
2126 bool AArch64FastISel::selectStore(const Instruction *I) {
2127  MVT VT;
2128  const Value *Op0 = I->getOperand(0);
2129  // Verify we have a legal type before going any further. Currently, we handle
2130  // simple types that will directly fit in a register (i32/f32/i64/f64) or
2131  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2132  if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2133  return false;
2134 
2135  const Value *PtrV = I->getOperand(1);
2136  if (TLI.supportSwiftError()) {
2137  // Swifterror values can come from either a function parameter with
2138  // swifterror attribute or an alloca with swifterror attribute.
2139  if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2140  if (Arg->hasSwiftErrorAttr())
2141  return false;
2142  }
2143 
2144  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2145  if (Alloca->isSwiftError())
2146  return false;
2147  }
2148  }
2149 
2150  // Get the value to be stored into a register. Use the zero register directly
2151  // when possible to avoid an unnecessary copy and a wasted register.
2152  unsigned SrcReg = 0;
2153  if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2154  if (CI->isZero())
2155  SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2156  } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2157  if (CF->isZero() && !CF->isNegative()) {
2158  VT = MVT::getIntegerVT(VT.getSizeInBits());
2159  SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2160  }
2161  }
2162 
2163  if (!SrcReg)
2164  SrcReg = getRegForValue(Op0);
2165 
2166  if (!SrcReg)
2167  return false;
2168 
2169  auto *SI = cast<StoreInst>(I);
2170 
2171  // Try to emit a STLR for seq_cst/release.
2172  if (SI->isAtomic()) {
2173  AtomicOrdering Ord = SI->getOrdering();
2174  // The non-atomic instructions are sufficient for relaxed stores.
2175  if (isReleaseOrStronger(Ord)) {
2176  // The STLR addressing mode only supports a base reg; pass that directly.
2177  Register AddrReg = getRegForValue(PtrV);
2178  return emitStoreRelease(VT, SrcReg, AddrReg,
2179  createMachineMemOperandFor(I));
2180  }
2181  }
2182 
2183  // See if we can handle this address.
2184  Address Addr;
2185  if (!computeAddress(PtrV, Addr, Op0->getType()))
2186  return false;
2187 
2188  if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2189  return false;
2190  return true;
2191 }
2192 
2194  switch (Pred) {
2195  case CmpInst::FCMP_ONE:
2196  case CmpInst::FCMP_UEQ:
2197  default:
2198  // AL is our "false" for now. The other two need more compares.
2199  return AArch64CC::AL;
2200  case CmpInst::ICMP_EQ:
2201  case CmpInst::FCMP_OEQ:
2202  return AArch64CC::EQ;
2203  case CmpInst::ICMP_SGT:
2204  case CmpInst::FCMP_OGT:
2205  return AArch64CC::GT;
2206  case CmpInst::ICMP_SGE:
2207  case CmpInst::FCMP_OGE:
2208  return AArch64CC::GE;
2209  case CmpInst::ICMP_UGT:
2210  case CmpInst::FCMP_UGT:
2211  return AArch64CC::HI;
2212  case CmpInst::FCMP_OLT:
2213  return AArch64CC::MI;
2214  case CmpInst::ICMP_ULE:
2215  case CmpInst::FCMP_OLE:
2216  return AArch64CC::LS;
2217  case CmpInst::FCMP_ORD:
2218  return AArch64CC::VC;
2219  case CmpInst::FCMP_UNO:
2220  return AArch64CC::VS;
2221  case CmpInst::FCMP_UGE:
2222  return AArch64CC::PL;
2223  case CmpInst::ICMP_SLT:
2224  case CmpInst::FCMP_ULT:
2225  return AArch64CC::LT;
2226  case CmpInst::ICMP_SLE:
2227  case CmpInst::FCMP_ULE:
2228  return AArch64CC::LE;
2229  case CmpInst::FCMP_UNE:
2230  case CmpInst::ICMP_NE:
2231  return AArch64CC::NE;
2232  case CmpInst::ICMP_UGE:
2233  return AArch64CC::HS;
2234  case CmpInst::ICMP_ULT:
2235  return AArch64CC::LO;
2236  }
2237 }
2238 
2239 /// Try to emit a combined compare-and-branch instruction.
2240 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2241  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
2242  // will not be produced, as they are conditional branch instructions that do
2243  // not set flags.
2244  if (FuncInfo.MF->getFunction().hasFnAttribute(
2245  Attribute::SpeculativeLoadHardening))
2246  return false;
2247 
2248  assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2249  const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2250  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2251 
2252  const Value *LHS = CI->getOperand(0);
2253  const Value *RHS = CI->getOperand(1);
2254 
2255  MVT VT;
2256  if (!isTypeSupported(LHS->getType(), VT))
2257  return false;
2258 
2259  unsigned BW = VT.getSizeInBits();
2260  if (BW > 64)
2261  return false;
2262 
2263  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2264  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2265 
2266  // Try to take advantage of fallthrough opportunities.
2267  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2268  std::swap(TBB, FBB);
2269  Predicate = CmpInst::getInversePredicate(Predicate);
2270  }
2271 
2272  int TestBit = -1;
2273  bool IsCmpNE;
2274  switch (Predicate) {
2275  default:
2276  return false;
2277  case CmpInst::ICMP_EQ:
2278  case CmpInst::ICMP_NE:
2279  if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2280  std::swap(LHS, RHS);
2281 
2282  if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2283  return false;
2284 
2285  if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2286  if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2287  const Value *AndLHS = AI->getOperand(0);
2288  const Value *AndRHS = AI->getOperand(1);
2289 
2290  if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2291  if (C->getValue().isPowerOf2())
2292  std::swap(AndLHS, AndRHS);
2293 
2294  if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2295  if (C->getValue().isPowerOf2()) {
2296  TestBit = C->getValue().logBase2();
2297  LHS = AndLHS;
2298  }
2299  }
2300 
2301  if (VT == MVT::i1)
2302  TestBit = 0;
2303 
2304  IsCmpNE = Predicate == CmpInst::ICMP_NE;
2305  break;
2306  case CmpInst::ICMP_SLT:
2307  case CmpInst::ICMP_SGE:
2308  if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2309  return false;
2310 
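  // x < 0 and x >= 0 reduce to a test of the sign bit, which TB(N)Z handles.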
2311  TestBit = BW - 1;
2312  IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2313  break;
2314  case CmpInst::ICMP_SGT:
2315  case CmpInst::ICMP_SLE:
2316  if (!isa<ConstantInt>(RHS))
2317  return false;
2318 
2319  if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2320  return false;
2321 
2322  TestBit = BW - 1;
2323  IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2324  break;
2325  } // end switch
2326 
2327  static const unsigned OpcTable[2][2][2] = {
2328  { {AArch64::CBZW, AArch64::CBZX },
2329  {AArch64::CBNZW, AArch64::CBNZX} },
2330  { {AArch64::TBZW, AArch64::TBZX },
2331  {AArch64::TBNZW, AArch64::TBNZX} }
2332  };
2333 
2334  bool IsBitTest = TestBit != -1;
2335  bool Is64Bit = BW == 64;
2336  if (TestBit < 32 && TestBit >= 0)
2337  Is64Bit = false;
2338 
2339  unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2340  const MCInstrDesc &II = TII.get(Opc);
2341 
2342  Register SrcReg = getRegForValue(LHS);
2343  if (!SrcReg)
2344  return false;
2345 
2346  if (BW == 64 && !Is64Bit)
2347  SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32);
2348 
2349  if ((BW < 32) && !IsBitTest)
2350  SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
2351 
2352  // Emit the combined compare and branch instruction.
2353  SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2354  MachineInstrBuilder MIB =
2355  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
2356  .addReg(SrcReg);
2357  if (IsBitTest)
2358  MIB.addImm(TestBit);
2359  MIB.addMBB(TBB);
2360 
2361  finishCondBranch(BI->getParent(), TBB, FBB);
2362  return true;
2363 }
2364 
2365 bool AArch64FastISel::selectBranch(const Instruction *I) {
2366  const BranchInst *BI = cast<BranchInst>(I);
2367  if (BI->isUnconditional()) {
2368  MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2369  fastEmitBranch(MSucc, BI->getDebugLoc());
2370  return true;
2371  }
2372 
2373  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2374  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2375 
2376  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2377  if (CI->hasOneUse() && isValueAvailable(CI)) {
2378  // Try to optimize or fold the cmp.
2379  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2380  switch (Predicate) {
2381  default:
2382  break;
2383  case CmpInst::FCMP_FALSE:
2384  fastEmitBranch(FBB, MIMD.getDL());
2385  return true;
2386  case CmpInst::FCMP_TRUE:
2387  fastEmitBranch(TBB, MIMD.getDL());
2388  return true;
2389  }
2390 
2391  // Try to emit a combined compare-and-branch first.
2392  if (emitCompareAndBranch(BI))
2393  return true;
2394 
2395  // Try to take advantage of fallthrough opportunities.
2396  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2397  std::swap(TBB, FBB);
2398  Predicate = CmpInst::getInversePredicate(Predicate);
2399  }
2400 
2401  // Emit the cmp.
2402  if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2403  return false;
2404 
2405  // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2406  // instruction.
2407  AArch64CC::CondCode CC = getCompareCC(Predicate);
2408  AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2409  switch (Predicate) {
2410  default:
2411  break;
2412  case CmpInst::FCMP_UEQ:
2413  ExtraCC = AArch64CC::EQ;
2414  CC = AArch64CC::VS;
2415  break;
2416  case CmpInst::FCMP_ONE:
2417  ExtraCC = AArch64CC::MI;
2418  CC = AArch64CC::GT;
2419  break;
2420  }
2421  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2422 
2423  // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2424  if (ExtraCC != AArch64CC::AL) {
2425  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2426  .addImm(ExtraCC)
2427  .addMBB(TBB);
2428  }
2429 
2430  // Emit the branch.
2431  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2432  .addImm(CC)
2433  .addMBB(TBB);
2434 
2435  finishCondBranch(BI->getParent(), TBB, FBB);
2436  return true;
2437  }
2438  } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2439  uint64_t Imm = CI->getZExtValue();
2440  MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2441  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::B))
2442  .addMBB(Target);
2443 
2444  // Obtain the branch probability and add the target to the successor list.
2445  if (FuncInfo.BPI) {
2446  auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2447  BI->getParent(), Target->getBasicBlock());
2448  FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2449  } else
2450  FuncInfo.MBB->addSuccessorWithoutProb(Target);
2451  return true;
2452  } else {
2453  AArch64CC::CondCode CC = AArch64CC::AL;
2454  if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2455  // Fake request the condition, otherwise the intrinsic might be completely
2456  // optimized away.
2457  Register CondReg = getRegForValue(BI->getCondition());
2458  if (!CondReg)
2459  return false;
2460 
2461  // Emit the branch.
2462  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2463  .addImm(CC)
2464  .addMBB(TBB);
2465 
2466  finishCondBranch(BI->getParent(), TBB, FBB);
2467  return true;
2468  }
2469  }
2470 
2471  Register CondReg = getRegForValue(BI->getCondition());
2472  if (CondReg == 0)
2473  return false;
2474 
2475  // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2476  unsigned Opcode = AArch64::TBNZW;
2477  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2478  std::swap(TBB, FBB);
2479  Opcode = AArch64::TBZW;
2480  }
2481 
2482  const MCInstrDesc &II = TII.get(Opcode);
2483  Register ConstrainedCondReg
2484  = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2485  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2486  .addReg(ConstrainedCondReg)
2487  .addImm(0)
2488  .addMBB(TBB);
2489 
2490  finishCondBranch(BI->getParent(), TBB, FBB);
2491  return true;
2492 }
2493 
2494 bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2495  const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2496  Register AddrReg = getRegForValue(BI->getOperand(0));
2497  if (AddrReg == 0)
2498  return false;
2499 
2500  // Emit the indirect branch.
2501  const MCInstrDesc &II = TII.get(AArch64::BR);
2502  AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
2503  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(AddrReg);
2504 
2505  // Make sure the CFG is up-to-date.
2506  for (const auto *Succ : BI->successors())
2507  FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2508 
2509  return true;
2510 }
2511 
2512 bool AArch64FastISel::selectCmp(const Instruction *I) {
2513  const CmpInst *CI = cast<CmpInst>(I);
2514 
2515  // Vectors of i1 are weird: bail out.
2516  if (CI->getType()->isVectorTy())
2517  return false;
2518 
2519  // Try to optimize or fold the cmp.
2520  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2521  unsigned ResultReg = 0;
2522  switch (Predicate) {
2523  default:
2524  break;
2525  case CmpInst::FCMP_FALSE:
2526  ResultReg = createResultReg(&AArch64::GPR32RegClass);
2527  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2528  TII.get(TargetOpcode::COPY), ResultReg)
2529  .addReg(AArch64::WZR, getKillRegState(true));
2530  break;
2531  case CmpInst::FCMP_TRUE:
2532  ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2533  break;
2534  }
2535 
2536  if (ResultReg) {
2537  updateValueMap(I, ResultReg);
2538  return true;
2539  }
2540 
2541  // Emit the cmp.
2542  if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2543  return false;
2544 
2545  ResultReg = createResultReg(&AArch64::GPR32RegClass);
2546 
2547  // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2548  // condition codes are inverted, because they are used by CSINC.
2549  static unsigned CondCodeTable[2][2] = {
2550  { AArch64CC::NE, AArch64CC::VC },
2551  { AArch64CC::PL, AArch64CC::LE }
2552  };
2553  unsigned *CondCodes = nullptr;
2554  switch (Predicate) {
2555  default:
2556  break;
2557  case CmpInst::FCMP_UEQ:
2558  CondCodes = &CondCodeTable[0][0];
2559  break;
2560  case CmpInst::FCMP_ONE:
2561  CondCodes = &CondCodeTable[1][0];
2562  break;
2563  }
2564 
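  // CSINC Wd, WZR, WZR, cc materializes !cc, so chaining a second CSINC
  // through TmpReg1 ORs the two inverted conditions (e.g. FCMP_UEQ = EQ | VS).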
2565  if (CondCodes) {
2566  Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2567  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2568  TmpReg1)
2569  .addReg(AArch64::WZR, getKillRegState(true))
2570  .addReg(AArch64::WZR, getKillRegState(true))
2571  .addImm(CondCodes[0]);
2572  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2573  ResultReg)
2574  .addReg(TmpReg1, getKillRegState(true))
2575  .addReg(AArch64::WZR, getKillRegState(true))
2576  .addImm(CondCodes[1]);
2577 
2578  updateValueMap(I, ResultReg);
2579  return true;
2580  }
2581 
2582  // Now set a register based on the comparison.
2583  AArch64CC::CondCode CC = getCompareCC(Predicate);
2584  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2585  AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2586  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2587  ResultReg)
2588  .addReg(AArch64::WZR, getKillRegState(true))
2589  .addReg(AArch64::WZR, getKillRegState(true))
2590  .addImm(invertedCC);
2591 
2592  updateValueMap(I, ResultReg);
2593  return true;
2594 }
2595 
2596 /// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2597 /// value.
2598 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2599  if (!SI->getType()->isIntegerTy(1))
2600  return false;
2601 
2602  const Value *Src1Val, *Src2Val;
2603  unsigned Opc = 0;
2604  bool NeedExtraOp = false;
2605  if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2606  if (CI->isOne()) {
2607  Src1Val = SI->getCondition();
2608  Src2Val = SI->getFalseValue();
2609  Opc = AArch64::ORRWrr;
2610  } else {
2611  assert(CI->isZero());
2612  Src1Val = SI->getFalseValue();
2613  Src2Val = SI->getCondition();
2614  Opc = AArch64::BICWrr;
2615  }
2616  } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2617  if (CI->isOne()) {
2618  Src1Val = SI->getCondition();
2619  Src2Val = SI->getTrueValue();
2620  Opc = AArch64::ORRWrr;
2621  NeedExtraOp = true;
2622  } else {
2623  assert(CI->isZero());
2624  Src1Val = SI->getCondition();
2625  Src2Val = SI->getTrueValue();
2626  Opc = AArch64::ANDWrr;
2627  }
2628  }
2629 
2630  if (!Opc)
2631  return false;
2632 
2633  Register Src1Reg = getRegForValue(Src1Val);
2634  if (!Src1Reg)
2635  return false;
2636 
2637  Register Src2Reg = getRegForValue(Src2Val);
2638  if (!Src2Reg)
2639  return false;
2640 
2641  if (NeedExtraOp)
2642  Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1);
2643 
2644  Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2645  Src2Reg);
2646  updateValueMap(SI, ResultReg);
2647  return true;
2648 }
2649 
2650 bool AArch64FastISel::selectSelect(const Instruction *I) {
2651  assert(isa<SelectInst>(I) && "Expected a select instruction.");
2652  MVT VT;
2653  if (!isTypeSupported(I->getType(), VT))
2654  return false;
2655 
2656  unsigned Opc;
2657  const TargetRegisterClass *RC;
2658  switch (VT.SimpleTy) {
2659  default:
2660  return false;
2661  case MVT::i1:
2662  case MVT::i8:
2663  case MVT::i16:
2664  case MVT::i32:
2665  Opc = AArch64::CSELWr;
2666  RC = &AArch64::GPR32RegClass;
2667  break;
2668  case MVT::i64:
2669  Opc = AArch64::CSELXr;
2670  RC = &AArch64::GPR64RegClass;
2671  break;
2672  case MVT::f32:
2673  Opc = AArch64::FCSELSrrr;
2674  RC = &AArch64::FPR32RegClass;
2675  break;
2676  case MVT::f64:
2677  Opc = AArch64::FCSELDrrr;
2678  RC = &AArch64::FPR64RegClass;
2679  break;
2680  }
2681 
2682  const SelectInst *SI = cast<SelectInst>(I);
2683  const Value *Cond = SI->getCondition();
2686 
2687  if (optimizeSelect(SI))
2688  return true;
2689 
2690  // Try to pickup the flags, so we don't have to emit another compare.
2691  if (foldXALUIntrinsic(CC, I, Cond)) {
2692  // Fake request the condition to force emission of the XALU intrinsic.
2693  Register CondReg = getRegForValue(Cond);
2694  if (!CondReg)
2695  return false;
2696  } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2697  isValueAvailable(Cond)) {
2698  const auto *Cmp = cast<CmpInst>(Cond);
2699  // Try to optimize or fold the cmp.
2700  CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2701  const Value *FoldSelect = nullptr;
2702  switch (Predicate) {
2703  default:
2704  break;
2705  case CmpInst::FCMP_FALSE:
2706  FoldSelect = SI->getFalseValue();
2707  break;
2708  case CmpInst::FCMP_TRUE:
2709  FoldSelect = SI->getTrueValue();
2710  break;
2711  }
2712 
2713  if (FoldSelect) {
2714  Register SrcReg = getRegForValue(FoldSelect);
2715  if (!SrcReg)
2716  return false;
2717 
2718  updateValueMap(I, SrcReg);
2719  return true;
2720  }
2721 
2722  // Emit the cmp.
2723  if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2724  return false;
2725 
2726  // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2727  CC = getCompareCC(Predicate);
2728  switch (Predicate) {
2729  default:
2730  break;
2731  case CmpInst::FCMP_UEQ:
2732  ExtraCC = AArch64CC::EQ;
2733  CC = AArch64CC::VS;
2734  break;
2735  case CmpInst::FCMP_ONE:
2736  ExtraCC = AArch64CC::MI;
2737  CC = AArch64CC::GT;
2738  break;
2739  }
2740  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2741  } else {
2742  Register CondReg = getRegForValue(Cond);
2743  if (!CondReg)
2744  return false;
2745 
2746  const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2747  CondReg = constrainOperandRegClass(II, CondReg, 1);
2748 
2749  // Emit a TST instruction (ANDS wzr, reg, #imm).
2750  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II,
2751  AArch64::WZR)
2752  .addReg(CondReg)
2753  .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2754  }
2755 
2756  Register Src1Reg = getRegForValue(SI->getTrueValue());
2757  Register Src2Reg = getRegForValue(SI->getFalseValue());
2758 
2759  if (!Src1Reg || !Src2Reg)
2760  return false;
2761 
2762  if (ExtraCC != AArch64CC::AL)
2763  Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC);
2764 
2765  Register ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC);
2766  updateValueMap(I, ResultReg);
2767  return true;
2768 }
2769 
2770 bool AArch64FastISel::selectFPExt(const Instruction *I) {
2771  Value *V = I->getOperand(0);
2772  if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2773  return false;
2774 
2775  Register Op = getRegForValue(V);
2776  if (Op == 0)
2777  return false;
2778 
2779  Register ResultReg = createResultReg(&AArch64::FPR64RegClass);
2780  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTDSr),
2781  ResultReg).addReg(Op);
2782  updateValueMap(I, ResultReg);
2783  return true;
2784 }
2785 
2786 bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2787  Value *V = I->getOperand(0);
2788  if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2789  return false;
2790 
2791  Register Op = getRegForValue(V);
2792  if (Op == 0)
2793  return false;
2794 
2795  Register ResultReg = createResultReg(&AArch64::FPR32RegClass);
2796  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTSDr),
2797  ResultReg).addReg(Op);
2798  updateValueMap(I, ResultReg);
2799  return true;
2800 }
2801 
2802 // FPToUI and FPToSI
2803 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2804  MVT DestVT;
2805  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2806  return false;
2807 
2808  Register SrcReg = getRegForValue(I->getOperand(0));
2809  if (SrcReg == 0)
2810  return false;
2811 
2812  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2813  if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
2814  return false;
2815 
2816  unsigned Opc;
2817  if (SrcVT == MVT::f64) {
2818  if (Signed)
2819  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2820  else
2821  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2822  } else {
2823  if (Signed)
2824  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2825  else
2826  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2827  }
2828  Register ResultReg = createResultReg(
2829  DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2830  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
2831  .addReg(SrcReg);
2832  updateValueMap(I, ResultReg);
2833  return true;
2834 }
2835 
2836 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2837  MVT DestVT;
2838  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2839  return false;
2840  // Let regular ISEL handle FP16
2841  if (DestVT == MVT::f16)
2842  return false;
2843 
2844  assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2845  "Unexpected value type.");
2846 
2847  Register SrcReg = getRegForValue(I->getOperand(0));
2848  if (!SrcReg)
2849  return false;
2850 
2851  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2852 
2853  // Handle sign-extension.
2854  if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2855  SrcReg =
2856  emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2857  if (!SrcReg)
2858  return false;
2859  }
2860 
2861  unsigned Opc;
2862  if (SrcVT == MVT::i64) {
2863  if (Signed)
2864  Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2865  else
2866  Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2867  } else {
2868  if (Signed)
2869  Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2870  else
2871  Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2872  }
2873 
2874  Register ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg);
2875  updateValueMap(I, ResultReg);
2876  return true;
2877 }
2878 
2879 bool AArch64FastISel::fastLowerArguments() {
2880  if (!FuncInfo.CanLowerReturn)
2881  return false;
2882 
2883  const Function *F = FuncInfo.Fn;
2884  if (F->isVarArg())
2885  return false;
2886 
2887  CallingConv::ID CC = F->getCallingConv();
2888  if (CC != CallingConv::C && CC != CallingConv::Swift)
2889  return false;
2890 
2891  if (Subtarget->hasCustomCallingConv())
2892  return false;
2893 
2894  // Only handle simple cases of up to 8 GPR and FPR each.
2895  unsigned GPRCnt = 0;
2896  unsigned FPRCnt = 0;
2897  for (auto const &Arg : F->args()) {
2898  if (Arg.hasAttribute(Attribute::ByVal) ||
2899  Arg.hasAttribute(Attribute::InReg) ||
2900  Arg.hasAttribute(Attribute::StructRet) ||
2901  Arg.hasAttribute(Attribute::SwiftSelf) ||
2902  Arg.hasAttribute(Attribute::SwiftAsync) ||
2903  Arg.hasAttribute(Attribute::SwiftError) ||
2904  Arg.hasAttribute(Attribute::Nest))
2905  return false;
2906 
2907  Type *ArgTy = Arg.getType();
2908  if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2909  return false;
2910 
2911  EVT ArgVT = TLI.getValueType(DL, ArgTy);
2912  if (!ArgVT.isSimple())
2913  return false;
2914 
2915  MVT VT = ArgVT.getSimpleVT().SimpleTy;
2916  if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2917  return false;
2918 
2919  if (VT.isVector() &&
2920  (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2921  return false;
2922 
2923  if (VT >= MVT::i1 && VT <= MVT::i64)
2924  ++GPRCnt;
2925  else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2926  VT.is128BitVector())
2927  ++FPRCnt;
2928  else
2929  return false;
2930 
2931  if (GPRCnt > 8 || FPRCnt > 8)
2932  return false;
2933  }
2934 
2935  static const MCPhysReg Registers[6][8] = {
2936  { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2937  AArch64::W5, AArch64::W6, AArch64::W7 },
2938  { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2939  AArch64::X5, AArch64::X6, AArch64::X7 },
2940  { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2941  AArch64::H5, AArch64::H6, AArch64::H7 },
2942  { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2943  AArch64::S5, AArch64::S6, AArch64::S7 },
2944  { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2945  AArch64::D5, AArch64::D6, AArch64::D7 },
2946  { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2947  AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2948  };
2949 
2950  unsigned GPRIdx = 0;
2951  unsigned FPRIdx = 0;
2952  for (auto const &Arg : F->args()) {
2953  MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2954  unsigned SrcReg;
2955  const TargetRegisterClass *RC;
2956  if (VT >= MVT::i1 && VT <= MVT::i32) {
2957  SrcReg = Registers[0][GPRIdx++];
2958  RC = &AArch64::GPR32RegClass;
2959  VT = MVT::i32;
2960  } else if (VT == MVT::i64) {
2961  SrcReg = Registers[1][GPRIdx++];
2962  RC = &AArch64::GPR64RegClass;
2963  } else if (VT == MVT::f16) {
2964  SrcReg = Registers[2][FPRIdx++];
2965  RC = &AArch64::FPR16RegClass;
2966  } else if (VT == MVT::f32) {
2967  SrcReg = Registers[3][FPRIdx++];
2968  RC = &AArch64::FPR32RegClass;
2969  } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2970  SrcReg = Registers[4][FPRIdx++];
2971  RC = &AArch64::FPR64RegClass;
2972  } else if (VT.is128BitVector()) {
2973  SrcReg = Registers[5][FPRIdx++];
2974  RC = &AArch64::FPR128RegClass;
2975  } else
2976  llvm_unreachable("Unexpected value type.");
2977 
2978  Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
2979  // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
2980  // Without this, EmitLiveInCopies may eliminate the livein if its only
2981  // use is a bitcast (which isn't turned into an instruction).
2982  Register ResultReg = createResultReg(RC);
2983  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2984  TII.get(TargetOpcode::COPY), ResultReg)
2985  .addReg(DstReg, getKillRegState(true));
2986  updateValueMap(&Arg, ResultReg);
2987  }
2988  return true;
2989 }
2990 
2991 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
2992  SmallVectorImpl<MVT> &OutVTs,
2993  unsigned &NumBytes) {
2994  CallingConv::ID CC = CLI.CallConv;
2995  SmallVector<CCValAssign, 16> ArgLocs;
2996  CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
2997  CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
2998 
2999  // Get a count of how many bytes are to be pushed on the stack.
3000  NumBytes = CCInfo.getNextStackOffset();
3001 
3002  // Issue CALLSEQ_START
3003  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3004  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackDown))
3005  .addImm(NumBytes).addImm(0);
3006 
3007  // Process the args.
3008  for (CCValAssign &VA : ArgLocs) {
3009  const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3010  MVT ArgVT = OutVTs[VA.getValNo()];
3011 
3012  Register ArgReg = getRegForValue(ArgVal);
3013  if (!ArgReg)
3014  return false;
3015 
3016  // Handle arg promotion: SExt, ZExt, AExt.
3017  switch (VA.getLocInfo()) {
3018  case CCValAssign::Full:
3019  break;
3020  case CCValAssign::SExt: {
3021  MVT DestVT = VA.getLocVT();
3022  MVT SrcVT = ArgVT;
3023  ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3024  if (!ArgReg)
3025  return false;
3026  break;
3027  }
3028  case CCValAssign::AExt:
3029  // Intentional fall-through.
3030  case CCValAssign::ZExt: {
3031  MVT DestVT = VA.getLocVT();
3032  MVT SrcVT = ArgVT;
3033  ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3034  if (!ArgReg)
3035  return false;
3036  break;
3037  }
3038  default:
3039  llvm_unreachable("Unknown arg promotion!");
3040  }
3041 
3042  // Now copy/store arg to correct locations.
3043  if (VA.isRegLoc() && !VA.needsCustom()) {
3044  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3045  TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3046  CLI.OutRegs.push_back(VA.getLocReg());
3047  } else if (VA.needsCustom()) {
3048  // FIXME: Handle custom args.
3049  return false;
3050  } else {
3051  assert(VA.isMemLoc() && "Assuming store on stack.");
3052 
3053  // Don't emit stores for undef values.
3054  if (isa<UndefValue>(ArgVal))
3055  continue;
3056 
3057  // Need to store on the stack.
3058  unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3059 
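  // Big-endian targets pass sub-8-byte arguments in the high bytes of the
  // stack slot, so bump the offset for anything smaller than 8 bytes.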
3060  unsigned BEAlign = 0;
3061  if (ArgSize < 8 && !Subtarget->isLittleEndian())
3062  BEAlign = 8 - ArgSize;
3063 
3064  Address Addr;
3065  Addr.setKind(Address::RegBase);
3066  Addr.setReg(AArch64::SP);
3067  Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3068 
3069  Align Alignment = DL.getABITypeAlign(ArgVal->getType());
3070  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3071  MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3072  MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3073 
3074  if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3075  return false;
3076  }
3077  }
3078  return true;
3079 }
3080 
3081 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
3082  unsigned NumBytes) {
3083  CallingConv::ID CC = CLI.CallConv;
3084 
3085  // Issue CALLSEQ_END
3086  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3087  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp))
3088  .addImm(NumBytes).addImm(0);
3089 
3090  // Now the return value.
3091  if (RetVT != MVT::isVoid) {
3092  SmallVector<CCValAssign, 16> RVLocs;
3093  CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3094  CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
3095 
3096  // Only handle a single return value.
3097  if (RVLocs.size() != 1)
3098  return false;
3099 
3100  // Copy all of the result registers out of their specified physreg.
3101  MVT CopyVT = RVLocs[0].getValVT();
3102 
3103  // TODO: Handle big-endian results
3104  if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3105  return false;
3106 
3107  Register ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
3108  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3109  TII.get(TargetOpcode::COPY), ResultReg)
3110  .addReg(RVLocs[0].getLocReg());
3111  CLI.InRegs.push_back(RVLocs[0].getLocReg());
3112 
3113  CLI.ResultReg = ResultReg;
3114  CLI.NumResultRegs = 1;
3115  }
3116 
3117  return true;
3118 }
3119 
3120 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3121  CallingConv::ID CC = CLI.CallConv;
3122  bool IsTailCall = CLI.IsTailCall;
3123  bool IsVarArg = CLI.IsVarArg;
3124  const Value *Callee = CLI.Callee;
3125  MCSymbol *Symbol = CLI.Symbol;
3126 
3127  if (!Callee && !Symbol)
3128  return false;
3129 
3130  // Allow SelectionDAG isel to handle calls to functions like setjmp that need
3131  // a bti instruction following the call.
3132  if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
3133  !Subtarget->noBTIAtReturnTwice() &&
3134  MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
3135  return false;
3136 
3137  // Allow SelectionDAG isel to handle indirect calls with KCFI checks.
3138  if (CLI.CB && CLI.CB->isIndirectCall() &&
3139  CLI.CB->getOperandBundle(LLVMContext::OB_kcfi))
3140  return false;
3141 
3142  // Allow SelectionDAG isel to handle tail calls.
3143  if (IsTailCall)
3144  return false;
3145 
3146  // FIXME: we could and should support this, but for now correctness at -O0 is
3147  // more important.
3148  if (Subtarget->isTargetILP32())
3149  return false;
3150 
3151  CodeModel::Model CM = TM.getCodeModel();
3152  // Only support the small-addressing and large code models.
3153  if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3154  return false;
3155 
3156  // FIXME: Add large code model support for ELF.
3157  if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3158  return false;
3159 
3160  // Let SDISel handle vararg functions.
3161  if (IsVarArg)
3162  return false;
3163 
3164  // FIXME: Only handle *simple* calls for now.
3165  MVT RetVT;
3166  if (CLI.RetTy->isVoidTy())
3167  RetVT = MVT::isVoid;
3168  else if (!isTypeLegal(CLI.RetTy, RetVT))
3169  return false;
3170 
3171  for (auto Flag : CLI.OutFlags)
3172  if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3173  Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError())
3174  return false;
3175 
3176  // Set up the argument vectors.
3177  SmallVector<MVT, 16> OutVTs;
3178  OutVTs.reserve(CLI.OutVals.size());
3179 
3180  for (auto *Val : CLI.OutVals) {
3181  MVT VT;
3182  if (!isTypeLegal(Val->getType(), VT) &&
3183  !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3184  return false;
3185 
3186  // We don't handle vector parameters yet.
3187  if (VT.isVector() || VT.getSizeInBits() > 64)
3188  return false;
3189 
3190  OutVTs.push_back(VT);
3191  }
3192 
3193  Address Addr;
3194  if (Callee && !computeCallAddress(Callee, Addr))
3195  return false;
3196 
3197  // The weak function target may be zero; in that case we must use indirect
3198  // addressing via a stub on windows as it may be out of range for a
3199  // PC-relative jump.
3200  if (Subtarget->isTargetWindows() && Addr.getGlobalValue() &&
3201  Addr.getGlobalValue()->hasExternalWeakLinkage())
3202  return false;
3203 
3204  // Handle the arguments now that we've gotten them.
3205  unsigned NumBytes;
3206  if (!processCallArgs(CLI, OutVTs, NumBytes))
3207  return false;
3208 
3209  const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3210  if (RegInfo->isAnyArgRegReserved(*MF))
3211  RegInfo->emitReservedArgRegCallError(*MF);
3212 
3213  // Issue the call.
3214  MachineInstrBuilder MIB;
3215  if (Subtarget->useSmallAddressing()) {
3216  const MCInstrDesc &II =
3217  TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL);
3218  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II);
3219  if (Symbol)
3220  MIB.addSym(Symbol, 0);
3221  else if (Addr.getGlobalValue())
3222  MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3223  else if (Addr.getReg()) {
3224  Register Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3225  MIB.addReg(Reg);
3226  } else
3227  return false;
3228  } else {
3229  unsigned CallReg = 0;
3230  if (Symbol) {
3231  Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3232  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
3233  ADRPReg)
3234  .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3235 
3236  CallReg = createResultReg(&AArch64::GPR64RegClass);
3237  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3238  TII.get(AArch64::LDRXui), CallReg)
3239  .addReg(ADRPReg)
3240  .addSym(Symbol,
3241  AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3242  } else if (Addr.getGlobalValue())
3243  CallReg = materializeGV(Addr.getGlobalValue());
3244  else if (Addr.getReg())
3245  CallReg = Addr.getReg();
3246 
3247  if (!CallReg)
3248  return false;
3249 
3250  const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF));
3251  CallReg = constrainOperandRegClass(II, CallReg, 0);
3252  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(CallReg);
3253  }
3254 
3255  // Add implicit physical register uses to the call.
3256  for (auto Reg : CLI.OutRegs)
3257  MIB.addReg(Reg, RegState::Implicit);
3258 
3259  // Add a register mask with the call-preserved registers.
3260  // Proper defs for return values will be added by setPhysRegsDeadExcept().
3261  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3262 
3263  CLI.Call = MIB;
3264 
3265  // Finish off the call including any return values.
3266  return finishCall(CLI, RetVT, NumBytes);
3267 }
3268 
3269 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
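  // Heuristic: inline the copy if it takes at most four naturally-aligned
  // accesses, or is under 32 bytes when the alignment is unknown.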
3270  if (Alignment)
3271  return Len / Alignment <= 4;
3272  else
3273  return Len < 32;
3274 }
3275 
3276 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3277  uint64_t Len, unsigned Alignment) {
3278  // Make sure we don't bloat code by inlining very large memcpy's.
3279  if (!isMemCpySmall(Len, Alignment))
3280  return false;
3281 
3282  int64_t UnscaledOffset = 0;
3283  Address OrigDest = Dest;
3284  Address OrigSrc = Src;
3285 
3286  while (Len) {
3287  MVT VT;
3288  if (!Alignment || Alignment >= 8) {
3289  if (Len >= 8)
3290  VT = MVT::i64;
3291  else if (Len >= 4)
3292  VT = MVT::i32;
3293  else if (Len >= 2)
3294  VT = MVT::i16;
3295  else {
3296  VT = MVT::i8;
3297  }
3298  } else {
3299  // Bound based on alignment.
3300  if (Len >= 4 && Alignment == 4)
3301  VT = MVT::i32;
3302  else if (Len >= 2 && Alignment == 2)
3303  VT = MVT::i16;
3304  else {
3305  VT = MVT::i8;
3306  }
3307  }
3308 
3309  unsigned ResultReg = emitLoad(VT, VT, Src);
3310  if (!ResultReg)
3311  return false;
3312 
3313  if (!emitStore(VT, ResultReg, Dest))
3314  return false;
3315 
3316  int64_t Size = VT.getSizeInBits() / 8;
3317  Len -= Size;
3318  UnscaledOffset += Size;
3319 
3320  // We need to recompute the unscaled offset for each iteration.
3321  Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3322  Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3323  }
3324 
3325  return true;
3326 }
3327 
3328 /// Check if it is possible to fold the condition from the XALU intrinsic
3329 /// into the user. The condition code will only be updated on success.
3330 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3331  const Instruction *I,
3332  const Value *Cond) {
3333  if (!isa<ExtractValueInst>(Cond))
3334  return false;
3335 
3336  const auto *EV = cast<ExtractValueInst>(Cond);
3337  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3338  return false;
3339 
3340  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3341  MVT RetVT;
3342  const Function *Callee = II->getCalledFunction();
3343  Type *RetTy =
3344  cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3345  if (!isTypeLegal(RetTy, RetVT))
3346  return false;
3347 
3348  if (RetVT != MVT::i32 && RetVT != MVT::i64)
3349  return false;
3350 
3351  const Value *LHS = II->getArgOperand(0);
3352  const Value *RHS = II->getArgOperand(1);
3353 
3354  // Canonicalize immediate to the RHS.
3355  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3356  std::swap(LHS, RHS);
3357 
3358  // Simplify multiplies.
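  // A multiply by 2 overflows exactly when the corresponding add (x + x)
  // does, so it can be checked with the cheaper add-with-overflow form.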
3359  Intrinsic::ID IID = II->getIntrinsicID();
3360  switch (IID) {
3361  default:
3362  break;
3363  case Intrinsic::smul_with_overflow:
3364  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3365  if (C->getValue() == 2)
3366  IID = Intrinsic::sadd_with_overflow;
3367  break;
3368  case Intrinsic::umul_with_overflow:
3369  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3370  if (C->getValue() == 2)
3371  IID = Intrinsic::uadd_with_overflow;
3372  break;
3373  }
3374 
3375  AArch64CC::CondCode TmpCC;
3376  switch (IID) {
3377  default:
3378  return false;
3379  case Intrinsic::sadd_with_overflow:
3380  case Intrinsic::ssub_with_overflow:
3381  TmpCC = AArch64CC::VS;
3382  break;
3383  case Intrinsic::uadd_with_overflow:
3384  TmpCC = AArch64CC::HS;
3385  break;
3386  case Intrinsic::usub_with_overflow:
3387  TmpCC = AArch64CC::LO;
3388  break;
3389  case Intrinsic::smul_with_overflow:
3390  case Intrinsic::umul_with_overflow:
3391  TmpCC = AArch64CC::NE;
3392  break;
3393  }
3394 
3395  // Check if both instructions are in the same basic block.
3396  if (!isValueAvailable(II))
3397  return false;
3398 
3399  // Make sure nothing is in the way
3400  BasicBlock::const_iterator Start(I);
3401  BasicBlock::const_iterator End(II);
3402  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3403  // We only expect extractvalue instructions between the intrinsic and the
3404  // instruction to be selected.
3405  if (!isa<ExtractValueInst>(Itr))
3406  return false;
3407 
3408  // Check that the extractvalue operand comes from the intrinsic.
3409  const auto *EVI = cast<ExtractValueInst>(Itr);
3410  if (EVI->getAggregateOperand() != II)
3411  return false;
3412  }
3413 
3414  CC = TmpCC;
3415  return true;
3416 }
3417 
3418 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3419  // FIXME: Handle more intrinsics.
3420  switch (II->getIntrinsicID()) {
3421  default: return false;
3422  case Intrinsic::frameaddress: {
3423  MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3424  MFI.setFrameAddressIsTaken(true);
3425 
3426  const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3427  Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3428  Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3429  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3430  TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3431  // Recursively load frame address
3432  // ldr x0, [fp]
3433  // ldr x0, [x0]
3434  // ldr x0, [x0]
3435  // ...
3436  unsigned DestReg;
3437  unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3438  while (Depth--) {
3439  DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3440  SrcReg, 0);
3441  assert(DestReg && "Unexpected LDR instruction emission failure.");
3442  SrcReg = DestReg;
3443  }
3444 
3445  updateValueMap(II, SrcReg);
3446  return true;
3447  }
3448  case Intrinsic::sponentry: {
3449  MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3450 
3451  // SP = FP + Fixed Object + 16
3452  int FI = MFI.CreateFixedObject(4, 0, false);
3453  Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
3454  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3455  TII.get(AArch64::ADDXri), ResultReg)
3456  .addFrameIndex(FI)
3457  .addImm(0)
3458  .addImm(0);
3459 
3460  updateValueMap(II, ResultReg);
3461  return true;
3462  }
3463  case Intrinsic::memcpy:
3464  case Intrinsic::memmove: {
3465  const auto *MTI = cast<MemTransferInst>(II);
3466  // Don't handle volatile.
3467  if (MTI->isVolatile())
3468  return false;
3469 
3470  // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
3471  // we would emit dead code because we don't currently handle memmoves.
3472  bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3473  if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3474  // Small memcpy's are common enough that we want to do them without a call
3475  // if possible.
3476  uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3477  unsigned Alignment = MinAlign(MTI->getDestAlignment(),
3478  MTI->getSourceAlignment());
3479  if (isMemCpySmall(Len, Alignment)) {
3480  Address Dest, Src;
3481  if (!computeAddress(MTI->getRawDest(), Dest) ||
3482  !computeAddress(MTI->getRawSource(), Src))
3483  return false;
3484  if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3485  return true;
3486  }
3487  }
3488 
3489  if (!MTI->getLength()->getType()->isIntegerTy(64))
3490  return false;
3491 
3492  if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3493  // Fast instruction selection doesn't support the special
3494  // address spaces.
3495  return false;
3496 
3497  const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3498  return lowerCallTo(II, IntrMemName, II->arg_size() - 1);
3499  }
3500  case Intrinsic::memset: {
3501  const MemSetInst *MSI = cast<MemSetInst>(II);
3502  // Don't handle volatile.
3503  if (MSI->isVolatile())
3504  return false;
3505 
3506  if (!MSI->getLength()->getType()->isIntegerTy(64))
3507  return false;
3508 
3509  if (MSI->getDestAddressSpace() > 255)
3510  // Fast instruction selection doesn't support the special
3511  // address spaces.
3512  return false;
3513 
3514  return lowerCallTo(II, "memset", II->arg_size() - 1);
3515  }
3516  case Intrinsic::sin:
3517  case Intrinsic::cos:
3518  case Intrinsic::pow: {
3519  MVT RetVT;
3520  if (!isTypeLegal(II->getType(), RetVT))
3521  return false;
3522 
3523  if (RetVT != MVT::f32 && RetVT != MVT::f64)
3524  return false;
3525 
3526  static const RTLIB::Libcall LibCallTable[3][2] = {
3527  { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3528  { RTLIB::COS_F32, RTLIB::COS_F64 },
3529  { RTLIB::POW_F32, RTLIB::POW_F64 }
3530  };
3531  RTLIB::Libcall LC;
3532  bool Is64Bit = RetVT == MVT::f64;
3533  switch (II->getIntrinsicID()) {
3534  default:
3535  llvm_unreachable("Unexpected intrinsic.");
3536  case Intrinsic::sin:
3537  LC = LibCallTable[0][Is64Bit];
3538  break;
3539  case Intrinsic::cos:
3540  LC = LibCallTable[1][Is64Bit];
3541  break;
3542  case Intrinsic::pow:
3543  LC = LibCallTable[2][Is64Bit];
3544  break;
3545  }
3546 
3547  ArgListTy Args;
3548  Args.reserve(II->arg_size());
3549 
3550  // Populate the argument list.
3551  for (auto &Arg : II->args()) {
3552  ArgListEntry Entry;
3553  Entry.Val = Arg;
3554  Entry.Ty = Arg->getType();
3555  Args.push_back(Entry);
3556  }
3557 
3558  CallLoweringInfo CLI;
3559  MCContext &Ctx = MF->getContext();
3560  CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3561  TLI.getLibcallName(LC), std::move(Args));
3562  if (!lowerCallTo(CLI))
3563  return false;
3564  updateValueMap(II, CLI.ResultReg);
3565  return true;
3566  }
3567  case Intrinsic::fabs: {
3568  MVT VT;
3569  if (!isTypeLegal(II->getType(), VT))
3570  return false;
3571 
3572  unsigned Opc;
3573  switch (VT.SimpleTy) {
3574  default:
3575  return false;
3576  case MVT::f32:
3577  Opc = AArch64::FABSSr;
3578  break;
3579  case MVT::f64:
3580  Opc = AArch64::FABSDr;
3581  break;
3582  }
3583  Register SrcReg = getRegForValue(II->getOperand(0));
3584  if (!SrcReg)
3585  return false;
3586  Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3587  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
3588  .addReg(SrcReg);
3589  updateValueMap(II, ResultReg);
3590  return true;
3591  }
3592  case Intrinsic::trap:
3593  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3594  .addImm(1);
3595  return true;
3596  case Intrinsic::debugtrap:
3597  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3598  .addImm(0xF000);
3599  return true;
3600 
3601  case Intrinsic::sqrt: {
3602  Type *RetTy = II->getCalledFunction()->getReturnType();
3603 
3604  MVT VT;
3605  if (!isTypeLegal(RetTy, VT))
3606  return false;
3607 
3608  Register Op0Reg = getRegForValue(II->getOperand(0));
3609  if (!Op0Reg)
3610  return false;
3611 
3612  unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg);
3613  if (!ResultReg)
3614  return false;
3615 
3616  updateValueMap(II, ResultReg);
3617  return true;
3618  }
3619  case Intrinsic::sadd_with_overflow:
3620  case Intrinsic::uadd_with_overflow:
3621  case Intrinsic::ssub_with_overflow:
3622  case Intrinsic::usub_with_overflow:
3623  case Intrinsic::smul_with_overflow:
3624  case Intrinsic::umul_with_overflow: {
3625  // This implements the basic lowering of the xalu with overflow intrinsics.
3626  const Function *Callee = II->getCalledFunction();
3627  auto *Ty = cast<StructType>(Callee->getReturnType());
3628  Type *RetTy = Ty->getTypeAtIndex(0U);
3629 
3630  MVT VT;
3631  if (!isTypeLegal(RetTy, VT))
3632  return false;
3633 
3634  if (VT != MVT::i32 && VT != MVT::i64)
3635  return false;
3636 
3637  const Value *LHS = II->getArgOperand(0);
3638  const Value *RHS = II->getArgOperand(1);
3639  // Canonicalize immediate to the RHS.
3640  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3641  std::swap(LHS, RHS);
3642 
3643  // Simplify multiplies.
3644  Intrinsic::ID IID = II->getIntrinsicID();
3645  switch (IID) {
3646  default:
3647  break;
3648  case Intrinsic::smul_with_overflow:
3649  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3650  if (C->getValue() == 2) {
3651  IID = Intrinsic::sadd_with_overflow;
3652  RHS = LHS;
3653  }
3654  break;
3655  case Intrinsic::umul_with_overflow:
3656  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3657  if (C->getValue() == 2) {
3658  IID = Intrinsic::uadd_with_overflow;
3659  RHS = LHS;
3660  }
3661  break;
3662  }
3663 
3664  unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3665  AArch64CC::CondCode CC = AArch64CC::Invalid;
3666  switch (IID) {
3667  default: llvm_unreachable("Unexpected intrinsic!");
3668  case Intrinsic::sadd_with_overflow:
3669  ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3670  CC = AArch64CC::VS;
3671  break;
3672  case Intrinsic::uadd_with_overflow:
3673  ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3674  CC = AArch64CC::HS;
3675  break;
3676  case Intrinsic::ssub_with_overflow:
3677  ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3678  CC = AArch64CC::VS;
3679  break;
3680  case Intrinsic::usub_with_overflow:
3681  ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3682  CC = AArch64CC::LO;
3683  break;
3684  case Intrinsic::smul_with_overflow: {
3685  CC = AArch64CC::NE;
3686  Register LHSReg = getRegForValue(LHS);
3687  if (!LHSReg)
3688  return false;
3689 
3690  Register RHSReg = getRegForValue(RHS);
3691  if (!RHSReg)
3692  return false;
3693 
3694  if (VT == MVT::i32) {
3695  MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg);
3696  Register MulSubReg =
3697  fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3698  // cmp xreg, wreg, sxtw
3699  emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg,
3700  AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true,
3701  /*WantResult=*/false);
3702  MulReg = MulSubReg;
3703  } else {
3704  assert(VT == MVT::i64 && "Unexpected value type.");
3705  // LHSReg and RHSReg cannot be killed by this Mul, since they are
3706  // reused in the next instruction.
3707  MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3708  unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg);
3709  emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63,
3710  /*WantResult=*/false);
3711  }
3712  break;
3713  }
3714  case Intrinsic::umul_with_overflow: {
3715  CC = AArch64CC::NE;
3716  Register LHSReg = getRegForValue(LHS);
3717  if (!LHSReg)
3718  return false;
3719 
3720  Register RHSReg = getRegForValue(RHS);
3721  if (!RHSReg)
3722  return false;
3723 
3724  if (VT == MVT::i32) {
3725  MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg);
3726  // tst xreg, #0xffffffff00000000
3727  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3728  TII.get(AArch64::ANDSXri), AArch64::XZR)
3729  .addReg(MulReg)
3730  .addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64));
3731  MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3732  } else {
3733  assert(VT == MVT::i64 && "Unexpected value type.");
3734  // LHSReg and RHSReg cannot be killed by this Mul, since they are
3735  // reused in the next instruction.
3736  MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3737  unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg);
3738  emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false);
3739  }
3740  break;
3741  }
3742  }
3743 
3744  if (MulReg) {
3745  ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3746  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3747  TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3748  }
3749 
3750  if (!ResultReg1)
3751  return false;
3752 
3753  ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3754  AArch64::WZR, AArch64::WZR,
3755  getInvertedCondCode(CC));
3756  (void)ResultReg2;
3757  assert((ResultReg1 + 1) == ResultReg2 &&
3758  "Nonconsecutive result registers.");
3759  updateValueMap(II, ResultReg1, 2);
3760  return true;
3761  }
3762  }
3763  return false;
3764 }
3765 
3766 bool AArch64FastISel::selectRet(const Instruction *I) {
3767  const ReturnInst *Ret = cast<ReturnInst>(I);
3768  const Function &F = *I->getParent()->getParent();
3769 
3770  if (!FuncInfo.CanLowerReturn)
3771  return false;
3772 
3773  if (F.isVarArg())
3774  return false;
3775 
3776  if (TLI.supportSwiftError() &&
3777  F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3778  return false;
3779 
3780  if (TLI.supportSplitCSR(FuncInfo.MF))
3781  return false;
3782 
3783  // Build a list of return value registers.
3784  SmallVector<unsigned, 4> RetRegs;
3785 
3786  if (Ret->getNumOperands() > 0) {
3787  CallingConv::ID CC = F.getCallingConv();
3788  SmallVector<ISD::OutputArg, 4> Outs;
3789  GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3790 
3791  // Analyze operands of the call, assigning locations to each operand.
3792  SmallVector<CCValAssign, 16> ValLocs;
3793  CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3794  CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3795  : RetCC_AArch64_AAPCS;
3796  CCInfo.AnalyzeReturn(Outs, RetCC);
3797 
3798  // Only handle a single return value for now.
3799  if (ValLocs.size() != 1)
3800  return false;
3801 
3802  CCValAssign &VA = ValLocs[0];
3803  const Value *RV = Ret->getOperand(0);
3804 
3805  // Don't bother handling odd stuff for now.
3806  if ((VA.getLocInfo() != CCValAssign::Full) &&
3807  (VA.getLocInfo() != CCValAssign::BCvt))
3808  return false;
3809 
3810  // Only handle register returns for now.
3811  if (!VA.isRegLoc())
3812  return false;
3813 
3814  Register Reg = getRegForValue(RV);
3815  if (Reg == 0)
3816  return false;
3817 
3818  unsigned SrcReg = Reg + VA.getValNo();
3819  Register DestReg = VA.getLocReg();
3820  // Avoid a cross-class copy. This is very unlikely.
3821  if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3822  return false;
3823 
3824  EVT RVEVT = TLI.getValueType(DL, RV->getType());
3825  if (!RVEVT.isSimple())
3826  return false;
3827 
3828  // Vectors (of > 1 lane) in big endian need tricky handling.
3829  if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
3830  !Subtarget->isLittleEndian())
3831  return false;
3832 
3833  MVT RVVT = RVEVT.getSimpleVT();
3834  if (RVVT == MVT::f128)
3835  return false;
3836 
3837  MVT DestVT = VA.getValVT();
3838  // Special handling for extended integers.
3839  if (RVVT != DestVT) {
3840  if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3841  return false;
3842 
3843  if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3844  return false;
3845 
3846  bool IsZExt = Outs[0].Flags.isZExt();
3847  SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3848  if (SrcReg == 0)
3849  return false;
3850  }
3851 
3852  // "Callee" (i.e. value producer) zero extends pointers at function
3853  // boundary.
3854  if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
3855  SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff);
3856 
3857  // Make the copy.
3858  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3859  TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3860 
3861  // Add register to return instruction.
3862  RetRegs.push_back(VA.getLocReg());
3863  }
3864 
3865  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3866  TII.get(AArch64::RET_ReallyLR));
3867  for (unsigned RetReg : RetRegs)
3868  MIB.addReg(RetReg, RegState::Implicit);
3869  return true;
3870 }
3871 
3872 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3873  Type *DestTy = I->getType();
3874  Value *Op = I->getOperand(0);
3875  Type *SrcTy = Op->getType();
3876 
3877  EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3878  EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3879  if (!SrcEVT.isSimple())
3880  return false;
3881  if (!DestEVT.isSimple())
3882  return false;
3883 
3884  MVT SrcVT = SrcEVT.getSimpleVT();
3885  MVT DestVT = DestEVT.getSimpleVT();
3886 
3887  if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3888  SrcVT != MVT::i8)
3889  return false;
3890  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3891  DestVT != MVT::i1)
3892  return false;
3893 
3894  Register SrcReg = getRegForValue(Op);
3895  if (!SrcReg)
3896  return false;
3897 
3898  // If we're truncating from i64 to a smaller non-legal type then generate an
3899  // AND. Otherwise, we know the high bits are undefined and a truncate only
3900  // generates a COPY. We cannot also mark the source register as the result
3901  // register, because that can incorrectly transfer the kill flag onto the
3902  // source register.
3903  unsigned ResultReg;
3904  if (SrcVT == MVT::i64) {
3905  uint64_t Mask = 0;
3906  switch (DestVT.SimpleTy) {
3907  default:
3908  // Trunc i64 to i32 is handled by the target-independent fast-isel.
3909  return false;
3910  case MVT::i1:
3911  Mask = 0x1;
3912  break;
3913  case MVT::i8:
3914  Mask = 0xff;
3915  break;
3916  case MVT::i16:
3917  Mask = 0xffff;
3918  break;
3919  }
3920  // Issue an extract_subreg to get the lower 32-bits.
3921  Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg,
3922  AArch64::sub_32);
3923  // Create the AND instruction which performs the actual truncation.
3924  ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask);
3925  assert(ResultReg && "Unexpected AND instruction emission failure.");
3926  } else {
3927  ResultReg = createResultReg(&AArch64::GPR32RegClass);
3928  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3929  TII.get(TargetOpcode::COPY), ResultReg)
3930  .addReg(SrcReg);
3931  }
3932 
3933  updateValueMap(I, ResultReg);
3934  return true;
3935 }
3936 
3937 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
3938  assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
3939  DestVT == MVT::i64) &&
3940  "Unexpected value type.");
3941  // Handle i8 and i16 as i32.
3942  if (DestVT == MVT::i8 || DestVT == MVT::i16)
3943  DestVT = MVT::i32;
3944 
3945  if (IsZExt) {
3946  unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1);
3947  assert(ResultReg && "Unexpected AND instruction emission failure.");
3948  if (DestVT == MVT::i64) {
3949  // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
3950  // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
3951  Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3952  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3953  TII.get(AArch64::SUBREG_TO_REG), Reg64)
3954  .addImm(0)
3955  .addReg(ResultReg)
3956  .addImm(AArch64::sub_32);
3957  ResultReg = Reg64;
3958  }
3959  return ResultReg;
3960  } else {
3961  if (DestVT == MVT::i64) {
3962  // FIXME: We're SExt i1 to i64.
3963  return 0;
3964  }
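  // SBFM Wd, Wn, #0, #0 extracts bit 0 and replicates it into the upper bits,
  // i.e. it sign-extends the i1 (this is the sbfx Wd, Wn, #0, #1 alias).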
3965  return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
3966  0, 0);
3967  }
3968 }
3969 
3970 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
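  // There is no standalone MUL instruction at this level; the multiply is
  // emitted as MADD with the zero register as accumulator (mul Rd, Rn, Rm is
  // an alias of madd Rd, Rn, Rm, Rzr).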
3971  unsigned Opc, ZReg;
3972  switch (RetVT.SimpleTy) {
3973  default: return 0;
3974  case MVT::i8:
3975  case MVT::i16:
3976  case MVT::i32:
3977  RetVT = MVT::i32;
3978  Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
3979  case MVT::i64:
3980  Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
3981  }
3982 
3983  const TargetRegisterClass *RC =
3984  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3985  return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg);
3986 }
3987 
3988 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
3989  if (RetVT != MVT::i64)
3990  return 0;
3991 
3992  return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
3993  Op0, Op1, AArch64::XZR);
3994 }
3995 
3996 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
3997  if (RetVT != MVT::i64)
3998  return 0;
3999 
4000  return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
4001  Op0, Op1, AArch64::XZR);
4002 }
4003 
4004 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg,
4005  unsigned Op1Reg) {
4006  unsigned Opc = 0;
4007  bool NeedTrunc = false;
4008  uint64_t Mask = 0;
4009  switch (RetVT.SimpleTy) {
4010  default: return 0;
4011  case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
4012  case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4013  case MVT::i32: Opc = AArch64::LSLVWr; break;
4014  case MVT::i64: Opc = AArch64::LSLVXr; break;
4015  }
4016 
4017  const TargetRegisterClass *RC =
4018  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4019  if (NeedTrunc)
4020  Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4021 
4022  Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4023  if (NeedTrunc)
4024  ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4025  return ResultReg;
4026 }
4027 
4028 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4029  uint64_t Shift, bool IsZExt) {
4030  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4031  "Unexpected source/return type pair.");
4032  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4033  SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4034  "Unexpected source value type.");
4035  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4036  RetVT == MVT::i64) && "Unexpected return value type.");
4037 
4038  bool Is64Bit = (RetVT == MVT::i64);
4039  unsigned RegSize = Is64Bit ? 64 : 32;
4040  unsigned DstBits = RetVT.getSizeInBits();
4041  unsigned SrcBits = SrcVT.getSizeInBits();
4042  const TargetRegisterClass *RC =
4043  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4044 
4045  // Just emit a copy for "zero" shifts.
4046  if (Shift == 0) {
4047  if (RetVT == SrcVT) {
4048  Register ResultReg = createResultReg(RC);
4049  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4050  TII.get(TargetOpcode::COPY), ResultReg)
4051  .addReg(Op0);
4052  return ResultReg;
4053  } else
4054  return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4055  }
4056 
4057  // Don't deal with undefined shifts.
4058  if (Shift >= DstBits)
4059  return 0;
4060 
4061  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4062  // {S|U}BFM Wd, Wn, #r, #s
4063  // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4064 
4065  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4066  // %2 = shl i16 %1, 4
4067  // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4068  // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4069  // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4070  // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4071 
4072  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4073  // %2 = shl i16 %1, 8
4074  // Wd<32+7-24,32-24> = Wn<7:0>
4075  // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4076  // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4077  // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4078 
4079  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4080  // %2 = shl i16 %1, 12
4081  // Wd<32+3-20,32-20> = Wn<3:0>
4082  // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4083  // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4084  // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
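  // Concretely: zext i8 -> i32 shifted left by 4 gives ImmR = 32 - 4 = 28 and
  // ImmS = min(8 - 1, 32 - 1 - 4) = 7, i.e. UBFM Wd, Wn, #28, #7, which is the
  // ubfiz Wd, Wn, #4, #8 alias.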
4085 
4086  unsigned ImmR = RegSize - Shift;
4087  // Limit the width to the length of the source type.
4088  unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4089  static const unsigned OpcTable[2][2] = {
4090  {AArch64::SBFMWri, AArch64::SBFMXri},
4091  {AArch64::UBFMWri, AArch64::UBFMXri}
4092  };
4093  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4094  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4095  Register TmpReg = MRI.createVirtualRegister(RC);
4096  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4097  TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4098  .addImm(0)
4099  .addReg(Op0)
4100  .addImm(AArch64::sub_32);
4101  Op0 = TmpReg;
4102  }
4103  return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4104 }
4105 
4106 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg,
4107  unsigned Op1Reg) {
4108  unsigned Opc = 0;
4109  bool NeedTrunc = false;
4110  uint64_t Mask = 0;
4111  switch (RetVT.SimpleTy) {
4112  default: return 0;
4113  case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
4114  case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4115  case MVT::i32: Opc = AArch64::LSRVWr; break;
4116  case MVT::i64: Opc = AArch64::LSRVXr; break;
4117  }
4118 
4119  const TargetRegisterClass *RC =
4120  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4121  if (NeedTrunc) {
4122  Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask);
4123  Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4124  }
4125  Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4126  if (NeedTrunc)
4127  ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4128  return ResultReg;
4129 }
4130 
4131 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4132  uint64_t Shift, bool IsZExt) {
4133  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4134  "Unexpected source/return type pair.");
4135  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4136  SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4137  "Unexpected source value type.");
4138  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4139  RetVT == MVT::i64) && "Unexpected return value type.");
4140 
4141  bool Is64Bit = (RetVT == MVT::i64);
4142  unsigned RegSize = Is64Bit ? 64 : 32;
4143  unsigned DstBits = RetVT.getSizeInBits();
4144  unsigned SrcBits = SrcVT.getSizeInBits();
4145  const TargetRegisterClass *RC =
4146  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4147 
4148  // Just emit a copy for "zero" shifts.
4149  if (Shift == 0) {
4150  if (RetVT == SrcVT) {
4151  Register ResultReg = createResultReg(RC);
4152  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4153  TII.get(TargetOpcode::COPY), ResultReg)
4154  .addReg(Op0);
4155  return ResultReg;
4156  } else
4157  return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4158  }
4159 
4160  // Don't deal with undefined shifts.
4161  if (Shift >= DstBits)
4162  return 0;
4163 
4164  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4165  // {S|U}BFM Wd, Wn, #r, #s
4166  // Wd<s-r:0> = Wn<s:r> when r <= s
4167 
4168  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4169  // %2 = lshr i16 %1, 4
4170  // Wd<7-4:0> = Wn<7:4>
4171  // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4172  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4173  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4174 
4175  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4176  // %2 = lshr i16 %1, 8
4177  // Wd<7-7,0> = Wn<7:7>
4178  // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4179  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4180  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4181 
4182  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4183  // %2 = lshr i16 %1, 12
4184  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4185  // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4186  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4187  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
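  // Concretely: zext i8 -> i32 shifted right by 4 gives ImmR = min(7, 4) = 4
  // and ImmS = 8 - 1 = 7, i.e. UBFM Wd, Wn, #4, #7, which is the
  // ubfx Wd, Wn, #4, #4 alias.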
4188 
4189  if (Shift >= SrcBits && IsZExt)
4190  return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4191 
4192  // It is not possible to fold a sign-extend into the LShr instruction. In this
4193  // case emit a sign-extend.
4194  if (!IsZExt) {
4195  Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4196  if (!Op0)
4197  return 0;
4198  SrcVT = RetVT;
4199  SrcBits = SrcVT.getSizeInBits();
4200  IsZExt = true;
4201  }
4202 
4203  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4204  unsigned ImmS = SrcBits - 1;
4205  static const unsigned OpcTable[2][2] = {
4206  {AArch64::SBFMWri, AArch64::SBFMXri},
4207  {AArch64::UBFMWri, AArch64::UBFMXri}
4208  };
4209  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4210  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4211  Register TmpReg = MRI.createVirtualRegister(RC);
4212  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4213  TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4214  .addImm(0)
4215  .addReg(Op0)
4216  .addImm(AArch64::sub_32);
4217  Op0 = TmpReg;
4218  }
4219  return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4220 }
4221 
4222 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg,
4223  unsigned Op1Reg) {
4224  unsigned Opc = 0;
4225  bool NeedTrunc = false;
4226  uint64_t Mask = 0;
4227  switch (RetVT.SimpleTy) {
4228  default: return 0;
4229  case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
4230  case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4231  case MVT::i32: Opc = AArch64::ASRVWr; break;
4232  case MVT::i64: Opc = AArch64::ASRVXr; break;
4233  }
4234 
4235  const TargetRegisterClass *RC =
4236  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4237  if (NeedTrunc) {
4238  Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
4239  Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4240  }
4241  Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4242  if (NeedTrunc)
4243  ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4244  return ResultReg;
4245 }
4246 
4247 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4248  uint64_t Shift, bool IsZExt) {
4249  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4250  "Unexpected source/return type pair.");
4251  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4252  SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4253  "Unexpected source value type.");
4254  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4255  RetVT == MVT::i64) && "Unexpected return value type.");
4256 
4257  bool Is64Bit = (RetVT == MVT::i64);
4258  unsigned RegSize = Is64Bit ? 64 : 32;
4259  unsigned DstBits = RetVT.getSizeInBits();
4260  unsigned SrcBits = SrcVT.getSizeInBits();
4261  const TargetRegisterClass *RC =
4262  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4263 
4264  // Just emit a copy for "zero" shifts.
4265  if (Shift == 0) {
4266  if (RetVT == SrcVT) {
4267  Register ResultReg = createResultReg(RC);
4268  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4269  TII.get(TargetOpcode::COPY), ResultReg)
4270  .addReg(Op0);
4271  return ResultReg;
4272  } else
4273  return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4274  }
4275 
4276  // Don't deal with undefined shifts.
4277  if (Shift >= DstBits)
4278  return 0;
4279 
4280  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4281  // {S|U}BFM Wd, Wn, #r, #s
4282  // Wd<s-r:0> = Wn<s:r> when r <= s
4283 
4284  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4285  // %2 = ashr i16 %1, 4
4286  // Wd<7-4:0> = Wn<7:4>
4287  // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4288  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4289  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4290 
4291  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4292  // %2 = ashr i16 %1, 8
4293  // Wd<7-7,0> = Wn<7:7>
4294  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4295  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4296  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4297 
4298  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4299  // %2 = ashr i16 %1, 12
4300  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4301  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4302  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4303  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4304 
4305  if (Shift >= SrcBits && IsZExt)
4306  return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4307 
4308  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4309  unsigned ImmS = SrcBits - 1;
4310  static const unsigned OpcTable[2][2] = {
4311  {AArch64::SBFMWri, AArch64::SBFMXri},
4312  {AArch64::UBFMWri, AArch64::UBFMXri}
4313  };
4314  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4315  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4316  Register TmpReg = MRI.createVirtualRegister(RC);
4317  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4318  TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4319  .addImm(0)
4320  .addReg(Op0)
4321  .addImm(AArch64::sub_32);
4322  Op0 = TmpReg;
4323  }
4324  return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4325 }
4326 
4327 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4328  bool IsZExt) {
4329  assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4330 
4331  // FastISel does not have plumbing to deal with extensions where the SrcVT or
4332  // DestVT are odd things, so test to make sure that they are both types we can
4333  // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4334  // bail out to SelectionDAG.
4335  if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4336  (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4337  ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4338  (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4339  return 0;
4340 
4341  unsigned Opc;
4342  unsigned Imm = 0;
4343 
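  // The extensions below are emitted as bitfield moves, UBFM/SBFM Rd, Rn, #0,
  // #Imm with Imm = 7/15/31; these are the uxtb/uxth/sxtb/sxth/sxtw aliases
  // (the i32 -> i64 zero-extension is simply a 32-bit-wide UBFX).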
4344  switch (SrcVT.SimpleTy) {
4345  default:
4346  return 0;
4347  case MVT::i1:
4348  return emiti1Ext(SrcReg, DestVT, IsZExt);
4349  case MVT::i8:
4350  if (DestVT == MVT::i64)
4351  Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4352  else
4353  Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4354  Imm = 7;
4355  break;
4356  case MVT::i16:
4357  if (DestVT == MVT::i64)
4358  Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4359  else
4360  Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4361  Imm = 15;
4362  break;
4363  case MVT::i32:
4364  assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4365  Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4366  Imm = 31;
4367  break;
4368  }
4369 
4370  // Handle i8 and i16 as i32.
4371  if (DestVT == MVT::i8 || DestVT == MVT::i16)
4372  DestVT = MVT::i32;
4373  else if (DestVT == MVT::i64) {
4374  Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4375  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4376  TII.get(AArch64::SUBREG_TO_REG), Src64)
4377  .addImm(0)
4378  .addReg(SrcReg)
4379  .addImm(AArch64::sub_32);
4380  SrcReg = Src64;
4381  }
4382 
4383  const TargetRegisterClass *RC =
4384  (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4385  return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm);
4386 }
4387 
4388 static bool isZExtLoad(const MachineInstr *LI) {
4389  switch (LI->getOpcode()) {
4390  default:
4391  return false;
4392  case AArch64::LDURBBi:
4393  case AArch64::LDURHHi:
4394  case AArch64::LDURWi:
4395  case AArch64::LDRBBui:
4396  case AArch64::LDRHHui:
4397  case AArch64::LDRWui:
4398  case AArch64::LDRBBroX:
4399  case AArch64::LDRHHroX:
4400  case AArch64::LDRWroX:
4401  case AArch64::LDRBBroW:
4402  case AArch64::LDRHHroW:
4403  case AArch64::LDRWroW:
4404  return true;
4405  }
4406 }
4407 
4408 static bool isSExtLoad(const MachineInstr *LI) {
4409  switch (LI->getOpcode()) {
4410  default:
4411  return false;
4412  case AArch64::LDURSBWi:
4413  case AArch64::LDURSHWi:
4414  case AArch64::LDURSBXi:
4415  case AArch64::LDURSHXi:
4416  case AArch64::LDURSWi:
4417  case AArch64::LDRSBWui:
4418  case AArch64::LDRSHWui:
4419  case AArch64::LDRSBXui:
4420  case AArch64::LDRSHXui:
4421  case AArch64::LDRSWui:
4422  case AArch64::LDRSBWroX:
4423  case AArch64::LDRSHWroX:
4424  case AArch64::LDRSBXroX:
4425  case AArch64::LDRSHXroX:
4426  case AArch64::LDRSWroX:
4427  case AArch64::LDRSBWroW:
4428  case AArch64::LDRSHWroW:
4429  case AArch64::LDRSBXroW:
4430  case AArch64::LDRSHXroW:
4431  case AArch64::LDRSWroW:
4432  return true;
4433  }
4434 }
4435 
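// AArch64 loads into a W register implicitly zero the upper 32 bits, and the
// sign-extending load variants already produce a full 64-bit result, so an
// explicit zext/sext of a freshly selected load can often be folded away here.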
4436 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4437  MVT SrcVT) {
4438  const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4439  if (!LI || !LI->hasOneUse())
4440  return false;
4441 
4442  // Check if the load instruction has already been selected.
4443  Register Reg = lookUpRegForValue(LI);
4444  if (!Reg)
4445  return false;
4446 
4447  MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4448  if (!MI)
4449  return false;
4450 
4451  // Check if the correct load instruction has been emitted - SelectionDAG might
4452  // have emitted a zero-extending load, but we need a sign-extending load.
4453  bool IsZExt = isa<ZExtInst>(I);
4454  const auto *LoadMI = MI;
4455  if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4456  LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4457  Register LoadReg = MI->getOperand(1).getReg();
4458  LoadMI = MRI.getUniqueVRegDef(LoadReg);
4459  assert(LoadMI && "Expected valid instruction");
4460  }
4461  if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4462  return false;
4463 
4464  // Nothing to be done.
4465  if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4466  updateValueMap(I, Reg);
4467  return true;
4468  }
4469 
4470  if (IsZExt) {
4471  Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
4472  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4473  TII.get(AArch64::SUBREG_TO_REG), Reg64)
4474  .addImm(0)
4475  .addReg(Reg, getKillRegState(true))
4476  .addImm(AArch64::sub_32);
4477  Reg = Reg64;
4478  } else {
4479  assert((MI->getOpcode() == TargetOpcode::COPY &&
4480  MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4481  "Expected copy instruction");
4482  Reg = MI->getOperand(1).getReg();
4483  MachineBasicBlock::iterator I(MI);
4484  removeDeadCode(I, std::next(I));
4485  }
4486  updateValueMap(I, Reg);
4487  return true;
4488 }
4489 
4490 bool AArch64FastISel::selectIntExt(const Instruction *I) {
4491  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4492  "Unexpected integer extend instruction.");
4493  MVT RetVT;
4494  MVT SrcVT;
4495  if (!isTypeSupported(I->getType(), RetVT))
4496  return false;
4497 
4498  if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4499  return false;
4500 
4501  // Try to optimize already sign-/zero-extended values from load instructions.
4502  if (optimizeIntExtLoad(I, RetVT, SrcVT))
4503  return true;
4504 
4505  Register SrcReg = getRegForValue(I->getOperand(0));
4506  if (!SrcReg)
4507  return false;
4508 
4509  // Try to optimize already sign-/zero-extended values from function arguments.
4510  bool IsZExt = isa<ZExtInst>(I);
4511  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4512  if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4513  if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4514  Register ResultReg = createResultReg(&AArch64::GPR64RegClass);
4515  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4516  TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4517  .addImm(0)
4518  .addReg(SrcReg)
4519  .addImm(AArch64::sub_32);
4520  SrcReg = ResultReg;
4521  }
4522 
4523  updateValueMap(I, SrcReg);
4524  return true;
4525  }
4526  }
4527 
4528  unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4529  if (!ResultReg)
4530  return false;
4531 
4532  updateValueMap(I, ResultReg);
4533  return true;
4534 }
4535 
4536 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4537  EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4538  if (!DestEVT.isSimple())
4539  return false;
4540 
4541  MVT DestVT = DestEVT.getSimpleVT();
4542  if (DestVT != MVT::i64 && DestVT != MVT::i32)
4543  return false;
4544 
4545  unsigned DivOpc;
4546  bool Is64bit = (DestVT == MVT::i64);
4547  switch (ISDOpcode) {
4548  default:
4549  return false;
4550  case ISD::SREM:
4551  DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4552  break;
4553  case ISD::UREM:
4554  DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4555  break;
4556  }
4557  unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4558  Register Src0Reg = getRegForValue(I->getOperand(0));
4559  if (!Src0Reg)
4560  return false;
4561 
4562  Register Src1Reg = getRegForValue(I->getOperand(1));
4563  if (!Src1Reg)
4564  return false;
4565 
4566  const TargetRegisterClass *RC =
4567  (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4568  Register QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg);
4569  assert(QuotReg && "Unexpected DIV instruction emission failure.");
4570  // The remainder is computed as numerator - (quotient * denominator) using the
4571  // MSUB instruction.
4572  Register ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg);
4573  updateValueMap(I, ResultReg);
4574  return true;
4575 }
4576 
4577 bool AArch64FastISel::selectMul(const Instruction *I) {
4578  MVT VT;
4579  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4580  return false;
4581 
4582  if (VT.isVector())
4583  return selectBinaryOp(I, ISD::MUL);
4584 
4585  const Value *Src0 = I->getOperand(0);
4586  const Value *Src1 = I->getOperand(1);
4587  if (const auto *C = dyn_cast<ConstantInt>(Src0))
4588  if (C->getValue().isPowerOf2())
4589  std::swap(Src0, Src1);
4590 
4591  // Try to simplify to a shift instruction.
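  // (e.g. a multiply by 8 becomes a left shift by 3, and a free
  //  zero-/sign-extension of the operand can be folded into that shift).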
4592  if (const auto *C = dyn_cast<ConstantInt>(Src1))
4593  if (C->getValue().isPowerOf2()) {
4594  uint64_t ShiftVal = C->getValue().logBase2();
4595  MVT SrcVT = VT;
4596  bool IsZExt = true;
4597  if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4598  if (!isIntExtFree(ZExt)) {
4599  MVT VT;
4600  if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4601  SrcVT = VT;
4602  IsZExt = true;
4603  Src0 = ZExt->getOperand(0);
4604  }
4605  }
4606  } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4607  if (!isIntExtFree(SExt)) {
4608  MVT VT;
4609  if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4610  SrcVT = VT;
4611  IsZExt = false;
4612  Src0 = SExt->getOperand(0);
4613  }
4614  }
4615  }
4616 
4617  Register Src0Reg = getRegForValue(Src0);
4618  if (!Src0Reg)
4619  return false;
4620 
4621  unsigned ResultReg =
4622  emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt);
4623 
4624  if (ResultReg) {
4625  updateValueMap(I, ResultReg);
4626  return true;
4627  }
4628  }
4629 
4630  Register Src0Reg = getRegForValue(I->getOperand(0));
4631  if (!Src0Reg)
4632  return false;
4633 
4634  Register Src1Reg = getRegForValue(I->getOperand(1));
4635  if (!Src1Reg)
4636  return false;
4637 
4638  unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg);
4639 
4640  if (!ResultReg)
4641  return false;
4642 
4643  updateValueMap(I, ResultReg);
4644  return true;
4645 }
4646 
4647 bool AArch64FastISel::selectShift(const Instruction *I) {
4648  MVT RetVT;
4649  if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4650  return false;
4651 
4652  if (RetVT.isVector())
4653  return selectOperator(I, I->getOpcode());
4654 
4655  if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4656  unsigned ResultReg = 0;
4657  uint64_t ShiftVal = C->getZExtValue();
4658  MVT SrcVT = RetVT;
4659  bool IsZExt = I->getOpcode() != Instruction::AShr;
4660  const Value *Op0 = I->getOperand(0);
4661  if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4662  if (!isIntExtFree(ZExt)) {
4663  MVT TmpVT;
4664  if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4665  SrcVT = TmpVT;
4666  IsZExt = true;
4667  Op0 = ZExt->getOperand(0);
4668  }
4669  }
4670  } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4671  if (!isIntExtFree(SExt)) {
4672  MVT TmpVT;
4673  if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4674  SrcVT = TmpVT;
4675  IsZExt = false;
4676  Op0 = SExt->getOperand(0);
4677  }
4678  }
4679  }
4680 
4681  Register Op0Reg = getRegForValue(Op0);
4682  if (!Op0Reg)
4683  return false;
4684 
4685  switch (I->getOpcode()) {
4686  default: llvm_unreachable("Unexpected instruction.");
4687  case Instruction::Shl:
4688  ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4689  break;
4690  case Instruction::AShr:
4691  ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4692  break;
4693  case Instruction::LShr:
4694  ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4695  break;
4696  }
4697  if (!ResultReg)
4698  return false;
4699 
4700  updateValueMap(I, ResultReg);
4701  return true;
4702  }
4703 
4704  Register Op0Reg = getRegForValue(I->getOperand(0));
4705  if (!Op0Reg)
4706  return false;
4707 
4708  Register Op1Reg = getRegForValue(I->getOperand(1));
4709  if (!Op1Reg)
4710  return false;
4711 
4712  unsigned ResultReg = 0;
4713  switch (I->getOpcode()) {
4714  default: llvm_unreachable("Unexpected instruction.");
4715  case Instruction::Shl:
4716  ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg);
4717  break;
4718  case Instruction::AShr:
4719  ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg);
4720  break;
4721  case Instruction::LShr:
4722  ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg);
4723  break;
4724  }
4725 
4726  if (!ResultReg)
4727  return false;
4728 
4729  updateValueMap(I, ResultReg);
4730  return true;
4731 }
4732 
4733 bool AArch64FastISel::selectBitCast(const Instruction *I) {
4734  MVT RetVT, SrcVT;
4735 
4736  if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4737  return false;
4738  if (!isTypeLegal(I->getType(), RetVT))
4739  return false;
4740 
4741  unsigned Opc;
4742  if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4743  Opc = AArch64::FMOVWSr;
4744  else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4745  Opc = AArch64::FMOVXDr;
4746  else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4747  Opc = AArch64::FMOVSWr;
4748  else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4749  Opc = AArch64::FMOVDXr;
4750  else
4751  return false;
4752 
4753  const TargetRegisterClass *RC = nullptr;
4754  switch (RetVT.SimpleTy) {
4755  default: llvm_unreachable("Unexpected value type.");
4756  case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4757  case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4758  case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4759  case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4760  }
4761  Register Op0Reg = getRegForValue(I->getOperand(0));
4762  if (!Op0Reg)
4763  return false;
4764 
4765  Register ResultReg = fastEmitInst_r(Opc, RC, Op0Reg);
4766  if (!ResultReg)
4767  return false;
4768 
4769  updateValueMap(I, ResultReg);
4770  return true;
4771 }
4772 
4773 bool AArch64FastISel::selectFRem(const Instruction *I) {
4774  MVT RetVT;
4775  if (!isTypeLegal(I->getType(), RetVT))
4776  return false;
4777 
4778  RTLIB::Libcall LC;
4779  switch (RetVT.SimpleTy) {
4780  default:
4781  return false;
4782  case MVT::f32:
4783  LC = RTLIB::REM_F32;
4784  break;
4785  case MVT::f64:
4786  LC = RTLIB::REM_F64;
4787  break;
4788  }
4789 
4790  ArgListTy Args;
4791  Args.reserve(I->getNumOperands());
4792 
4793  // Populate the argument list.
4794  for (auto &Arg : I->operands()) {
4795  ArgListEntry Entry;
4796  Entry.Val = Arg;
4797  Entry.Ty = Arg->getType();
4798  Args.push_back(Entry);
4799  }
4800 
4801  CallLoweringInfo CLI;
4802  MCContext &Ctx = MF->getContext();
4803  CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4804  TLI.getLibcallName(LC), std::move(Args));
4805  if (!lowerCallTo(CLI))
4806  return false;
4807  updateValueMap(I, CLI.ResultReg);
4808  return true;
4809 }
4810 
4811 bool AArch64FastISel::selectSDiv(const Instruction *I) {
4812  MVT VT;
4813  if (!isTypeLegal(I->getType(), VT))
4814  return false;
4815 
4816  if (!isa<ConstantInt>(I->getOperand(1)))
4817  return selectBinaryOp(I, ISD::SDIV);
4818 
4819  const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4820  if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4821  !(C.isPowerOf2() || C.isNegatedPowerOf2()))
4822  return selectBinaryOp(I, ISD::SDIV);
4823 
4824  unsigned Lg2 = C.countTrailingZeros();
4825  Register Src0Reg = getRegForValue(I->getOperand(0));
4826  if (!Src0Reg)
4827  return false;
4828 
4829  if (cast<BinaryOperator>(I)->isExact()) {
4830  unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2);
4831  if (!ResultReg)
4832  return false;
4833  updateValueMap(I, ResultReg);
4834  return true;
4835  }
4836 
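  // Otherwise round toward zero by adding Pow2 - 1 when the dividend is
  // negative. For a divide by 8 this yields, roughly:
  //   add  w1, w0, #7
  //   cmp  w0, #0
  //   csel w1, w1, w0, lt
  //   asr  w0, w1, #3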
4837  int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4838  unsigned AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne);
4839  if (!AddReg)
4840  return false;
4841 
4842  // (Src0 < 0) ? Pow2 - 1 : 0;
4843  if (!emitICmp_ri(VT, Src0Reg, 0))
4844  return false;
4845 
4846  unsigned SelectOpc;
4847  const TargetRegisterClass *RC;
4848  if (VT == MVT::i64) {
4849  SelectOpc = AArch64::CSELXr;
4850  RC = &AArch64::GPR64RegClass;
4851  } else {
4852  SelectOpc = AArch64::CSELWr;
4853  RC = &AArch64::GPR32RegClass;
4854  }
4855  Register SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg,
4856  AArch64CC::LT);
4857  if (!SelectReg)
4858  return false;
4859 
4860  // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4861  // negate the result.
4862  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4863  unsigned ResultReg;
4864  if (C.isNegative())
4865  ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg,
4866  AArch64_AM::ASR, Lg2);
4867  else
4868  ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2);
4869 
4870  if (!ResultReg)
4871  return false;
4872 
4873  updateValueMap(I, ResultReg);
4874  return true;
4875 }
4876 
4877 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4878 /// have to duplicate it for AArch64, because otherwise we would fail during the
4879 /// sign-extend emission.
4880 unsigned AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4881  Register IdxN = getRegForValue(Idx);
4882  if (IdxN == 0)
4883  // Unhandled operand. Halt "fast" selection and bail.
4884  return 0;
4885 
4886  // If the index is smaller or larger than intptr_t, truncate or extend it.
4887  MVT PtrVT = TLI.getPointerTy(DL);
4888  EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4889  if (IdxVT.bitsLT(PtrVT)) {
4890  IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
4891  } else if (IdxVT.bitsGT(PtrVT))
4892  llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4893  return IdxN;
4894 }
4895 
4896 /// This is mostly a copy of the existing FastISel GEP code, but we have to
4897 /// duplicate it for AArch64, because otherwise we would bail out even for
4898 /// simple cases. This is because the standard fastEmit functions don't cover
4899  /// MUL at all and ADD is lowered very inefficiently.
4900 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4901  if (Subtarget->isTargetILP32())
4902  return false;
4903 
4904  Register N = getRegForValue(I->getOperand(0));
4905  if (!N)
4906  return false;
4907 
4908  // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4909  // into a single N = N + TotalOffset.
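  // For example (illustrative IR), for
  //   %p = getelementptr inbounds { i32, i32 }, ptr %base, i64 %i, i32 1
  // the loop below computes %base + %i * 8, and the trailing field offset of 4
  // is folded into a single final add.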
4910  uint64_t TotalOffs = 0;
4911  MVT VT = TLI.getPointerTy(DL);
4913  GTI != E; ++GTI) {
4914  const Value *Idx = GTI.getOperand();
4915  if (auto *StTy = GTI.getStructTypeOrNull()) {
4916  unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4917  // N = N + Offset
4918  if (Field)
4919  TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4920  } else {
4921  Type *Ty = GTI.getIndexedType();
4922 
4923  // If this is a constant subscript, handle it quickly.
4924  if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4925  if (CI->isZero())
4926  continue;
4927  // N = N + Offset
4928  TotalOffs +=
4929  DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
4930  continue;
4931  }
4932  if (TotalOffs) {
4933  N = emitAdd_ri_(VT, N, TotalOffs);
4934  if (!N)
4935  return false;
4936  TotalOffs = 0;
4937  }
4938 
4939  // N = N + Idx * ElementSize;
4940  uint64_t ElementSize = DL.getTypeAllocSize(Ty);
4941  unsigned IdxN = getRegForGEPIndex(Idx);
4942  if (!IdxN)
4943  return false;
4944 
4945  if (ElementSize != 1) {
4946  unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
4947  if (!C)
4948  return false;
4949  IdxN = emitMul_rr(VT, IdxN, C);
4950  if (!IdxN)
4951  return false;
4952  }
4953  N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
4954  if (!N)
4955  return false;
4956  }
4957  }
4958  if (TotalOffs) {
4959  N = emitAdd_ri_(VT, N, TotalOffs);
4960  if (!N)
4961  return false;
4962  }
4963  updateValueMap(I, N);
4964  return true;
4965 }
4966 
4967 bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
4968  assert(TM.getOptLevel() == CodeGenOpt::None &&
4969  "cmpxchg survived AtomicExpand at optlevel > -O0");
4970 
4971  auto *RetPairTy = cast<StructType>(I->getType());
4972  Type *RetTy = RetPairTy->getTypeAtIndex(0U);
4973  assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
4974  "cmpxchg has a non-i1 status result");
4975 
4976  MVT VT;
4977  if (!isTypeLegal(RetTy, VT))
4978  return false;
4979 
4980  const TargetRegisterClass *ResRC;
4981  unsigned Opc, CmpOpc;
4982  // This only supports i32/i64, because i8/i16 aren't legal, and the generic
4983  // extractvalue selection doesn't support that.
4984  if (VT == MVT::i32) {
4985  Opc = AArch64::CMP_SWAP_32;
4986  CmpOpc = AArch64::SUBSWrs;
4987  ResRC = &AArch64::GPR32RegClass;
4988  } else if (VT == MVT::i64) {
4989  Opc = AArch64::CMP_SWAP_64;
4990  CmpOpc = AArch64::SUBSXrs;
4991  ResRC = &AArch64::GPR64RegClass;
4992  } else {
4993  return false;
4994  }
4995 
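  // The emitted sequence is: the CMP_SWAP pseudo (expanded after FastISel into
  // the actual atomic loop), a SUBS that compares the loaded value with the
  // expected value, and a CSINC that turns the resulting flags into the i1
  // success result.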
4996  const MCInstrDesc &II = TII.get(Opc);
4997 
4998  const Register AddrReg = constrainOperandRegClass(
4999  II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
5000  const Register DesiredReg = constrainOperandRegClass(
5001  II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
5002  const Register NewReg = constrainOperandRegClass(
5003  II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
5004 
5005  const Register ResultReg1 = createResultReg(ResRC);
5006  const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5007  const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5008 
5009  // FIXME: MachineMemOperand doesn't support cmpxchg yet.
5010  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
5011  .addDef(ResultReg1)
5012  .addDef(ScratchReg)
5013  .addUse(AddrReg)
5014  .addUse(DesiredReg)
5015  .addUse(NewReg);
5016 
5017  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc))
5018  .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5019  .addUse(ResultReg1)
5020  .addUse(DesiredReg)
5021  .addImm(0);
5022 
5023  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr))
5024  .addDef(ResultReg2)
5025  .addUse(AArch64::WZR)
5026  .addUse(AArch64::WZR)
5027  .addImm(AArch64CC::NE);
5028 
5029  assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5030  updateValueMap(I, ResultReg1, 2);
5031  return true;
5032 }
5033 
5034 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5035  if (TLI.fallBackToDAGISel(*I))
5036  return false;
5037  switch (I->getOpcode()) {
5038  default:
5039  break;
5040  case Instruction::Add:
5041  case Instruction::Sub:
5042  return selectAddSub(I);
5043  case Instruction::Mul:
5044  return selectMul(I);
5045  case Instruction::SDiv:
5046  return selectSDiv(I);
5047  case Instruction::SRem:
5048  if (!selectBinaryOp(I, ISD::SREM))
5049  return selectRem(I, ISD::SREM);
5050  return true;
5051  case Instruction::URem:
5052  if (!selectBinaryOp(I, ISD::UREM))
5053  return selectRem(I, ISD::UREM);
5054  return true;
5055  case Instruction::Shl:
5056  case Instruction::LShr:
5057  case Instruction::AShr:
5058  return selectShift(I);
5059  case Instruction::And:
5060  case Instruction::Or:
5061  case Instruction::Xor:
5062  return selectLogicalOp(I);
5063  case Instruction::Br:
5064  return selectBranch(I);
5065  case Instruction::IndirectBr:
5066  return selectIndirectBr(I);
5067  case Instruction::BitCast:
5068  if (!FastISel::selectBitCast(I))
5069  return selectBitCast(I);
5070  return true;
5071  case Instruction::FPToSI:
5072  if (!selectCast(I, ISD::FP_TO_SINT))
5073  return selectFPToInt(I, /*Signed=*/true);
5074  return true;
5075  case Instruction::FPToUI:
5076  return selectFPToInt(I, /*Signed=*/false);
5077  case Instruction::ZExt:
5078  case Instruction::SExt:
5079  return selectIntExt(I);
5080  case Instruction::Trunc:
5081  if (!selectCast(I, ISD::TRUNCATE))
5082  return selectTrunc(I);
5083  return true;
5084  case Instruction::FPExt:
5085  return selectFPExt(I);
5086  case Instruction::FPTrunc:
5087  return selectFPTrunc(I);
5088  case Instruction::SIToFP:
5089  if (!selectCast(I, ISD::SINT_TO_FP))
5090  return selectIntToFP(I, /*Signed=*/true);
5091  return true;
5092  case Instruction::UIToFP:
5093  return selectIntToFP(I, /*Signed=*/false);
5094  case Instruction::Load:
5095  return selectLoad(I);
5096  case Instruction::Store:
5097  return selectStore(I);
5098  case Instruction::FCmp:
5099  case Instruction::ICmp:
5100  return selectCmp(I);
5101  case Instruction::Select:
5102  return selectSelect(I);
5103  case Instruction::Ret:
5104  return selectRet(I);
5105  case Instruction::FRem:
5106  return selectFRem(I);
5107  case Instruction::GetElementPtr:
5108  return selectGetElementPtr(I);
5109  case Instruction::AtomicCmpXchg:
5110  return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5111  }
5112 
5113  // Fall back to target-independent instruction selection.
5114  return selectOperator(I, I->getOpcode());
5115 }
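// Editorial note (not part of the original source): fastSelectInstruction() is
// the per-instruction dispatcher. Cases that have a generic FastISel path
// (e.g. SRem/URem via selectBinaryOp, Trunc via selectCast, BitCast via
// FastISel::selectBitCast) try it first and only fall back to the hand-written
// AArch64 routine; opcodes the switch does not claim drop through to
// selectOperator(), and a false return from here leaves the instruction to
// SelectionDAG. A new case would follow the same shape (placeholder names,
// illustrative only):
//
//   case Instruction::SomeOp:                  // hypothetical opcode
//     if (!selectBinaryOp(I, ISD::SOME_OP))    // try the generic path first
//       return selectSomeOp(I);                // hypothetical custom lowering
//     return true;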
5116 
5117 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
5118  const TargetLibraryInfo *LibInfo) {
5119 
5120  SMEAttrs CallerAttrs(*FuncInfo.Fn);
5121  if (CallerAttrs.hasZAState() ||
5122  (!CallerAttrs.hasStreamingInterface() && CallerAttrs.hasStreamingBody()))
5123  return nullptr;
5124  return new AArch64FastISel(FuncInfo, LibInfo);
5125 }
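// Editorial note (not part of the original source): AArch64::createFastISel is
// the factory SelectionDAGISel reaches through the TargetLowering hook. When it
// returns nullptr (functions with ZA state, or a streaming body without a
// streaming interface, which FastISel cannot lower correctly under the SME ABI),
// the backend simply stays on the full SelectionDAG path. The forwarding hook in
// AArch64ISelLowering.cpp is essentially the following (paraphrased sketch, not
// a verbatim copy of that file):
//
//   FastISel *AArch64TargetLowering::createFastISel(
//       FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const {
//     return AArch64::createFastISel(FuncInfo, LibInfo);
//   }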