1//===- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the AArch64-specific support for the FastISel class. Some
10// of the target-specific code is generated by tablegen in the file
11// AArch64GenFastISel.inc, which is #included here.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AArch64.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
23#include "llvm/ADT/APFloat.h"
24#include "llvm/ADT/APInt.h"
25#include "llvm/ADT/DenseMap.h"
41#include "llvm/IR/Argument.h"
42#include "llvm/IR/Attributes.h"
43#include "llvm/IR/BasicBlock.h"
44#include "llvm/IR/CallingConv.h"
45#include "llvm/IR/Constant.h"
46#include "llvm/IR/Constants.h"
47#include "llvm/IR/DataLayout.h"
49#include "llvm/IR/Function.h"
51#include "llvm/IR/GlobalValue.h"
52#include "llvm/IR/InstrTypes.h"
53#include "llvm/IR/Instruction.h"
56#include "llvm/IR/Intrinsics.h"
57#include "llvm/IR/IntrinsicsAArch64.h"
58#include "llvm/IR/Module.h"
59#include "llvm/IR/Operator.h"
60#include "llvm/IR/Type.h"
61#include "llvm/IR/User.h"
62#include "llvm/IR/Value.h"
63#include "llvm/MC/MCInstrDesc.h"
64#include "llvm/MC/MCSymbol.h"
71#include <algorithm>
72#include <cassert>
73#include <cstdint>
74#include <iterator>
75#include <utility>
76
77using namespace llvm;
78
79namespace {
80
81class AArch64FastISel final : public FastISel {
82 class Address {
83 public:
84 enum BaseKind { RegBase, FrameIndexBase };
85
86 private:
87 BaseKind Kind = RegBase;
88 AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
89 union {
90 unsigned Reg;
91 int FI;
92 } Base;
93 Register OffsetReg;
94 unsigned Shift = 0;
95 int64_t Offset = 0;
96 const GlobalValue *GV = nullptr;
97
98 public:
99 Address() { Base.Reg = 0; }
100
101 void setKind(BaseKind K) { Kind = K; }
102 BaseKind getKind() const { return Kind; }
103 void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
104 AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
105 bool isRegBase() const { return Kind == RegBase; }
106 bool isFIBase() const { return Kind == FrameIndexBase; }
107
108 void setReg(Register Reg) {
109 assert(isRegBase() && "Invalid base register access!");
110 Base.Reg = Reg.id();
111 }
112
113 Register getReg() const {
114 assert(isRegBase() && "Invalid base register access!");
115 return Base.Reg;
116 }
117
118 void setOffsetReg(Register Reg) { OffsetReg = Reg; }
119
120 Register getOffsetReg() const { return OffsetReg; }
121
122 void setFI(unsigned FI) {
123 assert(isFIBase() && "Invalid base frame index access!");
124 Base.FI = FI;
125 }
126
127 unsigned getFI() const {
128 assert(isFIBase() && "Invalid base frame index access!");
129 return Base.FI;
130 }
131
132 void setOffset(int64_t O) { Offset = O; }
133 int64_t getOffset() { return Offset; }
134 void setShift(unsigned S) { Shift = S; }
135 unsigned getShift() { return Shift; }
136
137 void setGlobalValue(const GlobalValue *G) { GV = G; }
138 const GlobalValue *getGlobalValue() { return GV; }
139 };
140
141 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
142 /// make the right decision when generating code for different targets.
143 const AArch64Subtarget *Subtarget;
144 LLVMContext *Context;
145
146 bool fastLowerArguments() override;
147 bool fastLowerCall(CallLoweringInfo &CLI) override;
148 bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
149
150private:
151 // Selection routines.
152 bool selectAddSub(const Instruction *I);
153 bool selectLogicalOp(const Instruction *I);
154 bool selectLoad(const Instruction *I);
155 bool selectStore(const Instruction *I);
156 bool selectBranch(const Instruction *I);
157 bool selectIndirectBr(const Instruction *I);
158 bool selectCmp(const Instruction *I);
159 bool selectSelect(const Instruction *I);
160 bool selectFPExt(const Instruction *I);
161 bool selectFPTrunc(const Instruction *I);
162 bool selectFPToInt(const Instruction *I, bool Signed);
163 bool selectIntToFP(const Instruction *I, bool Signed);
164 bool selectRem(const Instruction *I, unsigned ISDOpcode);
165 bool selectRet(const Instruction *I);
166 bool selectTrunc(const Instruction *I);
167 bool selectIntExt(const Instruction *I);
168 bool selectMul(const Instruction *I);
169 bool selectShift(const Instruction *I);
170 bool selectBitCast(const Instruction *I);
171 bool selectFRem(const Instruction *I);
172 bool selectSDiv(const Instruction *I);
173 bool selectGetElementPtr(const Instruction *I);
174 bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
175
176 // Utility helper routines.
177 bool isTypeLegal(Type *Ty, MVT &VT);
178 bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
179 bool isValueAvailable(const Value *V) const;
180 bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
181 bool computeCallAddress(const Value *V, Address &Addr);
182 bool simplifyAddress(Address &Addr, MVT VT);
183 void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
184 MachineMemOperand::Flags Flags,
185 unsigned ScaleFactor, MachineMemOperand *MMO);
186 bool isMemCpySmall(uint64_t Len, MaybeAlign Alignment);
187 bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
188 MaybeAlign Alignment);
189 bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
190 const Value *Cond);
191 bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
192 bool optimizeSelect(const SelectInst *SI);
193 Register getRegForGEPIndex(const Value *Idx);
194
195 // Emit helper routines.
196 Register emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
197 const Value *RHS, bool SetFlags = false,
198 bool WantResult = true, bool IsZExt = false);
199 Register emitAddSub_rr(bool UseAdd, MVT RetVT, Register LHSReg,
200 Register RHSReg, bool SetFlags = false,
201 bool WantResult = true);
202 Register emitAddSub_ri(bool UseAdd, MVT RetVT, Register LHSReg, uint64_t Imm,
203 bool SetFlags = false, bool WantResult = true);
204 Register emitAddSub_rs(bool UseAdd, MVT RetVT, Register LHSReg,
205 Register RHSReg, AArch64_AM::ShiftExtendType ShiftType,
206 uint64_t ShiftImm, bool SetFlags = false,
207 bool WantResult = true);
208 Register emitAddSub_rx(bool UseAdd, MVT RetVT, Register LHSReg,
209 Register RHSReg, AArch64_AM::ShiftExtendType ExtType,
210 uint64_t ShiftImm, bool SetFlags = false,
211 bool WantResult = true);
212
213 // Emit functions.
214 bool emitCompareAndBranch(const BranchInst *BI);
215 bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
216 bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
217 bool emitICmp_ri(MVT RetVT, Register LHSReg, uint64_t Imm);
218 bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
219 Register emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
220 MachineMemOperand *MMO = nullptr);
221 bool emitStore(MVT VT, Register SrcReg, Address Addr,
222 MachineMemOperand *MMO = nullptr);
223 bool emitStoreRelease(MVT VT, Register SrcReg, Register AddrReg,
224 MachineMemOperand *MMO = nullptr);
225 Register emitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT, bool isZExt);
226 Register emiti1Ext(Register SrcReg, MVT DestVT, bool isZExt);
227 Register emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
228 bool SetFlags = false, bool WantResult = true,
229 bool IsZExt = false);
230 Register emitAdd_ri_(MVT VT, Register Op0, int64_t Imm);
231 Register emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
232 bool SetFlags = false, bool WantResult = true,
233 bool IsZExt = false);
234 Register emitSubs_rr(MVT RetVT, Register LHSReg, Register RHSReg,
235 bool WantResult = true);
236 Register emitSubs_rs(MVT RetVT, Register LHSReg, Register RHSReg,
237 AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
238 bool WantResult = true);
239 Register emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
240 const Value *RHS);
241 Register emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, Register LHSReg,
242 uint64_t Imm);
243 Register emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, Register LHSReg,
244 Register RHSReg, uint64_t ShiftImm);
245 Register emitAnd_ri(MVT RetVT, Register LHSReg, uint64_t Imm);
246 Register emitMul_rr(MVT RetVT, Register Op0, Register Op1);
247 Register emitSMULL_rr(MVT RetVT, Register Op0, Register Op1);
248 Register emitUMULL_rr(MVT RetVT, Register Op0, Register Op1);
249 Register emitLSL_rr(MVT RetVT, Register Op0Reg, Register Op1Reg);
250 Register emitLSL_ri(MVT RetVT, MVT SrcVT, Register Op0Reg, uint64_t Imm,
251 bool IsZExt = true);
252 Register emitLSR_rr(MVT RetVT, Register Op0Reg, Register Op1Reg);
253 Register emitLSR_ri(MVT RetVT, MVT SrcVT, Register Op0Reg, uint64_t Imm,
254 bool IsZExt = true);
255 Register emitASR_rr(MVT RetVT, Register Op0Reg, Register Op1Reg);
256 Register emitASR_ri(MVT RetVT, MVT SrcVT, Register Op0Reg, uint64_t Imm,
257 bool IsZExt = false);
258
259 Register materializeInt(const ConstantInt *CI, MVT VT);
260 Register materializeFP(const ConstantFP *CFP, MVT VT);
261 Register materializeGV(const GlobalValue *GV);
262
263 // Call handling routines.
264private:
265 CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
266 bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
267 SmallVectorImpl<Type *> &OrigTys, unsigned &NumBytes);
268 bool finishCall(CallLoweringInfo &CLI, unsigned NumBytes);
269
270public:
271 // Backend specific FastISel code.
272 Register fastMaterializeAlloca(const AllocaInst *AI) override;
273 Register fastMaterializeConstant(const Constant *C) override;
274 Register fastMaterializeFloatZero(const ConstantFP *CF) override;
275
276 explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
277 const TargetLibraryInfo *LibInfo)
278 : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
279 Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>();
280 Context = &FuncInfo.Fn->getContext();
281 }
282
283 bool fastSelectInstruction(const Instruction *I) override;
284
285#include "AArch64GenFastISel.inc"
286};
287
288} // end anonymous namespace
289
290/// Check if the sign-/zero-extend will be a noop.
291static bool isIntExtFree(const Instruction *I) {
293 "Unexpected integer extend instruction.");
294 assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
295 "Unexpected value type.");
296 bool IsZExt = isa<ZExtInst>(I);
297
298 if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
299 if (LI->hasOneUse())
300 return true;
301
302 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
303 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
304 return true;
305
306 return false;
307}
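// For illustration: a single-use pattern such as
//   %v = load i8, ptr %p
//   %e = zext i8 %v to i32
// is considered free because the load can be selected as an extending load
// (ldrb and friends), so no separate UXTB/SXTB instruction is needed.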
308
309/// Determine the implicit scale factor that is applied by a memory
310/// operation for a given value type.
311static unsigned getImplicitScaleFactor(MVT VT) {
312 switch (VT.SimpleTy) {
313 default:
314 return 0; // invalid
315 case MVT::i1: // fall-through
316 case MVT::i8:
317 return 1;
318 case MVT::i16:
319 return 2;
320 case MVT::i32: // fall-through
321 case MVT::f32:
322 return 4;
323 case MVT::i64: // fall-through
324 case MVT::f64:
325 return 8;
326 }
327}
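// Example: an MVT::i16 access has a scale factor of 2, so a byte offset of 6 can
// use the scaled unsigned-immediate form (ldrh/strh encode it as 6 / 2 = 3); an
// odd offset has to fall back to the unscaled ldurh/sturh forms handled below.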
328
329CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
330 if (CC == CallingConv::GHC)
331 return CC_AArch64_GHC;
332 if (CC == CallingConv::CFGuard_Check)
333 return CC_AArch64_Win64_CFGuard_Check;
334 if (Subtarget->isTargetDarwin())
335 return CC_AArch64_DarwinPCS;
336 if (Subtarget->isTargetWindows())
337 return CC_AArch64_Win64PCS;
338 return CC_AArch64_AAPCS;
339}
340
341Register AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
342 assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
343 "Alloca should always return a pointer.");
344
345 // Don't handle dynamic allocas.
346 auto SI = FuncInfo.StaticAllocaMap.find(AI);
347 if (SI == FuncInfo.StaticAllocaMap.end())
348 return Register();
349
350 if (SI != FuncInfo.StaticAllocaMap.end()) {
351 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
352 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
353 ResultReg)
354 .addFrameIndex(SI->second)
355 .addImm(0)
356 .addImm(0);
357 return ResultReg;
358 }
359
360 return Register();
361}
362
363Register AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
364 if (VT > MVT::i64)
365 return Register();
366
367 if (!CI->isZero())
368 return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
369
370 // Create a copy from the zero register to materialize a "0" value.
371 const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
372 : &AArch64::GPR32RegClass;
373 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
374 Register ResultReg = createResultReg(RC);
375 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
376 ResultReg).addReg(ZeroReg, getKillRegState(true));
377 return ResultReg;
378}
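// Example: materializing the i32 constant 0 becomes a single COPY from WZR into
// the result register instead of a mov-immediate, since reads of WZR/XZR always
// yield zero.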
379
380Register AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
381 // Positive zero (+0.0) has to be materialized with a fmov from the zero
382 // register, because the immediate version of fmov cannot encode zero.
383 if (CFP->isNullValue())
384 return fastMaterializeFloatZero(CFP);
385
386 if (VT != MVT::f32 && VT != MVT::f64)
387 return Register();
388
389 const APFloat Val = CFP->getValueAPF();
390 bool Is64Bit = (VT == MVT::f64);
391 // This checks to see if we can use FMOV instructions to materialize
392 // a constant, otherwise we have to materialize via the constant pool.
393 int Imm =
394 Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
395 if (Imm != -1) {
396 unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
397 return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
398 }
399
400 // For the large code model materialize the FP constant in code.
401 if (TM.getCodeModel() == CodeModel::Large) {
402 unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
403 const TargetRegisterClass *RC = Is64Bit ?
404 &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
405
406 Register TmpReg = createResultReg(RC);
407 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc1), TmpReg)
408 .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
409
410 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
411 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
412 TII.get(TargetOpcode::COPY), ResultReg)
413 .addReg(TmpReg, getKillRegState(true));
414
415 return ResultReg;
416 }
417
418 // Materialize via constant pool. MachineConstantPool wants an explicit
419 // alignment.
420 Align Alignment = DL.getPrefTypeAlign(CFP->getType());
421
422 unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
423 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
424 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
425 ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
426
427 unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
428 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
429 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
430 .addReg(ADRPReg)
431 .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
432 return ResultReg;
433}
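// Example: +1.0 and -0.5 fit the 8-bit FMOV immediate encoding and become a
// single fmov, whereas a value such as 0.1 is not encodable and is loaded from
// the constant pool via ADRP + LDR (or, under the large code model, built with
// MOVi*imm followed by a COPY as above).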
434
435Register AArch64FastISel::materializeGV(const GlobalValue *GV) {
436 // We can't handle thread-local variables quickly yet.
437 if (GV->isThreadLocal())
438 return Register();
439
440 // MachO still uses GOT for large code-model accesses, but ELF requires
441 // movz/movk sequences, which FastISel doesn't handle yet.
442 if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
443 return Register();
444
445 if (FuncInfo.MF->getInfo<AArch64FunctionInfo>()->hasELFSignedGOT())
446 return Register();
447
448 unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
449
450 EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
451 if (!DestEVT.isSimple())
452 return Register();
453
454 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
455 Register ResultReg;
456
457 if (OpFlags & AArch64II::MO_GOT) {
458 // ADRP + LDRX
459 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
460 ADRPReg)
461 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
462
463 unsigned LdrOpc;
464 if (Subtarget->isTargetILP32()) {
465 ResultReg = createResultReg(&AArch64::GPR32RegClass);
466 LdrOpc = AArch64::LDRWui;
467 } else {
468 ResultReg = createResultReg(&AArch64::GPR64RegClass);
469 LdrOpc = AArch64::LDRXui;
470 }
471 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(LdrOpc),
472 ResultReg)
473 .addReg(ADRPReg)
474 .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF |
475 AArch64II::MO_NC | OpFlags);
476 if (!Subtarget->isTargetILP32())
477 return ResultReg;
478
479 // LDRWui produces a 32-bit register, but pointers in-register are 64 bits,
480 // so we must extend the result on ILP32.
481 Register Result64 = createResultReg(&AArch64::GPR64RegClass);
482 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
483 TII.get(TargetOpcode::SUBREG_TO_REG))
484 .addDef(Result64)
485 .addImm(0)
486 .addReg(ResultReg, RegState::Kill)
487 .addImm(AArch64::sub_32);
488 return Result64;
489 } else {
490 // ADRP + ADDX
491 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
492 ADRPReg)
493 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
494
495 if (OpFlags & AArch64II::MO_TAGGED) {
496 // MO_TAGGED on the page indicates a tagged address. Set the tag now.
497 // We do so by creating a MOVK that sets bits 48-63 of the register to
498 // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
499 // the small code model so we can assume a binary size of <= 4GB, which
500 // makes the untagged PC relative offset positive. The binary must also be
501 // loaded into address range [0, 2^48). Both of these properties need to
502 // be ensured at runtime when using tagged addresses.
503 //
504 // TODO: There is duplicate logic in AArch64ExpandPseudoInsts.cpp that
505 // also uses BuildMI for making an ADRP (+ MOVK) + ADD, but the operands
506 // are not exactly 1:1 with FastISel so we cannot easily abstract this
507 // out. At some point, it would be nice to find a way to not have this
508 // duplicate code.
509 Register DstReg = createResultReg(&AArch64::GPR64commonRegClass);
510 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::MOVKXi),
511 DstReg)
512 .addReg(ADRPReg)
513 .addGlobalAddress(GV, /*Offset=*/0x100000000,
514 AArch64II::MO_PREL | AArch64II::MO_G3)
515 .addImm(48);
516 ADRPReg = DstReg;
517 }
518
519 ResultReg = createResultReg(&AArch64::GPR64spRegClass);
520 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
521 ResultReg)
522 .addReg(ADRPReg)
523 .addGlobalAddress(GV, 0,
524 AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
525 .addImm(0);
526 }
527 return ResultReg;
528}
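// Roughly, the two main sequences produced here are
//   adrp x0, :got:var   +   ldr x0, [x0, :got_lo12:var]    (GOT-indirect)
//   adrp x0, var        +   add x0, x0, :lo12:var          (direct, small code model)
// with an extra movk inserted for MO_TAGGED globals as described above.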
529
530Register AArch64FastISel::fastMaterializeConstant(const Constant *C) {
531 EVT CEVT = TLI.getValueType(DL, C->getType(), true);
532
533 // Only handle simple types.
534 if (!CEVT.isSimple())
535 return Register();
536 MVT VT = CEVT.getSimpleVT();
537 // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
538 // 'null' pointers need to have a somewhat special treatment.
539 if (isa<ConstantPointerNull>(C)) {
540 assert(VT == MVT::i64 && "Expected 64-bit pointers");
541 return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
542 }
543
544 if (const auto *CI = dyn_cast<ConstantInt>(C))
545 return materializeInt(CI, VT);
546 else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
547 return materializeFP(CFP, VT);
548 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
549 return materializeGV(GV);
550
551 return Register();
552}
553
554Register AArch64FastISel::fastMaterializeFloatZero(const ConstantFP *CFP) {
555 assert(CFP->isNullValue() &&
556 "Floating-point constant is not a positive zero.");
557 MVT VT;
558 if (!isTypeLegal(CFP->getType(), VT))
559 return Register();
560
561 if (VT != MVT::f32 && VT != MVT::f64)
562 return Register();
563
564 bool Is64Bit = (VT == MVT::f64);
565 unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
566 unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
567 return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);
568}
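// The result is essentially "fmov d0, xzr" (or "fmov s0, wzr" for f32), which
// moves the all-zero GPR bit pattern into the FP register.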
569
570/// Check if the multiply is by a power-of-2 constant.
571static bool isMulPowOf2(const Value *I) {
572 if (const auto *MI = dyn_cast<MulOperator>(I)) {
573 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
574 if (C->getValue().isPowerOf2())
575 return true;
576 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
577 if (C->getValue().isPowerOf2())
578 return true;
579 }
580 return false;
581}
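// Example: "mul i64 %idx, 8" satisfies this check, so callers can treat the
// multiply as a left shift by 3 when folding it into an address or into an
// add/sub with a shifted register operand.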
582
583// Computes the address to get to an object.
584bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
585{
586 const User *U = nullptr;
587 unsigned Opcode = Instruction::UserOp1;
588 if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
589 // Don't walk into other basic blocks unless the object is an alloca from
590 // another block, otherwise it may not have a virtual register assigned.
591 if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
592 FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
593 Opcode = I->getOpcode();
594 U = I;
595 }
596 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
597 Opcode = C->getOpcode();
598 U = C;
599 }
600
601 if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
602 if (Ty->getAddressSpace() > 255)
603 // Fast instruction selection doesn't support the special
604 // address spaces.
605 return false;
606
607 switch (Opcode) {
608 default:
609 break;
610 case Instruction::BitCast:
611 // Look through bitcasts.
612 return computeAddress(U->getOperand(0), Addr, Ty);
613
614 case Instruction::IntToPtr:
615 // Look past no-op inttoptrs.
616 if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
617 TLI.getPointerTy(DL))
618 return computeAddress(U->getOperand(0), Addr, Ty);
619 break;
620
621 case Instruction::PtrToInt:
622 // Look past no-op ptrtoints.
623 if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
624 return computeAddress(U->getOperand(0), Addr, Ty);
625 break;
626
627 case Instruction::GetElementPtr: {
628 Address SavedAddr = Addr;
629 uint64_t TmpOffset = Addr.getOffset();
630
631 // Iterate through the GEP folding the constants into offsets where
632 // we can.
633 for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
634 GTI != E; ++GTI) {
635 const Value *Op = GTI.getOperand();
636 if (StructType *STy = GTI.getStructTypeOrNull()) {
637 const StructLayout *SL = DL.getStructLayout(STy);
638 unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
639 TmpOffset += SL->getElementOffset(Idx);
640 } else {
641 uint64_t S = GTI.getSequentialElementStride(DL);
642 while (true) {
643 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
644 // Constant-offset addressing.
645 TmpOffset += CI->getSExtValue() * S;
646 break;
647 }
648 if (canFoldAddIntoGEP(U, Op)) {
649 // A compatible add with a constant operand. Fold the constant.
650 ConstantInt *CI =
651 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
652 TmpOffset += CI->getSExtValue() * S;
653 // Iterate on the other operand.
654 Op = cast<AddOperator>(Op)->getOperand(0);
655 continue;
656 }
657 // Unsupported
658 goto unsupported_gep;
659 }
660 }
661 }
662
663 // Try to grab the base operand now.
664 Addr.setOffset(TmpOffset);
665 if (computeAddress(U->getOperand(0), Addr, Ty))
666 return true;
667
668 // We failed, restore everything and try the other options.
669 Addr = SavedAddr;
670
671 unsupported_gep:
672 break;
673 }
674 case Instruction::Alloca: {
675 const AllocaInst *AI = cast<AllocaInst>(Obj);
676 DenseMap<const AllocaInst *, int>::iterator SI =
677 FuncInfo.StaticAllocaMap.find(AI);
678 if (SI != FuncInfo.StaticAllocaMap.end()) {
679 Addr.setKind(Address::FrameIndexBase);
680 Addr.setFI(SI->second);
681 return true;
682 }
683 break;
684 }
685 case Instruction::Add: {
686 // Adds of constants are common and easy enough.
687 const Value *LHS = U->getOperand(0);
688 const Value *RHS = U->getOperand(1);
689
690 if (isa<ConstantInt>(LHS))
691 std::swap(LHS, RHS);
692
693 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
694 Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
695 return computeAddress(LHS, Addr, Ty);
696 }
697
698 Address Backup = Addr;
699 if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
700 return true;
701 Addr = Backup;
702
703 break;
704 }
705 case Instruction::Sub: {
706 // Subs of constants are common and easy enough.
707 const Value *LHS = U->getOperand(0);
708 const Value *RHS = U->getOperand(1);
709
710 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
711 Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
712 return computeAddress(LHS, Addr, Ty);
713 }
714 break;
715 }
716 case Instruction::Shl: {
717 if (Addr.getOffsetReg())
718 break;
719
720 const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
721 if (!CI)
722 break;
723
724 unsigned Val = CI->getZExtValue();
725 if (Val < 1 || Val > 3)
726 break;
727
728 uint64_t NumBytes = 0;
729 if (Ty && Ty->isSized()) {
730 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
731 NumBytes = NumBits / 8;
732 if (!isPowerOf2_64(NumBits))
733 NumBytes = 0;
734 }
735
736 if (NumBytes != (1ULL << Val))
737 break;
738
739 Addr.setShift(Val);
740 Addr.setExtendType(AArch64_AM::LSL);
741
742 const Value *Src = U->getOperand(0);
743 if (const auto *I = dyn_cast<Instruction>(Src)) {
744 if (FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
745 // Fold the zext or sext when it won't become a noop.
746 if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
747 if (!isIntExtFree(ZE) &&
748 ZE->getOperand(0)->getType()->isIntegerTy(32)) {
749 Addr.setExtendType(AArch64_AM::UXTW);
750 Src = ZE->getOperand(0);
751 }
752 } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
753 if (!isIntExtFree(SE) &&
754 SE->getOperand(0)->getType()->isIntegerTy(32)) {
755 Addr.setExtendType(AArch64_AM::SXTW);
756 Src = SE->getOperand(0);
757 }
758 }
759 }
760 }
761
762 if (const auto *AI = dyn_cast<BinaryOperator>(Src))
763 if (AI->getOpcode() == Instruction::And) {
764 const Value *LHS = AI->getOperand(0);
765 const Value *RHS = AI->getOperand(1);
766
767 if (const auto *C = dyn_cast<ConstantInt>(LHS))
768 if (C->getValue() == 0xffffffff)
769 std::swap(LHS, RHS);
770
771 if (const auto *C = dyn_cast<ConstantInt>(RHS))
772 if (C->getValue() == 0xffffffff) {
773 Addr.setExtendType(AArch64_AM::UXTW);
774 Register Reg = getRegForValue(LHS);
775 if (!Reg)
776 return false;
777 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
778 Addr.setOffsetReg(Reg);
779 return true;
780 }
781 }
782
783 Register Reg = getRegForValue(Src);
784 if (!Reg)
785 return false;
786 Addr.setOffsetReg(Reg);
787 return true;
788 }
789 case Instruction::Mul: {
790 if (Addr.getOffsetReg())
791 break;
792
793 if (!isMulPowOf2(U))
794 break;
795
796 const Value *LHS = U->getOperand(0);
797 const Value *RHS = U->getOperand(1);
798
799 // Canonicalize power-of-2 value to the RHS.
800 if (const auto *C = dyn_cast<ConstantInt>(LHS))
801 if (C->getValue().isPowerOf2())
802 std::swap(LHS, RHS);
803
804 assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
805 const auto *C = cast<ConstantInt>(RHS);
806 unsigned Val = C->getValue().logBase2();
807 if (Val < 1 || Val > 3)
808 break;
809
810 uint64_t NumBytes = 0;
811 if (Ty && Ty->isSized()) {
812 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
813 NumBytes = NumBits / 8;
814 if (!isPowerOf2_64(NumBits))
815 NumBytes = 0;
816 }
817
818 if (NumBytes != (1ULL << Val))
819 break;
820
821 Addr.setShift(Val);
822 Addr.setExtendType(AArch64_AM::LSL);
823
824 const Value *Src = LHS;
825 if (const auto *I = dyn_cast<Instruction>(Src)) {
826 if (FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
827 // Fold the zext or sext when it won't become a noop.
828 if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
829 if (!isIntExtFree(ZE) &&
830 ZE->getOperand(0)->getType()->isIntegerTy(32)) {
831 Addr.setExtendType(AArch64_AM::UXTW);
832 Src = ZE->getOperand(0);
833 }
834 } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
835 if (!isIntExtFree(SE) &&
836 SE->getOperand(0)->getType()->isIntegerTy(32)) {
837 Addr.setExtendType(AArch64_AM::SXTW);
838 Src = SE->getOperand(0);
839 }
840 }
841 }
842 }
843
844 Register Reg = getRegForValue(Src);
845 if (!Reg)
846 return false;
847 Addr.setOffsetReg(Reg);
848 return true;
849 }
850 case Instruction::And: {
851 if (Addr.getOffsetReg())
852 break;
853
854 if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
855 break;
856
857 const Value *LHS = U->getOperand(0);
858 const Value *RHS = U->getOperand(1);
859
860 if (const auto *C = dyn_cast<ConstantInt>(LHS))
861 if (C->getValue() == 0xffffffff)
862 std::swap(LHS, RHS);
863
864 if (const auto *C = dyn_cast<ConstantInt>(RHS))
865 if (C->getValue() == 0xffffffff) {
866 Addr.setShift(0);
867 Addr.setExtendType(AArch64_AM::LSL);
868 Addr.setExtendType(AArch64_AM::UXTW);
869
870 Register Reg = getRegForValue(LHS);
871 if (!Reg)
872 return false;
873 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
874 Addr.setOffsetReg(Reg);
875 return true;
876 }
877 break;
878 }
879 case Instruction::SExt:
880 case Instruction::ZExt: {
881 if (!Addr.getReg() || Addr.getOffsetReg())
882 break;
883
884 const Value *Src = nullptr;
885 // Fold the zext or sext when it won't become a noop.
886 if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
887 if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
888 Addr.setExtendType(AArch64_AM::UXTW);
889 Src = ZE->getOperand(0);
890 }
891 } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
892 if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
893 Addr.setExtendType(AArch64_AM::SXTW);
894 Src = SE->getOperand(0);
895 }
896 }
897
898 if (!Src)
899 break;
900
901 Addr.setShift(0);
902 Register Reg = getRegForValue(Src);
903 if (!Reg)
904 return false;
905 Addr.setOffsetReg(Reg);
906 return true;
907 }
908 } // end switch
909
910 if (Addr.isRegBase() && !Addr.getReg()) {
911 Register Reg = getRegForValue(Obj);
912 if (!Reg)
913 return false;
914 Addr.setReg(Reg);
915 return true;
916 }
917
918 if (!Addr.getOffsetReg()) {
919 Register Reg = getRegForValue(Obj);
920 if (!Reg)
921 return false;
922 Addr.setOffsetReg(Reg);
923 return true;
924 }
925
926 return false;
927}
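// Putting the cases above together, a GEP with only constant indices such as
//   %p = getelementptr inbounds %struct.S, ptr %base, i64 0, i32 2
//   %v = load i32, ptr %p
// folds the field offset straight into Addr's immediate, so the load can become
// "ldr w0, [x_base, #8]" (assuming that field sits at byte offset 8).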
928
929bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
930 const User *U = nullptr;
931 unsigned Opcode = Instruction::UserOp1;
932 bool InMBB = true;
933
934 if (const auto *I = dyn_cast<Instruction>(V)) {
935 Opcode = I->getOpcode();
936 U = I;
937 InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
938 } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
939 Opcode = C->getOpcode();
940 U = C;
941 }
942
943 switch (Opcode) {
944 default: break;
945 case Instruction::BitCast:
946 // Look past bitcasts if its operand is in the same BB.
947 if (InMBB)
948 return computeCallAddress(U->getOperand(0), Addr);
949 break;
950 case Instruction::IntToPtr:
951 // Look past no-op inttoptrs if its operand is in the same BB.
952 if (InMBB &&
953 TLI.getValueType(DL, U->getOperand(0)->getType()) ==
954 TLI.getPointerTy(DL))
955 return computeCallAddress(U->getOperand(0), Addr);
956 break;
957 case Instruction::PtrToInt:
958 // Look past no-op ptrtoints if its operand is in the same BB.
959 if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
960 return computeCallAddress(U->getOperand(0), Addr);
961 break;
962 }
963
964 if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
965 Addr.setGlobalValue(GV);
966 return true;
967 }
968
969 // If all else fails, try to materialize the value in a register.
970 if (!Addr.getGlobalValue()) {
971 Addr.setReg(getRegForValue(V));
972 return Addr.getReg().isValid();
973 }
974
975 return false;
976}
977
978bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
979 EVT evt = TLI.getValueType(DL, Ty, true);
980
981 if (Subtarget->isTargetILP32() && Ty->isPointerTy())
982 return false;
983
984 // Only handle simple types.
985 if (evt == MVT::Other || !evt.isSimple())
986 return false;
987 VT = evt.getSimpleVT();
988
989 // This is a legal type, but it's not something we handle in fast-isel.
990 if (VT == MVT::f128)
991 return false;
992
993 // Handle all other legal types, i.e. a register that will directly hold this
994 // value.
995 return TLI.isTypeLegal(VT);
996}
997
998/// Determine if the value type is supported by FastISel.
999///
1000/// FastISel for AArch64 can handle more value types than are legal. This adds
1001 /// simple value types such as i1, i8, and i16.
1002bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
1003 if (Ty->isVectorTy() && !IsVectorAllowed)
1004 return false;
1005
1006 if (isTypeLegal(Ty, VT))
1007 return true;
1008
1009 // If this is a type that can be sign- or zero-extended to a basic operation,
1010 // go ahead and accept it now.
1011 if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
1012 return true;
1013
1014 return false;
1015}
1016
1017bool AArch64FastISel::isValueAvailable(const Value *V) const {
1018 if (!isa<Instruction>(V))
1019 return true;
1020
1021 const auto *I = cast<Instruction>(V);
1022 return FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB;
1023}
1024
1025bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
1026 if (Subtarget->isTargetILP32())
1027 return false;
1028
1029 unsigned ScaleFactor = getImplicitScaleFactor(VT);
1030 if (!ScaleFactor)
1031 return false;
1032
1033 bool ImmediateOffsetNeedsLowering = false;
1034 bool RegisterOffsetNeedsLowering = false;
1035 int64_t Offset = Addr.getOffset();
1036 if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
1037 ImmediateOffsetNeedsLowering = true;
1038 else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1039 !isUInt<12>(Offset / ScaleFactor))
1040 ImmediateOffsetNeedsLowering = true;
1041
1042 // Cannot encode an offset register and an immediate offset in the same
1043 // instruction. Fold the immediate offset into the load/store instruction and
1044 // emit an additional add to take care of the offset register.
1045 if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1046 RegisterOffsetNeedsLowering = true;
1047
1048 // Cannot encode zero register as base.
1049 if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1050 RegisterOffsetNeedsLowering = true;
1051
1052 // If this is a stack pointer and the offset needs to be simplified then put
1053 // the alloca address into a register, set the base type back to register and
1054 // continue. This should almost never happen.
1055 if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1056 {
1057 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1058 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
1059 ResultReg)
1060 .addFrameIndex(Addr.getFI())
1061 .addImm(0)
1062 .addImm(0);
1063 Addr.setKind(Address::RegBase);
1064 Addr.setReg(ResultReg);
1065 }
1066
1067 if (RegisterOffsetNeedsLowering) {
1068 Register ResultReg;
1069 if (Addr.getReg()) {
1070 if (Addr.getExtendType() == AArch64_AM::SXTW ||
1071 Addr.getExtendType() == AArch64_AM::UXTW )
1072 ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1073 Addr.getOffsetReg(), Addr.getExtendType(),
1074 Addr.getShift());
1075 else
1076 ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1077 Addr.getOffsetReg(), AArch64_AM::LSL,
1078 Addr.getShift());
1079 } else {
1080 if (Addr.getExtendType() == AArch64_AM::UXTW)
1081 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1082 Addr.getShift(), /*IsZExt=*/true);
1083 else if (Addr.getExtendType() == AArch64_AM::SXTW)
1084 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1085 Addr.getShift(), /*IsZExt=*/false);
1086 else
1087 ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1088 Addr.getShift());
1089 }
1090 if (!ResultReg)
1091 return false;
1092
1093 Addr.setReg(ResultReg);
1094 Addr.setOffsetReg(0);
1095 Addr.setShift(0);
1096 Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1097 }
1098
1099 // Since the offset is too large for the load/store instruction, get the
1100 // reg+offset into a register.
1101 if (ImmediateOffsetNeedsLowering) {
1102 Register ResultReg;
1103 if (Addr.getReg())
1104 // Try to fold the immediate into the add instruction.
1105 ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
1106 else
1107 ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1108
1109 if (!ResultReg)
1110 return false;
1111 Addr.setReg(ResultReg);
1112 Addr.setOffset(0);
1113 }
1114 return true;
1115}
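// Example of the immediate-lowering path: an i32 access at byte offset 16384 is
// aligned, but 16384 / 4 = 4096 no longer fits the 12-bit scaled immediate
// (maximum 4095), so the offset is folded into a separate add (roughly
// "add x1, x0, #4, lsl #12") and the memory instruction then uses offset 0.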
1116
1117void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1118 const MachineInstrBuilder &MIB,
1119 MachineMemOperand::Flags Flags,
1120 unsigned ScaleFactor,
1121 MachineMemOperand *MMO) {
1122 int64_t Offset = Addr.getOffset() / ScaleFactor;
1123 // Frame base works a bit differently. Handle it separately.
1124 if (Addr.isFIBase()) {
1125 int FI = Addr.getFI();
1126 // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
1127 // and alignment should be based on the VT.
1128 MMO = FuncInfo.MF->getMachineMemOperand(
1129 MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1130 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
1131 // Now add the rest of the operands.
1132 MIB.addFrameIndex(FI).addImm(Offset);
1133 } else {
1134 assert(Addr.isRegBase() && "Unexpected address kind.");
1135 const MCInstrDesc &II = MIB->getDesc();
1136 unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1137 Addr.setReg(
1138 constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1139 Addr.setOffsetReg(
1140 constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1141 if (Addr.getOffsetReg()) {
1142 assert(Addr.getOffset() == 0 && "Unexpected offset");
1143 bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1144 Addr.getExtendType() == AArch64_AM::SXTX;
1145 MIB.addReg(Addr.getReg());
1146 MIB.addReg(Addr.getOffsetReg());
1147 MIB.addImm(IsSigned);
1148 MIB.addImm(Addr.getShift() != 0);
1149 } else
1150 MIB.addReg(Addr.getReg()).addImm(Offset);
1151 }
1152
1153 if (MMO)
1154 MIB.addMemOperand(MMO);
1155}
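// For the register-offset forms, the two trailing immediates added above select
// the extend (0 = zero-extend/LSL, 1 = signed SXTW/SXTX) and whether the index
// is scaled by the access size, roughly matching "ldr w0, [x1, w2, sxtw #2]".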
1156
1157Register AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1158 const Value *RHS, bool SetFlags,
1159 bool WantResult, bool IsZExt) {
1160 AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1161 bool NeedExtend = false;
1162 switch (RetVT.SimpleTy) {
1163 default:
1164 return Register();
1165 case MVT::i1:
1166 NeedExtend = true;
1167 break;
1168 case MVT::i8:
1169 NeedExtend = true;
1170 ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1171 break;
1172 case MVT::i16:
1173 NeedExtend = true;
1174 ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1175 break;
1176 case MVT::i32: // fall-through
1177 case MVT::i64:
1178 break;
1179 }
1180 MVT SrcVT = RetVT;
1181 RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1182
1183 // Canonicalize immediates to the RHS first.
1184 if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1185 std::swap(LHS, RHS);
1186
1187 // Canonicalize mul by power of 2 to the RHS.
1188 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1189 if (isMulPowOf2(LHS))
1190 std::swap(LHS, RHS);
1191
1192 // Canonicalize shift immediate to the RHS.
1193 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1194 if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1195 if (isa<ConstantInt>(SI->getOperand(1)))
1196 if (SI->getOpcode() == Instruction::Shl ||
1197 SI->getOpcode() == Instruction::LShr ||
1198 SI->getOpcode() == Instruction::AShr )
1199 std::swap(LHS, RHS);
1200
1201 Register LHSReg = getRegForValue(LHS);
1202 if (!LHSReg)
1203 return Register();
1204
1205 if (NeedExtend)
1206 LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1207
1208 Register ResultReg;
1209 if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1210 uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1211 if (C->isNegative())
1212 ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
1213 WantResult);
1214 else
1215 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
1216 WantResult);
1217 } else if (const auto *C = dyn_cast<Constant>(RHS))
1218 if (C->isNullValue())
1219 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);
1220
1221 if (ResultReg)
1222 return ResultReg;
1223
1224 // Only extend the RHS within the instruction if there is a valid extend type.
1225 if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1226 isValueAvailable(RHS)) {
1227 Register RHSReg = getRegForValue(RHS);
1228 if (!RHSReg)
1229 return Register();
1230 return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,
1231 SetFlags, WantResult);
1232 }
1233
1234 // Check if the mul can be folded into the instruction.
1235 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1236 if (isMulPowOf2(RHS)) {
1237 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1238 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1239
1240 if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1241 if (C->getValue().isPowerOf2())
1242 std::swap(MulLHS, MulRHS);
1243
1244 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1245 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1246 Register RHSReg = getRegForValue(MulLHS);
1247 if (!RHSReg)
1248 return Register();
1249 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,
1250 ShiftVal, SetFlags, WantResult);
1251 if (ResultReg)
1252 return ResultReg;
1253 }
1254 }
1255
1256 // Check if the shift can be folded into the instruction.
1257 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1258 if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1259 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1260 AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1261 switch (SI->getOpcode()) {
1262 default: break;
1263 case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
1264 case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1265 case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1266 }
1267 uint64_t ShiftVal = C->getZExtValue();
1268 if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1269 Register RHSReg = getRegForValue(SI->getOperand(0));
1270 if (!RHSReg)
1271 return Register();
1272 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
1273 ShiftVal, SetFlags, WantResult);
1274 if (ResultReg)
1275 return ResultReg;
1276 }
1277 }
1278 }
1279 }
1280
1281 Register RHSReg = getRegForValue(RHS);
1282 if (!RHSReg)
1283 return Register();
1284
1285 if (NeedExtend)
1286 RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1287
1288 return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
1289}
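// Example of the shift folding: given "%s = shl i64 %b, 2" with a single use and
// "%r = add i64 %a, %s" in the same block, the pair is emitted as one
// "add x0, x_a, x_b, lsl #2" via emitAddSub_rs instead of a separate shift.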
1290
1291Register AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, Register LHSReg,
1292 Register RHSReg, bool SetFlags,
1293 bool WantResult) {
1294 assert(LHSReg && RHSReg && "Invalid register number.");
1295
1296 if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1297 RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1298 return Register();
1299
1300 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1301 return Register();
1302
1303 static const unsigned OpcTable[2][2][2] = {
1304 { { AArch64::SUBWrr, AArch64::SUBXrr },
1305 { AArch64::ADDWrr, AArch64::ADDXrr } },
1306 { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1307 { AArch64::ADDSWrr, AArch64::ADDSXrr } }
1308 };
1309 bool Is64Bit = RetVT == MVT::i64;
1310 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1311 const TargetRegisterClass *RC =
1312 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1313 Register ResultReg;
1314 if (WantResult)
1315 ResultReg = createResultReg(RC);
1316 else
1317 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1318
1319 const MCInstrDesc &II = TII.get(Opc);
1320 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1321 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1322 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1323 .addReg(LHSReg)
1324 .addReg(RHSReg);
1325 return ResultReg;
1326}
1327
1328Register AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, Register LHSReg,
1329 uint64_t Imm, bool SetFlags,
1330 bool WantResult) {
1331 assert(LHSReg && "Invalid register number.");
1332
1333 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1334 return Register();
1335
1336 unsigned ShiftImm;
1337 if (isUInt<12>(Imm))
1338 ShiftImm = 0;
1339 else if ((Imm & 0xfff000) == Imm) {
1340 ShiftImm = 12;
1341 Imm >>= 12;
1342 } else
1343 return Register();
1344
1345 static const unsigned OpcTable[2][2][2] = {
1346 { { AArch64::SUBWri, AArch64::SUBXri },
1347 { AArch64::ADDWri, AArch64::ADDXri } },
1348 { { AArch64::SUBSWri, AArch64::SUBSXri },
1349 { AArch64::ADDSWri, AArch64::ADDSXri } }
1350 };
1351 bool Is64Bit = RetVT == MVT::i64;
1352 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1353 const TargetRegisterClass *RC;
1354 if (SetFlags)
1355 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1356 else
1357 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1358 Register ResultReg;
1359 if (WantResult)
1360 ResultReg = createResultReg(RC);
1361 else
1362 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1363
1364 const MCInstrDesc &II = TII.get(Opc);
1365 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1366 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1367 .addReg(LHSReg)
1368 .addImm(Imm)
1369 .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1370 return ResultReg;
1371}
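// The immediate form accepts a plain 12-bit value or one with only bits [23:12]
// set: 4095 encodes directly, 0x5000 encodes as #5 with LSL #12, and something
// like 4097 is rejected so the caller materializes it into a register instead.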
1372
1373Register AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, Register LHSReg,
1374 Register RHSReg,
1375 AArch64_AM::ShiftExtendType ShiftType,
1376 uint64_t ShiftImm, bool SetFlags,
1377 bool WantResult) {
1378 assert(LHSReg && RHSReg && "Invalid register number.");
1379 assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1380 RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1381
1382 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1383 return Register();
1384
1385 // Don't deal with undefined shifts.
1386 if (ShiftImm >= RetVT.getSizeInBits())
1387 return Register();
1388
1389 static const unsigned OpcTable[2][2][2] = {
1390 { { AArch64::SUBWrs, AArch64::SUBXrs },
1391 { AArch64::ADDWrs, AArch64::ADDXrs } },
1392 { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1393 { AArch64::ADDSWrs, AArch64::ADDSXrs } }
1394 };
1395 bool Is64Bit = RetVT == MVT::i64;
1396 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1397 const TargetRegisterClass *RC =
1398 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1399 Register ResultReg;
1400 if (WantResult)
1401 ResultReg = createResultReg(RC);
1402 else
1403 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1404
1405 const MCInstrDesc &II = TII.get(Opc);
1406 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1407 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1408 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1409 .addReg(LHSReg)
1410 .addReg(RHSReg)
1411 .addImm(getShifterImm(ShiftType, ShiftImm));
1412 return ResultReg;
1413}
1414
1415Register AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, Register LHSReg,
1416 Register RHSReg,
1417 AArch64_AM::ShiftExtendType ExtType,
1418 uint64_t ShiftImm, bool SetFlags,
1419 bool WantResult) {
1420 assert(LHSReg && RHSReg && "Invalid register number.");
1421 assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1422 RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1423
1424 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1425 return Register();
1426
1427 if (ShiftImm >= 4)
1428 return Register();
1429
1430 static const unsigned OpcTable[2][2][2] = {
1431 { { AArch64::SUBWrx, AArch64::SUBXrx },
1432 { AArch64::ADDWrx, AArch64::ADDXrx } },
1433 { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1434 { AArch64::ADDSWrx, AArch64::ADDSXrx } }
1435 };
1436 bool Is64Bit = RetVT == MVT::i64;
1437 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1438 const TargetRegisterClass *RC = nullptr;
1439 if (SetFlags)
1440 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1441 else
1442 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1443 Register ResultReg;
1444 if (WantResult)
1445 ResultReg = createResultReg(RC);
1446 else
1447 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1448
1449 const MCInstrDesc &II = TII.get(Opc);
1450 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1451 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1452 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1453 .addReg(LHSReg)
1454 .addReg(RHSReg)
1455 .addImm(getArithExtendImm(ExtType, ShiftImm));
1456 return ResultReg;
1457}
1458
1459bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1460 Type *Ty = LHS->getType();
1461 EVT EVT = TLI.getValueType(DL, Ty, true);
1462 if (!EVT.isSimple())
1463 return false;
1464 MVT VT = EVT.getSimpleVT();
1465
1466 switch (VT.SimpleTy) {
1467 default:
1468 return false;
1469 case MVT::i1:
1470 case MVT::i8:
1471 case MVT::i16:
1472 case MVT::i32:
1473 case MVT::i64:
1474 return emitICmp(VT, LHS, RHS, IsZExt);
1475 case MVT::f32:
1476 case MVT::f64:
1477 return emitFCmp(VT, LHS, RHS);
1478 }
1479}
1480
1481bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1482 bool IsZExt) {
1483 return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1484 IsZExt)
1485 .isValid();
1486}
1487
1488bool AArch64FastISel::emitICmp_ri(MVT RetVT, Register LHSReg, uint64_t Imm) {
1489 return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,
1490 /*SetFlags=*/true, /*WantResult=*/false)
1491 .isValid();
1492}
1493
1494bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1495 if (RetVT != MVT::f32 && RetVT != MVT::f64)
1496 return false;
1497
1498 // Check to see if the 2nd operand is a constant that we can encode directly
1499 // in the compare.
1500 bool UseImm = false;
1501 if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1502 if (CFP->isZero() && !CFP->isNegative())
1503 UseImm = true;
1504
1505 Register LHSReg = getRegForValue(LHS);
1506 if (!LHSReg)
1507 return false;
1508
1509 if (UseImm) {
1510 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1511 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1512 .addReg(LHSReg);
1513 return true;
1514 }
1515
1516 Register RHSReg = getRegForValue(RHS);
1517 if (!RHSReg)
1518 return false;
1519
1520 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1521 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1522 .addReg(LHSReg)
1523 .addReg(RHSReg);
1524 return true;
1525}
1526
1527Register AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1528 bool SetFlags, bool WantResult, bool IsZExt) {
1529 return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1530 IsZExt);
1531}
1532
1533/// This method is a wrapper to simplify add emission.
1534///
1535/// First try to emit an add with an immediate operand using emitAddSub_ri. If
1536/// that fails, then try to materialize the immediate into a register and use
1537/// emitAddSub_rr instead.
1538Register AArch64FastISel::emitAdd_ri_(MVT VT, Register Op0, int64_t Imm) {
1539 Register ResultReg;
1540 if (Imm < 0)
1541 ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);
1542 else
1543 ResultReg = emitAddSub_ri(true, VT, Op0, Imm);
1544
1545 if (ResultReg)
1546 return ResultReg;
1547
1548 Register CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1549 if (!CReg)
1550 return Register();
1551
1552 ResultReg = emitAddSub_rr(true, VT, Op0, CReg);
1553 return ResultReg;
1554}
1555
1556Register AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1557 bool SetFlags, bool WantResult, bool IsZExt) {
1558 return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1559 IsZExt);
1560}
1561
1562Register AArch64FastISel::emitSubs_rr(MVT RetVT, Register LHSReg,
1563 Register RHSReg, bool WantResult) {
1564 return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg,
1565 /*SetFlags=*/true, WantResult);
1566}
1567
1568Register AArch64FastISel::emitSubs_rs(MVT RetVT, Register LHSReg,
1569 Register RHSReg,
1570 AArch64_AM::ShiftExtendType ShiftType,
1571 uint64_t ShiftImm, bool WantResult) {
1572 return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType,
1573 ShiftImm, /*SetFlags=*/true, WantResult);
1574}
1575
1576Register AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1577 const Value *LHS, const Value *RHS) {
1578 // Canonicalize immediates to the RHS first.
1579 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1580 std::swap(LHS, RHS);
1581
1582 // Canonicalize mul by power-of-2 to the RHS.
1583 if (LHS->hasOneUse() && isValueAvailable(LHS))
1584 if (isMulPowOf2(LHS))
1585 std::swap(LHS, RHS);
1586
1587 // Canonicalize shift immediate to the RHS.
1588 if (LHS->hasOneUse() && isValueAvailable(LHS))
1589 if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1590 if (isa<ConstantInt>(SI->getOperand(1)))
1591 std::swap(LHS, RHS);
1592
1593 Register LHSReg = getRegForValue(LHS);
1594 if (!LHSReg)
1595 return Register();
1596
1597 Register ResultReg;
1598 if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1599 uint64_t Imm = C->getZExtValue();
1600 ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);
1601 }
1602 if (ResultReg)
1603 return ResultReg;
1604
1605 // Check if the mul can be folded into the instruction.
1606 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1607 if (isMulPowOf2(RHS)) {
1608 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1609 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1610
1611 if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1612 if (C->getValue().isPowerOf2())
1613 std::swap(MulLHS, MulRHS);
1614
1615 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1616 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1617
1618 Register RHSReg = getRegForValue(MulLHS);
1619 if (!RHSReg)
1620 return Register();
1621 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1622 if (ResultReg)
1623 return ResultReg;
1624 }
1625 }
1626
1627 // Check if the shift can be folded into the instruction.
1628 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1629 if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1630 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1631 uint64_t ShiftVal = C->getZExtValue();
1632 Register RHSReg = getRegForValue(SI->getOperand(0));
1633 if (!RHSReg)
1634 return Register();
1635 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1636 if (ResultReg)
1637 return ResultReg;
1638 }
1639 }
1640
1641 Register RHSReg = getRegForValue(RHS);
1642 if (!RHSReg)
1643 return Register();
1644
1645 MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1646 ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);
1647 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1648 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1649 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1650 }
1651 return ResultReg;
1652}
1653
1654Register AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1655 Register LHSReg, uint64_t Imm) {
1656 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1657 "ISD nodes are not consecutive!");
1658 static const unsigned OpcTable[3][2] = {
1659 { AArch64::ANDWri, AArch64::ANDXri },
1660 { AArch64::ORRWri, AArch64::ORRXri },
1661 { AArch64::EORWri, AArch64::EORXri }
1662 };
1663 const TargetRegisterClass *RC;
1664 unsigned Opc;
1665 unsigned RegSize;
1666 switch (RetVT.SimpleTy) {
1667 default:
1668 return Register();
1669 case MVT::i1:
1670 case MVT::i8:
1671 case MVT::i16:
1672 case MVT::i32: {
1673 unsigned Idx = ISDOpc - ISD::AND;
1674 Opc = OpcTable[Idx][0];
1675 RC = &AArch64::GPR32spRegClass;
1676 RegSize = 32;
1677 break;
1678 }
1679 case MVT::i64:
1680 Opc = OpcTable[ISDOpc - ISD::AND][1];
1681 RC = &AArch64::GPR64spRegClass;
1682 RegSize = 64;
1683 break;
1684 }
1685
1686 if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1687 return Register();
1688
1689 Register ResultReg =
1690 fastEmitInst_ri(Opc, RC, LHSReg,
1691 AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1692 if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1693 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1694 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1695 }
1696 return ResultReg;
1697}
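// Example: a mask like 0xff is a valid AArch64 logical immediate, so an i8 AND
// goes through ANDWri here; a value such as 0x1234 is not a replicated bitmask
// pattern, isLogicalImmediate rejects it, and emitLogicalOp falls back to the
// register-register form.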
1698
1699Register AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1700 Register LHSReg, Register RHSReg,
1701 uint64_t ShiftImm) {
1702 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1703 "ISD nodes are not consecutive!");
1704 static const unsigned OpcTable[3][2] = {
1705 { AArch64::ANDWrs, AArch64::ANDXrs },
1706 { AArch64::ORRWrs, AArch64::ORRXrs },
1707 { AArch64::EORWrs, AArch64::EORXrs }
1708 };
1709
1710 // Don't deal with undefined shifts.
1711 if (ShiftImm >= RetVT.getSizeInBits())
1712 return Register();
1713
1714 const TargetRegisterClass *RC;
1715 unsigned Opc;
1716 switch (RetVT.SimpleTy) {
1717 default:
1718 return Register();
1719 case MVT::i1:
1720 case MVT::i8:
1721 case MVT::i16:
1722 case MVT::i32:
1723 Opc = OpcTable[ISDOpc - ISD::AND][0];
1724 RC = &AArch64::GPR32RegClass;
1725 break;
1726 case MVT::i64:
1727 Opc = OpcTable[ISDOpc - ISD::AND][1];
1728 RC = &AArch64::GPR64RegClass;
1729 break;
1730 }
1731 Register ResultReg =
1732 fastEmitInst_rri(Opc, RC, LHSReg, RHSReg,
1733 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1734 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1735 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1736 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1737 }
1738 return ResultReg;
1739}
1740
1741Register AArch64FastISel::emitAnd_ri(MVT RetVT, Register LHSReg, uint64_t Imm) {
1742 return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm);
1743}
1744
1745Register AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1746 bool WantZExt, MachineMemOperand *MMO) {
1747 if (!TLI.allowsMisalignedMemoryAccesses(VT))
1748 return Register();
1749
1750 // Simplify this down to something we can handle.
1751 if (!simplifyAddress(Addr, VT))
1752 return Register();
1753
1754 unsigned ScaleFactor = getImplicitScaleFactor(VT);
1755 if (!ScaleFactor)
1756 llvm_unreachable("Unexpected value type.");
1757
1758 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1759 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1760 bool UseScaled = true;
1761 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1762 UseScaled = false;
1763 ScaleFactor = 1;
1764 }
1765
1766 static const unsigned GPOpcTable[2][8][4] = {
1767 // Sign-extend.
1768 { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
1769 AArch64::LDURXi },
1770 { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
1771 AArch64::LDURXi },
1772 { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
1773 AArch64::LDRXui },
1774 { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
1775 AArch64::LDRXui },
1776 { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1777 AArch64::LDRXroX },
1778 { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1779 AArch64::LDRXroX },
1780 { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1781 AArch64::LDRXroW },
1782 { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1783 AArch64::LDRXroW }
1784 },
1785 // Zero-extend.
1786 { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1787 AArch64::LDURXi },
1788 { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1789 AArch64::LDURXi },
1790 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1791 AArch64::LDRXui },
1792 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1793 AArch64::LDRXui },
1794 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1795 AArch64::LDRXroX },
1796 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1797 AArch64::LDRXroX },
1798 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1799 AArch64::LDRXroW },
1800 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1801 AArch64::LDRXroW }
1802 }
1803 };
1804
1805 static const unsigned FPOpcTable[4][2] = {
1806 { AArch64::LDURSi, AArch64::LDURDi },
1807 { AArch64::LDRSui, AArch64::LDRDui },
1808 { AArch64::LDRSroX, AArch64::LDRDroX },
1809 { AArch64::LDRSroW, AArch64::LDRDroW }
1810 };
1811
1812 unsigned Opc;
1813 const TargetRegisterClass *RC;
1814 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1815 Addr.getOffsetReg();
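// Pick the addressing form: 0 = unscaled immediate (LDUR*), 1 = scaled
// immediate (LDR*ui), 2 = register offset, 3 = sign-/zero-extended 32-bit
// register offset. The GP table further splits each form into a 32-bit and a
// 64-bit destination row via 2 * Idx + IsRet64Bit below.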
1816 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1817 if (Addr.getExtendType() == AArch64_AM::UXTW ||
1818 Addr.getExtendType() == AArch64_AM::SXTW)
1819 Idx++;
1820
1821 bool IsRet64Bit = RetVT == MVT::i64;
1822 switch (VT.SimpleTy) {
1823 default:
1824 llvm_unreachable("Unexpected value type.");
1825 case MVT::i1: // Intentional fall-through.
1826 case MVT::i8:
1827 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1828 RC = (IsRet64Bit && !WantZExt) ?
1829 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1830 break;
1831 case MVT::i16:
1832 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1833 RC = (IsRet64Bit && !WantZExt) ?
1834 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1835 break;
1836 case MVT::i32:
1837 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1838 RC = (IsRet64Bit && !WantZExt) ?
1839 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1840 break;
1841 case MVT::i64:
1842 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1843 RC = &AArch64::GPR64RegClass;
1844 break;
1845 case MVT::f32:
1846 Opc = FPOpcTable[Idx][0];
1847 RC = &AArch64::FPR32RegClass;
1848 break;
1849 case MVT::f64:
1850 Opc = FPOpcTable[Idx][1];
1851 RC = &AArch64::FPR64RegClass;
1852 break;
1853 }
1854
1855 // Create the base instruction, then add the operands.
1856 Register ResultReg = createResultReg(RC);
1857 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1858 TII.get(Opc), ResultReg);
1859 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1860
1861 // Loading an i1 requires special handling.
1862 if (VT == MVT::i1) {
1863 Register ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1);
1864 assert(ANDReg && "Unexpected AND instruction emission failure.");
1865 ResultReg = ANDReg;
1866 }
1867
1868 // For zero-extending loads to 64-bit we emit a 32-bit load and then convert
1869 // the 32-bit reg to a 64-bit reg.
1870 if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1871 Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
1872 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1873 TII.get(AArch64::SUBREG_TO_REG), Reg64)
1874 .addImm(0)
1875 .addReg(ResultReg, getKillRegState(true))
1876 .addImm(AArch64::sub_32);
1877 ResultReg = Reg64;
1878 }
1879 return ResultReg;
1880}
1881
1882bool AArch64FastISel::selectAddSub(const Instruction *I) {
1883 MVT VT;
1884 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1885 return false;
1886
1887 if (VT.isVector())
1888 return selectOperator(I, I->getOpcode());
1889
1890 Register ResultReg;
1891 switch (I->getOpcode()) {
1892 default:
1893 llvm_unreachable("Unexpected instruction.");
1894 case Instruction::Add:
1895 ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1896 break;
1897 case Instruction::Sub:
1898 ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1899 break;
1900 }
1901 if (!ResultReg)
1902 return false;
1903
1904 updateValueMap(I, ResultReg);
1905 return true;
1906}
1907
1908bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1909 MVT VT;
1910 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1911 return false;
1912
1913 if (VT.isVector())
1914 return selectOperator(I, I->getOpcode());
1915
1916 Register ResultReg;
1917 switch (I->getOpcode()) {
1918 default:
1919 llvm_unreachable("Unexpected instruction.");
1920 case Instruction::And:
1921 ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1922 break;
1923 case Instruction::Or:
1924 ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1925 break;
1926 case Instruction::Xor:
1927 ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1928 break;
1929 }
1930 if (!ResultReg)
1931 return false;
1932
1933 updateValueMap(I, ResultReg);
1934 return true;
1935}
1936
1937bool AArch64FastISel::selectLoad(const Instruction *I) {
1938 MVT VT;
1939 // Verify we have a legal type before going any further. Currently, we handle
1940 // simple types that will directly fit in a register (i32/f32/i64/f64) or
1941 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1942 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1943 cast<LoadInst>(I)->isAtomic())
1944 return false;
1945
1946 const Value *SV = I->getOperand(0);
1947 if (TLI.supportSwiftError()) {
1948 // Swifterror values can come from either a function parameter with
1949 // swifterror attribute or an alloca with swifterror attribute.
1950 if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1951 if (Arg->hasSwiftErrorAttr())
1952 return false;
1953 }
1954
1955 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1956 if (Alloca->isSwiftError())
1957 return false;
1958 }
1959 }
1960
1961 // See if we can handle this address.
1962 Address Addr;
1963 if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1964 return false;
1965
1966 // Fold the following sign-/zero-extend into the load instruction.
1967 bool WantZExt = true;
1968 MVT RetVT = VT;
1969 const Value *IntExtVal = nullptr;
1970 if (I->hasOneUse()) {
1971 if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1972 if (isTypeSupported(ZE->getType(), RetVT))
1973 IntExtVal = ZE;
1974 else
1975 RetVT = VT;
1976 } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1977 if (isTypeSupported(SE->getType(), RetVT))
1978 IntExtVal = SE;
1979 else
1980 RetVT = VT;
1981 WantZExt = false;
1982 }
1983 }
1984
1985 Register ResultReg =
1986 emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1987 if (!ResultReg)
1988 return false;
1989
1990 // There are a few different cases we have to handle, because the load or the
1991 // sign-/zero-extend might not be selected by FastISel if we fall back to
1992 // SelectionDAG. There is also an ordering issue when both instructions are in
1993 // different basic blocks.
1994 // 1.) The load instruction is selected by FastISel, but the integer extend
1995 // not. This usually happens when the integer extend is in a different
1996 // basic block and SelectionDAG took over for that basic block.
1997 // 2.) The load instruction is selected before the integer extend. This only
1998 // happens when the integer extend is in a different basic block.
1999 // 3.) The load instruction is selected by SelectionDAG and the integer extend
2000 // by FastISel. This happens if there are instructions between the load
2001 // and the integer extend that couldn't be selected by FastISel.
2002 if (IntExtVal) {
2003 // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2004 // could select it. Emit a copy to subreg if necessary. FastISel will remove
2005 // it when it selects the integer extend.
2006 Register Reg = lookUpRegForValue(IntExtVal);
2007 auto *MI = MRI.getUniqueVRegDef(Reg);
2008 if (!MI) {
2009 if (RetVT == MVT::i64 && VT <= MVT::i32) {
2010 if (WantZExt) {
2011 // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2012 MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
2013 ResultReg = std::prev(I)->getOperand(0).getReg();
2014 removeDeadCode(I, std::next(I));
2015 } else
2016 ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2017 AArch64::sub_32);
2018 }
2019 updateValueMap(I, ResultReg);
2020 return true;
2021 }
2022
2023 // The integer extend has already been emitted - delete all the instructions
2024 // that have been emitted by the integer extend lowering code and use the
2025 // result from the load instruction directly.
2026 while (MI) {
2027 Reg = 0;
2028 for (auto &Opnd : MI->uses()) {
2029 if (Opnd.isReg()) {
2030 Reg = Opnd.getReg();
2031 break;
2032 }
2033 }
2034 MachineBasicBlock::iterator I(MI);
2035 removeDeadCode(I, std::next(I));
2036 MI = nullptr;
2037 if (Reg)
2038 MI = MRI.getUniqueVRegDef(Reg);
2039 }
2040 updateValueMap(IntExtVal, ResultReg);
2041 return true;
2042 }
2043
2044 updateValueMap(I, ResultReg);
2045 return true;
2046}
2047
2048bool AArch64FastISel::emitStoreRelease(MVT VT, Register SrcReg,
2049 Register AddrReg,
2050 MachineMemOperand *MMO) {
2051 unsigned Opc;
2052 switch (VT.SimpleTy) {
2053 default: return false;
2054 case MVT::i8: Opc = AArch64::STLRB; break;
2055 case MVT::i16: Opc = AArch64::STLRH; break;
2056 case MVT::i32: Opc = AArch64::STLRW; break;
2057 case MVT::i64: Opc = AArch64::STLRX; break;
2058 }
2059
2060 const MCInstrDesc &II = TII.get(Opc);
2061 SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2062 AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2063 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2064 .addReg(SrcReg)
2065 .addReg(AddrReg)
2066 .addMemOperand(MMO);
2067 return true;
2068}
2069
2070bool AArch64FastISel::emitStore(MVT VT, Register SrcReg, Address Addr,
2071 MachineMemOperand *MMO) {
2072 if (!TLI.allowsMisalignedMemoryAccesses(VT))
2073 return false;
2074
2075 // Simplify this down to something we can handle.
2076 if (!simplifyAddress(Addr, VT))
2077 return false;
2078
2079 unsigned ScaleFactor = getImplicitScaleFactor(VT);
2080 if (!ScaleFactor)
2081 llvm_unreachable("Unexpected value type.");
2082
2083 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2084 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2085 bool UseScaled = true;
2086 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2087 UseScaled = false;
2088 ScaleFactor = 1;
2089 }
2090
2091 static const unsigned OpcTable[4][6] = {
2092 { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
2093 AArch64::STURSi, AArch64::STURDi },
2094 { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
2095 AArch64::STRSui, AArch64::STRDui },
2096 { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2097 AArch64::STRSroX, AArch64::STRDroX },
2098 { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2099 AArch64::STRSroW, AArch64::STRDroW }
2100 };
2101
2102 unsigned Opc;
2103 bool VTIsi1 = false;
2104 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2105 Addr.getOffsetReg();
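// Same addressing-form selection as emitLoad: unscaled immediate, scaled
// immediate, register offset, or extended 32-bit register offset.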
2106 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2107 if (Addr.getExtendType() == AArch64_AM::UXTW ||
2108 Addr.getExtendType() == AArch64_AM::SXTW)
2109 Idx++;
2110
2111 switch (VT.SimpleTy) {
2112 default: llvm_unreachable("Unexpected value type.");
2113 case MVT::i1: VTIsi1 = true; [[fallthrough]];
2114 case MVT::i8: Opc = OpcTable[Idx][0]; break;
2115 case MVT::i16: Opc = OpcTable[Idx][1]; break;
2116 case MVT::i32: Opc = OpcTable[Idx][2]; break;
2117 case MVT::i64: Opc = OpcTable[Idx][3]; break;
2118 case MVT::f32: Opc = OpcTable[Idx][4]; break;
2119 case MVT::f64: Opc = OpcTable[Idx][5]; break;
2120 }
2121
2122 // Storing an i1 requires special handling.
2123 if (VTIsi1 && SrcReg != AArch64::WZR) {
2124 Register ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1);
2125 assert(ANDReg && "Unexpected AND instruction emission failure.");
2126 SrcReg = ANDReg;
2127 }
2128 // Create the base instruction, then add the operands.
2129 const MCInstrDesc &II = TII.get(Opc);
2130 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2131 MachineInstrBuilder MIB =
2132 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(SrcReg);
2133 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2134
2135 return true;
2136}
2137
2138bool AArch64FastISel::selectStore(const Instruction *I) {
2139 MVT VT;
2140 const Value *Op0 = I->getOperand(0);
2141 // Verify we have a legal type before going any further. Currently, we handle
2142 // simple types that will directly fit in a register (i32/f32/i64/f64) or
2143 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2144 if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2145 return false;
2146
2147 const Value *PtrV = I->getOperand(1);
2148 if (TLI.supportSwiftError()) {
2149 // Swifterror values can come from either a function parameter with
2150 // swifterror attribute or an alloca with swifterror attribute.
2151 if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2152 if (Arg->hasSwiftErrorAttr())
2153 return false;
2154 }
2155
2156 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2157 if (Alloca->isSwiftError())
2158 return false;
2159 }
2160 }
2161
2162 // Get the value to be stored into a register. Use the zero register directly
2163 // when possible to avoid an unnecessary copy and a wasted register.
2164 Register SrcReg;
2165 if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2166 if (CI->isZero())
2167 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2168 } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2169 if (CF->isZero() && !CF->isNegative()) {
2170 VT = MVT::getIntegerVT(VT.getSizeInBits());
2171 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2172 }
2173 }
2174
2175 if (!SrcReg)
2176 SrcReg = getRegForValue(Op0);
2177
2178 if (!SrcReg)
2179 return false;
2180
2181 auto *SI = cast<StoreInst>(I);
2182
2183 // Try to emit a STLR for seq_cst/release.
2184 if (SI->isAtomic()) {
2185 AtomicOrdering Ord = SI->getOrdering();
2186 // The non-atomic instructions are sufficient for relaxed stores.
2187 if (isReleaseOrStronger(Ord)) {
2188 // The STLR addressing mode only supports a base reg; pass that directly.
2189 Register AddrReg = getRegForValue(PtrV);
2190 if (!AddrReg)
2191 return false;
2192 return emitStoreRelease(VT, SrcReg, AddrReg,
2193 createMachineMemOperandFor(I));
2194 }
2195 }
2196
2197 // See if we can handle this address.
2198 Address Addr;
2199 if (!computeAddress(PtrV, Addr, Op0->getType()))
2200 return false;
2201
2202 if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2203 return false;
2204 return true;
2205}
2206
2207 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2208 switch (Pred) {
2209 case CmpInst::FCMP_ONE:
2210 case CmpInst::FCMP_UEQ:
2211 default:
2212 // AL is our "false" for now. The other two need more compares.
2213 return AArch64CC::AL;
2214 case CmpInst::ICMP_EQ:
2215 case CmpInst::FCMP_OEQ:
2216 return AArch64CC::EQ;
2217 case CmpInst::ICMP_SGT:
2218 case CmpInst::FCMP_OGT:
2219 return AArch64CC::GT;
2220 case CmpInst::ICMP_SGE:
2221 case CmpInst::FCMP_OGE:
2222 return AArch64CC::GE;
2223 case CmpInst::ICMP_UGT:
2224 case CmpInst::FCMP_UGT:
2225 return AArch64CC::HI;
2226 case CmpInst::FCMP_OLT:
2227 return AArch64CC::MI;
2228 case CmpInst::ICMP_ULE:
2229 case CmpInst::FCMP_OLE:
2230 return AArch64CC::LS;
2231 case CmpInst::FCMP_ORD:
2232 return AArch64CC::VC;
2233 case CmpInst::FCMP_UNO:
2234 return AArch64CC::VS;
2235 case CmpInst::FCMP_UGE:
2236 return AArch64CC::PL;
2237 case CmpInst::ICMP_SLT:
2238 case CmpInst::FCMP_ULT:
2239 return AArch64CC::LT;
2240 case CmpInst::ICMP_SLE:
2241 case CmpInst::FCMP_ULE:
2242 return AArch64CC::LE;
2243 case CmpInst::FCMP_UNE:
2244 case CmpInst::ICMP_NE:
2245 return AArch64CC::NE;
2246 case CmpInst::ICMP_UGE:
2247 return AArch64CC::HS;
2248 case CmpInst::ICMP_ULT:
2249 return AArch64CC::LO;
2250 }
2251}
2252
2253/// Try to emit a combined compare-and-branch instruction.
2254bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2255 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
2256 // will not be produced, as they are conditional branch instructions that do
2257 // not set flags.
2258 if (FuncInfo.MF->getFunction().hasFnAttribute(
2259 Attribute::SpeculativeLoadHardening))
2260 return false;
2261
2262 assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2263 const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2264 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2265
2266 const Value *LHS = CI->getOperand(0);
2267 const Value *RHS = CI->getOperand(1);
2268
2269 MVT VT;
2270 if (!isTypeSupported(LHS->getType(), VT))
2271 return false;
2272
2273 unsigned BW = VT.getSizeInBits();
2274 if (BW > 64)
2275 return false;
2276
2277 MachineBasicBlock *TBB = FuncInfo.getMBB(BI->getSuccessor(0));
2278 MachineBasicBlock *FBB = FuncInfo.getMBB(BI->getSuccessor(1));
2279
2280 // Try to take advantage of fallthrough opportunities.
2281 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2282 std::swap(TBB, FBB);
2283 Predicate = CmpInst::getInversePredicate(Predicate);
2284 }
2285
2286 int TestBit = -1;
2287 bool IsCmpNE;
2288 switch (Predicate) {
2289 default:
2290 return false;
2291 case CmpInst::ICMP_EQ:
2292 case CmpInst::ICMP_NE:
2293 if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2294 std::swap(LHS, RHS);
2295
2296 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2297 return false;
2298
2299 if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2300 if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2301 const Value *AndLHS = AI->getOperand(0);
2302 const Value *AndRHS = AI->getOperand(1);
2303
2304 if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2305 if (C->getValue().isPowerOf2())
2306 std::swap(AndLHS, AndRHS);
2307
2308 if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2309 if (C->getValue().isPowerOf2()) {
2310 TestBit = C->getValue().logBase2();
2311 LHS = AndLHS;
2312 }
2313 }
2314
2315 if (VT == MVT::i1)
2316 TestBit = 0;
2317
2318 IsCmpNE = Predicate == CmpInst::ICMP_NE;
2319 break;
2320 case CmpInst::ICMP_SLT:
2321 case CmpInst::ICMP_SGE:
2322 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2323 return false;
2324
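// A signed compare against zero is a sign-bit test: x < 0 iff bit BW-1 is
// set, so this lowers to a TB(N)Z of the top bit.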
2325 TestBit = BW - 1;
2326 IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2327 break;
2328 case CmpInst::ICMP_SGT:
2329 case CmpInst::ICMP_SLE:
2330 if (!isa<ConstantInt>(RHS))
2331 return false;
2332
2333 if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2334 return false;
2335
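// Likewise, x > -1 (x >= 0) and x <= -1 (x < 0) reduce to a test of the
// sign bit.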
2336 TestBit = BW - 1;
2337 IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2338 break;
2339 } // end switch
2340
2341 static const unsigned OpcTable[2][2][2] = {
2342 { {AArch64::CBZW, AArch64::CBZX },
2343 {AArch64::CBNZW, AArch64::CBNZX} },
2344 { {AArch64::TBZW, AArch64::TBZX },
2345 {AArch64::TBNZW, AArch64::TBNZX} }
2346 };
2347
2348 bool IsBitTest = TestBit != -1;
2349 bool Is64Bit = BW == 64;
2350 if (TestBit < 32 && TestBit >= 0)
2351 Is64Bit = false;
2352
2353 unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2354 const MCInstrDesc &II = TII.get(Opc);
2355
2356 Register SrcReg = getRegForValue(LHS);
2357 if (!SrcReg)
2358 return false;
2359
2360 if (BW == 64 && !Is64Bit)
2361 SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32);
2362
2363 if ((BW < 32) && !IsBitTest)
2364 SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
2365
2366 // Emit the combined compare and branch instruction.
2367 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2368 MachineInstrBuilder MIB =
2369 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
2370 .addReg(SrcReg);
2371 if (IsBitTest)
2372 MIB.addImm(TestBit);
2373 MIB.addMBB(TBB);
2374
2375 finishCondBranch(BI->getParent(), TBB, FBB);
2376 return true;
2377}
2378
2379bool AArch64FastISel::selectBranch(const Instruction *I) {
2380 const BranchInst *BI = cast<BranchInst>(I);
2381 if (BI->isUnconditional()) {
2382 MachineBasicBlock *MSucc = FuncInfo.getMBB(BI->getSuccessor(0));
2383 fastEmitBranch(MSucc, BI->getDebugLoc());
2384 return true;
2385 }
2386
2387 MachineBasicBlock *TBB = FuncInfo.getMBB(BI->getSuccessor(0));
2388 MachineBasicBlock *FBB = FuncInfo.getMBB(BI->getSuccessor(1));
2389
2390 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2391 if (CI->hasOneUse() && isValueAvailable(CI)) {
2392 // Try to optimize or fold the cmp.
2393 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2394 switch (Predicate) {
2395 default:
2396 break;
2397 case CmpInst::FCMP_FALSE:
2398 fastEmitBranch(FBB, MIMD.getDL());
2399 return true;
2400 case CmpInst::FCMP_TRUE:
2401 fastEmitBranch(TBB, MIMD.getDL());
2402 return true;
2403 }
2404
2405 // Try to emit a combined compare-and-branch first.
2406 if (emitCompareAndBranch(BI))
2407 return true;
2408
2409 // Try to take advantage of fallthrough opportunities.
2410 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2411 std::swap(TBB, FBB);
2412 Predicate = CmpInst::getInversePredicate(Predicate);
2413 }
2414
2415 // Emit the cmp.
2416 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2417 return false;
2418
2419 // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2420 // instruction.
2421 AArch64CC::CondCode CC = getCompareCC(Predicate);
2422 AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2423 switch (Predicate) {
2424 default:
2425 break;
2426 case CmpInst::FCMP_UEQ:
2427 ExtraCC = AArch64CC::EQ;
2428 CC = AArch64CC::VS;
2429 break;
2430 case CmpInst::FCMP_ONE:
2431 ExtraCC = AArch64CC::MI;
2432 CC = AArch64CC::GT;
2433 break;
2434 }
2435 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2436
2437 // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2438 if (ExtraCC != AArch64CC::AL) {
2439 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2440 .addImm(ExtraCC)
2441 .addMBB(TBB);
2442 }
2443
2444 // Emit the branch.
2445 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2446 .addImm(CC)
2447 .addMBB(TBB);
2448
2449 finishCondBranch(BI->getParent(), TBB, FBB);
2450 return true;
2451 }
2452 } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2453 uint64_t Imm = CI->getZExtValue();
2454 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2455 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::B))
2456 .addMBB(Target);
2457
2458 // Obtain the branch probability and add the target to the successor list.
2459 if (FuncInfo.BPI) {
2460 auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2461 BI->getParent(), Target->getBasicBlock());
2462 FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2463 } else
2464 FuncInfo.MBB->addSuccessorWithoutProb(Target);
2465 return true;
2466 } else {
2467 AArch64CC::CondCode CC = AArch64CC::AL;
2468 if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2469 // Fake request the condition; otherwise the intrinsic might be completely
2470 // optimized away.
2471 Register CondReg = getRegForValue(BI->getCondition());
2472 if (!CondReg)
2473 return false;
2474
2475 // Emit the branch.
2476 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2477 .addImm(CC)
2478 .addMBB(TBB);
2479
2480 finishCondBranch(BI->getParent(), TBB, FBB);
2481 return true;
2482 }
2483 }
2484
2485 Register CondReg = getRegForValue(BI->getCondition());
2486 if (!CondReg)
2487 return false;
2488
2489 // i1 conditions come as i32 values; test the lowest bit with tb(n)z.
2490 unsigned Opcode = AArch64::TBNZW;
2491 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2492 std::swap(TBB, FBB);
2493 Opcode = AArch64::TBZW;
2494 }
2495
2496 const MCInstrDesc &II = TII.get(Opcode);
2497 Register ConstrainedCondReg
2498 = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2499 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2500 .addReg(ConstrainedCondReg)
2501 .addImm(0)
2502 .addMBB(TBB);
2503
2504 finishCondBranch(BI->getParent(), TBB, FBB);
2505 return true;
2506}
2507
2508bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2509 const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2510 Register AddrReg = getRegForValue(BI->getOperand(0));
2511 if (!AddrReg)
2512 return false;
2513
2514 // Authenticated indirectbr is not implemented yet.
2515 if (FuncInfo.MF->getFunction().hasFnAttribute("ptrauth-indirect-gotos"))
2516 return false;
2517
2518 // Emit the indirect branch.
2519 const MCInstrDesc &II = TII.get(AArch64::BR);
2520 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
2521 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(AddrReg);
2522
2523 // Make sure the CFG is up-to-date.
2524 for (const auto *Succ : BI->successors())
2525 FuncInfo.MBB->addSuccessor(FuncInfo.getMBB(Succ));
2526
2527 return true;
2528}
2529
2530bool AArch64FastISel::selectCmp(const Instruction *I) {
2531 const CmpInst *CI = cast<CmpInst>(I);
2532
2533 // Vectors of i1 are weird: bail out.
2534 if (CI->getType()->isVectorTy())
2535 return false;
2536
2537 // Try to optimize or fold the cmp.
2538 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2539 Register ResultReg;
2540 switch (Predicate) {
2541 default:
2542 break;
2543 case CmpInst::FCMP_FALSE:
2544 ResultReg = createResultReg(&AArch64::GPR32RegClass);
2545 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2546 TII.get(TargetOpcode::COPY), ResultReg)
2547 .addReg(AArch64::WZR, getKillRegState(true));
2548 break;
2549 case CmpInst::FCMP_TRUE:
2550 ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2551 break;
2552 }
2553
2554 if (ResultReg) {
2555 updateValueMap(I, ResultReg);
2556 return true;
2557 }
2558
2559 // Emit the cmp.
2560 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2561 return false;
2562
2563 ResultReg = createResultReg(&AArch64::GPR32RegClass);
2564
2565 // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2566 // condition codes are inverted, because they are used by CSINC.
2567 static unsigned CondCodeTable[2][2] = {
2568 { AArch64CC::NE, AArch64CC::VC },
2569 { AArch64CC::PL, AArch64CC::LE }
2570 };
2571 unsigned *CondCodes = nullptr;
2572 switch (Predicate) {
2573 default:
2574 break;
2575 case CmpInst::FCMP_UEQ:
2576 CondCodes = &CondCodeTable[0][0];
2577 break;
2578 case CmpInst::FCMP_ONE:
2579 CondCodes = &CondCodeTable[1][0];
2580 break;
2581 }
2582
2583 if (CondCodes) {
2584 Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2585 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2586 TmpReg1)
2587 .addReg(AArch64::WZR, getKillRegState(true))
2588 .addReg(AArch64::WZR, getKillRegState(true))
2589 .addImm(CondCodes[0]);
2590 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2591 ResultReg)
2592 .addReg(TmpReg1, getKillRegState(true))
2593 .addReg(AArch64::WZR, getKillRegState(true))
2594 .addImm(CondCodes[1]);
2595
2596 updateValueMap(I, ResultReg);
2597 return true;
2598 }
2599
2600 // Now set a register based on the comparison.
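// CSINC Wd, WZR, WZR, invCC yields 1 when the original condition holds and 0
// otherwise.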
2601 AArch64CC::CondCode CC = getCompareCC(Predicate);
2602 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2603 AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2604 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2605 ResultReg)
2606 .addReg(AArch64::WZR, getKillRegState(true))
2607 .addReg(AArch64::WZR, getKillRegState(true))
2608 .addImm(invertedCC);
2609
2610 updateValueMap(I, ResultReg);
2611 return true;
2612}
2613
2614/// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2615/// value.
2616bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2617 if (!SI->getType()->isIntegerTy(1))
2618 return false;
2619
2620 const Value *Src1Val, *Src2Val;
2621 unsigned Opc = 0;
2622 bool NeedExtraOp = false;
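// Map the select onto a single logical operation:
//   select c, 1, b -> ORR c, b
//   select c, 0, b -> BIC b, c
//   select c, a, 1 -> ORR (c ^ 1), a (needs the extra XOR)
//   select c, a, 0 -> AND c, a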
2623 if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2624 if (CI->isOne()) {
2625 Src1Val = SI->getCondition();
2626 Src2Val = SI->getFalseValue();
2627 Opc = AArch64::ORRWrr;
2628 } else {
2629 assert(CI->isZero());
2630 Src1Val = SI->getFalseValue();
2631 Src2Val = SI->getCondition();
2632 Opc = AArch64::BICWrr;
2633 }
2634 } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2635 if (CI->isOne()) {
2636 Src1Val = SI->getCondition();
2637 Src2Val = SI->getTrueValue();
2638 Opc = AArch64::ORRWrr;
2639 NeedExtraOp = true;
2640 } else {
2641 assert(CI->isZero());
2642 Src1Val = SI->getCondition();
2643 Src2Val = SI->getTrueValue();
2644 Opc = AArch64::ANDWrr;
2645 }
2646 }
2647
2648 if (!Opc)
2649 return false;
2650
2651 Register Src1Reg = getRegForValue(Src1Val);
2652 if (!Src1Reg)
2653 return false;
2654
2655 Register Src2Reg = getRegForValue(Src2Val);
2656 if (!Src2Reg)
2657 return false;
2658
2659 if (NeedExtraOp)
2660 Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1);
2661
2662 Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2663 Src2Reg);
2664 updateValueMap(SI, ResultReg);
2665 return true;
2666}
2667
2668bool AArch64FastISel::selectSelect(const Instruction *I) {
2669 assert(isa<SelectInst>(I) && "Expected a select instruction.");
2670 MVT VT;
2671 if (!isTypeSupported(I->getType(), VT))
2672 return false;
2673
2674 unsigned Opc;
2675 const TargetRegisterClass *RC;
2676 switch (VT.SimpleTy) {
2677 default:
2678 return false;
2679 case MVT::i1:
2680 case MVT::i8:
2681 case MVT::i16:
2682 case MVT::i32:
2683 Opc = AArch64::CSELWr;
2684 RC = &AArch64::GPR32RegClass;
2685 break;
2686 case MVT::i64:
2687 Opc = AArch64::CSELXr;
2688 RC = &AArch64::GPR64RegClass;
2689 break;
2690 case MVT::f32:
2691 Opc = AArch64::FCSELSrrr;
2692 RC = &AArch64::FPR32RegClass;
2693 break;
2694 case MVT::f64:
2695 Opc = AArch64::FCSELDrrr;
2696 RC = &AArch64::FPR64RegClass;
2697 break;
2698 }
2699
2700 const SelectInst *SI = cast<SelectInst>(I);
2701 const Value *Cond = SI->getCondition();
2702 AArch64CC::CondCode CC = AArch64CC::AL;
2703 AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2704
2705 if (optimizeSelect(SI))
2706 return true;
2707
2708 // Try to pick up the flags, so we don't have to emit another compare.
2709 if (foldXALUIntrinsic(CC, I, Cond)) {
2710 // Fake request the condition to force emission of the XALU intrinsic.
2711 Register CondReg = getRegForValue(Cond);
2712 if (!CondReg)
2713 return false;
2714 } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2715 isValueAvailable(Cond)) {
2716 const auto *Cmp = cast<CmpInst>(Cond);
2717 // Try to optimize or fold the cmp.
2718 CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2719 const Value *FoldSelect = nullptr;
2720 switch (Predicate) {
2721 default:
2722 break;
2723 case CmpInst::FCMP_FALSE:
2724 FoldSelect = SI->getFalseValue();
2725 break;
2726 case CmpInst::FCMP_TRUE:
2727 FoldSelect = SI->getTrueValue();
2728 break;
2729 }
2730
2731 if (FoldSelect) {
2732 Register SrcReg = getRegForValue(FoldSelect);
2733 if (!SrcReg)
2734 return false;
2735
2736 updateValueMap(I, SrcReg);
2737 return true;
2738 }
2739
2740 // Emit the cmp.
2741 if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2742 return false;
2743
2744 // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2745 CC = getCompareCC(Predicate);
2746 switch (Predicate) {
2747 default:
2748 break;
2749 case CmpInst::FCMP_UEQ:
2750 ExtraCC = AArch64CC::EQ;
2751 CC = AArch64CC::VS;
2752 break;
2753 case CmpInst::FCMP_ONE:
2754 ExtraCC = AArch64CC::MI;
2755 CC = AArch64CC::GT;
2756 break;
2757 }
2758 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2759 } else {
2760 Register CondReg = getRegForValue(Cond);
2761 if (!CondReg)
2762 return false;
2763
2764 const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2765 CondReg = constrainOperandRegClass(II, CondReg, 1);
2766
2767 // Emit a TST instruction (ANDS wzr, reg, #imm).
2768 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II,
2769 AArch64::WZR)
2770 .addReg(CondReg)
2771 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2772 }
2773
2774 Register Src1Reg = getRegForValue(SI->getTrueValue());
2775 Register Src2Reg = getRegForValue(SI->getFalseValue());
2776
2777 if (!Src1Reg || !Src2Reg)
2778 return false;
2779
2780 if (ExtraCC != AArch64CC::AL)
2781 Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC);
2782
2783 Register ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC);
2784 updateValueMap(I, ResultReg);
2785 return true;
2786}
2787
2788bool AArch64FastISel::selectFPExt(const Instruction *I) {
2789 Value *V = I->getOperand(0);
2790 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2791 return false;
2792
2793 Register Op = getRegForValue(V);
2794 if (Op == 0)
2795 return false;
2796
2797 Register ResultReg = createResultReg(&AArch64::FPR64RegClass);
2798 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTDSr),
2799 ResultReg).addReg(Op);
2800 updateValueMap(I, ResultReg);
2801 return true;
2802}
2803
2804bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2805 Value *V = I->getOperand(0);
2806 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2807 return false;
2808
2809 Register Op = getRegForValue(V);
2810 if (Op == 0)
2811 return false;
2812
2813 Register ResultReg = createResultReg(&AArch64::FPR32RegClass);
2814 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTSDr),
2815 ResultReg).addReg(Op);
2816 updateValueMap(I, ResultReg);
2817 return true;
2818}
2819
2820// FPToUI and FPToSI
2821bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2822 MVT DestVT;
2823 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2824 return false;
2825
2826 Register SrcReg = getRegForValue(I->getOperand(0));
2827 if (!SrcReg)
2828 return false;
2829
2830 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2831 if (SrcVT == MVT::f128 || SrcVT == MVT::f16 || SrcVT == MVT::bf16)
2832 return false;
2833
2834 unsigned Opc;
2835 if (SrcVT == MVT::f64) {
2836 if (Signed)
2837 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2838 else
2839 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2840 } else {
2841 if (Signed)
2842 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2843 else
2844 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2845 }
2846 Register ResultReg = createResultReg(
2847 DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2848 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
2849 .addReg(SrcReg);
2850 updateValueMap(I, ResultReg);
2851 return true;
2852}
2853
2854bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2855 MVT DestVT;
2856 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2857 return false;
2858 // Let regular ISEL handle FP16
2859 if (DestVT == MVT::f16 || DestVT == MVT::bf16)
2860 return false;
2861
2862 assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2863 "Unexpected value type.");
2864
2865 Register SrcReg = getRegForValue(I->getOperand(0));
2866 if (!SrcReg)
2867 return false;
2868
2869 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2870
2871 // Handle sign-extension.
2872 if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2873 SrcReg =
2874 emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2875 if (!SrcReg)
2876 return false;
2877 }
2878
2879 unsigned Opc;
2880 if (SrcVT == MVT::i64) {
2881 if (Signed)
2882 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2883 else
2884 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2885 } else {
2886 if (Signed)
2887 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2888 else
2889 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2890 }
2891
2892 Register ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg);
2893 updateValueMap(I, ResultReg);
2894 return true;
2895}
2896
2897bool AArch64FastISel::fastLowerArguments() {
2898 if (!FuncInfo.CanLowerReturn)
2899 return false;
2900
2901 const Function *F = FuncInfo.Fn;
2902 if (F->isVarArg())
2903 return false;
2904
2905 CallingConv::ID CC = F->getCallingConv();
2906 if (CC != CallingConv::C && CC != CallingConv::Swift)
2907 return false;
2908
2909 if (Subtarget->hasCustomCallingConv())
2910 return false;
2911
2912 // Only handle simple cases of up to 8 GPR and FPR each.
2913 unsigned GPRCnt = 0;
2914 unsigned FPRCnt = 0;
2915 for (auto const &Arg : F->args()) {
2916 if (Arg.hasAttribute(Attribute::ByVal) ||
2917 Arg.hasAttribute(Attribute::InReg) ||
2918 Arg.hasAttribute(Attribute::StructRet) ||
2919 Arg.hasAttribute(Attribute::SwiftSelf) ||
2920 Arg.hasAttribute(Attribute::SwiftAsync) ||
2921 Arg.hasAttribute(Attribute::SwiftError) ||
2922 Arg.hasAttribute(Attribute::Nest))
2923 return false;
2924
2925 Type *ArgTy = Arg.getType();
2926 if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2927 return false;
2928
2929 EVT ArgVT = TLI.getValueType(DL, ArgTy);
2930 if (!ArgVT.isSimple())
2931 return false;
2932
2933 MVT VT = ArgVT.getSimpleVT().SimpleTy;
2934 if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2935 return false;
2936
2937 if (VT.isVector() &&
2938 (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2939 return false;
2940
2941 if (VT >= MVT::i1 && VT <= MVT::i64)
2942 ++GPRCnt;
2943 else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2944 VT.is128BitVector())
2945 ++FPRCnt;
2946 else
2947 return false;
2948
2949 if (GPRCnt > 8 || FPRCnt > 8)
2950 return false;
2951 }
2952
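// Argument registers by class: W (32-bit int), X (64-bit int), H (f16/bf16),
// S (f32), D (f64 and 64-bit vectors), Q (128-bit vectors); eight of each.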
2953 static const MCPhysReg Registers[6][8] = {
2954 { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2955 AArch64::W5, AArch64::W6, AArch64::W7 },
2956 { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2957 AArch64::X5, AArch64::X6, AArch64::X7 },
2958 { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2959 AArch64::H5, AArch64::H6, AArch64::H7 },
2960 { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2961 AArch64::S5, AArch64::S6, AArch64::S7 },
2962 { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2963 AArch64::D5, AArch64::D6, AArch64::D7 },
2964 { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2965 AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2966 };
2967
2968 unsigned GPRIdx = 0;
2969 unsigned FPRIdx = 0;
2970 for (auto const &Arg : F->args()) {
2971 MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2972 unsigned SrcReg;
2973 const TargetRegisterClass *RC;
2974 if (VT >= MVT::i1 && VT <= MVT::i32) {
2975 SrcReg = Registers[0][GPRIdx++];
2976 RC = &AArch64::GPR32RegClass;
2977 VT = MVT::i32;
2978 } else if (VT == MVT::i64) {
2979 SrcReg = Registers[1][GPRIdx++];
2980 RC = &AArch64::GPR64RegClass;
2981 } else if (VT == MVT::f16 || VT == MVT::bf16) {
2982 SrcReg = Registers[2][FPRIdx++];
2983 RC = &AArch64::FPR16RegClass;
2984 } else if (VT == MVT::f32) {
2985 SrcReg = Registers[3][FPRIdx++];
2986 RC = &AArch64::FPR32RegClass;
2987 } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2988 SrcReg = Registers[4][FPRIdx++];
2989 RC = &AArch64::FPR64RegClass;
2990 } else if (VT.is128BitVector()) {
2991 SrcReg = Registers[5][FPRIdx++];
2992 RC = &AArch64::FPR128RegClass;
2993 } else
2994 llvm_unreachable("Unexpected value type.");
2995
2996 Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
2997 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
2998 // Without this, EmitLiveInCopies may eliminate the livein if its only
2999 // use is a bitcast (which isn't turned into an instruction).
3000 Register ResultReg = createResultReg(RC);
3001 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3002 TII.get(TargetOpcode::COPY), ResultReg)
3003 .addReg(DstReg, getKillRegState(true));
3004 updateValueMap(&Arg, ResultReg);
3005 }
3006 return true;
3007}
3008
3009bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3010 SmallVectorImpl<MVT> &OutVTs,
3011 SmallVectorImpl<Type *> &OrigTys,
3012 unsigned &NumBytes) {
3013 CallingConv::ID CC = CLI.CallConv;
3014 SmallVector<CCValAssign, 16> ArgLocs;
3015 CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3016 CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, OrigTys,
3017 CCAssignFnForCall(CC));
3018
3019 // Get a count of how many bytes are to be pushed on the stack.
3020 NumBytes = CCInfo.getStackSize();
3021
3022 // Issue CALLSEQ_START
3023 unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3024 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackDown))
3025 .addImm(NumBytes).addImm(0);
3026
3027 // Process the args.
3028 for (CCValAssign &VA : ArgLocs) {
3029 const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3030 MVT ArgVT = OutVTs[VA.getValNo()];
3031
3032 Register ArgReg = getRegForValue(ArgVal);
3033 if (!ArgReg)
3034 return false;
3035
3036 // Handle arg promotion: SExt, ZExt, AExt.
3037 switch (VA.getLocInfo()) {
3038 case CCValAssign::Full:
3039 break;
3040 case CCValAssign::SExt: {
3041 MVT DestVT = VA.getLocVT();
3042 MVT SrcVT = ArgVT;
3043 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3044 if (!ArgReg)
3045 return false;
3046 break;
3047 }
3048 case CCValAssign::AExt:
3049 // Intentional fall-through.
3050 case CCValAssign::ZExt: {
3051 MVT DestVT = VA.getLocVT();
3052 MVT SrcVT = ArgVT;
3053 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3054 if (!ArgReg)
3055 return false;
3056 break;
3057 }
3058 default:
3059 llvm_unreachable("Unknown arg promotion!");
3060 }
3061
3062 // Now copy/store arg to correct locations.
3063 if (VA.isRegLoc() && !VA.needsCustom()) {
3064 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3065 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3066 CLI.OutRegs.push_back(VA.getLocReg());
3067 } else if (VA.needsCustom()) {
3068 // FIXME: Handle custom args.
3069 return false;
3070 } else {
3071 assert(VA.isMemLoc() && "Assuming store on stack.");
3072
3073 // Don't emit stores for undef values.
3074 if (isa<UndefValue>(ArgVal))
3075 continue;
3076
3077 // Need to store on the stack.
3078 unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3079
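// On big-endian targets, arguments smaller than 8 bytes are passed in the
// high part of their stack slot, so bump the offset accordingly.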
3080 unsigned BEAlign = 0;
3081 if (ArgSize < 8 && !Subtarget->isLittleEndian())
3082 BEAlign = 8 - ArgSize;
3083
3084 Address Addr;
3085 Addr.setKind(Address::RegBase);
3086 Addr.setReg(AArch64::SP);
3087 Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3088
3089 Align Alignment = DL.getABITypeAlign(ArgVal->getType());
3090 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3091 MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3092 MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3093
3094 if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3095 return false;
3096 }
3097 }
3098 return true;
3099}
3100
3101bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, unsigned NumBytes) {
3102 CallingConv::ID CC = CLI.CallConv;
3103
3104 // Issue CALLSEQ_END
3105 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3106 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp))
3107 .addImm(NumBytes).addImm(0);
3108
3109 // Now the return values.
3110 SmallVector<CCValAssign, 16> RVLocs;
3111 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3112 CCInfo.AnalyzeCallResult(CLI.Ins, CCAssignFnForCall(CC));
3113
3114 Register ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
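// CreateRegs hands back the first of a contiguous run of virtual registers,
// one per legal part of the return type; copy each physreg result into them.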
3115 for (unsigned i = 0; i != RVLocs.size(); ++i) {
3116 CCValAssign &VA = RVLocs[i];
3117 MVT CopyVT = VA.getValVT();
3118 Register CopyReg = ResultReg + i;
3119
3120 // TODO: Handle big-endian results
3121 if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3122 return false;
3123
3124 // Copy result out of their specified physreg.
3125 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
3126 CopyReg)
3127 .addReg(VA.getLocReg());
3128 CLI.InRegs.push_back(VA.getLocReg());
3129 }
3130
3131 CLI.ResultReg = ResultReg;
3132 CLI.NumResultRegs = RVLocs.size();
3133
3134 return true;
3135}
3136
3137bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3138 CallingConv::ID CC = CLI.CallConv;
3139 bool IsTailCall = CLI.IsTailCall;
3140 bool IsVarArg = CLI.IsVarArg;
3141 const Value *Callee = CLI.Callee;
3142 MCSymbol *Symbol = CLI.Symbol;
3143
3144 if (!Callee && !Symbol)
3145 return false;
3146
3147 // Allow SelectionDAG isel to handle calls to functions like setjmp that need
3148 // a bti instruction following the call.
3149 if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
3150 !Subtarget->noBTIAtReturnTwice() &&
3151 MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
3152 return false;
3153
3154 // Allow SelectionDAG isel to handle indirect calls with KCFI checks.
3155 if (CLI.CB && CLI.CB->isIndirectCall() &&
3156 CLI.CB->getOperandBundle(LLVMContext::OB_kcfi))
3157 return false;
3158
3159 // Allow SelectionDAG isel to handle tail calls.
3160 if (IsTailCall)
3161 return false;
3162
3163 // FIXME: we could and should support this, but for now correctness at -O0 is
3164 // more important.
3165 if (Subtarget->isTargetILP32())
3166 return false;
3167
3168 CodeModel::Model CM = TM.getCodeModel();
3169 // Only support the small-addressing and large code models.
3170 if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3171 return false;
3172
3173 // FIXME: Add large code model support for ELF.
3174 if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3175 return false;
3176
3177 // ELF -fno-plt compiled intrinsic calls do not have the nonlazybind
3178 // attribute. Check "RtLibUseGOT" instead.
3179 if (MF->getFunction().getParent()->getRtLibUseGOT())
3180 return false;
3181
3182 // Let SDISel handle vararg functions.
3183 if (IsVarArg)
3184 return false;
3185
3186 if (Subtarget->isWindowsArm64EC())
3187 return false;
3188
3189 for (auto Flag : CLI.OutFlags)
3190 if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3191 Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError())
3192 return false;
3193
3194 // Set up the argument vectors.
3195 SmallVector<MVT, 16> OutVTs;
3196 SmallVector<Type *, 16> OrigTys;
3197 OutVTs.reserve(CLI.OutVals.size());
3198
3199 for (auto *Val : CLI.OutVals) {
3200 MVT VT;
3201 if (!isTypeLegal(Val->getType(), VT) &&
3202 !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3203 return false;
3204
3205 // We don't handle vector parameters yet.
3206 if (VT.isVector() || VT.getSizeInBits() > 64)
3207 return false;
3208
3209 OutVTs.push_back(VT);
3210 OrigTys.push_back(Val->getType());
3211 }
3212
3213 Address Addr;
3214 if (Callee && !computeCallAddress(Callee, Addr))
3215 return false;
3216
3217 // The weak function target may be zero; in that case we must use indirect
3219 // addressing via a stub on Windows, as it may be out of range for a
3219 // PC-relative jump.
3220 if (Subtarget->isTargetWindows() && Addr.getGlobalValue() &&
3221 Addr.getGlobalValue()->hasExternalWeakLinkage())
3222 return false;
3223
3224 // Handle the arguments now that we've gotten them.
3225 unsigned NumBytes;
3226 if (!processCallArgs(CLI, OutVTs, OrigTys, NumBytes))
3227 return false;
3228
3229 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3230 if (RegInfo->isAnyArgRegReserved(*MF))
3231 RegInfo->emitReservedArgRegCallError(*MF);
3232
3233 // Issue the call.
3234 MachineInstrBuilder MIB;
3235 if (Subtarget->useSmallAddressing()) {
3236 const MCInstrDesc &II =
3237 TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL);
3238 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II);
3239 if (Symbol)
3240 MIB.addSym(Symbol, 0);
3241 else if (Addr.getGlobalValue())
3242 MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3243 else if (Addr.getReg()) {
3244 Register Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3245 MIB.addReg(Reg);
3246 } else
3247 return false;
3248 } else {
3249 Register CallReg;
3250 if (Symbol) {
3251 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3252 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
3253 ADRPReg)
3254 .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3255
3256 CallReg = createResultReg(&AArch64::GPR64RegClass);
3257 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3258 TII.get(AArch64::LDRXui), CallReg)
3259 .addReg(ADRPReg)
3260 .addSym(Symbol,
3261 AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3262 } else if (Addr.getGlobalValue())
3263 CallReg = materializeGV(Addr.getGlobalValue());
3264 else if (Addr.getReg())
3265 CallReg = Addr.getReg();
3266
3267 if (!CallReg)
3268 return false;
3269
3270 const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF));
3271 CallReg = constrainOperandRegClass(II, CallReg, 0);
3272 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(CallReg);
3273 }
3274
3275 // Add implicit physical register uses to the call.
3276 for (auto Reg : CLI.OutRegs)
3277 MIB.addReg(Reg, RegState::Implicit);
3278
3279 // Add a register mask with the call-preserved registers.
3280 // Proper defs for return values will be added by setPhysRegsDeadExcept().
3281 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3282
3283 CLI.Call = MIB;
3284
3285 // Finish off the call including any return values.
3286 return finishCall(CLI, NumBytes);
3287}
3288
3289bool AArch64FastISel::isMemCpySmall(uint64_t Len, MaybeAlign Alignment) {
3290 if (Alignment)
3291 return Len / Alignment->value() <= 4;
3292 else
3293 return Len < 32;
3294}
3295
3296bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3297 uint64_t Len, MaybeAlign Alignment) {
3298 // Make sure we don't bloat code by inlining very large memcpy's.
3299 if (!isMemCpySmall(Len, Alignment))
3300 return false;
3301
3302 int64_t UnscaledOffset = 0;
3303 Address OrigDest = Dest;
3304 Address OrigSrc = Src;
3305
3306 while (Len) {
3307 MVT VT;
3308 if (!Alignment || *Alignment >= 8) {
3309 if (Len >= 8)
3310 VT = MVT::i64;
3311 else if (Len >= 4)
3312 VT = MVT::i32;
3313 else if (Len >= 2)
3314 VT = MVT::i16;
3315 else {
3316 VT = MVT::i8;
3317 }
3318 } else {
3319 assert(Alignment && "Alignment is set in this branch");
3320 // Bound based on alignment.
3321 if (Len >= 4 && *Alignment == 4)
3322 VT = MVT::i32;
3323 else if (Len >= 2 && *Alignment == 2)
3324 VT = MVT::i16;
3325 else {
3326 VT = MVT::i8;
3327 }
3328 }
3329
3330 Register ResultReg = emitLoad(VT, VT, Src);
3331 if (!ResultReg)
3332 return false;
3333
3334 if (!emitStore(VT, ResultReg, Dest))
3335 return false;
3336
3337 int64_t Size = VT.getSizeInBits() / 8;
3338 Len -= Size;
3339 UnscaledOffset += Size;
3340
3341 // We need to recompute the unscaled offset for each iteration.
3342 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3343 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3344 }
3345
3346 return true;
3347}
3348
3349/// Check if it is possible to fold the condition from the XALU intrinsic
3350/// into the user. The condition code will only be updated on success.
3351bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3352 const Instruction *I,
3353 const Value *Cond) {
3354 if (!isa<ExtractValueInst>(Cond))
3355 return false;
3356
3357 const auto *EV = cast<ExtractValueInst>(Cond);
3358 if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3359 return false;
3360
3361 const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3362 MVT RetVT;
3363 const Function *Callee = II->getCalledFunction();
3364 Type *RetTy =
3365 cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3366 if (!isTypeLegal(RetTy, RetVT))
3367 return false;
3368
3369 if (RetVT != MVT::i32 && RetVT != MVT::i64)
3370 return false;
3371
3372 const Value *LHS = II->getArgOperand(0);
3373 const Value *RHS = II->getArgOperand(1);
3374
3375 // Canonicalize immediate to the RHS.
3376 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3377 std::swap(LHS, RHS);
3378
3379 // Simplify multiplies.
3380 Intrinsic::ID IID = II->getIntrinsicID();
3381 switch (IID) {
3382 default:
3383 break;
3384 case Intrinsic::smul_with_overflow:
3385 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3386 if (C->getValue() == 2)
3387 IID = Intrinsic::sadd_with_overflow;
3388 break;
3389 case Intrinsic::umul_with_overflow:
3390 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3391 if (C->getValue() == 2)
3392 IID = Intrinsic::uadd_with_overflow;
3393 break;
3394 }
3395
3396 AArch64CC::CondCode TmpCC;
3397 switch (IID) {
3398 default:
3399 return false;
3400 case Intrinsic::sadd_with_overflow:
3401 case Intrinsic::ssub_with_overflow:
3402 TmpCC = AArch64CC::VS;
3403 break;
3404 case Intrinsic::uadd_with_overflow:
3405 TmpCC = AArch64CC::HS;
3406 break;
3407 case Intrinsic::usub_with_overflow:
3408 TmpCC = AArch64CC::LO;
3409 break;
3410 case Intrinsic::smul_with_overflow:
3411 case Intrinsic::umul_with_overflow:
3412 TmpCC = AArch64CC::NE;
3413 break;
3414 }
3415
3416 // Check if both instructions are in the same basic block.
3417 if (!isValueAvailable(II))
3418 return false;
3419
3420 // Make sure nothing is in the way.
3421 BasicBlock::const_iterator Start(I);
3422 BasicBlock::const_iterator End(II);
3423 for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3424 // We only expect extractvalue instructions between the intrinsic and the
3425 // instruction to be selected.
3426 if (!isa<ExtractValueInst>(Itr))
3427 return false;
3428
3429 // Check that the extractvalue operand comes from the intrinsic.
3430 const auto *EVI = cast<ExtractValueInst>(Itr);
3431 if (EVI->getAggregateOperand() != II)
3432 return false;
3433 }
3434
3435 CC = TmpCC;
3436 return true;
3437}
3438
3439bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3440 // FIXME: Handle more intrinsics.
3441 switch (II->getIntrinsicID()) {
3442 default: return false;
3443 case Intrinsic::frameaddress: {
3444 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3445 MFI.setFrameAddressIsTaken(true);
3446
3447 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3448 Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3449 Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3450 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3451 TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3452 // Recursively load frame address
3453 // ldr x0, [fp]
3454 // ldr x0, [x0]
3455 // ldr x0, [x0]
3456 // ...
3457 Register DestReg;
3458 unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3459 while (Depth--) {
3460 DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3461 SrcReg, 0);
3462 assert(DestReg && "Unexpected LDR instruction emission failure.");
3463 SrcReg = DestReg;
3464 }
3465
3466 updateValueMap(II, SrcReg);
3467 return true;
3468 }
3469 case Intrinsic::sponentry: {
3470 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3471
3472 // SP = FP + Fixed Object + 16
3473 int FI = MFI.CreateFixedObject(4, 0, false);
3474 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
3475 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3476 TII.get(AArch64::ADDXri), ResultReg)
3477 .addFrameIndex(FI)
3478 .addImm(0)
3479 .addImm(0);
3480
3481 updateValueMap(II, ResultReg);
3482 return true;
3483 }
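// The next two cases cover llvm.memcpy / llvm.memmove. As a rough sketch, a
// small constant-length copy such as
//   call void @llvm.memcpy.p0.p0.i64(ptr %d, ptr %s, i64 8, i1 false)
// is expanded inline by tryEmitSmallMemCpy when both addresses can be
// computed; everything else falls back to an ordinary libcall via lowerCallTo.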
3484 case Intrinsic::memcpy:
3485 case Intrinsic::memmove: {
3486 const auto *MTI = cast<MemTransferInst>(II);
3487 // Don't handle volatile.
3488 if (MTI->isVolatile())
3489 return false;
3490
3491 // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
3492 // we would emit dead code because we don't currently handle memmoves.
3493 bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3494 if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3495 // Small memcpy's are common enough that we want to do them without a call
3496 // if possible.
3497 uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3498 MaybeAlign Alignment;
3499 if (MTI->getDestAlign() || MTI->getSourceAlign())
3500 Alignment = std::min(MTI->getDestAlign().valueOrOne(),
3501 MTI->getSourceAlign().valueOrOne());
3502 if (isMemCpySmall(Len, Alignment)) {
3503 Address Dest, Src;
3504 if (!computeAddress(MTI->getRawDest(), Dest) ||
3505 !computeAddress(MTI->getRawSource(), Src))
3506 return false;
3507 if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3508 return true;
3509 }
3510 }
3511
3512 if (!MTI->getLength()->getType()->isIntegerTy(64))
3513 return false;
3514
3515 if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3516 // Fast instruction selection doesn't support the special
3517 // address spaces.
3518 return false;
3519
3520 const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3521 return lowerCallTo(II, IntrMemName, II->arg_size() - 1);
3522 }
3523 case Intrinsic::memset: {
3524 const MemSetInst *MSI = cast<MemSetInst>(II);
3525 // Don't handle volatile.
3526 if (MSI->isVolatile())
3527 return false;
3528
3529 if (!MSI->getLength()->getType()->isIntegerTy(64))
3530 return false;
3531
3532 if (MSI->getDestAddressSpace() > 255)
3533 // Fast instruction selection doesn't support the special
3534 // address spaces.
3535 return false;
3536
3537 return lowerCallTo(II, "memset", II->arg_size() - 1);
3538 }
3539 case Intrinsic::sin:
3540 case Intrinsic::cos:
3541 case Intrinsic::tan:
3542 case Intrinsic::pow: {
3543 MVT RetVT;
3544 if (!isTypeLegal(II->getType(), RetVT))
3545 return false;
3546
3547 if (RetVT != MVT::f32 && RetVT != MVT::f64)
3548 return false;
3549
3550 static const RTLIB::Libcall LibCallTable[4][2] = {
3551 {RTLIB::SIN_F32, RTLIB::SIN_F64},
3552 {RTLIB::COS_F32, RTLIB::COS_F64},
3553 {RTLIB::TAN_F32, RTLIB::TAN_F64},
3554 {RTLIB::POW_F32, RTLIB::POW_F64}};
3555 RTLIB::Libcall LC;
3556 bool Is64Bit = RetVT == MVT::f64;
3557 switch (II->getIntrinsicID()) {
3558 default:
3559 llvm_unreachable("Unexpected intrinsic.");
3560 case Intrinsic::sin:
3561 LC = LibCallTable[0][Is64Bit];
3562 break;
3563 case Intrinsic::cos:
3564 LC = LibCallTable[1][Is64Bit];
3565 break;
3566 case Intrinsic::tan:
3567 LC = LibCallTable[2][Is64Bit];
3568 break;
3569 case Intrinsic::pow:
3570 LC = LibCallTable[3][Is64Bit];
3571 break;
3572 }
3573
3574 ArgListTy Args;
3575 Args.reserve(II->arg_size());
3576
3577 // Populate the argument list.
3578 for (auto &Arg : II->args())
3579 Args.emplace_back(Arg);
3580
3581 CallLoweringInfo CLI;
3582 MCContext &Ctx = MF->getContext();
3583 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3584 TLI.getLibcallName(LC), std::move(Args));
3585 if (!lowerCallTo(CLI))
3586 return false;
3587 updateValueMap(II, CLI.ResultReg);
3588 return true;
3589 }
3590 case Intrinsic::fabs: {
3591 MVT VT;
3592 if (!isTypeLegal(II->getType(), VT))
3593 return false;
3594
3595 unsigned Opc;
3596 switch (VT.SimpleTy) {
3597 default:
3598 return false;
3599 case MVT::f32:
3600 Opc = AArch64::FABSSr;
3601 break;
3602 case MVT::f64:
3603 Opc = AArch64::FABSDr;
3604 break;
3605 }
3606 Register SrcReg = getRegForValue(II->getOperand(0));
3607 if (!SrcReg)
3608 return false;
3609 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3610 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
3611 .addReg(SrcReg);
3612 updateValueMap(II, ResultReg);
3613 return true;
3614 }
3615 case Intrinsic::trap:
3616 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3617 .addImm(1);
3618 return true;
3619 case Intrinsic::debugtrap:
3620 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3621 .addImm(0xF000);
3622 return true;
3623
3624 case Intrinsic::sqrt: {
3625 Type *RetTy = II->getCalledFunction()->getReturnType();
3626
3627 MVT VT;
3628 if (!isTypeLegal(RetTy, VT))
3629 return false;
3630
3631 Register Op0Reg = getRegForValue(II->getOperand(0));
3632 if (!Op0Reg)
3633 return false;
3634
3635 Register ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg);
3636 if (!ResultReg)
3637 return false;
3638
3639 updateValueMap(II, ResultReg);
3640 return true;
3641 }
3642 case Intrinsic::sadd_with_overflow:
3643 case Intrinsic::uadd_with_overflow:
3644 case Intrinsic::ssub_with_overflow:
3645 case Intrinsic::usub_with_overflow:
3646 case Intrinsic::smul_with_overflow:
3647 case Intrinsic::umul_with_overflow: {
3648 // This implements the basic lowering of the xalu with overflow intrinsics.
3649 const Function *Callee = II->getCalledFunction();
3650 auto *Ty = cast<StructType>(Callee->getReturnType());
3651 Type *RetTy = Ty->getTypeAtIndex(0U);
3652
3653 MVT VT;
3654 if (!isTypeLegal(RetTy, VT))
3655 return false;
3656
3657 if (VT != MVT::i32 && VT != MVT::i64)
3658 return false;
3659
3660 const Value *LHS = II->getArgOperand(0);
3661 const Value *RHS = II->getArgOperand(1);
3662 // Canonicalize immediate to the RHS.
3663 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3664 std::swap(LHS, RHS);
3665
3666 // Simplify multiplies.
3667 Intrinsic::ID IID = II->getIntrinsicID();
3668 switch (IID) {
3669 default:
3670 break;
3671 case Intrinsic::smul_with_overflow:
3672 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3673 if (C->getValue() == 2) {
3674 IID = Intrinsic::sadd_with_overflow;
3675 RHS = LHS;
3676 }
3677 break;
3678 case Intrinsic::umul_with_overflow:
3679 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3680 if (C->getValue() == 2) {
3681 IID = Intrinsic::uadd_with_overflow;
3682 RHS = LHS;
3683 }
3684 break;
3685 }
3686
3687 Register ResultReg1, ResultReg2, MulReg;
3688 AArch64CC::CondCode CC = AArch64CC::AL;
3689 switch (IID) {
3690 default: llvm_unreachable("Unexpected intrinsic!");
3691 case Intrinsic::sadd_with_overflow:
3692 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3693 CC = AArch64CC::VS;
3694 break;
3695 case Intrinsic::uadd_with_overflow:
3696 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3697 CC = AArch64CC::HS;
3698 break;
3699 case Intrinsic::ssub_with_overflow:
3700 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3701 CC = AArch64CC::VS;
3702 break;
3703 case Intrinsic::usub_with_overflow:
3704 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3705 CC = AArch64CC::LO;
3706 break;
3707 case Intrinsic::smul_with_overflow: {
3708 CC = AArch64CC::NE;
3709 Register LHSReg = getRegForValue(LHS);
3710 if (!LHSReg)
3711 return false;
3712
3713 Register RHSReg = getRegForValue(RHS);
3714 if (!RHSReg)
3715 return false;
3716
3717 if (VT == MVT::i32) {
3718 MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg);
3719 Register MulSubReg =
3720 fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3721 // cmp xreg, wreg, sxtw
3722 emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg,
3723 AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true,
3724 /*WantResult=*/false);
3725 MulReg = MulSubReg;
3726 } else {
3727 assert(VT == MVT::i64 && "Unexpected value type.");
3728 // LHSReg and RHSReg cannot be killed by this Mul, since they are
3729 // reused in the next instruction.
3730 MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3731 Register SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg);
3732 emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63,
3733 /*WantResult=*/false);
3734 }
3735 break;
3736 }
3737 case Intrinsic::umul_with_overflow: {
3738 CC = AArch64CC::NE;
3739 Register LHSReg = getRegForValue(LHS);
3740 if (!LHSReg)
3741 return false;
3742
3743 Register RHSReg = getRegForValue(RHS);
3744 if (!RHSReg)
3745 return false;
3746
3747 if (VT == MVT::i32) {
3748 MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg);
3749 // tst xreg, #0xffffffff00000000
3750 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3751 TII.get(AArch64::ANDSXri), AArch64::XZR)
3752 .addReg(MulReg)
3753 .addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64));
3754 MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3755 } else {
3756 assert(VT == MVT::i64 && "Unexpected value type.");
3757 // LHSReg and RHSReg cannot be killed by this Mul, since they are
3758 // reused in the next instruction.
3759 MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3760 Register UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg);
3761 emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false);
3762 }
3763 break;
3764 }
3765 }
3766
3767 if (MulReg) {
3768 ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3769 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3770 TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3771 }
3772
3773 if (!ResultReg1)
3774 return false;
3775
3776 ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3777 AArch64::WZR, AArch64::WZR,
3778 getInvertedCondCode(CC));
3779 (void)ResultReg2;
3780 assert((ResultReg1 + 1) == ResultReg2 &&
3781 "Nonconsecutive result registers.");
3782 updateValueMap(II, ResultReg1, 2);
3783 return true;
3784 }
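// As an illustration, an i32 @llvm.sadd.with.overflow typically lowers to
// something like
//   adds w8, w0, w1   ; sum, updates NZCV
//   cset w9, vs       ; overflow bit (the CSINCWr with inverted condition)
// with the two results registered as consecutive vregs via updateValueMap.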
3785 case Intrinsic::aarch64_crc32b:
3786 case Intrinsic::aarch64_crc32h:
3787 case Intrinsic::aarch64_crc32w:
3788 case Intrinsic::aarch64_crc32x:
3789 case Intrinsic::aarch64_crc32cb:
3790 case Intrinsic::aarch64_crc32ch:
3791 case Intrinsic::aarch64_crc32cw:
3792 case Intrinsic::aarch64_crc32cx: {
3793 if (!Subtarget->hasCRC())
3794 return false;
3795
3796 unsigned Opc;
3797 switch (II->getIntrinsicID()) {
3798 default:
3799 llvm_unreachable("Unexpected intrinsic!");
3800 case Intrinsic::aarch64_crc32b:
3801 Opc = AArch64::CRC32Brr;
3802 break;
3803 case Intrinsic::aarch64_crc32h:
3804 Opc = AArch64::CRC32Hrr;
3805 break;
3806 case Intrinsic::aarch64_crc32w:
3807 Opc = AArch64::CRC32Wrr;
3808 break;
3809 case Intrinsic::aarch64_crc32x:
3810 Opc = AArch64::CRC32Xrr;
3811 break;
3812 case Intrinsic::aarch64_crc32cb:
3813 Opc = AArch64::CRC32CBrr;
3814 break;
3815 case Intrinsic::aarch64_crc32ch:
3816 Opc = AArch64::CRC32CHrr;
3817 break;
3818 case Intrinsic::aarch64_crc32cw:
3819 Opc = AArch64::CRC32CWrr;
3820 break;
3821 case Intrinsic::aarch64_crc32cx:
3822 Opc = AArch64::CRC32CXrr;
3823 break;
3824 }
3825
3826 Register LHSReg = getRegForValue(II->getArgOperand(0));
3827 Register RHSReg = getRegForValue(II->getArgOperand(1));
3828 if (!LHSReg || !RHSReg)
3829 return false;
3830
3831 Register ResultReg =
3832 fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, LHSReg, RHSReg);
3833 updateValueMap(II, ResultReg);
3834 return true;
3835 }
3836 }
3837 return false;
3838}
3839
3840bool AArch64FastISel::selectRet(const Instruction *I) {
3841 const ReturnInst *Ret = cast<ReturnInst>(I);
3842 const Function &F = *I->getParent()->getParent();
3843
3844 if (!FuncInfo.CanLowerReturn)
3845 return false;
3846
3847 if (F.isVarArg())
3848 return false;
3849
3850 if (TLI.supportSwiftError() &&
3851 F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3852 return false;
3853
3854 if (TLI.supportSplitCSR(FuncInfo.MF))
3855 return false;
3856
3857 // Build a list of return value registers.
3858 SmallVector<Register, 4> RetRegs;
3859
3860 if (Ret->getNumOperands() > 0) {
3861 CallingConv::ID CC = F.getCallingConv();
3862 SmallVector<ISD::OutputArg, 4> Outs;
3863 GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3864
3865 // Analyze operands of the call, assigning locations to each operand.
3866 SmallVector<CCValAssign, 16> ValLocs;
3867 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3868 CCInfo.AnalyzeReturn(Outs, RetCC_AArch64_AAPCS);
3869
3870 // Only handle a single return value for now.
3871 if (ValLocs.size() != 1)
3872 return false;
3873
3874 CCValAssign &VA = ValLocs[0];
3875 const Value *RV = Ret->getOperand(0);
3876
3877 // Don't bother handling odd stuff for now.
3878 if ((VA.getLocInfo() != CCValAssign::Full) &&
3879 (VA.getLocInfo() != CCValAssign::BCvt))
3880 return false;
3881
3882 // Only handle register returns for now.
3883 if (!VA.isRegLoc())
3884 return false;
3885
3886 Register Reg = getRegForValue(RV);
3887 if (!Reg)
3888 return false;
3889
3890 Register SrcReg = Reg + VA.getValNo();
3891 Register DestReg = VA.getLocReg();
3892 // Avoid a cross-class copy. This is very unlikely.
3893 if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3894 return false;
3895
3896 EVT RVEVT = TLI.getValueType(DL, RV->getType());
3897 if (!RVEVT.isSimple())
3898 return false;
3899
3900 // Vectors (of > 1 lane) in big endian need tricky handling.
3901 if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
3902 !Subtarget->isLittleEndian())
3903 return false;
3904
3905 MVT RVVT = RVEVT.getSimpleVT();
3906 if (RVVT == MVT::f128)
3907 return false;
3908
3909 MVT DestVT = VA.getValVT();
3910 // Special handling for extended integers.
3911 if (RVVT != DestVT) {
3912 if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3913 return false;
3914
3915 if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3916 return false;
3917
3918 bool IsZExt = Outs[0].Flags.isZExt();
3919 SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3920 if (!SrcReg)
3921 return false;
3922 }
3923
3924 // "Callee" (i.e. value producer) zero extends pointers at function
3925 // boundary.
3926 if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
3927 SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff);
3928
3929 // Make the copy.
3930 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3931 TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3932
3933 // Add register to return instruction.
3934 RetRegs.push_back(VA.getLocReg());
3935 }
3936
3937 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3938 TII.get(AArch64::RET_ReallyLR));
3939 for (Register RetReg : RetRegs)
3940 MIB.addReg(RetReg, RegState::Implicit);
3941 return true;
3942}
3943
3944bool AArch64FastISel::selectTrunc(const Instruction *I) {
3945 Type *DestTy = I->getType();
3946 Value *Op = I->getOperand(0);
3947 Type *SrcTy = Op->getType();
3948
3949 EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3950 EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3951 if (!SrcEVT.isSimple())
3952 return false;
3953 if (!DestEVT.isSimple())
3954 return false;
3955
3956 MVT SrcVT = SrcEVT.getSimpleVT();
3957 MVT DestVT = DestEVT.getSimpleVT();
3958
3959 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3960 SrcVT != MVT::i8)
3961 return false;
3962 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3963 DestVT != MVT::i1)
3964 return false;
3965
3966 Register SrcReg = getRegForValue(Op);
3967 if (!SrcReg)
3968 return false;
3969
3970 // If we're truncating from i64 to a smaller non-legal type then generate an
3971 // AND. Otherwise, we know the high bits are undefined and a truncate only
3972 // generates a COPY. We cannot mark the source register also as the result
3973 // register, because this can incorrectly transfer the kill flag onto the
3974 // source register.
3975 Register ResultReg;
3976 if (SrcVT == MVT::i64) {
3977 uint64_t Mask = 0;
3978 switch (DestVT.SimpleTy) {
3979 default:
3980 // Trunc i64 to i32 is handled by the target-independent fast-isel.
3981 return false;
3982 case MVT::i1:
3983 Mask = 0x1;
3984 break;
3985 case MVT::i8:
3986 Mask = 0xff;
3987 break;
3988 case MVT::i16:
3989 Mask = 0xffff;
3990 break;
3991 }
3992 // Issue an extract_subreg to get the lower 32-bits.
3993 Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg,
3994 AArch64::sub_32);
3995 // Create the AND instruction which performs the actual truncation.
3996 ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask);
3997 assert(ResultReg && "Unexpected AND instruction emission failure.");
3998 } else {
3999 ResultReg = createResultReg(&AArch64::GPR32RegClass);
4000 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4001 TII.get(TargetOpcode::COPY), ResultReg)
4002 .addReg(SrcReg);
4003 }
4004
4005 updateValueMap(I, ResultReg);
4006 return true;
4007}
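// Example of the two truncation paths above: "trunc i64 %x to i8" becomes an
// extract of sub_32 followed by "and wN, wN, #0xff", while a truncate from
// i32 (or narrower) is just a register copy since the high bits are ignored.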
4008
4009Register AArch64FastISel::emiti1Ext(Register SrcReg, MVT DestVT, bool IsZExt) {
4010 assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
4011 DestVT == MVT::i64) &&
4012 "Unexpected value type.");
4013 // Handle i8 and i16 as i32.
4014 if (DestVT == MVT::i8 || DestVT == MVT::i16)
4015 DestVT = MVT::i32;
4016
4017 if (IsZExt) {
4018 Register ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1);
4019 assert(ResultReg && "Unexpected AND instruction emission failure.");
4020 if (DestVT == MVT::i64) {
4021 // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
4022 // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
4023 Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4024 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4025 TII.get(AArch64::SUBREG_TO_REG), Reg64)
4026 .addImm(0)
4027 .addReg(ResultReg)
4028 .addImm(AArch64::sub_32);
4029 ResultReg = Reg64;
4030 }
4031 return ResultReg;
4032 } else {
4033 if (DestVT == MVT::i64) {
4034 // FIXME: We're SExt i1 to i64.
4035 return Register();
4036 }
4037 return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
4038 0, 0);
4039 }
4040}
4041
4042Register AArch64FastISel::emitMul_rr(MVT RetVT, Register Op0, Register Op1) {
4043 unsigned Opc;
4044 Register ZReg;
4045 switch (RetVT.SimpleTy) {
4046 default:
4047 return Register();
4048 case MVT::i8:
4049 case MVT::i16:
4050 case MVT::i32:
4051 RetVT = MVT::i32;
4052 Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
4053 case MVT::i64:
4054 Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
4055 }
4056
4057 const TargetRegisterClass *RC =
4058 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4059 return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg);
4060}
4061
4062Register AArch64FastISel::emitSMULL_rr(MVT RetVT, Register Op0, Register Op1) {
4063 if (RetVT != MVT::i64)
4064 return Register();
4065
4066 return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
4067 Op0, Op1, AArch64::XZR);
4068}
4069
4070Register AArch64FastISel::emitUMULL_rr(MVT RetVT, Register Op0, Register Op1) {
4071 if (RetVT != MVT::i64)
4072 return Register();
4073
4074 return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
4075 Op0, Op1, AArch64::XZR);
4076}
4077
4078Register AArch64FastISel::emitLSL_rr(MVT RetVT, Register Op0Reg,
4079 Register Op1Reg) {
4080 unsigned Opc = 0;
4081 bool NeedTrunc = false;
4082 uint64_t Mask = 0;
4083 switch (RetVT.SimpleTy) {
4084 default:
4085 return Register();
4086 case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
4087 case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4088 case MVT::i32: Opc = AArch64::LSLVWr; break;
4089 case MVT::i64: Opc = AArch64::LSLVXr; break;
4090 }
4091
4092 const TargetRegisterClass *RC =
4093 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4094 if (NeedTrunc)
4095 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4096
4097 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4098 if (NeedTrunc)
4099 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4100 return ResultReg;
4101}
4102
4103Register AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, Register Op0,
4104 uint64_t Shift, bool IsZExt) {
4105 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4106 "Unexpected source/return type pair.");
4107 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4108 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4109 "Unexpected source value type.");
4110 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4111 RetVT == MVT::i64) && "Unexpected return value type.");
4112
4113 bool Is64Bit = (RetVT == MVT::i64);
4114 unsigned RegSize = Is64Bit ? 64 : 32;
4115 unsigned DstBits = RetVT.getSizeInBits();
4116 unsigned SrcBits = SrcVT.getSizeInBits();
4117 const TargetRegisterClass *RC =
4118 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4119
4120 // Just emit a copy for "zero" shifts.
4121 if (Shift == 0) {
4122 if (RetVT == SrcVT) {
4123 Register ResultReg = createResultReg(RC);
4124 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4125 TII.get(TargetOpcode::COPY), ResultReg)
4126 .addReg(Op0);
4127 return ResultReg;
4128 } else
4129 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4130 }
4131
4132 // Don't deal with undefined shifts.
4133 if (Shift >= DstBits)
4134 return Register();
4135
4136 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4137 // {S|U}BFM Wd, Wn, #r, #s
4138 // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4139
4140 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4141 // %2 = shl i16 %1, 4
4142 // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4143 // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4144 // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4145 // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4146
4147 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4148 // %2 = shl i16 %1, 8
4149 // Wd<32+7-24,32-24> = Wn<7:0>
4150 // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4151 // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4152 // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4153
4154 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4155 // %2 = shl i16 %1, 12
4156 // Wd<32+3-20,32-20> = Wn<3:0>
4157 // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4158 // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4159 // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4160
4161 unsigned ImmR = RegSize - Shift;
4162 // Limit the width to the length of the source type.
4163 unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4164 static const unsigned OpcTable[2][2] = {
4165 {AArch64::SBFMWri, AArch64::SBFMXri},
4166 {AArch64::UBFMWri, AArch64::UBFMXri}
4167 };
4168 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4169 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4170 Register TmpReg = MRI.createVirtualRegister(RC);
4171 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4172 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4173 .addImm(0)
4174 .addReg(Op0)
4175 .addImm(AArch64::sub_32);
4176 Op0 = TmpReg;
4177 }
4178 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4179}
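// Worked example of the extend+shift folding in emitLSL_ri above: for a
// zero-extended i8 shifted left by 4 into an i32 result, ImmR = 32 - 4 = 28
// and ImmS = min(7, 27) = 7, so the emitted UBFMWri is equivalent to
// "ubfiz wd, wn, #4, #8".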
4180
4181Register AArch64FastISel::emitLSR_rr(MVT RetVT, Register Op0Reg,
4182 Register Op1Reg) {
4183 unsigned Opc = 0;
4184 bool NeedTrunc = false;
4185 uint64_t Mask = 0;
4186 switch (RetVT.SimpleTy) {
4187 default:
4188 return Register();
4189 case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
4190 case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4191 case MVT::i32: Opc = AArch64::LSRVWr; break;
4192 case MVT::i64: Opc = AArch64::LSRVXr; break;
4193 }
4194
4195 const TargetRegisterClass *RC =
4196 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4197 if (NeedTrunc) {
4198 Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask);
4199 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4200 }
4201 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4202 if (NeedTrunc)
4203 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4204 return ResultReg;
4205}
4206
4207Register AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, Register Op0,
4208 uint64_t Shift, bool IsZExt) {
4209 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4210 "Unexpected source/return type pair.");
4211 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4212 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4213 "Unexpected source value type.");
4214 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4215 RetVT == MVT::i64) && "Unexpected return value type.");
4216
4217 bool Is64Bit = (RetVT == MVT::i64);
4218 unsigned RegSize = Is64Bit ? 64 : 32;
4219 unsigned DstBits = RetVT.getSizeInBits();
4220 unsigned SrcBits = SrcVT.getSizeInBits();
4221 const TargetRegisterClass *RC =
4222 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4223
4224 // Just emit a copy for "zero" shifts.
4225 if (Shift == 0) {
4226 if (RetVT == SrcVT) {
4227 Register ResultReg = createResultReg(RC);
4228 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4229 TII.get(TargetOpcode::COPY), ResultReg)
4230 .addReg(Op0);
4231 return ResultReg;
4232 } else
4233 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4234 }
4235
4236 // Don't deal with undefined shifts.
4237 if (Shift >= DstBits)
4238 return Register();
4239
4240 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4241 // {S|U}BFM Wd, Wn, #r, #s
4242 // Wd<s-r:0> = Wn<s:r> when r <= s
4243
4244 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4245 // %2 = lshr i16 %1, 4
4246 // Wd<7-4:0> = Wn<7:4>
4247 // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4248 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4249 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4250
4251 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4252 // %2 = lshr i16 %1, 8
4253 // Wd<7-7,0> = Wn<7:7>
4254 // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4255 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4256 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4257
4258 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4259 // %2 = lshr i16 %1, 12
4260 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4261 // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4262 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4263 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4264
4265 if (Shift >= SrcBits && IsZExt)
4266 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4267
4268 // It is not possible to fold a sign-extend into the LShr instruction. In this
4269 // case emit a sign-extend.
4270 if (!IsZExt) {
4271 Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4272 if (!Op0)
4273 return Register();
4274 SrcVT = RetVT;
4275 SrcBits = SrcVT.getSizeInBits();
4276 IsZExt = true;
4277 }
4278
4279 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4280 unsigned ImmS = SrcBits - 1;
4281 static const unsigned OpcTable[2][2] = {
4282 {AArch64::SBFMWri, AArch64::SBFMXri},
4283 {AArch64::UBFMWri, AArch64::UBFMXri}
4284 };
4285 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4286 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4287 Register TmpReg = MRI.createVirtualRegister(RC);
4288 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4289 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4290 .addImm(0)
4291 .addReg(Op0)
4292 .addImm(AArch64::sub_32);
4293 Op0 = TmpReg;
4294 }
4295 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4296}
4297
4298Register AArch64FastISel::emitASR_rr(MVT RetVT, Register Op0Reg,
4299 Register Op1Reg) {
4300 unsigned Opc = 0;
4301 bool NeedTrunc = false;
4302 uint64_t Mask = 0;
4303 switch (RetVT.SimpleTy) {
4304 default:
4305 return Register();
4306 case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
4307 case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4308 case MVT::i32: Opc = AArch64::ASRVWr; break;
4309 case MVT::i64: Opc = AArch64::ASRVXr; break;
4310 }
4311
4312 const TargetRegisterClass *RC =
4313 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4314 if (NeedTrunc) {
4315 Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
4316 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4317 }
4318 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4319 if (NeedTrunc)
4320 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4321 return ResultReg;
4322}
4323
4324Register AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, Register Op0,
4325 uint64_t Shift, bool IsZExt) {
4326 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4327 "Unexpected source/return type pair.");
4328 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4329 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4330 "Unexpected source value type.");
4331 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4332 RetVT == MVT::i64) && "Unexpected return value type.");
4333
4334 bool Is64Bit = (RetVT == MVT::i64);
4335 unsigned RegSize = Is64Bit ? 64 : 32;
4336 unsigned DstBits = RetVT.getSizeInBits();
4337 unsigned SrcBits = SrcVT.getSizeInBits();
4338 const TargetRegisterClass *RC =
4339 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4340
4341 // Just emit a copy for "zero" shifts.
4342 if (Shift == 0) {
4343 if (RetVT == SrcVT) {
4344 Register ResultReg = createResultReg(RC);
4345 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4346 TII.get(TargetOpcode::COPY), ResultReg)
4347 .addReg(Op0);
4348 return ResultReg;
4349 } else
4350 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4351 }
4352
4353 // Don't deal with undefined shifts.
4354 if (Shift >= DstBits)
4355 return Register();
4356
4357 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4358 // {S|U}BFM Wd, Wn, #r, #s
4359 // Wd<s-r:0> = Wn<s:r> when r <= s
4360
4361 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4362 // %2 = ashr i16 %1, 4
4363 // Wd<7-4:0> = Wn<7:4>
4364 // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4365 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4366 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4367
4368 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4369 // %2 = ashr i16 %1, 8
4370 // Wd<7-7,0> = Wn<7:7>
4371 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4372 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4373 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4374
4375 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4376 // %2 = ashr i16 %1, 12
4377 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4378 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4379 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4380 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4381
4382 if (Shift >= SrcBits && IsZExt)
4383 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4384
4385 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4386 unsigned ImmS = SrcBits - 1;
4387 static const unsigned OpcTable[2][2] = {
4388 {AArch64::SBFMWri, AArch64::SBFMXri},
4389 {AArch64::UBFMWri, AArch64::UBFMXri}
4390 };
4391 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4392 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4393 Register TmpReg = MRI.createVirtualRegister(RC);
4394 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4395 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4396 .addImm(0)
4397 .addReg(Op0)
4398 .addImm(AArch64::sub_32);
4399 Op0 = TmpReg;
4400 }
4401 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4402}
4403
4404Register AArch64FastISel::emitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT,
4405 bool IsZExt) {
4406 assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4407
4408 // FastISel does not have plumbing to deal with extensions where the SrcVT or
4409 // DestVT are odd things, so test to make sure that they are both types we can
4410 // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4411 // bail out to SelectionDAG.
4412 if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4413 (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4414 ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4415 (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4416 return Register();
4417
4418 unsigned Opc;
4419 unsigned Imm = 0;
4420
4421 switch (SrcVT.SimpleTy) {
4422 default:
4423 return Register();
4424 case MVT::i1:
4425 return emiti1Ext(SrcReg, DestVT, IsZExt);
4426 case MVT::i8:
4427 if (DestVT == MVT::i64)
4428 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4429 else
4430 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4431 Imm = 7;
4432 break;
4433 case MVT::i16:
4434 if (DestVT == MVT::i64)
4435 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4436 else
4437 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4438 Imm = 15;
4439 break;
4440 case MVT::i32:
4441 assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4442 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4443 Imm = 31;
4444 break;
4445 }
4446
4447 // Handle i8 and i16 as i32.
4448 if (DestVT == MVT::i8 || DestVT == MVT::i16)
4449 DestVT = MVT::i32;
4450 else if (DestVT == MVT::i64) {
4451 Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4452 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4453 TII.get(AArch64::SUBREG_TO_REG), Src64)
4454 .addImm(0)
4455 .addReg(SrcReg)
4456 .addImm(AArch64::sub_32);
4457 SrcReg = Src64;
4458 }
4459
4460 const TargetRegisterClass *RC =
4461 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4462 return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm);
4463}
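// For example, emitIntExt turns a zext from i16 to i64 into a SUBREG_TO_REG
// of the source vreg followed by UBFMXri with immr=0, imms=15 (a 16-bit
// ubfx), while a sext from i8 to i32 becomes SBFMWri with immr=0, imms=7
// ("sxtb").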
4464
4465static bool isZExtLoad(const MachineInstr *LI) {
4466 switch (LI->getOpcode()) {
4467 default:
4468 return false;
4469 case AArch64::LDURBBi:
4470 case AArch64::LDURHHi:
4471 case AArch64::LDURWi:
4472 case AArch64::LDRBBui:
4473 case AArch64::LDRHHui:
4474 case AArch64::LDRWui:
4475 case AArch64::LDRBBroX:
4476 case AArch64::LDRHHroX:
4477 case AArch64::LDRWroX:
4478 case AArch64::LDRBBroW:
4479 case AArch64::LDRHHroW:
4480 case AArch64::LDRWroW:
4481 return true;
4482 }
4483}
4484
4485static bool isSExtLoad(const MachineInstr *LI) {
4486 switch (LI->getOpcode()) {
4487 default:
4488 return false;
4489 case AArch64::LDURSBWi:
4490 case AArch64::LDURSHWi:
4491 case AArch64::LDURSBXi:
4492 case AArch64::LDURSHXi:
4493 case AArch64::LDURSWi:
4494 case AArch64::LDRSBWui:
4495 case AArch64::LDRSHWui:
4496 case AArch64::LDRSBXui:
4497 case AArch64::LDRSHXui:
4498 case AArch64::LDRSWui:
4499 case AArch64::LDRSBWroX:
4500 case AArch64::LDRSHWroX:
4501 case AArch64::LDRSBXroX:
4502 case AArch64::LDRSHXroX:
4503 case AArch64::LDRSWroX:
4504 case AArch64::LDRSBWroW:
4505 case AArch64::LDRSHWroW:
4506 case AArch64::LDRSBXroW:
4507 case AArch64::LDRSHXroW:
4508 case AArch64::LDRSWroW:
4509 return true;
4510 }
4511}
4512
4513bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4514 MVT SrcVT) {
4515 const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4516 if (!LI || !LI->hasOneUse())
4517 return false;
4518
4519 // Check if the load instruction has already been selected.
4520 Register Reg = lookUpRegForValue(LI);
4521 if (!Reg)
4522 return false;
4523
4524 MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4525 if (!MI)
4526 return false;
4527
4528 // Check if the correct load instruction has been emitted - SelectionDAG might
4529 // have emitted a zero-extending load, but we need a sign-extending load.
4530 bool IsZExt = isa<ZExtInst>(I);
4531 const auto *LoadMI = MI;
4532 if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4533 LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4534 Register LoadReg = MI->getOperand(1).getReg();
4535 LoadMI = MRI.getUniqueVRegDef(LoadReg);
4536 assert(LoadMI && "Expected valid instruction");
4537 }
4538 if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4539 return false;
4540
4541 // Nothing to be done.
4542 if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4543 updateValueMap(I, Reg);
4544 return true;
4545 }
4546
4547 if (IsZExt) {
4548 Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
4549 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4550 TII.get(AArch64::SUBREG_TO_REG), Reg64)
4551 .addImm(0)
4552 .addReg(Reg, getKillRegState(true))
4553 .addImm(AArch64::sub_32);
4554 Reg = Reg64;
4555 } else {
4556 assert((MI->getOpcode() == TargetOpcode::COPY &&
4557 MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4558 "Expected copy instruction");
4559 Reg = MI->getOperand(1).getReg();
4560 MachineBasicBlock::iterator I(MI);
4561 removeDeadCode(I, std::next(I));
4562 }
4563 updateValueMap(I, Reg);
4564 return true;
4565}
4566
4567 bool AArch64FastISel::selectIntExt(const Instruction *I) {
4568 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4569 "Unexpected integer extend instruction.");
4570 MVT RetVT;
4571 MVT SrcVT;
4572 if (!isTypeSupported(I->getType(), RetVT))
4573 return false;
4574
4575 if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4576 return false;
4577
4578 // Try to optimize already sign-/zero-extended values from load instructions.
4579 if (optimizeIntExtLoad(I, RetVT, SrcVT))
4580 return true;
4581
4582 Register SrcReg = getRegForValue(I->getOperand(0));
4583 if (!SrcReg)
4584 return false;
4585
4586 // Try to optimize already sign-/zero-extended values from function arguments.
4587 bool IsZExt = isa<ZExtInst>(I);
4588 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4589 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4590 if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4591 Register ResultReg = createResultReg(&AArch64::GPR64RegClass);
4592 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4593 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4594 .addImm(0)
4595 .addReg(SrcReg)
4596 .addImm(AArch64::sub_32);
4597 SrcReg = ResultReg;
4598 }
4599
4600 updateValueMap(I, SrcReg);
4601 return true;
4602 }
4603 }
4604
4605 Register ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4606 if (!ResultReg)
4607 return false;
4608
4609 updateValueMap(I, ResultReg);
4610 return true;
4611}
4612
4613bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4614 EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4615 if (!DestEVT.isSimple())
4616 return false;
4617
4618 MVT DestVT = DestEVT.getSimpleVT();
4619 if (DestVT != MVT::i64 && DestVT != MVT::i32)
4620 return false;
4621
4622 unsigned DivOpc;
4623 bool Is64bit = (DestVT == MVT::i64);
4624 switch (ISDOpcode) {
4625 default:
4626 return false;
4627 case ISD::SREM:
4628 DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4629 break;
4630 case ISD::UREM:
4631 DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4632 break;
4633 }
4634 unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4635 Register Src0Reg = getRegForValue(I->getOperand(0));
4636 if (!Src0Reg)
4637 return false;
4638
4639 Register Src1Reg = getRegForValue(I->getOperand(1));
4640 if (!Src1Reg)
4641 return false;
4642
4643 const TargetRegisterClass *RC =
4644 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4645 Register QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg);
4646 assert(QuotReg && "Unexpected DIV instruction emission failure.");
4647 // The remainder is computed as numerator - (quotient * denominator) using the
4648 // MSUB instruction.
4649 Register ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg);
4650 updateValueMap(I, ResultReg);
4651 return true;
4652}
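// For instance, selectRem emits "srem i32 %a, %b" as
//   sdiv w8, w0, w1
//   msub w0, w8, w1, w0   ; %a - (%a / %b) * %b
// using the DIV/MSUB pair selected above.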
4653
4654bool AArch64FastISel::selectMul(const Instruction *I) {
4655 MVT VT;
4656 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4657 return false;
4658
4659 if (VT.isVector())
4660 return selectBinaryOp(I, ISD::MUL);
4661
4662 const Value *Src0 = I->getOperand(0);
4663 const Value *Src1 = I->getOperand(1);
4664 if (const auto *C = dyn_cast<ConstantInt>(Src0))
4665 if (C->getValue().isPowerOf2())
4666 std::swap(Src0, Src1);
4667
4668 // Try to simplify to a shift instruction.
4669 if (const auto *C = dyn_cast<ConstantInt>(Src1))
4670 if (C->getValue().isPowerOf2()) {
4671 uint64_t ShiftVal = C->getValue().logBase2();
4672 MVT SrcVT = VT;
4673 bool IsZExt = true;
4674 if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4675 if (!isIntExtFree(ZExt)) {
4676 MVT VT;
4677 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4678 SrcVT = VT;
4679 IsZExt = true;
4680 Src0 = ZExt->getOperand(0);
4681 }
4682 }
4683 } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4684 if (!isIntExtFree(SExt)) {
4685 MVT VT;
4686 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4687 SrcVT = VT;
4688 IsZExt = false;
4689 Src0 = SExt->getOperand(0);
4690 }
4691 }
4692 }
4693
4694 Register Src0Reg = getRegForValue(Src0);
4695 if (!Src0Reg)
4696 return false;
4697
4698 Register ResultReg = emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt);
4699
4700 if (ResultReg) {
4701 updateValueMap(I, ResultReg);
4702 return true;
4703 }
4704 }
4705
4706 Register Src0Reg = getRegForValue(I->getOperand(0));
4707 if (!Src0Reg)
4708 return false;
4709
4710 Register Src1Reg = getRegForValue(I->getOperand(1));
4711 if (!Src1Reg)
4712 return false;
4713
4714 Register ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg);
4715
4716 if (!ResultReg)
4717 return false;
4718
4719 updateValueMap(I, ResultReg);
4720 return true;
4721}
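// Example for selectMul above: a multiply by a power of two such as
// "mul i32 %x, 8" is turned into the shift "lsl wN, wM, #3" by emitLSL_ri,
// also folding a preceding zext/sext of the operand into the shift when
// possible; all other scalar multiplies use MADD with WZR/XZR as the addend.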
4722
4723bool AArch64FastISel::selectShift(const Instruction *I) {
4724 MVT RetVT;
4725 if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4726 return false;
4727
4728 if (RetVT.isVector())
4729 return selectOperator(I, I->getOpcode());
4730
4731 if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4732 Register ResultReg;
4733 uint64_t ShiftVal = C->getZExtValue();
4734 MVT SrcVT = RetVT;
4735 bool IsZExt = I->getOpcode() != Instruction::AShr;
4736 const Value *Op0 = I->getOperand(0);
4737 if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4738 if (!isIntExtFree(ZExt)) {
4739 MVT TmpVT;
4740 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4741 SrcVT = TmpVT;
4742 IsZExt = true;
4743 Op0 = ZExt->getOperand(0);
4744 }
4745 }
4746 } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4747 if (!isIntExtFree(SExt)) {
4748 MVT TmpVT;
4749 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4750 SrcVT = TmpVT;
4751 IsZExt = false;
4752 Op0 = SExt->getOperand(0);
4753 }
4754 }
4755 }
4756
4757 Register Op0Reg = getRegForValue(Op0);
4758 if (!Op0Reg)
4759 return false;
4760
4761 switch (I->getOpcode()) {
4762 default: llvm_unreachable("Unexpected instruction.");
4763 case Instruction::Shl:
4764 ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4765 break;
4766 case Instruction::AShr:
4767 ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4768 break;
4769 case Instruction::LShr:
4770 ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4771 break;
4772 }
4773 if (!ResultReg)
4774 return false;
4775
4776 updateValueMap(I, ResultReg);
4777 return true;
4778 }
4779
4780 Register Op0Reg = getRegForValue(I->getOperand(0));
4781 if (!Op0Reg)
4782 return false;
4783
4784 Register Op1Reg = getRegForValue(I->getOperand(1));
4785 if (!Op1Reg)
4786 return false;
4787
4788 Register ResultReg;
4789 switch (I->getOpcode()) {
4790 default: llvm_unreachable("Unexpected instruction.");
4791 case Instruction::Shl:
4792 ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg);
4793 break;
4794 case Instruction::AShr:
4795 ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg);
4796 break;
4797 case Instruction::LShr:
4798 ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg);
4799 break;
4800 }
4801
4802 if (!ResultReg)
4803 return false;
4804
4805 updateValueMap(I, ResultReg);
4806 return true;
4807}
4808
4809bool AArch64FastISel::selectBitCast(const Instruction *I) {
4810 MVT RetVT, SrcVT;
4811
4812 if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4813 return false;
4814 if (!isTypeLegal(I->getType(), RetVT))
4815 return false;
4816
4817 unsigned Opc;
4818 if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4819 Opc = AArch64::FMOVWSr;
4820 else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4821 Opc = AArch64::FMOVXDr;
4822 else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4823 Opc = AArch64::FMOVSWr;
4824 else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4825 Opc = AArch64::FMOVDXr;
4826 else
4827 return false;
4828
4829 const TargetRegisterClass *RC = nullptr;
4830 switch (RetVT.SimpleTy) {
4831 default: llvm_unreachable("Unexpected value type.");
4832 case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4833 case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4834 case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4835 case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4836 }
4837 Register Op0Reg = getRegForValue(I->getOperand(0));
4838 if (!Op0Reg)
4839 return false;
4840
4841 Register ResultReg = fastEmitInst_r(Opc, RC, Op0Reg);
4842 if (!ResultReg)
4843 return false;
4844
4845 updateValueMap(I, ResultReg);
4846 return true;
4847}
4848
4849bool AArch64FastISel::selectFRem(const Instruction *I) {
4850 MVT RetVT;
4851 if (!isTypeLegal(I->getType(), RetVT))
4852 return false;
4853
4854 RTLIB::Libcall LC;
4855 switch (RetVT.SimpleTy) {
4856 default:
4857 return false;
4858 case MVT::f32:
4859 LC = RTLIB::REM_F32;
4860 break;
4861 case MVT::f64:
4862 LC = RTLIB::REM_F64;
4863 break;
4864 }
4865
4866 ArgListTy Args;
4867 Args.reserve(I->getNumOperands());
4868
4869 // Populate the argument list.
4870 for (auto &Arg : I->operands())
4871 Args.emplace_back(Arg);
4872
4873 CallLoweringInfo CLI;
4874 MCContext &Ctx = MF->getContext();
4875 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4876 TLI.getLibcallName(LC), std::move(Args));
4877 if (!lowerCallTo(CLI))
4878 return false;
4879 updateValueMap(I, CLI.ResultReg);
4880 return true;
4881}
4882
4883bool AArch64FastISel::selectSDiv(const Instruction *I) {
4884 MVT VT;
4885 if (!isTypeLegal(I->getType(), VT))
4886 return false;
4887
4888 if (!isa<ConstantInt>(I->getOperand(1)))
4889 return selectBinaryOp(I, ISD::SDIV);
4890
4891 const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4892 if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4893 !(C.isPowerOf2() || C.isNegatedPowerOf2()))
4894 return selectBinaryOp(I, ISD::SDIV);
4895
4896 unsigned Lg2 = C.countr_zero();
4897 Register Src0Reg = getRegForValue(I->getOperand(0));
4898 if (!Src0Reg)
4899 return false;
4900
4901 if (cast<BinaryOperator>(I)->isExact()) {
4902 Register ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2);
4903 if (!ResultReg)
4904 return false;
4905 updateValueMap(I, ResultReg);
4906 return true;
4907 }
4908
4909 int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4910 Register AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne);
4911 if (!AddReg)
4912 return false;
4913
4914 // (Src0 < 0) ? Pow2 - 1 : 0;
4915 if (!emitICmp_ri(VT, Src0Reg, 0))
4916 return false;
4917
4918 unsigned SelectOpc;
4919 const TargetRegisterClass *RC;
4920 if (VT == MVT::i64) {
4921 SelectOpc = AArch64::CSELXr;
4922 RC = &AArch64::GPR64RegClass;
4923 } else {
4924 SelectOpc = AArch64::CSELWr;
4925 RC = &AArch64::GPR32RegClass;
4926 }
4927 Register SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg,
4928 AArch64CC::LT);
4929 if (!SelectReg)
4930 return false;
4931
4932 // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4933 // negate the result.
4934 Register ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4935 Register ResultReg;
4936 if (C.isNegative())
4937 ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg,
4938 AArch64_AM::ASR, Lg2);
4939 else
4940 ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2);
4941
4942 if (!ResultReg)
4943 return false;
4944
4945 updateValueMap(I, ResultReg);
4946 return true;
4947}
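// Illustrative lowering of the rounding sequence in selectSDiv above for
// "sdiv i32 %x, 4":
//   add  w8, w0, #3        ; x + (4 - 1)
//   cmp  w0, #0
//   csel w8, w8, w0, lt    ; use the biased value only for negative x
//   asr  w0, w8, #2
// For a divisor of -4 the final step instead subtracts the shifted value
// from wzr, negating the result.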
4948
4949/// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4950/// have to duplicate it for AArch64, because otherwise we would fail during the
4951/// sign-extend emission.
4952Register AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4953 Register IdxN = getRegForValue(Idx);
4954 if (!IdxN)
4955 // Unhandled operand. Halt "fast" selection and bail.
4956 return Register();
4957
4958 // If the index is smaller or larger than intptr_t, truncate or extend it.
4959 MVT PtrVT = TLI.getPointerTy(DL);
4960 EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4961 if (IdxVT.bitsLT(PtrVT)) {
4962 IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
4963 } else if (IdxVT.bitsGT(PtrVT))
4964 llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4965 return IdxN;
4966}
4967
4968/// This is mostly a copy of the existing FastISel GEP code, but we have to
4969/// duplicate it for AArch64, because otherwise we would bail out even for
4970/// simple cases. This is because the standard fastEmit functions don't cover
4971 /// MUL at all and ADD is lowered very inefficiently.
4972bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4973 if (Subtarget->isTargetILP32())
4974 return false;
4975
4976 Register N = getRegForValue(I->getOperand(0));
4977 if (!N)
4978 return false;
4979
4980 // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4981 // into a single N = N + TotalOffset.
4982 uint64_t TotalOffs = 0;
4983 MVT VT = TLI.getPointerTy(DL);
4984 for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
4985 GTI != E; ++GTI) {
4986 const Value *Idx = GTI.getOperand();
4987 if (auto *StTy = GTI.getStructTypeOrNull()) {
4988 unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4989 // N = N + Offset
4990 if (Field)
4991 TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4992 } else {
4993 // If this is a constant subscript, handle it quickly.
4994 if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4995 if (CI->isZero())
4996 continue;
4997 // N = N + Offset
4998 TotalOffs += GTI.getSequentialElementStride(DL) *
4999 cast<ConstantInt>(CI)->getSExtValue();
5000 continue;
5001 }
5002 if (TotalOffs) {
5003 N = emitAdd_ri_(VT, N, TotalOffs);
5004 if (!N)
5005 return false;
5006 TotalOffs = 0;
5007 }
5008
5009 // N = N + Idx * ElementSize;
5010 uint64_t ElementSize = GTI.getSequentialElementStride(DL);
5011 Register IdxN = getRegForGEPIndex(Idx);
5012 if (!IdxN)
5013 return false;
5014
5015 if (ElementSize != 1) {
5016 Register C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
5017 if (!C)
5018 return false;
5019 IdxN = emitMul_rr(VT, IdxN, C);
5020 if (!IdxN)
5021 return false;
5022 }
5023 N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
5024 if (!N)
5025 return false;
5026 }
5027 }
5028 if (TotalOffs) {
5029 N = emitAdd_ri_(VT, N, TotalOffs);
5030 if (!N)
5031 return false;
5032 }
5033 updateValueMap(I, N);
5034 return true;
5035}
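// Example of the GEP lowering above: for "getelementptr i32, ptr %p, i64 %i"
// the index register is multiplied by the element size (4) via emitMul_rr and
// added to the base pointer, while chains of constant indices are coalesced
// into a single emitAdd_ri_ of the accumulated byte offset.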
5036
5037bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
5038 assert(TM.getOptLevel() == CodeGenOptLevel::None &&
5039 "cmpxchg survived AtomicExpand at optlevel > -O0");
5040
5041 auto *RetPairTy = cast<StructType>(I->getType());
5042 Type *RetTy = RetPairTy->getTypeAtIndex(0U);
5043 assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
5044 "cmpxchg has a non-i1 status result");
5045
5046 MVT VT;
5047 if (!isTypeLegal(RetTy, VT))
5048 return false;
5049
5050 const TargetRegisterClass *ResRC;
5051 unsigned Opc, CmpOpc;
5052 // This only supports i32/i64, because i8/i16 aren't legal, and the generic
5053 // extractvalue selection doesn't support that.
5054 if (VT == MVT::i32) {
5055 Opc = AArch64::CMP_SWAP_32;
5056 CmpOpc = AArch64::SUBSWrs;
5057 ResRC = &AArch64::GPR32RegClass;
5058 } else if (VT == MVT::i64) {
5059 Opc = AArch64::CMP_SWAP_64;
5060 CmpOpc = AArch64::SUBSXrs;
5061 ResRC = &AArch64::GPR64RegClass;
5062 } else {
5063 return false;
5064 }
5065
5066 const MCInstrDesc &II = TII.get(Opc);
5067
5068 Register AddrReg = getRegForValue(I->getPointerOperand());
5069 Register DesiredReg = getRegForValue(I->getCompareOperand());
5070 Register NewReg = getRegForValue(I->getNewValOperand());
5071
5072 if (!AddrReg || !DesiredReg || !NewReg)
5073 return false;
5074
5075 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
5076 DesiredReg = constrainOperandRegClass(II, DesiredReg, II.getNumDefs() + 1);
5077 NewReg = constrainOperandRegClass(II, NewReg, II.getNumDefs() + 2);
5078
5079 const Register ResultReg1 = createResultReg(ResRC);
5080 const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5081 const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5082
5083 // FIXME: MachineMemOperand doesn't support cmpxchg yet.
5084 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
5085 .addDef(ResultReg1)
5086 .addDef(ScratchReg)
5087 .addUse(AddrReg)
5088 .addUse(DesiredReg)
5089 .addUse(NewReg);
5090
5091 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc))
5092 .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5093 .addUse(ResultReg1)
5094 .addUse(DesiredReg)
5095 .addImm(0);
5096
5097 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr))
5098 .addDef(ResultReg2)
5099 .addUse(AArch64::WZR)
5100 .addUse(AArch64::WZR)
5101 .addImm(AArch64CC::NE);
5102
5103 assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5104 updateValueMap(I, ResultReg1, 2);
5105 return true;
5106}
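// In short: the CMP_SWAP_32/64 pseudo above is typically expanded after
// FastISel (e.g. into a load-exclusive/store-exclusive retry loop), the SUBS
// compares the loaded value against the expected one, and the CSINC
// materialises the i1 success flag in the second result register.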
5107
5108bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5109 if (TLI.fallBackToDAGISel(*I))
5110 return false;
5111 switch (I->getOpcode()) {
5112 default:
5113 break;
5114 case Instruction::Add:
5115 case Instruction::Sub:
5116 return selectAddSub(I);
5117 case Instruction::Mul:
5118 return selectMul(I);
5119 case Instruction::SDiv:
5120 return selectSDiv(I);
5121 case Instruction::SRem:
5122 if (!selectBinaryOp(I, ISD::SREM))
5123 return selectRem(I, ISD::SREM);
5124 return true;
5125 case Instruction::URem:
5126 if (!selectBinaryOp(I, ISD::UREM))
5127 return selectRem(I, ISD::UREM);
5128 return true;
5129 case Instruction::Shl:
5130 case Instruction::LShr:
5131 case Instruction::AShr:
5132 return selectShift(I);
5133 case Instruction::And:
5134 case Instruction::Or:
5135 case Instruction::Xor:
5136 return selectLogicalOp(I);
5137 case Instruction::Br:
5138 return selectBranch(I);
5139 case Instruction::IndirectBr:
5140 return selectIndirectBr(I);
5141 case Instruction::BitCast:
5142 if (!selectCast(I, ISD::BITCAST))
5143 return selectBitCast(I);
5144 return true;
5145 case Instruction::FPToSI:
5146 if (!selectCast(I, ISD::FP_TO_SINT))
5147 return selectFPToInt(I, /*Signed=*/true);
5148 return true;
5149 case Instruction::FPToUI:
5150 return selectFPToInt(I, /*Signed=*/false);
5151 case Instruction::ZExt:
5152 case Instruction::SExt:
5153 return selectIntExt(I);
5154 case Instruction::Trunc:
5155 if (!selectCast(I, ISD::TRUNCATE))
5156 return selectTrunc(I);
5157 return true;
5158 case Instruction::FPExt:
5159 return selectFPExt(I);
5160 case Instruction::FPTrunc:
5161 return selectFPTrunc(I);
5162 case Instruction::SIToFP:
5163 if (!selectCast(I, ISD::SINT_TO_FP))
5164 return selectIntToFP(I, /*Signed=*/true);
5165 return true;
5166 case Instruction::UIToFP:
5167 return selectIntToFP(I, /*Signed=*/false);
5168 case Instruction::Load:
5169 return selectLoad(I);
5170 case Instruction::Store:
5171 return selectStore(I);
5172 case Instruction::FCmp:
5173 case Instruction::ICmp:
5174 return selectCmp(I);
5175 case Instruction::Select:
5176 return selectSelect(I);
5177 case Instruction::Ret:
5178 return selectRet(I);
5179 case Instruction::FRem:
5180 return selectFRem(I);
5181 case Instruction::GetElementPtr:
5182 return selectGetElementPtr(I);
5183 case Instruction::AtomicCmpXchg:
5184 return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5185 }
5186
5187 // Fall back to target-independent instruction selection.
5188 return selectOperator(I, I->getOpcode());
5189}
5190
5191 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
5192 const TargetLibraryInfo *LibInfo) {
5193
5194 SMEAttrs CallerAttrs =
5195 FuncInfo.MF->getInfo<AArch64FunctionInfo>()->getSMEFnAttrs();
5196 if (CallerAttrs.hasZAState() || CallerAttrs.hasZT0State() ||
5197 CallerAttrs.hasStreamingInterfaceOrBody() ||
5198 CallerAttrs.hasStreamingCompatibleInterface() ||
5199 CallerAttrs.hasAgnosticZAInterface())
5200 return nullptr;
5201 return new AArch64FastISel(FuncInfo, LibInfo);
5202}
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:683
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition InstrTypes.h:685
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:692
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:689
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition InstrTypes.h:678
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:686
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
bool isUnsigned() const
Definition InstrTypes.h:936
const APFloat & getValueAPF() const
Definition Constants.h:320
bool isNegative() const
Return true if the sign bit is set.
Definition Constants.h:327
bool isZero() const
Return true if the value is positive or negative zero.
Definition Constants.h:324
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:214
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition Constants.h:169
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:163
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:90
constexpr bool isVector() const
One or more elements.
Definition TypeSize.h:325
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition FastISel.h:66
bool selectBitCast(const User *I)
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
PointerType * getType() const
Global values are always pointers.
iterator_range< succ_op_iterator > successors()
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Machine Value Type.
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
bool isVector() const
Return true if this is a vector value type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
bool isValid() const
Return true if this is a valid simple valuetype.
static MVT getIntegerVT(unsigned BitWidth)
bool is64BitVector() const
Return true if this is a 64-bit vector type.
MachineInstrBundleIterator< MachineInstr > iterator
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setFrameAddressIsTaken(bool T)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Flags
Flags values. These may be or'd together.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
Value * getLength() const
unsigned getDestAddressSpace() const
bool isVolatile() const
constexpr unsigned id() const
Definition Register.h:95
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasStreamingCompatibleInterface() const
bool hasAgnosticZAInterface() const
bool hasStreamingInterfaceOrBody() const
void reserve(size_type N)
void push_back(const T &Elt)
TypeSize getElementOffset(unsigned Idx) const
Definition DataLayout.h:743
Provides information about what library functions are available for the current target.
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
bool isArrayTy() const
True if this is an instance of ArrayType.
Definition Type.h:264
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
bool isStructTy() const
True if this is an instance of StructType.
Definition Type.h:261
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:311
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
Value * getOperand(unsigned i) const
Definition User.h:232
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
const ParentTy * getParent() const
Definition ilist_node.h:34
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_PREL
MO_PREL - Indicates that the bits of the symbol operand represented by MO_G0 etc are PC relative.
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:868
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:701
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:914
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:736
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:844
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
@ User
could "use" a pointer
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:477
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed...
Definition Utils.cpp:56
LLVM_ABI void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
bool CC_AArch64_Win64PCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
bool CC_AArch64_DarwinPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
gep_type_iterator gep_type_end(const User *GEP)
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
bool CC_AArch64_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
generic_gep_type_iterator<> gep_type_iterator
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool CC_AArch64_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
unsigned getKillRegState(bool B)
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
bool RetCC_AArch64_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
gep_type_iterator gep_type_begin(const User *GEP)
bool CC_AArch64_Win64_CFGuard_Check(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:284
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:300
ElementCount getVectorElementCount() const
Definition ValueTypes.h:350
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.