1//===- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the AArch64-specific support for the FastISel class. Some
10// of the target-specific code is generated by tablegen in the file
11// AArch64GenFastISel.inc, which is #included here.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AArch64.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
22#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/APInt.h"
24#include "llvm/ADT/DenseMap.h"
40#include "llvm/IR/Argument.h"
41#include "llvm/IR/Attributes.h"
42#include "llvm/IR/BasicBlock.h"
43#include "llvm/IR/CallingConv.h"
44#include "llvm/IR/Constant.h"
45#include "llvm/IR/Constants.h"
46#include "llvm/IR/DataLayout.h"
48#include "llvm/IR/Function.h"
50#include "llvm/IR/GlobalValue.h"
51#include "llvm/IR/InstrTypes.h"
52#include "llvm/IR/Instruction.h"
55#include "llvm/IR/Intrinsics.h"
56#include "llvm/IR/Operator.h"
57#include "llvm/IR/Type.h"
58#include "llvm/IR/User.h"
59#include "llvm/IR/Value.h"
60#include "llvm/MC/MCInstrDesc.h"
62#include "llvm/MC/MCSymbol.h"
70#include <algorithm>
71#include <cassert>
72#include <cstdint>
73#include <iterator>
74#include <utility>
75
76using namespace llvm;
77
78namespace {
79
80class AArch64FastISel final : public FastISel {
81 class Address {
82 public:
83 using BaseKind = enum {
84 RegBase,
85 FrameIndexBase
86 };
87
88 private:
89 BaseKind Kind = RegBase;
90 AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
91 union {
92 unsigned Reg;
93 int FI;
94 } Base;
95 unsigned OffsetReg = 0;
96 unsigned Shift = 0;
97 int64_t Offset = 0;
98 const GlobalValue *GV = nullptr;
99
100 public:
101 Address() { Base.Reg = 0; }
102
103 void setKind(BaseKind K) { Kind = K; }
104 BaseKind getKind() const { return Kind; }
105 void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
106 AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
107 bool isRegBase() const { return Kind == RegBase; }
108 bool isFIBase() const { return Kind == FrameIndexBase; }
109
110 void setReg(unsigned Reg) {
111 assert(isRegBase() && "Invalid base register access!");
112 Base.Reg = Reg;
113 }
114
115 unsigned getReg() const {
116 assert(isRegBase() && "Invalid base register access!");
117 return Base.Reg;
118 }
119
120 void setOffsetReg(unsigned Reg) {
121 OffsetReg = Reg;
122 }
123
124 unsigned getOffsetReg() const {
125 return OffsetReg;
126 }
127
128 void setFI(unsigned FI) {
129 assert(isFIBase() && "Invalid base frame index access!");
130 Base.FI = FI;
131 }
132
133 unsigned getFI() const {
134 assert(isFIBase() && "Invalid base frame index access!");
135 return Base.FI;
136 }
137
138 void setOffset(int64_t O) { Offset = O; }
139 int64_t getOffset() { return Offset; }
140 void setShift(unsigned S) { Shift = S; }
141 unsigned getShift() { return Shift; }
142
143 void setGlobalValue(const GlobalValue *G) { GV = G; }
144 const GlobalValue *getGlobalValue() { return GV; }
145 };
146
147 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
148 /// make the right decision when generating code for different targets.
149 const AArch64Subtarget *Subtarget;
150 LLVMContext *Context;
151
152 bool fastLowerArguments() override;
153 bool fastLowerCall(CallLoweringInfo &CLI) override;
154 bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
155
156private:
157 // Selection routines.
158 bool selectAddSub(const Instruction *I);
159 bool selectLogicalOp(const Instruction *I);
160 bool selectLoad(const Instruction *I);
161 bool selectStore(const Instruction *I);
162 bool selectBranch(const Instruction *I);
163 bool selectIndirectBr(const Instruction *I);
164 bool selectCmp(const Instruction *I);
165 bool selectSelect(const Instruction *I);
166 bool selectFPExt(const Instruction *I);
167 bool selectFPTrunc(const Instruction *I);
168 bool selectFPToInt(const Instruction *I, bool Signed);
169 bool selectIntToFP(const Instruction *I, bool Signed);
170 bool selectRem(const Instruction *I, unsigned ISDOpcode);
171 bool selectRet(const Instruction *I);
172 bool selectTrunc(const Instruction *I);
173 bool selectIntExt(const Instruction *I);
174 bool selectMul(const Instruction *I);
175 bool selectShift(const Instruction *I);
176 bool selectBitCast(const Instruction *I);
177 bool selectFRem(const Instruction *I);
178 bool selectSDiv(const Instruction *I);
179 bool selectGetElementPtr(const Instruction *I);
180 bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
181
182 // Utility helper routines.
183 bool isTypeLegal(Type *Ty, MVT &VT);
184 bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
185 bool isValueAvailable(const Value *V) const;
186 bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
187 bool computeCallAddress(const Value *V, Address &Addr);
188 bool simplifyAddress(Address &Addr, MVT VT);
189 void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
190 MachineMemOperand::Flags Flags,
191 unsigned ScaleFactor, MachineMemOperand *MMO);
192 bool isMemCpySmall(uint64_t Len, MaybeAlign Alignment);
193 bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
194 MaybeAlign Alignment);
195 bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
196 const Value *Cond);
197 bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
198 bool optimizeSelect(const SelectInst *SI);
199 unsigned getRegForGEPIndex(const Value *Idx);
200
201 // Emit helper routines.
202 unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
203 const Value *RHS, bool SetFlags = false,
204 bool WantResult = true, bool IsZExt = false);
205 unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
206 unsigned RHSReg, bool SetFlags = false,
207 bool WantResult = true);
208 unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
209 uint64_t Imm, bool SetFlags = false,
210 bool WantResult = true);
211 unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
212 unsigned RHSReg, AArch64_AM::ShiftExtendType ShiftType,
213 uint64_t ShiftImm, bool SetFlags = false,
214 bool WantResult = true);
215 unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
216 unsigned RHSReg, AArch64_AM::ShiftExtendType ExtType,
217 uint64_t ShiftImm, bool SetFlags = false,
218 bool WantResult = true);
219
220 // Emit functions.
221 bool emitCompareAndBranch(const BranchInst *BI);
222 bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
223 bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
224 bool emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
225 bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
226 unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
227 MachineMemOperand *MMO = nullptr);
228 bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
229 MachineMemOperand *MMO = nullptr);
230 bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
231 MachineMemOperand *MMO = nullptr);
232 unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
233 unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
234 unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
235 bool SetFlags = false, bool WantResult = true,
236 bool IsZExt = false);
237 unsigned emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm);
238 unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
239 bool SetFlags = false, bool WantResult = true,
240 bool IsZExt = false);
241 unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
242 bool WantResult = true);
243 unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
244 AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
245 bool WantResult = true);
246 unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
247 const Value *RHS);
248 unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
249 uint64_t Imm);
250 unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
251 unsigned RHSReg, uint64_t ShiftImm);
252 unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
253 unsigned emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1);
254 unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
255 unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
256 unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
257 unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
258 bool IsZExt = true);
259 unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
260 unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
261 bool IsZExt = true);
262 unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
263 unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
264 bool IsZExt = false);
265
266 unsigned materializeInt(const ConstantInt *CI, MVT VT);
267 unsigned materializeFP(const ConstantFP *CFP, MVT VT);
268 unsigned materializeGV(const GlobalValue *GV);
269
270 // Call handling routines.
271private:
272 CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
273 bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
274 unsigned &NumBytes);
275 bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
276
277public:
278 // Backend specific FastISel code.
279 unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
280 unsigned fastMaterializeConstant(const Constant *C) override;
281 unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
282
283 explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
284 const TargetLibraryInfo *LibInfo)
285 : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
286 Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>();
287 Context = &FuncInfo.Fn->getContext();
288 }
289
290 bool fastSelectInstruction(const Instruction *I) override;
291
292#include "AArch64GenFastISel.inc"
293};
294
295} // end anonymous namespace
296
297/// Check if the sign-/zero-extend will be a noop.
298static bool isIntExtFree(const Instruction *I) {
299 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
300 "Unexpected integer extend instruction.");
301 assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
302 "Unexpected value type.");
303 bool IsZExt = isa<ZExtInst>(I);
304
305 if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
306 if (LI->hasOneUse())
307 return true;
308
309 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
310 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
311 return true;
312
313 return false;
314}
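// For example (illustrative): given
//   %v = load i32, ptr %p
//   %e = zext i32 %v to i64
// with the load having a single use, the 32-bit LDR already clears the upper
// 32 bits of the destination X register, so the zero-extend costs nothing and
// is treated as free here.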
315
316/// Determine the implicit scale factor that is applied by a memory
317/// operation for a given value type.
318static unsigned getImplicitScaleFactor(MVT VT) {
319 switch (VT.SimpleTy) {
320 default:
321 return 0; // invalid
322 case MVT::i1: // fall-through
323 case MVT::i8:
324 return 1;
325 case MVT::i16:
326 return 2;
327 case MVT::i32: // fall-through
328 case MVT::f32:
329 return 4;
330 case MVT::i64: // fall-through
331 case MVT::f64:
332 return 8;
333 }
334}
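// For example (illustrative): scaled, unsigned-immediate loads and stores such
// as LDR Xt, [Xn, #imm] encode the offset in multiples of the access size, so
// an MVT::i64 access uses a scale factor of 8 and can address byte offsets
// 0, 8, ..., 4095 * 8 directly; a scale factor of 0 marks types the scaled
// forms cannot handle.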
335
336CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
339 if (CC == CallingConv::GHC)
340 return CC_AArch64_GHC;
343 return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
344}
345
346unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
347 assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
348 "Alloca should always return a pointer.");
349
350 // Don't handle dynamic allocas.
351 if (!FuncInfo.StaticAllocaMap.count(AI))
352 return 0;
353
354 DenseMap<const AllocaInst *, int>::iterator SI =
355 FuncInfo.StaticAllocaMap.find(AI);
356
357 if (SI != FuncInfo.StaticAllocaMap.end()) {
358 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
359 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
360 ResultReg)
361 .addFrameIndex(SI->second)
362 .addImm(0)
363 .addImm(0);
364 return ResultReg;
365 }
366
367 return 0;
368}
369
370unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
371 if (VT > MVT::i64)
372 return 0;
373
374 if (!CI->isZero())
375 return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
376
377 // Create a copy from the zero register to materialize a "0" value.
378 const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
379 : &AArch64::GPR32RegClass;
380 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
381 Register ResultReg = createResultReg(RC);
382 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
383 ResultReg).addReg(ZeroReg, getKillRegState(true));
384 return ResultReg;
385}
386
387unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
388 // Positive zero (+0.0) has to be materialized with a fmov from the zero
389 // register, because the immediate version of fmov cannot encode zero.
390 if (CFP->isNullValue())
391 return fastMaterializeFloatZero(CFP);
392
393 if (VT != MVT::f32 && VT != MVT::f64)
394 return 0;
395
396 const APFloat Val = CFP->getValueAPF();
397 bool Is64Bit = (VT == MVT::f64);
398 // This checks to see if we can use FMOV instructions to materialize
399 // a constant, otherwise we have to materialize via the constant pool.
400 int Imm =
401 Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
402 if (Imm != -1) {
403 unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
404 return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
405 }
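// Roughly, the 8-bit FMOV immediate covers values of the form
// (-1)^s * (16 + m) / 16 * 2^e with m in [0, 15] and e in [-3, 4], so constants
// such as 1.0, 0.5, or -2.5 take a single FMOV, while something like 0.1 (or
// +0.0, handled earlier) has to go through the paths below.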
406
407 // For the large code model materialize the FP constant in code.
408 if (TM.getCodeModel() == CodeModel::Large) {
409 unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
410 const TargetRegisterClass *RC = Is64Bit ?
411 &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
412
413 Register TmpReg = createResultReg(RC);
414 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc1), TmpReg)
415 .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
416
417 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
418 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
419 TII.get(TargetOpcode::COPY), ResultReg)
420 .addReg(TmpReg, getKillRegState(true));
421
422 return ResultReg;
423 }
424
425 // Materialize via constant pool. MachineConstantPool wants an explicit
426 // alignment.
427 Align Alignment = DL.getPrefTypeAlign(CFP->getType());
428
429 unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
430 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
431 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
432 ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
433
434 unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
435 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
436 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
437 .addReg(ADRPReg)
438 .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
439 return ResultReg;
440}
441
442unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
443 // We can't handle thread-local variables quickly yet.
444 if (GV->isThreadLocal())
445 return 0;
446
447 // MachO still uses GOT for large code-model accesses, but ELF requires
448 // movz/movk sequences, which FastISel doesn't handle yet.
449 if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
450 return 0;
451
452 unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
453
454 EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
455 if (!DestEVT.isSimple())
456 return 0;
457
458 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
459 unsigned ResultReg;
460
461 if (OpFlags & AArch64II::MO_GOT) {
462 // ADRP + LDRX
463 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
464 ADRPReg)
465 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
466
467 unsigned LdrOpc;
468 if (Subtarget->isTargetILP32()) {
469 ResultReg = createResultReg(&AArch64::GPR32RegClass);
470 LdrOpc = AArch64::LDRWui;
471 } else {
472 ResultReg = createResultReg(&AArch64::GPR64RegClass);
473 LdrOpc = AArch64::LDRXui;
474 }
475 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(LdrOpc),
476 ResultReg)
477 .addReg(ADRPReg)
478 .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
479 AArch64II::MO_NC | OpFlags);
480 if (!Subtarget->isTargetILP32())
481 return ResultReg;
482
483 // LDRWui produces a 32-bit register, but pointers held in registers are 64 bits,
484 // so we must extend the result on ILP32.
485 Register Result64 = createResultReg(&AArch64::GPR64RegClass);
486 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
487 TII.get(TargetOpcode::SUBREG_TO_REG))
488 .addDef(Result64)
489 .addImm(0)
490 .addReg(ResultReg, RegState::Kill)
491 .addImm(AArch64::sub_32);
492 return Result64;
493 } else {
494 // ADRP + ADDX
495 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
496 ADRPReg)
497 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
498
499 if (OpFlags & AArch64II::MO_TAGGED) {
500 // MO_TAGGED on the page indicates a tagged address. Set the tag now.
501 // We do so by creating a MOVK that sets bits 48-63 of the register to
502 // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
503 // the small code model so we can assume a binary size of <= 4GB, which
504 // makes the untagged PC relative offset positive. The binary must also be
505 // loaded into address range [0, 2^48). Both of these properties need to
506 // be ensured at runtime when using tagged addresses.
507 //
508 // TODO: There is duplicate logic in AArch64ExpandPseudoInsts.cpp that
509 // also uses BuildMI for making an ADRP (+ MOVK) + ADD, but the operands
510 // are not exactly 1:1 with FastISel so we cannot easily abstract this
511 // out. At some point, it would be nice to find a way to not have this
512 // duplicate code.
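// Put differently (illustrative reasoning): with the small code model the
// untagged offset (global - PC) lies in (-2^32, 2^32), so adding 0x100000000
// yields a positive value below 2^48 whose bits 48-63 are exactly the tag bits
// of the global; the MOVK below copies those 16 bits into the ADRP result.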
513 unsigned DstReg = createResultReg(&AArch64::GPR64commonRegClass);
514 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::MOVKXi),
515 DstReg)
516 .addReg(ADRPReg)
517 .addGlobalAddress(GV, /*Offset=*/0x100000000,
518 AArch64II::MO_PREL | AArch64II::MO_G3)
519 .addImm(48);
520 ADRPReg = DstReg;
521 }
522
523 ResultReg = createResultReg(&AArch64::GPR64spRegClass);
524 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
525 ResultReg)
526 .addReg(ADRPReg)
527 .addGlobalAddress(GV, 0,
528 AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
529 .addImm(0);
530 }
531 return ResultReg;
532}
533
534unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
535 EVT CEVT = TLI.getValueType(DL, C->getType(), true);
536
537 // Only handle simple types.
538 if (!CEVT.isSimple())
539 return 0;
540 MVT VT = CEVT.getSimpleVT();
541 // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
542 // 'null' pointers need somewhat special treatment.
543 if (isa<ConstantPointerNull>(C)) {
544 assert(VT == MVT::i64 && "Expected 64-bit pointers");
545 return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
546 }
547
548 if (const auto *CI = dyn_cast<ConstantInt>(C))
549 return materializeInt(CI, VT);
550 else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
551 return materializeFP(CFP, VT);
552 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
553 return materializeGV(GV);
554
555 return 0;
556}
557
558unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
559 assert(CFP->isNullValue() &&
560 "Floating-point constant is not a positive zero.");
561 MVT VT;
562 if (!isTypeLegal(CFP->getType(), VT))
563 return 0;
564
565 if (VT != MVT::f32 && VT != MVT::f64)
566 return 0;
567
568 bool Is64Bit = (VT == MVT::f64);
569 unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
570 unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
571 return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);
572}
573
574/// Check if the multiply is by a power-of-2 constant.
575static bool isMulPowOf2(const Value *I) {
576 if (const auto *MI = dyn_cast<MulOperator>(I)) {
577 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
578 if (C->getValue().isPowerOf2())
579 return true;
580 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
581 if (C->getValue().isPowerOf2())
582 return true;
583 }
584 return false;
585}
586
587// Computes the address to get to an object.
588bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
589{
590 const User *U = nullptr;
591 unsigned Opcode = Instruction::UserOp1;
592 if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
593 // Don't walk into other basic blocks unless the object is an alloca from
594 // another block, otherwise it may not have a virtual register assigned.
595 if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
596 FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
597 Opcode = I->getOpcode();
598 U = I;
599 }
600 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
601 Opcode = C->getOpcode();
602 U = C;
603 }
604
605 if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
606 if (Ty->getAddressSpace() > 255)
607 // Fast instruction selection doesn't support the special
608 // address spaces.
609 return false;
610
611 switch (Opcode) {
612 default:
613 break;
614 case Instruction::BitCast:
615 // Look through bitcasts.
616 return computeAddress(U->getOperand(0), Addr, Ty);
617
618 case Instruction::IntToPtr:
619 // Look past no-op inttoptrs.
620 if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
621 TLI.getPointerTy(DL))
622 return computeAddress(U->getOperand(0), Addr, Ty);
623 break;
624
625 case Instruction::PtrToInt:
626 // Look past no-op ptrtoints.
627 if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
628 return computeAddress(U->getOperand(0), Addr, Ty);
629 break;
630
631 case Instruction::GetElementPtr: {
632 Address SavedAddr = Addr;
633 uint64_t TmpOffset = Addr.getOffset();
634
635 // Iterate through the GEP folding the constants into offsets where
636 // we can.
637 for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
638 GTI != E; ++GTI) {
639 const Value *Op = GTI.getOperand();
640 if (StructType *STy = GTI.getStructTypeOrNull()) {
641 const StructLayout *SL = DL.getStructLayout(STy);
642 unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
643 TmpOffset += SL->getElementOffset(Idx);
644 } else {
645 uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
646 while (true) {
647 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
648 // Constant-offset addressing.
649 TmpOffset += CI->getSExtValue() * S;
650 break;
651 }
652 if (canFoldAddIntoGEP(U, Op)) {
653 // A compatible add with a constant operand. Fold the constant.
654 ConstantInt *CI =
655 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
656 TmpOffset += CI->getSExtValue() * S;
657 // Iterate on the other operand.
658 Op = cast<AddOperator>(Op)->getOperand(0);
659 continue;
660 }
661 // Unsupported
662 goto unsupported_gep;
663 }
664 }
665 }
666
667 // Try to grab the base operand now.
668 Addr.setOffset(TmpOffset);
669 if (computeAddress(U->getOperand(0), Addr, Ty))
670 return true;
671
672 // We failed, restore everything and try the other options.
673 Addr = SavedAddr;
674
675 unsupported_gep:
676 break;
677 }
678 case Instruction::Alloca: {
679 const AllocaInst *AI = cast<AllocaInst>(Obj);
680 DenseMap<const AllocaInst *, int>::iterator SI =
681 FuncInfo.StaticAllocaMap.find(AI);
682 if (SI != FuncInfo.StaticAllocaMap.end()) {
683 Addr.setKind(Address::FrameIndexBase);
684 Addr.setFI(SI->second);
685 return true;
686 }
687 break;
688 }
689 case Instruction::Add: {
690 // Adds of constants are common and easy enough.
691 const Value *LHS = U->getOperand(0);
692 const Value *RHS = U->getOperand(1);
693
694 if (isa<ConstantInt>(LHS))
695 std::swap(LHS, RHS);
696
697 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
698 Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
699 return computeAddress(LHS, Addr, Ty);
700 }
701
702 Address Backup = Addr;
703 if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
704 return true;
705 Addr = Backup;
706
707 break;
708 }
709 case Instruction::Sub: {
710 // Subs of constants are common and easy enough.
711 const Value *LHS = U->getOperand(0);
712 const Value *RHS = U->getOperand(1);
713
714 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
715 Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
716 return computeAddress(LHS, Addr, Ty);
717 }
718 break;
719 }
720 case Instruction::Shl: {
721 if (Addr.getOffsetReg())
722 break;
723
724 const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
725 if (!CI)
726 break;
727
728 unsigned Val = CI->getZExtValue();
729 if (Val < 1 || Val > 3)
730 break;
731
732 uint64_t NumBytes = 0;
733 if (Ty && Ty->isSized()) {
734 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
735 NumBytes = NumBits / 8;
736 if (!isPowerOf2_64(NumBits))
737 NumBytes = 0;
738 }
739
740 if (NumBytes != (1ULL << Val))
741 break;
742
743 Addr.setShift(Val);
744 Addr.setExtendType(AArch64_AM::LSL);
745
746 const Value *Src = U->getOperand(0);
747 if (const auto *I = dyn_cast<Instruction>(Src)) {
748 if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
749 // Fold the zext or sext when it won't become a noop.
750 if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
751 if (!isIntExtFree(ZE) &&
752 ZE->getOperand(0)->getType()->isIntegerTy(32)) {
753 Addr.setExtendType(AArch64_AM::UXTW);
754 Src = ZE->getOperand(0);
755 }
756 } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
757 if (!isIntExtFree(SE) &&
758 SE->getOperand(0)->getType()->isIntegerTy(32)) {
759 Addr.setExtendType(AArch64_AM::SXTW);
760 Src = SE->getOperand(0);
761 }
762 }
763 }
764 }
765
766 if (const auto *AI = dyn_cast<BinaryOperator>(Src))
767 if (AI->getOpcode() == Instruction::And) {
768 const Value *LHS = AI->getOperand(0);
769 const Value *RHS = AI->getOperand(1);
770
771 if (const auto *C = dyn_cast<ConstantInt>(LHS))
772 if (C->getValue() == 0xffffffff)
773 std::swap(LHS, RHS);
774
775 if (const auto *C = dyn_cast<ConstantInt>(RHS))
776 if (C->getValue() == 0xffffffff) {
777 Addr.setExtendType(AArch64_AM::UXTW);
778 Register Reg = getRegForValue(LHS);
779 if (!Reg)
780 return false;
781 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
782 Addr.setOffsetReg(Reg);
783 return true;
784 }
785 }
786
787 Register Reg = getRegForValue(Src);
788 if (!Reg)
789 return false;
790 Addr.setOffsetReg(Reg);
791 return true;
792 }
793 case Instruction::Mul: {
794 if (Addr.getOffsetReg())
795 break;
796
797 if (!isMulPowOf2(U))
798 break;
799
800 const Value *LHS = U->getOperand(0);
801 const Value *RHS = U->getOperand(1);
802
803 // Canonicalize power-of-2 value to the RHS.
804 if (const auto *C = dyn_cast<ConstantInt>(LHS))
805 if (C->getValue().isPowerOf2())
806 std::swap(LHS, RHS);
807
808 assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
809 const auto *C = cast<ConstantInt>(RHS);
810 unsigned Val = C->getValue().logBase2();
811 if (Val < 1 || Val > 3)
812 break;
813
814 uint64_t NumBytes = 0;
815 if (Ty && Ty->isSized()) {
816 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
817 NumBytes = NumBits / 8;
818 if (!isPowerOf2_64(NumBits))
819 NumBytes = 0;
820 }
821
822 if (NumBytes != (1ULL << Val))
823 break;
824
825 Addr.setShift(Val);
826 Addr.setExtendType(AArch64_AM::LSL);
827
828 const Value *Src = LHS;
829 if (const auto *I = dyn_cast<Instruction>(Src)) {
830 if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
831 // Fold the zext or sext when it won't become a noop.
832 if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
833 if (!isIntExtFree(ZE) &&
834 ZE->getOperand(0)->getType()->isIntegerTy(32)) {
835 Addr.setExtendType(AArch64_AM::UXTW);
836 Src = ZE->getOperand(0);
837 }
838 } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
839 if (!isIntExtFree(SE) &&
840 SE->getOperand(0)->getType()->isIntegerTy(32)) {
841 Addr.setExtendType(AArch64_AM::SXTW);
842 Src = SE->getOperand(0);
843 }
844 }
845 }
846 }
847
848 Register Reg = getRegForValue(Src);
849 if (!Reg)
850 return false;
851 Addr.setOffsetReg(Reg);
852 return true;
853 }
854 case Instruction::And: {
855 if (Addr.getOffsetReg())
856 break;
857
858 if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
859 break;
860
861 const Value *LHS = U->getOperand(0);
862 const Value *RHS = U->getOperand(1);
863
864 if (const auto *C = dyn_cast<ConstantInt>(LHS))
865 if (C->getValue() == 0xffffffff)
866 std::swap(LHS, RHS);
867
868 if (const auto *C = dyn_cast<ConstantInt>(RHS))
869 if (C->getValue() == 0xffffffff) {
870 Addr.setShift(0);
871 Addr.setExtendType(AArch64_AM::LSL);
872 Addr.setExtendType(AArch64_AM::UXTW);
873
874 Register Reg = getRegForValue(LHS);
875 if (!Reg)
876 return false;
877 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
878 Addr.setOffsetReg(Reg);
879 return true;
880 }
881 break;
882 }
883 case Instruction::SExt:
884 case Instruction::ZExt: {
885 if (!Addr.getReg() || Addr.getOffsetReg())
886 break;
887
888 const Value *Src = nullptr;
889 // Fold the zext or sext when it won't become a noop.
890 if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
891 if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
892 Addr.setExtendType(AArch64_AM::UXTW);
893 Src = ZE->getOperand(0);
894 }
895 } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
896 if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
897 Addr.setExtendType(AArch64_AM::SXTW);
898 Src = SE->getOperand(0);
899 }
900 }
901
902 if (!Src)
903 break;
904
905 Addr.setShift(0);
906 Register Reg = getRegForValue(Src);
907 if (!Reg)
908 return false;
909 Addr.setOffsetReg(Reg);
910 return true;
911 }
912 } // end switch
913
914 if (Addr.isRegBase() && !Addr.getReg()) {
915 Register Reg = getRegForValue(Obj);
916 if (!Reg)
917 return false;
918 Addr.setReg(Reg);
919 return true;
920 }
921
922 if (!Addr.getOffsetReg()) {
923 Register Reg = getRegForValue(Obj);
924 if (!Reg)
925 return false;
926 Addr.setOffsetReg(Reg);
927 return true;
928 }
929
930 return false;
931}
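// Illustrative example of the folding above: for an address computed as
//   %idx64 = sext i32 %i to i64
//   %off   = shl i64 %idx64, 3
//   %addr  = add i64 %base, %off
// (all in the same block, with the extend not otherwise free), the Add and Shl
// cases record %base as the base register and %i as the offset register with
// an SXTW extend and a shift of 3, so an i64 load from %addr can be selected
// as something like ldr x0, [xBase, wIdx, sxtw #3] with no separate address
// arithmetic.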
932
933bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
934 const User *U = nullptr;
935 unsigned Opcode = Instruction::UserOp1;
936 bool InMBB = true;
937
938 if (const auto *I = dyn_cast<Instruction>(V)) {
939 Opcode = I->getOpcode();
940 U = I;
941 InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
942 } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
943 Opcode = C->getOpcode();
944 U = C;
945 }
946
947 switch (Opcode) {
948 default: break;
949 case Instruction::BitCast:
950 // Look past bitcasts if its operand is in the same BB.
951 if (InMBB)
952 return computeCallAddress(U->getOperand(0), Addr);
953 break;
954 case Instruction::IntToPtr:
955 // Look past no-op inttoptrs if its operand is in the same BB.
956 if (InMBB &&
957 TLI.getValueType(DL, U->getOperand(0)->getType()) ==
958 TLI.getPointerTy(DL))
959 return computeCallAddress(U->getOperand(0), Addr);
960 break;
961 case Instruction::PtrToInt:
962 // Look past no-op ptrtoints if its operand is in the same BB.
963 if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
964 return computeCallAddress(U->getOperand(0), Addr);
965 break;
966 }
967
968 if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
969 Addr.setGlobalValue(GV);
970 return true;
971 }
972
973 // If all else fails, try to materialize the value in a register.
974 if (!Addr.getGlobalValue()) {
975 Addr.setReg(getRegForValue(V));
976 return Addr.getReg() != 0;
977 }
978
979 return false;
980}
981
982bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
983 EVT evt = TLI.getValueType(DL, Ty, true);
984
985 if (Subtarget->isTargetILP32() && Ty->isPointerTy())
986 return false;
987
988 // Only handle simple types.
989 if (evt == MVT::Other || !evt.isSimple())
990 return false;
991 VT = evt.getSimpleVT();
992
993 // This is a legal type, but it's not something we handle in fast-isel.
994 if (VT == MVT::f128)
995 return false;
996
997 // Handle all other legal types, i.e. a register that will directly hold this
998 // value.
999 return TLI.isTypeLegal(VT);
1000}
1001
1002/// Determine if the value type is supported by FastISel.
1003///
1004/// FastISel for AArch64 can handle more value types than are legal. This adds
1005 /// simple value types such as i1, i8, and i16.
1006bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
1007 if (Ty->isVectorTy() && !IsVectorAllowed)
1008 return false;
1009
1010 if (isTypeLegal(Ty, VT))
1011 return true;
1012
1013 // If this is a type that can be sign- or zero-extended to a basic operation,
1014 // go ahead and accept it now.
1015 if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
1016 return true;
1017
1018 return false;
1019}
1020
1021bool AArch64FastISel::isValueAvailable(const Value *V) const {
1022 if (!isa<Instruction>(V))
1023 return true;
1024
1025 const auto *I = cast<Instruction>(V);
1026 return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
1027}
1028
1029bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
1030 if (Subtarget->isTargetILP32())
1031 return false;
1032
1033 unsigned ScaleFactor = getImplicitScaleFactor(VT);
1034 if (!ScaleFactor)
1035 return false;
1036
1037 bool ImmediateOffsetNeedsLowering = false;
1038 bool RegisterOffsetNeedsLowering = false;
1039 int64_t Offset = Addr.getOffset();
1040 if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
1041 ImmediateOffsetNeedsLowering = true;
1042 else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1043 !isUInt<12>(Offset / ScaleFactor))
1044 ImmediateOffsetNeedsLowering = true;
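// Illustrative examples for an i64 access (ScaleFactor == 8): offset 32760
// (4095 * 8) fits the scaled, unsigned 12-bit form (LDR Xt, [Xn, #32760]);
// offsets such as -8 or 12 fit the unscaled, signed 9-bit form (LDUR); an
// offset of 32768 fits neither, so it is lowered into the base register below.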
1045
1046 // Cannot encode an offset register and an immediate offset in the same
1047 // instruction. Fold the immediate offset into the load/store instruction and
1048 // emit an additional add to take care of the offset register.
1049 if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1050 RegisterOffsetNeedsLowering = true;
1051
1052 // Cannot encode zero register as base.
1053 if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1054 RegisterOffsetNeedsLowering = true;
1055
1056 // If this is a stack pointer and the offset needs to be simplified then put
1057 // the alloca address into a register, set the base type back to register and
1058 // continue. This should almost never happen.
1059 if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1060 {
1061 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1062 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
1063 ResultReg)
1064 .addFrameIndex(Addr.getFI())
1065 .addImm(0)
1066 .addImm(0);
1067 Addr.setKind(Address::RegBase);
1068 Addr.setReg(ResultReg);
1069 }
1070
1071 if (RegisterOffsetNeedsLowering) {
1072 unsigned ResultReg = 0;
1073 if (Addr.getReg()) {
1074 if (Addr.getExtendType() == AArch64_AM::SXTW ||
1075 Addr.getExtendType() == AArch64_AM::UXTW )
1076 ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1077 Addr.getOffsetReg(), Addr.getExtendType(),
1078 Addr.getShift());
1079 else
1080 ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1081 Addr.getOffsetReg(), AArch64_AM::LSL,
1082 Addr.getShift());
1083 } else {
1084 if (Addr.getExtendType() == AArch64_AM::UXTW)
1085 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1086 Addr.getShift(), /*IsZExt=*/true);
1087 else if (Addr.getExtendType() == AArch64_AM::SXTW)
1088 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1089 Addr.getShift(), /*IsZExt=*/false);
1090 else
1091 ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1092 Addr.getShift());
1093 }
1094 if (!ResultReg)
1095 return false;
1096
1097 Addr.setReg(ResultReg);
1098 Addr.setOffsetReg(0);
1099 Addr.setShift(0);
1100 Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1101 }
1102
1103 // Since the offset is too large for the load/store instruction, get the
1104 // reg+offset into a register.
1105 if (ImmediateOffsetNeedsLowering) {
1106 unsigned ResultReg;
1107 if (Addr.getReg())
1108 // Try to fold the immediate into the add instruction.
1109 ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
1110 else
1111 ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1112
1113 if (!ResultReg)
1114 return false;
1115 Addr.setReg(ResultReg);
1116 Addr.setOffset(0);
1117 }
1118 return true;
1119}
1120
1121void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1122 const MachineInstrBuilder &MIB,
1123 MachineMemOperand::Flags Flags,
1124 unsigned ScaleFactor,
1125 MachineMemOperand *MMO) {
1126 int64_t Offset = Addr.getOffset() / ScaleFactor;
1127 // Frame base works a bit differently. Handle it separately.
1128 if (Addr.isFIBase()) {
1129 int FI = Addr.getFI();
1130 // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
1131 // and alignment should be based on the VT.
1132 MMO = FuncInfo.MF->getMachineMemOperand(
1133 MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1134 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
1135 // Now add the rest of the operands.
1136 MIB.addFrameIndex(FI).addImm(Offset);
1137 } else {
1138 assert(Addr.isRegBase() && "Unexpected address kind.");
1139 const MCInstrDesc &II = MIB->getDesc();
1140 unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1141 Addr.setReg(
1142 constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1143 Addr.setOffsetReg(
1144 constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1145 if (Addr.getOffsetReg()) {
1146 assert(Addr.getOffset() == 0 && "Unexpected offset");
1147 bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1148 Addr.getExtendType() == AArch64_AM::SXTX;
1149 MIB.addReg(Addr.getReg());
1150 MIB.addReg(Addr.getOffsetReg());
1151 MIB.addImm(IsSigned);
1152 MIB.addImm(Addr.getShift() != 0);
1153 } else
1154 MIB.addReg(Addr.getReg()).addImm(Offset);
1155 }
1156
1157 if (MMO)
1158 MIB.addMemOperand(MMO);
1159}
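// Illustrative operand shapes produced here (assuming a load): a frame-index
// access ends up as <frame-index, immediate>, while a register-offset access
// ends up as <base, offset, is-signed-extend, is-shifted>, matching the
// register-offset (ro) instruction forms chosen in emitLoad/emitStore.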
1160
1161unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1162 const Value *RHS, bool SetFlags,
1163 bool WantResult, bool IsZExt) {
1164 AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1165 bool NeedExtend = false;
1166 switch (RetVT.SimpleTy) {
1167 default:
1168 return 0;
1169 case MVT::i1:
1170 NeedExtend = true;
1171 break;
1172 case MVT::i8:
1173 NeedExtend = true;
1174 ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1175 break;
1176 case MVT::i16:
1177 NeedExtend = true;
1178 ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1179 break;
1180 case MVT::i32: // fall-through
1181 case MVT::i64:
1182 break;
1183 }
1184 MVT SrcVT = RetVT;
1185 RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1186
1187 // Canonicalize immediates to the RHS first.
1188 if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1189 std::swap(LHS, RHS);
1190
1191 // Canonicalize mul by power of 2 to the RHS.
1192 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1193 if (isMulPowOf2(LHS))
1194 std::swap(LHS, RHS);
1195
1196 // Canonicalize shift immediate to the RHS.
1197 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1198 if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1199 if (isa<ConstantInt>(SI->getOperand(1)))
1200 if (SI->getOpcode() == Instruction::Shl ||
1201 SI->getOpcode() == Instruction::LShr ||
1202 SI->getOpcode() == Instruction::AShr )
1203 std::swap(LHS, RHS);
1204
1205 Register LHSReg = getRegForValue(LHS);
1206 if (!LHSReg)
1207 return 0;
1208
1209 if (NeedExtend)
1210 LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1211
1212 unsigned ResultReg = 0;
1213 if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1214 uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1215 if (C->isNegative())
1216 ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
1217 WantResult);
1218 else
1219 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
1220 WantResult);
1221 } else if (const auto *C = dyn_cast<Constant>(RHS))
1222 if (C->isNullValue())
1223 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);
1224
1225 if (ResultReg)
1226 return ResultReg;
1227
1228 // Only extend the RHS within the instruction if there is a valid extend type.
1229 if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1230 isValueAvailable(RHS)) {
1231 if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
1232 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
1233 if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
1234 Register RHSReg = getRegForValue(SI->getOperand(0));
1235 if (!RHSReg)
1236 return 0;
1237 return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType,
1238 C->getZExtValue(), SetFlags, WantResult);
1239 }
1240 Register RHSReg = getRegForValue(RHS);
1241 if (!RHSReg)
1242 return 0;
1243 return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,
1244 SetFlags, WantResult);
1245 }
1246
1247 // Check if the mul can be folded into the instruction.
1248 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1249 if (isMulPowOf2(RHS)) {
1250 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1251 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1252
1253 if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1254 if (C->getValue().isPowerOf2())
1255 std::swap(MulLHS, MulRHS);
1256
1257 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1258 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1259 Register RHSReg = getRegForValue(MulLHS);
1260 if (!RHSReg)
1261 return 0;
1262 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,
1263 ShiftVal, SetFlags, WantResult);
1264 if (ResultReg)
1265 return ResultReg;
1266 }
1267 }
1268
1269 // Check if the shift can be folded into the instruction.
1270 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1271 if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1272 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1273 AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1274 switch (SI->getOpcode()) {
1275 default: break;
1276 case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
1277 case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1278 case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1279 }
1280 uint64_t ShiftVal = C->getZExtValue();
1281 if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1282 Register RHSReg = getRegForValue(SI->getOperand(0));
1283 if (!RHSReg)
1284 return 0;
1285 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
1286 ShiftVal, SetFlags, WantResult);
1287 if (ResultReg)
1288 return ResultReg;
1289 }
1290 }
1291 }
1292 }
1293
1294 Register RHSReg = getRegForValue(RHS);
1295 if (!RHSReg)
1296 return 0;
1297
1298 if (NeedExtend)
1299 RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1300
1301 return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
1302}
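// Illustrative result of the folding above: for "a + (b << 2)" where the shift
// has a single use, the shift is folded into the arithmetic instruction itself,
// producing something like add x0, x1, x2, lsl #2 instead of a separate LSL
// followed by an ADD.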
1303
1304unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1305 unsigned RHSReg, bool SetFlags,
1306 bool WantResult) {
1307 assert(LHSReg && RHSReg && "Invalid register number.");
1308
1309 if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1310 RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1311 return 0;
1312
1313 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1314 return 0;
1315
1316 static const unsigned OpcTable[2][2][2] = {
1317 { { AArch64::SUBWrr, AArch64::SUBXrr },
1318 { AArch64::ADDWrr, AArch64::ADDXrr } },
1319 { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1320 { AArch64::ADDSWrr, AArch64::ADDSXrr } }
1321 };
1322 bool Is64Bit = RetVT == MVT::i64;
1323 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1324 const TargetRegisterClass *RC =
1325 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1326 unsigned ResultReg;
1327 if (WantResult)
1328 ResultReg = createResultReg(RC);
1329 else
1330 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1331
1332 const MCInstrDesc &II = TII.get(Opc);
1333 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1334 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1335 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1336 .addReg(LHSReg)
1337 .addReg(RHSReg);
1338 return ResultReg;
1339}
1340
1341unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1342 uint64_t Imm, bool SetFlags,
1343 bool WantResult) {
1344 assert(LHSReg && "Invalid register number.");
1345
1346 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1347 return 0;
1348
1349 unsigned ShiftImm;
1350 if (isUInt<12>(Imm))
1351 ShiftImm = 0;
1352 else if ((Imm & 0xfff000) == Imm) {
1353 ShiftImm = 12;
1354 Imm >>= 12;
1355 } else
1356 return 0;
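// For example (illustrative): Imm == 0x123 is encoded directly (ShiftImm == 0),
// Imm == 0x7ff000 is encoded as 0x7ff with an LSL #12 shifter, and a value such
// as 0x123456 fits neither form, so this helper returns 0 and the caller falls
// back to materializing the constant in a register (see emitAdd_ri_).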
1357
1358 static const unsigned OpcTable[2][2][2] = {
1359 { { AArch64::SUBWri, AArch64::SUBXri },
1360 { AArch64::ADDWri, AArch64::ADDXri } },
1361 { { AArch64::SUBSWri, AArch64::SUBSXri },
1362 { AArch64::ADDSWri, AArch64::ADDSXri } }
1363 };
1364 bool Is64Bit = RetVT == MVT::i64;
1365 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1366 const TargetRegisterClass *RC;
1367 if (SetFlags)
1368 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1369 else
1370 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1371 unsigned ResultReg;
1372 if (WantResult)
1373 ResultReg = createResultReg(RC);
1374 else
1375 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1376
1377 const MCInstrDesc &II = TII.get(Opc);
1378 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1379 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1380 .addReg(LHSReg)
1381 .addImm(Imm)
1382 .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1383 return ResultReg;
1384}
1385
1386unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1387 unsigned RHSReg,
1388 AArch64_AM::ShiftExtendType ShiftType,
1389 uint64_t ShiftImm, bool SetFlags,
1390 bool WantResult) {
1391 assert(LHSReg && RHSReg && "Invalid register number.");
1392 assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1393 RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1394
1395 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1396 return 0;
1397
1398 // Don't deal with undefined shifts.
1399 if (ShiftImm >= RetVT.getSizeInBits())
1400 return 0;
1401
1402 static const unsigned OpcTable[2][2][2] = {
1403 { { AArch64::SUBWrs, AArch64::SUBXrs },
1404 { AArch64::ADDWrs, AArch64::ADDXrs } },
1405 { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1406 { AArch64::ADDSWrs, AArch64::ADDSXrs } }
1407 };
1408 bool Is64Bit = RetVT == MVT::i64;
1409 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1410 const TargetRegisterClass *RC =
1411 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1412 unsigned ResultReg;
1413 if (WantResult)
1414 ResultReg = createResultReg(RC);
1415 else
1416 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1417
1418 const MCInstrDesc &II = TII.get(Opc);
1419 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1420 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1421 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1422 .addReg(LHSReg)
1423 .addReg(RHSReg)
1424 .addImm(getShifterImm(ShiftType, ShiftImm));
1425 return ResultReg;
1426}
1427
1428unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1429 unsigned RHSReg,
1430 AArch64_AM::ShiftExtendType ExtType,
1431 uint64_t ShiftImm, bool SetFlags,
1432 bool WantResult) {
1433 assert(LHSReg && RHSReg && "Invalid register number.");
1434 assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1435 RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1436
1437 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1438 return 0;
1439
1440 if (ShiftImm >= 4)
1441 return 0;
1442
1443 static const unsigned OpcTable[2][2][2] = {
1444 { { AArch64::SUBWrx, AArch64::SUBXrx },
1445 { AArch64::ADDWrx, AArch64::ADDXrx } },
1446 { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1447 { AArch64::ADDSWrx, AArch64::ADDSXrx } }
1448 };
1449 bool Is64Bit = RetVT == MVT::i64;
1450 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1451 const TargetRegisterClass *RC = nullptr;
1452 if (SetFlags)
1453 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1454 else
1455 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1456 unsigned ResultReg;
1457 if (WantResult)
1458 ResultReg = createResultReg(RC);
1459 else
1460 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1461
1462 const MCInstrDesc &II = TII.get(Opc);
1463 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1464 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1465 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1466 .addReg(LHSReg)
1467 .addReg(RHSReg)
1468 .addImm(getArithExtendImm(ExtType, ShiftImm));
1469 return ResultReg;
1470}
1471
1472bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1473 Type *Ty = LHS->getType();
1474 EVT EVT = TLI.getValueType(DL, Ty, true);
1475 if (!EVT.isSimple())
1476 return false;
1477 MVT VT = EVT.getSimpleVT();
1478
1479 switch (VT.SimpleTy) {
1480 default:
1481 return false;
1482 case MVT::i1:
1483 case MVT::i8:
1484 case MVT::i16:
1485 case MVT::i32:
1486 case MVT::i64:
1487 return emitICmp(VT, LHS, RHS, IsZExt);
1488 case MVT::f32:
1489 case MVT::f64:
1490 return emitFCmp(VT, LHS, RHS);
1491 }
1492}
1493
1494bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1495 bool IsZExt) {
1496 return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1497 IsZExt) != 0;
1498}
1499
1500bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) {
1501 return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,
1502 /*SetFlags=*/true, /*WantResult=*/false) != 0;
1503}
1504
1505bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1506 if (RetVT != MVT::f32 && RetVT != MVT::f64)
1507 return false;
1508
1509 // Check to see if the 2nd operand is a constant that we can encode directly
1510 // in the compare.
1511 bool UseImm = false;
1512 if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1513 if (CFP->isZero() && !CFP->isNegative())
1514 UseImm = true;
1515
1516 Register LHSReg = getRegForValue(LHS);
1517 if (!LHSReg)
1518 return false;
1519
1520 if (UseImm) {
1521 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1522 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1523 .addReg(LHSReg);
1524 return true;
1525 }
1526
1527 Register RHSReg = getRegForValue(RHS);
1528 if (!RHSReg)
1529 return false;
1530
1531 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1532 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1533 .addReg(LHSReg)
1534 .addReg(RHSReg);
1535 return true;
1536}
1537
1538unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1539 bool SetFlags, bool WantResult, bool IsZExt) {
1540 return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1541 IsZExt);
1542}
1543
1544/// This method is a wrapper to simplify add emission.
1545///
1546/// First try to emit an add with an immediate operand using emitAddSub_ri. If
1547/// that fails, then try to materialize the immediate into a register and use
1548/// emitAddSub_rr instead.
1549unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm) {
1550 unsigned ResultReg;
1551 if (Imm < 0)
1552 ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);
1553 else
1554 ResultReg = emitAddSub_ri(true, VT, Op0, Imm);
1555
1556 if (ResultReg)
1557 return ResultReg;
1558
1559 unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1560 if (!CReg)
1561 return 0;
1562
1563 ResultReg = emitAddSub_rr(true, VT, Op0, CReg);
1564 return ResultReg;
1565}
1566
1567unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1568 bool SetFlags, bool WantResult, bool IsZExt) {
1569 return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1570 IsZExt);
1571}
1572
1573unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1574 unsigned RHSReg, bool WantResult) {
1575 return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg,
1576 /*SetFlags=*/true, WantResult);
1577}
1578
1579unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1580 unsigned RHSReg,
1581 AArch64_AM::ShiftExtendType ShiftType,
1582 uint64_t ShiftImm, bool WantResult) {
1583 return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType,
1584 ShiftImm, /*SetFlags=*/true, WantResult);
1585}
1586
1587unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1588 const Value *LHS, const Value *RHS) {
1589 // Canonicalize immediates to the RHS first.
1590 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1591 std::swap(LHS, RHS);
1592
1593 // Canonicalize mul by power-of-2 to the RHS.
1594 if (LHS->hasOneUse() && isValueAvailable(LHS))
1595 if (isMulPowOf2(LHS))
1596 std::swap(LHS, RHS);
1597
1598 // Canonicalize shift immediate to the RHS.
1599 if (LHS->hasOneUse() && isValueAvailable(LHS))
1600 if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1601 if (isa<ConstantInt>(SI->getOperand(1)))
1602 std::swap(LHS, RHS);
1603
1604 Register LHSReg = getRegForValue(LHS);
1605 if (!LHSReg)
1606 return 0;
1607
1608 unsigned ResultReg = 0;
1609 if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1610 uint64_t Imm = C->getZExtValue();
1611 ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);
1612 }
1613 if (ResultReg)
1614 return ResultReg;
1615
1616 // Check if the mul can be folded into the instruction.
1617 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1618 if (isMulPowOf2(RHS)) {
1619 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1620 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1621
1622 if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1623 if (C->getValue().isPowerOf2())
1624 std::swap(MulLHS, MulRHS);
1625
1626 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1627 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1628
1629 Register RHSReg = getRegForValue(MulLHS);
1630 if (!RHSReg)
1631 return 0;
1632 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1633 if (ResultReg)
1634 return ResultReg;
1635 }
1636 }
1637
1638 // Check if the shift can be folded into the instruction.
1639 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1640 if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1641 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1642 uint64_t ShiftVal = C->getZExtValue();
1643 Register RHSReg = getRegForValue(SI->getOperand(0));
1644 if (!RHSReg)
1645 return 0;
1646 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1647 if (ResultReg)
1648 return ResultReg;
1649 }
1650 }
1651
1652 Register RHSReg = getRegForValue(RHS);
1653 if (!RHSReg)
1654 return 0;
1655
1656 MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1657 ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);
1658 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1659 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1660 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1661 }
1662 return ResultReg;
1663}
1664
1665unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1666 unsigned LHSReg, uint64_t Imm) {
1667 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1668 "ISD nodes are not consecutive!");
1669 static const unsigned OpcTable[3][2] = {
1670 { AArch64::ANDWri, AArch64::ANDXri },
1671 { AArch64::ORRWri, AArch64::ORRXri },
1672 { AArch64::EORWri, AArch64::EORXri }
1673 };
1674 const TargetRegisterClass *RC;
1675 unsigned Opc;
1676 unsigned RegSize;
1677 switch (RetVT.SimpleTy) {
1678 default:
1679 return 0;
1680 case MVT::i1:
1681 case MVT::i8:
1682 case MVT::i16:
1683 case MVT::i32: {
1684 unsigned Idx = ISDOpc - ISD::AND;
1685 Opc = OpcTable[Idx][0];
1686 RC = &AArch64::GPR32spRegClass;
1687 RegSize = 32;
1688 break;
1689 }
1690 case MVT::i64:
1691 Opc = OpcTable[ISDOpc - ISD::AND][1];
1692 RC = &AArch64::GPR64spRegClass;
1693 RegSize = 64;
1694 break;
1695 }
1696
1697 if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1698 return 0;
1699
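// AArch64 logical instructions only take "logical immediates": rotations of a
// repeating pattern of contiguous set bits, never all-zeros or all-ones. For
// example (illustrative), 0xff, 0xff00 and 0x0f0f0f0f0f0f0f0f are encodable,
// while 0 and 0x123456 are not; such values are rejected by the check above
// and the caller uses a register operand instead.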
1700 Register ResultReg =
1701 fastEmitInst_ri(Opc, RC, LHSReg,
1702 AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1703 if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1704 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1705 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1706 }
1707 return ResultReg;
1708}
1709
1710unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1711 unsigned LHSReg, unsigned RHSReg,
1712 uint64_t ShiftImm) {
1713 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1714 "ISD nodes are not consecutive!");
1715 static const unsigned OpcTable[3][2] = {
1716 { AArch64::ANDWrs, AArch64::ANDXrs },
1717 { AArch64::ORRWrs, AArch64::ORRXrs },
1718 { AArch64::EORWrs, AArch64::EORXrs }
1719 };
1720
1721 // Don't deal with undefined shifts.
1722 if (ShiftImm >= RetVT.getSizeInBits())
1723 return 0;
1724
1725 const TargetRegisterClass *RC;
1726 unsigned Opc;
1727 switch (RetVT.SimpleTy) {
1728 default:
1729 return 0;
1730 case MVT::i1:
1731 case MVT::i8:
1732 case MVT::i16:
1733 case MVT::i32:
1734 Opc = OpcTable[ISDOpc - ISD::AND][0];
1735 RC = &AArch64::GPR32RegClass;
1736 break;
1737 case MVT::i64:
1738 Opc = OpcTable[ISDOpc - ISD::AND][1];
1739 RC = &AArch64::GPR64RegClass;
1740 break;
1741 }
1742 Register ResultReg =
1743      fastEmitInst_rri(Opc, RC, LHSReg, RHSReg,
1744                       AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1745 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1746 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1747 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1748 }
1749 return ResultReg;
1750}
1751
1752unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg,
1753 uint64_t Imm) {
1754 return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm);
1755}
1756
1757unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1758 bool WantZExt, MachineMemOperand *MMO) {
1759 if (!TLI.allowsMisalignedMemoryAccesses(VT))
1760 return 0;
1761
1762 // Simplify this down to something we can handle.
1763 if (!simplifyAddress(Addr, VT))
1764 return 0;
1765
1766 unsigned ScaleFactor = getImplicitScaleFactor(VT);
1767 if (!ScaleFactor)
1768 llvm_unreachable("Unexpected value type.");
1769
1770 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1771 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1772 bool UseScaled = true;
1773 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1774 UseScaled = false;
1775 ScaleFactor = 1;
1776 }
1777
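  // Note: within each of the two groups below, the rows come in pairs
  // (32-bit result, 64-bit result) for the addressing modes unscaled
  // immediate, scaled immediate, register offset (X), and extended register
  // offset (W), in that order; the columns select the access size
  // (i8, i16, i32, i64). This mirrors the Idx computation further down.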
1778 static const unsigned GPOpcTable[2][8][4] = {
1779 // Sign-extend.
1780 { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
1781 AArch64::LDURXi },
1782 { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
1783 AArch64::LDURXi },
1784 { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
1785 AArch64::LDRXui },
1786 { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
1787 AArch64::LDRXui },
1788 { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1789 AArch64::LDRXroX },
1790 { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1791 AArch64::LDRXroX },
1792 { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1793 AArch64::LDRXroW },
1794 { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1795 AArch64::LDRXroW }
1796 },
1797 // Zero-extend.
1798 { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1799 AArch64::LDURXi },
1800 { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1801 AArch64::LDURXi },
1802 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1803 AArch64::LDRXui },
1804 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1805 AArch64::LDRXui },
1806 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1807 AArch64::LDRXroX },
1808 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1809 AArch64::LDRXroX },
1810 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1811 AArch64::LDRXroW },
1812 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1813 AArch64::LDRXroW }
1814 }
1815 };
1816
1817 static const unsigned FPOpcTable[4][2] = {
1818 { AArch64::LDURSi, AArch64::LDURDi },
1819 { AArch64::LDRSui, AArch64::LDRDui },
1820 { AArch64::LDRSroX, AArch64::LDRDroX },
1821 { AArch64::LDRSroW, AArch64::LDRDroW }
1822 };
1823
1824 unsigned Opc;
1825 const TargetRegisterClass *RC;
1826 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1827 Addr.getOffsetReg();
1828 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1829 if (Addr.getExtendType() == AArch64_AM::UXTW ||
1830 Addr.getExtendType() == AArch64_AM::SXTW)
1831 Idx++;
1832
1833 bool IsRet64Bit = RetVT == MVT::i64;
1834 switch (VT.SimpleTy) {
1835 default:
1836 llvm_unreachable("Unexpected value type.");
1837 case MVT::i1: // Intentional fall-through.
1838 case MVT::i8:
1839 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1840 RC = (IsRet64Bit && !WantZExt) ?
1841 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1842 break;
1843 case MVT::i16:
1844 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1845 RC = (IsRet64Bit && !WantZExt) ?
1846 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1847 break;
1848 case MVT::i32:
1849 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1850 RC = (IsRet64Bit && !WantZExt) ?
1851 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1852 break;
1853 case MVT::i64:
1854 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1855 RC = &AArch64::GPR64RegClass;
1856 break;
1857 case MVT::f32:
1858 Opc = FPOpcTable[Idx][0];
1859 RC = &AArch64::FPR32RegClass;
1860 break;
1861 case MVT::f64:
1862 Opc = FPOpcTable[Idx][1];
1863 RC = &AArch64::FPR64RegClass;
1864 break;
1865 }
1866
1867 // Create the base instruction, then add the operands.
1868 Register ResultReg = createResultReg(RC);
1869 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1870 TII.get(Opc), ResultReg);
1871 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1872
1873 // Loading an i1 requires special handling.
1874 if (VT == MVT::i1) {
1875 unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1);
1876 assert(ANDReg && "Unexpected AND instruction emission failure.");
1877 ResultReg = ANDReg;
1878 }
1879
1880  // For zero-extending loads to 64 bits we emit a 32-bit load and then convert
1881  // the 32-bit reg to a 64-bit reg.
1882 if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1883 Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
1884 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1885 TII.get(AArch64::SUBREG_TO_REG), Reg64)
1886 .addImm(0)
1887 .addReg(ResultReg, getKillRegState(true))
1888 .addImm(AArch64::sub_32);
1889 ResultReg = Reg64;
1890 }
1891 return ResultReg;
1892}
1893
1894bool AArch64FastISel::selectAddSub(const Instruction *I) {
1895 MVT VT;
1896 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1897 return false;
1898
1899 if (VT.isVector())
1900 return selectOperator(I, I->getOpcode());
1901
1902 unsigned ResultReg;
1903 switch (I->getOpcode()) {
1904 default:
1905 llvm_unreachable("Unexpected instruction.");
1906 case Instruction::Add:
1907 ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1908 break;
1909 case Instruction::Sub:
1910 ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1911 break;
1912 }
1913 if (!ResultReg)
1914 return false;
1915
1916 updateValueMap(I, ResultReg);
1917 return true;
1918}
1919
1920bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1921 MVT VT;
1922 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1923 return false;
1924
1925 if (VT.isVector())
1926 return selectOperator(I, I->getOpcode());
1927
1928 unsigned ResultReg;
1929 switch (I->getOpcode()) {
1930 default:
1931 llvm_unreachable("Unexpected instruction.");
1932 case Instruction::And:
1933 ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1934 break;
1935 case Instruction::Or:
1936 ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1937 break;
1938 case Instruction::Xor:
1939 ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1940 break;
1941 }
1942 if (!ResultReg)
1943 return false;
1944
1945 updateValueMap(I, ResultReg);
1946 return true;
1947}
1948
1949bool AArch64FastISel::selectLoad(const Instruction *I) {
1950 MVT VT;
1951 // Verify we have a legal type before going any further. Currently, we handle
1952 // simple types that will directly fit in a register (i32/f32/i64/f64) or
1953 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1954 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1955 cast<LoadInst>(I)->isAtomic())
1956 return false;
1957
1958 const Value *SV = I->getOperand(0);
1959 if (TLI.supportSwiftError()) {
1960 // Swifterror values can come from either a function parameter with
1961 // swifterror attribute or an alloca with swifterror attribute.
1962 if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1963 if (Arg->hasSwiftErrorAttr())
1964 return false;
1965 }
1966
1967 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1968 if (Alloca->isSwiftError())
1969 return false;
1970 }
1971 }
1972
1973 // See if we can handle this address.
1974 Address Addr;
1975 if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1976 return false;
1977
1978 // Fold the following sign-/zero-extend into the load instruction.
1979 bool WantZExt = true;
1980 MVT RetVT = VT;
1981 const Value *IntExtVal = nullptr;
1982 if (I->hasOneUse()) {
1983 if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1984 if (isTypeSupported(ZE->getType(), RetVT))
1985 IntExtVal = ZE;
1986 else
1987 RetVT = VT;
1988 } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1989 if (isTypeSupported(SE->getType(), RetVT))
1990 IntExtVal = SE;
1991 else
1992 RetVT = VT;
1993 WantZExt = false;
1994 }
1995 }
1996
1997 unsigned ResultReg =
1998 emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1999 if (!ResultReg)
2000 return false;
2001
2002 // There are a few different cases we have to handle, because the load or the
2003 // sign-/zero-extend might not be selected by FastISel if we fall back to
2004 // SelectionDAG. There is also an ordering issue when both instructions are in
2005 // different basic blocks.
2006 // 1.) The load instruction is selected by FastISel, but the integer extend
2007 //     is not. This usually happens when the integer extend is in a different
2008 // basic block and SelectionDAG took over for that basic block.
2009 // 2.) The load instruction is selected before the integer extend. This only
2010 // happens when the integer extend is in a different basic block.
2011 // 3.) The load instruction is selected by SelectionDAG and the integer extend
2012 // by FastISel. This happens if there are instructions between the load
2013 // and the integer extend that couldn't be selected by FastISel.
2014 if (IntExtVal) {
2015 // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2016 // could select it. Emit a copy to subreg if necessary. FastISel will remove
2017 // it when it selects the integer extend.
2018 Register Reg = lookUpRegForValue(IntExtVal);
2019 auto *MI = MRI.getUniqueVRegDef(Reg);
2020 if (!MI) {
2021 if (RetVT == MVT::i64 && VT <= MVT::i32) {
2022 if (WantZExt) {
2023 // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2024 MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
2025 ResultReg = std::prev(I)->getOperand(0).getReg();
2026 removeDeadCode(I, std::next(I));
2027 } else
2028 ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2029 AArch64::sub_32);
2030 }
2031 updateValueMap(I, ResultReg);
2032 return true;
2033 }
2034
2035 // The integer extend has already been emitted - delete all the instructions
2036 // that have been emitted by the integer extend lowering code and use the
2037 // result from the load instruction directly.
2038 while (MI) {
2039 Reg = 0;
2040 for (auto &Opnd : MI->uses()) {
2041 if (Opnd.isReg()) {
2042 Reg = Opnd.getReg();
2043 break;
2044 }
2045 }
2046      MachineBasicBlock::iterator I(MI);
2047      removeDeadCode(I, std::next(I));
2048 MI = nullptr;
2049 if (Reg)
2050 MI = MRI.getUniqueVRegDef(Reg);
2051 }
2052 updateValueMap(IntExtVal, ResultReg);
2053 return true;
2054 }
2055
2056 updateValueMap(I, ResultReg);
2057 return true;
2058}
2059
2060bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2061 unsigned AddrReg,
2062 MachineMemOperand *MMO) {
2063 unsigned Opc;
2064 switch (VT.SimpleTy) {
2065 default: return false;
2066 case MVT::i8: Opc = AArch64::STLRB; break;
2067 case MVT::i16: Opc = AArch64::STLRH; break;
2068 case MVT::i32: Opc = AArch64::STLRW; break;
2069 case MVT::i64: Opc = AArch64::STLRX; break;
2070 }
2071
2072 const MCInstrDesc &II = TII.get(Opc);
2073 SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2074 AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2075 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2076 .addReg(SrcReg)
2077 .addReg(AddrReg)
2078 .addMemOperand(MMO);
2079 return true;
2080}
2081
2082bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2083 MachineMemOperand *MMO) {
2084 if (!TLI.allowsMisalignedMemoryAccesses(VT))
2085 return false;
2086
2087 // Simplify this down to something we can handle.
2088 if (!simplifyAddress(Addr, VT))
2089 return false;
2090
2091 unsigned ScaleFactor = getImplicitScaleFactor(VT);
2092 if (!ScaleFactor)
2093 llvm_unreachable("Unexpected value type.");
2094
2095 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2096 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2097 bool UseScaled = true;
2098 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2099 UseScaled = false;
2100 ScaleFactor = 1;
2101 }
2102
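  // Note: each row below covers one addressing mode (unscaled immediate,
  // scaled immediate, register offset (X), extended register offset (W));
  // the columns select the stored type (i8, i16, i32, i64, f32, f64).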
2103 static const unsigned OpcTable[4][6] = {
2104 { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
2105 AArch64::STURSi, AArch64::STURDi },
2106 { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
2107 AArch64::STRSui, AArch64::STRDui },
2108 { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2109 AArch64::STRSroX, AArch64::STRDroX },
2110 { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2111 AArch64::STRSroW, AArch64::STRDroW }
2112 };
2113
2114 unsigned Opc;
2115 bool VTIsi1 = false;
2116 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2117 Addr.getOffsetReg();
2118 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2119 if (Addr.getExtendType() == AArch64_AM::UXTW ||
2120 Addr.getExtendType() == AArch64_AM::SXTW)
2121 Idx++;
2122
2123 switch (VT.SimpleTy) {
2124 default: llvm_unreachable("Unexpected value type.");
2125 case MVT::i1: VTIsi1 = true; [[fallthrough]];
2126 case MVT::i8: Opc = OpcTable[Idx][0]; break;
2127 case MVT::i16: Opc = OpcTable[Idx][1]; break;
2128 case MVT::i32: Opc = OpcTable[Idx][2]; break;
2129 case MVT::i64: Opc = OpcTable[Idx][3]; break;
2130 case MVT::f32: Opc = OpcTable[Idx][4]; break;
2131 case MVT::f64: Opc = OpcTable[Idx][5]; break;
2132 }
2133
2134 // Storing an i1 requires special handling.
2135 if (VTIsi1 && SrcReg != AArch64::WZR) {
2136 unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1);
2137 assert(ANDReg && "Unexpected AND instruction emission failure.");
2138 SrcReg = ANDReg;
2139 }
2140 // Create the base instruction, then add the operands.
2141 const MCInstrDesc &II = TII.get(Opc);
2142 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2143  MachineInstrBuilder MIB =
2144      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(SrcReg);
2145 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2146
2147 return true;
2148}
2149
2150bool AArch64FastISel::selectStore(const Instruction *I) {
2151 MVT VT;
2152 const Value *Op0 = I->getOperand(0);
2153 // Verify we have a legal type before going any further. Currently, we handle
2154 // simple types that will directly fit in a register (i32/f32/i64/f64) or
2155 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2156 if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2157 return false;
2158
2159 const Value *PtrV = I->getOperand(1);
2160 if (TLI.supportSwiftError()) {
2161 // Swifterror values can come from either a function parameter with
2162 // swifterror attribute or an alloca with swifterror attribute.
2163 if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2164 if (Arg->hasSwiftErrorAttr())
2165 return false;
2166 }
2167
2168 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2169 if (Alloca->isSwiftError())
2170 return false;
2171 }
2172 }
2173
2174 // Get the value to be stored into a register. Use the zero register directly
2175 // when possible to avoid an unnecessary copy and a wasted register.
2176 unsigned SrcReg = 0;
2177 if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2178 if (CI->isZero())
2179 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2180 } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2181 if (CF->isZero() && !CF->isNegative()) {
2182      VT = MVT::getIntegerVT(VT.getSizeInBits());
2183      SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2184 }
2185 }
2186
2187 if (!SrcReg)
2188 SrcReg = getRegForValue(Op0);
2189
2190 if (!SrcReg)
2191 return false;
2192
2193 auto *SI = cast<StoreInst>(I);
2194
2195 // Try to emit a STLR for seq_cst/release.
2196 if (SI->isAtomic()) {
2197 AtomicOrdering Ord = SI->getOrdering();
2198 // The non-atomic instructions are sufficient for relaxed stores.
2199 if (isReleaseOrStronger(Ord)) {
2200 // The STLR addressing mode only supports a base reg; pass that directly.
2201 Register AddrReg = getRegForValue(PtrV);
2202 return emitStoreRelease(VT, SrcReg, AddrReg,
2203 createMachineMemOperandFor(I));
2204 }
2205 }
2206
2207 // See if we can handle this address.
2208 Address Addr;
2209 if (!computeAddress(PtrV, Addr, Op0->getType()))
2210 return false;
2211
2212 if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2213 return false;
2214 return true;
2215}
2216
2217static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2218  switch (Pred) {
2219 case CmpInst::FCMP_ONE:
2220 case CmpInst::FCMP_UEQ:
2221 default:
2222 // AL is our "false" for now. The other two need more compares.
2223 return AArch64CC::AL;
2224 case CmpInst::ICMP_EQ:
2225 case CmpInst::FCMP_OEQ:
2226 return AArch64CC::EQ;
2227 case CmpInst::ICMP_SGT:
2228 case CmpInst::FCMP_OGT:
2229 return AArch64CC::GT;
2230 case CmpInst::ICMP_SGE:
2231 case CmpInst::FCMP_OGE:
2232 return AArch64CC::GE;
2233 case CmpInst::ICMP_UGT:
2234 case CmpInst::FCMP_UGT:
2235 return AArch64CC::HI;
2236 case CmpInst::FCMP_OLT:
2237 return AArch64CC::MI;
2238 case CmpInst::ICMP_ULE:
2239 case CmpInst::FCMP_OLE:
2240 return AArch64CC::LS;
2241 case CmpInst::FCMP_ORD:
2242 return AArch64CC::VC;
2243 case CmpInst::FCMP_UNO:
2244 return AArch64CC::VS;
2245 case CmpInst::FCMP_UGE:
2246 return AArch64CC::PL;
2247 case CmpInst::ICMP_SLT:
2248 case CmpInst::FCMP_ULT:
2249 return AArch64CC::LT;
2250 case CmpInst::ICMP_SLE:
2251 case CmpInst::FCMP_ULE:
2252 return AArch64CC::LE;
2253 case CmpInst::FCMP_UNE:
2254 case CmpInst::ICMP_NE:
2255 return AArch64CC::NE;
2256 case CmpInst::ICMP_UGE:
2257 return AArch64CC::HS;
2258 case CmpInst::ICMP_ULT:
2259 return AArch64CC::LO;
2260 }
2261}
2262
2263/// Try to emit a combined compare-and-branch instruction.
2264bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2265 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
2266 // will not be produced, as they are conditional branch instructions that do
2267 // not set flags.
2268 if (FuncInfo.MF->getFunction().hasFnAttribute(
2269 Attribute::SpeculativeLoadHardening))
2270 return false;
2271
2272 assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2273 const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2274 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2275
2276 const Value *LHS = CI->getOperand(0);
2277 const Value *RHS = CI->getOperand(1);
2278
2279 MVT VT;
2280 if (!isTypeSupported(LHS->getType(), VT))
2281 return false;
2282
2283 unsigned BW = VT.getSizeInBits();
2284 if (BW > 64)
2285 return false;
2286
2287 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2288 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2289
2290 // Try to take advantage of fallthrough opportunities.
2291 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2292 std::swap(TBB, FBB);
2293    Predicate = CmpInst::getInversePredicate(Predicate);
2294  }
2295
2296 int TestBit = -1;
2297 bool IsCmpNE;
2298 switch (Predicate) {
2299 default:
2300 return false;
2301 case CmpInst::ICMP_EQ:
2302 case CmpInst::ICMP_NE:
2303 if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2304 std::swap(LHS, RHS);
2305
2306 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2307 return false;
2308
2309 if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2310 if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2311 const Value *AndLHS = AI->getOperand(0);
2312 const Value *AndRHS = AI->getOperand(1);
2313
2314 if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2315 if (C->getValue().isPowerOf2())
2316 std::swap(AndLHS, AndRHS);
2317
2318 if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2319 if (C->getValue().isPowerOf2()) {
2320 TestBit = C->getValue().logBase2();
2321 LHS = AndLHS;
2322 }
2323 }
2324
2325 if (VT == MVT::i1)
2326 TestBit = 0;
2327
2328 IsCmpNE = Predicate == CmpInst::ICMP_NE;
2329 break;
2330 case CmpInst::ICMP_SLT:
2331 case CmpInst::ICMP_SGE:
2332 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2333 return false;
2334
2335 TestBit = BW - 1;
2336 IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2337 break;
2338 case CmpInst::ICMP_SGT:
2339 case CmpInst::ICMP_SLE:
2340 if (!isa<ConstantInt>(RHS))
2341 return false;
2342
2343 if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2344 return false;
2345
2346 TestBit = BW - 1;
2347 IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2348 break;
2349 } // end switch
2350
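  // Note: the table below is indexed as [IsBitTest][IsCmpNE][Is64Bit]:
  // CB(N)Z compares a whole register against zero, TB(N)Z tests a single
  // bit, and the W/X form is chosen from the operand width.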
2351 static const unsigned OpcTable[2][2][2] = {
2352 { {AArch64::CBZW, AArch64::CBZX },
2353 {AArch64::CBNZW, AArch64::CBNZX} },
2354 { {AArch64::TBZW, AArch64::TBZX },
2355 {AArch64::TBNZW, AArch64::TBNZX} }
2356 };
2357
2358 bool IsBitTest = TestBit != -1;
2359 bool Is64Bit = BW == 64;
2360 if (TestBit < 32 && TestBit >= 0)
2361 Is64Bit = false;
2362
2363 unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2364 const MCInstrDesc &II = TII.get(Opc);
2365
2366 Register SrcReg = getRegForValue(LHS);
2367 if (!SrcReg)
2368 return false;
2369
2370 if (BW == 64 && !Is64Bit)
2371 SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32);
2372
2373 if ((BW < 32) && !IsBitTest)
2374 SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
2375
2376 // Emit the combined compare and branch instruction.
2377 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2378  MachineInstrBuilder MIB =
2379      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
2380 .addReg(SrcReg);
2381 if (IsBitTest)
2382 MIB.addImm(TestBit);
2383 MIB.addMBB(TBB);
2384
2385 finishCondBranch(BI->getParent(), TBB, FBB);
2386 return true;
2387}
2388
2389bool AArch64FastISel::selectBranch(const Instruction *I) {
2390 const BranchInst *BI = cast<BranchInst>(I);
2391 if (BI->isUnconditional()) {
2392 MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2393 fastEmitBranch(MSucc, BI->getDebugLoc());
2394 return true;
2395 }
2396
2397 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2398 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2399
2400 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2401 if (CI->hasOneUse() && isValueAvailable(CI)) {
2402 // Try to optimize or fold the cmp.
2403 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2404 switch (Predicate) {
2405 default:
2406 break;
2407      case CmpInst::FCMP_FALSE:
2408        fastEmitBranch(FBB, MIMD.getDL());
2409 return true;
2410 case CmpInst::FCMP_TRUE:
2411 fastEmitBranch(TBB, MIMD.getDL());
2412 return true;
2413 }
2414
2415 // Try to emit a combined compare-and-branch first.
2416 if (emitCompareAndBranch(BI))
2417 return true;
2418
2419 // Try to take advantage of fallthrough opportunities.
2420 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2421 std::swap(TBB, FBB);
2422        Predicate = CmpInst::getInversePredicate(Predicate);
2423      }
2424
2425 // Emit the cmp.
2426 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2427 return false;
2428
2429 // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2430 // instruction.
2431 AArch64CC::CondCode CC = getCompareCC(Predicate);
2432      AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2433      switch (Predicate) {
2434 default:
2435 break;
2436 case CmpInst::FCMP_UEQ:
2437 ExtraCC = AArch64CC::EQ;
2438 CC = AArch64CC::VS;
2439 break;
2440 case CmpInst::FCMP_ONE:
2441 ExtraCC = AArch64CC::MI;
2442 CC = AArch64CC::GT;
2443 break;
2444 }
2445 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2446
2447 // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2448 if (ExtraCC != AArch64CC::AL) {
2449 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2450 .addImm(ExtraCC)
2451 .addMBB(TBB);
2452 }
2453
2454 // Emit the branch.
2455 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2456 .addImm(CC)
2457 .addMBB(TBB);
2458
2459 finishCondBranch(BI->getParent(), TBB, FBB);
2460 return true;
2461 }
2462 } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2463 uint64_t Imm = CI->getZExtValue();
2464 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2465 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::B))
2466 .addMBB(Target);
2467
2468 // Obtain the branch probability and add the target to the successor list.
2469 if (FuncInfo.BPI) {
2470 auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2471 BI->getParent(), Target->getBasicBlock());
2472 FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2473 } else
2474 FuncInfo.MBB->addSuccessorWithoutProb(Target);
2475 return true;
2476 } else {
2477    AArch64CC::CondCode CC = AArch64CC::NE;
2478    if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2479      // Fake-request the condition; otherwise the intrinsic might be completely
2480 // optimized away.
2481 Register CondReg = getRegForValue(BI->getCondition());
2482 if (!CondReg)
2483 return false;
2484
2485 // Emit the branch.
2486 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2487 .addImm(CC)
2488 .addMBB(TBB);
2489
2490 finishCondBranch(BI->getParent(), TBB, FBB);
2491 return true;
2492 }
2493 }
2494
2495 Register CondReg = getRegForValue(BI->getCondition());
2496 if (CondReg == 0)
2497 return false;
2498
2499 // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2500 unsigned Opcode = AArch64::TBNZW;
2501 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2502 std::swap(TBB, FBB);
2503 Opcode = AArch64::TBZW;
2504 }
2505
2506 const MCInstrDesc &II = TII.get(Opcode);
2507 Register ConstrainedCondReg
2508 = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2509 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2510 .addReg(ConstrainedCondReg)
2511 .addImm(0)
2512 .addMBB(TBB);
2513
2514 finishCondBranch(BI->getParent(), TBB, FBB);
2515 return true;
2516}
2517
2518bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2519 const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2520 Register AddrReg = getRegForValue(BI->getOperand(0));
2521 if (AddrReg == 0)
2522 return false;
2523
2524 // Emit the indirect branch.
2525 const MCInstrDesc &II = TII.get(AArch64::BR);
2526 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
2527 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(AddrReg);
2528
2529 // Make sure the CFG is up-to-date.
2530 for (const auto *Succ : BI->successors())
2531 FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2532
2533 return true;
2534}
2535
2536bool AArch64FastISel::selectCmp(const Instruction *I) {
2537 const CmpInst *CI = cast<CmpInst>(I);
2538
2539 // Vectors of i1 are weird: bail out.
2540 if (CI->getType()->isVectorTy())
2541 return false;
2542
2543 // Try to optimize or fold the cmp.
2544 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2545 unsigned ResultReg = 0;
2546 switch (Predicate) {
2547 default:
2548 break;
2549  case CmpInst::FCMP_FALSE:
2550    ResultReg = createResultReg(&AArch64::GPR32RegClass);
2551 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2552 TII.get(TargetOpcode::COPY), ResultReg)
2553 .addReg(AArch64::WZR, getKillRegState(true));
2554 break;
2555 case CmpInst::FCMP_TRUE:
2556 ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2557 break;
2558 }
2559
2560 if (ResultReg) {
2561 updateValueMap(I, ResultReg);
2562 return true;
2563 }
2564
2565 // Emit the cmp.
2566 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2567 return false;
2568
2569 ResultReg = createResultReg(&AArch64::GPR32RegClass);
2570
2571 // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2572 // condition codes are inverted, because they are used by CSINC.
2573 static unsigned CondCodeTable[2][2] = {
2574    { AArch64CC::NE, AArch64CC::VC },
2575    { AArch64CC::PL, AArch64CC::LE }
2576  };
2577 unsigned *CondCodes = nullptr;
2578 switch (Predicate) {
2579 default:
2580 break;
2581 case CmpInst::FCMP_UEQ:
2582 CondCodes = &CondCodeTable[0][0];
2583 break;
2584 case CmpInst::FCMP_ONE:
2585 CondCodes = &CondCodeTable[1][0];
2586 break;
2587 }
2588
2589 if (CondCodes) {
2590 Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2591 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2592 TmpReg1)
2593 .addReg(AArch64::WZR, getKillRegState(true))
2594 .addReg(AArch64::WZR, getKillRegState(true))
2595 .addImm(CondCodes[0]);
2596 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2597 ResultReg)
2598 .addReg(TmpReg1, getKillRegState(true))
2599 .addReg(AArch64::WZR, getKillRegState(true))
2600 .addImm(CondCodes[1]);
2601
2602 updateValueMap(I, ResultReg);
2603 return true;
2604 }
2605
2606 // Now set a register based on the comparison.
2607 AArch64CC::CondCode CC = getCompareCC(Predicate);
2608 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2609 AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2610 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2611 ResultReg)
2612 .addReg(AArch64::WZR, getKillRegState(true))
2613 .addReg(AArch64::WZR, getKillRegState(true))
2614 .addImm(invertedCC);
2615
2616 updateValueMap(I, ResultReg);
2617 return true;
2618}
2619
2620/// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2621/// value.
2622bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2623 if (!SI->getType()->isIntegerTy(1))
2624 return false;
2625
2626 const Value *Src1Val, *Src2Val;
2627 unsigned Opc = 0;
2628 bool NeedExtraOp = false;
2629 if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2630 if (CI->isOne()) {
2631 Src1Val = SI->getCondition();
2632 Src2Val = SI->getFalseValue();
2633 Opc = AArch64::ORRWrr;
2634 } else {
2635 assert(CI->isZero());
2636 Src1Val = SI->getFalseValue();
2637 Src2Val = SI->getCondition();
2638 Opc = AArch64::BICWrr;
2639 }
2640 } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2641 if (CI->isOne()) {
2642 Src1Val = SI->getCondition();
2643 Src2Val = SI->getTrueValue();
2644 Opc = AArch64::ORRWrr;
2645 NeedExtraOp = true;
2646 } else {
2647 assert(CI->isZero());
2648 Src1Val = SI->getCondition();
2649 Src2Val = SI->getTrueValue();
2650 Opc = AArch64::ANDWrr;
2651 }
2652 }
2653
2654 if (!Opc)
2655 return false;
2656
2657 Register Src1Reg = getRegForValue(Src1Val);
2658 if (!Src1Reg)
2659 return false;
2660
2661 Register Src2Reg = getRegForValue(Src2Val);
2662 if (!Src2Reg)
2663 return false;
2664
2665 if (NeedExtraOp)
2666 Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1);
2667
2668 Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2669 Src2Reg);
2670 updateValueMap(SI, ResultReg);
2671 return true;
2672}
2673
2674bool AArch64FastISel::selectSelect(const Instruction *I) {
2675 assert(isa<SelectInst>(I) && "Expected a select instruction.");
2676 MVT VT;
2677 if (!isTypeSupported(I->getType(), VT))
2678 return false;
2679
2680 unsigned Opc;
2681 const TargetRegisterClass *RC;
2682 switch (VT.SimpleTy) {
2683 default:
2684 return false;
2685 case MVT::i1:
2686 case MVT::i8:
2687 case MVT::i16:
2688 case MVT::i32:
2689 Opc = AArch64::CSELWr;
2690 RC = &AArch64::GPR32RegClass;
2691 break;
2692 case MVT::i64:
2693 Opc = AArch64::CSELXr;
2694 RC = &AArch64::GPR64RegClass;
2695 break;
2696 case MVT::f32:
2697 Opc = AArch64::FCSELSrrr;
2698 RC = &AArch64::FPR32RegClass;
2699 break;
2700 case MVT::f64:
2701 Opc = AArch64::FCSELDrrr;
2702 RC = &AArch64::FPR64RegClass;
2703 break;
2704 }
2705
2706 const SelectInst *SI = cast<SelectInst>(I);
2707 const Value *Cond = SI->getCondition();
2708  AArch64CC::CondCode CC = AArch64CC::NE;
2709  AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2710
2711 if (optimizeSelect(SI))
2712 return true;
2713
2714  // Try to pick up the flags, so we don't have to emit another compare.
2715 if (foldXALUIntrinsic(CC, I, Cond)) {
2716    // Fake-request the condition to force emission of the XALU intrinsic.
2717 Register CondReg = getRegForValue(Cond);
2718 if (!CondReg)
2719 return false;
2720 } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2721 isValueAvailable(Cond)) {
2722 const auto *Cmp = cast<CmpInst>(Cond);
2723 // Try to optimize or fold the cmp.
2724 CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2725 const Value *FoldSelect = nullptr;
2726 switch (Predicate) {
2727 default:
2728 break;
2729    case CmpInst::FCMP_FALSE:
2730      FoldSelect = SI->getFalseValue();
2731 break;
2732 case CmpInst::FCMP_TRUE:
2733 FoldSelect = SI->getTrueValue();
2734 break;
2735 }
2736
2737 if (FoldSelect) {
2738 Register SrcReg = getRegForValue(FoldSelect);
2739 if (!SrcReg)
2740 return false;
2741
2742 updateValueMap(I, SrcReg);
2743 return true;
2744 }
2745
2746 // Emit the cmp.
2747 if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2748 return false;
2749
2750 // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2751 CC = getCompareCC(Predicate);
2752 switch (Predicate) {
2753 default:
2754 break;
2755 case CmpInst::FCMP_UEQ:
2756 ExtraCC = AArch64CC::EQ;
2757 CC = AArch64CC::VS;
2758 break;
2759 case CmpInst::FCMP_ONE:
2760 ExtraCC = AArch64CC::MI;
2761 CC = AArch64CC::GT;
2762 break;
2763 }
2764 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2765 } else {
2766 Register CondReg = getRegForValue(Cond);
2767 if (!CondReg)
2768 return false;
2769
2770 const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2771 CondReg = constrainOperandRegClass(II, CondReg, 1);
2772
2773 // Emit a TST instruction (ANDS wzr, reg, #imm).
2774 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II,
2775 AArch64::WZR)
2776 .addReg(CondReg)
2777        .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2778  }
2779
2780 Register Src1Reg = getRegForValue(SI->getTrueValue());
2781 Register Src2Reg = getRegForValue(SI->getFalseValue());
2782
2783 if (!Src1Reg || !Src2Reg)
2784 return false;
2785
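  // Note: for FCMP_UEQ/FCMP_ONE two conditional selects are chained below;
  // the first folds ExtraCC into the false operand and the second applies
  // the primary condition on top of it.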
2786 if (ExtraCC != AArch64CC::AL)
2787 Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC);
2788
2789 Register ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC);
2790 updateValueMap(I, ResultReg);
2791 return true;
2792}
2793
2794bool AArch64FastISel::selectFPExt(const Instruction *I) {
2795 Value *V = I->getOperand(0);
2796 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2797 return false;
2798
2799 Register Op = getRegForValue(V);
2800 if (Op == 0)
2801 return false;
2802
2803 Register ResultReg = createResultReg(&AArch64::FPR64RegClass);
2804 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTDSr),
2805 ResultReg).addReg(Op);
2806 updateValueMap(I, ResultReg);
2807 return true;
2808}
2809
2810bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2811 Value *V = I->getOperand(0);
2812 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2813 return false;
2814
2815 Register Op = getRegForValue(V);
2816 if (Op == 0)
2817 return false;
2818
2819 Register ResultReg = createResultReg(&AArch64::FPR32RegClass);
2820 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTSDr),
2821 ResultReg).addReg(Op);
2822 updateValueMap(I, ResultReg);
2823 return true;
2824}
2825
2826// FPToUI and FPToSI
2827bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2828 MVT DestVT;
2829 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2830 return false;
2831
2832 Register SrcReg = getRegForValue(I->getOperand(0));
2833 if (SrcReg == 0)
2834 return false;
2835
2836 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2837 if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
2838 return false;
2839
2840 unsigned Opc;
2841 if (SrcVT == MVT::f64) {
2842 if (Signed)
2843 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2844 else
2845 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2846 } else {
2847 if (Signed)
2848 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2849 else
2850 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2851 }
2852 Register ResultReg = createResultReg(
2853 DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2854 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
2855 .addReg(SrcReg);
2856 updateValueMap(I, ResultReg);
2857 return true;
2858}
2859
2860bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2861 MVT DestVT;
2862 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2863 return false;
2864 // Let regular ISEL handle FP16
2865 if (DestVT == MVT::f16)
2866 return false;
2867
2868 assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2869 "Unexpected value type.");
2870
2871 Register SrcReg = getRegForValue(I->getOperand(0));
2872 if (!SrcReg)
2873 return false;
2874
2875 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2876
2877 // Handle sign-extension.
2878 if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2879 SrcReg =
2880 emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2881 if (!SrcReg)
2882 return false;
2883 }
2884
2885 unsigned Opc;
2886 if (SrcVT == MVT::i64) {
2887 if (Signed)
2888 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2889 else
2890 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2891 } else {
2892 if (Signed)
2893 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2894 else
2895 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2896 }
2897
2898 Register ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg);
2899 updateValueMap(I, ResultReg);
2900 return true;
2901}
2902
2903bool AArch64FastISel::fastLowerArguments() {
2904 if (!FuncInfo.CanLowerReturn)
2905 return false;
2906
2907 const Function *F = FuncInfo.Fn;
2908 if (F->isVarArg())
2909 return false;
2910
2911 CallingConv::ID CC = F->getCallingConv();
2912  if (CC != CallingConv::C && CC != CallingConv::Swift)
2913    return false;
2914
2915 if (Subtarget->hasCustomCallingConv())
2916 return false;
2917
2918 // Only handle simple cases of up to 8 GPR and FPR each.
2919 unsigned GPRCnt = 0;
2920 unsigned FPRCnt = 0;
2921 for (auto const &Arg : F->args()) {
2922 if (Arg.hasAttribute(Attribute::ByVal) ||
2923 Arg.hasAttribute(Attribute::InReg) ||
2924 Arg.hasAttribute(Attribute::StructRet) ||
2925 Arg.hasAttribute(Attribute::SwiftSelf) ||
2926 Arg.hasAttribute(Attribute::SwiftAsync) ||
2927 Arg.hasAttribute(Attribute::SwiftError) ||
2928 Arg.hasAttribute(Attribute::Nest))
2929 return false;
2930
2931 Type *ArgTy = Arg.getType();
2932 if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2933 return false;
2934
2935 EVT ArgVT = TLI.getValueType(DL, ArgTy);
2936 if (!ArgVT.isSimple())
2937 return false;
2938
2939 MVT VT = ArgVT.getSimpleVT().SimpleTy;
2940 if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2941 return false;
2942
2943 if (VT.isVector() &&
2944 (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2945 return false;
2946
2947 if (VT >= MVT::i1 && VT <= MVT::i64)
2948 ++GPRCnt;
2949 else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2950 VT.is128BitVector())
2951 ++FPRCnt;
2952 else
2953 return false;
2954
2955 if (GPRCnt > 8 || FPRCnt > 8)
2956 return false;
2957 }
2958
2959 static const MCPhysReg Registers[6][8] = {
2960 { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2961 AArch64::W5, AArch64::W6, AArch64::W7 },
2962 { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2963 AArch64::X5, AArch64::X6, AArch64::X7 },
2964 { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2965 AArch64::H5, AArch64::H6, AArch64::H7 },
2966 { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2967 AArch64::S5, AArch64::S6, AArch64::S7 },
2968 { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2969 AArch64::D5, AArch64::D6, AArch64::D7 },
2970 { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2971 AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2972 };
2973
2974 unsigned GPRIdx = 0;
2975 unsigned FPRIdx = 0;
2976 for (auto const &Arg : F->args()) {
2977 MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2978 unsigned SrcReg;
2979 const TargetRegisterClass *RC;
2980 if (VT >= MVT::i1 && VT <= MVT::i32) {
2981 SrcReg = Registers[0][GPRIdx++];
2982 RC = &AArch64::GPR32RegClass;
2983 VT = MVT::i32;
2984 } else if (VT == MVT::i64) {
2985 SrcReg = Registers[1][GPRIdx++];
2986 RC = &AArch64::GPR64RegClass;
2987 } else if (VT == MVT::f16) {
2988 SrcReg = Registers[2][FPRIdx++];
2989 RC = &AArch64::FPR16RegClass;
2990 } else if (VT == MVT::f32) {
2991 SrcReg = Registers[3][FPRIdx++];
2992 RC = &AArch64::FPR32RegClass;
2993 } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2994 SrcReg = Registers[4][FPRIdx++];
2995 RC = &AArch64::FPR64RegClass;
2996 } else if (VT.is128BitVector()) {
2997 SrcReg = Registers[5][FPRIdx++];
2998 RC = &AArch64::FPR128RegClass;
2999 } else
3000 llvm_unreachable("Unexpected value type.");
3001
3002 Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3003 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3004 // Without this, EmitLiveInCopies may eliminate the livein if its only
3005 // use is a bitcast (which isn't turned into an instruction).
3006 Register ResultReg = createResultReg(RC);
3007 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3008 TII.get(TargetOpcode::COPY), ResultReg)
3009 .addReg(DstReg, getKillRegState(true));
3010 updateValueMap(&Arg, ResultReg);
3011 }
3012 return true;
3013}
3014
3015bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3016 SmallVectorImpl<MVT> &OutVTs,
3017 unsigned &NumBytes) {
3018 CallingConv::ID CC = CLI.CallConv;
3019  SmallVector<CCValAssign, 16> ArgLocs;
3020  CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3021 CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
3022
3023 // Get a count of how many bytes are to be pushed on the stack.
3024 NumBytes = CCInfo.getNextStackOffset();
3025
3026 // Issue CALLSEQ_START
3027 unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3028 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackDown))
3029 .addImm(NumBytes).addImm(0);
3030
3031 // Process the args.
3032 for (CCValAssign &VA : ArgLocs) {
3033 const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3034 MVT ArgVT = OutVTs[VA.getValNo()];
3035
3036 Register ArgReg = getRegForValue(ArgVal);
3037 if (!ArgReg)
3038 return false;
3039
3040 // Handle arg promotion: SExt, ZExt, AExt.
3041 switch (VA.getLocInfo()) {
3042 case CCValAssign::Full:
3043 break;
3044 case CCValAssign::SExt: {
3045 MVT DestVT = VA.getLocVT();
3046 MVT SrcVT = ArgVT;
3047 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3048 if (!ArgReg)
3049 return false;
3050 break;
3051 }
3052 case CCValAssign::AExt:
3053 // Intentional fall-through.
3054 case CCValAssign::ZExt: {
3055 MVT DestVT = VA.getLocVT();
3056 MVT SrcVT = ArgVT;
3057 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3058 if (!ArgReg)
3059 return false;
3060 break;
3061 }
3062 default:
3063 llvm_unreachable("Unknown arg promotion!");
3064 }
3065
3066 // Now copy/store arg to correct locations.
3067 if (VA.isRegLoc() && !VA.needsCustom()) {
3068 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3069 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3070 CLI.OutRegs.push_back(VA.getLocReg());
3071 } else if (VA.needsCustom()) {
3072 // FIXME: Handle custom args.
3073 return false;
3074 } else {
3075 assert(VA.isMemLoc() && "Assuming store on stack.");
3076
3077 // Don't emit stores for undef values.
3078 if (isa<UndefValue>(ArgVal))
3079 continue;
3080
3081 // Need to store on the stack.
3082 unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3083
3084 unsigned BEAlign = 0;
3085 if (ArgSize < 8 && !Subtarget->isLittleEndian())
3086 BEAlign = 8 - ArgSize;
3087
3088 Address Addr;
3089 Addr.setKind(Address::RegBase);
3090 Addr.setReg(AArch64::SP);
3091 Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3092
3093 Align Alignment = DL.getABITypeAlign(ArgVal->getType());
3094 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3095 MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3096 MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3097
3098 if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3099 return false;
3100 }
3101 }
3102 return true;
3103}
3104
3105bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
3106 unsigned NumBytes) {
3107 CallingConv::ID CC = CLI.CallConv;
3108
3109 // Issue CALLSEQ_END
3110 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3111 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp))
3112 .addImm(NumBytes).addImm(0);
3113
3114 // Now the return value.
3115 if (RetVT != MVT::isVoid) {
3116    SmallVector<CCValAssign, 16> RVLocs;
3117    CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3118 CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
3119
3120 // Only handle a single return value.
3121 if (RVLocs.size() != 1)
3122 return false;
3123
3124 // Copy all of the result registers out of their specified physreg.
3125 MVT CopyVT = RVLocs[0].getValVT();
3126
3127 // TODO: Handle big-endian results
3128 if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3129 return false;
3130
3131 Register ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
3132 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3133 TII.get(TargetOpcode::COPY), ResultReg)
3134 .addReg(RVLocs[0].getLocReg());
3135 CLI.InRegs.push_back(RVLocs[0].getLocReg());
3136
3137 CLI.ResultReg = ResultReg;
3138 CLI.NumResultRegs = 1;
3139 }
3140
3141 return true;
3142}
3143
3144bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3145 CallingConv::ID CC = CLI.CallConv;
3146 bool IsTailCall = CLI.IsTailCall;
3147 bool IsVarArg = CLI.IsVarArg;
3148 const Value *Callee = CLI.Callee;
3149 MCSymbol *Symbol = CLI.Symbol;
3150
3151 if (!Callee && !Symbol)
3152 return false;
3153
3154 // Allow SelectionDAG isel to handle calls to functions like setjmp that need
3155 // a bti instruction following the call.
3156 if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
3157 !Subtarget->noBTIAtReturnTwice() &&
3158      MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
3159    return false;
3160
3161 // Allow SelectionDAG isel to handle indirect calls with KCFI checks.
3162 if (CLI.CB && CLI.CB->isIndirectCall() &&
3163 CLI.CB->getOperandBundle(LLVMContext::OB_kcfi))
3164 return false;
3165
3166 // Allow SelectionDAG isel to handle tail calls.
3167 if (IsTailCall)
3168 return false;
3169
3170 // FIXME: we could and should support this, but for now correctness at -O0 is
3171 // more important.
3172 if (Subtarget->isTargetILP32())
3173 return false;
3174
3175 CodeModel::Model CM = TM.getCodeModel();
3176 // Only support the small-addressing and large code models.
3177 if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3178 return false;
3179
3180 // FIXME: Add large code model support for ELF.
3181 if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3182 return false;
3183
3184 // Let SDISel handle vararg functions.
3185 if (IsVarArg)
3186 return false;
3187
3188 // FIXME: Only handle *simple* calls for now.
3189 MVT RetVT;
3190 if (CLI.RetTy->isVoidTy())
3191 RetVT = MVT::isVoid;
3192 else if (!isTypeLegal(CLI.RetTy, RetVT))
3193 return false;
3194
3195 for (auto Flag : CLI.OutFlags)
3196 if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3197 Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError())
3198 return false;
3199
3200 // Set up the argument vectors.
3201 SmallVector<MVT, 16> OutVTs;
3202 OutVTs.reserve(CLI.OutVals.size());
3203
3204 for (auto *Val : CLI.OutVals) {
3205 MVT VT;
3206 if (!isTypeLegal(Val->getType(), VT) &&
3207 !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3208 return false;
3209
3210 // We don't handle vector parameters yet.
3211 if (VT.isVector() || VT.getSizeInBits() > 64)
3212 return false;
3213
3214 OutVTs.push_back(VT);
3215 }
3216
3217 Address Addr;
3218 if (Callee && !computeCallAddress(Callee, Addr))
3219 return false;
3220
3221 // The weak function target may be zero; in that case we must use indirect
3222  // addressing via a stub on Windows as it may be out of range for a
3223 // PC-relative jump.
3224 if (Subtarget->isTargetWindows() && Addr.getGlobalValue() &&
3225 Addr.getGlobalValue()->hasExternalWeakLinkage())
3226 return false;
3227
3228 // Handle the arguments now that we've gotten them.
3229 unsigned NumBytes;
3230 if (!processCallArgs(CLI, OutVTs, NumBytes))
3231 return false;
3232
3233 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3234 if (RegInfo->isAnyArgRegReserved(*MF))
3235 RegInfo->emitReservedArgRegCallError(*MF);
3236
3237 // Issue the call.
3238  MachineInstrBuilder MIB;
3239  if (Subtarget->useSmallAddressing()) {
3240 const MCInstrDesc &II =
3241 TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL);
3242 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II);
3243 if (Symbol)
3244 MIB.addSym(Symbol, 0);
3245 else if (Addr.getGlobalValue())
3246 MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3247 else if (Addr.getReg()) {
3248 Register Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3249 MIB.addReg(Reg);
3250 } else
3251 return false;
3252 } else {
3253 unsigned CallReg = 0;
3254 if (Symbol) {
3255 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3256 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
3257 ADRPReg)
3258          .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3259
3260 CallReg = createResultReg(&AArch64::GPR64RegClass);
3261 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3262 TII.get(AArch64::LDRXui), CallReg)
3263 .addReg(ADRPReg)
3264 .addSym(Symbol,
3265                  AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3266    } else if (Addr.getGlobalValue())
3267 CallReg = materializeGV(Addr.getGlobalValue());
3268 else if (Addr.getReg())
3269 CallReg = Addr.getReg();
3270
3271 if (!CallReg)
3272 return false;
3273
3274 const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF));
3275 CallReg = constrainOperandRegClass(II, CallReg, 0);
3276 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(CallReg);
3277 }
3278
3279 // Add implicit physical register uses to the call.
3280 for (auto Reg : CLI.OutRegs)
3281 MIB.addReg(Reg, RegState::Implicit);
3282
3283 // Add a register mask with the call-preserved registers.
3284 // Proper defs for return values will be added by setPhysRegsDeadExcept().
3285 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3286
3287 CLI.Call = MIB;
3288
3289 // Finish off the call including any return values.
3290 return finishCall(CLI, RetVT, NumBytes);
3291}
3292
3293bool AArch64FastISel::isMemCpySmall(uint64_t Len, MaybeAlign Alignment) {
3294 if (Alignment)
3295 return Len / Alignment->value() <= 4;
3296 else
3297 return Len < 32;
3298}
3299
3300bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3301 uint64_t Len, MaybeAlign Alignment) {
3302 // Make sure we don't bloat code by inlining very large memcpy's.
3303 if (!isMemCpySmall(Len, Alignment))
3304 return false;
3305
3306 int64_t UnscaledOffset = 0;
3307 Address OrigDest = Dest;
3308 Address OrigSrc = Src;
3309
3310 while (Len) {
3311 MVT VT;
3312 if (!Alignment || *Alignment >= 8) {
3313 if (Len >= 8)
3314 VT = MVT::i64;
3315 else if (Len >= 4)
3316 VT = MVT::i32;
3317 else if (Len >= 2)
3318 VT = MVT::i16;
3319 else {
3320 VT = MVT::i8;
3321 }
3322 } else {
3323 assert(Alignment && "Alignment is set in this branch");
3324 // Bound based on alignment.
3325 if (Len >= 4 && *Alignment == 4)
3326 VT = MVT::i32;
3327 else if (Len >= 2 && *Alignment == 2)
3328 VT = MVT::i16;
3329 else {
3330 VT = MVT::i8;
3331 }
3332 }
3333
3334 unsigned ResultReg = emitLoad(VT, VT, Src);
3335 if (!ResultReg)
3336 return false;
3337
3338 if (!emitStore(VT, ResultReg, Dest))
3339 return false;
3340
3341 int64_t Size = VT.getSizeInBits() / 8;
3342 Len -= Size;
3343 UnscaledOffset += Size;
3344
3345 // We need to recompute the unscaled offset for each iteration.
3346 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3347 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3348 }
3349
3350 return true;
3351}
3352
3353/// Check if it is possible to fold the condition from the XALU intrinsic
3354/// into the user. The condition code will only be updated on success.
3355bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3356 const Instruction *I,
3357 const Value *Cond) {
3358 if (!isa<ExtractValueInst>(Cond))
3359 return false;
3360
3361 const auto *EV = cast<ExtractValueInst>(Cond);
3362 if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3363 return false;
3364
3365 const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3366 MVT RetVT;
3367 const Function *Callee = II->getCalledFunction();
3368 Type *RetTy =
3369 cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3370 if (!isTypeLegal(RetTy, RetVT))
3371 return false;
3372
3373 if (RetVT != MVT::i32 && RetVT != MVT::i64)
3374 return false;
3375
3376 const Value *LHS = II->getArgOperand(0);
3377 const Value *RHS = II->getArgOperand(1);
3378
3379 // Canonicalize immediate to the RHS.
3380 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3381 std::swap(LHS, RHS);
3382
3383 // Simplify multiplies.
3384 Intrinsic::ID IID = II->getIntrinsicID();
3385 switch (IID) {
3386 default:
3387 break;
3388 case Intrinsic::smul_with_overflow:
3389 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3390 if (C->getValue() == 2)
3391 IID = Intrinsic::sadd_with_overflow;
3392 break;
3393 case Intrinsic::umul_with_overflow:
3394 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3395 if (C->getValue() == 2)
3396 IID = Intrinsic::uadd_with_overflow;
3397 break;
3398 }
3399
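  // Note: the condition code chosen here must match the flags produced by the
  // corresponding overflow lowering: signed add/sub overflow sets V, unsigned
  // add overflow sets C (HS), unsigned sub overflow clears C (LO), and the
  // multiply lowerings end in a compare that is tested with NE.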
3400 AArch64CC::CondCode TmpCC;
3401 switch (IID) {
3402 default:
3403 return false;
3404 case Intrinsic::sadd_with_overflow:
3405 case Intrinsic::ssub_with_overflow:
3406 TmpCC = AArch64CC::VS;
3407 break;
3408 case Intrinsic::uadd_with_overflow:
3409 TmpCC = AArch64CC::HS;
3410 break;
3411 case Intrinsic::usub_with_overflow:
3412 TmpCC = AArch64CC::LO;
3413 break;
3414 case Intrinsic::smul_with_overflow:
3415 case Intrinsic::umul_with_overflow:
3416 TmpCC = AArch64CC::NE;
3417 break;
3418 }
3419
3420 // Check if both instructions are in the same basic block.
3421 if (!isValueAvailable(II))
3422 return false;
3423
3424 // Make sure nothing is in the way
3425  BasicBlock::const_iterator Start(I);
3426  BasicBlock::const_iterator End(II);
3427  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3428 // We only expect extractvalue instructions between the intrinsic and the
3429 // instruction to be selected.
3430 if (!isa<ExtractValueInst>(Itr))
3431 return false;
3432
3433 // Check that the extractvalue operand comes from the intrinsic.
3434 const auto *EVI = cast<ExtractValueInst>(Itr);
3435 if (EVI->getAggregateOperand() != II)
3436 return false;
3437 }
3438
3439 CC = TmpCC;
3440 return true;
3441}
3442
3443bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3444 // FIXME: Handle more intrinsics.
3445 switch (II->getIntrinsicID()) {
3446 default: return false;
3447 case Intrinsic::frameaddress: {
3448 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3449 MFI.setFrameAddressIsTaken(true);
3450
3451 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3452 Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3453 Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3454 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3455 TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3456 // Recursively load frame address
3457 // ldr x0, [fp]
3458 // ldr x0, [x0]
3459 // ldr x0, [x0]
3460 // ...
3461 unsigned DestReg;
3462 unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3463 while (Depth--) {
3464 DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3465 SrcReg, 0);
3466 assert(DestReg && "Unexpected LDR instruction emission failure.");
3467 SrcReg = DestReg;
3468 }
3469
3470 updateValueMap(II, SrcReg);
3471 return true;
3472 }
3473 case Intrinsic::sponentry: {
3474 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3475
3476 // SP = FP + Fixed Object + 16
3477 int FI = MFI.CreateFixedObject(4, 0, false);
3478 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
3479 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3480 TII.get(AArch64::ADDXri), ResultReg)
3481 .addFrameIndex(FI)
3482 .addImm(0)
3483 .addImm(0);
3484
3485 updateValueMap(II, ResultReg);
3486 return true;
3487 }
3488 case Intrinsic::memcpy:
3489 case Intrinsic::memmove: {
3490 const auto *MTI = cast<MemTransferInst>(II);
3491 // Don't handle volatile.
3492 if (MTI->isVolatile())
3493 return false;
3494
3495    // Disable inlining for memmove before calls to computeAddress. Otherwise,
3496 // we would emit dead code because we don't currently handle memmoves.
3497 bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3498 if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3499 // Small memcpy's are common enough that we want to do them without a call
3500 // if possible.
3501 uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3502 MaybeAlign Alignment;
3503 if (MTI->getDestAlign() || MTI->getSourceAlign())
3504 Alignment = std::min(MTI->getDestAlign().valueOrOne(),
3505 MTI->getSourceAlign().valueOrOne());
3506 if (isMemCpySmall(Len, Alignment)) {
3507 Address Dest, Src;
3508 if (!computeAddress(MTI->getRawDest(), Dest) ||
3509 !computeAddress(MTI->getRawSource(), Src))
3510 return false;
3511 if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3512 return true;
3513 }
3514 }
3515
3516 if (!MTI->getLength()->getType()->isIntegerTy(64))
3517 return false;
3518
3519 if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3520 // Fast instruction selection doesn't support the special
3521 // address spaces.
3522 return false;
3523
3524 const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3525 return lowerCallTo(II, IntrMemName, II->arg_size() - 1);
3526 }
3527 case Intrinsic::memset: {
3528 const MemSetInst *MSI = cast<MemSetInst>(II);
3529 // Don't handle volatile.
3530 if (MSI->isVolatile())
3531 return false;
3532
3533 if (!MSI->getLength()->getType()->isIntegerTy(64))
3534 return false;
3535
3536 if (MSI->getDestAddressSpace() > 255)
3537 // Fast instruction selection doesn't support the special
3538 // address spaces.
3539 return false;
3540
3541 return lowerCallTo(II, "memset", II->arg_size() - 1);
3542 }
3543 case Intrinsic::sin:
3544 case Intrinsic::cos:
3545 case Intrinsic::pow: {
3546 MVT RetVT;
3547 if (!isTypeLegal(II->getType(), RetVT))
3548 return false;
3549
3550 if (RetVT != MVT::f32 && RetVT != MVT::f64)
3551 return false;
3552
3553 static const RTLIB::Libcall LibCallTable[3][2] = {
3554 { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3555 { RTLIB::COS_F32, RTLIB::COS_F64 },
3556 { RTLIB::POW_F32, RTLIB::POW_F64 }
3557 };
3558 RTLIB::Libcall LC;
3559 bool Is64Bit = RetVT == MVT::f64;
3560 switch (II->getIntrinsicID()) {
3561 default:
3562 llvm_unreachable("Unexpected intrinsic.");
3563 case Intrinsic::sin:
3564 LC = LibCallTable[0][Is64Bit];
3565 break;
3566 case Intrinsic::cos:
3567 LC = LibCallTable[1][Is64Bit];
3568 break;
3569 case Intrinsic::pow:
3570 LC = LibCallTable[2][Is64Bit];
3571 break;
3572 }
3573
3574 ArgListTy Args;
3575 Args.reserve(II->arg_size());
3576
3577 // Populate the argument list.
3578 for (auto &Arg : II->args()) {
3579 ArgListEntry Entry;
3580 Entry.Val = Arg;
3581 Entry.Ty = Arg->getType();
3582 Args.push_back(Entry);
3583 }
3584
3585 CallLoweringInfo CLI;
3586 MCContext &Ctx = MF->getContext();
3587 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3588 TLI.getLibcallName(LC), std::move(Args));
3589 if (!lowerCallTo(CLI))
3590 return false;
3591 updateValueMap(II, CLI.ResultReg);
3592 return true;
3593 }
3594 case Intrinsic::fabs: {
3595 MVT VT;
3596 if (!isTypeLegal(II->getType(), VT))
3597 return false;
3598
3599 unsigned Opc;
3600 switch (VT.SimpleTy) {
3601 default:
3602 return false;
3603 case MVT::f32:
3604 Opc = AArch64::FABSSr;
3605 break;
3606 case MVT::f64:
3607 Opc = AArch64::FABSDr;
3608 break;
3609 }
3610 Register SrcReg = getRegForValue(II->getOperand(0));
3611 if (!SrcReg)
3612 return false;
3613 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3614 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
3615 .addReg(SrcReg);
3616 updateValueMap(II, ResultReg);
3617 return true;
3618 }
3619 case Intrinsic::trap:
3620 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3621 .addImm(1);
3622 return true;
3623 case Intrinsic::debugtrap:
3624 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3625 .addImm(0xF000);
3626 return true;
3627
3628 case Intrinsic::sqrt: {
3629 Type *RetTy = II->getCalledFunction()->getReturnType();
3630
3631 MVT VT;
3632 if (!isTypeLegal(RetTy, VT))
3633 return false;
3634
3635 Register Op0Reg = getRegForValue(II->getOperand(0));
3636 if (!Op0Reg)
3637 return false;
3638
3639 unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg);
3640 if (!ResultReg)
3641 return false;
3642
3643 updateValueMap(II, ResultReg);
3644 return true;
3645 }
3646 case Intrinsic::sadd_with_overflow:
3647 case Intrinsic::uadd_with_overflow:
3648 case Intrinsic::ssub_with_overflow:
3649 case Intrinsic::usub_with_overflow:
3650 case Intrinsic::smul_with_overflow:
3651 case Intrinsic::umul_with_overflow: {
3652 // This implements the basic lowering of the xalu with overflow intrinsics.
3653 const Function *Callee = II->getCalledFunction();
3654 auto *Ty = cast<StructType>(Callee->getReturnType());
3655 Type *RetTy = Ty->getTypeAtIndex(0U);
3656
3657 MVT VT;
3658 if (!isTypeLegal(RetTy, VT))
3659 return false;
3660
3661 if (VT != MVT::i32 && VT != MVT::i64)
3662 return false;
3663
3664 const Value *LHS = II->getArgOperand(0);
3665 const Value *RHS = II->getArgOperand(1);
3666 // Canonicalize immediate to the RHS.
3667 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3668 std::swap(LHS, RHS);
3669
3670 // Simplify multiplies.
3671 Intrinsic::ID IID = II->getIntrinsicID();
3672 switch (IID) {
3673 default:
3674 break;
3675 case Intrinsic::smul_with_overflow:
3676 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3677 if (C->getValue() == 2) {
3678 IID = Intrinsic::sadd_with_overflow;
3679 RHS = LHS;
3680 }
3681 break;
3682 case Intrinsic::umul_with_overflow:
3683 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3684 if (C->getValue() == 2) {
3685 IID = Intrinsic::uadd_with_overflow;
3686 RHS = LHS;
3687 }
3688 break;
3689 }
3690
3691 unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3692 AArch64CC::CondCode CC = AArch64CC::Invalid;
3693 switch (IID) {
3694 default: llvm_unreachable("Unexpected intrinsic!");
3695 case Intrinsic::sadd_with_overflow:
3696 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3697 CC = AArch64CC::VS;
3698 break;
3699 case Intrinsic::uadd_with_overflow:
3700 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3701 CC = AArch64CC::HS;
3702 break;
3703 case Intrinsic::ssub_with_overflow:
3704 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3705 CC = AArch64CC::VS;
3706 break;
3707 case Intrinsic::usub_with_overflow:
3708 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3709 CC = AArch64CC::LO;
3710 break;
3711 case Intrinsic::smul_with_overflow: {
3712 CC = AArch64CC::NE;
3713 Register LHSReg = getRegForValue(LHS);
3714 if (!LHSReg)
3715 return false;
3716
3717 Register RHSReg = getRegForValue(RHS);
3718 if (!RHSReg)
3719 return false;
3720
3721 if (VT == MVT::i32) {
3722 MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg);
3723 Register MulSubReg =
3724 fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3725 // cmp xreg, wreg, sxtw
3726 emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg,
3727 AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true,
3728 /*WantResult=*/false);
3729 MulReg = MulSubReg;
3730 } else {
3731 assert(VT == MVT::i64 && "Unexpected value type.");
3732 // LHSReg and RHSReg cannot be killed by this Mul, since they are
3733 // reused in the next instruction.
3734 MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3735 unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg);
3736 emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63,
3737 /*WantResult=*/false);
3738 }
3739 break;
3740 }
3741 case Intrinsic::umul_with_overflow: {
3742 CC = AArch64CC::NE;
3743 Register LHSReg = getRegForValue(LHS);
3744 if (!LHSReg)
3745 return false;
3746
3747 Register RHSReg = getRegForValue(RHS);
3748 if (!RHSReg)
3749 return false;
3750
3751 if (VT == MVT::i32) {
3752 MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg);
3753 // tst xreg, #0xffffffff00000000
3754 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3755 TII.get(AArch64::ANDSXri), AArch64::XZR)
3756 .addReg(MulReg)
3757 .addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64));
3758 MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3759 } else {
3760 assert(VT == MVT::i64 && "Unexpected value type.");
3761 // LHSReg and RHSReg cannot be killed by this Mul, since they are
3762 // reused in the next instruction.
3763 MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3764 unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg);
3765 emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false);
3766 }
3767 break;
3768 }
3769 }
3770
3771 if (MulReg) {
3772 ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3773 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3774 TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3775 }
3776
3777 if (!ResultReg1)
3778 return false;
3779
3780 ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3781 AArch64::WZR, AArch64::WZR,
3782 getInvertedCondCode(CC));
3783 (void)ResultReg2;
3784 assert((ResultReg1 + 1) == ResultReg2 &&
3785 "Nonconsecutive result registers.");
3786 updateValueMap(II, ResultReg1, 2);
3787 return true;
3788 }
3789 }
3790 return false;
3791}
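// Rough sketch of the 32-bit smul.with.overflow path above (register numbers
// are arbitrary): the product is computed with a widening multiply and the
// overflow check compares it against its own sign-extended low half, e.g.
//   smull x8, w0, w1
//   cmp   x8, w8, sxtw
//   cset  w9, ne          // the CSINC wzr,wzr form with the inverted condition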
3792
3793bool AArch64FastISel::selectRet(const Instruction *I) {
3794 const ReturnInst *Ret = cast<ReturnInst>(I);
3795 const Function &F = *I->getParent()->getParent();
3796
3797 if (!FuncInfo.CanLowerReturn)
3798 return false;
3799
3800 if (F.isVarArg())
3801 return false;
3802
3803 if (TLI.supportSwiftError() &&
3804 F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3805 return false;
3806
3807 if (TLI.supportSplitCSR(FuncInfo.MF))
3808 return false;
3809
3810 // Build a list of return value registers.
3811 SmallVector<unsigned, 4> RetRegs;
3812
3813 if (Ret->getNumOperands() > 0) {
3814 CallingConv::ID CC = F.getCallingConv();
3815 SmallVector<ISD::OutputArg, 4> Outs;
3816 GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3817
3818 // Analyze operands of the call, assigning locations to each operand.
3819 SmallVector<CCValAssign, 16> ValLocs;
3820 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3821 CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3822 : RetCC_AArch64_AAPCS;
3823 CCInfo.AnalyzeReturn(Outs, RetCC);
3824
3825 // Only handle a single return value for now.
3826 if (ValLocs.size() != 1)
3827 return false;
3828
3829 CCValAssign &VA = ValLocs[0];
3830 const Value *RV = Ret->getOperand(0);
3831
3832 // Don't bother handling odd stuff for now.
3833 if ((VA.getLocInfo() != CCValAssign::Full) &&
3834 (VA.getLocInfo() != CCValAssign::BCvt))
3835 return false;
3836
3837 // Only handle register returns for now.
3838 if (!VA.isRegLoc())
3839 return false;
3840
3841 Register Reg = getRegForValue(RV);
3842 if (Reg == 0)
3843 return false;
3844
3845 unsigned SrcReg = Reg + VA.getValNo();
3846 Register DestReg = VA.getLocReg();
3847 // Avoid a cross-class copy. This is very unlikely.
3848 if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3849 return false;
3850
3851 EVT RVEVT = TLI.getValueType(DL, RV->getType());
3852 if (!RVEVT.isSimple())
3853 return false;
3854
3855 // Vectors (of > 1 lane) in big endian need tricky handling.
3856 if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
3857 !Subtarget->isLittleEndian())
3858 return false;
3859
3860 MVT RVVT = RVEVT.getSimpleVT();
3861 if (RVVT == MVT::f128)
3862 return false;
3863
3864 MVT DestVT = VA.getValVT();
3865 // Special handling for extended integers.
3866 if (RVVT != DestVT) {
3867 if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3868 return false;
3869
3870 if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3871 return false;
3872
3873 bool IsZExt = Outs[0].Flags.isZExt();
3874 SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3875 if (SrcReg == 0)
3876 return false;
3877 }
3878
3879 // "Callee" (i.e. value producer) zero extends pointers at function
3880 // boundary.
3881 if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
3882 SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff);
3883
3884 // Make the copy.
3885 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3886 TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3887
3888 // Add register to return instruction.
3889 RetRegs.push_back(VA.getLocReg());
3890 }
3891
3892 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3893 TII.get(AArch64::RET_ReallyLR));
3894 for (unsigned RetReg : RetRegs)
3895 MIB.addReg(RetReg, RegState::Implicit);
3896 return true;
3897}
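// For illustration (registers are arbitrary): a simple "ret i32 %x" whose
// value already lives in a virtual register is lowered to a COPY into W0 (the
// location assigned by the AAPCS return convention), followed by RET_ReallyLR
// carrying W0 as an implicit use so the returned value stays live.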
3898
3899bool AArch64FastISel::selectTrunc(const Instruction *I) {
3900 Type *DestTy = I->getType();
3901 Value *Op = I->getOperand(0);
3902 Type *SrcTy = Op->getType();
3903
3904 EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3905 EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3906 if (!SrcEVT.isSimple())
3907 return false;
3908 if (!DestEVT.isSimple())
3909 return false;
3910
3911 MVT SrcVT = SrcEVT.getSimpleVT();
3912 MVT DestVT = DestEVT.getSimpleVT();
3913
3914 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3915 SrcVT != MVT::i8)
3916 return false;
3917 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3918 DestVT != MVT::i1)
3919 return false;
3920
3921 Register SrcReg = getRegForValue(Op);
3922 if (!SrcReg)
3923 return false;
3924
3925 // If we're truncating from i64 to a smaller non-legal type then generate an
3926 // AND. Otherwise, we know the high bits are undefined and a truncate only
3927 // generates a COPY. We cannot also mark the source register as the result
3928 // register, because this can incorrectly transfer the kill flag onto the
3929 // source register.
3930 unsigned ResultReg;
3931 if (SrcVT == MVT::i64) {
3932 uint64_t Mask = 0;
3933 switch (DestVT.SimpleTy) {
3934 default:
3935 // Trunc i64 to i32 is handled by the target-independent fast-isel.
3936 return false;
3937 case MVT::i1:
3938 Mask = 0x1;
3939 break;
3940 case MVT::i8:
3941 Mask = 0xff;
3942 break;
3943 case MVT::i16:
3944 Mask = 0xffff;
3945 break;
3946 }
3947 // Issue an extract_subreg to get the lower 32-bits.
3948 Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg,
3949 AArch64::sub_32);
3950 // Create the AND instruction which performs the actual truncation.
3951 ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask);
3952 assert(ResultReg && "Unexpected AND instruction emission failure.");
3953 } else {
3954 ResultReg = createResultReg(&AArch64::GPR32RegClass);
3955 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3956 TII.get(TargetOpcode::COPY), ResultReg)
3957 .addReg(SrcReg);
3958 }
3959
3960 updateValueMap(I, ResultReg);
3961 return true;
3962}
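// For illustration (registers are arbitrary): "trunc i64 %x to i8" is selected
// as an extract of the sub_32 register followed by "and w0, w8, #0xff", while
// a truncate from i32 or i16 only needs the COPY, since the high bits of the
// result are undefined anyway.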
3963
3964unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
3965 assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
3966 DestVT == MVT::i64) &&
3967 "Unexpected value type.");
3968 // Handle i8 and i16 as i32.
3969 if (DestVT == MVT::i8 || DestVT == MVT::i16)
3970 DestVT = MVT::i32;
3971
3972 if (IsZExt) {
3973 unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1);
3974 assert(ResultReg && "Unexpected AND instruction emission failure.");
3975 if (DestVT == MVT::i64) {
3976 // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
3977 // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
3978 Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3979 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3980 TII.get(AArch64::SUBREG_TO_REG), Reg64)
3981 .addImm(0)
3982 .addReg(ResultReg)
3983 .addImm(AArch64::sub_32);
3984 ResultReg = Reg64;
3985 }
3986 return ResultReg;
3987 } else {
3988 if (DestVT == MVT::i64) {
3989 // FIXME: We're SExt i1 to i64.
3990 return 0;
3991 }
3992 return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
3993 0, 0);
3994 }
3995}
3996
3997unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
3998 unsigned Opc, ZReg;
3999 switch (RetVT.SimpleTy) {
4000 default: return 0;
4001 case MVT::i8:
4002 case MVT::i16:
4003 case MVT::i32:
4004 RetVT = MVT::i32;
4005 Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
4006 case MVT::i64:
4007 Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
4008 }
4009
4010 const TargetRegisterClass *RC =
4011 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4012 return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg);
4013}
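// Note: MUL Wd, Wn, Wm is an architectural alias of MADD Wd, Wn, Wm, WZR,
// which is why the helper above always supplies the zero register as the
// addend, e.g. "madd w0, w1, w2, wzr" is printed as "mul w0, w1, w2".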
4014
4015unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4016 if (RetVT != MVT::i64)
4017 return 0;
4018
4019 return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
4020 Op0, Op1, AArch64::XZR);
4021}
4022
4023unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4024 if (RetVT != MVT::i64)
4025 return 0;
4026
4027 return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
4028 Op0, Op1, AArch64::XZR);
4029}
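// Likewise, the widening 32x32->64 multiplies above use the accumulating
// SMADDL/UMADDL forms with XZR as the addend: "smull x0, w1, w2" is an alias
// of "smaddl x0, w1, w2, xzr", and the same holds for umull/umaddl.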
4030
4031unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg,
4032 unsigned Op1Reg) {
4033 unsigned Opc = 0;
4034 bool NeedTrunc = false;
4035 uint64_t Mask = 0;
4036 switch (RetVT.SimpleTy) {
4037 default: return 0;
4038 case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
4039 case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4040 case MVT::i32: Opc = AArch64::LSLVWr; break;
4041 case MVT::i64: Opc = AArch64::LSLVXr; break;
4042 }
4043
4044 const TargetRegisterClass *RC =
4045 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4046 if (NeedTrunc)
4047 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4048
4049 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4050 if (NeedTrunc)
4051 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4052 return ResultReg;
4053}
4054
4055unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4056 uint64_t Shift, bool IsZExt) {
4057 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4058 "Unexpected source/return type pair.");
4059 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4060 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4061 "Unexpected source value type.");
4062 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4063 RetVT == MVT::i64) && "Unexpected return value type.");
4064
4065 bool Is64Bit = (RetVT == MVT::i64);
4066 unsigned RegSize = Is64Bit ? 64 : 32;
4067 unsigned DstBits = RetVT.getSizeInBits();
4068 unsigned SrcBits = SrcVT.getSizeInBits();
4069 const TargetRegisterClass *RC =
4070 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4071
4072 // Just emit a copy for "zero" shifts.
4073 if (Shift == 0) {
4074 if (RetVT == SrcVT) {
4075 Register ResultReg = createResultReg(RC);
4076 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4077 TII.get(TargetOpcode::COPY), ResultReg)
4078 .addReg(Op0);
4079 return ResultReg;
4080 } else
4081 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4082 }
4083
4084 // Don't deal with undefined shifts.
4085 if (Shift >= DstBits)
4086 return 0;
4087
4088 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4089 // {S|U}BFM Wd, Wn, #r, #s
4090 // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4091
4092 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4093 // %2 = shl i16 %1, 4
4094 // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4095 // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4096 // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4097 // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4098
4099 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4100 // %2 = shl i16 %1, 8
4101 // Wd<32+7-24,32-24> = Wn<7:0>
4102 // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4103 // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4104 // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4105
4106 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4107 // %2 = shl i16 %1, 12
4108 // Wd<32+3-20,32-20> = Wn<3:0>
4109 // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4110 // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4111 // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4112
4113 unsigned ImmR = RegSize - Shift;
4114 // Limit the width to the length of the source type.
4115 unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4116 static const unsigned OpcTable[2][2] = {
4117 {AArch64::SBFMWri, AArch64::SBFMXri},
4118 {AArch64::UBFMWri, AArch64::UBFMXri}
4119 };
4120 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4121 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4122 Register TmpReg = MRI.createVirtualRegister(RC);
4123 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4124 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4125 .addImm(0)
4126 .addReg(Op0)
4127 .addImm(AArch64::sub_32);
4128 Op0 = TmpReg;
4129 }
4130 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4131}
4132
4133unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg,
4134 unsigned Op1Reg) {
4135 unsigned Opc = 0;
4136 bool NeedTrunc = false;
4137 uint64_t Mask = 0;
4138 switch (RetVT.SimpleTy) {
4139 default: return 0;
4140 case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
4141 case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4142 case MVT::i32: Opc = AArch64::LSRVWr; break;
4143 case MVT::i64: Opc = AArch64::LSRVXr; break;
4144 }
4145
4146 const TargetRegisterClass *RC =
4147 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4148 if (NeedTrunc) {
4149 Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask);
4150 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4151 }
4152 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4153 if (NeedTrunc)
4154 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4155 return ResultReg;
4156}
4157
4158unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4159 uint64_t Shift, bool IsZExt) {
4160 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4161 "Unexpected source/return type pair.");
4162 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4163 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4164 "Unexpected source value type.");
4165 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4166 RetVT == MVT::i64) && "Unexpected return value type.");
4167
4168 bool Is64Bit = (RetVT == MVT::i64);
4169 unsigned RegSize = Is64Bit ? 64 : 32;
4170 unsigned DstBits = RetVT.getSizeInBits();
4171 unsigned SrcBits = SrcVT.getSizeInBits();
4172 const TargetRegisterClass *RC =
4173 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4174
4175 // Just emit a copy for "zero" shifts.
4176 if (Shift == 0) {
4177 if (RetVT == SrcVT) {
4178 Register ResultReg = createResultReg(RC);
4179 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4180 TII.get(TargetOpcode::COPY), ResultReg)
4181 .addReg(Op0);
4182 return ResultReg;
4183 } else
4184 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4185 }
4186
4187 // Don't deal with undefined shifts.
4188 if (Shift >= DstBits)
4189 return 0;
4190
4191 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4192 // {S|U}BFM Wd, Wn, #r, #s
4193 // Wd<s-r:0> = Wn<s:r> when r <= s
4194
4195 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4196 // %2 = lshr i16 %1, 4
4197 // Wd<7-4:0> = Wn<7:4>
4198 // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4199 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4200 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4201
4202 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4203 // %2 = lshr i16 %1, 8
4204 // Wd<7-7,0> = Wn<7:7>
4205 // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4206 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4207 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4208
4209 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4210 // %2 = lshr i16 %1, 12
4211 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4212 // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4213 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4214 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4215
4216 if (Shift >= SrcBits && IsZExt)
4217 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4218
4219 // It is not possible to fold a sign-extend into the LShr instruction. In this
4220 // case emit a sign-extend.
4221 if (!IsZExt) {
4222 Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4223 if (!Op0)
4224 return 0;
4225 SrcVT = RetVT;
4226 SrcBits = SrcVT.getSizeInBits();
4227 IsZExt = true;
4228 }
4229
4230 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4231 unsigned ImmS = SrcBits - 1;
4232 static const unsigned OpcTable[2][2] = {
4233 {AArch64::SBFMWri, AArch64::SBFMXri},
4234 {AArch64::UBFMWri, AArch64::UBFMXri}
4235 };
4236 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4237 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4238 Register TmpReg = MRI.createVirtualRegister(RC);
4239 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4240 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4241 .addImm(0)
4242 .addReg(Op0)
4243 .addImm(AArch64::sub_32);
4244 Op0 = TmpReg;
4245 }
4246 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4247}
4248
4249unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg,
4250 unsigned Op1Reg) {
4251 unsigned Opc = 0;
4252 bool NeedTrunc = false;
4253 uint64_t Mask = 0;
4254 switch (RetVT.SimpleTy) {
4255 default: return 0;
4256 case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
4257 case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4258 case MVT::i32: Opc = AArch64::ASRVWr; break;
4259 case MVT::i64: Opc = AArch64::ASRVXr; break;
4260 }
4261
4262 const TargetRegisterClass *RC =
4263 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4264 if (NeedTrunc) {
4265 Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
4266 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4267 }
4268 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4269 if (NeedTrunc)
4270 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4271 return ResultReg;
4272}
4273
4274unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4275 uint64_t Shift, bool IsZExt) {
4276 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4277 "Unexpected source/return type pair.");
4278 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4279 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4280 "Unexpected source value type.");
4281 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4282 RetVT == MVT::i64) && "Unexpected return value type.");
4283
4284 bool Is64Bit = (RetVT == MVT::i64);
4285 unsigned RegSize = Is64Bit ? 64 : 32;
4286 unsigned DstBits = RetVT.getSizeInBits();
4287 unsigned SrcBits = SrcVT.getSizeInBits();
4288 const TargetRegisterClass *RC =
4289 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4290
4291 // Just emit a copy for "zero" shifts.
4292 if (Shift == 0) {
4293 if (RetVT == SrcVT) {
4294 Register ResultReg = createResultReg(RC);
4295 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4296 TII.get(TargetOpcode::COPY), ResultReg)
4297 .addReg(Op0);
4298 return ResultReg;
4299 } else
4300 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4301 }
4302
4303 // Don't deal with undefined shifts.
4304 if (Shift >= DstBits)
4305 return 0;
4306
4307 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4308 // {S|U}BFM Wd, Wn, #r, #s
4309 // Wd<s-r:0> = Wn<s:r> when r <= s
4310
4311 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4312 // %2 = ashr i16 %1, 4
4313 // Wd<7-4:0> = Wn<7:4>
4314 // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4315 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4316 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4317
4318 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4319 // %2 = ashr i16 %1, 8
4320 // Wd<7-7,0> = Wn<7:7>
4321 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4322 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4323 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4324
4325 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4326 // %2 = ashr i16 %1, 12
4327 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4328 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4329 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4330 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4331
4332 if (Shift >= SrcBits && IsZExt)
4333 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4334
4335 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4336 unsigned ImmS = SrcBits - 1;
4337 static const unsigned OpcTable[2][2] = {
4338 {AArch64::SBFMWri, AArch64::SBFMXri},
4339 {AArch64::UBFMWri, AArch64::UBFMXri}
4340 };
4341 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4342 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4343 Register TmpReg = MRI.createVirtualRegister(RC);
4344 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4345 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4346 .addImm(0)
4347 .addReg(Op0)
4348 .addImm(AArch64::sub_32);
4349 Op0 = TmpReg;
4350 }
4351 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4352}
4353
4354unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4355 bool IsZExt) {
4356 assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4357
4358 // FastISel does not have plumbing to deal with extensions where the SrcVT or
4359 // DestVT are odd things, so test to make sure that they are both types we can
4360 // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4361 // bail out to SelectionDAG.
4362 if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4363 (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4364 ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4365 (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4366 return 0;
4367
4368 unsigned Opc;
4369 unsigned Imm = 0;
4370
4371 switch (SrcVT.SimpleTy) {
4372 default:
4373 return 0;
4374 case MVT::i1:
4375 return emiti1Ext(SrcReg, DestVT, IsZExt);
4376 case MVT::i8:
4377 if (DestVT == MVT::i64)
4378 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4379 else
4380 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4381 Imm = 7;
4382 break;
4383 case MVT::i16:
4384 if (DestVT == MVT::i64)
4385 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4386 else
4387 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4388 Imm = 15;
4389 break;
4390 case MVT::i32:
4391 assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4392 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4393 Imm = 31;
4394 break;
4395 }
4396
4397 // Handle i8 and i16 as i32.
4398 if (DestVT == MVT::i8 || DestVT == MVT::i16)
4399 DestVT = MVT::i32;
4400 else if (DestVT == MVT::i64) {
4401 Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4402 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4403 TII.get(AArch64::SUBREG_TO_REG), Src64)
4404 .addImm(0)
4405 .addReg(SrcReg)
4406 .addImm(AArch64::sub_32);
4407 SrcReg = Src64;
4408 }
4409
4410 const TargetRegisterClass *RC =
4411 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4412 return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm);
4413}
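// For illustration (registers are arbitrary): a zero-extend from i8 to i32
// becomes "ubfm w0, w1, #0, #7" (the UXTB alias), while a sign-extend from
// i16 to i64 first widens the source register with SUBREG_TO_REG and then
// emits "sbfm x0, x1, #0, #15" (the SXTH alias).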
4414
4415static bool isZExtLoad(const MachineInstr *LI) {
4416 switch (LI->getOpcode()) {
4417 default:
4418 return false;
4419 case AArch64::LDURBBi:
4420 case AArch64::LDURHHi:
4421 case AArch64::LDURWi:
4422 case AArch64::LDRBBui:
4423 case AArch64::LDRHHui:
4424 case AArch64::LDRWui:
4425 case AArch64::LDRBBroX:
4426 case AArch64::LDRHHroX:
4427 case AArch64::LDRWroX:
4428 case AArch64::LDRBBroW:
4429 case AArch64::LDRHHroW:
4430 case AArch64::LDRWroW:
4431 return true;
4432 }
4433}
4434
4435static bool isSExtLoad(const MachineInstr *LI) {
4436 switch (LI->getOpcode()) {
4437 default:
4438 return false;
4439 case AArch64::LDURSBWi:
4440 case AArch64::LDURSHWi:
4441 case AArch64::LDURSBXi:
4442 case AArch64::LDURSHXi:
4443 case AArch64::LDURSWi:
4444 case AArch64::LDRSBWui:
4445 case AArch64::LDRSHWui:
4446 case AArch64::LDRSBXui:
4447 case AArch64::LDRSHXui:
4448 case AArch64::LDRSWui:
4449 case AArch64::LDRSBWroX:
4450 case AArch64::LDRSHWroX:
4451 case AArch64::LDRSBXroX:
4452 case AArch64::LDRSHXroX:
4453 case AArch64::LDRSWroX:
4454 case AArch64::LDRSBWroW:
4455 case AArch64::LDRSHWroW:
4456 case AArch64::LDRSBXroW:
4457 case AArch64::LDRSHXroW:
4458 case AArch64::LDRSWroW:
4459 return true;
4460 }
4461}
4462
4463bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4464 MVT SrcVT) {
4465 const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4466 if (!LI || !LI->hasOneUse())
4467 return false;
4468
4469 // Check if the load instruction has already been selected.
4470 Register Reg = lookUpRegForValue(LI);
4471 if (!Reg)
4472 return false;
4473
4474 MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4475 if (!MI)
4476 return false;
4477
4478 // Check if the correct load instruction has been emitted - SelectionDAG might
4479 // have emitted a zero-extending load, but we need a sign-extending load.
4480 bool IsZExt = isa<ZExtInst>(I);
4481 const auto *LoadMI = MI;
4482 if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4483 LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4484 Register LoadReg = MI->getOperand(1).getReg();
4485 LoadMI = MRI.getUniqueVRegDef(LoadReg);
4486 assert(LoadMI && "Expected valid instruction");
4487 }
4488 if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4489 return false;
4490
4491 // Nothing to be done.
4492 if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4493 updateValueMap(I, Reg);
4494 return true;
4495 }
4496
4497 if (IsZExt) {
4498 Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
4499 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4500 TII.get(AArch64::SUBREG_TO_REG), Reg64)
4501 .addImm(0)
4502 .addReg(Reg, getKillRegState(true))
4503 .addImm(AArch64::sub_32);
4504 Reg = Reg64;
4505 } else {
4506 assert((MI->getOpcode() == TargetOpcode::COPY &&
4507 MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4508 "Expected copy instruction");
4509 Reg = MI->getOperand(1).getReg();
4510 MachineBasicBlock::iterator I(MI);
4511 removeDeadCode(I, std::next(I));
4512 }
4513 updateValueMap(I, Reg);
4514 return true;
4515}
4516
4517bool AArch64FastISel::selectIntExt(const Instruction *I) {
4518 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4519 "Unexpected integer extend instruction.");
4520 MVT RetVT;
4521 MVT SrcVT;
4522 if (!isTypeSupported(I->getType(), RetVT))
4523 return false;
4524
4525 if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4526 return false;
4527
4528 // Try to optimize already sign-/zero-extended values from load instructions.
4529 if (optimizeIntExtLoad(I, RetVT, SrcVT))
4530 return true;
4531
4532 Register SrcReg = getRegForValue(I->getOperand(0));
4533 if (!SrcReg)
4534 return false;
4535
4536 // Try to optimize already sign-/zero-extended values from function arguments.
4537 bool IsZExt = isa<ZExtInst>(I);
4538 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4539 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4540 if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4541 Register ResultReg = createResultReg(&AArch64::GPR64RegClass);
4542 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4543 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4544 .addImm(0)
4545 .addReg(SrcReg)
4546 .addImm(AArch64::sub_32);
4547 SrcReg = ResultReg;
4548 }
4549
4550 updateValueMap(I, SrcReg);
4551 return true;
4552 }
4553 }
4554
4555 unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4556 if (!ResultReg)
4557 return false;
4558
4559 updateValueMap(I, ResultReg);
4560 return true;
4561}
4562
4563bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4564 EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4565 if (!DestEVT.isSimple())
4566 return false;
4567
4568 MVT DestVT = DestEVT.getSimpleVT();
4569 if (DestVT != MVT::i64 && DestVT != MVT::i32)
4570 return false;
4571
4572 unsigned DivOpc;
4573 bool Is64bit = (DestVT == MVT::i64);
4574 switch (ISDOpcode) {
4575 default:
4576 return false;
4577 case ISD::SREM:
4578 DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4579 break;
4580 case ISD::UREM:
4581 DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4582 break;
4583 }
4584 unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4585 Register Src0Reg = getRegForValue(I->getOperand(0));
4586 if (!Src0Reg)
4587 return false;
4588
4589 Register Src1Reg = getRegForValue(I->getOperand(1));
4590 if (!Src1Reg)
4591 return false;
4592
4593 const TargetRegisterClass *RC =
4594 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4595 Register QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg);
4596 assert(QuotReg && "Unexpected DIV instruction emission failure.");
4597 // The remainder is computed as numerator - (quotient * denominator) using the
4598 // MSUB instruction.
4599 Register ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg);
4600 updateValueMap(I, ResultReg);
4601 return true;
4602}
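// For illustration (registers are arbitrary): "srem i32 %a, %b" becomes
//   sdiv w8, w0, w1
//   msub w0, w8, w1, w0    // w0 = w0 - w8 * w1
// and urem uses udiv followed by the same msub.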
4603
4604bool AArch64FastISel::selectMul(const Instruction *I) {
4605 MVT VT;
4606 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4607 return false;
4608
4609 if (VT.isVector())
4610 return selectBinaryOp(I, ISD::MUL);
4611
4612 const Value *Src0 = I->getOperand(0);
4613 const Value *Src1 = I->getOperand(1);
4614 if (const auto *C = dyn_cast<ConstantInt>(Src0))
4615 if (C->getValue().isPowerOf2())
4616 std::swap(Src0, Src1);
4617
4618 // Try to simplify to a shift instruction.
4619 if (const auto *C = dyn_cast<ConstantInt>(Src1))
4620 if (C->getValue().isPowerOf2()) {
4621 uint64_t ShiftVal = C->getValue().logBase2();
4622 MVT SrcVT = VT;
4623 bool IsZExt = true;
4624 if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4625 if (!isIntExtFree(ZExt)) {
4626 MVT VT;
4627 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4628 SrcVT = VT;
4629 IsZExt = true;
4630 Src0 = ZExt->getOperand(0);
4631 }
4632 }
4633 } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4634 if (!isIntExtFree(SExt)) {
4635 MVT VT;
4636 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4637 SrcVT = VT;
4638 IsZExt = false;
4639 Src0 = SExt->getOperand(0);
4640 }
4641 }
4642 }
4643
4644 Register Src0Reg = getRegForValue(Src0);
4645 if (!Src0Reg)
4646 return false;
4647
4648 unsigned ResultReg =
4649 emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt);
4650
4651 if (ResultReg) {
4652 updateValueMap(I, ResultReg);
4653 return true;
4654 }
4655 }
4656
4657 Register Src0Reg = getRegForValue(I->getOperand(0));
4658 if (!Src0Reg)
4659 return false;
4660
4661 Register Src1Reg = getRegForValue(I->getOperand(1));
4662 if (!Src1Reg)
4663 return false;
4664
4665 unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg);
4666
4667 if (!ResultReg)
4668 return false;
4669
4670 updateValueMap(I, ResultReg);
4671 return true;
4672}
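// For illustration: a multiply by a power of two, e.g. "mul i32 %x, 8", is
// turned into a left shift by 3 via emitLSL_ri (a UBFM form), and a
// zero-/sign-extension feeding the multiply can be folded into that shift.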
4673
4674bool AArch64FastISel::selectShift(const Instruction *I) {
4675 MVT RetVT;
4676 if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4677 return false;
4678
4679 if (RetVT.isVector())
4680 return selectOperator(I, I->getOpcode());
4681
4682 if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4683 unsigned ResultReg = 0;
4684 uint64_t ShiftVal = C->getZExtValue();
4685 MVT SrcVT = RetVT;
4686 bool IsZExt = I->getOpcode() != Instruction::AShr;
4687 const Value *Op0 = I->getOperand(0);
4688 if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4689 if (!isIntExtFree(ZExt)) {
4690 MVT TmpVT;
4691 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4692 SrcVT = TmpVT;
4693 IsZExt = true;
4694 Op0 = ZExt->getOperand(0);
4695 }
4696 }
4697 } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4698 if (!isIntExtFree(SExt)) {
4699 MVT TmpVT;
4700 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4701 SrcVT = TmpVT;
4702 IsZExt = false;
4703 Op0 = SExt->getOperand(0);
4704 }
4705 }
4706 }
4707
4708 Register Op0Reg = getRegForValue(Op0);
4709 if (!Op0Reg)
4710 return false;
4711
4712 switch (I->getOpcode()) {
4713 default: llvm_unreachable("Unexpected instruction.");
4714 case Instruction::Shl:
4715 ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4716 break;
4717 case Instruction::AShr:
4718 ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4719 break;
4720 case Instruction::LShr:
4721 ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4722 break;
4723 }
4724 if (!ResultReg)
4725 return false;
4726
4727 updateValueMap(I, ResultReg);
4728 return true;
4729 }
4730
4731 Register Op0Reg = getRegForValue(I->getOperand(0));
4732 if (!Op0Reg)
4733 return false;
4734
4735 Register Op1Reg = getRegForValue(I->getOperand(1));
4736 if (!Op1Reg)
4737 return false;
4738
4739 unsigned ResultReg = 0;
4740 switch (I->getOpcode()) {
4741 default: llvm_unreachable("Unexpected instruction.");
4742 case Instruction::Shl:
4743 ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg);
4744 break;
4745 case Instruction::AShr:
4746 ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg);
4747 break;
4748 case Instruction::LShr:
4749 ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg);
4750 break;
4751 }
4752
4753 if (!ResultReg)
4754 return false;
4755
4756 updateValueMap(I, ResultReg);
4757 return true;
4758}
4759
4760bool AArch64FastISel::selectBitCast(const Instruction *I) {
4761 MVT RetVT, SrcVT;
4762
4763 if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4764 return false;
4765 if (!isTypeLegal(I->getType(), RetVT))
4766 return false;
4767
4768 unsigned Opc;
4769 if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4770 Opc = AArch64::FMOVWSr;
4771 else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4772 Opc = AArch64::FMOVXDr;
4773 else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4774 Opc = AArch64::FMOVSWr;
4775 else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4776 Opc = AArch64::FMOVDXr;
4777 else
4778 return false;
4779
4780 const TargetRegisterClass *RC = nullptr;
4781 switch (RetVT.SimpleTy) {
4782 default: llvm_unreachable("Unexpected value type.");
4783 case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4784 case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4785 case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4786 case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4787 }
4788 Register Op0Reg = getRegForValue(I->getOperand(0));
4789 if (!Op0Reg)
4790 return false;
4791
4792 Register ResultReg = fastEmitInst_r(Opc, RC, Op0Reg);
4793 if (!ResultReg)
4794 return false;
4795
4796 updateValueMap(I, ResultReg);
4797 return true;
4798}
4799
4800bool AArch64FastISel::selectFRem(const Instruction *I) {
4801 MVT RetVT;
4802 if (!isTypeLegal(I->getType(), RetVT))
4803 return false;
4804
4805 RTLIB::Libcall LC;
4806 switch (RetVT.SimpleTy) {
4807 default:
4808 return false;
4809 case MVT::f32:
4810 LC = RTLIB::REM_F32;
4811 break;
4812 case MVT::f64:
4813 LC = RTLIB::REM_F64;
4814 break;
4815 }
4816
4817 ArgListTy Args;
4818 Args.reserve(I->getNumOperands());
4819
4820 // Populate the argument list.
4821 for (auto &Arg : I->operands()) {
4822 ArgListEntry Entry;
4823 Entry.Val = Arg;
4824 Entry.Ty = Arg->getType();
4825 Args.push_back(Entry);
4826 }
4827
4828 CallLoweringInfo CLI;
4829 MCContext &Ctx = MF->getContext();
4830 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4831 TLI.getLibcallName(LC), std::move(Args));
4832 if (!lowerCallTo(CLI))
4833 return false;
4834 updateValueMap(I, CLI.ResultReg);
4835 return true;
4836}
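// For illustration: AArch64 has no frem instruction, so an f32 frem above is
// lowered as a libcall through RTLIB::REM_F32 (normally fmodf, or fmod for
// the f64 case), reusing the same call-lowering path as the sin/cos/pow
// intrinsics.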
4837
4838bool AArch64FastISel::selectSDiv(const Instruction *I) {
4839 MVT VT;
4840 if (!isTypeLegal(I->getType(), VT))
4841 return false;
4842
4843 if (!isa<ConstantInt>(I->getOperand(1)))
4844 return selectBinaryOp(I, ISD::SDIV);
4845
4846 const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4847 if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4848 !(C.isPowerOf2() || C.isNegatedPowerOf2()))
4849 return selectBinaryOp(I, ISD::SDIV);
4850
4851 unsigned Lg2 = C.countr_zero();
4852 Register Src0Reg = getRegForValue(I->getOperand(0));
4853 if (!Src0Reg)
4854 return false;
4855
4856 if (cast<BinaryOperator>(I)->isExact()) {
4857 unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2);
4858 if (!ResultReg)
4859 return false;
4860 updateValueMap(I, ResultReg);
4861 return true;
4862 }
4863
4864 int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4865 unsigned AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne);
4866 if (!AddReg)
4867 return false;
4868
4869 // (Src0 < 0) ? Pow2 - 1 : 0;
4870 if (!emitICmp_ri(VT, Src0Reg, 0))
4871 return false;
4872
4873 unsigned SelectOpc;
4874 const TargetRegisterClass *RC;
4875 if (VT == MVT::i64) {
4876 SelectOpc = AArch64::CSELXr;
4877 RC = &AArch64::GPR64RegClass;
4878 } else {
4879 SelectOpc = AArch64::CSELWr;
4880 RC = &AArch64::GPR32RegClass;
4881 }
4882 Register SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg,
4883 AArch64CC::LT);
4884 if (!SelectReg)
4885 return false;
4886
4887 // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4888 // negate the result.
4889 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4890 unsigned ResultReg;
4891 if (C.isNegative())
4892 ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg,
4893 AArch64_AM::ASR, Lg2);
4894 else
4895 ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2);
4896
4897 if (!ResultReg)
4898 return false;
4899
4900 updateValueMap(I, ResultReg);
4901 return true;
4902}
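// Rough sketch for a non-exact "sdiv i32 %x, 8" (registers are arbitrary):
//   add  w8, w0, #7        // bias by 2^3 - 1
//   cmp  w0, #0
//   csel w8, w8, w0, lt    // use the biased value only for negative inputs
//   asr  w0, w8, #3
// For a negative power-of-two divisor the final shift is folded into the
// negation, e.g. "neg w0, w8, asr #3"; the exact-division case emits only the
// arithmetic shift.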
4903
4904/// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4905/// have to duplicate it for AArch64, because otherwise we would fail during the
4906/// sign-extend emission.
4907unsigned AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4908 Register IdxN = getRegForValue(Idx);
4909 if (IdxN == 0)
4910 // Unhandled operand. Halt "fast" selection and bail.
4911 return 0;
4912
4913 // If the index is smaller or larger than intptr_t, truncate or extend it.
4914 MVT PtrVT = TLI.getPointerTy(DL);
4915 EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4916 if (IdxVT.bitsLT(PtrVT)) {
4917 IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
4918 } else if (IdxVT.bitsGT(PtrVT))
4919 llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4920 return IdxN;
4921}
4922
4923/// This is mostly a copy of the existing FastISel GEP code, but we have to
4924/// duplicate it for AArch64, because otherwise we would bail out even for
4925/// simple cases. This is because the standard fastEmit functions don't cover
4926 /// MUL at all and ADD is lowered very inefficiently.
4927bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4928 if (Subtarget->isTargetILP32())
4929 return false;
4930
4931 Register N = getRegForValue(I->getOperand(0));
4932 if (!N)
4933 return false;
4934
4935 // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4936 // into a single N = N + TotalOffset.
4937 uint64_t TotalOffs = 0;
4938 MVT VT = TLI.getPointerTy(DL);
4939 for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
4940 GTI != E; ++GTI) {
4941 const Value *Idx = GTI.getOperand();
4942 if (auto *StTy = GTI.getStructTypeOrNull()) {
4943 unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4944 // N = N + Offset
4945 if (Field)
4946 TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4947 } else {
4948 Type *Ty = GTI.getIndexedType();
4949
4950 // If this is a constant subscript, handle it quickly.
4951 if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4952 if (CI->isZero())
4953 continue;
4954 // N = N + Offset
4955 TotalOffs +=
4956 DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
4957 continue;
4958 }
4959 if (TotalOffs) {
4960 N = emitAdd_ri_(VT, N, TotalOffs);
4961 if (!N)
4962 return false;
4963 TotalOffs = 0;
4964 }
4965
4966 // N = N + Idx * ElementSize;
4967 uint64_t ElementSize = DL.getTypeAllocSize(Ty);
4968 unsigned IdxN = getRegForGEPIndex(Idx);
4969 if (!IdxN)
4970 return false;
4971
4972 if (ElementSize != 1) {
4973 unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
4974 if (!C)
4975 return false;
4976 IdxN = emitMul_rr(VT, IdxN, C);
4977 if (!IdxN)
4978 return false;
4979 }
4980 N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
4981 if (!N)
4982 return false;
4983 }
4984 }
4985 if (TotalOffs) {
4986 N = emitAdd_ri_(VT, N, TotalOffs);
4987 if (!N)
4988 return false;
4989 }
4990 updateValueMap(I, N);
4991 return true;
4992}
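// For illustration (registers are arbitrary): for
//   %p = getelementptr inbounds [16 x i32], ptr %a, i64 0, i64 %i
// the zero index contributes nothing, and the variable index is sign-extended
// if needed, scaled by the element size and added, roughly
//   mov x9, #4
//   mul x10, x8, x9
//   add x0, x0, x10
// Constant-only GEPs instead accumulate everything into TotalOffs and emit a
// single add of the combined offset.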
4993
4994bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
4995 assert(TM.getOptLevel() == CodeGenOpt::None &&
4996 "cmpxchg survived AtomicExpand at optlevel > -O0");
4997
4998 auto *RetPairTy = cast<StructType>(I->getType());
4999 Type *RetTy = RetPairTy->getTypeAtIndex(0U);
5000 assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
5001 "cmpxchg has a non-i1 status result");
5002
5003 MVT VT;
5004 if (!isTypeLegal(RetTy, VT))
5005 return false;
5006
5007 const TargetRegisterClass *ResRC;
5008 unsigned Opc, CmpOpc;
5009 // This only supports i32/i64, because i8/i16 aren't legal, and the generic
5010 // extractvalue selection doesn't support that.
5011 if (VT == MVT::i32) {
5012 Opc = AArch64::CMP_SWAP_32;
5013 CmpOpc = AArch64::SUBSWrs;
5014 ResRC = &AArch64::GPR32RegClass;
5015 } else if (VT == MVT::i64) {
5016 Opc = AArch64::CMP_SWAP_64;
5017 CmpOpc = AArch64::SUBSXrs;
5018 ResRC = &AArch64::GPR64RegClass;
5019 } else {
5020 return false;
5021 }
5022
5023 const MCInstrDesc &II = TII.get(Opc);
5024
5025 const Register AddrReg = constrainOperandRegClass(
5026 II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
5027 const Register DesiredReg = constrainOperandRegClass(
5028 II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
5029 const Register NewReg = constrainOperandRegClass(
5030 II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
5031
5032 const Register ResultReg1 = createResultReg(ResRC);
5033 const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5034 const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5035
5036 // FIXME: MachineMemOperand doesn't support cmpxchg yet.
5037 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
5038 .addDef(ResultReg1)
5039 .addDef(ScratchReg)
5040 .addUse(AddrReg)
5041 .addUse(DesiredReg)
5042 .addUse(NewReg);
5043
5044 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc))
5045 .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5046 .addUse(ResultReg1)
5047 .addUse(DesiredReg)
5048 .addImm(0);
5049
5050 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,