AArch64FastISel.cpp
1//===- AArch64FastISel.cpp - AArch64 FastISel implementation --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the AArch64-specific support for the FastISel class. Some
10// of the target-specific code is generated by tablegen in the file
11// AArch64GenFastISel.inc, which is #included here.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AArch64.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
22#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/APInt.h"
24#include "llvm/ADT/DenseMap.h"
41#include "llvm/IR/Argument.h"
42#include "llvm/IR/Attributes.h"
43#include "llvm/IR/BasicBlock.h"
44#include "llvm/IR/CallingConv.h"
45#include "llvm/IR/Constant.h"
46#include "llvm/IR/Constants.h"
47#include "llvm/IR/DataLayout.h"
49#include "llvm/IR/Function.h"
51#include "llvm/IR/GlobalValue.h"
52#include "llvm/IR/InstrTypes.h"
53#include "llvm/IR/Instruction.h"
56#include "llvm/IR/Intrinsics.h"
57#include "llvm/IR/IntrinsicsAArch64.h"
58#include "llvm/IR/Operator.h"
59#include "llvm/IR/Type.h"
60#include "llvm/IR/User.h"
61#include "llvm/IR/Value.h"
62#include "llvm/MC/MCInstrDesc.h"
64#include "llvm/MC/MCSymbol.h"
71#include <algorithm>
72#include <cassert>
73#include <cstdint>
74#include <iterator>
75#include <utility>
76
77using namespace llvm;
78
79namespace {
80
81class AArch64FastISel final : public FastISel {
82 class Address {
83 public:
84 using BaseKind = enum {
85 RegBase,
86 FrameIndexBase
87 };
88
89 private:
90 BaseKind Kind = RegBase;
91 AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
92 union {
93 unsigned Reg;
94 int FI;
95 } Base;
96 unsigned OffsetReg = 0;
97 unsigned Shift = 0;
98 int64_t Offset = 0;
99 const GlobalValue *GV = nullptr;
100
101 public:
102 Address() { Base.Reg = 0; }
103
104 void setKind(BaseKind K) { Kind = K; }
105 BaseKind getKind() const { return Kind; }
106 void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
107 AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
108 bool isRegBase() const { return Kind == RegBase; }
109 bool isFIBase() const { return Kind == FrameIndexBase; }
110
111 void setReg(unsigned Reg) {
112 assert(isRegBase() && "Invalid base register access!");
113 Base.Reg = Reg;
114 }
115
116 unsigned getReg() const {
117 assert(isRegBase() && "Invalid base register access!");
118 return Base.Reg;
119 }
120
121 void setOffsetReg(unsigned Reg) {
122 OffsetReg = Reg;
123 }
124
125 unsigned getOffsetReg() const {
126 return OffsetReg;
127 }
128
129 void setFI(unsigned FI) {
130 assert(isFIBase() && "Invalid base frame index access!");
131 Base.FI = FI;
132 }
133
134 unsigned getFI() const {
135 assert(isFIBase() && "Invalid base frame index access!");
136 return Base.FI;
137 }
138
139 void setOffset(int64_t O) { Offset = O; }
140 int64_t getOffset() { return Offset; }
141 void setShift(unsigned S) { Shift = S; }
142 unsigned getShift() { return Shift; }
143
144 void setGlobalValue(const GlobalValue *G) { GV = G; }
145 const GlobalValue *getGlobalValue() { return GV; }
146 };
147
148 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
149 /// make the right decision when generating code for different targets.
150 const AArch64Subtarget *Subtarget;
151 LLVMContext *Context;
152
153 bool fastLowerArguments() override;
154 bool fastLowerCall(CallLoweringInfo &CLI) override;
155 bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
156
157private:
158 // Selection routines.
159 bool selectAddSub(const Instruction *I);
160 bool selectLogicalOp(const Instruction *I);
161 bool selectLoad(const Instruction *I);
162 bool selectStore(const Instruction *I);
163 bool selectBranch(const Instruction *I);
164 bool selectIndirectBr(const Instruction *I);
165 bool selectCmp(const Instruction *I);
166 bool selectSelect(const Instruction *I);
167 bool selectFPExt(const Instruction *I);
168 bool selectFPTrunc(const Instruction *I);
169 bool selectFPToInt(const Instruction *I, bool Signed);
170 bool selectIntToFP(const Instruction *I, bool Signed);
171 bool selectRem(const Instruction *I, unsigned ISDOpcode);
172 bool selectRet(const Instruction *I);
173 bool selectTrunc(const Instruction *I);
174 bool selectIntExt(const Instruction *I);
175 bool selectMul(const Instruction *I);
176 bool selectShift(const Instruction *I);
177 bool selectBitCast(const Instruction *I);
178 bool selectFRem(const Instruction *I);
179 bool selectSDiv(const Instruction *I);
180 bool selectGetElementPtr(const Instruction *I);
181 bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
182
183 // Utility helper routines.
184 bool isTypeLegal(Type *Ty, MVT &VT);
185 bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
186 bool isValueAvailable(const Value *V) const;
187 bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
188 bool computeCallAddress(const Value *V, Address &Addr);
189 bool simplifyAddress(Address &Addr, MVT VT);
190 void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
191 MachineMemOperand::Flags Flags,
192 unsigned ScaleFactor, MachineMemOperand *MMO);
193 bool isMemCpySmall(uint64_t Len, MaybeAlign Alignment);
194 bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
195 MaybeAlign Alignment);
196 bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
197 const Value *Cond);
198 bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
199 bool optimizeSelect(const SelectInst *SI);
200 unsigned getRegForGEPIndex(const Value *Idx);
201
202 // Emit helper routines.
203 unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
204 const Value *RHS, bool SetFlags = false,
205 bool WantResult = true, bool IsZExt = false);
206 unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
207 unsigned RHSReg, bool SetFlags = false,
208 bool WantResult = true);
209 unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
210 uint64_t Imm, bool SetFlags = false,
211 bool WantResult = true);
212 unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
213 unsigned RHSReg, AArch64_AM::ShiftExtendType ShiftType,
214 uint64_t ShiftImm, bool SetFlags = false,
215 bool WantResult = true);
216 unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
217 unsigned RHSReg, AArch64_AM::ShiftExtendType ExtType,
218 uint64_t ShiftImm, bool SetFlags = false,
219 bool WantResult = true);
220
221 // Emit functions.
222 bool emitCompareAndBranch(const BranchInst *BI);
223 bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
224 bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
225 bool emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
226 bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
227 unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
228 MachineMemOperand *MMO = nullptr);
229 bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
230 MachineMemOperand *MMO = nullptr);
231 bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
232 MachineMemOperand *MMO = nullptr);
233 unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
234 unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
235 unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
236 bool SetFlags = false, bool WantResult = true,
237 bool IsZExt = false);
238 unsigned emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm);
239 unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
240 bool SetFlags = false, bool WantResult = true,
241 bool IsZExt = false);
242 unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
243 bool WantResult = true);
244 unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
245 AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
246 bool WantResult = true);
247 unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
248 const Value *RHS);
249 unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
250 uint64_t Imm);
251 unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
252 unsigned RHSReg, uint64_t ShiftImm);
253 unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
254 unsigned emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1);
255 unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
256 unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
257 unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
258 unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
259 bool IsZExt = true);
260 unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
261 unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
262 bool IsZExt = true);
263 unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
264 unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
265 bool IsZExt = false);
266
267 unsigned materializeInt(const ConstantInt *CI, MVT VT);
268 unsigned materializeFP(const ConstantFP *CFP, MVT VT);
269 unsigned materializeGV(const GlobalValue *GV);
270
271 // Call handling routines.
272private:
273 CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
274 bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
275 unsigned &NumBytes);
276 bool finishCall(CallLoweringInfo &CLI, unsigned NumBytes);
277
278public:
279 // Backend specific FastISel code.
280 unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
281 unsigned fastMaterializeConstant(const Constant *C) override;
282 unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
283
284 explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
285 const TargetLibraryInfo *LibInfo)
286 : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
287 Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>();
288 Context = &FuncInfo.Fn->getContext();
289 }
290
291 bool fastSelectInstruction(const Instruction *I) override;
292
293#include "AArch64GenFastISel.inc"
294};
295
296} // end anonymous namespace
297
298/// Check if the sign-/zero-extend will be a noop.
299static bool isIntExtFree(const Instruction *I) {
300 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
301 "Unexpected integer extend instruction.");
302 assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
303 "Unexpected value type.");
304 bool IsZExt = isa<ZExtInst>(I);
305
306 if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
307 if (LI->hasOneUse())
308 return true;
309
310 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
311 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
312 return true;
313
314 return false;
315}
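// Example: for "%e = zext i32 %v to i64" where %v is a load with a single use,
// the extension above is considered free because the load itself can be
// emitted as an extending load (see optimizeIntExtLoad/emitLoad below), so no
// separate UBFM/SBFM instruction is required.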
316
317/// Determine the implicit scale factor that is applied by a memory
318/// operation for a given value type.
319static unsigned getImplicitScaleFactor(MVT VT) {
320 switch (VT.SimpleTy) {
321 default:
322 return 0; // invalid
323 case MVT::i1: // fall-through
324 case MVT::i8:
325 return 1;
326 case MVT::i16:
327 return 2;
328 case MVT::i32: // fall-through
329 case MVT::f32:
330 return 4;
331 case MVT::i64: // fall-through
332 case MVT::f64:
333 return 8;
334 }
335}
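// Example: an i32/f32 access has a scale factor of 4, matching the scaled
// 12-bit unsigned immediate form (e.g. "ldr w0, [x1, #imm]" encodes imm/4);
// the addressing helpers below fall back to the unscaled 9-bit signed form
// when the byte offset is not a multiple of this factor.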
336
337CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
338 if (CC == CallingConv::GHC)
339 return CC_AArch64_GHC;
340 if (CC == CallingConv::CFGuard_Check)
341 return CC_AArch64_Win64_CFGuard_Check;
342 if (Subtarget->isTargetDarwin())
343 return CC_AArch64_DarwinPCS;
344 if (Subtarget->isTargetWindows())
345 return CC_AArch64_Win64PCS;
346 return CC_AArch64_AAPCS;
347}
348
349unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
350 assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
351 "Alloca should always return a pointer.");
352
353 // Don't handle dynamic allocas.
354 if (!FuncInfo.StaticAllocaMap.count(AI))
355 return 0;
356
357 DenseMap<const AllocaInst *, int>::iterator SI =
358 FuncInfo.StaticAllocaMap.find(AI);
359
360 if (SI != FuncInfo.StaticAllocaMap.end()) {
361 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
362 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
363 ResultReg)
364 .addFrameIndex(SI->second)
365 .addImm(0)
366 .addImm(0);
367 return ResultReg;
368 }
369
370 return 0;
371}
372
373unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
374 if (VT > MVT::i64)
375 return 0;
376
377 if (!CI->isZero())
378 return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
379
380 // Create a copy from the zero register to materialize a "0" value.
381 const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
382 : &AArch64::GPR32RegClass;
383 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
384 Register ResultReg = createResultReg(RC);
385 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
386 ResultReg).addReg(ZeroReg, getKillRegState(true));
387 return ResultReg;
388}
389
390unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
391 // Positive zero (+0.0) has to be materialized with a fmov from the zero
392 // register, because the immediate version of fmov cannot encode zero.
393 if (CFP->isNullValue())
394 return fastMaterializeFloatZero(CFP);
395
396 if (VT != MVT::f32 && VT != MVT::f64)
397 return 0;
398
399 const APFloat Val = CFP->getValueAPF();
400 bool Is64Bit = (VT == MVT::f64);
401 // This checks to see if we can use FMOV instructions to materialize
402 // a constant, otherwise we have to materialize via the constant pool.
403 int Imm =
404 Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
405 if (Imm != -1) {
406 unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
407 return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
408 }
409
410 // For the large code model materialize the FP constant in code.
411 if (TM.getCodeModel() == CodeModel::Large) {
412 unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
413 const TargetRegisterClass *RC = Is64Bit ?
414 &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
415
416 Register TmpReg = createResultReg(RC);
417 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc1), TmpReg)
418 .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
419
420 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
421 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
422 TII.get(TargetOpcode::COPY), ResultReg)
423 .addReg(TmpReg, getKillRegState(true));
424
425 return ResultReg;
426 }
427
428 // Materialize via constant pool. MachineConstantPool wants an explicit
429 // alignment.
430 Align Alignment = DL.getPrefTypeAlign(CFP->getType());
431
432 unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
433 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
434 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
435 ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
436
437 unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
438 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
439 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
440 .addReg(ADRPReg)
441 .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
442 return ResultReg;
443}
444
445unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
446 // We can't handle thread-local variables quickly yet.
447 if (GV->isThreadLocal())
448 return 0;
449
450 // MachO still uses GOT for large code-model accesses, but ELF requires
451 // movz/movk sequences, which FastISel doesn't handle yet.
452 if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
453 return 0;
454
455 unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
456
457 EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
458 if (!DestEVT.isSimple())
459 return 0;
460
461 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
462 unsigned ResultReg;
463
464 if (OpFlags & AArch64II::MO_GOT) {
465 // ADRP + LDRX
466 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
467 ADRPReg)
468 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
469
470 unsigned LdrOpc;
471 if (Subtarget->isTargetILP32()) {
472 ResultReg = createResultReg(&AArch64::GPR32RegClass);
473 LdrOpc = AArch64::LDRWui;
474 } else {
475 ResultReg = createResultReg(&AArch64::GPR64RegClass);
476 LdrOpc = AArch64::LDRXui;
477 }
478 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(LdrOpc),
479 ResultReg)
480 .addReg(ADRPReg)
481 .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
482 AArch64II::MO_NC | OpFlags);
483 if (!Subtarget->isTargetILP32())
484 return ResultReg;
485
486 // LDRWui produces a 32-bit register, but pointers in-register are 64-bits
487 // so we must extend the result on ILP32.
488 Register Result64 = createResultReg(&AArch64::GPR64RegClass);
489 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
490 TII.get(TargetOpcode::SUBREG_TO_REG))
491 .addDef(Result64)
492 .addImm(0)
493 .addReg(ResultReg, RegState::Kill)
494 .addImm(AArch64::sub_32);
495 return Result64;
496 } else {
497 // ADRP + ADDX
498 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
499 ADRPReg)
500 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
501
502 if (OpFlags & AArch64II::MO_TAGGED) {
503 // MO_TAGGED on the page indicates a tagged address. Set the tag now.
504 // We do so by creating a MOVK that sets bits 48-63 of the register to
505 // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
506 // the small code model so we can assume a binary size of <= 4GB, which
507 // makes the untagged PC relative offset positive. The binary must also be
508 // loaded into address range [0, 2^48). Both of these properties need to
509 // be ensured at runtime when using tagged addresses.
510 //
511 // TODO: There is duplicate logic in AArch64ExpandPseudoInsts.cpp that
512 // also uses BuildMI for making an ADRP (+ MOVK) + ADD, but the operands
513 // are not exactly 1:1 with FastISel so we cannot easily abstract this
514 // out. At some point, it would be nice to find a way to not have this
515 // duplicate code.
516 unsigned DstReg = createResultReg(&AArch64::GPR64commonRegClass);
517 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::MOVKXi),
518 DstReg)
519 .addReg(ADRPReg)
520 .addGlobalAddress(GV, /*Offset=*/0x100000000,
521 AArch64II::MO_PREL | AArch64II::MO_G3)
522 .addImm(48);
523 ADRPReg = DstReg;
524 }
525
526 ResultReg = createResultReg(&AArch64::GPR64spRegClass);
527 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
528 ResultReg)
529 .addReg(ADRPReg)
530 .addGlobalAddress(GV, 0,
531 AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
532 .addImm(0);
533 }
534 return ResultReg;
535}
536
537unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
538 EVT CEVT = TLI.getValueType(DL, C->getType(), true);
539
540 // Only handle simple types.
541 if (!CEVT.isSimple())
542 return 0;
543 MVT VT = CEVT.getSimpleVT();
544 // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
545 // 'null' pointers need to have a somewhat special treatment.
546 if (isa<ConstantPointerNull>(C)) {
547 assert(VT == MVT::i64 && "Expected 64-bit pointers");
548 return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
549 }
550
551 if (const auto *CI = dyn_cast<ConstantInt>(C))
552 return materializeInt(CI, VT);
553 else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
554 return materializeFP(CFP, VT);
555 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
556 return materializeGV(GV);
557
558 return 0;
559}
560
561unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
562 assert(CFP->isNullValue() &&
563 "Floating-point constant is not a positive zero.");
564 MVT VT;
565 if (!isTypeLegal(CFP->getType(), VT))
566 return 0;
567
568 if (VT != MVT::f32 && VT != MVT::f64)
569 return 0;
570
571 bool Is64Bit = (VT == MVT::f64);
572 unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
573 unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
574 return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);
575}
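// Example: +0.0f is materialized as "fmov s0, wzr" and +0.0 as "fmov d0, xzr",
// avoiding a constant-pool load for the common floating-point zero case.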
576
577/// Check if the multiply is by a power-of-2 constant.
578static bool isMulPowOf2(const Value *I) {
579 if (const auto *MI = dyn_cast<MulOperator>(I)) {
580 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
581 if (C->getValue().isPowerOf2())
582 return true;
583 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
584 if (C->getValue().isPowerOf2())
585 return true;
586 }
587 return false;
588}
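// This lets the address computation below treat "mul i64 %idx, 8" like
// "shl i64 %idx, 3" and fold it into a register-offset addressing mode such as
// [Xbase, Xidx, lsl #3].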
589
590// Computes the address to get to an object.
591bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
592{
593 const User *U = nullptr;
594 unsigned Opcode = Instruction::UserOp1;
595 if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
596 // Don't walk into other basic blocks unless the object is an alloca from
597 // another block, otherwise it may not have a virtual register assigned.
598 if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
599 FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
600 Opcode = I->getOpcode();
601 U = I;
602 }
603 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
604 Opcode = C->getOpcode();
605 U = C;
606 }
607
608 if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
609 if (Ty->getAddressSpace() > 255)
610 // Fast instruction selection doesn't support the special
611 // address spaces.
612 return false;
613
614 switch (Opcode) {
615 default:
616 break;
617 case Instruction::BitCast:
618 // Look through bitcasts.
619 return computeAddress(U->getOperand(0), Addr, Ty);
620
621 case Instruction::IntToPtr:
622 // Look past no-op inttoptrs.
623 if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
624 TLI.getPointerTy(DL))
625 return computeAddress(U->getOperand(0), Addr, Ty);
626 break;
627
628 case Instruction::PtrToInt:
629 // Look past no-op ptrtoints.
630 if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
631 return computeAddress(U->getOperand(0), Addr, Ty);
632 break;
633
634 case Instruction::GetElementPtr: {
635 Address SavedAddr = Addr;
636 uint64_t TmpOffset = Addr.getOffset();
637
638 // Iterate through the GEP folding the constants into offsets where
639 // we can.
640 for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
641 GTI != E; ++GTI) {
642 const Value *Op = GTI.getOperand();
643 if (StructType *STy = GTI.getStructTypeOrNull()) {
644 const StructLayout *SL = DL.getStructLayout(STy);
645 unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
646 TmpOffset += SL->getElementOffset(Idx);
647 } else {
648 uint64_t S = GTI.getSequentialElementStride(DL);
649 while (true) {
650 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
651 // Constant-offset addressing.
652 TmpOffset += CI->getSExtValue() * S;
653 break;
654 }
655 if (canFoldAddIntoGEP(U, Op)) {
656 // A compatible add with a constant operand. Fold the constant.
657 ConstantInt *CI =
658 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
659 TmpOffset += CI->getSExtValue() * S;
660 // Iterate on the other operand.
661 Op = cast<AddOperator>(Op)->getOperand(0);
662 continue;
663 }
664 // Unsupported
665 goto unsupported_gep;
666 }
667 }
668 }
669
670 // Try to grab the base operand now.
671 Addr.setOffset(TmpOffset);
672 if (computeAddress(U->getOperand(0), Addr, Ty))
673 return true;
674
675 // We failed, restore everything and try the other options.
676 Addr = SavedAddr;
677
678 unsupported_gep:
679 break;
680 }
681 case Instruction::Alloca: {
682 const AllocaInst *AI = cast<AllocaInst>(Obj);
684 FuncInfo.StaticAllocaMap.find(AI);
685 if (SI != FuncInfo.StaticAllocaMap.end()) {
686 Addr.setKind(Address::FrameIndexBase);
687 Addr.setFI(SI->second);
688 return true;
689 }
690 break;
691 }
692 case Instruction::Add: {
693 // Adds of constants are common and easy enough.
694 const Value *LHS = U->getOperand(0);
695 const Value *RHS = U->getOperand(1);
696
697 if (isa<ConstantInt>(LHS))
698 std::swap(LHS, RHS);
699
700 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
701 Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
702 return computeAddress(LHS, Addr, Ty);
703 }
704
705 Address Backup = Addr;
706 if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
707 return true;
708 Addr = Backup;
709
710 break;
711 }
712 case Instruction::Sub: {
713 // Subs of constants are common and easy enough.
714 const Value *LHS = U->getOperand(0);
715 const Value *RHS = U->getOperand(1);
716
717 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
718 Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
719 return computeAddress(LHS, Addr, Ty);
720 }
721 break;
722 }
723 case Instruction::Shl: {
724 if (Addr.getOffsetReg())
725 break;
726
727 const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
728 if (!CI)
729 break;
730
731 unsigned Val = CI->getZExtValue();
732 if (Val < 1 || Val > 3)
733 break;
734
735 uint64_t NumBytes = 0;
736 if (Ty && Ty->isSized()) {
737 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
738 NumBytes = NumBits / 8;
739 if (!isPowerOf2_64(NumBits))
740 NumBytes = 0;
741 }
742
743 if (NumBytes != (1ULL << Val))
744 break;
745
746 Addr.setShift(Val);
747 Addr.setExtendType(AArch64_AM::LSL);
748
749 const Value *Src = U->getOperand(0);
750 if (const auto *I = dyn_cast<Instruction>(Src)) {
751 if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
752 // Fold the zext or sext when it won't become a noop.
753 if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
754 if (!isIntExtFree(ZE) &&
755 ZE->getOperand(0)->getType()->isIntegerTy(32)) {
756 Addr.setExtendType(AArch64_AM::UXTW);
757 Src = ZE->getOperand(0);
758 }
759 } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
760 if (!isIntExtFree(SE) &&
761 SE->getOperand(0)->getType()->isIntegerTy(32)) {
762 Addr.setExtendType(AArch64_AM::SXTW);
763 Src = SE->getOperand(0);
764 }
765 }
766 }
767 }
768
769 if (const auto *AI = dyn_cast<BinaryOperator>(Src))
770 if (AI->getOpcode() == Instruction::And) {
771 const Value *LHS = AI->getOperand(0);
772 const Value *RHS = AI->getOperand(1);
773
774 if (const auto *C = dyn_cast<ConstantInt>(LHS))
775 if (C->getValue() == 0xffffffff)
776 std::swap(LHS, RHS);
777
778 if (const auto *C = dyn_cast<ConstantInt>(RHS))
779 if (C->getValue() == 0xffffffff) {
780 Addr.setExtendType(AArch64_AM::UXTW);
781 Register Reg = getRegForValue(LHS);
782 if (!Reg)
783 return false;
784 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
785 Addr.setOffsetReg(Reg);
786 return true;
787 }
788 }
789
790 Register Reg = getRegForValue(Src);
791 if (!Reg)
792 return false;
793 Addr.setOffsetReg(Reg);
794 return true;
795 }
796 case Instruction::Mul: {
797 if (Addr.getOffsetReg())
798 break;
799
800 if (!isMulPowOf2(U))
801 break;
802
803 const Value *LHS = U->getOperand(0);
804 const Value *RHS = U->getOperand(1);
805
806 // Canonicalize power-of-2 value to the RHS.
807 if (const auto *C = dyn_cast<ConstantInt>(LHS))
808 if (C->getValue().isPowerOf2())
809 std::swap(LHS, RHS);
810
811 assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
812 const auto *C = cast<ConstantInt>(RHS);
813 unsigned Val = C->getValue().logBase2();
814 if (Val < 1 || Val > 3)
815 break;
816
817 uint64_t NumBytes = 0;
818 if (Ty && Ty->isSized()) {
819 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
820 NumBytes = NumBits / 8;
821 if (!isPowerOf2_64(NumBits))
822 NumBytes = 0;
823 }
824
825 if (NumBytes != (1ULL << Val))
826 break;
827
828 Addr.setShift(Val);
829 Addr.setExtendType(AArch64_AM::LSL);
830
831 const Value *Src = LHS;
832 if (const auto *I = dyn_cast<Instruction>(Src)) {
833 if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
834 // Fold the zext or sext when it won't become a noop.
835 if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
836 if (!isIntExtFree(ZE) &&
837 ZE->getOperand(0)->getType()->isIntegerTy(32)) {
838 Addr.setExtendType(AArch64_AM::UXTW);
839 Src = ZE->getOperand(0);
840 }
841 } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
842 if (!isIntExtFree(SE) &&
843 SE->getOperand(0)->getType()->isIntegerTy(32)) {
844 Addr.setExtendType(AArch64_AM::SXTW);
845 Src = SE->getOperand(0);
846 }
847 }
848 }
849 }
850
851 Register Reg = getRegForValue(Src);
852 if (!Reg)
853 return false;
854 Addr.setOffsetReg(Reg);
855 return true;
856 }
857 case Instruction::And: {
858 if (Addr.getOffsetReg())
859 break;
860
861 if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
862 break;
863
864 const Value *LHS = U->getOperand(0);
865 const Value *RHS = U->getOperand(1);
866
867 if (const auto *C = dyn_cast<ConstantInt>(LHS))
868 if (C->getValue() == 0xffffffff)
869 std::swap(LHS, RHS);
870
871 if (const auto *C = dyn_cast<ConstantInt>(RHS))
872 if (C->getValue() == 0xffffffff) {
873 Addr.setShift(0);
874 Addr.setExtendType(AArch64_AM::LSL);
875 Addr.setExtendType(AArch64_AM::UXTW);
876
877 Register Reg = getRegForValue(LHS);
878 if (!Reg)
879 return false;
880 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
881 Addr.setOffsetReg(Reg);
882 return true;
883 }
884 break;
885 }
886 case Instruction::SExt:
887 case Instruction::ZExt: {
888 if (!Addr.getReg() || Addr.getOffsetReg())
889 break;
890
891 const Value *Src = nullptr;
892 // Fold the zext or sext when it won't become a noop.
893 if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
894 if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
895 Addr.setExtendType(AArch64_AM::UXTW);
896 Src = ZE->getOperand(0);
897 }
898 } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
899 if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
900 Addr.setExtendType(AArch64_AM::SXTW);
901 Src = SE->getOperand(0);
902 }
903 }
904
905 if (!Src)
906 break;
907
908 Addr.setShift(0);
909 Register Reg = getRegForValue(Src);
910 if (!Reg)
911 return false;
912 Addr.setOffsetReg(Reg);
913 return true;
914 }
915 } // end switch
916
917 if (Addr.isRegBase() && !Addr.getReg()) {
918 Register Reg = getRegForValue(Obj);
919 if (!Reg)
920 return false;
921 Addr.setReg(Reg);
922 return true;
923 }
924
925 if (!Addr.getOffsetReg()) {
926 Register Reg = getRegForValue(Obj);
927 if (!Reg)
928 return false;
929 Addr.setOffsetReg(Reg);
930 return true;
931 }
932
933 return false;
934}
935
936bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
937 const User *U = nullptr;
938 unsigned Opcode = Instruction::UserOp1;
939 bool InMBB = true;
940
941 if (const auto *I = dyn_cast<Instruction>(V)) {
942 Opcode = I->getOpcode();
943 U = I;
944 InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
945 } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
946 Opcode = C->getOpcode();
947 U = C;
948 }
949
950 switch (Opcode) {
951 default: break;
952 case Instruction::BitCast:
953 // Look past bitcasts if its operand is in the same BB.
954 if (InMBB)
955 return computeCallAddress(U->getOperand(0), Addr);
956 break;
957 case Instruction::IntToPtr:
958 // Look past no-op inttoptrs if its operand is in the same BB.
959 if (InMBB &&
960 TLI.getValueType(DL, U->getOperand(0)->getType()) ==
961 TLI.getPointerTy(DL))
962 return computeCallAddress(U->getOperand(0), Addr);
963 break;
964 case Instruction::PtrToInt:
965 // Look past no-op ptrtoints if its operand is in the same BB.
966 if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
967 return computeCallAddress(U->getOperand(0), Addr);
968 break;
969 }
970
971 if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
972 Addr.setGlobalValue(GV);
973 return true;
974 }
975
976 // If all else fails, try to materialize the value in a register.
977 if (!Addr.getGlobalValue()) {
978 Addr.setReg(getRegForValue(V));
979 return Addr.getReg() != 0;
980 }
981
982 return false;
983}
984
985bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
986 EVT evt = TLI.getValueType(DL, Ty, true);
987
988 if (Subtarget->isTargetILP32() && Ty->isPointerTy())
989 return false;
990
991 // Only handle simple types.
992 if (evt == MVT::Other || !evt.isSimple())
993 return false;
994 VT = evt.getSimpleVT();
995
996 // This is a legal type, but it's not something we handle in fast-isel.
997 if (VT == MVT::f128)
998 return false;
999
1000 // Handle all other legal types, i.e. a register that will directly hold this
1001 // value.
1002 return TLI.isTypeLegal(VT);
1003}
1004
1005/// Determine if the value type is supported by FastISel.
1006///
1007/// FastISel for AArch64 can handle more value types than are legal. This adds
1008/// simple value types such as i1, i8, and i16.
1009bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
1010 if (Ty->isVectorTy() && !IsVectorAllowed)
1011 return false;
1012
1013 if (isTypeLegal(Ty, VT))
1014 return true;
1015
1016 // If this is a type that can be sign- or zero-extended to a basic operation,
1017 // go ahead and accept it now.
1018 if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
1019 return true;
1020
1021 return false;
1022}
1023
1024bool AArch64FastISel::isValueAvailable(const Value *V) const {
1025 if (!isa<Instruction>(V))
1026 return true;
1027
1028 const auto *I = cast<Instruction>(V);
1029 return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
1030}
1031
1032bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
1033 if (Subtarget->isTargetILP32())
1034 return false;
1035
1036 unsigned ScaleFactor = getImplicitScaleFactor(VT);
1037 if (!ScaleFactor)
1038 return false;
1039
1040 bool ImmediateOffsetNeedsLowering = false;
1041 bool RegisterOffsetNeedsLowering = false;
1042 int64_t Offset = Addr.getOffset();
1043 if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
1044 ImmediateOffsetNeedsLowering = true;
1045 else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1046 !isUInt<12>(Offset / ScaleFactor))
1047 ImmediateOffsetNeedsLowering = true;
1048
1049 // Cannot encode an offset register and an immediate offset in the same
1050 // instruction. Fold the immediate offset into the load/store instruction and
1051 // emit an additional add to take care of the offset register.
1052 if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1053 RegisterOffsetNeedsLowering = true;
1054
1055 // Cannot encode zero register as base.
1056 if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1057 RegisterOffsetNeedsLowering = true;
1058
1059 // If this is a stack pointer and the offset needs to be simplified then put
1060 // the alloca address into a register, set the base type back to register and
1061 // continue. This should almost never happen.
1062 if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1063 {
1064 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1065 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
1066 ResultReg)
1067 .addFrameIndex(Addr.getFI())
1068 .addImm(0)
1069 .addImm(0);
1070 Addr.setKind(Address::RegBase);
1071 Addr.setReg(ResultReg);
1072 }
1073
1074 if (RegisterOffsetNeedsLowering) {
1075 unsigned ResultReg = 0;
1076 if (Addr.getReg()) {
1077 if (Addr.getExtendType() == AArch64_AM::SXTW ||
1078 Addr.getExtendType() == AArch64_AM::UXTW )
1079 ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1080 Addr.getOffsetReg(), Addr.getExtendType(),
1081 Addr.getShift());
1082 else
1083 ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1084 Addr.getOffsetReg(), AArch64_AM::LSL,
1085 Addr.getShift());
1086 } else {
1087 if (Addr.getExtendType() == AArch64_AM::UXTW)
1088 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1089 Addr.getShift(), /*IsZExt=*/true);
1090 else if (Addr.getExtendType() == AArch64_AM::SXTW)
1091 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1092 Addr.getShift(), /*IsZExt=*/false);
1093 else
1094 ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1095 Addr.getShift());
1096 }
1097 if (!ResultReg)
1098 return false;
1099
1100 Addr.setReg(ResultReg);
1101 Addr.setOffsetReg(0);
1102 Addr.setShift(0);
1103 Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1104 }
1105
1106 // Since the offset is too large for the load/store instruction get the
1107 // reg+offset into a register.
1108 if (ImmediateOffsetNeedsLowering) {
1109 unsigned ResultReg;
1110 if (Addr.getReg())
1111 // Try to fold the immediate into the add instruction.
1112 ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
1113 else
1114 ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1115
1116 if (!ResultReg)
1117 return false;
1118 Addr.setReg(ResultReg);
1119 Addr.setOffset(0);
1120 }
1121 return true;
1122}
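// Rough examples of the lowering above: a frame-index base combined with a
// register offset is first converted to a plain register base via ADDXri, and
// an immediate offset that fits neither the unscaled signed 9-bit form nor the
// scaled unsigned 12-bit form is folded into the base register with
// emitAdd_ri_(), leaving Offset == 0 for the memory instruction itself.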
1123
1124void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1125 const MachineInstrBuilder &MIB,
1126 MachineMemOperand::Flags Flags,
1127 unsigned ScaleFactor,
1128 MachineMemOperand *MMO) {
1129 int64_t Offset = Addr.getOffset() / ScaleFactor;
1130 // Frame base works a bit differently. Handle it separately.
1131 if (Addr.isFIBase()) {
1132 int FI = Addr.getFI();
1133 // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
1134 // and alignment should be based on the VT.
1135 MMO = FuncInfo.MF->getMachineMemOperand(
1136 MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1137 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
1138 // Now add the rest of the operands.
1139 MIB.addFrameIndex(FI).addImm(Offset);
1140 } else {
1141 assert(Addr.isRegBase() && "Unexpected address kind.");
1142 const MCInstrDesc &II = MIB->getDesc();
1143 unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1144 Addr.setReg(
1145 constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1146 Addr.setOffsetReg(
1147 constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1148 if (Addr.getOffsetReg()) {
1149 assert(Addr.getOffset() == 0 && "Unexpected offset");
1150 bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1151 Addr.getExtendType() == AArch64_AM::SXTX;
1152 MIB.addReg(Addr.getReg());
1153 MIB.addReg(Addr.getOffsetReg());
1154 MIB.addImm(IsSigned);
1155 MIB.addImm(Addr.getShift() != 0);
1156 } else
1157 MIB.addReg(Addr.getReg()).addImm(Offset);
1158 }
1159
1160 if (MMO)
1161 MIB.addMemOperand(MMO);
1162}
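// Roughly, the register-offset path above corresponds to forms such as
// "ldr x0, [x1, w2, sxtw #3]": the two immediates encode whether the offset
// register is sign-extended and whether the implicit shift amount is applied.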
1163
1164unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1165 const Value *RHS, bool SetFlags,
1166 bool WantResult, bool IsZExt) {
1167 AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1168 bool NeedExtend = false;
1169 switch (RetVT.SimpleTy) {
1170 default:
1171 return 0;
1172 case MVT::i1:
1173 NeedExtend = true;
1174 break;
1175 case MVT::i8:
1176 NeedExtend = true;
1177 ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1178 break;
1179 case MVT::i16:
1180 NeedExtend = true;
1181 ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1182 break;
1183 case MVT::i32: // fall-through
1184 case MVT::i64:
1185 break;
1186 }
1187 MVT SrcVT = RetVT;
1188 RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1189
1190 // Canonicalize immediates to the RHS first.
1191 if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1192 std::swap(LHS, RHS);
1193
1194 // Canonicalize mul by power of 2 to the RHS.
1195 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1196 if (isMulPowOf2(LHS))
1197 std::swap(LHS, RHS);
1198
1199 // Canonicalize shift immediate to the RHS.
1200 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1201 if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1202 if (isa<ConstantInt>(SI->getOperand(1)))
1203 if (SI->getOpcode() == Instruction::Shl ||
1204 SI->getOpcode() == Instruction::LShr ||
1205 SI->getOpcode() == Instruction::AShr )
1206 std::swap(LHS, RHS);
1207
1208 Register LHSReg = getRegForValue(LHS);
1209 if (!LHSReg)
1210 return 0;
1211
1212 if (NeedExtend)
1213 LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1214
1215 unsigned ResultReg = 0;
1216 if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1217 uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1218 if (C->isNegative())
1219 ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
1220 WantResult);
1221 else
1222 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
1223 WantResult);
1224 } else if (const auto *C = dyn_cast<Constant>(RHS))
1225 if (C->isNullValue())
1226 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);
1227
1228 if (ResultReg)
1229 return ResultReg;
1230
1231 // Only extend the RHS within the instruction if there is a valid extend type.
1232 if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1233 isValueAvailable(RHS)) {
1234 Register RHSReg = getRegForValue(RHS);
1235 if (!RHSReg)
1236 return 0;
1237 return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,
1238 SetFlags, WantResult);
1239 }
1240
1241 // Check if the mul can be folded into the instruction.
1242 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1243 if (isMulPowOf2(RHS)) {
1244 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1245 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1246
1247 if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1248 if (C->getValue().isPowerOf2())
1249 std::swap(MulLHS, MulRHS);
1250
1251 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1252 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1253 Register RHSReg = getRegForValue(MulLHS);
1254 if (!RHSReg)
1255 return 0;
1256 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,
1257 ShiftVal, SetFlags, WantResult);
1258 if (ResultReg)
1259 return ResultReg;
1260 }
1261 }
1262
1263 // Check if the shift can be folded into the instruction.
1264 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1265 if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1266 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1267 AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1268 switch (SI->getOpcode()) {
1269 default: break;
1270 case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
1271 case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1272 case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1273 }
1274 uint64_t ShiftVal = C->getZExtValue();
1275 if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1276 Register RHSReg = getRegForValue(SI->getOperand(0));
1277 if (!RHSReg)
1278 return 0;
1279 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
1280 ShiftVal, SetFlags, WantResult);
1281 if (ResultReg)
1282 return ResultReg;
1283 }
1284 }
1285 }
1286 }
1287
1288 Register RHSReg = getRegForValue(RHS);
1289 if (!RHSReg)
1290 return 0;
1291
1292 if (NeedExtend)
1293 RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1294
1295 return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
1296}
1297
1298unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1299 unsigned RHSReg, bool SetFlags,
1300 bool WantResult) {
1301 assert(LHSReg && RHSReg && "Invalid register number.");
1302
1303 if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1304 RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1305 return 0;
1306
1307 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1308 return 0;
1309
1310 static const unsigned OpcTable[2][2][2] = {
1311 { { AArch64::SUBWrr, AArch64::SUBXrr },
1312 { AArch64::ADDWrr, AArch64::ADDXrr } },
1313 { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1314 { AArch64::ADDSWrr, AArch64::ADDSXrr } }
1315 };
1316 bool Is64Bit = RetVT == MVT::i64;
1317 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1318 const TargetRegisterClass *RC =
1319 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1320 unsigned ResultReg;
1321 if (WantResult)
1322 ResultReg = createResultReg(RC);
1323 else
1324 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1325
1326 const MCInstrDesc &II = TII.get(Opc);
1327 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1328 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1329 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1330 .addReg(LHSReg)
1331 .addReg(RHSReg);
1332 return ResultReg;
1333}
1334
1335unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1336 uint64_t Imm, bool SetFlags,
1337 bool WantResult) {
1338 assert(LHSReg && "Invalid register number.");
1339
1340 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1341 return 0;
1342
1343 unsigned ShiftImm;
1344 if (isUInt<12>(Imm))
1345 ShiftImm = 0;
1346 else if ((Imm & 0xfff000) == Imm) {
1347 ShiftImm = 12;
1348 Imm >>= 12;
1349 } else
1350 return 0;
1351
1352 static const unsigned OpcTable[2][2][2] = {
1353 { { AArch64::SUBWri, AArch64::SUBXri },
1354 { AArch64::ADDWri, AArch64::ADDXri } },
1355 { { AArch64::SUBSWri, AArch64::SUBSXri },
1356 { AArch64::ADDSWri, AArch64::ADDSXri } }
1357 };
1358 bool Is64Bit = RetVT == MVT::i64;
1359 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1360 const TargetRegisterClass *RC;
1361 if (SetFlags)
1362 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1363 else
1364 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1365 unsigned ResultReg;
1366 if (WantResult)
1367 ResultReg = createResultReg(RC);
1368 else
1369 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1370
1371 const MCInstrDesc &II = TII.get(Opc);
1372 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1373 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1374 .addReg(LHSReg)
1375 .addImm(Imm)
1376 .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1377 return ResultReg;
1378}
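// Example: Imm = 0x5000 is encoded as 0x5 with the "LSL #12" shifter. An
// immediate that fits neither the plain 12-bit form nor the shifted form makes
// this helper return 0, so callers fall back to materializing the constant in
// a register (see emitAdd_ri_).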
1379
1380unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1381 unsigned RHSReg,
1382 AArch64_AM::ShiftExtendType ShiftType,
1383 uint64_t ShiftImm, bool SetFlags,
1384 bool WantResult) {
1385 assert(LHSReg && RHSReg && "Invalid register number.");
1386 assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1387 RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1388
1389 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1390 return 0;
1391
1392 // Don't deal with undefined shifts.
1393 if (ShiftImm >= RetVT.getSizeInBits())
1394 return 0;
1395
1396 static const unsigned OpcTable[2][2][2] = {
1397 { { AArch64::SUBWrs, AArch64::SUBXrs },
1398 { AArch64::ADDWrs, AArch64::ADDXrs } },
1399 { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1400 { AArch64::ADDSWrs, AArch64::ADDSXrs } }
1401 };
1402 bool Is64Bit = RetVT == MVT::i64;
1403 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1404 const TargetRegisterClass *RC =
1405 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1406 unsigned ResultReg;
1407 if (WantResult)
1408 ResultReg = createResultReg(RC);
1409 else
1410 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1411
1412 const MCInstrDesc &II = TII.get(Opc);
1413 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1414 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1415 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1416 .addReg(LHSReg)
1417 .addReg(RHSReg)
1418 .addImm(getShifterImm(ShiftType, ShiftImm));
1419 return ResultReg;
1420}
1421
1422unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1423 unsigned RHSReg,
1424 AArch64_AM::ShiftExtendType ExtType,
1425 uint64_t ShiftImm, bool SetFlags,
1426 bool WantResult) {
1427 assert(LHSReg && RHSReg && "Invalid register number.");
1428 assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1429 RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1430
1431 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1432 return 0;
1433
1434 if (ShiftImm >= 4)
1435 return 0;
1436
1437 static const unsigned OpcTable[2][2][2] = {
1438 { { AArch64::SUBWrx, AArch64::SUBXrx },
1439 { AArch64::ADDWrx, AArch64::ADDXrx } },
1440 { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1441 { AArch64::ADDSWrx, AArch64::ADDSXrx } }
1442 };
1443 bool Is64Bit = RetVT == MVT::i64;
1444 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1445 const TargetRegisterClass *RC = nullptr;
1446 if (SetFlags)
1447 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1448 else
1449 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1450 unsigned ResultReg;
1451 if (WantResult)
1452 ResultReg = createResultReg(RC);
1453 else
1454 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1455
1456 const MCInstrDesc &II = TII.get(Opc);
1457 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1458 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1459 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1460 .addReg(LHSReg)
1461 .addReg(RHSReg)
1462 .addImm(getArithExtendImm(ExtType, ShiftImm));
1463 return ResultReg;
1464}
1465
1466bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1467 Type *Ty = LHS->getType();
1468 EVT EVT = TLI.getValueType(DL, Ty, true);
1469 if (!EVT.isSimple())
1470 return false;
1471 MVT VT = EVT.getSimpleVT();
1472
1473 switch (VT.SimpleTy) {
1474 default:
1475 return false;
1476 case MVT::i1:
1477 case MVT::i8:
1478 case MVT::i16:
1479 case MVT::i32:
1480 case MVT::i64:
1481 return emitICmp(VT, LHS, RHS, IsZExt);
1482 case MVT::f32:
1483 case MVT::f64:
1484 return emitFCmp(VT, LHS, RHS);
1485 }
1486}
1487
1488bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1489 bool IsZExt) {
1490 return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1491 IsZExt) != 0;
1492}
1493
1494bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) {
1495 return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,
1496 /*SetFlags=*/true, /*WantResult=*/false) != 0;
1497}
1498
1499bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1500 if (RetVT != MVT::f32 && RetVT != MVT::f64)
1501 return false;
1502
1503 // Check to see if the 2nd operand is a constant that we can encode directly
1504 // in the compare.
1505 bool UseImm = false;
1506 if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1507 if (CFP->isZero() && !CFP->isNegative())
1508 UseImm = true;
1509
1510 Register LHSReg = getRegForValue(LHS);
1511 if (!LHSReg)
1512 return false;
1513
1514 if (UseImm) {
1515 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1516 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1517 .addReg(LHSReg);
1518 return true;
1519 }
1520
1521 Register RHSReg = getRegForValue(RHS);
1522 if (!RHSReg)
1523 return false;
1524
1525 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1526 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1527 .addReg(LHSReg)
1528 .addReg(RHSReg);
1529 return true;
1530}
1531
1532unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1533 bool SetFlags, bool WantResult, bool IsZExt) {
1534 return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1535 IsZExt);
1536}
1537
1538/// This method is a wrapper to simplify add emission.
1539///
1540/// First try to emit an add with an immediate operand using emitAddSub_ri. If
1541/// that fails, then try to materialize the immediate into a register and use
1542/// emitAddSub_rr instead.
1543unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm) {
1544 unsigned ResultReg;
1545 if (Imm < 0)
1546 ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);
1547 else
1548 ResultReg = emitAddSub_ri(true, VT, Op0, Imm);
1549
1550 if (ResultReg)
1551 return ResultReg;
1552
1553 unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1554 if (!CReg)
1555 return 0;
1556
1557 ResultReg = emitAddSub_rr(true, VT, Op0, CReg);
1558 return ResultReg;
1559}
1560
1561unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1562 bool SetFlags, bool WantResult, bool IsZExt) {
1563 return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1564 IsZExt);
1565}
1566
1567unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1568 unsigned RHSReg, bool WantResult) {
1569 return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg,
1570 /*SetFlags=*/true, WantResult);
1571}
1572
1573unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1574 unsigned RHSReg,
1575 AArch64_AM::ShiftExtendType ShiftType,
1576 uint64_t ShiftImm, bool WantResult) {
1577 return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType,
1578 ShiftImm, /*SetFlags=*/true, WantResult);
1579}
1580
1581unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1582 const Value *LHS, const Value *RHS) {
1583 // Canonicalize immediates to the RHS first.
1584 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1585 std::swap(LHS, RHS);
1586
1587 // Canonicalize mul by power-of-2 to the RHS.
1588 if (LHS->hasOneUse() && isValueAvailable(LHS))
1589 if (isMulPowOf2(LHS))
1590 std::swap(LHS, RHS);
1591
1592 // Canonicalize shift immediate to the RHS.
1593 if (LHS->hasOneUse() && isValueAvailable(LHS))
1594 if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1595 if (isa<ConstantInt>(SI->getOperand(1)))
1596 std::swap(LHS, RHS);
1597
1598 Register LHSReg = getRegForValue(LHS);
1599 if (!LHSReg)
1600 return 0;
1601
1602 unsigned ResultReg = 0;
1603 if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1604 uint64_t Imm = C->getZExtValue();
1605 ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);
1606 }
1607 if (ResultReg)
1608 return ResultReg;
1609
1610 // Check if the mul can be folded into the instruction.
1611 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1612 if (isMulPowOf2(RHS)) {
1613 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1614 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1615
1616 if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1617 if (C->getValue().isPowerOf2())
1618 std::swap(MulLHS, MulRHS);
1619
1620 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1621 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1622
1623 Register RHSReg = getRegForValue(MulLHS);
1624 if (!RHSReg)
1625 return 0;
1626 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1627 if (ResultReg)
1628 return ResultReg;
1629 }
1630 }
1631
1632 // Check if the shift can be folded into the instruction.
1633 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1634 if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1635 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1636 uint64_t ShiftVal = C->getZExtValue();
1637 Register RHSReg = getRegForValue(SI->getOperand(0));
1638 if (!RHSReg)
1639 return 0;
1640 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1641 if (ResultReg)
1642 return ResultReg;
1643 }
1644 }
1645
1646 Register RHSReg = getRegForValue(RHS);
1647 if (!RHSReg)
1648 return 0;
1649
1650 MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1651 ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);
1652 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1653 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1654 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1655 }
1656 return ResultReg;
1657}
1658
1659unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1660 unsigned LHSReg, uint64_t Imm) {
1661 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1662 "ISD nodes are not consecutive!");
1663 static const unsigned OpcTable[3][2] = {
1664 { AArch64::ANDWri, AArch64::ANDXri },
1665 { AArch64::ORRWri, AArch64::ORRXri },
1666 { AArch64::EORWri, AArch64::EORXri }
1667 };
1668 const TargetRegisterClass *RC;
1669 unsigned Opc;
1670 unsigned RegSize;
1671 switch (RetVT.SimpleTy) {
1672 default:
1673 return 0;
1674 case MVT::i1:
1675 case MVT::i8:
1676 case MVT::i16:
1677 case MVT::i32: {
1678 unsigned Idx = ISDOpc - ISD::AND;
1679 Opc = OpcTable[Idx][0];
1680 RC = &AArch64::GPR32spRegClass;
1681 RegSize = 32;
1682 break;
1683 }
1684 case MVT::i64:
1685 Opc = OpcTable[ISDOpc - ISD::AND][1];
1686 RC = &AArch64::GPR64spRegClass;
1687 RegSize = 64;
1688 break;
1689 }
1690
1691 if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1692 return 0;
1693
1694 Register ResultReg =
1695 fastEmitInst_ri(Opc, RC, LHSReg,
1696 AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1697 if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1698 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1699 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1700 }
1701 return ResultReg;
1702}
1703
1704unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1705 unsigned LHSReg, unsigned RHSReg,
1706 uint64_t ShiftImm) {
1707 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1708 "ISD nodes are not consecutive!");
1709 static const unsigned OpcTable[3][2] = {
1710 { AArch64::ANDWrs, AArch64::ANDXrs },
1711 { AArch64::ORRWrs, AArch64::ORRXrs },
1712 { AArch64::EORWrs, AArch64::EORXrs }
1713 };
1714
1715 // Don't deal with undefined shifts.
1716 if (ShiftImm >= RetVT.getSizeInBits())
1717 return 0;
1718
1719 const TargetRegisterClass *RC;
1720 unsigned Opc;
1721 switch (RetVT.SimpleTy) {
1722 default:
1723 return 0;
1724 case MVT::i1:
1725 case MVT::i8:
1726 case MVT::i16:
1727 case MVT::i32:
1728 Opc = OpcTable[ISDOpc - ISD::AND][0];
1729 RC = &AArch64::GPR32RegClass;
1730 break;
1731 case MVT::i64:
1732 Opc = OpcTable[ISDOpc - ISD::AND][1];
1733 RC = &AArch64::GPR64RegClass;
1734 break;
1735 }
1736 Register ResultReg =
1737 fastEmitInst_rri(Opc, RC, LHSReg, RHSReg,
1738 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1739 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1740 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1741 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1742 }
1743 return ResultReg;
1744}
1745
1746unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg,
1747 uint64_t Imm) {
1748 return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm);
1749}
1750
1751unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1752 bool WantZExt, MachineMemOperand *MMO) {
1753 if (!TLI.allowsMisalignedMemoryAccesses(VT))
1754 return 0;
1755
1756 // Simplify this down to something we can handle.
1757 if (!simplifyAddress(Addr, VT))
1758 return 0;
1759
1760 unsigned ScaleFactor = getImplicitScaleFactor(VT);
1761 if (!ScaleFactor)
1762 llvm_unreachable("Unexpected value type.");
1763
1764 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1765 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1766 bool UseScaled = true;
1767 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1768 UseScaled = false;
1769 ScaleFactor = 1;
1770 }
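// Illustrative example (hypothetical registers): for an i32 load, an offset of 4092
// fits the scaled 12-bit form (roughly "ldr w0, [x1, #4092]"), while an offset of -4
// has to use the unscaled 9-bit signed form ("ldur w0, [x1, #-4]").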
1771
1772 static const unsigned GPOpcTable[2][8][4] = {
1773 // Sign-extend.
1774 { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
1775 AArch64::LDURXi },
1776 { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
1777 AArch64::LDURXi },
1778 { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
1779 AArch64::LDRXui },
1780 { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
1781 AArch64::LDRXui },
1782 { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1783 AArch64::LDRXroX },
1784 { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1785 AArch64::LDRXroX },
1786 { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1787 AArch64::LDRXroW },
1788 { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1789 AArch64::LDRXroW }
1790 },
1791 // Zero-extend.
1792 { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1793 AArch64::LDURXi },
1794 { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1795 AArch64::LDURXi },
1796 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1797 AArch64::LDRXui },
1798 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1799 AArch64::LDRXui },
1800 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1801 AArch64::LDRXroX },
1802 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1803 AArch64::LDRXroX },
1804 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1805 AArch64::LDRXroW },
1806 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1807 AArch64::LDRXroW }
1808 }
1809 };
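// Layout note (added for clarity): within each sign-/zero-extend half the row index is
// 2 * Idx + IsRet64Bit, where Idx selects the addressing form (unscaled immediate,
// scaled immediate, register offset with X extend, register offset with W extend) and
// the column selects the access width (i8, i16, i32, i64).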
1810
1811 static const unsigned FPOpcTable[4][2] = {
1812 { AArch64::LDURSi, AArch64::LDURDi },
1813 { AArch64::LDRSui, AArch64::LDRDui },
1814 { AArch64::LDRSroX, AArch64::LDRDroX },
1815 { AArch64::LDRSroW, AArch64::LDRDroW }
1816 };
1817
1818 unsigned Opc;
1819 const TargetRegisterClass *RC;
1820 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1821 Addr.getOffsetReg();
1822 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1823 if (Addr.getExtendType() == AArch64_AM::UXTW ||
1824 Addr.getExtendType() == AArch64_AM::SXTW)
1825 Idx++;
1826
1827 bool IsRet64Bit = RetVT == MVT::i64;
1828 switch (VT.SimpleTy) {
1829 default:
1830 llvm_unreachable("Unexpected value type.");
1831 case MVT::i1: // Intentional fall-through.
1832 case MVT::i8:
1833 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1834 RC = (IsRet64Bit && !WantZExt) ?
1835 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1836 break;
1837 case MVT::i16:
1838 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1839 RC = (IsRet64Bit && !WantZExt) ?
1840 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1841 break;
1842 case MVT::i32:
1843 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1844 RC = (IsRet64Bit && !WantZExt) ?
1845 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1846 break;
1847 case MVT::i64:
1848 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1849 RC = &AArch64::GPR64RegClass;
1850 break;
1851 case MVT::f32:
1852 Opc = FPOpcTable[Idx][0];
1853 RC = &AArch64::FPR32RegClass;
1854 break;
1855 case MVT::f64:
1856 Opc = FPOpcTable[Idx][1];
1857 RC = &AArch64::FPR64RegClass;
1858 break;
1859 }
1860
1861 // Create the base instruction, then add the operands.
1862 Register ResultReg = createResultReg(RC);
1863 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1864 TII.get(Opc), ResultReg);
1865 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1866
1867 // Loading an i1 requires special handling.
1868 if (VT == MVT::i1) {
1869 unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1);
1870 assert(ANDReg && "Unexpected AND instruction emission failure.");
1871 ResultReg = ANDReg;
1872 }
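// Illustrative result (hypothetical registers): an i1 load ends up as roughly
// "ldrb w8, [x0]" followed by "and w8, w8, #0x1", so only bit 0 is defined.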
1873
1874 // For zero-extending loads to 64bit we emit a 32bit load and then convert
1875 // the 32bit reg to a 64bit reg.
1876 if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1877 Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
1878 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1879 TII.get(AArch64::SUBREG_TO_REG), Reg64)
1880 .addImm(0)
1881 .addReg(ResultReg, getKillRegState(true))
1882 .addImm(AArch64::sub_32);
1883 ResultReg = Reg64;
1884 }
1885 return ResultReg;
1886}
1887
1888bool AArch64FastISel::selectAddSub(const Instruction *I) {
1889 MVT VT;
1890 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1891 return false;
1892
1893 if (VT.isVector())
1894 return selectOperator(I, I->getOpcode());
1895
1896 unsigned ResultReg;
1897 switch (I->getOpcode()) {
1898 default:
1899 llvm_unreachable("Unexpected instruction.");
1900 case Instruction::Add:
1901 ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1902 break;
1903 case Instruction::Sub:
1904 ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1905 break;
1906 }
1907 if (!ResultReg)
1908 return false;
1909
1910 updateValueMap(I, ResultReg);
1911 return true;
1912}
1913
1914bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1915 MVT VT;
1916 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1917 return false;
1918
1919 if (VT.isVector())
1920 return selectOperator(I, I->getOpcode());
1921
1922 unsigned ResultReg;
1923 switch (I->getOpcode()) {
1924 default:
1925 llvm_unreachable("Unexpected instruction.");
1926 case Instruction::And:
1927 ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1928 break;
1929 case Instruction::Or:
1930 ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1931 break;
1932 case Instruction::Xor:
1933 ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1934 break;
1935 }
1936 if (!ResultReg)
1937 return false;
1938
1939 updateValueMap(I, ResultReg);
1940 return true;
1941}
1942
1943bool AArch64FastISel::selectLoad(const Instruction *I) {
1944 MVT VT;
1945 // Verify we have a legal type before going any further. Currently, we handle
1946 // simple types that will directly fit in a register (i32/f32/i64/f64) or
1947 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1948 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1949 cast<LoadInst>(I)->isAtomic())
1950 return false;
1951
1952 const Value *SV = I->getOperand(0);
1953 if (TLI.supportSwiftError()) {
1954 // Swifterror values can come from either a function parameter with
1955 // swifterror attribute or an alloca with swifterror attribute.
1956 if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1957 if (Arg->hasSwiftErrorAttr())
1958 return false;
1959 }
1960
1961 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1962 if (Alloca->isSwiftError())
1963 return false;
1964 }
1965 }
1966
1967 // See if we can handle this address.
1968 Address Addr;
1969 if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1970 return false;
1971
1972 // Fold the following sign-/zero-extend into the load instruction.
1973 bool WantZExt = true;
1974 MVT RetVT = VT;
1975 const Value *IntExtVal = nullptr;
1976 if (I->hasOneUse()) {
1977 if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1978 if (isTypeSupported(ZE->getType(), RetVT))
1979 IntExtVal = ZE;
1980 else
1981 RetVT = VT;
1982 } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1983 if (isTypeSupported(SE->getType(), RetVT))
1984 IntExtVal = SE;
1985 else
1986 RetVT = VT;
1987 WantZExt = false;
1988 }
1989 }
1990
1991 unsigned ResultReg =
1992 emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1993 if (!ResultReg)
1994 return false;
1995
1996 // There are a few different cases we have to handle, because the load or the
1997 // sign-/zero-extend might not be selected by FastISel if we fall back to
1998 // SelectionDAG. There is also an ordering issue when both instructions are in
1999 // different basic blocks.
2000 // 1.) The load instruction is selected by FastISel, but the integer extend
2001 // not. This usually happens when the integer extend is in a different
2002 // basic block and SelectionDAG took over for that basic block.
2003 // 2.) The load instruction is selected before the integer extend. This only
2004 // happens when the integer extend is in a different basic block.
2005 // 3.) The load instruction is selected by SelectionDAG and the integer extend
2006 // by FastISel. This happens if there are instructions between the load
2007 // and the integer extend that couldn't be selected by FastISel.
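// Illustrative IR shape: "%v = load i8, ptr %p" followed by "%z = zext i8 %v to i64"
// is emitted above as a single zero-extending ldrb; the code below reconciles that
// with however the zext itself ends up being selected.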
2008 if (IntExtVal) {
2009 // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2010 // could select it. Emit a copy to subreg if necessary. FastISel will remove
2011 // it when it selects the integer extend.
2012 Register Reg = lookUpRegForValue(IntExtVal);
2013 auto *MI = MRI.getUniqueVRegDef(Reg);
2014 if (!MI) {
2015 if (RetVT == MVT::i64 && VT <= MVT::i32) {
2016 if (WantZExt) {
2017 // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2018 MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
2019 ResultReg = std::prev(I)->getOperand(0).getReg();
2020 removeDeadCode(I, std::next(I));
2021 } else
2022 ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2023 AArch64::sub_32);
2024 }
2025 updateValueMap(I, ResultReg);
2026 return true;
2027 }
2028
2029 // The integer extend has already been emitted - delete all the instructions
2030 // that have been emitted by the integer extend lowering code and use the
2031 // result from the load instruction directly.
2032 while (MI) {
2033 Reg = 0;
2034 for (auto &Opnd : MI->uses()) {
2035 if (Opnd.isReg()) {
2036 Reg = Opnd.getReg();
2037 break;
2038 }
2039 }
2040 MachineBasicBlock::iterator I(MI);
2041 removeDeadCode(I, std::next(I));
2042 MI = nullptr;
2043 if (Reg)
2044 MI = MRI.getUniqueVRegDef(Reg);
2045 }
2046 updateValueMap(IntExtVal, ResultReg);
2047 return true;
2048 }
2049
2050 updateValueMap(I, ResultReg);
2051 return true;
2052}
2053
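/// Emit a store-release (STLRB/STLRH/STLRW/STLRX) of SrcReg to [AddrReg]. Used by
/// selectStore for atomic stores with release or stronger ordering; the STLR
/// addressing mode only supports a plain register base.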
2054bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2055 unsigned AddrReg,
2056 MachineMemOperand *MMO) {
2057 unsigned Opc;
2058 switch (VT.SimpleTy) {
2059 default: return false;
2060 case MVT::i8: Opc = AArch64::STLRB; break;
2061 case MVT::i16: Opc = AArch64::STLRH; break;
2062 case MVT::i32: Opc = AArch64::STLRW; break;
2063 case MVT::i64: Opc = AArch64::STLRX; break;
2064 }
2065
2066 const MCInstrDesc &II = TII.get(Opc);
2067 SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2068 AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2069 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2070 .addReg(SrcReg)
2071 .addReg(AddrReg)
2072 .addMemOperand(MMO);
2073 return true;
2074}
2075
2076bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2077 MachineMemOperand *MMO) {
2078 if (!TLI.allowsMisalignedMemoryAccesses(VT))
2079 return false;
2080
2081 // Simplify this down to something we can handle.
2082 if (!simplifyAddress(Addr, VT))
2083 return false;
2084
2085 unsigned ScaleFactor = getImplicitScaleFactor(VT);
2086 if (!ScaleFactor)
2087 llvm_unreachable("Unexpected value type.");
2088
2089 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2090 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2091 bool UseScaled = true;
2092 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2093 UseScaled = false;
2094 ScaleFactor = 1;
2095 }
2096
2097 static const unsigned OpcTable[4][6] = {
2098 { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
2099 AArch64::STURSi, AArch64::STURDi },
2100 { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
2101 AArch64::STRSui, AArch64::STRDui },
2102 { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2103 AArch64::STRSroX, AArch64::STRDroX },
2104 { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2105 AArch64::STRSroW, AArch64::STRDroW }
2106 };
2107
2108 unsigned Opc;
2109 bool VTIsi1 = false;
2110 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2111 Addr.getOffsetReg();
2112 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2113 if (Addr.getExtendType() == AArch64_AM::UXTW ||
2114 Addr.getExtendType() == AArch64_AM::SXTW)
2115 Idx++;
2116
2117 switch (VT.SimpleTy) {
2118 default: llvm_unreachable("Unexpected value type.");
2119 case MVT::i1: VTIsi1 = true; [[fallthrough]];
2120 case MVT::i8: Opc = OpcTable[Idx][0]; break;
2121 case MVT::i16: Opc = OpcTable[Idx][1]; break;
2122 case MVT::i32: Opc = OpcTable[Idx][2]; break;
2123 case MVT::i64: Opc = OpcTable[Idx][3]; break;
2124 case MVT::f32: Opc = OpcTable[Idx][4]; break;
2125 case MVT::f64: Opc = OpcTable[Idx][5]; break;
2126 }
2127
2128 // Storing an i1 requires special handling.
2129 if (VTIsi1 && SrcReg != AArch64::WZR) {
2130 unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1);
2131 assert(ANDReg && "Unexpected AND instruction emission failure.");
2132 SrcReg = ANDReg;
2133 }
2134 // Create the base instruction, then add the operands.
2135 const MCInstrDesc &II = TII.get(Opc);
2136 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2137 MachineInstrBuilder MIB =
2138 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(SrcReg);
2139 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2140
2141 return true;
2142}
2143
2144bool AArch64FastISel::selectStore(const Instruction *I) {
2145 MVT VT;
2146 const Value *Op0 = I->getOperand(0);
2147 // Verify we have a legal type before going any further. Currently, we handle
2148 // simple types that will directly fit in a register (i32/f32/i64/f64) or
2149 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2150 if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2151 return false;
2152
2153 const Value *PtrV = I->getOperand(1);
2154 if (TLI.supportSwiftError()) {
2155 // Swifterror values can come from either a function parameter with
2156 // swifterror attribute or an alloca with swifterror attribute.
2157 if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2158 if (Arg->hasSwiftErrorAttr())
2159 return false;
2160 }
2161
2162 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2163 if (Alloca->isSwiftError())
2164 return false;
2165 }
2166 }
2167
2168 // Get the value to be stored into a register. Use the zero register directly
2169 // when possible to avoid an unnecessary copy and a wasted register.
2170 unsigned SrcReg = 0;
2171 if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2172 if (CI->isZero())
2173 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2174 } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2175 if (CF->isZero() && !CF->isNegative()) {
2176 VT = MVT::getIntegerVT(VT.getSizeInBits());
2177 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2178 }
2179 }
2180
2181 if (!SrcReg)
2182 SrcReg = getRegForValue(Op0);
2183
2184 if (!SrcReg)
2185 return false;
2186
2187 auto *SI = cast<StoreInst>(I);
2188
2189 // Try to emit a STLR for seq_cst/release.
2190 if (SI->isAtomic()) {
2191 AtomicOrdering Ord = SI->getOrdering();
2192 // The non-atomic instructions are sufficient for relaxed stores.
2193 if (isReleaseOrStronger(Ord)) {
2194 // The STLR addressing mode only supports a base reg; pass that directly.
2195 Register AddrReg = getRegForValue(PtrV);
2196 return emitStoreRelease(VT, SrcReg, AddrReg,
2197 createMachineMemOperandFor(I));
2198 }
2199 }
2200
2201 // See if we can handle this address.
2202 Address Addr;
2203 if (!computeAddress(PtrV, Addr, Op0->getType()))
2204 return false;
2205
2206 if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2207 return false;
2208 return true;
2209}
2210
2211static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2212 switch (Pred) {
2213 case CmpInst::FCMP_ONE:
2214 case CmpInst::FCMP_UEQ:
2215 default:
2216 // AL is our "false" for now. The other two need more compares.
2217 return AArch64CC::AL;
2218 case CmpInst::ICMP_EQ:
2219 case CmpInst::FCMP_OEQ:
2220 return AArch64CC::EQ;
2221 case CmpInst::ICMP_SGT:
2222 case CmpInst::FCMP_OGT:
2223 return AArch64CC::GT;
2224 case CmpInst::ICMP_SGE:
2225 case CmpInst::FCMP_OGE:
2226 return AArch64CC::GE;
2227 case CmpInst::ICMP_UGT:
2228 case CmpInst::FCMP_UGT:
2229 return AArch64CC::HI;
2230 case CmpInst::FCMP_OLT:
2231 return AArch64CC::MI;
2232 case CmpInst::ICMP_ULE:
2233 case CmpInst::FCMP_OLE:
2234 return AArch64CC::LS;
2235 case CmpInst::FCMP_ORD:
2236 return AArch64CC::VC;
2237 case CmpInst::FCMP_UNO:
2238 return AArch64CC::VS;
2239 case CmpInst::FCMP_UGE:
2240 return AArch64CC::PL;
2241 case CmpInst::ICMP_SLT:
2242 case CmpInst::FCMP_ULT:
2243 return AArch64CC::LT;
2244 case CmpInst::ICMP_SLE:
2245 case CmpInst::FCMP_ULE:
2246 return AArch64CC::LE;
2247 case CmpInst::FCMP_UNE:
2248 case CmpInst::ICMP_NE:
2249 return AArch64CC::NE;
2250 case CmpInst::ICMP_UGE:
2251 return AArch64CC::HS;
2252 case CmpInst::ICMP_ULT:
2253 return AArch64CC::LO;
2254 }
2255}
2256
2257/// Try to emit a combined compare-and-branch instruction.
2258bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2259 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
2260 // will not be produced, as they are conditional branch instructions that do
2261 // not set flags.
2262 if (FuncInfo.MF->getFunction().hasFnAttribute(
2263 Attribute::SpeculativeLoadHardening))
2264 return false;
2265
2266 assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2267 const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2268 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2269
2270 const Value *LHS = CI->getOperand(0);
2271 const Value *RHS = CI->getOperand(1);
2272
2273 MVT VT;
2274 if (!isTypeSupported(LHS->getType(), VT))
2275 return false;
2276
2277 unsigned BW = VT.getSizeInBits();
2278 if (BW > 64)
2279 return false;
2280
2281 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2282 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2283
2284 // Try to take advantage of fallthrough opportunities.
2285 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2286 std::swap(TBB, FBB);
2287 Predicate = CmpInst::getInversePredicate(Predicate);
2288 }
2289
2290 int TestBit = -1;
2291 bool IsCmpNE;
2292 switch (Predicate) {
2293 default:
2294 return false;
2295 case CmpInst::ICMP_EQ:
2296 case CmpInst::ICMP_NE:
2297 if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2298 std::swap(LHS, RHS);
2299
2300 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2301 return false;
2302
2303 if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2304 if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2305 const Value *AndLHS = AI->getOperand(0);
2306 const Value *AndRHS = AI->getOperand(1);
2307
2308 if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2309 if (C->getValue().isPowerOf2())
2310 std::swap(AndLHS, AndRHS);
2311
2312 if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2313 if (C->getValue().isPowerOf2()) {
2314 TestBit = C->getValue().logBase2();
2315 LHS = AndLHS;
2316 }
2317 }
2318
2319 if (VT == MVT::i1)
2320 TestBit = 0;
2321
2322 IsCmpNE = Predicate == CmpInst::ICMP_NE;
2323 break;
2324 case CmpInst::ICMP_SLT:
2325 case CmpInst::ICMP_SGE:
2326 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2327 return false;
2328
2329 TestBit = BW - 1;
2330 IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2331 break;
2332 case CmpInst::ICMP_SGT:
2333 case CmpInst::ICMP_SLE:
2334 if (!isa<ConstantInt>(RHS))
2335 return false;
2336
2337 if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2338 return false;
2339
2340 TestBit = BW - 1;
2341 IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2342 break;
2343 } // end switch
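// Illustrative case (hypothetical register): "icmp slt i32 %x, 0" feeding a branch
// becomes a single "tbnz w0, #31, <bb>", since the sign bit alone decides the compare.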
2344
2345 static const unsigned OpcTable[2][2][2] = {
2346 { {AArch64::CBZW, AArch64::CBZX },
2347 {AArch64::CBNZW, AArch64::CBNZX} },
2348 { {AArch64::TBZW, AArch64::TBZX },
2349 {AArch64::TBNZW, AArch64::TBNZX} }
2350 };
2351
2352 bool IsBitTest = TestBit != -1;
2353 bool Is64Bit = BW == 64;
2354 if (TestBit < 32 && TestBit >= 0)
2355 Is64Bit = false;
2356
2357 unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2358 const MCInstrDesc &II = TII.get(Opc);
2359
2360 Register SrcReg = getRegForValue(LHS);
2361 if (!SrcReg)
2362 return false;
2363
2364 if (BW == 64 && !Is64Bit)
2365 SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32);
2366
2367 if ((BW < 32) && !IsBitTest)
2368 SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
2369
2370 // Emit the combined compare and branch instruction.
2371 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2372 MachineInstrBuilder MIB =
2373 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
2374 .addReg(SrcReg);
2375 if (IsBitTest)
2376 MIB.addImm(TestBit);
2377 MIB.addMBB(TBB);
2378
2379 finishCondBranch(BI->getParent(), TBB, FBB);
2380 return true;
2381}
2382
2383bool AArch64FastISel::selectBranch(const Instruction *I) {
2384 const BranchInst *BI = cast<BranchInst>(I);
2385 if (BI->isUnconditional()) {
2386 MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2387 fastEmitBranch(MSucc, BI->getDebugLoc());
2388 return true;
2389 }
2390
2391 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2392 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2393
2394 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2395 if (CI->hasOneUse() && isValueAvailable(CI)) {
2396 // Try to optimize or fold the cmp.
2397 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2398 switch (Predicate) {
2399 default:
2400 break;
2401 case CmpInst::FCMP_FALSE:
2402 fastEmitBranch(FBB, MIMD.getDL());
2403 return true;
2404 case CmpInst::FCMP_TRUE:
2405 fastEmitBranch(TBB, MIMD.getDL());
2406 return true;
2407 }
2408
2409 // Try to emit a combined compare-and-branch first.
2410 if (emitCompareAndBranch(BI))
2411 return true;
2412
2413 // Try to take advantage of fallthrough opportunities.
2414 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2415 std::swap(TBB, FBB);
2416 Predicate = CmpInst::getInversePredicate(Predicate);
2417 }
2418
2419 // Emit the cmp.
2420 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2421 return false;
2422
2423 // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2424 // instruction.
2425 AArch64CC::CondCode CC = getCompareCC(Predicate);
2426 AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2427 switch (Predicate) {
2428 default:
2429 break;
2430 case CmpInst::FCMP_UEQ:
2431 ExtraCC = AArch64CC::EQ;
2432 CC = AArch64CC::VS;
2433 break;
2434 case CmpInst::FCMP_ONE:
2435 ExtraCC = AArch64CC::MI;
2436 CC = AArch64CC::GT;
2437 break;
2438 }
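// Illustrative lowering: for FCMP_UEQ the emitted sequence is roughly
// "fcmp; b.eq TBB; b.vs TBB" (equal, or unordered), and for FCMP_ONE it is
// "fcmp; b.mi TBB; b.gt TBB" (ordered less-than or greater-than).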
2439 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2440
2441 // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2442 if (ExtraCC != AArch64CC::AL) {
2443 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2444 .addImm(ExtraCC)
2445 .addMBB(TBB);
2446 }
2447
2448 // Emit the branch.
2449 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2450 .addImm(CC)
2451 .addMBB(TBB);
2452
2453 finishCondBranch(BI->getParent(), TBB, FBB);
2454 return true;
2455 }
2456 } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2457 uint64_t Imm = CI->getZExtValue();
2458 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2459 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::B))
2460 .addMBB(Target);
2461
2462 // Obtain the branch probability and add the target to the successor list.
2463 if (FuncInfo.BPI) {
2464 auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2465 BI->getParent(), Target->getBasicBlock());
2466 FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2467 } else
2468 FuncInfo.MBB->addSuccessorWithoutProb(Target);
2469 return true;
2470 } else {
2471 AArch64CC::CondCode CC = AArch64CC::NE;
2472 if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2473 // Fake request the condition, otherwise the intrinsic might be completely
2474 // optimized away.
2475 Register CondReg = getRegForValue(BI->getCondition());
2476 if (!CondReg)
2477 return false;
2478
2479 // Emit the branch.
2480 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2481 .addImm(CC)
2482 .addMBB(TBB);
2483
2484 finishCondBranch(BI->getParent(), TBB, FBB);
2485 return true;
2486 }
2487 }
2488
2489 Register CondReg = getRegForValue(BI->getCondition());
2490 if (CondReg == 0)
2491 return false;
2492
2493 // i1 conditions come in as i32 values; test the lowest bit with tb(n)z.
2494 unsigned Opcode = AArch64::TBNZW;
2495 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2496 std::swap(TBB, FBB);
2497 Opcode = AArch64::TBZW;
2498 }
2499
2500 const MCInstrDesc &II = TII.get(Opcode);
2501 Register ConstrainedCondReg
2502 = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2503 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2504 .addReg(ConstrainedCondReg)
2505 .addImm(0)
2506 .addMBB(TBB);
2507
2508 finishCondBranch(BI->getParent(), TBB, FBB);
2509 return true;
2510}
2511
2512bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2513 const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2514 Register AddrReg = getRegForValue(BI->getOperand(0));
2515 if (AddrReg == 0)
2516 return false;
2517
2518 // Emit the indirect branch.
2519 const MCInstrDesc &II = TII.get(AArch64::BR);
2520 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
2521 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(AddrReg);
2522
2523 // Make sure the CFG is up-to-date.
2524 for (const auto *Succ : BI->successors())
2525 FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2526
2527 return true;
2528}
2529
2530bool AArch64FastISel::selectCmp(const Instruction *I) {
2531 const CmpInst *CI = cast<CmpInst>(I);
2532
2533 // Vectors of i1 are weird: bail out.
2534 if (CI->getType()->isVectorTy())
2535 return false;
2536
2537 // Try to optimize or fold the cmp.
2538 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2539 unsigned ResultReg = 0;
2540 switch (Predicate) {
2541 default:
2542 break;
2543 case CmpInst::FCMP_FALSE:
2544 ResultReg = createResultReg(&AArch64::GPR32RegClass);
2545 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2546 TII.get(TargetOpcode::COPY), ResultReg)
2547 .addReg(AArch64::WZR, getKillRegState(true));
2548 break;
2549 case CmpInst::FCMP_TRUE:
2550 ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2551 break;
2552 }
2553
2554 if (ResultReg) {
2555 updateValueMap(I, ResultReg);
2556 return true;
2557 }
2558
2559 // Emit the cmp.
2560 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2561 return false;
2562
2563 ResultReg = createResultReg(&AArch64::GPR32RegClass);
2564
2565 // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2566 // condition codes are inverted, because they are used by CSINC.
2567 static unsigned CondCodeTable[2][2] = {
2568 { AArch64CC::NE, AArch64CC::VC },
2569 { AArch64CC::PL, AArch64CC::LE }
2570 };
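// How this works (added note): "csinc Wd, Wn, Wm, cond" gives Wn if cond holds and
// Wm + 1 otherwise. The first CSINC below materializes (cc0 ? 1 : 0) via the inverted
// code; the second keeps that value while the inverted second code holds and produces
// WZR + 1 = 1 otherwise, so the result is (cc0 || cc1) ? 1 : 0, as FCMP_UEQ
// (EQ or unordered) and FCMP_ONE (OLT or OGT) require.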
2571 unsigned *CondCodes = nullptr;
2572 switch (Predicate) {
2573 default:
2574 break;
2575 case CmpInst::FCMP_UEQ:
2576 CondCodes = &CondCodeTable[0][0];
2577 break;
2578 case CmpInst::FCMP_ONE:
2579 CondCodes = &CondCodeTable[1][0];
2580 break;
2581 }
2582
2583 if (CondCodes) {
2584 Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2585 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2586 TmpReg1)
2587 .addReg(AArch64::WZR, getKillRegState(true))
2588 .addReg(AArch64::WZR, getKillRegState(true))
2589 .addImm(CondCodes[0]);
2590 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2591 ResultReg)
2592 .addReg(TmpReg1, getKillRegState(true))
2593 .addReg(AArch64::WZR, getKillRegState(true))
2594 .addImm(CondCodes[1]);
2595
2596 updateValueMap(I, ResultReg);
2597 return true;
2598 }
2599
2600 // Now set a register based on the comparison.
2601 AArch64CC::CondCode CC = getCompareCC(Predicate);
2602 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2603 AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2604 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2605 ResultReg)
2606 .addReg(AArch64::WZR, getKillRegState(true))
2607 .addReg(AArch64::WZR, getKillRegState(true))
2608 .addImm(invertedCC);
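// i.e. "csinc Wd, wzr, wzr, invertedCC" materializes (CC ? 1 : 0).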
2609
2610 updateValueMap(I, ResultReg);
2611 return true;
2612}
2613
2614/// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2615/// value.
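/// Illustrative examples:
///   select i1 %c, i1 true, i1 %b   -->  orr (%c | %b)
///   select i1 %c, i1 %a, i1 false  -->  and (%c & %a)
///   select i1 %c, i1 false, i1 %b  -->  bic (%b & ~%c)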
2616bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2617 if (!SI->getType()->isIntegerTy(1))
2618 return false;
2619
2620 const Value *Src1Val, *Src2Val;
2621 unsigned Opc = 0;
2622 bool NeedExtraOp = false;
2623 if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2624 if (CI->isOne()) {
2625 Src1Val = SI->getCondition();
2626 Src2Val = SI->getFalseValue();
2627 Opc = AArch64::ORRWrr;
2628 } else {
2629 assert(CI->isZero());
2630 Src1Val = SI->getFalseValue();
2631 Src2Val = SI->getCondition();
2632 Opc = AArch64::BICWrr;
2633 }
2634 } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2635 if (CI->isOne()) {
2636 Src1Val = SI->getCondition();
2637 Src2Val = SI->getTrueValue();
2638 Opc = AArch64::ORRWrr;
2639 NeedExtraOp = true;
2640 } else {
2641 assert(CI->isZero());
2642 Src1Val = SI->getCondition();
2643 Src2Val = SI->getTrueValue();
2644 Opc = AArch64::ANDWrr;
2645 }
2646 }
2647
2648 if (!Opc)
2649 return false;
2650
2651 Register Src1Reg = getRegForValue(Src1Val);
2652 if (!Src1Reg)
2653 return false;
2654
2655 Register Src2Reg = getRegForValue(Src2Val);
2656 if (!Src2Reg)
2657 return false;
2658
2659 if (NeedExtraOp)
2660 Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1);
2661
2662 Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2663 Src2Reg);
2664 updateValueMap(SI, ResultReg);
2665 return true;
2666}
2667
2668bool AArch64FastISel::selectSelect(const Instruction *I) {
2669 assert(isa<SelectInst>(I) && "Expected a select instruction.");
2670 MVT VT;
2671 if (!isTypeSupported(I->getType(), VT))
2672 return false;
2673
2674 unsigned Opc;
2675 const TargetRegisterClass *RC;
2676 switch (VT.SimpleTy) {
2677 default:
2678 return false;
2679 case MVT::i1:
2680 case MVT::i8:
2681 case MVT::i16:
2682 case MVT::i32:
2683 Opc = AArch64::CSELWr;
2684 RC = &AArch64::GPR32RegClass;
2685 break;
2686 case MVT::i64:
2687 Opc = AArch64::CSELXr;
2688 RC = &AArch64::GPR64RegClass;
2689 break;
2690 case MVT::f32:
2691 Opc = AArch64::FCSELSrrr;
2692 RC = &AArch64::FPR32RegClass;
2693 break;
2694 case MVT::f64:
2695 Opc = AArch64::FCSELDrrr;
2696 RC = &AArch64::FPR64RegClass;
2697 break;
2698 }
2699
2700 const SelectInst *SI = cast<SelectInst>(I);
2701 const Value *Cond = SI->getCondition();
2702 AArch64CC::CondCode CC = AArch64CC::NE;
2703 AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2704
2705 if (optimizeSelect(SI))
2706 return true;
2707
2708 // Try to pickup the flags, so we don't have to emit another compare.
2709 if (foldXALUIntrinsic(CC, I, Cond)) {
2710 // Fake request the condition to force emission of the XALU intrinsic.
2711 Register CondReg = getRegForValue(Cond);
2712 if (!CondReg)
2713 return false;
2714 } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2715 isValueAvailable(Cond)) {
2716 const auto *Cmp = cast<CmpInst>(Cond);
2717 // Try to optimize or fold the cmp.
2718 CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2719 const Value *FoldSelect = nullptr;
2720 switch (Predicate) {
2721 default:
2722 break;
2723 case CmpInst::FCMP_FALSE:
2724 FoldSelect = SI->getFalseValue();
2725 break;
2726 case CmpInst::FCMP_TRUE:
2727 FoldSelect = SI->getTrueValue();
2728 break;
2729 }
2730
2731 if (FoldSelect) {
2732 Register SrcReg = getRegForValue(FoldSelect);
2733 if (!SrcReg)
2734 return false;
2735
2736 updateValueMap(I, SrcReg);
2737 return true;
2738 }
2739
2740 // Emit the cmp.
2741 if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2742 return false;
2743
2744 // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2745 CC = getCompareCC(Predicate);
2746 switch (Predicate) {
2747 default:
2748 break;
2749 case CmpInst::FCMP_UEQ:
2750 ExtraCC = AArch64CC::EQ;
2751 CC = AArch64CC::VS;
2752 break;
2753 case CmpInst::FCMP_ONE:
2754 ExtraCC = AArch64CC::MI;
2755 CC = AArch64CC::GT;
2756 break;
2757 }
2758 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2759 } else {
2760 Register CondReg = getRegForValue(Cond);
2761 if (!CondReg)
2762 return false;
2763
2764 const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2765 CondReg = constrainOperandRegClass(II, CondReg, 1);
2766
2767 // Emit a TST instruction (ANDS wzr, reg, #imm).
2768 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II,
2769 AArch64::WZR)
2770 .addReg(CondReg)
2771 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2772 }
2773
2774 Register Src1Reg = getRegForValue(SI->getTrueValue());
2775 Register Src2Reg = getRegForValue(SI->getFalseValue());
2776
2777 if (!Src1Reg || !Src2Reg)
2778 return false;
2779
2780 if (ExtraCC != AArch64CC::AL)
2781 Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC);
2782
2783 Register ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC);
2784 updateValueMap(I, ResultReg);
2785 return true;
2786}
2787
2788bool AArch64FastISel::selectFPExt(const Instruction *I) {
2789 Value *V = I->getOperand(0);
2790 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2791 return false;
2792
2793 Register Op = getRegForValue(V);
2794 if (Op == 0)
2795 return false;
2796
2797 Register ResultReg = createResultReg(&AArch64::FPR64RegClass);
2798 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTDSr),
2799 ResultReg).addReg(Op);
2800 updateValueMap(I, ResultReg);
2801 return true;
2802}
2803
2804bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2805 Value *V = I->getOperand(0);
2806 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2807 return false;
2808
2809 Register Op = getRegForValue(V);
2810 if (Op == 0)
2811 return false;
2812
2813 Register ResultReg = createResultReg(&AArch64::FPR32RegClass);
2814 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTSDr),
2815 ResultReg).addReg(Op);
2816 updateValueMap(I, ResultReg);
2817 return true;
2818}
2819
2820// FPToUI and FPToSI
2821bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2822 MVT DestVT;
2823 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2824 return false;
2825
2826 Register SrcReg = getRegForValue(I->getOperand(0));
2827 if (SrcReg == 0)
2828 return false;
2829
2830 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2831 if (SrcVT == MVT::f128 || SrcVT == MVT::f16 || SrcVT == MVT::bf16)
2832 return false;
2833
2834 unsigned Opc;
2835 if (SrcVT == MVT::f64) {
2836 if (Signed)
2837 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2838 else
2839 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2840 } else {
2841 if (Signed)
2842 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2843 else
2844 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2845 }
2846 Register ResultReg = createResultReg(
2847 DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2848 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
2849 .addReg(SrcReg);
2850 updateValueMap(I, ResultReg);
2851 return true;
2852}
2853
2854bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2855 MVT DestVT;
2856 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2857 return false;
2858 // Let regular ISEL handle FP16
2859 if (DestVT == MVT::f16 || DestVT == MVT::bf16)
2860 return false;
2861
2862 assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2863 "Unexpected value type.");
2864
2865 Register SrcReg = getRegForValue(I->getOperand(0));
2866 if (!SrcReg)
2867 return false;
2868
2869 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2870
2871 // Handle sign-extension.
2872 if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2873 SrcReg =
2874 emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2875 if (!SrcReg)
2876 return false;
2877 }
2878
2879 unsigned Opc;
2880 if (SrcVT == MVT::i64) {
2881 if (Signed)
2882 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2883 else
2884 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2885 } else {
2886 if (Signed)
2887 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2888 else
2889 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2890 }
2891
2892 Register ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg);
2893 updateValueMap(I, ResultReg);
2894 return true;
2895}
2896
2897bool AArch64FastISel::fastLowerArguments() {
2898 if (!FuncInfo.CanLowerReturn)
2899 return false;
2900
2901 const Function *F = FuncInfo.Fn;
2902 if (F->isVarArg())
2903 return false;
2904
2905 CallingConv::ID CC = F->getCallingConv();
2906 if (CC != CallingConv::C && CC != CallingConv::Swift)
2907 return false;
2908
2909 if (Subtarget->hasCustomCallingConv())
2910 return false;
2911
2912 // Only handle simple cases of up to 8 GPR and FPR each.
2913 unsigned GPRCnt = 0;
2914 unsigned FPRCnt = 0;
2915 for (auto const &Arg : F->args()) {
2916 if (Arg.hasAttribute(Attribute::ByVal) ||
2917 Arg.hasAttribute(Attribute::InReg) ||
2918 Arg.hasAttribute(Attribute::StructRet) ||
2919 Arg.hasAttribute(Attribute::SwiftSelf) ||
2920 Arg.hasAttribute(Attribute::SwiftAsync) ||
2921 Arg.hasAttribute(Attribute::SwiftError) ||
2922 Arg.hasAttribute(Attribute::Nest))
2923 return false;
2924
2925 Type *ArgTy = Arg.getType();
2926 if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2927 return false;
2928
2929 EVT ArgVT = TLI.getValueType(DL, ArgTy);
2930 if (!ArgVT.isSimple())
2931 return false;
2932
2933 MVT VT = ArgVT.getSimpleVT().SimpleTy;
2934 if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2935 return false;
2936
2937 if (VT.isVector() &&
2938 (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2939 return false;
2940
2941 if (VT >= MVT::i1 && VT <= MVT::i64)
2942 ++GPRCnt;
2943 else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2944 VT.is128BitVector())
2945 ++FPRCnt;
2946 else
2947 return false;
2948
2949 if (GPRCnt > 8 || FPRCnt > 8)
2950 return false;
2951 }
2952
2953 static const MCPhysReg Registers[6][8] = {
2954 { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2955 AArch64::W5, AArch64::W6, AArch64::W7 },
2956 { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2957 AArch64::X5, AArch64::X6, AArch64::X7 },
2958 { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2959 AArch64::H5, AArch64::H6, AArch64::H7 },
2960 { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2961 AArch64::S5, AArch64::S6, AArch64::S7 },
2962 { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2963 AArch64::D5, AArch64::D6, AArch64::D7 },
2964 { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2965 AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2966 };
2967
2968 unsigned GPRIdx = 0;
2969 unsigned FPRIdx = 0;
2970 for (auto const &Arg : F->args()) {
2971 MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2972 unsigned SrcReg;
2973 const TargetRegisterClass *RC;
2974 if (VT >= MVT::i1 && VT <= MVT::i32) {
2975 SrcReg = Registers[0][GPRIdx++];
2976 RC = &AArch64::GPR32RegClass;
2977 VT = MVT::i32;
2978 } else if (VT == MVT::i64) {
2979 SrcReg = Registers[1][GPRIdx++];
2980 RC = &AArch64::GPR64RegClass;
2981 } else if (VT == MVT::f16 || VT == MVT::bf16) {
2982 SrcReg = Registers[2][FPRIdx++];
2983 RC = &AArch64::FPR16RegClass;
2984 } else if (VT == MVT::f32) {
2985 SrcReg = Registers[3][FPRIdx++];
2986 RC = &AArch64::FPR32RegClass;
2987 } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2988 SrcReg = Registers[4][FPRIdx++];
2989 RC = &AArch64::FPR64RegClass;
2990 } else if (VT.is128BitVector()) {
2991 SrcReg = Registers[5][FPRIdx++];
2992 RC = &AArch64::FPR128RegClass;
2993 } else
2994 llvm_unreachable("Unexpected value type.");
2995
2996 Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
2997 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
2998 // Without this, EmitLiveInCopies may eliminate the livein if its only
2999 // use is a bitcast (which isn't turned into an instruction).
3000 Register ResultReg = createResultReg(RC);
3001 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3002 TII.get(TargetOpcode::COPY), ResultReg)
3003 .addReg(DstReg, getKillRegState(true));
3004 updateValueMap(&Arg, ResultReg);
3005 }
3006 return true;
3007}
3008
3009bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3010 SmallVectorImpl<MVT> &OutVTs,
3011 unsigned &NumBytes) {
3012 CallingConv::ID CC = CLI.CallConv;
3013 SmallVector<CCValAssign, 16> ArgLocs;
3014 CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3015 CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
3016
3017 // Get a count of how many bytes are to be pushed on the stack.
3018 NumBytes = CCInfo.getStackSize();
3019
3020 // Issue CALLSEQ_START
3021 unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3022 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackDown))
3023 .addImm(NumBytes).addImm(0);
3024
3025 // Process the args.
3026 for (CCValAssign &VA : ArgLocs) {
3027 const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3028 MVT ArgVT = OutVTs[VA.getValNo()];
3029
3030 Register ArgReg = getRegForValue(ArgVal);
3031 if (!ArgReg)
3032 return false;
3033
3034 // Handle arg promotion: SExt, ZExt, AExt.
3035 switch (VA.getLocInfo()) {
3036 case CCValAssign::Full:
3037 break;
3038 case CCValAssign::SExt: {
3039 MVT DestVT = VA.getLocVT();
3040 MVT SrcVT = ArgVT;
3041 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3042 if (!ArgReg)
3043 return false;
3044 break;
3045 }
3046 case CCValAssign::AExt:
3047 // Intentional fall-through.
3048 case CCValAssign::ZExt: {
3049 MVT DestVT = VA.getLocVT();
3050 MVT SrcVT = ArgVT;
3051 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3052 if (!ArgReg)
3053 return false;
3054 break;
3055 }
3056 default:
3057 llvm_unreachable("Unknown arg promotion!");
3058 }
3059
3060 // Now copy/store arg to correct locations.
3061 if (VA.isRegLoc() && !VA.needsCustom()) {
3062 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3063 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3064 CLI.OutRegs.push_back(VA.getLocReg());
3065 } else if (VA.needsCustom()) {
3066 // FIXME: Handle custom args.
3067 return false;
3068 } else {
3069 assert(VA.isMemLoc() && "Assuming store on stack.");
3070
3071 // Don't emit stores for undef values.
3072 if (isa<UndefValue>(ArgVal))
3073 continue;
3074
3075 // Need to store on the stack.
3076 unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3077
3078 unsigned BEAlign = 0;
3079 if (ArgSize < 8 && !Subtarget->isLittleEndian())
3080 BEAlign = 8 - ArgSize;
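// Illustrative: on a big-endian target an i32 argument still occupies an 8-byte
// stack slot, so BEAlign = 4 shifts the 4-byte store into the half of the slot
// the callee reads it from.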
3081
3082 Address Addr;
3083 Addr.setKind(Address::RegBase);
3084 Addr.setReg(AArch64::SP);
3085 Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3086
3087 Align Alignment = DL.getABITypeAlign(ArgVal->getType());
3088 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3089 MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3090 MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3091
3092 if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3093 return false;
3094 }
3095 }
3096 return true;
3097}
3098
3099bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, unsigned NumBytes) {
3100 CallingConv::ID CC = CLI.CallConv;
3101
3102 // Issue CALLSEQ_END
3103 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3104 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp))
3105 .addImm(NumBytes).addImm(0);
3106
3107 // Now the return values.
3108 SmallVector<CCValAssign, 16> RVLocs;
3109 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3110 CCInfo.AnalyzeCallResult(CLI.Ins, CCAssignFnForCall(CC));
3111
3112 Register ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
3113 for (unsigned i = 0; i != RVLocs.size(); ++i) {
3114 CCValAssign &VA = RVLocs[i];
3115 MVT CopyVT = VA.getValVT();
3116 unsigned CopyReg = ResultReg + i;
3117
3118 // TODO: Handle big-endian results
3119 if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3120 return false;
3121
3122 // Copy result out of their specified physreg.
3123 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
3124 CopyReg)
3125 .addReg(VA.getLocReg());
3126 CLI.InRegs.push_back(VA.getLocReg());
3127 }
3128
3129 CLI.ResultReg = ResultReg;
3130 CLI.NumResultRegs = RVLocs.size();
3131
3132 return true;
3133}
3134
3135bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3136 CallingConv::ID CC = CLI.CallConv;
3137 bool IsTailCall = CLI.IsTailCall;
3138 bool IsVarArg = CLI.IsVarArg;
3139 const Value *Callee = CLI.Callee;
3140 MCSymbol *Symbol = CLI.Symbol;
3141
3142 if (!Callee && !Symbol)
3143 return false;
3144
3145 // Allow SelectionDAG isel to handle calls to functions like setjmp that need
3146 // a bti instruction following the call.
3147 if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
3148 !Subtarget->noBTIAtReturnTwice() &&
3149 MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
3150 return false;
3151
3152 // Allow SelectionDAG isel to handle indirect calls with KCFI checks.
3153 if (CLI.CB && CLI.CB->isIndirectCall() &&
3154 CLI.CB->getOperandBundle(LLVMContext::OB_kcfi))
3155 return false;
3156
3157 // Allow SelectionDAG isel to handle tail calls.
3158 if (IsTailCall)
3159 return false;
3160
3161 // FIXME: we could and should support this, but for now correctness at -O0 is
3162 // more important.
3163 if (Subtarget->isTargetILP32())
3164 return false;
3165
3166 CodeModel::Model CM = TM.getCodeModel();
3167 // Only support the small-addressing and large code models.
3168 if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3169 return false;
3170
3171 // FIXME: Add large code model support for ELF.
3172 if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3173 return false;
3174
3175 // ELF -fno-plt compiled intrinsic calls do not have the nonlazybind
3176 // attribute. Check "RtLibUseGOT" instead.
3177 if (MF->getFunction().getParent()->getRtLibUseGOT())
3178 return false;
3179
3180 // Let SDISel handle vararg functions.
3181 if (IsVarArg)
3182 return false;
3183
3184 if (Subtarget->isWindowsArm64EC())
3185 return false;
3186
3187 for (auto Flag : CLI.OutFlags)
3188 if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3189 Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError())
3190 return false;
3191
3192 // Set up the argument vectors.
3193 SmallVector<MVT, 16> OutVTs;
3194 OutVTs.reserve(CLI.OutVals.size());
3195
3196 for (auto *Val : CLI.OutVals) {
3197 MVT VT;
3198 if (!isTypeLegal(Val->getType(), VT) &&
3199 !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3200 return false;
3201
3202 // We don't handle vector parameters yet.
3203 if (VT.isVector() || VT.getSizeInBits() > 64)
3204 return false;
3205
3206 OutVTs.push_back(VT);
3207 }
3208
3209 Address Addr;
3210 if (Callee && !computeCallAddress(Callee, Addr))
3211 return false;
3212
3213 // The weak function target may be zero; in that case we must use indirect
3214 // addressing via a stub on windows as it may be out of range for a
3215 // PC-relative jump.
3216 if (Subtarget->isTargetWindows() && Addr.getGlobalValue() &&
3217 Addr.getGlobalValue()->hasExternalWeakLinkage())
3218 return false;
3219
3220 // Handle the arguments now that we've gotten them.
3221 unsigned NumBytes;
3222 if (!processCallArgs(CLI, OutVTs, NumBytes))
3223 return false;
3224
3225 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3226 if (RegInfo->isAnyArgRegReserved(*MF))
3227 RegInfo->emitReservedArgRegCallError(*MF);
3228
3229 // Issue the call.
3230 MachineInstrBuilder MIB;
3231 if (Subtarget->useSmallAddressing()) {
3232 const MCInstrDesc &II =
3233 TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL);
3234 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II);
3235 if (Symbol)
3236 MIB.addSym(Symbol, 0);
3237 else if (Addr.getGlobalValue())
3238 MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3239 else if (Addr.getReg()) {
3240 Register Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3241 MIB.addReg(Reg);
3242 } else
3243 return false;
3244 } else {
3245 unsigned CallReg = 0;
3246 if (Symbol) {
3247 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3248 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
3249 ADRPReg)
3250 .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3251
3252 CallReg = createResultReg(&AArch64::GPR64RegClass);
3253 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3254 TII.get(AArch64::LDRXui), CallReg)
3255 .addReg(ADRPReg)
3256 .addSym(Symbol,
3257 AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3258 } else if (Addr.getGlobalValue())
3259 CallReg = materializeGV(Addr.getGlobalValue());
3260 else if (Addr.getReg())
3261 CallReg = Addr.getReg();
3262
3263 if (!CallReg)
3264 return false;
3265
3266 const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF));
3267 CallReg = constrainOperandRegClass(II, CallReg, 0);
3268 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(CallReg);
3269 }
3270
3271 // Add implicit physical register uses to the call.
3272 for (auto Reg : CLI.OutRegs)
3273 MIB.addReg(Reg, RegState::Implicit);
3274
3275 // Add a register mask with the call-preserved registers.
3276 // Proper defs for return values will be added by setPhysRegsDeadExcept().
3277 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3278
3279 CLI.Call = MIB;
3280
3281 // Finish off the call including any return values.
3282 return finishCall(CLI, NumBytes);
3283}
3284
3285bool AArch64FastISel::isMemCpySmall(uint64_t Len, MaybeAlign Alignment) {
3286 if (Alignment)
3287 return Len / Alignment->value() <= 4;
3288 else
3289 return Len < 32;
3290}
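// Illustrative: a 16-byte memcpy known to be 8-byte aligned passes this check
// (16 / 8 = 2 <= 4) and is expanded inline below as two 8-byte load/store pairs
// instead of a libcall.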
3291
3292bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3293 uint64_t Len, MaybeAlign Alignment) {
3294 // Make sure we don't bloat code by inlining very large memcpy's.
3295 if (!isMemCpySmall(Len, Alignment))
3296 return false;
3297
3298 int64_t UnscaledOffset = 0;
3299 Address OrigDest = Dest;
3300 Address OrigSrc = Src;
3301
3302 while (Len) {
3303 MVT VT;
3304 if (!Alignment || *Alignment >= 8) {
3305 if (Len >= 8)
3306 VT = MVT::i64;
3307 else if (Len >= 4)
3308 VT = MVT::i32;
3309 else if (Len >= 2)
3310 VT = MVT::i16;
3311 else {
3312 VT = MVT::i8;
3313 }
3314 } else {
3315 assert(Alignment && "Alignment is set in this branch");
3316 // Bound based on alignment.
3317 if (Len >= 4 && *Alignment == 4)
3318 VT = MVT::i32;
3319 else if (Len >= 2 && *Alignment == 2)
3320 VT = MVT::i16;
3321 else {
3322 VT = MVT::i8;
3323 }
3324 }
3325
3326 unsigned ResultReg = emitLoad(VT, VT, Src);
3327 if (!ResultReg)
3328 return false;
3329
3330 if (!emitStore(VT, ResultReg, Dest))
3331 return false;
3332
3333 int64_t Size = VT.getSizeInBits() / 8;
3334 Len -= Size;
3335 UnscaledOffset += Size;
3336
3337 // We need to recompute the unscaled offset for each iteration.
3338 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3339 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3340 }
3341
3342 return true;
3343}
3344
3345/// Check if it is possible to fold the condition from the XALU intrinsic
3346/// into the user. The condition code will only be updated on success.
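/// Illustrative IR shape this matches:
///   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
///   %ovf = extractvalue { i32, i1 } %res, 1
///   br i1 %ovf, ...
/// The overflow bit corresponds to the VS condition of the ADDS computing %res, so
/// the branch (or select) can reuse the flags instead of re-materializing the i1.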
3347bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3348 const Instruction *I,
3349 const Value *Cond) {
3350 if (!isa<ExtractValueInst>(Cond))
3351 return false;
3352
3353 const auto *EV = cast<ExtractValueInst>(Cond);
3354 if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3355 return false;
3356
3357 const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3358 MVT RetVT;
3359 const Function *Callee = II->getCalledFunction();
3360 Type *RetTy =
3361 cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3362 if (!isTypeLegal(RetTy, RetVT))
3363 return false;
3364
3365 if (RetVT != MVT::i32 && RetVT != MVT::i64)
3366 return false;
3367
3368 const Value *LHS = II->getArgOperand(0);
3369 const Value *RHS = II->getArgOperand(1);
3370
3371 // Canonicalize immediate to the RHS.
3372 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3373 std::swap(LHS, RHS);
3374
3375 // Simplify multiplies.
3376 Intrinsic::ID IID = II->getIntrinsicID();
3377 switch (IID) {
3378 default:
3379 break;
3380 case Intrinsic::smul_with_overflow:
3381 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3382 if (C->getValue() == 2)
3383 IID = Intrinsic::sadd_with_overflow;
3384 break;
3385 case Intrinsic::umul_with_overflow:
3386 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3387 if (C->getValue() == 2)
3388 IID = Intrinsic::uadd_with_overflow;
3389 break;
3390 }
3391
3392 AArch64CC::CondCode TmpCC;
3393 switch (IID) {
3394 default:
3395 return false;
3396 case Intrinsic::sadd_with_overflow:
3397 case Intrinsic::ssub_with_overflow:
3398 TmpCC = AArch64CC::VS;
3399 break;
3400 case Intrinsic::uadd_with_overflow:
3401 TmpCC = AArch64CC::HS;
3402 break;
3403 case Intrinsic::usub_with_overflow:
3404 TmpCC = AArch64CC::LO;
3405 break;
3406 case Intrinsic::smul_with_overflow:
3407 case Intrinsic::umul_with_overflow:
3408 TmpCC = AArch64CC::NE;
3409 break;
3410 }
3411
3412 // Check if both instructions are in the same basic block.
3413 if (!isValueAvailable(II))
3414 return false;
3415
3416 // Make sure nothing is in the way
3417 BasicBlock::const_iterator Start(I);
3418 BasicBlock::const_iterator End(II);
3419 for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3420 // We only expect extractvalue instructions between the intrinsic and the
3421 // instruction to be selected.
3422 if (!isa<ExtractValueInst>(Itr))
3423 return false;
3424
3425 // Check that the extractvalue operand comes from the intrinsic.
3426 const auto *EVI = cast<ExtractValueInst>(Itr);
3427 if (EVI->getAggregateOperand() != II)
3428 return false;
3429 }
3430
3431 CC = TmpCC;
3432 return true;
3433}
3434
3435bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3436 // FIXME: Handle more intrinsics.
3437 switch (II->getIntrinsicID()) {
3438 default: return false;
3439 case Intrinsic::frameaddress: {
3440 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3441 MFI.setFrameAddressIsTaken(true);
3442
3443 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3444 Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3445 Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3446 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3447 TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3448 // Recursively load frame address
3449 // ldr x0, [fp]
3450 // ldr x0, [x0]
3451 // ldr x0, [x0]
3452 // ...
3453 unsigned DestReg;
3454 unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3455 while (Depth--) {
3456 DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3457 SrcReg, 0);
3458 assert(DestReg && "Unexpected LDR instruction emission failure.");
3459 SrcReg = DestReg;
3460 }
3461
3462 updateValueMap(II, SrcReg);
3463 return true;
3464 }
3465 case Intrinsic::sponentry: {
3466 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3467
3468 // SP = FP + Fixed Object + 16
3469 int FI = MFI.CreateFixedObject(4, 0, false);
3470 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
3471 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3472 TII.get(AArch64::ADDXri), ResultReg)
3473 .addFrameIndex(FI)
3474 .addImm(0)
3475 .addImm(0);
3476
3477 updateValueMap(II, ResultReg);
3478 return true;
3479 }
3480 case Intrinsic::memcpy:
3481 case Intrinsic::memmove: {
3482 const auto *MTI = cast<MemTransferInst>(II);
3483 // Don't handle volatile.
3484 if (MTI->isVolatile())
3485 return false;
3486
3487 // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
3488 // we would emit dead code because we don't currently handle memmoves.
3489 bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3490 if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3491 // Small memcpy's are common enough that we want to do them without a call
3492 // if possible.
3493 uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3494 MaybeAlign Alignment;
3495 if (MTI->getDestAlign() || MTI->getSourceAlign())
3496 Alignment = std::min(MTI->getDestAlign().valueOrOne(),
3497 MTI->getSourceAlign().valueOrOne());
3498 if (isMemCpySmall(Len, Alignment)) {
3499 Address Dest, Src;
3500 if (!computeAddress(MTI->getRawDest(), Dest) ||
3501 !computeAddress(MTI->getRawSource(), Src))
3502 return false;
3503 if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3504 return true;
3505 }
3506 }
3507
3508 if (!MTI->getLength()->getType()->isIntegerTy(64))
3509 return false;
3510
3511 if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3512 // Fast instruction selection doesn't support the special
3513 // address spaces.
3514 return false;
3515
3516 const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3517 return lowerCallTo(II, IntrMemName, II->arg_size() - 1);
3518 }
3519 case Intrinsic::memset: {
3520 const MemSetInst *MSI = cast<MemSetInst>(II);
3521 // Don't handle volatile.
3522 if (MSI->isVolatile())
3523 return false;
3524
3525 if (!MSI->getLength()->getType()->isIntegerTy(64))
3526 return false;
3527
3528 if (MSI->getDestAddressSpace() > 255)
3529 // Fast instruction selection doesn't support the special
3530 // address spaces.
3531 return false;
3532
3533 return lowerCallTo(II, "memset", II->arg_size() - 1);
3534 }
3535 case Intrinsic::sin:
3536 case Intrinsic::cos:
3537 case Intrinsic::tan:
3538 case Intrinsic::pow: {
3539 MVT RetVT;
3540 if (!isTypeLegal(II->getType(), RetVT))
3541 return false;
3542
3543 if (RetVT != MVT::f32 && RetVT != MVT::f64)
3544 return false;
3545
3546 static const RTLIB::Libcall LibCallTable[4][2] = {
3547 {RTLIB::SIN_F32, RTLIB::SIN_F64},
3548 {RTLIB::COS_F32, RTLIB::COS_F64},
3549 {RTLIB::TAN_F32, RTLIB::TAN_F64},
3550 {RTLIB::POW_F32, RTLIB::POW_F64}};
3551 RTLIB::Libcall LC;
3552 bool Is64Bit = RetVT == MVT::f64;
3553 switch (II->getIntrinsicID()) {
3554 default:
3555 llvm_unreachable("Unexpected intrinsic.");
3556 case Intrinsic::sin:
3557 LC = LibCallTable[0][Is64Bit];
3558 break;
3559 case Intrinsic::cos:
3560 LC = LibCallTable[1][Is64Bit];
3561 break;
3562 case Intrinsic::tan:
3563 LC = LibCallTable[2][Is64Bit];
3564 break;
3565 case Intrinsic::pow:
3566 LC = LibCallTable[3][Is64Bit];
3567 break;
3568 }
3569
3570 ArgListTy Args;
3571 Args.reserve(II->arg_size());
3572
3573 // Populate the argument list.
3574 for (auto &Arg : II->args()) {
3575 ArgListEntry Entry;
3576 Entry.Val = Arg;
3577 Entry.Ty = Arg->getType();
3578 Args.push_back(Entry);
3579 }
3580
3581 CallLoweringInfo CLI;
3582 MCContext &Ctx = MF->getContext();
3583 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3584 TLI.getLibcallName(LC), std::move(Args));
3585 if (!lowerCallTo(CLI))
3586 return false;
3587 updateValueMap(II, CLI.ResultReg);
3588 return true;
3589 }
3590 case Intrinsic::fabs: {
3591 MVT VT;
3592 if (!isTypeLegal(II->getType(), VT))
3593 return false;
3594
3595 unsigned Opc;
3596 switch (VT.SimpleTy) {
3597 default:
3598 return false;
3599 case MVT::f32:
3600 Opc = AArch64::FABSSr;
3601 break;
3602 case MVT::f64:
3603 Opc = AArch64::FABSDr;
3604 break;
3605 }
3606 Register SrcReg = getRegForValue(II->getOperand(0));
3607 if (!SrcReg)
3608 return false;
3609 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3610 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
3611 .addReg(SrcReg);
3612 updateValueMap(II, ResultReg);
3613 return true;
3614 }
3615 case Intrinsic::trap:
3616 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3617 .addImm(1);
3618 return true;
3619 case Intrinsic::debugtrap:
3620 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3621 .addImm(0xF000);
3622 return true;
3623
3624 case Intrinsic::sqrt: {
3625 Type *RetTy = II->getCalledFunction()->getReturnType();
3626
3627 MVT VT;
3628 if (!isTypeLegal(RetTy, VT))
3629 return false;
3630
3631 Register Op0Reg = getRegForValue(II->getOperand(0));
3632 if (!Op0Reg)
3633 return false;
3634
3635 unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg);
3636 if (!ResultReg)
3637 return false;
3638
3639 updateValueMap(II, ResultReg);
3640 return true;
3641 }
3642 case Intrinsic::sadd_with_overflow:
3643 case Intrinsic::uadd_with_overflow:
3644 case Intrinsic::ssub_with_overflow:
3645 case Intrinsic::usub_with_overflow:
3646 case Intrinsic::smul_with_overflow:
3647 case Intrinsic::umul_with_overflow: {
3648 // This implements the basic lowering of the xalu with overflow intrinsics.
3649 const Function *Callee = II->getCalledFunction();
3650 auto *Ty = cast<StructType>(Callee->getReturnType());
3651 Type *RetTy = Ty->getTypeAtIndex(0U);
3652
3653 MVT VT;
3654 if (!isTypeLegal(RetTy, VT))
3655 return false;
3656
3657 if (VT != MVT::i32 && VT != MVT::i64)
3658 return false;
3659
3660 const Value *LHS = II->getArgOperand(0);
3661 const Value *RHS = II->getArgOperand(1);
3662 // Canonicalize immediate to the RHS.
3663 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3664 std::swap(LHS, RHS);
3665
3666 // Simplify multiplies.
3667 Intrinsic::ID IID = II->getIntrinsicID();
3668 switch (IID) {
3669 default:
3670 break;
3671 case Intrinsic::smul_with_overflow:
3672 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3673 if (C->getValue() == 2) {
3674 IID = Intrinsic::sadd_with_overflow;
3675 RHS = LHS;
3676 }
3677 break;
3678 case Intrinsic::umul_with_overflow:
3679 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3680 if (C->getValue() == 2) {
3681 IID = Intrinsic::uadd_with_overflow;
3682 RHS = LHS;
3683 }
3684 break;
3685 }
3686
3687 unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3688 AArch64CC::CondCode CC = AArch64CC::AL;
3689 switch (IID) {
3690 default: llvm_unreachable("Unexpected intrinsic!");
3691 case Intrinsic::sadd_with_overflow:
3692 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3693 CC = AArch64CC::VS;
3694 break;
3695 case Intrinsic::uadd_with_overflow:
3696 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3697 CC = AArch64CC::HS;
3698 break;
3699 case Intrinsic::ssub_with_overflow:
3700 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3701 CC = AArch64CC::VS;
3702 break;
3703 case Intrinsic::usub_with_overflow:
3704 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3705 CC = AArch64CC::LO;
3706 break;
3707 case Intrinsic::smul_with_overflow: {
3708 CC = AArch64CC::NE;
3709 Register LHSReg = getRegForValue(LHS);
3710 if (!LHSReg)
3711 return false;
3712
3713 Register RHSReg = getRegForValue(RHS);
3714 if (!RHSReg)
3715 return false;
3716
3717 if (VT == MVT::i32) {
3718 MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg);
3719 Register MulSubReg =
3720 fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3721 // cmp xreg, wreg, sxtw
3722 emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg,
3723 AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true,
3724 /*WantResult=*/false);
3725 MulReg = MulSubReg;
3726 } else {
3727 assert(VT == MVT::i64 && "Unexpected value type.");
3728 // LHSReg and RHSReg cannot be killed by this Mul, since they are
3729 // reused in the next instruction.
3730 MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3731 unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg);
3732 emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63,
3733 /*WantResult=*/false);
3734 }
3735 break;
3736 }
3737 case Intrinsic::umul_with_overflow: {
3738 CC = AArch64CC::NE;
3739 Register LHSReg = getRegForValue(LHS);
3740 if (!LHSReg)
3741 return false;
3742
3743 Register RHSReg = getRegForValue(RHS);
3744 if (!RHSReg)
3745 return false;
3746
3747 if (VT == MVT::i32) {
3748 MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg);
3749 // tst xreg, #0xffffffff00000000
3750 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3751 TII.get(AArch64::ANDSXri), AArch64::XZR)
3752 .addReg(MulReg)
3753 .addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64));
3754 MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3755 } else {
3756 assert(VT == MVT::i64 && "Unexpected value type.");
3757 // LHSReg and RHSReg cannot be killed by this Mul, since they are
3758 // reused in the next instruction.
3759 MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3760 unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg);
3761 emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false);
3762 }
3763 break;
3764 }
3765 }
3766
3767 if (MulReg) {
3768 ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3769 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3770 TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3771 }
3772
3773 if (!ResultReg1)
3774 return false;
3775
3776 ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3777 AArch64::WZR, AArch64::WZR,
3778 getInvertedCondCode(CC));
3779 (void)ResultReg2;
3780 assert((ResultReg1 + 1) == ResultReg2 &&
3781 "Nonconsecutive result registers.");
3782 updateValueMap(II, ResultReg1, 2);
3783 return true;
3784 }
3785 case Intrinsic::aarch64_crc32b:
3786 case Intrinsic::aarch64_crc32h:
3787 case Intrinsic::aarch64_crc32w:
3788 case Intrinsic::aarch64_crc32x:
3789 case Intrinsic::aarch64_crc32cb:
3790 case Intrinsic::aarch64_crc32ch:
3791 case Intrinsic::aarch64_crc32cw:
3792 case Intrinsic::aarch64_crc32cx: {
3793 if (!Subtarget->hasCRC())
3794 return false;
3795
3796 unsigned Opc;
3797 switch (II->getIntrinsicID()) {
3798 default:
3799 llvm_unreachable("Unexpected intrinsic!");
3800 case Intrinsic::aarch64_crc32b:
3801 Opc = AArch64::CRC32Brr;
3802 break;
3803 case Intrinsic::aarch64_crc32h:
3804 Opc = AArch64::CRC32Hrr;
3805 break;
3806 case Intrinsic::aarch64_crc32w:
3807 Opc = AArch64::CRC32Wrr;
3808 break;
3809 case Intrinsic::aarch64_crc32x:
3810 Opc = AArch64::CRC32Xrr;
3811 break;
3812 case Intrinsic::aarch64_crc32cb:
3813 Opc = AArch64::CRC32CBrr;
3814 break;
3815 case Intrinsic::aarch64_crc32ch:
3816 Opc = AArch64::CRC32CHrr;
3817 break;
3818 case Intrinsic::aarch64_crc32cw:
3819 Opc = AArch64::CRC32CWrr;
3820 break;
3821 case Intrinsic::aarch64_crc32cx:
3822 Opc = AArch64::CRC32CXrr;
3823 break;
3824 }
3825
3826 Register LHSReg = getRegForValue(II->getArgOperand(0));
3827 Register RHSReg = getRegForValue(II->getArgOperand(1));
3828 if (!LHSReg || !RHSReg)
3829 return false;
3830
3831 Register ResultReg =
3832 fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, LHSReg, RHSReg);
3833 updateValueMap(II, ResultReg);
3834 return true;
3835 }
3836 }
3837 return false;
3838}
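// For illustration (a sketch, not part of the original file): for
//   %res = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
// the overflow case above typically selects a flag-setting add followed by a
// CSINC of the inverted condition, roughly
//   adds w8, w0, w1        ; value result, sets NZCV
//   cset w9, hs            ; overflow bit (CSINC wzr, wzr, inverted cond)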
3839
3840bool AArch64FastISel::selectRet(const Instruction *I) {
3841 const ReturnInst *Ret = cast<ReturnInst>(I);
3842 const Function &F = *I->getParent()->getParent();
3843
3844 if (!FuncInfo.CanLowerReturn)
3845 return false;
3846
3847 if (F.isVarArg())
3848 return false;
3849
3850 if (TLI.supportSwiftError() &&
3851 F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3852 return false;
3853
3854 if (TLI.supportSplitCSR(FuncInfo.MF))
3855 return false;
3856
3857 // Build a list of return value registers.
3858 SmallVector<unsigned, 4> RetRegs;
3859
3860 if (Ret->getNumOperands() > 0) {
3861 CallingConv::ID CC = F.getCallingConv();
3862 SmallVector<ISD::OutputArg, 4> Outs;
3863 GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3864
3865 // Analyze operands of the call, assigning locations to each operand.
3866 SmallVector<CCValAssign, 16> ValLocs;
3867 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3868 CCInfo.AnalyzeReturn(Outs, RetCC_AArch64_AAPCS);
3869
3870 // Only handle a single return value for now.
3871 if (ValLocs.size() != 1)
3872 return false;
3873
3874 CCValAssign &VA = ValLocs[0];
3875 const Value *RV = Ret->getOperand(0);
3876
3877 // Don't bother handling odd stuff for now.
3878 if ((VA.getLocInfo() != CCValAssign::Full) &&
3879 (VA.getLocInfo() != CCValAssign::BCvt))
3880 return false;
3881
3882 // Only handle register returns for now.
3883 if (!VA.isRegLoc())
3884 return false;
3885
3886 Register Reg = getRegForValue(RV);
3887 if (Reg == 0)
3888 return false;
3889
3890 unsigned SrcReg = Reg + VA.getValNo();
3891 Register DestReg = VA.getLocReg();
3892 // Avoid a cross-class copy. This is very unlikely.
3893 if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3894 return false;
3895
3896 EVT RVEVT = TLI.getValueType(DL, RV->getType());
3897 if (!RVEVT.isSimple())
3898 return false;
3899
3900 // Vectors (of > 1 lane) in big endian need tricky handling.
3901 if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
3902 !Subtarget->isLittleEndian())
3903 return false;
3904
3905 MVT RVVT = RVEVT.getSimpleVT();
3906 if (RVVT == MVT::f128)
3907 return false;
3908
3909 MVT DestVT = VA.getValVT();
3910 // Special handling for extended integers.
3911 if (RVVT != DestVT) {
3912 if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3913 return false;
3914
3915 if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3916 return false;
3917
3918 bool IsZExt = Outs[0].Flags.isZExt();
3919 SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3920 if (SrcReg == 0)
3921 return false;
3922 }
3923
3924 // "Callee" (i.e. value producer) zero extends pointers at function
3925 // boundary.
3926 if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
3927 SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff);
3928
3929 // Make the copy.
3930 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3931 TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3932
3933 // Add register to return instruction.
3934 RetRegs.push_back(VA.getLocReg());
3935 }
3936
3937 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3938 TII.get(AArch64::RET_ReallyLR));
3939 for (unsigned RetReg : RetRegs)
3940 MIB.addReg(RetReg, RegState::Implicit);
3941 return true;
3942}
3943
3944bool AArch64FastISel::selectTrunc(const Instruction *I) {
3945 Type *DestTy = I->getType();
3946 Value *Op = I->getOperand(0);
3947 Type *SrcTy = Op->getType();
3948
3949 EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3950 EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3951 if (!SrcEVT.isSimple())
3952 return false;
3953 if (!DestEVT.isSimple())
3954 return false;
3955
3956 MVT SrcVT = SrcEVT.getSimpleVT();
3957 MVT DestVT = DestEVT.getSimpleVT();
3958
3959 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3960 SrcVT != MVT::i8)
3961 return false;
3962 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3963 DestVT != MVT::i1)
3964 return false;
3965
3966 Register SrcReg = getRegForValue(Op);
3967 if (!SrcReg)
3968 return false;
3969
3970 // If we're truncating from i64 to a smaller non-legal type then generate an
3971 // AND. Otherwise, we know the high bits are undefined and a truncate only
3972 // generates a COPY. We cannot mark the source register also as result
3973 // register, because this can incorrectly transfer the kill flag onto the
3974 // source register.
3975 unsigned ResultReg;
3976 if (SrcVT == MVT::i64) {
3977 uint64_t Mask = 0;
3978 switch (DestVT.SimpleTy) {
3979 default:
3980 // Trunc i64 to i32 is handled by the target-independent fast-isel.
3981 return false;
3982 case MVT::i1:
3983 Mask = 0x1;
3984 break;
3985 case MVT::i8:
3986 Mask = 0xff;
3987 break;
3988 case MVT::i16:
3989 Mask = 0xffff;
3990 break;
3991 }
3992 // Issue an extract_subreg to get the lower 32-bits.
3993 Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg,
3994 AArch64::sub_32);
3995 // Create the AND instruction which performs the actual truncation.
3996 ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask);
3997 assert(ResultReg && "Unexpected AND instruction emission failure.");
3998 } else {
3999 ResultReg = createResultReg(&AArch64::GPR32RegClass);
4000 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4001 TII.get(TargetOpcode::COPY), ResultReg)
4002 .addReg(SrcReg);
4003 }
4004
4005 updateValueMap(I, ResultReg);
4006 return true;
4007}
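// Worked example for the i64 path above (a sketch, not upstream code):
//   %t = trunc i64 %x to i8
// is selected as an extract of the low 32 bits (sub_32) followed by an AND:
//   and w0, w8, #0xff      ; w8 holds the low half of the i64 source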
4008
4009unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
4010 assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
4011 DestVT == MVT::i64) &&
4012 "Unexpected value type.");
4013 // Handle i8 and i16 as i32.
4014 if (DestVT == MVT::i8 || DestVT == MVT::i16)
4015 DestVT = MVT::i32;
4016
4017 if (IsZExt) {
4018 unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1);
4019 assert(ResultReg && "Unexpected AND instruction emission failure.");
4020 if (DestVT == MVT::i64) {
4021 // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
4022 // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
4023 Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4024 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4025 TII.get(AArch64::SUBREG_TO_REG), Reg64)
4026 .addImm(0)
4027 .addReg(ResultReg)
4028 .addImm(AArch64::sub_32);
4029 ResultReg = Reg64;
4030 }
4031 return ResultReg;
4032 } else {
4033 if (DestVT == MVT::i64) {
4034 // FIXME: We're SExt i1 to i64.
4035 return 0;
4036 }
4037 return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
4038 0, 0);
4039 }
4040}
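// Note (illustrative, not from the source): the SBFMWri Wd, Wn, #0, #0 emitted
// for the sign-extension case is the single-bit form sbfx wd, wn, #0, #1,
// which replicates bit 0 across the register - exactly the i1 sign-extension.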
4041
4042unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4043 unsigned Opc, ZReg;
4044 switch (RetVT.SimpleTy) {
4045 default: return 0;
4046 case MVT::i8:
4047 case MVT::i16:
4048 case MVT::i32:
4049 RetVT = MVT::i32;
4050 Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
4051 case MVT::i64:
4052 Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
4053 }
4054
4055 const TargetRegisterClass *RC =
4056 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4057 return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg);
4058}
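// Illustrative selection (a sketch, not upstream code): a plain multiply is
// expressed as a multiply-add with the zero register as the addend, e.g. i32:
//   madd w0, w0, w1, wzr   ; w0 = w0 * w1 + 0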
4059
4060unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4061 if (RetVT != MVT::i64)
4062 return 0;
4063
4064 return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
4065 Op0, Op1, AArch64::XZR);
4066}
4067
4068unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4069 if (RetVT != MVT::i64)
4070 return 0;
4071
4072 return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
4073 Op0, Op1, AArch64::XZR);
4074}
4075
4076unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg,
4077 unsigned Op1Reg) {
4078 unsigned Opc = 0;
4079 bool NeedTrunc = false;
4080 uint64_t Mask = 0;
4081 switch (RetVT.SimpleTy) {
4082 default: return 0;
4083 case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
4084 case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4085 case MVT::i32: Opc = AArch64::LSLVWr; break;
4086 case MVT::i64: Opc = AArch64::LSLVXr; break;
4087 }
4088
4089 const TargetRegisterClass *RC =
4090 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4091 if (NeedTrunc)
4092 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4093
4094 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4095 if (NeedTrunc)
4096 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4097 return ResultReg;
4098}
4099
4100unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4101 uint64_t Shift, bool IsZExt) {
4102 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4103 "Unexpected source/return type pair.");
4104 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4105 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4106 "Unexpected source value type.");
4107 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4108 RetVT == MVT::i64) && "Unexpected return value type.");
4109
4110 bool Is64Bit = (RetVT == MVT::i64);
4111 unsigned RegSize = Is64Bit ? 64 : 32;
4112 unsigned DstBits = RetVT.getSizeInBits();
4113 unsigned SrcBits = SrcVT.getSizeInBits();
4114 const TargetRegisterClass *RC =
4115 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4116
4117 // Just emit a copy for "zero" shifts.
4118 if (Shift == 0) {
4119 if (RetVT == SrcVT) {
4120 Register ResultReg = createResultReg(RC);
4121 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4122 TII.get(TargetOpcode::COPY), ResultReg)
4123 .addReg(Op0);
4124 return ResultReg;
4125 } else
4126 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4127 }
4128
4129 // Don't deal with undefined shifts.
4130 if (Shift >= DstBits)
4131 return 0;
4132
4133 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4134 // {S|U}BFM Wd, Wn, #r, #s
4135 // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4136
4137 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4138 // %2 = shl i16 %1, 4
4139 // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4140 // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4141 // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4142 // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4143
4144 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4145 // %2 = shl i16 %1, 8
4146 // Wd<32+7-24,32-24> = Wn<7:0>
4147 // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4148 // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4149 // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4150
4151 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4152 // %2 = shl i16 %1, 12
4153 // Wd<32+3-20,32-20> = Wn<3:0>
4154 // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4155 // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4156 // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4157
4158 unsigned ImmR = RegSize - Shift;
4159 // Limit the width to the length of the source type.
4160 unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4161 static const unsigned OpcTable[2][2] = {
4162 {AArch64::SBFMWri, AArch64::SBFMXri},
4163 {AArch64::UBFMWri, AArch64::UBFMXri}
4164 };
4165 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4166 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4167 Register TmpReg = MRI.createVirtualRegister(RC);
4168 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4169 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4170 .addImm(0)
4171 .addReg(Op0)
4172 .addImm(AArch64::sub_32);
4173 Op0 = TmpReg;
4174 }
4175 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4176}
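// Worked example for the immediate-shift folding above (a sketch): an i8
// value zero-extended and shifted left by 4 into an i32 result gives
//   ImmR = RegSize - Shift = 32 - 4 = 28
//   ImmS = min(SrcBits - 1, DstBits - 1 - Shift) = min(7, 27) = 7
// i.e. UBFM Wd, Wn, #28, #7, which disassembles as ubfiz wd, wn, #4, #8.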
4177
4178unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg,
4179 unsigned Op1Reg) {
4180 unsigned Opc = 0;
4181 bool NeedTrunc = false;
4182 uint64_t Mask = 0;
4183 switch (RetVT.SimpleTy) {
4184 default: return 0;
4185 case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
4186 case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4187 case MVT::i32: Opc = AArch64::LSRVWr; break;
4188 case MVT::i64: Opc = AArch64::LSRVXr; break;
4189 }
4190
4191 const TargetRegisterClass *RC =
4192 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4193 if (NeedTrunc) {
4194 Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask);
4195 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4196 }
4197 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4198 if (NeedTrunc)
4199 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4200 return ResultReg;
4201}
4202
4203unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4204 uint64_t Shift, bool IsZExt) {
4205 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4206 "Unexpected source/return type pair.");
4207 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4208 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4209 "Unexpected source value type.");
4210 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4211 RetVT == MVT::i64) && "Unexpected return value type.");
4212
4213 bool Is64Bit = (RetVT == MVT::i64);
4214 unsigned RegSize = Is64Bit ? 64 : 32;
4215 unsigned DstBits = RetVT.getSizeInBits();
4216 unsigned SrcBits = SrcVT.getSizeInBits();
4217 const TargetRegisterClass *RC =
4218 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4219
4220 // Just emit a copy for "zero" shifts.
4221 if (Shift == 0) {
4222 if (RetVT == SrcVT) {
4223 Register ResultReg = createResultReg(RC);
4224 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4225 TII.get(TargetOpcode::COPY), ResultReg)
4226 .addReg(Op0);
4227 return ResultReg;
4228 } else
4229 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4230 }
4231
4232 // Don't deal with undefined shifts.
4233 if (Shift >= DstBits)
4234 return 0;
4235
4236 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4237 // {S|U}BFM Wd, Wn, #r, #s
4238 // Wd<s-r:0> = Wn<s:r> when r <= s
4239
4240 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4241 // %2 = lshr i16 %1, 4
4242 // Wd<7-4:0> = Wn<7:4>
4243 // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4244 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4245 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4246
4247 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4248 // %2 = lshr i16 %1, 8
4249 // Wd<7-7,0> = Wn<7:7>
4250 // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4251 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4252 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4253
4254 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4255 // %2 = lshr i16 %1, 12
4256 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4257 // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4258 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4259 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4260
4261 if (Shift >= SrcBits && IsZExt)
4262 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4263
4264 // It is not possible to fold a sign-extend into the LShr instruction. In this
4265 // case emit a sign-extend.
4266 if (!IsZExt) {
4267 Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4268 if (!Op0)
4269 return 0;
4270 SrcVT = RetVT;
4271 SrcBits = SrcVT.getSizeInBits();
4272 IsZExt = true;
4273 }
4274
4275 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4276 unsigned ImmS = SrcBits - 1;
4277 static const unsigned OpcTable[2][2] = {
4278 {AArch64::SBFMWri, AArch64::SBFMXri},
4279 {AArch64::UBFMWri, AArch64::UBFMXri}
4280 };
4281 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4282 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4283 Register TmpReg = MRI.createVirtualRegister(RC);
4284 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4285 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4286 .addImm(0)
4287 .addReg(Op0)
4288 .addImm(AArch64::sub_32);
4289 Op0 = TmpReg;
4290 }
4291 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4292}
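// Worked example for the folded logical right shift above (a sketch): an i8
// value zero-extended and shifted right by 4 into an i32 result gives
//   ImmR = min(SrcBits - 1, Shift) = min(7, 4) = 4
//   ImmS = SrcBits - 1 = 7
// i.e. UBFM Wd, Wn, #4, #7, which disassembles as ubfx wd, wn, #4, #4.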
4293
4294unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg,
4295 unsigned Op1Reg) {
4296 unsigned Opc = 0;
4297 bool NeedTrunc = false;
4298 uint64_t Mask = 0;
4299 switch (RetVT.SimpleTy) {
4300 default: return 0;
4301 case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
4302 case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4303 case MVT::i32: Opc = AArch64::ASRVWr; break;
4304 case MVT::i64: Opc = AArch64::ASRVXr; break;
4305 }
4306
4307 const TargetRegisterClass *RC =
4308 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4309 if (NeedTrunc) {
4310 Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
4311 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4312 }
4313 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4314 if (NeedTrunc)
4315 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4316 return ResultReg;
4317}
4318
4319unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4320 uint64_t Shift, bool IsZExt) {
4321 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4322 "Unexpected source/return type pair.");
4323 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4324 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4325 "Unexpected source value type.");
4326 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4327 RetVT == MVT::i64) && "Unexpected return value type.");
4328
4329 bool Is64Bit = (RetVT == MVT::i64);
4330 unsigned RegSize = Is64Bit ? 64 : 32;
4331 unsigned DstBits = RetVT.getSizeInBits();
4332 unsigned SrcBits = SrcVT.getSizeInBits();
4333 const TargetRegisterClass *RC =
4334 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4335
4336 // Just emit a copy for "zero" shifts.
4337 if (Shift == 0) {
4338 if (RetVT == SrcVT) {
4339 Register ResultReg = createResultReg(RC);
4340 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4341 TII.get(TargetOpcode::COPY), ResultReg)
4342 .addReg(Op0);
4343 return ResultReg;
4344 } else
4345 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4346 }
4347
4348 // Don't deal with undefined shifts.
4349 if (Shift >= DstBits)
4350 return 0;
4351
4352 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4353 // {S|U}BFM Wd, Wn, #r, #s
4354 // Wd<s-r:0> = Wn<s:r> when r <= s
4355
4356 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4357 // %2 = ashr i16 %1, 4
4358 // Wd<7-4:0> = Wn<7:4>
4359 // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4360 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4361 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4362
4363 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4364 // %2 = ashr i16 %1, 8
4365 // Wd<7-7,0> = Wn<7:7>
4366 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4367 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4368 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4369
4370 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4371 // %2 = ashr i16 %1, 12
4372 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4373 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4374 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4375 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4376
4377 if (Shift >= SrcBits && IsZExt)
4378 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4379
4380 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4381 unsigned ImmS = SrcBits - 1;
4382 static const unsigned OpcTable[2][2] = {
4383 {AArch64::SBFMWri, AArch64::SBFMXri},
4384 {AArch64::UBFMWri, AArch64::UBFMXri}
4385 };
4386 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4387 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4388 Register TmpReg = MRI.createVirtualRegister(RC);
4389 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4390 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4391 .addImm(0)
4392 .addReg(Op0)
4393 .addImm(AArch64::sub_32);
4394 Op0 = TmpReg;
4395 }
4396 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4397}
4398
4399unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4400 bool IsZExt) {
4401 assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4402
4403 // FastISel does not have plumbing to deal with extensions where the SrcVT or
4404 // DestVT are odd things, so test to make sure that they are both types we can
4405 // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4406 // bail out to SelectionDAG.
4407 if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4408 (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4409 ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4410 (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4411 return 0;
4412
4413 unsigned Opc;
4414 unsigned Imm = 0;
4415
4416 switch (SrcVT.SimpleTy) {
4417 default:
4418 return 0;
4419 case MVT::i1:
4420 return emiti1Ext(SrcReg, DestVT, IsZExt);
4421 case MVT::i8:
4422 if (DestVT == MVT::i64)
4423 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4424 else
4425 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4426 Imm = 7;
4427 break;
4428 case MVT::i16:
4429 if (DestVT == MVT::i64)
4430 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4431 else
4432 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4433 Imm = 15;
4434 break;
4435 case MVT::i32:
4436 assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4437 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4438 Imm = 31;
4439 break;
4440 }
4441
4442 // Handle i8 and i16 as i32.
4443 if (DestVT == MVT::i8 || DestVT == MVT::i16)
4444 DestVT = MVT::i32;
4445 else if (DestVT == MVT::i64) {
4446 Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4447 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4448 TII.get(AArch64::SUBREG_TO_REG), Src64)
4449 .addImm(0)
4450 .addReg(SrcReg)
4451 .addImm(AArch64::sub_32);
4452 SrcReg = Src64;
4453 }
4454
4455 const TargetRegisterClass *RC =
4456 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4457 return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm);
4458}
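// Illustrative selections for the table above (a sketch, not upstream code):
//   sext i16 -> i32 : SBFMWri Wd, Wn, #0, #15   (sxth wd, wn)
//   zext i8  -> i64 : SUBREG_TO_REG into an X register, then
//                     UBFMXri Xd, Xn, #0, #7    (ubfx xd, xn, #0, #8)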
4459
4460static bool isZExtLoad(const MachineInstr *LI) {
4461 switch (LI->getOpcode()) {
4462 default:
4463 return false;
4464 case AArch64::LDURBBi:
4465 case AArch64::LDURHHi:
4466 case AArch64::LDURWi:
4467 case AArch64::LDRBBui:
4468 case AArch64::LDRHHui:
4469 case AArch64::LDRWui:
4470 case AArch64::LDRBBroX:
4471 case AArch64::LDRHHroX:
4472 case AArch64::LDRWroX:
4473 case AArch64::LDRBBroW:
4474 case AArch64::LDRHHroW:
4475 case AArch64::LDRWroW:
4476 return true;
4477 }
4478}
4479
4480static bool isSExtLoad(const MachineInstr *LI) {
4481 switch (LI->getOpcode()) {
4482 default:
4483 return false;
4484 case AArch64::LDURSBWi:
4485 case AArch64::LDURSHWi:
4486 case AArch64::LDURSBXi:
4487 case AArch64::LDURSHXi:
4488 case AArch64::LDURSWi:
4489 case AArch64::LDRSBWui:
4490 case AArch64::LDRSHWui:
4491 case AArch64::LDRSBXui:
4492 case AArch64::LDRSHXui:
4493 case AArch64::LDRSWui:
4494 case AArch64::LDRSBWroX:
4495 case AArch64::LDRSHWroX:
4496 case AArch64::LDRSBXroX:
4497 case AArch64::LDRSHXroX:
4498 case AArch64::LDRSWroX:
4499 case AArch64::LDRSBWroW:
4500 case AArch64::LDRSHWroW:
4501 case AArch64::LDRSBXroW:
4502 case AArch64::LDRSHXroW:
4503 case AArch64::LDRSWroW:
4504 return true;
4505 }
4506}
4507
4508bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4509 MVT SrcVT) {
4510 const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4511 if (!LI || !LI->hasOneUse())
4512 return false;
4513
4514 // Check if the load instruction has already been selected.
4515 Register Reg = lookUpRegForValue(LI);
4516 if (!Reg)
4517 return false;
4518
4519 MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4520 if (!MI)
4521 return false;
4522
4523 // Check if the correct load instruction has been emitted - SelectionDAG might
4524 // have emitted a zero-extending load, but we need a sign-extending load.
4525 bool IsZExt = isa<ZExtInst>(I);
4526 const auto *LoadMI = MI;
4527 if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4528 LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4529 Register LoadReg = MI->getOperand(1).getReg();
4530 LoadMI = MRI.getUniqueVRegDef(LoadReg);
4531 assert(LoadMI && "Expected valid instruction");
4532 }
4533 if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4534 return false;
4535
4536 // Nothing to be done.
4537 if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4538 updateValueMap(I, Reg);
4539 return true;
4540 }
4541
4542 if (IsZExt) {
4543 Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
4544 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4545 TII.get(AArch64::SUBREG_TO_REG), Reg64)
4546 .addImm(0)
4547 .addReg(Reg, getKillRegState(true))
4548 .addImm(AArch64::sub_32);
4549 Reg = Reg64;
4550 } else {
4551 assert((MI->getOpcode() == TargetOpcode::COPY &&
4552 MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4553 "Expected copy instruction");
4554 Reg = MI->getOperand(1).getReg();
4555 MachineBasicBlock::iterator I(MI);
4556 removeDeadCode(I, std::next(I));
4557 }
4558 updateValueMap(I, Reg);
4559 return true;
4560}
4561
4562bool AArch64FastISel::selectIntExt(const Instruction *I) {
4563 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4564 "Unexpected integer extend instruction.");
4565 MVT RetVT;
4566 MVT SrcVT;
4567 if (!isTypeSupported(I->getType(), RetVT))
4568 return false;
4569
4570 if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4571 return false;
4572
4573 // Try to optimize already sign-/zero-extended values from load instructions.
4574 if (optimizeIntExtLoad(I, RetVT, SrcVT))
4575 return true;
4576
4577 Register SrcReg = getRegForValue(I->getOperand(0));
4578 if (!SrcReg)
4579 return false;
4580
4581 // Try to optimize already sign-/zero-extended values from function arguments.
4582 bool IsZExt = isa<ZExtInst>(I);
4583 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4584 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4585 if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4586 Register ResultReg = createResultReg(&AArch64::GPR64RegClass);
4587 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4588 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4589 .addImm(0)
4590 .addReg(SrcReg)
4591 .addImm(AArch64::sub_32);
4592 SrcReg = ResultReg;
4593 }
4594
4595 updateValueMap(I, SrcReg);
4596 return true;
4597 }
4598 }
4599
4600 unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4601 if (!ResultReg)
4602 return false;
4603
4604 updateValueMap(I, ResultReg);
4605 return true;
4606}
4607
4608bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4609 EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4610 if (!DestEVT.isSimple())
4611 return false;
4612
4613 MVT DestVT = DestEVT.getSimpleVT();
4614 if (DestVT != MVT::i64 && DestVT != MVT::i32)
4615 return false;
4616
4617 unsigned DivOpc;
4618 bool Is64bit = (DestVT == MVT::i64);
4619 switch (ISDOpcode) {
4620 default:
4621 return false;
4622 case ISD::SREM:
4623 DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4624 break;
4625 case ISD::UREM:
4626 DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4627 break;
4628 }
4629 unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4630 Register Src0Reg = getRegForValue(I->getOperand(0));
4631 if (!Src0Reg)
4632 return false;
4633
4634 Register Src1Reg = getRegForValue(I->getOperand(1));
4635 if (!Src1Reg)
4636 return false;
4637
4638 const TargetRegisterClass *RC =
4639 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4640 Register QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg);
4641 assert(QuotReg && "Unexpected DIV instruction emission failure.");
4642 // The remainder is computed as numerator - (quotient * denominator) using the
4643 // MSUB instruction.
4644 Register ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg);
4645 updateValueMap(I, ResultReg);
4646 return true;
4647}
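// Illustrative lowering of a 32-bit srem (a sketch, not upstream code):
//   sdiv w8, w0, w1        ; quotient
//   msub w0, w8, w1, w0    ; remainder = w0 - w8 * w1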
4648
4649bool AArch64FastISel::selectMul(const Instruction *I) {
4650 MVT VT;
4651 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4652 return false;
4653
4654 if (VT.isVector())
4655 return selectBinaryOp(I, ISD::MUL);
4656
4657 const Value *Src0 = I->getOperand(0);
4658 const Value *Src1 = I->getOperand(1);
4659 if (const auto *C = dyn_cast<ConstantInt>(Src0))
4660 if (C->getValue().isPowerOf2())
4661 std::swap(Src0, Src1);
4662
4663 // Try to simplify to a shift instruction.
4664 if (const auto *C = dyn_cast<ConstantInt>(Src1))
4665 if (C->getValue().isPowerOf2()) {
4666 uint64_t ShiftVal = C->getValue().logBase2();
4667 MVT SrcVT = VT;
4668 bool IsZExt = true;
4669 if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4670 if (!isIntExtFree(ZExt)) {
4671 MVT VT;
4672 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4673 SrcVT = VT;
4674 IsZExt = true;
4675 Src0 = ZExt->getOperand(0);
4676 }
4677 }
4678 } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4679 if (!isIntExtFree(SExt)) {
4680 MVT VT;
4681 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4682 SrcVT = VT;
4683 IsZExt = false;
4684 Src0 = SExt->getOperand(0);
4685 }
4686 }
4687 }
4688
4689 Register Src0Reg = getRegForValue(Src0);
4690 if (!Src0Reg)
4691 return false;
4692
4693 unsigned ResultReg =
4694 emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt);
4695
4696 if (ResultReg) {
4697 updateValueMap(I, ResultReg);
4698 return true;
4699 }
4700 }
4701
4702 Register Src0Reg = getRegForValue(I->getOperand(0));
4703 if (!Src0Reg)
4704 return false;
4705
4706 Register Src1Reg = getRegForValue(I->getOperand(1));
4707 if (!Src1Reg)
4708 return false;
4709
4710 unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg);
4711
4712 if (!ResultReg)
4713 return false;
4714
4715 updateValueMap(I, ResultReg);
4716 return true;
4717}
4718
4719bool AArch64FastISel::selectShift(const Instruction *I) {
4720 MVT RetVT;
4721 if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4722 return false;
4723
4724 if (RetVT.isVector())
4725 return selectOperator(I, I->getOpcode());
4726
4727 if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4728 unsigned ResultReg = 0;
4729 uint64_t ShiftVal = C->getZExtValue();
4730 MVT SrcVT = RetVT;
4731 bool IsZExt = I->getOpcode() != Instruction::AShr;
4732 const Value *Op0 = I->getOperand(0);
4733 if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4734 if (!isIntExtFree(ZExt)) {
4735 MVT TmpVT;
4736 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4737 SrcVT = TmpVT;
4738 IsZExt = true;
4739 Op0 = ZExt->getOperand(0);
4740 }
4741 }
4742 } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4743 if (!isIntExtFree(SExt)) {
4744 MVT TmpVT;
4745 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4746 SrcVT = TmpVT;
4747 IsZExt = false;
4748 Op0 = SExt->getOperand(0);
4749 }
4750 }
4751 }
4752
4753 Register Op0Reg = getRegForValue(Op0);
4754 if (!Op0Reg)
4755 return false;
4756
4757 switch (I->getOpcode()) {
4758 default: llvm_unreachable("Unexpected instruction.");
4759 case Instruction::Shl:
4760 ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4761 break;
4762 case Instruction::AShr:
4763 ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4764 break;
4765 case Instruction::LShr:
4766 ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4767 break;
4768 }
4769 if (!ResultReg)
4770 return false;
4771
4772 updateValueMap(I, ResultReg);
4773 return true;
4774 }
4775
4776 Register Op0Reg = getRegForValue(I->getOperand(0));
4777 if (!Op0Reg)
4778 return false;
4779
4780 Register Op1Reg = getRegForValue(I->getOperand(1));
4781 if (!Op1Reg)
4782 return false;
4783
4784 unsigned ResultReg = 0;
4785 switch (I->getOpcode()) {
4786 default: llvm_unreachable("Unexpected instruction.");
4787 case Instruction::Shl:
4788 ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg);
4789 break;
4790 case Instruction::AShr:
4791 ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg);
4792 break;
4793 case Instruction::LShr:
4794 ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg);
4795 break;
4796 }
4797
4798 if (!ResultReg)
4799 return false;
4800
4801 updateValueMap(I, ResultReg);
4802 return true;
4803}
4804
4805bool AArch64FastISel::selectBitCast(const Instruction *I) {
4806 MVT RetVT, SrcVT;
4807
4808 if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4809 return false;
4810 if (!isTypeLegal(I->getType(), RetVT))
4811 return false;
4812
4813 unsigned Opc;
4814 if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4815 Opc = AArch64::FMOVWSr;
4816 else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4817 Opc = AArch64::FMOVXDr;
4818 else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4819 Opc = AArch64::FMOVSWr;
4820 else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4821 Opc = AArch64::FMOVDXr;
4822 else
4823 return false;
4824
4825 const TargetRegisterClass *RC = nullptr;
4826 switch (RetVT.SimpleTy) {
4827 default: llvm_unreachable("Unexpected value type.");
4828 case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4829 case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4830 case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4831 case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4832 }
4833 Register Op0Reg = getRegForValue(I->getOperand(0));
4834 if (!Op0Reg)
4835 return false;
4836
4837 Register ResultReg = fastEmitInst_r(Opc, RC, Op0Reg);
4838 if (!ResultReg)
4839 return false;
4840
4841 updateValueMap(I, ResultReg);
4842 return true;
4843}
4844
4845bool AArch64FastISel::selectFRem(const Instruction *I) {
4846 MVT RetVT;
4847 if (!isTypeLegal(I->getType(), RetVT))
4848 return false;
4849
4850 RTLIB::Libcall LC;
4851 switch (RetVT.SimpleTy) {
4852 default:
4853 return false;
4854 case MVT::f32:
4855 LC = RTLIB::REM_F32;
4856 break;
4857 case MVT::f64:
4858 LC = RTLIB::REM_F64;
4859 break;
4860 }
4861
4862 ArgListTy Args;
4863 Args.reserve(I->getNumOperands());
4864
4865 // Populate the argument list.
4866 for (auto &Arg : I->operands()) {
4867 ArgListEntry Entry;
4868 Entry.Val = Arg;
4869 Entry.Ty = Arg->getType();
4870 Args.push_back(Entry);
4871 }
4872
4873 CallLoweringInfo CLI;
4874 MCContext &Ctx = MF->getContext();
4875 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4876 TLI.getLibcallName(LC), std::move(Args));
4877 if (!lowerCallTo(CLI))
4878 return false;
4879 updateValueMap(I, CLI.ResultReg);
4880 return true;
4881}
4882
4883bool AArch64FastISel::selectSDiv(const Instruction *I) {
4884 MVT VT;
4885 if (!isTypeLegal(I->getType(), VT))
4886 return false;
4887
4888 if (!isa<ConstantInt>(I->getOperand(1)))
4889 return selectBinaryOp(I, ISD::SDIV);
4890
4891 const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4892 if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4893 !(C.isPowerOf2() || C.isNegatedPowerOf2()))
4894 return selectBinaryOp(I, ISD::SDIV);
4895
4896 unsigned Lg2 = C.countr_zero();
4897 Register Src0Reg = getRegForValue(I->getOperand(0));
4898 if (!Src0Reg)
4899 return false;
4900
4901 if (cast<BinaryOperator>(I)->isExact()) {
4902 unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2);
4903 if (!ResultReg)
4904 return false;
4905 updateValueMap(I, ResultReg);
4906 return true;
4907 }
4908
4909 int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4910 unsigned AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne);
4911 if (!AddReg)
4912 return false;
4913
4914 // (Src0 < 0) ? Pow2 - 1 : 0;
4915 if (!emitICmp_ri(VT, Src0Reg, 0))
4916 return false;
4917
4918 unsigned SelectOpc;
4919 const TargetRegisterClass *RC;
4920 if (VT == MVT::i64) {
4921 SelectOpc = AArch64::CSELXr;
4922 RC = &AArch64::GPR64RegClass;
4923 } else {
4924 SelectOpc = AArch64::CSELWr;
4925 RC = &AArch64::GPR32RegClass;
4926 }
4927 Register SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg,
4928 AArch64CC::LT);
4929 if (!SelectReg)
4930 return false;
4931
4932 // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4933 // negate the result.
4934 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4935 unsigned ResultReg;
4936 if (C.isNegative())
4937 ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg,
4938 AArch64_AM::ASR, Lg2);
4939 else
4940 ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2);
4941
4942 if (!ResultReg)
4943 return false;
4944
4945 updateValueMap(I, ResultReg);
4946 return true;
4947}
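// Illustrative expansion of a non-exact sdiv by 8 (a sketch):
//   add  w8, w0, #7        ; Src0 + (Pow2 - 1)
//   cmp  w0, #0
//   csel w8, w8, w0, lt    ; use the biased value only for negative inputs
//   asr  w0, w8, #3        ; (neg w0, w8, asr #3 instead when dividing by -8)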
4948
4949/// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4950/// have to duplicate it for AArch64, because otherwise we would fail during the
4951/// sign-extend emission.
4952unsigned AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4953 Register IdxN = getRegForValue(Idx);
4954 if (IdxN == 0)
4955 // Unhandled operand. Halt "fast" selection and bail.
4956 return 0;
4957
4958 // If the index is smaller or larger than intptr_t, truncate or extend it.
4959 MVT PtrVT = TLI.getPointerTy(DL);
4960 EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4961 if (IdxVT.bitsLT(PtrVT)) {
4962 IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
4963 } else if (IdxVT.bitsGT(PtrVT))
4964 llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4965 return IdxN;
4966}
4967
4968/// This is mostly a copy of the existing FastISel GEP code, but we have to
4969/// duplicate it for AArch64, because otherwise we would bail out even for
4970/// simple cases. This is because the standard fastEmit functions don't cover
4971 /// MUL at all and ADD is lowered very inefficiently.
4972bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4973 if (Subtarget->isTargetILP32())
4974 return false;
4975
4976 Register N = getRegForValue(I->getOperand(0));
4977 if (!N)
4978 return false;
4979
4980 // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4981 // into a single N = N + TotalOffset.
4982 uint64_t TotalOffs = 0;
4983 MVT VT = TLI.getPointerTy(DL);
4985 GTI != E; ++GTI) {
4986 const Value *Idx = GTI.getOperand();
4987 if (auto *StTy = GTI.getStructTypeOrNull()) {
4988 unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4989 // N = N + Offset
4990 if (Field)
4991 TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4992 } else {
4993 // If this is a constant subscript, handle it quickly.
4994 if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4995 if (CI->isZero())
4996 continue;
4997 // N = N + Offset
4998 TotalOffs += GTI.getSequentialElementStride(DL) *
4999 cast<ConstantInt>(CI)->getSExtValue();
5000 continue;
5001 }
5002 if (TotalOffs) {
5003 N = emitAdd_ri_(VT, N, TotalOffs);
5004 if (!N)
5005 return false;
5006 TotalOffs = 0;
5007 }
5008
5009 // N = N + Idx * ElementSize;
5010 uint64_t ElementSize = GTI.getSequentialElementStride(DL);
5011 unsigned IdxN = getRegForGEPIndex(Idx);
5012 if (!IdxN)
5013 return false;
5014
5015 if (ElementSize != 1) {
5016 unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
5017 if (!C)
5018 return false;
5019 IdxN = emitMul_rr(VT, IdxN, C);
5020 if (!IdxN)
5021 return false;
5022 }
5023 N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
5024 if (!N)
5025 return false;
5026 }
5027 }
5028 if (TotalOffs) {
5029 N = emitAdd_ri_(VT, N, TotalOffs);
5030 if (!N)
5031 return false;
5032 }
5033 updateValueMap(I, N);
5034 return true;
5035}
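// Illustrative selection for a variable GEP index (a sketch, not upstream
// code): for
//   %p2 = getelementptr i32, ptr %p, i64 %i
// the element size is materialized and combined with MUL/ADD as above,
// roughly
//   mov  x9, #4
//   madd x8, x8, x9, xzr   ; Idx * ElementSize (emitMul_rr)
//   add  x0, x0, x8        ; base + scaled index
// while constant indices are folded into the single running TotalOffs add.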
5036
5037bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
5038 assert(TM.getOptLevel() == CodeGenOptLevel::None &&
5039 "cmpxchg survived AtomicExpand at optlevel > -O0");
5040
5041 auto *RetPairTy = cast<StructType>(I->getType());
5042 Type *RetTy = RetPairTy->getTypeAtIndex(0U);
5043 assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
5044 "cmpxchg has a non-i1 status result");
5045
5046 MVT VT;
5047 if (!isTypeLegal(RetTy, VT))
5048 return false;
5049
5050 const TargetRegisterClass *ResRC;
5051 unsigned Opc, CmpOpc;
5052 // This only supports i32/i64, because i8/i16 aren't legal, and the generic
5053 // extractvalue selection doesn't support that.
5054 if (VT == MVT::i32) {
5055 Opc = AArch64::CMP_SWAP_32;
5056 CmpOpc = AArch64::SUBSWrs;
5057 ResRC = &AArch64::GPR32RegClass;
5058 } else if (VT == MVT::i64) {
5059 Opc = AArch64::CMP_SWAP_64;
5060 CmpOpc = AArch64::SUBSXrs;
5061 ResRC = &AArch64::GPR64RegClass;
5062 } else {
5063 return false;
5064 }
5065
5066 const MCInstrDesc &II = TII.get(Opc);
5067
5068 const Register AddrReg = constrainOperandRegClass(
5069 II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
5070 const Register DesiredReg = constrainOperandRegClass(
5071 II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
5072 const Register NewReg = constrainOperandRegClass(
5073 II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
5074
5075 const Register ResultReg1 = createResultReg(ResRC);
5076 const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5077 const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5078
5079 // FIXME: MachineMemOperand doesn't support cmpxchg yet.
5080 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
5081 .addDef(ResultReg1)
5082 .addDef(ScratchReg)
5083 .addUse(AddrReg)
5084 .addUse(DesiredReg)
5085 .addUse(NewReg);
5086
5087 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc))
5088 .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5089 .addUse(ResultReg1)
5090 .addUse(DesiredReg)
5091 .addImm(0);
5092
5093 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr))
5094 .addDef(ResultReg2)
5095 .addUse(AArch64::WZR)
5096 .addUse(AArch64::WZR)
5097 .addImm(AArch64CC::NE);
5098
5099 assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5100 updateValueMap(I, ResultReg1, 2);
5101 return true;
5102}
5103
5104bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5105 if (TLI.fallBackToDAGISel(*I))
5106 return false;
5107 switch (I->getOpcode()) {
5108 default:
5109 break;
5110 case Instruction::Add:
5111 case Instruction::Sub:
5112 return selectAddSub(I);
5113 case Instruction::Mul:
5114 return selectMul(I);
5115 case Instruction::SDiv:
5116 return selectSDiv(I);
5117 case Instruction::SRem:
5118 if (!selectBinaryOp(I, ISD::SREM))
5119 return selectRem(I, ISD::SREM);
5120 return true;
5121 case Instruction::URem:
5122 if (!selectBinaryOp(I, ISD::UREM))
5123 return selectRem(I, ISD::UREM);
5124 return true;
5125 case Instruction::Shl:
5126 case Instruction::LShr:
5127 case Instruction::AShr:
5128 return selectShift(I);
5129 case Instruction::And:
5130 case Instruction::Or:
5131 case Instruction::Xor:
5132 return selectLogicalOp(I);
5133 case Instruction::Br:
5134 return selectBranch(I);
5135 case Instruction::IndirectBr:
5136 return selectIndirectBr(I);
5137 case Instruction::BitCast:
5138 if (!selectCast(I, ISD::BITCAST))
5139 return selectBitCast(I);
5140 return true;
5141 case Instruction::FPToSI:
5142 if (!selectCast(I, ISD::FP_TO_SINT))
5143 return selectFPToInt(I, /*Signed=*/true);
5144 return true;
5145 case Instruction::FPToUI:
5146 return selectFPToInt(I, /*Signed=*/false);
5147 case Instruction::ZExt:
5148 case Instruction::SExt:
5149 return selectIntExt(I);
5150 case Instruction::Trunc:
5151 if (!selectCast(I, ISD::TRUNCATE))
5152 return selectTrunc(I);
5153 return true;
5154 case Instruction::FPExt:
5155 return selectFPExt(I);
5156 case Instruction::FPTrunc:
5157 return selectFPTrunc(I);
5158 case Instruction::SIToFP:
5159 if (!selectCast(I, ISD::SINT_TO_FP))
5160 return selectIntToFP(I, /*Signed=*/true);
5161 return true;
5162 case Instruction::UIToFP:
5163 return selectIntToFP(I, /*Signed=*/false);
5164 case Instruction::Load:
5165 return selectLoad(I);
5166 case Instruction::Store:
5167 return selectStore(I);
5168 case Instruction::FCmp:
5169 case Instruction::ICmp:
5170 return selectCmp(I);
5171 case Instruction::Select:
5172 return selectSelect(I);
5173 case Instruction::Ret:
5174 return selectRet(I);
5175 case Instruction::FRem:
5176 return selectFRem(I);
5177 case Instruction::GetElementPtr:
5178 return selectGetElementPtr(I);
5179 case Instruction::AtomicCmpXchg:
5180 return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5181 }
5182
5183 // Fall back to target-independent instruction selection.
5184 return selectOperator(I, I->getOpcode());
5185}
5186
5187 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
5188 const TargetLibraryInfo *LibInfo) {
5189
5190 SMEAttrs CallerAttrs(*FuncInfo.Fn);
5191 if (CallerAttrs.hasZAState() || CallerAttrs.hasZT0State() ||
5192 CallerAttrs.hasStreamingInterfaceOrBody() ||
5193 CallerAttrs.hasStreamingCompatibleInterface())
5194 return nullptr;
5195 return new AArch64FastISel(FuncInfo, LibInfo);
5196}