1//===- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the AArch64-specific support for the FastISel class. Some
10// of the target-specific code is generated by tablegen in the file
11// AArch64GenFastISel.inc, which is #included here.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AArch64.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
22#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/APInt.h"
24#include "llvm/ADT/DenseMap.h"
41#include "llvm/IR/Argument.h"
42#include "llvm/IR/Attributes.h"
43#include "llvm/IR/BasicBlock.h"
44#include "llvm/IR/CallingConv.h"
45#include "llvm/IR/Constant.h"
46#include "llvm/IR/Constants.h"
47#include "llvm/IR/DataLayout.h"
49#include "llvm/IR/Function.h"
51#include "llvm/IR/GlobalValue.h"
52#include "llvm/IR/InstrTypes.h"
53#include "llvm/IR/Instruction.h"
56#include "llvm/IR/Intrinsics.h"
57#include "llvm/IR/IntrinsicsAArch64.h"
58#include "llvm/IR/Module.h"
59#include "llvm/IR/Operator.h"
60#include "llvm/IR/Type.h"
61#include "llvm/IR/User.h"
62#include "llvm/IR/Value.h"
63#include "llvm/MC/MCInstrDesc.h"
65#include "llvm/MC/MCSymbol.h"
72#include <algorithm>
73#include <cassert>
74#include <cstdint>
75#include <iterator>
76#include <utility>
77
78using namespace llvm;
79
80namespace {
81
82class AArch64FastISel final : public FastISel {
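 /// Address - Wrapper for an addressing mode: a register or frame-index base,
 /// an optional extended/shifted offset register, an immediate offset, and an
 /// optional GlobalValue.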
83 class Address {
84 public:
85 using BaseKind = enum {
86 RegBase,
87 FrameIndexBase
88 };
89
90 private:
91 BaseKind Kind = RegBase;
92 AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
93 union {
94 unsigned Reg;
95 int FI;
96 } Base;
97 unsigned OffsetReg = 0;
98 unsigned Shift = 0;
99 int64_t Offset = 0;
100 const GlobalValue *GV = nullptr;
101
102 public:
103 Address() { Base.Reg = 0; }
104
105 void setKind(BaseKind K) { Kind = K; }
106 BaseKind getKind() const { return Kind; }
107 void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
108 AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
109 bool isRegBase() const { return Kind == RegBase; }
110 bool isFIBase() const { return Kind == FrameIndexBase; }
111
112 void setReg(unsigned Reg) {
113 assert(isRegBase() && "Invalid base register access!");
114 Base.Reg = Reg;
115 }
116
117 unsigned getReg() const {
118 assert(isRegBase() && "Invalid base register access!");
119 return Base.Reg;
120 }
121
122 void setOffsetReg(unsigned Reg) {
123 OffsetReg = Reg;
124 }
125
126 unsigned getOffsetReg() const {
127 return OffsetReg;
128 }
129
130 void setFI(unsigned FI) {
131 assert(isFIBase() && "Invalid base frame index access!");
132 Base.FI = FI;
133 }
134
135 unsigned getFI() const {
136 assert(isFIBase() && "Invalid base frame index access!");
137 return Base.FI;
138 }
139
140 void setOffset(int64_t O) { Offset = O; }
141 int64_t getOffset() { return Offset; }
142 void setShift(unsigned S) { Shift = S; }
143 unsigned getShift() { return Shift; }
144
145 void setGlobalValue(const GlobalValue *G) { GV = G; }
146 const GlobalValue *getGlobalValue() { return GV; }
147 };
148
149 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
150 /// make the right decision when generating code for different targets.
151 const AArch64Subtarget *Subtarget;
152 LLVMContext *Context;
153
154 bool fastLowerArguments() override;
155 bool fastLowerCall(CallLoweringInfo &CLI) override;
156 bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
157
158private:
159 // Selection routines.
160 bool selectAddSub(const Instruction *I);
161 bool selectLogicalOp(const Instruction *I);
162 bool selectLoad(const Instruction *I);
163 bool selectStore(const Instruction *I);
164 bool selectBranch(const Instruction *I);
165 bool selectIndirectBr(const Instruction *I);
166 bool selectCmp(const Instruction *I);
167 bool selectSelect(const Instruction *I);
168 bool selectFPExt(const Instruction *I);
169 bool selectFPTrunc(const Instruction *I);
170 bool selectFPToInt(const Instruction *I, bool Signed);
171 bool selectIntToFP(const Instruction *I, bool Signed);
172 bool selectRem(const Instruction *I, unsigned ISDOpcode);
173 bool selectRet(const Instruction *I);
174 bool selectTrunc(const Instruction *I);
175 bool selectIntExt(const Instruction *I);
176 bool selectMul(const Instruction *I);
177 bool selectShift(const Instruction *I);
178 bool selectBitCast(const Instruction *I);
179 bool selectFRem(const Instruction *I);
180 bool selectSDiv(const Instruction *I);
181 bool selectGetElementPtr(const Instruction *I);
182 bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
183
184 // Utility helper routines.
185 bool isTypeLegal(Type *Ty, MVT &VT);
186 bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
187 bool isValueAvailable(const Value *V) const;
188 bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
189 bool computeCallAddress(const Value *V, Address &Addr);
190 bool simplifyAddress(Address &Addr, MVT VT);
191 void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
192 MachineMemOperand::Flags Flags,
193 unsigned ScaleFactor, MachineMemOperand *MMO);
194 bool isMemCpySmall(uint64_t Len, MaybeAlign Alignment);
195 bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
196 MaybeAlign Alignment);
197 bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
198 const Value *Cond);
199 bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
200 bool optimizeSelect(const SelectInst *SI);
201 unsigned getRegForGEPIndex(const Value *Idx);
202
203 // Emit helper routines.
204 unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
205 const Value *RHS, bool SetFlags = false,
206 bool WantResult = true, bool IsZExt = false);
207 unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
208 unsigned RHSReg, bool SetFlags = false,
209 bool WantResult = true);
210 unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
211 uint64_t Imm, bool SetFlags = false,
212 bool WantResult = true);
213 unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
214 unsigned RHSReg, AArch64_AM::ShiftExtendType ShiftType,
215 uint64_t ShiftImm, bool SetFlags = false,
216 bool WantResult = true);
217 unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
218 unsigned RHSReg, AArch64_AM::ShiftExtendType ExtType,
219 uint64_t ShiftImm, bool SetFlags = false,
220 bool WantResult = true);
221
222 // Emit functions.
223 bool emitCompareAndBranch(const BranchInst *BI);
224 bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
225 bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
226 bool emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
227 bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
228 unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
229 MachineMemOperand *MMO = nullptr);
230 bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
231 MachineMemOperand *MMO = nullptr);
232 bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
233 MachineMemOperand *MMO = nullptr);
234 unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
235 unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
236 unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
237 bool SetFlags = false, bool WantResult = true,
238 bool IsZExt = false);
239 unsigned emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm);
240 unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
241 bool SetFlags = false, bool WantResult = true,
242 bool IsZExt = false);
243 unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
244 bool WantResult = true);
245 unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
246 AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
247 bool WantResult = true);
248 unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
249 const Value *RHS);
250 unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
251 uint64_t Imm);
252 unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
253 unsigned RHSReg, uint64_t ShiftImm);
254 unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
255 unsigned emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1);
256 unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
257 unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
258 unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
259 unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
260 bool IsZExt = true);
261 unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
262 unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
263 bool IsZExt = true);
264 unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
265 unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
266 bool IsZExt = false);
267
268 unsigned materializeInt(const ConstantInt *CI, MVT VT);
269 unsigned materializeFP(const ConstantFP *CFP, MVT VT);
270 unsigned materializeGV(const GlobalValue *GV);
271
272 // Call handling routines.
273private:
274 CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
275 bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
276 unsigned &NumBytes);
277 bool finishCall(CallLoweringInfo &CLI, unsigned NumBytes);
278
279public:
280 // Backend specific FastISel code.
281 unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
282 unsigned fastMaterializeConstant(const Constant *C) override;
283 unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
284
285 explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
286 const TargetLibraryInfo *LibInfo)
287 : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
288 Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>();
289 Context = &FuncInfo.Fn->getContext();
290 }
291
292 bool fastSelectInstruction(const Instruction *I) override;
293
294#include "AArch64GenFastISel.inc"
295};
296
297} // end anonymous namespace
298
299/// Check if the sign-/zero-extend will be a noop.
300static bool isIntExtFree(const Instruction *I) {
301 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
302 "Unexpected integer extend instruction.");
303 assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
304 "Unexpected value type.");
305 bool IsZExt = isa<ZExtInst>(I);
306
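 // An extend of a load with a single user can be folded into an extending
 // load, so it costs nothing.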
307 if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
308 if (LI->hasOneUse())
309 return true;
310
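 // Arguments carrying a matching zeroext/signext attribute are already
 // extended by the caller, so the extend is a noop.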
311 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
312 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
313 return true;
314
315 return false;
316}
317
318/// Determine the implicit scale factor that is applied by a memory
319/// operation for a given value type.
320static unsigned getImplicitScaleFactor(MVT VT) {
321 switch (VT.SimpleTy) {
322 default:
323 return 0; // invalid
324 case MVT::i1: // fall-through
325 case MVT::i8:
326 return 1;
327 case MVT::i16:
328 return 2;
329 case MVT::i32: // fall-through
330 case MVT::f32:
331 return 4;
332 case MVT::i64: // fall-through
333 case MVT::f64:
334 return 8;
335 }
336}
337
338CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
339 if (CC == CallingConv::GHC)
340 return CC_AArch64_GHC;
343 if (Subtarget->isTargetDarwin())
344 return CC_AArch64_DarwinPCS;
345 if (Subtarget->isTargetWindows())
346 return CC_AArch64_Win64PCS;
347 return CC_AArch64_AAPCS;
348}
349
350unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
351 assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
352 "Alloca should always return a pointer.");
353
354 // Don't handle dynamic allocas.
355 if (!FuncInfo.StaticAllocaMap.count(AI))
356 return 0;
357
358 DenseMap<const AllocaInst *, int>::iterator SI =
359 FuncInfo.StaticAllocaMap.find(AI);
360
361 if (SI != FuncInfo.StaticAllocaMap.end()) {
362 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
363 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
364 ResultReg)
365 .addFrameIndex(SI->second)
366 .addImm(0)
367 .addImm(0);
368 return ResultReg;
369 }
370
371 return 0;
372}
373
374unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
375 if (VT > MVT::i64)
376 return 0;
377
378 if (!CI->isZero())
379 return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
380
381 // Create a copy from the zero register to materialize a "0" value.
382 const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
383 : &AArch64::GPR32RegClass;
384 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
385 Register ResultReg = createResultReg(RC);
386 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
387 ResultReg).addReg(ZeroReg, getKillRegState(true));
388 return ResultReg;
389}
390
391unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
392 // Positive zero (+0.0) has to be materialized with a fmov from the zero
393 // register, because the immediate version of fmov cannot encode zero.
394 if (CFP->isNullValue())
395 return fastMaterializeFloatZero(CFP);
396
397 if (VT != MVT::f32 && VT != MVT::f64)
398 return 0;
399
400 const APFloat Val = CFP->getValueAPF();
401 bool Is64Bit = (VT == MVT::f64);
402 // This checks to see if we can use FMOV instructions to materialize
403 // a constant, otherwise we have to materialize via the constant pool.
404 int Imm =
405 Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
406 if (Imm != -1) {
407 unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
408 return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
409 }
410
411 // For the large code model materialize the FP constant in code.
412 if (TM.getCodeModel() == CodeModel::Large) {
413 unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
414 const TargetRegisterClass *RC = Is64Bit ?
415 &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
416
417 Register TmpReg = createResultReg(RC);
418 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc1), TmpReg)
419 .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
420
421 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
422 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
423 TII.get(TargetOpcode::COPY), ResultReg)
424 .addReg(TmpReg, getKillRegState(true));
425
426 return ResultReg;
427 }
428
429 // Materialize via constant pool. MachineConstantPool wants an explicit
430 // alignment.
431 Align Alignment = DL.getPrefTypeAlign(CFP->getType());
432
433 unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
434 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
435 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
436 ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
437
438 unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
439 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
440 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
441 .addReg(ADRPReg)
442 .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
443 return ResultReg;
444}
445
446unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
447 // We can't handle thread-local variables quickly yet.
448 if (GV->isThreadLocal())
449 return 0;
450
451 // MachO still uses GOT for large code-model accesses, but ELF requires
452 // movz/movk sequences, which FastISel doesn't handle yet.
453 if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
454 return 0;
455
456 unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
457
458 EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
459 if (!DestEVT.isSimple())
460 return 0;
461
462 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
463 unsigned ResultReg;
464
465 if (OpFlags & AArch64II::MO_GOT) {
466 // ADRP + LDRX
467 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
468 ADRPReg)
469 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
470
471 unsigned LdrOpc;
472 if (Subtarget->isTargetILP32()) {
473 ResultReg = createResultReg(&AArch64::GPR32RegClass);
474 LdrOpc = AArch64::LDRWui;
475 } else {
476 ResultReg = createResultReg(&AArch64::GPR64RegClass);
477 LdrOpc = AArch64::LDRXui;
478 }
479 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(LdrOpc),
480 ResultReg)
481 .addReg(ADRPReg)
482 .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
483 AArch64II::MO_NC | OpFlags);
484 if (!Subtarget->isTargetILP32())
485 return ResultReg;
486
487 // LDRWui produces a 32-bit register, but pointers in-register are 64-bits
488 // so we must extend the result on ILP32.
489 Register Result64 = createResultReg(&AArch64::GPR64RegClass);
490 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
491 TII.get(TargetOpcode::SUBREG_TO_REG))
492 .addDef(Result64)
493 .addImm(0)
494 .addReg(ResultReg, RegState::Kill)
495 .addImm(AArch64::sub_32);
496 return Result64;
497 } else {
498 // ADRP + ADDX
499 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
500 ADRPReg)
501 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
502
503 if (OpFlags & AArch64II::MO_TAGGED) {
504 // MO_TAGGED on the page indicates a tagged address. Set the tag now.
505 // We do so by creating a MOVK that sets bits 48-63 of the register to
506 // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
507 // the small code model so we can assume a binary size of <= 4GB, which
508 // makes the untagged PC relative offset positive. The binary must also be
509 // loaded into address range [0, 2^48). Both of these properties need to
510 // be ensured at runtime when using tagged addresses.
511 //
512 // TODO: There is duplicate logic in AArch64ExpandPseudoInsts.cpp that
513 // also uses BuildMI for making an ADRP (+ MOVK) + ADD, but the operands
514 // are not exactly 1:1 with FastISel so we cannot easily abstract this
515 // out. At some point, it would be nice to find a way to not have this
516 // duplicate code.
517 unsigned DstReg = createResultReg(&AArch64::GPR64commonRegClass);
518 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::MOVKXi),
519 DstReg)
520 .addReg(ADRPReg)
521 .addGlobalAddress(GV, /*Offset=*/0x100000000,
522 AArch64II::MO_PREL | AArch64II::MO_G3)
523 .addImm(48);
524 ADRPReg = DstReg;
525 }
526
527 ResultReg = createResultReg(&AArch64::GPR64spRegClass);
528 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
529 ResultReg)
530 .addReg(ADRPReg)
531 .addGlobalAddress(GV, 0,
532 AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
533 .addImm(0);
534 }
535 return ResultReg;
536}
537
538unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
539 EVT CEVT = TLI.getValueType(DL, C->getType(), true);
540
541 // Only handle simple types.
542 if (!CEVT.isSimple())
543 return 0;
544 MVT VT = CEVT.getSimpleVT();
545 // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
546 // 'null' pointers need somewhat special treatment.
547 if (isa<ConstantPointerNull>(C)) {
548 assert(VT == MVT::i64 && "Expected 64-bit pointers");
549 return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
550 }
551
552 if (const auto *CI = dyn_cast<ConstantInt>(C))
553 return materializeInt(CI, VT);
554 else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
555 return materializeFP(CFP, VT);
556 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
557 return materializeGV(GV);
558
559 return 0;
560}
561
562unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
563 assert(CFP->isNullValue() &&
564 "Floating-point constant is not a positive zero.");
565 MVT VT;
566 if (!isTypeLegal(CFP->getType(), VT))
567 return 0;
568
569 if (VT != MVT::f32 && VT != MVT::f64)
570 return 0;
571
572 bool Is64Bit = (VT == MVT::f64);
573 unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
574 unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
575 return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);
576}
577
578/// Check if the multiply is by a power-of-2 constant.
579static bool isMulPowOf2(const Value *I) {
580 if (const auto *MI = dyn_cast<MulOperator>(I)) {
581 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
582 if (C->getValue().isPowerOf2())
583 return true;
584 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
585 if (C->getValue().isPowerOf2())
586 return true;
587 }
588 return false;
589}
590
591// Computes the address to get to an object.
592bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
593{
594 const User *U = nullptr;
595 unsigned Opcode = Instruction::UserOp1;
596 if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
597 // Don't walk into other basic blocks unless the object is an alloca from
598 // another block, otherwise it may not have a virtual register assigned.
599 if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
600 FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
601 Opcode = I->getOpcode();
602 U = I;
603 }
604 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
605 Opcode = C->getOpcode();
606 U = C;
607 }
608
609 if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
610 if (Ty->getAddressSpace() > 255)
611 // Fast instruction selection doesn't support the special
612 // address spaces.
613 return false;
614
615 switch (Opcode) {
616 default:
617 break;
618 case Instruction::BitCast:
619 // Look through bitcasts.
620 return computeAddress(U->getOperand(0), Addr, Ty);
621
622 case Instruction::IntToPtr:
623 // Look past no-op inttoptrs.
624 if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
625 TLI.getPointerTy(DL))
626 return computeAddress(U->getOperand(0), Addr, Ty);
627 break;
628
629 case Instruction::PtrToInt:
630 // Look past no-op ptrtoints.
631 if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
632 return computeAddress(U->getOperand(0), Addr, Ty);
633 break;
634
635 case Instruction::GetElementPtr: {
636 Address SavedAddr = Addr;
637 uint64_t TmpOffset = Addr.getOffset();
638
639 // Iterate through the GEP folding the constants into offsets where
640 // we can.
641 for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
642 GTI != E; ++GTI) {
643 const Value *Op = GTI.getOperand();
644 if (StructType *STy = GTI.getStructTypeOrNull()) {
645 const StructLayout *SL = DL.getStructLayout(STy);
646 unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
647 TmpOffset += SL->getElementOffset(Idx);
648 } else {
649 uint64_t S = GTI.getSequentialElementStride(DL);
650 while (true) {
651 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
652 // Constant-offset addressing.
653 TmpOffset += CI->getSExtValue() * S;
654 break;
655 }
656 if (canFoldAddIntoGEP(U, Op)) {
657 // A compatible add with a constant operand. Fold the constant.
658 ConstantInt *CI =
659 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
660 TmpOffset += CI->getSExtValue() * S;
661 // Iterate on the other operand.
662 Op = cast<AddOperator>(Op)->getOperand(0);
663 continue;
664 }
665 // Unsupported
666 goto unsupported_gep;
667 }
668 }
669 }
670
671 // Try to grab the base operand now.
672 Addr.setOffset(TmpOffset);
673 if (computeAddress(U->getOperand(0), Addr, Ty))
674 return true;
675
676 // We failed, restore everything and try the other options.
677 Addr = SavedAddr;
678
679 unsupported_gep:
680 break;
681 }
682 case Instruction::Alloca: {
683 const AllocaInst *AI = cast<AllocaInst>(Obj);
684 DenseMap<const AllocaInst *, int>::iterator SI =
685 FuncInfo.StaticAllocaMap.find(AI);
686 if (SI != FuncInfo.StaticAllocaMap.end()) {
687 Addr.setKind(Address::FrameIndexBase);
688 Addr.setFI(SI->second);
689 return true;
690 }
691 break;
692 }
693 case Instruction::Add: {
694 // Adds of constants are common and easy enough.
695 const Value *LHS = U->getOperand(0);
696 const Value *RHS = U->getOperand(1);
697
698 if (isa<ConstantInt>(LHS))
699 std::swap(LHS, RHS);
700
701 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
702 Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
703 return computeAddress(LHS, Addr, Ty);
704 }
705
706 Address Backup = Addr;
707 if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
708 return true;
709 Addr = Backup;
710
711 break;
712 }
713 case Instruction::Sub: {
714 // Subs of constants are common and easy enough.
715 const Value *LHS = U->getOperand(0);
716 const Value *RHS = U->getOperand(1);
717
718 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
719 Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
720 return computeAddress(LHS, Addr, Ty);
721 }
722 break;
723 }
724 case Instruction::Shl: {
725 if (Addr.getOffsetReg())
726 break;
727
728 const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
729 if (!CI)
730 break;
731
732 unsigned Val = CI->getZExtValue();
733 if (Val < 1 || Val > 3)
734 break;
735
736 uint64_t NumBytes = 0;
737 if (Ty && Ty->isSized()) {
738 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
739 NumBytes = NumBits / 8;
740 if (!isPowerOf2_64(NumBits))
741 NumBytes = 0;
742 }
743
744 if (NumBytes != (1ULL << Val))
745 break;
746
747 Addr.setShift(Val);
748 Addr.setExtendType(AArch64_AM::LSL);
749
750 const Value *Src = U->getOperand(0);
751 if (const auto *I = dyn_cast<Instruction>(Src)) {
752 if (FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
753 // Fold the zext or sext when it won't become a noop.
754 if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
755 if (!isIntExtFree(ZE) &&
756 ZE->getOperand(0)->getType()->isIntegerTy(32)) {
757 Addr.setExtendType(AArch64_AM::UXTW);
758 Src = ZE->getOperand(0);
759 }
760 } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
761 if (!isIntExtFree(SE) &&
762 SE->getOperand(0)->getType()->isIntegerTy(32)) {
763 Addr.setExtendType(AArch64_AM::SXTW);
764 Src = SE->getOperand(0);
765 }
766 }
767 }
768 }
769
770 if (const auto *AI = dyn_cast<BinaryOperator>(Src))
771 if (AI->getOpcode() == Instruction::And) {
772 const Value *LHS = AI->getOperand(0);
773 const Value *RHS = AI->getOperand(1);
774
775 if (const auto *C = dyn_cast<ConstantInt>(LHS))
776 if (C->getValue() == 0xffffffff)
777 std::swap(LHS, RHS);
778
779 if (const auto *C = dyn_cast<ConstantInt>(RHS))
780 if (C->getValue() == 0xffffffff) {
781 Addr.setExtendType(AArch64_AM::UXTW);
782 Register Reg = getRegForValue(LHS);
783 if (!Reg)
784 return false;
785 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
786 Addr.setOffsetReg(Reg);
787 return true;
788 }
789 }
790
791 Register Reg = getRegForValue(Src);
792 if (!Reg)
793 return false;
794 Addr.setOffsetReg(Reg);
795 return true;
796 }
797 case Instruction::Mul: {
798 if (Addr.getOffsetReg())
799 break;
800
801 if (!isMulPowOf2(U))
802 break;
803
804 const Value *LHS = U->getOperand(0);
805 const Value *RHS = U->getOperand(1);
806
807 // Canonicalize power-of-2 value to the RHS.
808 if (const auto *C = dyn_cast<ConstantInt>(LHS))
809 if (C->getValue().isPowerOf2())
810 std::swap(LHS, RHS);
811
812 assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
813 const auto *C = cast<ConstantInt>(RHS);
814 unsigned Val = C->getValue().logBase2();
815 if (Val < 1 || Val > 3)
816 break;
817
818 uint64_t NumBytes = 0;
819 if (Ty && Ty->isSized()) {
820 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
821 NumBytes = NumBits / 8;
822 if (!isPowerOf2_64(NumBits))
823 NumBytes = 0;
824 }
825
826 if (NumBytes != (1ULL << Val))
827 break;
828
829 Addr.setShift(Val);
830 Addr.setExtendType(AArch64_AM::LSL);
831
832 const Value *Src = LHS;
833 if (const auto *I = dyn_cast<Instruction>(Src)) {
834 if (FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
835 // Fold the zext or sext when it won't become a noop.
836 if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
837 if (!isIntExtFree(ZE) &&
838 ZE->getOperand(0)->getType()->isIntegerTy(32)) {
839 Addr.setExtendType(AArch64_AM::UXTW);
840 Src = ZE->getOperand(0);
841 }
842 } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
843 if (!isIntExtFree(SE) &&
844 SE->getOperand(0)->getType()->isIntegerTy(32)) {
845 Addr.setExtendType(AArch64_AM::SXTW);
846 Src = SE->getOperand(0);
847 }
848 }
849 }
850 }
851
852 Register Reg = getRegForValue(Src);
853 if (!Reg)
854 return false;
855 Addr.setOffsetReg(Reg);
856 return true;
857 }
858 case Instruction::And: {
859 if (Addr.getOffsetReg())
860 break;
861
862 if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
863 break;
864
865 const Value *LHS = U->getOperand(0);
866 const Value *RHS = U->getOperand(1);
867
868 if (const auto *C = dyn_cast<ConstantInt>(LHS))
869 if (C->getValue() == 0xffffffff)
870 std::swap(LHS, RHS);
871
872 if (const auto *C = dyn_cast<ConstantInt>(RHS))
873 if (C->getValue() == 0xffffffff) {
874 Addr.setShift(0);
875 Addr.setExtendType(AArch64_AM::LSL);
876 Addr.setExtendType(AArch64_AM::UXTW);
877
878 Register Reg = getRegForValue(LHS);
879 if (!Reg)
880 return false;
881 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
882 Addr.setOffsetReg(Reg);
883 return true;
884 }
885 break;
886 }
887 case Instruction::SExt:
888 case Instruction::ZExt: {
889 if (!Addr.getReg() || Addr.getOffsetReg())
890 break;
891
892 const Value *Src = nullptr;
893 // Fold the zext or sext when it won't become a noop.
894 if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
895 if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
896 Addr.setExtendType(AArch64_AM::UXTW);
897 Src = ZE->getOperand(0);
898 }
899 } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
900 if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
901 Addr.setExtendType(AArch64_AM::SXTW);
902 Src = SE->getOperand(0);
903 }
904 }
905
906 if (!Src)
907 break;
908
909 Addr.setShift(0);
910 Register Reg = getRegForValue(Src);
911 if (!Reg)
912 return false;
913 Addr.setOffsetReg(Reg);
914 return true;
915 }
916 } // end switch
917
918 if (Addr.isRegBase() && !Addr.getReg()) {
919 Register Reg = getRegForValue(Obj);
920 if (!Reg)
921 return false;
922 Addr.setReg(Reg);
923 return true;
924 }
925
926 if (!Addr.getOffsetReg()) {
927 Register Reg = getRegForValue(Obj);
928 if (!Reg)
929 return false;
930 Addr.setOffsetReg(Reg);
931 return true;
932 }
933
934 return false;
935}
936
937bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
938 const User *U = nullptr;
939 unsigned Opcode = Instruction::UserOp1;
940 bool InMBB = true;
941
942 if (const auto *I = dyn_cast<Instruction>(V)) {
943 Opcode = I->getOpcode();
944 U = I;
945 InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
946 } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
947 Opcode = C->getOpcode();
948 U = C;
949 }
950
951 switch (Opcode) {
952 default: break;
953 case Instruction::BitCast:
954 // Look past bitcasts if its operand is in the same BB.
955 if (InMBB)
956 return computeCallAddress(U->getOperand(0), Addr);
957 break;
958 case Instruction::IntToPtr:
959 // Look past no-op inttoptrs if its operand is in the same BB.
960 if (InMBB &&
961 TLI.getValueType(DL, U->getOperand(0)->getType()) ==
962 TLI.getPointerTy(DL))
963 return computeCallAddress(U->getOperand(0), Addr);
964 break;
965 case Instruction::PtrToInt:
966 // Look past no-op ptrtoints if its operand is in the same BB.
967 if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
968 return computeCallAddress(U->getOperand(0), Addr);
969 break;
970 }
971
972 if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
973 Addr.setGlobalValue(GV);
974 return true;
975 }
976
977 // If all else fails, try to materialize the value in a register.
978 if (!Addr.getGlobalValue()) {
979 Addr.setReg(getRegForValue(V));
980 return Addr.getReg() != 0;
981 }
982
983 return false;
984}
985
986bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
987 EVT evt = TLI.getValueType(DL, Ty, true);
988
989 if (Subtarget->isTargetILP32() && Ty->isPointerTy())
990 return false;
991
992 // Only handle simple types.
993 if (evt == MVT::Other || !evt.isSimple())
994 return false;
995 VT = evt.getSimpleVT();
996
997 // This is a legal type, but it's not something we handle in fast-isel.
998 if (VT == MVT::f128)
999 return false;
1000
1001 // Handle all other legal types, i.e. a register that will directly hold this
1002 // value.
1003 return TLI.isTypeLegal(VT);
1004}
1005
1006/// Determine if the value type is supported by FastISel.
1007///
1008/// FastISel for AArch64 can handle more value types than are legal. This adds
1009 /// simple value types such as i1, i8, and i16.
1010bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
1011 if (Ty->isVectorTy() && !IsVectorAllowed)
1012 return false;
1013
1014 if (isTypeLegal(Ty, VT))
1015 return true;
1016
1017 // If this is a type that can be sign- or zero-extended to a basic operation
1018 // go ahead and accept it now.
1019 if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
1020 return true;
1021
1022 return false;
1023}
1024
1025bool AArch64FastISel::isValueAvailable(const Value *V) const {
1026 if (!isa<Instruction>(V))
1027 return true;
1028
1029 const auto *I = cast<Instruction>(V);
1030 return FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB;
1031}
1032
1033bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
1034 if (Subtarget->isTargetILP32())
1035 return false;
1036
1037 unsigned ScaleFactor = getImplicitScaleFactor(VT);
1038 if (!ScaleFactor)
1039 return false;
1040
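 // The immediate offset must either fit the signed 9-bit unscaled form, or be
 // a non-negative multiple of the access size that fits the unsigned, scaled
 // 12-bit form; anything else is folded into the base register below.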
1041 bool ImmediateOffsetNeedsLowering = false;
1042 bool RegisterOffsetNeedsLowering = false;
1043 int64_t Offset = Addr.getOffset();
1044 if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
1045 ImmediateOffsetNeedsLowering = true;
1046 else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1047 !isUInt<12>(Offset / ScaleFactor))
1048 ImmediateOffsetNeedsLowering = true;
1049
1050 // Cannot encode an offset register and an immediate offset in the same
1051 // instruction. Fold the immediate offset into the load/store instruction and
1052 // emit an additional add to take care of the offset register.
1053 if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1054 RegisterOffsetNeedsLowering = true;
1055
1056 // Cannot encode zero register as base.
1057 if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1058 RegisterOffsetNeedsLowering = true;
1059
1060 // If this is a stack pointer and the offset needs to be simplified then put
1061 // the alloca address into a register, set the base type back to register and
1062 // continue. This should almost never happen.
1063 if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1064 {
1065 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1066 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
1067 ResultReg)
1068 .addFrameIndex(Addr.getFI())
1069 .addImm(0)
1070 .addImm(0);
1071 Addr.setKind(Address::RegBase);
1072 Addr.setReg(ResultReg);
1073 }
1074
1075 if (RegisterOffsetNeedsLowering) {
1076 unsigned ResultReg = 0;
1077 if (Addr.getReg()) {
1078 if (Addr.getExtendType() == AArch64_AM::SXTW ||
1079 Addr.getExtendType() == AArch64_AM::UXTW )
1080 ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1081 Addr.getOffsetReg(), Addr.getExtendType(),
1082 Addr.getShift());
1083 else
1084 ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1085 Addr.getOffsetReg(), AArch64_AM::LSL,
1086 Addr.getShift());
1087 } else {
1088 if (Addr.getExtendType() == AArch64_AM::UXTW)
1089 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1090 Addr.getShift(), /*IsZExt=*/true);
1091 else if (Addr.getExtendType() == AArch64_AM::SXTW)
1092 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1093 Addr.getShift(), /*IsZExt=*/false);
1094 else
1095 ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1096 Addr.getShift());
1097 }
1098 if (!ResultReg)
1099 return false;
1100
1101 Addr.setReg(ResultReg);
1102 Addr.setOffsetReg(0);
1103 Addr.setShift(0);
1104 Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1105 }
1106
1107 // Since the offset is too large for the load/store instruction get the
1108 // reg+offset into a register.
1109 if (ImmediateOffsetNeedsLowering) {
1110 unsigned ResultReg;
1111 if (Addr.getReg())
1112 // Try to fold the immediate into the add instruction.
1113 ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
1114 else
1115 ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1116
1117 if (!ResultReg)
1118 return false;
1119 Addr.setReg(ResultReg);
1120 Addr.setOffset(0);
1121 }
1122 return true;
1123}
1124
1125void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1126 const MachineInstrBuilder &MIB,
1127 MachineMemOperand::Flags Flags,
1128 unsigned ScaleFactor,
1129 MachineMemOperand *MMO) {
1130 int64_t Offset = Addr.getOffset() / ScaleFactor;
1131 // Frame base works a bit differently. Handle it separately.
1132 if (Addr.isFIBase()) {
1133 int FI = Addr.getFI();
1134 // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
1135 // and alignment should be based on the VT.
1136 MMO = FuncInfo.MF->getMachineMemOperand(
1137 MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1138 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
1139 // Now add the rest of the operands.
1140 MIB.addFrameIndex(FI).addImm(Offset);
1141 } else {
1142 assert(Addr.isRegBase() && "Unexpected address kind.");
1143 const MCInstrDesc &II = MIB->getDesc();
1144 unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1145 Addr.setReg(
1146 constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1147 Addr.setOffsetReg(
1148 constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1149 if (Addr.getOffsetReg()) {
1150 assert(Addr.getOffset() == 0 && "Unexpected offset");
1151 bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1152 Addr.getExtendType() == AArch64_AM::SXTX;
1153 MIB.addReg(Addr.getReg());
1154 MIB.addReg(Addr.getOffsetReg());
1155 MIB.addImm(IsSigned);
1156 MIB.addImm(Addr.getShift() != 0);
1157 } else
1158 MIB.addReg(Addr.getReg()).addImm(Offset);
1159 }
1160
1161 if (MMO)
1162 MIB.addMemOperand(MMO);
1163}
1164
1165unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1166 const Value *RHS, bool SetFlags,
1167 bool WantResult, bool IsZExt) {
1168 AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1169 bool NeedExtend = false;
1170 switch (RetVT.SimpleTy) {
1171 default:
1172 return 0;
1173 case MVT::i1:
1174 NeedExtend = true;
1175 break;
1176 case MVT::i8:
1177 NeedExtend = true;
1178 ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1179 break;
1180 case MVT::i16:
1181 NeedExtend = true;
1182 ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1183 break;
1184 case MVT::i32: // fall-through
1185 case MVT::i64:
1186 break;
1187 }
1188 MVT SrcVT = RetVT;
1189 RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1190
1191 // Canonicalize immediates to the RHS first.
1192 if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1193 std::swap(LHS, RHS);
1194
1195 // Canonicalize mul by power of 2 to the RHS.
1196 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1197 if (isMulPowOf2(LHS))
1198 std::swap(LHS, RHS);
1199
1200 // Canonicalize shift immediate to the RHS.
1201 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1202 if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1203 if (isa<ConstantInt>(SI->getOperand(1)))
1204 if (SI->getOpcode() == Instruction::Shl ||
1205 SI->getOpcode() == Instruction::LShr ||
1206 SI->getOpcode() == Instruction::AShr )
1207 std::swap(LHS, RHS);
1208
1209 Register LHSReg = getRegForValue(LHS);
1210 if (!LHSReg)
1211 return 0;
1212
1213 if (NeedExtend)
1214 LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1215
1216 unsigned ResultReg = 0;
1217 if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1218 uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1219 if (C->isNegative())
1220 ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
1221 WantResult);
1222 else
1223 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
1224 WantResult);
1225 } else if (const auto *C = dyn_cast<Constant>(RHS))
1226 if (C->isNullValue())
1227 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);
1228
1229 if (ResultReg)
1230 return ResultReg;
1231
1232 // Only extend the RHS within the instruction if there is a valid extend type.
1233 if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1234 isValueAvailable(RHS)) {
1235 Register RHSReg = getRegForValue(RHS);
1236 if (!RHSReg)
1237 return 0;
1238 return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,
1239 SetFlags, WantResult);
1240 }
1241
1242 // Check if the mul can be folded into the instruction.
1243 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1244 if (isMulPowOf2(RHS)) {
1245 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1246 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1247
1248 if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1249 if (C->getValue().isPowerOf2())
1250 std::swap(MulLHS, MulRHS);
1251
1252 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1253 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1254 Register RHSReg = getRegForValue(MulLHS);
1255 if (!RHSReg)
1256 return 0;
1257 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,
1258 ShiftVal, SetFlags, WantResult);
1259 if (ResultReg)
1260 return ResultReg;
1261 }
1262 }
1263
1264 // Check if the shift can be folded into the instruction.
1265 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1266 if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1267 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1268 AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1269 switch (SI->getOpcode()) {
1270 default: break;
1271 case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
1272 case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1273 case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1274 }
1275 uint64_t ShiftVal = C->getZExtValue();
1276 if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1277 Register RHSReg = getRegForValue(SI->getOperand(0));
1278 if (!RHSReg)
1279 return 0;
1280 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
1281 ShiftVal, SetFlags, WantResult);
1282 if (ResultReg)
1283 return ResultReg;
1284 }
1285 }
1286 }
1287 }
1288
1289 Register RHSReg = getRegForValue(RHS);
1290 if (!RHSReg)
1291 return 0;
1292
1293 if (NeedExtend)
1294 RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1295
1296 return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
1297}
1298
1299unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1300 unsigned RHSReg, bool SetFlags,
1301 bool WantResult) {
1302 assert(LHSReg && RHSReg && "Invalid register number.");
1303
1304 if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1305 RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1306 return 0;
1307
1308 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1309 return 0;
1310
1311 static const unsigned OpcTable[2][2][2] = {
1312 { { AArch64::SUBWrr, AArch64::SUBXrr },
1313 { AArch64::ADDWrr, AArch64::ADDXrr } },
1314 { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1315 { AArch64::ADDSWrr, AArch64::ADDSXrr } }
1316 };
1317 bool Is64Bit = RetVT == MVT::i64;
1318 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1319 const TargetRegisterClass *RC =
1320 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1321 unsigned ResultReg;
1322 if (WantResult)
1323 ResultReg = createResultReg(RC);
1324 else
1325 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1326
1327 const MCInstrDesc &II = TII.get(Opc);
1328 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1329 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1330 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1331 .addReg(LHSReg)
1332 .addReg(RHSReg);
1333 return ResultReg;
1334}
1335
1336unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1337 uint64_t Imm, bool SetFlags,
1338 bool WantResult) {
1339 assert(LHSReg && "Invalid register number.");
1340
1341 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1342 return 0;
1343
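 // ADD/SUB immediates are 12 bits wide, optionally shifted left by 12, so
 // only values in [0, 0xfff] and 0x1000-aligned values up to 0xfff000 can be
 // encoded directly.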
1344 unsigned ShiftImm;
1345 if (isUInt<12>(Imm))
1346 ShiftImm = 0;
1347 else if ((Imm & 0xfff000) == Imm) {
1348 ShiftImm = 12;
1349 Imm >>= 12;
1350 } else
1351 return 0;
1352
1353 static const unsigned OpcTable[2][2][2] = {
1354 { { AArch64::SUBWri, AArch64::SUBXri },
1355 { AArch64::ADDWri, AArch64::ADDXri } },
1356 { { AArch64::SUBSWri, AArch64::SUBSXri },
1357 { AArch64::ADDSWri, AArch64::ADDSXri } }
1358 };
1359 bool Is64Bit = RetVT == MVT::i64;
1360 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1361 const TargetRegisterClass *RC;
1362 if (SetFlags)
1363 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1364 else
1365 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1366 unsigned ResultReg;
1367 if (WantResult)
1368 ResultReg = createResultReg(RC);
1369 else
1370 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1371
1372 const MCInstrDesc &II = TII.get(Opc);
1373 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1374 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1375 .addReg(LHSReg)
1376 .addImm(Imm)
1377 .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1378 return ResultReg;
1379}
1380
1381unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1382 unsigned RHSReg,
1383 AArch64_AM::ShiftExtendType ShiftType,
1384 uint64_t ShiftImm, bool SetFlags,
1385 bool WantResult) {
1386 assert(LHSReg && RHSReg && "Invalid register number.");
1387 assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1388 RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1389
1390 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1391 return 0;
1392
1393 // Don't deal with undefined shifts.
1394 if (ShiftImm >= RetVT.getSizeInBits())
1395 return 0;
1396
1397 static const unsigned OpcTable[2][2][2] = {
1398 { { AArch64::SUBWrs, AArch64::SUBXrs },
1399 { AArch64::ADDWrs, AArch64::ADDXrs } },
1400 { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1401 { AArch64::ADDSWrs, AArch64::ADDSXrs } }
1402 };
1403 bool Is64Bit = RetVT == MVT::i64;
1404 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1405 const TargetRegisterClass *RC =
1406 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1407 unsigned ResultReg;
1408 if (WantResult)
1409 ResultReg = createResultReg(RC);
1410 else
1411 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1412
1413 const MCInstrDesc &II = TII.get(Opc);
1414 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1415 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1416 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1417 .addReg(LHSReg)
1418 .addReg(RHSReg)
1419 .addImm(getShifterImm(ShiftType, ShiftImm));
1420 return ResultReg;
1421}
1422
1423unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1424 unsigned RHSReg,
1425 AArch64_AM::ShiftExtendType ExtType,
1426 uint64_t ShiftImm, bool SetFlags,
1427 bool WantResult) {
1428 assert(LHSReg && RHSReg && "Invalid register number.");
1429 assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1430 RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1431
1432 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1433 return 0;
1434
1435 if (ShiftImm >= 4)
1436 return 0;
1437
1438 static const unsigned OpcTable[2][2][2] = {
1439 { { AArch64::SUBWrx, AArch64::SUBXrx },
1440 { AArch64::ADDWrx, AArch64::ADDXrx } },
1441 { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1442 { AArch64::ADDSWrx, AArch64::ADDSXrx } }
1443 };
1444 bool Is64Bit = RetVT == MVT::i64;
1445 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1446 const TargetRegisterClass *RC = nullptr;
1447 if (SetFlags)
1448 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1449 else
1450 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1451 unsigned ResultReg;
1452 if (WantResult)
1453 ResultReg = createResultReg(RC);
1454 else
1455 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1456
1457 const MCInstrDesc &II = TII.get(Opc);
1458 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1459 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1460 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1461 .addReg(LHSReg)
1462 .addReg(RHSReg)
1463 .addImm(getArithExtendImm(ExtType, ShiftImm));
1464 return ResultReg;
1465}
1466
1467bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1468 Type *Ty = LHS->getType();
1469 EVT EVT = TLI.getValueType(DL, Ty, true);
1470 if (!EVT.isSimple())
1471 return false;
1472 MVT VT = EVT.getSimpleVT();
1473
1474 switch (VT.SimpleTy) {
1475 default:
1476 return false;
1477 case MVT::i1:
1478 case MVT::i8:
1479 case MVT::i16:
1480 case MVT::i32:
1481 case MVT::i64:
1482 return emitICmp(VT, LHS, RHS, IsZExt);
1483 case MVT::f32:
1484 case MVT::f64:
1485 return emitFCmp(VT, LHS, RHS);
1486 }
1487}
1488
1489bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1490 bool IsZExt) {
1491 return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1492 IsZExt) != 0;
1493}
1494
1495bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) {
1496 return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,
1497 /*SetFlags=*/true, /*WantResult=*/false) != 0;
1498}
1499
1500bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1501 if (RetVT != MVT::f32 && RetVT != MVT::f64)
1502 return false;
1503
1504 // Check to see if the 2nd operand is a constant that we can encode directly
1505 // in the compare.
1506 bool UseImm = false;
1507 if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1508 if (CFP->isZero() && !CFP->isNegative())
1509 UseImm = true;
1510
1511 Register LHSReg = getRegForValue(LHS);
1512 if (!LHSReg)
1513 return false;
1514
1515 if (UseImm) {
1516 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1517 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1518 .addReg(LHSReg);
1519 return true;
1520 }
1521
1522 Register RHSReg = getRegForValue(RHS);
1523 if (!RHSReg)
1524 return false;
1525
1526 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1527 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1528 .addReg(LHSReg)
1529 .addReg(RHSReg);
1530 return true;
1531}
1532
1533unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1534 bool SetFlags, bool WantResult, bool IsZExt) {
1535 return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1536 IsZExt);
1537}
1538
1539/// This method is a wrapper to simplify add emission.
1540///
1541/// First try to emit an add with an immediate operand using emitAddSub_ri. If
1542/// that fails, then try to materialize the immediate into a register and use
1543/// emitAddSub_rr instead.
1544unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm) {
1545 unsigned ResultReg;
1546 if (Imm < 0)
1547 ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);
1548 else
1549 ResultReg = emitAddSub_ri(true, VT, Op0, Imm);
1550
1551 if (ResultReg)
1552 return ResultReg;
1553
1554 unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1555 if (!CReg)
1556 return 0;
1557
1558 ResultReg = emitAddSub_rr(true, VT, Op0, CReg);
1559 return ResultReg;
1560}
1561
1562unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1563 bool SetFlags, bool WantResult, bool IsZExt) {
1564 return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1565 IsZExt);
1566}
1567
1568unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1569 unsigned RHSReg, bool WantResult) {
1570 return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg,
1571 /*SetFlags=*/true, WantResult);
1572}
1573
1574unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1575 unsigned RHSReg,
1576 AArch64_AM::ShiftExtendType ShiftType,
1577 uint64_t ShiftImm, bool WantResult) {
1578 return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType,
1579 ShiftImm, /*SetFlags=*/true, WantResult);
1580}
1581
1582unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1583 const Value *LHS, const Value *RHS) {
1584 // Canonicalize immediates to the RHS first.
1585 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1586 std::swap(LHS, RHS);
1587
1588 // Canonicalize mul by power-of-2 to the RHS.
1589 if (LHS->hasOneUse() && isValueAvailable(LHS))
1590 if (isMulPowOf2(LHS))
1591 std::swap(LHS, RHS);
1592
1593 // Canonicalize shift immediate to the RHS.
1594 if (LHS->hasOneUse() && isValueAvailable(LHS))
1595 if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1596 if (isa<ConstantInt>(SI->getOperand(1)))
1597 std::swap(LHS, RHS);
1598
1599 Register LHSReg = getRegForValue(LHS);
1600 if (!LHSReg)
1601 return 0;
1602
1603 unsigned ResultReg = 0;
1604 if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1605 uint64_t Imm = C->getZExtValue();
1606 ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);
1607 }
1608 if (ResultReg)
1609 return ResultReg;
1610
1611 // Check if the mul can be folded into the instruction.
1612 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1613 if (isMulPowOf2(RHS)) {
1614 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1615 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1616
1617 if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1618 if (C->getValue().isPowerOf2())
1619 std::swap(MulLHS, MulRHS);
1620
1621 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1622 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1623
1624 Register RHSReg = getRegForValue(MulLHS);
1625 if (!RHSReg)
1626 return 0;
1627 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1628 if (ResultReg)
1629 return ResultReg;
1630 }
1631 }
1632
1633 // Check if the shift can be folded into the instruction.
1634 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1635 if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1636 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1637 uint64_t ShiftVal = C->getZExtValue();
1638 Register RHSReg = getRegForValue(SI->getOperand(0));
1639 if (!RHSReg)
1640 return 0;
1641 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1642 if (ResultReg)
1643 return ResultReg;
1644 }
1645 }
1646
1647 Register RHSReg = getRegForValue(RHS);
1648 if (!RHSReg)
1649 return 0;
1650
1651 MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1652 ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);
1653 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1654 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1655 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1656 }
1657 return ResultReg;
1658}
1659
1660unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1661 unsigned LHSReg, uint64_t Imm) {
1662 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1663 "ISD nodes are not consecutive!");
1664 static const unsigned OpcTable[3][2] = {
1665 { AArch64::ANDWri, AArch64::ANDXri },
1666 { AArch64::ORRWri, AArch64::ORRXri },
1667 { AArch64::EORWri, AArch64::EORXri }
1668 };
1669 const TargetRegisterClass *RC;
1670 unsigned Opc;
1671 unsigned RegSize;
1672 switch (RetVT.SimpleTy) {
1673 default:
1674 return 0;
1675 case MVT::i1:
1676 case MVT::i8:
1677 case MVT::i16:
1678 case MVT::i32: {
1679 unsigned Idx = ISDOpc - ISD::AND;
1680 Opc = OpcTable[Idx][0];
1681 RC = &AArch64::GPR32spRegClass;
1682 RegSize = 32;
1683 break;
1684 }
1685 case MVT::i64:
1686 Opc = OpcTable[ISDOpc - ISD::AND][1];
1687 RC = &AArch64::GPR64spRegClass;
1688 RegSize = 64;
1689 break;
1690 }
1691
1692 if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1693 return 0;
1694
1695 Register ResultReg =
1696 fastEmitInst_ri(Opc, RC, LHSReg,
1697 AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1698 if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1699 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1700 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1701 }
1702 return ResultReg;
1703}
1704
1705unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1706 unsigned LHSReg, unsigned RHSReg,
1707 uint64_t ShiftImm) {
1708 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1709 "ISD nodes are not consecutive!");
1710 static const unsigned OpcTable[3][2] = {
1711 { AArch64::ANDWrs, AArch64::ANDXrs },
1712 { AArch64::ORRWrs, AArch64::ORRXrs },
1713 { AArch64::EORWrs, AArch64::EORXrs }
1714 };
1715
1716 // Don't deal with undefined shifts.
1717 if (ShiftImm >= RetVT.getSizeInBits())
1718 return 0;
1719
1720 const TargetRegisterClass *RC;
1721 unsigned Opc;
1722 switch (RetVT.SimpleTy) {
1723 default:
1724 return 0;
1725 case MVT::i1:
1726 case MVT::i8:
1727 case MVT::i16:
1728 case MVT::i32:
1729 Opc = OpcTable[ISDOpc - ISD::AND][0];
1730 RC = &AArch64::GPR32RegClass;
1731 break;
1732 case MVT::i64:
1733 Opc = OpcTable[ISDOpc - ISD::AND][1];
1734 RC = &AArch64::GPR64RegClass;
1735 break;
1736 }
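  // The shifted-register form computes LHSReg <op> (RHSReg << ShiftImm) in a
  // single instruction. For i8/i16 the bits above the type width are not
  // meaningful after the shift, so the result is re-masked with an AND below.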
1737 Register ResultReg =
1738 fastEmitInst_rri(Opc, RC, LHSReg, RHSReg,
1739 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1740 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1741 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1742 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1743 }
1744 return ResultReg;
1745}
1746
1747unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg,
1748 uint64_t Imm) {
1749 return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm);
1750}
1751
1752unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1753 bool WantZExt, MachineMemOperand *MMO) {
1754 if (!TLI.allowsMisalignedMemoryAccesses(VT))
1755 return 0;
1756
1757 // Simplify this down to something we can handle.
1758 if (!simplifyAddress(Addr, VT))
1759 return 0;
1760
1761 unsigned ScaleFactor = getImplicitScaleFactor(VT);
1762 if (!ScaleFactor)
1763 llvm_unreachable("Unexpected value type.");
1764
1765 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1766 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1767 bool UseScaled = true;
1768 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1769 UseScaled = false;
1770 ScaleFactor = 1;
1771 }
1772
1773 static const unsigned GPOpcTable[2][8][4] = {
1774 // Sign-extend.
1775 { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
1776 AArch64::LDURXi },
1777 { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
1778 AArch64::LDURXi },
1779 { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
1780 AArch64::LDRXui },
1781 { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
1782 AArch64::LDRXui },
1783 { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1784 AArch64::LDRXroX },
1785 { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1786 AArch64::LDRXroX },
1787 { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1788 AArch64::LDRXroW },
1789 { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1790 AArch64::LDRXroW }
1791 },
1792 // Zero-extend.
1793 { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1794 AArch64::LDURXi },
1795 { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1796 AArch64::LDURXi },
1797 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1798 AArch64::LDRXui },
1799 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1800 AArch64::LDRXui },
1801 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1802 AArch64::LDRXroX },
1803 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1804 AArch64::LDRXroX },
1805 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1806 AArch64::LDRXroW },
1807 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1808 AArch64::LDRXroW }
1809 }
1810 };
1811
1812 static const unsigned FPOpcTable[4][2] = {
1813 { AArch64::LDURSi, AArch64::LDURDi },
1814 { AArch64::LDRSui, AArch64::LDRDui },
1815 { AArch64::LDRSroX, AArch64::LDRDroX },
1816 { AArch64::LDRSroW, AArch64::LDRDroW }
1817 };
1818
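  // GPOpcTable is indexed as [WantZExt][2 * Idx + IsRet64Bit][Size]: the row
  // pairs cover unscaled (LDUR), scaled (LDR ui), register-offset (roX) and
  // extended register-offset (roW) addressing, with a 32-bit and a 64-bit
  // destination variant in each pair. Idx below selects the addressing mode.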
1819 unsigned Opc;
1820 const TargetRegisterClass *RC;
1821 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1822 Addr.getOffsetReg();
1823 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1824 if (Addr.getExtendType() == AArch64_AM::UXTW ||
1825 Addr.getExtendType() == AArch64_AM::SXTW)
1826 Idx++;
1827
1828 bool IsRet64Bit = RetVT == MVT::i64;
1829 switch (VT.SimpleTy) {
1830 default:
1831 llvm_unreachable("Unexpected value type.");
1832 case MVT::i1: // Intentional fall-through.
1833 case MVT::i8:
1834 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1835 RC = (IsRet64Bit && !WantZExt) ?
1836 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1837 break;
1838 case MVT::i16:
1839 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1840 RC = (IsRet64Bit && !WantZExt) ?
1841 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1842 break;
1843 case MVT::i32:
1844 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1845 RC = (IsRet64Bit && !WantZExt) ?
1846 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1847 break;
1848 case MVT::i64:
1849 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1850 RC = &AArch64::GPR64RegClass;
1851 break;
1852 case MVT::f32:
1853 Opc = FPOpcTable[Idx][0];
1854 RC = &AArch64::FPR32RegClass;
1855 break;
1856 case MVT::f64:
1857 Opc = FPOpcTable[Idx][1];
1858 RC = &AArch64::FPR64RegClass;
1859 break;
1860 }
1861
1862 // Create the base instruction, then add the operands.
1863 Register ResultReg = createResultReg(RC);
1864 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1865 TII.get(Opc), ResultReg);
1866 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1867
1868 // Loading an i1 requires special handling.
1869 if (VT == MVT::i1) {
1870 unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1);
1871 assert(ANDReg && "Unexpected AND instruction emission failure.");
1872 ResultReg = ANDReg;
1873 }
1874
1875 // For zero-extending loads to 64bit we emit a 32bit load and then convert
1876 // the 32bit reg to a 64bit reg.
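  // SUBREG_TO_REG with an immediate of 0 records that the upper 32 bits of
  // the 64-bit result are already zero, which holds because a 32-bit load
  // implicitly zeroes the upper half of the X register.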
1877 if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1878 Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
1879 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1880 TII.get(AArch64::SUBREG_TO_REG), Reg64)
1881 .addImm(0)
1882 .addReg(ResultReg, getKillRegState(true))
1883 .addImm(AArch64::sub_32);
1884 ResultReg = Reg64;
1885 }
1886 return ResultReg;
1887}
1888
1889bool AArch64FastISel::selectAddSub(const Instruction *I) {
1890 MVT VT;
1891 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1892 return false;
1893
1894 if (VT.isVector())
1895 return selectOperator(I, I->getOpcode());
1896
1897 unsigned ResultReg;
1898 switch (I->getOpcode()) {
1899 default:
1900 llvm_unreachable("Unexpected instruction.");
1901 case Instruction::Add:
1902 ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1903 break;
1904 case Instruction::Sub:
1905 ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1906 break;
1907 }
1908 if (!ResultReg)
1909 return false;
1910
1911 updateValueMap(I, ResultReg);
1912 return true;
1913}
1914
1915bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1916 MVT VT;
1917 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1918 return false;
1919
1920 if (VT.isVector())
1921 return selectOperator(I, I->getOpcode());
1922
1923 unsigned ResultReg;
1924 switch (I->getOpcode()) {
1925 default:
1926 llvm_unreachable("Unexpected instruction.");
1927 case Instruction::And:
1928 ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1929 break;
1930 case Instruction::Or:
1931 ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1932 break;
1933 case Instruction::Xor:
1934 ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1935 break;
1936 }
1937 if (!ResultReg)
1938 return false;
1939
1940 updateValueMap(I, ResultReg);
1941 return true;
1942}
1943
1944bool AArch64FastISel::selectLoad(const Instruction *I) {
1945 MVT VT;
1946 // Verify we have a legal type before going any further. Currently, we handle
1947 // simple types that will directly fit in a register (i32/f32/i64/f64) or
1948 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1949 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1950 cast<LoadInst>(I)->isAtomic())
1951 return false;
1952
1953 const Value *SV = I->getOperand(0);
1954 if (TLI.supportSwiftError()) {
1955 // Swifterror values can come from either a function parameter with
1956 // swifterror attribute or an alloca with swifterror attribute.
1957 if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1958 if (Arg->hasSwiftErrorAttr())
1959 return false;
1960 }
1961
1962 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1963 if (Alloca->isSwiftError())
1964 return false;
1965 }
1966 }
1967
1968 // See if we can handle this address.
1969 Address Addr;
1970 if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1971 return false;
1972
1973 // Fold the following sign-/zero-extend into the load instruction.
1974 bool WantZExt = true;
1975 MVT RetVT = VT;
1976 const Value *IntExtVal = nullptr;
1977 if (I->hasOneUse()) {
1978 if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1979 if (isTypeSupported(ZE->getType(), RetVT))
1980 IntExtVal = ZE;
1981 else
1982 RetVT = VT;
1983 } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1984 if (isTypeSupported(SE->getType(), RetVT))
1985 IntExtVal = SE;
1986 else
1987 RetVT = VT;
1988 WantZExt = false;
1989 }
1990 }
1991
1992 unsigned ResultReg =
1993 emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1994 if (!ResultReg)
1995 return false;
1996
1997 // There are a few different cases we have to handle, because the load or the
1998 // sign-/zero-extend might not be selected by FastISel if we fall back to
1999 // SelectionDAG. There is also an ordering issue when both instructions are in
2000 // different basic blocks.
2001 // 1.) The load instruction is selected by FastISel, but the integer extend
2002 // not. This usually happens when the integer extend is in a different
2003 // basic block and SelectionDAG took over for that basic block.
2004 // 2.) The load instruction is selected before the integer extend. This only
2005 // happens when the integer extend is in a different basic block.
2006 // 3.) The load instruction is selected by SelectionDAG and the integer extend
2007 // by FastISel. This happens if there are instructions between the load
2008 // and the integer extend that couldn't be selected by FastISel.
2009 if (IntExtVal) {
2010 // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2011 // could select it. Emit a copy to subreg if necessary. FastISel will remove
2012 // it when it selects the integer extend.
2013 Register Reg = lookUpRegForValue(IntExtVal);
2014 auto *MI = MRI.getUniqueVRegDef(Reg);
2015 if (!MI) {
2016 if (RetVT == MVT::i64 && VT <= MVT::i32) {
2017 if (WantZExt) {
2018 // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2019 MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
2020 ResultReg = std::prev(I)->getOperand(0).getReg();
2021 removeDeadCode(I, std::next(I));
2022 } else
2023 ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2024 AArch64::sub_32);
2025 }
2026 updateValueMap(I, ResultReg);
2027 return true;
2028 }
2029
2030 // The integer extend has already been emitted - delete all the instructions
2031 // that have been emitted by the integer extend lowering code and use the
2032 // result from the load instruction directly.
2033 while (MI) {
2034 Reg = 0;
2035 for (auto &Opnd : MI->uses()) {
2036 if (Opnd.isReg()) {
2037 Reg = Opnd.getReg();
2038 break;
2039 }
2040 }
2041 MachineBasicBlock::iterator I(MI);
2042 removeDeadCode(I, std::next(I));
2043 MI = nullptr;
2044 if (Reg)
2045 MI = MRI.getUniqueVRegDef(Reg);
2046 }
2047 updateValueMap(IntExtVal, ResultReg);
2048 return true;
2049 }
2050
2051 updateValueMap(I, ResultReg);
2052 return true;
2053}
2054
2055bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2056 unsigned AddrReg,
2057 MachineMemOperand *MMO) {
2058 unsigned Opc;
2059 switch (VT.SimpleTy) {
2060 default: return false;
2061 case MVT::i8: Opc = AArch64::STLRB; break;
2062 case MVT::i16: Opc = AArch64::STLRH; break;
2063 case MVT::i32: Opc = AArch64::STLRW; break;
2064 case MVT::i64: Opc = AArch64::STLRX; break;
2065 }
2066
2067 const MCInstrDesc &II = TII.get(Opc);
2068 SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2069 AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2070 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2071 .addReg(SrcReg)
2072 .addReg(AddrReg)
2073 .addMemOperand(MMO);
2074 return true;
2075}
2076
2077bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2078 MachineMemOperand *MMO) {
2079 if (!TLI.allowsMisalignedMemoryAccesses(VT))
2080 return false;
2081
2082 // Simplify this down to something we can handle.
2083 if (!simplifyAddress(Addr, VT))
2084 return false;
2085
2086 unsigned ScaleFactor = getImplicitScaleFactor(VT);
2087 if (!ScaleFactor)
2088 llvm_unreachable("Unexpected value type.");
2089
2090 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2091 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2092 bool UseScaled = true;
2093 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2094 UseScaled = false;
2095 ScaleFactor = 1;
2096 }
2097
2098 static const unsigned OpcTable[4][6] = {
2099 { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
2100 AArch64::STURSi, AArch64::STURDi },
2101 { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
2102 AArch64::STRSui, AArch64::STRDui },
2103 { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2104 AArch64::STRSroX, AArch64::STRDroX },
2105 { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2106 AArch64::STRSroW, AArch64::STRDroW }
2107 };
2108
2109 unsigned Opc;
2110 bool VTIsi1 = false;
2111 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2112 Addr.getOffsetReg();
2113 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2114 if (Addr.getExtendType() == AArch64_AM::UXTW ||
2115 Addr.getExtendType() == AArch64_AM::SXTW)
2116 Idx++;
2117
2118 switch (VT.SimpleTy) {
2119 default: llvm_unreachable("Unexpected value type.");
2120 case MVT::i1: VTIsi1 = true; [[fallthrough]];
2121 case MVT::i8: Opc = OpcTable[Idx][0]; break;
2122 case MVT::i16: Opc = OpcTable[Idx][1]; break;
2123 case MVT::i32: Opc = OpcTable[Idx][2]; break;
2124 case MVT::i64: Opc = OpcTable[Idx][3]; break;
2125 case MVT::f32: Opc = OpcTable[Idx][4]; break;
2126 case MVT::f64: Opc = OpcTable[Idx][5]; break;
2127 }
2128
2129 // Storing an i1 requires special handling.
2130 if (VTIsi1 && SrcReg != AArch64::WZR) {
2131 unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1);
2132 assert(ANDReg && "Unexpected AND instruction emission failure.");
2133 SrcReg = ANDReg;
2134 }
2135 // Create the base instruction, then add the operands.
2136 const MCInstrDesc &II = TII.get(Opc);
2137 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2138 MachineInstrBuilder MIB =
2139 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(SrcReg);
2140 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2141
2142 return true;
2143}
2144
2145bool AArch64FastISel::selectStore(const Instruction *I) {
2146 MVT VT;
2147 const Value *Op0 = I->getOperand(0);
2148 // Verify we have a legal type before going any further. Currently, we handle
2149 // simple types that will directly fit in a register (i32/f32/i64/f64) or
2150 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2151 if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2152 return false;
2153
2154 const Value *PtrV = I->getOperand(1);
2155 if (TLI.supportSwiftError()) {
2156 // Swifterror values can come from either a function parameter with
2157 // swifterror attribute or an alloca with swifterror attribute.
2158 if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2159 if (Arg->hasSwiftErrorAttr())
2160 return false;
2161 }
2162
2163 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2164 if (Alloca->isSwiftError())
2165 return false;
2166 }
2167 }
2168
2169 // Get the value to be stored into a register. Use the zero register directly
2170 // when possible to avoid an unnecessary copy and a wasted register.
2171 unsigned SrcReg = 0;
2172 if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2173 if (CI->isZero())
2174 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2175 } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2176 if (CF->isZero() && !CF->isNegative()) {
2177 VT = MVT::getIntegerVT(VT.getSizeInBits());
2178 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2179 }
2180 }
2181
2182 if (!SrcReg)
2183 SrcReg = getRegForValue(Op0);
2184
2185 if (!SrcReg)
2186 return false;
2187
2188 auto *SI = cast<StoreInst>(I);
2189
2190 // Try to emit a STLR for seq_cst/release.
2191 if (SI->isAtomic()) {
2192 AtomicOrdering Ord = SI->getOrdering();
2193 // The non-atomic instructions are sufficient for relaxed stores.
2194 if (isReleaseOrStronger(Ord)) {
2195 // The STLR addressing mode only supports a base reg; pass that directly.
2196 Register AddrReg = getRegForValue(PtrV);
2197 return emitStoreRelease(VT, SrcReg, AddrReg,
2198 createMachineMemOperandFor(I));
2199 }
2200 }
2201
2202 // See if we can handle this address.
2203 Address Addr;
2204 if (!computeAddress(PtrV, Addr, Op0->getType()))
2205 return false;
2206
2207 if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2208 return false;
2209 return true;
2210}
2211
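/// Map an IR comparison predicate onto the AArch64 condition code that makes
/// the comparison true. FCMP_ONE and FCMP_UEQ cannot be expressed with a
/// single condition code, so they map to AL here and the callers emit a
/// second conditional instruction for them.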
2212static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2213 switch (Pred) {
2214 case CmpInst::FCMP_ONE:
2215 case CmpInst::FCMP_UEQ:
2216 default:
2217 // AL is our "false" for now. The other two need more compares.
2218 return AArch64CC::AL;
2219 case CmpInst::ICMP_EQ:
2220 case CmpInst::FCMP_OEQ:
2221 return AArch64CC::EQ;
2222 case CmpInst::ICMP_SGT:
2223 case CmpInst::FCMP_OGT:
2224 return AArch64CC::GT;
2225 case CmpInst::ICMP_SGE:
2226 case CmpInst::FCMP_OGE:
2227 return AArch64CC::GE;
2228 case CmpInst::ICMP_UGT:
2229 case CmpInst::FCMP_UGT:
2230 return AArch64CC::HI;
2231 case CmpInst::FCMP_OLT:
2232 return AArch64CC::MI;
2233 case CmpInst::ICMP_ULE:
2234 case CmpInst::FCMP_OLE:
2235 return AArch64CC::LS;
2236 case CmpInst::FCMP_ORD:
2237 return AArch64CC::VC;
2238 case CmpInst::FCMP_UNO:
2239 return AArch64CC::VS;
2240 case CmpInst::FCMP_UGE:
2241 return AArch64CC::PL;
2242 case CmpInst::ICMP_SLT:
2243 case CmpInst::FCMP_ULT:
2244 return AArch64CC::LT;
2245 case CmpInst::ICMP_SLE:
2246 case CmpInst::FCMP_ULE:
2247 return AArch64CC::LE;
2248 case CmpInst::FCMP_UNE:
2249 case CmpInst::ICMP_NE:
2250 return AArch64CC::NE;
2251 case CmpInst::ICMP_UGE:
2252 return AArch64CC::HS;
2253 case CmpInst::ICMP_ULT:
2254 return AArch64CC::LO;
2255 }
2256}
2257
2258/// Try to emit a combined compare-and-branch instruction.
2259bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2260 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
2261 // will not be produced, as they are conditional branch instructions that do
2262 // not set flags.
2263 if (FuncInfo.MF->getFunction().hasFnAttribute(
2264 Attribute::SpeculativeLoadHardening))
2265 return false;
2266
2267 assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2268 const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2269 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2270
2271 const Value *LHS = CI->getOperand(0);
2272 const Value *RHS = CI->getOperand(1);
2273
2274 MVT VT;
2275 if (!isTypeSupported(LHS->getType(), VT))
2276 return false;
2277
2278 unsigned BW = VT.getSizeInBits();
2279 if (BW > 64)
2280 return false;
2281
2282 MachineBasicBlock *TBB = FuncInfo.getMBB(BI->getSuccessor(0));
2283 MachineBasicBlock *FBB = FuncInfo.getMBB(BI->getSuccessor(1));
2284
2285 // Try to take advantage of fallthrough opportunities.
2286 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2287 std::swap(TBB, FBB);
2288 Predicate = CmpInst::getInversePredicate(Predicate);
2289 }
2290
2291 int TestBit = -1;
2292 bool IsCmpNE;
2293 switch (Predicate) {
2294 default:
2295 return false;
2296 case CmpInst::ICMP_EQ:
2297 case CmpInst::ICMP_NE:
2298 if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2299 std::swap(LHS, RHS);
2300
2301 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2302 return false;
2303
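 // If the value being compared against zero is an AND with a power-of-two
 // constant, the result depends on a single bit, so the branch can be
 // implemented with TBZ/TBNZ on that bit instead of CBZ/CBNZ on the whole
 // register.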
2304 if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2305 if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2306 const Value *AndLHS = AI->getOperand(0);
2307 const Value *AndRHS = AI->getOperand(1);
2308
2309 if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2310 if (C->getValue().isPowerOf2())
2311 std::swap(AndLHS, AndRHS);
2312
2313 if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2314 if (C->getValue().isPowerOf2()) {
2315 TestBit = C->getValue().logBase2();
2316 LHS = AndLHS;
2317 }
2318 }
2319
2320 if (VT == MVT::i1)
2321 TestBit = 0;
2322
2323 IsCmpNE = Predicate == CmpInst::ICMP_NE;
2324 break;
2325 case CmpInst::ICMP_SLT:
2326 case CmpInst::ICMP_SGE:
2327 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2328 return false;
2329
2330 TestBit = BW - 1;
2331 IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2332 break;
2333 case CmpInst::ICMP_SGT:
2334 case CmpInst::ICMP_SLE:
2335 if (!isa<ConstantInt>(RHS))
2336 return false;
2337
2338 if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2339 return false;
2340
2341 TestBit = BW - 1;
2342 IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2343 break;
2344 } // end switch
2345
2346 static const unsigned OpcTable[2][2][2] = {
2347 { {AArch64::CBZW, AArch64::CBZX },
2348 {AArch64::CBNZW, AArch64::CBNZX} },
2349 { {AArch64::TBZW, AArch64::TBZX },
2350 {AArch64::TBNZW, AArch64::TBNZX} }
2351 };
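 // Indexed as OpcTable[IsBitTest][IsCmpNE][Is64Bit]: compare-and-branch
 // (CBZ/CBNZ) versus test-bit-and-branch (TBZ/TBNZ), branch on zero versus
 // non-zero, and W- versus X-register forms.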
2352
2353 bool IsBitTest = TestBit != -1;
2354 bool Is64Bit = BW == 64;
2355 if (TestBit < 32 && TestBit >= 0)
2356 Is64Bit = false;
2357
2358 unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2359 const MCInstrDesc &II = TII.get(Opc);
2360
2361 Register SrcReg = getRegForValue(LHS);
2362 if (!SrcReg)
2363 return false;
2364
2365 if (BW == 64 && !Is64Bit)
2366 SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32);
2367
2368 if ((BW < 32) && !IsBitTest)
2369 SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
2370
2371 // Emit the combined compare and branch instruction.
2372 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2373 MachineInstrBuilder MIB =
2374 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
2375 .addReg(SrcReg);
2376 if (IsBitTest)
2377 MIB.addImm(TestBit);
2378 MIB.addMBB(TBB);
2379
2380 finishCondBranch(BI->getParent(), TBB, FBB);
2381 return true;
2382}
2383
2384bool AArch64FastISel::selectBranch(const Instruction *I) {
2385 const BranchInst *BI = cast<BranchInst>(I);
2386 if (BI->isUnconditional()) {
2387 MachineBasicBlock *MSucc = FuncInfo.getMBB(BI->getSuccessor(0));
2388 fastEmitBranch(MSucc, BI->getDebugLoc());
2389 return true;
2390 }
2391
2392 MachineBasicBlock *TBB = FuncInfo.getMBB(BI->getSuccessor(0));
2393 MachineBasicBlock *FBB = FuncInfo.getMBB(BI->getSuccessor(1));
2394
2395 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2396 if (CI->hasOneUse() && isValueAvailable(CI)) {
2397 // Try to optimize or fold the cmp.
2398 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2399 switch (Predicate) {
2400 default:
2401 break;
2402 case CmpInst::FCMP_FALSE:
2403 fastEmitBranch(FBB, MIMD.getDL());
2404 return true;
2405 case CmpInst::FCMP_TRUE:
2406 fastEmitBranch(TBB, MIMD.getDL());
2407 return true;
2408 }
2409
2410 // Try to emit a combined compare-and-branch first.
2411 if (emitCompareAndBranch(BI))
2412 return true;
2413
2414 // Try to take advantage of fallthrough opportunities.
2415 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2416 std::swap(TBB, FBB);
2417 Predicate = CmpInst::getInversePredicate(Predicate);
2418 }
2419
2420 // Emit the cmp.
2421 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2422 return false;
2423
2424 // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2425 // instruction.
2426 AArch64CC::CondCode CC = getCompareCC(Predicate);
2427 AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2428 switch (Predicate) {
2429 default:
2430 break;
2431 case CmpInst::FCMP_UEQ:
2432 ExtraCC = AArch64CC::EQ;
2433 CC = AArch64CC::VS;
2434 break;
2435 case CmpInst::FCMP_ONE:
2436 ExtraCC = AArch64CC::MI;
2437 CC = AArch64CC::GT;
2438 break;
2439 }
2440 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2441
2442 // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2443 if (ExtraCC != AArch64CC::AL) {
2444 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2445 .addImm(ExtraCC)
2446 .addMBB(TBB);
2447 }
2448
2449 // Emit the branch.
2450 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2451 .addImm(CC)
2452 .addMBB(TBB);
2453
2454 finishCondBranch(BI->getParent(), TBB, FBB);
2455 return true;
2456 }
2457 } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2458 uint64_t Imm = CI->getZExtValue();
2459 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2460 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::B))
2461 .addMBB(Target);
2462
2463 // Obtain the branch probability and add the target to the successor list.
2464 if (FuncInfo.BPI) {
2465 auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2466 BI->getParent(), Target->getBasicBlock());
2467 FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2468 } else
2469 FuncInfo.MBB->addSuccessorWithoutProb(Target);
2470 return true;
2471 } else {
2472 AArch64CC::CondCode CC = AArch64CC::NE;
2473 if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2474 // Fake request the condition, otherwise the intrinsic might be completely
2475 // optimized away.
2476 Register CondReg = getRegForValue(BI->getCondition());
2477 if (!CondReg)
2478 return false;
2479
2480 // Emit the branch.
2481 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2482 .addImm(CC)
2483 .addMBB(TBB);
2484
2485 finishCondBranch(BI->getParent(), TBB, FBB);
2486 return true;
2487 }
2488 }
2489
2490 Register CondReg = getRegForValue(BI->getCondition());
2491 if (CondReg == 0)
2492 return false;
2493
2494 // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2495 unsigned Opcode = AArch64::TBNZW;
2496 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2497 std::swap(TBB, FBB);
2498 Opcode = AArch64::TBZW;
2499 }
2500
2501 const MCInstrDesc &II = TII.get(Opcode);
2502 Register ConstrainedCondReg
2503 = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2504 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2505 .addReg(ConstrainedCondReg)
2506 .addImm(0)
2507 .addMBB(TBB);
2508
2509 finishCondBranch(BI->getParent(), TBB, FBB);
2510 return true;
2511}
2512
2513bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2514 const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2515 Register AddrReg = getRegForValue(BI->getOperand(0));
2516 if (AddrReg == 0)
2517 return false;
2518
2519 // Authenticated indirectbr is not implemented yet.
2520 if (FuncInfo.MF->getFunction().hasFnAttribute("ptrauth-indirect-gotos"))
2521 return false;
2522
2523 // Emit the indirect branch.
2524 const MCInstrDesc &II = TII.get(AArch64::BR);
2525 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
2526 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(AddrReg);
2527
2528 // Make sure the CFG is up-to-date.
2529 for (const auto *Succ : BI->successors())
2530 FuncInfo.MBB->addSuccessor(FuncInfo.getMBB(Succ));
2531
2532 return true;
2533}
2534
2535bool AArch64FastISel::selectCmp(const Instruction *I) {
2536 const CmpInst *CI = cast<CmpInst>(I);
2537
2538 // Vectors of i1 are weird: bail out.
2539 if (CI->getType()->isVectorTy())
2540 return false;
2541
2542 // Try to optimize or fold the cmp.
2543 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2544 unsigned ResultReg = 0;
2545 switch (Predicate) {
2546 default:
2547 break;
2548 case CmpInst::FCMP_FALSE:
2549 ResultReg = createResultReg(&AArch64::GPR32RegClass);
2550 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2551 TII.get(TargetOpcode::COPY), ResultReg)
2552 .addReg(AArch64::WZR, getKillRegState(true));
2553 break;
2554 case CmpInst::FCMP_TRUE:
2555 ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2556 break;
2557 }
2558
2559 if (ResultReg) {
2560 updateValueMap(I, ResultReg);
2561 return true;
2562 }
2563
2564 // Emit the cmp.
2565 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2566 return false;
2567
2568 ResultReg = createResultReg(&AArch64::GPR32RegClass);
2569
2570 // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2571 // condition codes are inverted, because they are used by CSINC.
2572 static unsigned CondCodeTable[2][2] = {
2573 { AArch64CC::NE, AArch64CC::VC },
2574 { AArch64CC::PL, AArch64CC::LE }
2575 };
2576 unsigned *CondCodes = nullptr;
2577 switch (Predicate) {
2578 default:
2579 break;
2580 case CmpInst::FCMP_UEQ:
2581 CondCodes = &CondCodeTable[0][0];
2582 break;
2583 case CmpInst::FCMP_ONE:
2584 CondCodes = &CondCodeTable[1][0];
2585 break;
2586 }
2587
2588 if (CondCodes) {
2589 Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2590 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2591 TmpReg1)
2592 .addReg(AArch64::WZR, getKillRegState(true))
2593 .addReg(AArch64::WZR, getKillRegState(true))
2594 .addImm(CondCodes[0]);
2595 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2596 ResultReg)
2597 .addReg(TmpReg1, getKillRegState(true))
2598 .addReg(AArch64::WZR, getKillRegState(true))
2599 .addImm(CondCodes[1]);
2600
2601 updateValueMap(I, ResultReg);
2602 return true;
2603 }
2604
2605 // Now set a register based on the comparison.
2606 AArch64CC::CondCode CC = getCompareCC(Predicate);
2607 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2608 AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
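 // CSINC Wd, WZR, WZR, invertedCC produces 0 when the inverted condition
 // holds and WZR + 1 = 1 otherwise, i.e. 1 exactly when the original
 // condition CC is true.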
2609 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2610 ResultReg)
2611 .addReg(AArch64::WZR, getKillRegState(true))
2612 .addReg(AArch64::WZR, getKillRegState(true))
2613 .addImm(invertedCC);
2614
2615 updateValueMap(I, ResultReg);
2616 return true;
2617}
2618
2619/// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2620/// value.
2621bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2622 if (!SI->getType()->isIntegerTy(1))
2623 return false;
2624
2625 const Value *Src1Val, *Src2Val;
2626 unsigned Opc = 0;
2627 bool NeedExtraOp = false;
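 // A select of i1 with a constant true or false operand reduces to a single
 // logical operation on the condition:
 // select c, 1, f --> c | f
 // select c, 0, f --> f & ~c (BIC)
 // select c, t, 1 --> ~c | t (XOR the condition first, then ORR)
 // select c, t, 0 --> c & t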
2628 if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2629 if (CI->isOne()) {
2630 Src1Val = SI->getCondition();
2631 Src2Val = SI->getFalseValue();
2632 Opc = AArch64::ORRWrr;
2633 } else {
2634 assert(CI->isZero());
2635 Src1Val = SI->getFalseValue();
2636 Src2Val = SI->getCondition();
2637 Opc = AArch64::BICWrr;
2638 }
2639 } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2640 if (CI->isOne()) {
2641 Src1Val = SI->getCondition();
2642 Src2Val = SI->getTrueValue();
2643 Opc = AArch64::ORRWrr;
2644 NeedExtraOp = true;
2645 } else {
2646 assert(CI->isZero());
2647 Src1Val = SI->getCondition();
2648 Src2Val = SI->getTrueValue();
2649 Opc = AArch64::ANDWrr;
2650 }
2651 }
2652
2653 if (!Opc)
2654 return false;
2655
2656 Register Src1Reg = getRegForValue(Src1Val);
2657 if (!Src1Reg)
2658 return false;
2659
2660 Register Src2Reg = getRegForValue(Src2Val);
2661 if (!Src2Reg)
2662 return false;
2663
2664 if (NeedExtraOp)
2665 Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1);
2666
2667 Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2668 Src2Reg);
2669 updateValueMap(SI, ResultReg);
2670 return true;
2671}
2672
2673bool AArch64FastISel::selectSelect(const Instruction *I) {
2674 assert(isa<SelectInst>(I) && "Expected a select instruction.");
2675 MVT VT;
2676 if (!isTypeSupported(I->getType(), VT))
2677 return false;
2678
2679 unsigned Opc;
2680 const TargetRegisterClass *RC;
2681 switch (VT.SimpleTy) {
2682 default:
2683 return false;
2684 case MVT::i1:
2685 case MVT::i8:
2686 case MVT::i16:
2687 case MVT::i32:
2688 Opc = AArch64::CSELWr;
2689 RC = &AArch64::GPR32RegClass;
2690 break;
2691 case MVT::i64:
2692 Opc = AArch64::CSELXr;
2693 RC = &AArch64::GPR64RegClass;
2694 break;
2695 case MVT::f32:
2696 Opc = AArch64::FCSELSrrr;
2697 RC = &AArch64::FPR32RegClass;
2698 break;
2699 case MVT::f64:
2700 Opc = AArch64::FCSELDrrr;
2701 RC = &AArch64::FPR64RegClass;
2702 break;
2703 }
2704
2705 const SelectInst *SI = cast<SelectInst>(I);
2706 const Value *Cond = SI->getCondition();
2707 AArch64CC::CondCode CC = AArch64CC::NE;
2708 AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2709
2710 if (optimizeSelect(SI))
2711 return true;
2712
2713 // Try to pick up the flags, so we don't have to emit another compare.
2714 if (foldXALUIntrinsic(CC, I, Cond)) {
2715 // Fake request the condition to force emission of the XALU intrinsic.
2716 Register CondReg = getRegForValue(Cond);
2717 if (!CondReg)
2718 return false;
2719 } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2720 isValueAvailable(Cond)) {
2721 const auto *Cmp = cast<CmpInst>(Cond);
2722 // Try to optimize or fold the cmp.
2723 CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2724 const Value *FoldSelect = nullptr;
2725 switch (Predicate) {
2726 default:
2727 break;
2728 case CmpInst::FCMP_FALSE:
2729 FoldSelect = SI->getFalseValue();
2730 break;
2731 case CmpInst::FCMP_TRUE:
2732 FoldSelect = SI->getTrueValue();
2733 break;
2734 }
2735
2736 if (FoldSelect) {
2737 Register SrcReg = getRegForValue(FoldSelect);
2738 if (!SrcReg)
2739 return false;
2740
2741 updateValueMap(I, SrcReg);
2742 return true;
2743 }
2744
2745 // Emit the cmp.
2746 if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2747 return false;
2748
2749 // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2750 CC = getCompareCC(Predicate);
2751 switch (Predicate) {
2752 default:
2753 break;
2754 case CmpInst::FCMP_UEQ:
2755 ExtraCC = AArch64CC::EQ;
2756 CC = AArch64CC::VS;
2757 break;
2758 case CmpInst::FCMP_ONE:
2759 ExtraCC = AArch64CC::MI;
2760 CC = AArch64CC::GT;
2761 break;
2762 }
2763 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2764 } else {
2765 Register CondReg = getRegForValue(Cond);
2766 if (!CondReg)
2767 return false;
2768
2769 const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2770 CondReg = constrainOperandRegClass(II, CondReg, 1);
2771
2772 // Emit a TST instruction (ANDS wzr, reg, #imm).
2773 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II,
2774 AArch64::WZR)
2775 .addReg(CondReg)
2776 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2777 }
2778
2779 Register Src1Reg = getRegForValue(SI->getTrueValue());
2780 Register Src2Reg = getRegForValue(SI->getFalseValue());
2781
2782 if (!Src1Reg || !Src2Reg)
2783 return false;
2784
2785 if (ExtraCC != AArch64CC::AL)
2786 Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC);
2787
2788 Register ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC);
2789 updateValueMap(I, ResultReg);
2790 return true;
2791}
2792
2793bool AArch64FastISel::selectFPExt(const Instruction *I) {
2794 Value *V = I->getOperand(0);
2795 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2796 return false;
2797
2798 Register Op = getRegForValue(V);
2799 if (Op == 0)
2800 return false;
2801
2802 Register ResultReg = createResultReg(&AArch64::FPR64RegClass);
2803 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTDSr),
2804 ResultReg).addReg(Op);
2805 updateValueMap(I, ResultReg);
2806 return true;
2807}
2808
2809bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2810 Value *V = I->getOperand(0);
2811 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2812 return false;
2813
2814 Register Op = getRegForValue(V);
2815 if (Op == 0)
2816 return false;
2817
2818 Register ResultReg = createResultReg(&AArch64::FPR32RegClass);
2819 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTSDr),
2820 ResultReg).addReg(Op);
2821 updateValueMap(I, ResultReg);
2822 return true;
2823}
2824
2825// FPToUI and FPToSI
2826bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2827 MVT DestVT;
2828 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2829 return false;
2830
2831 Register SrcReg = getRegForValue(I->getOperand(0));
2832 if (SrcReg == 0)
2833 return false;
2834
2835 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2836 if (SrcVT == MVT::f128 || SrcVT == MVT::f16 || SrcVT == MVT::bf16)
2837 return false;
2838
2839 unsigned Opc;
2840 if (SrcVT == MVT::f64) {
2841 if (Signed)
2842 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2843 else
2844 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2845 } else {
2846 if (Signed)
2847 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2848 else
2849 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2850 }
2851 Register ResultReg = createResultReg(
2852 DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2853 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
2854 .addReg(SrcReg);
2855 updateValueMap(I, ResultReg);
2856 return true;
2857}
2858
2859bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2860 MVT DestVT;
2861 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2862 return false;
2863 // Let regular ISEL handle FP16
2864 if (DestVT == MVT::f16 || DestVT == MVT::bf16)
2865 return false;
2866
2867 assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2868 "Unexpected value type.");
2869
2870 Register SrcReg = getRegForValue(I->getOperand(0));
2871 if (!SrcReg)
2872 return false;
2873
2874 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2875
2876 // Handle sign-extension.
2877 if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2878 SrcReg =
2879 emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2880 if (!SrcReg)
2881 return false;
2882 }
2883
2884 unsigned Opc;
2885 if (SrcVT == MVT::i64) {
2886 if (Signed)
2887 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2888 else
2889 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2890 } else {
2891 if (Signed)
2892 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2893 else
2894 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2895 }
2896
2897 Register ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg);
2898 updateValueMap(I, ResultReg);
2899 return true;
2900}
2901
2902bool AArch64FastISel::fastLowerArguments() {
2903 if (!FuncInfo.CanLowerReturn)
2904 return false;
2905
2906 const Function *F = FuncInfo.Fn;
2907 if (F->isVarArg())
2908 return false;
2909
2910 CallingConv::ID CC = F->getCallingConv();
2911 if (CC != CallingConv::C && CC != CallingConv::Swift)
2912 return false;
2913
2914 if (Subtarget->hasCustomCallingConv())
2915 return false;
2916
2917 // Only handle simple cases of up to 8 GPR and FPR each.
2918 unsigned GPRCnt = 0;
2919 unsigned FPRCnt = 0;
2920 for (auto const &Arg : F->args()) {
2921 if (Arg.hasAttribute(Attribute::ByVal) ||
2922 Arg.hasAttribute(Attribute::InReg) ||
2923 Arg.hasAttribute(Attribute::StructRet) ||
2924 Arg.hasAttribute(Attribute::SwiftSelf) ||
2925 Arg.hasAttribute(Attribute::SwiftAsync) ||
2926 Arg.hasAttribute(Attribute::SwiftError) ||
2927 Arg.hasAttribute(Attribute::Nest))
2928 return false;
2929
2930 Type *ArgTy = Arg.getType();
2931 if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2932 return false;
2933
2934 EVT ArgVT = TLI.getValueType(DL, ArgTy);
2935 if (!ArgVT.isSimple())
2936 return false;
2937
2938 MVT VT = ArgVT.getSimpleVT().SimpleTy;
2939 if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2940 return false;
2941
2942 if (VT.isVector() &&
2943 (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2944 return false;
2945
2946 if (VT >= MVT::i1 && VT <= MVT::i64)
2947 ++GPRCnt;
2948 else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2949 VT.is128BitVector())
2950 ++FPRCnt;
2951 else
2952 return false;
2953
2954 if (GPRCnt > 8 || FPRCnt > 8)
2955 return false;
2956 }
2957
2958 static const MCPhysReg Registers[6][8] = {
2959 { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2960 AArch64::W5, AArch64::W6, AArch64::W7 },
2961 { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2962 AArch64::X5, AArch64::X6, AArch64::X7 },
2963 { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2964 AArch64::H5, AArch64::H6, AArch64::H7 },
2965 { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2966 AArch64::S5, AArch64::S6, AArch64::S7 },
2967 { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2968 AArch64::D5, AArch64::D6, AArch64::D7 },
2969 { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2970 AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2971 };
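 // One row per register class used for argument passing: W and X registers
 // for integers, H/S/D/Q registers for half, single, double and 64-/128-bit
 // vector types. GPRIdx and FPRIdx below walk the integer and floating-point
 // rows independently, mirroring the AAPCS64 registers r0-r7 and v0-v7.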
2972
2973 unsigned GPRIdx = 0;
2974 unsigned FPRIdx = 0;
2975 for (auto const &Arg : F->args()) {
2976 MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2977 unsigned SrcReg;
2978 const TargetRegisterClass *RC;
2979 if (VT >= MVT::i1 && VT <= MVT::i32) {
2980 SrcReg = Registers[0][GPRIdx++];
2981 RC = &AArch64::GPR32RegClass;
2982 VT = MVT::i32;
2983 } else if (VT == MVT::i64) {
2984 SrcReg = Registers[1][GPRIdx++];
2985 RC = &AArch64::GPR64RegClass;
2986 } else if (VT == MVT::f16 || VT == MVT::bf16) {
2987 SrcReg = Registers[2][FPRIdx++];
2988 RC = &AArch64::FPR16RegClass;
2989 } else if (VT == MVT::f32) {
2990 SrcReg = Registers[3][FPRIdx++];
2991 RC = &AArch64::FPR32RegClass;
2992 } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2993 SrcReg = Registers[4][FPRIdx++];
2994 RC = &AArch64::FPR64RegClass;
2995 } else if (VT.is128BitVector()) {
2996 SrcReg = Registers[5][FPRIdx++];
2997 RC = &AArch64::FPR128RegClass;
2998 } else
2999 llvm_unreachable("Unexpected value type.");
3000
3001 Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3002 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3003 // Without this, EmitLiveInCopies may eliminate the livein if its only
3004 // use is a bitcast (which isn't turned into an instruction).
3005 Register ResultReg = createResultReg(RC);
3006 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3007 TII.get(TargetOpcode::COPY), ResultReg)
3008 .addReg(DstReg, getKillRegState(true));
3009 updateValueMap(&Arg, ResultReg);
3010 }
3011 return true;
3012}
3013
3014bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3015 SmallVectorImpl<MVT> &OutVTs,
3016 unsigned &NumBytes) {
3017 CallingConv::ID CC = CLI.CallConv;
3018 SmallVector<CCValAssign, 16> ArgLocs;
3019 CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3020 CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
3021
3022 // Get a count of how many bytes are to be pushed on the stack.
3023 NumBytes = CCInfo.getStackSize();
3024
3025 // Issue CALLSEQ_START
3026 unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3027 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackDown))
3028 .addImm(NumBytes).addImm(0);
3029
3030 // Process the args.
3031 for (CCValAssign &VA : ArgLocs) {
3032 const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3033 MVT ArgVT = OutVTs[VA.getValNo()];
3034
3035 Register ArgReg = getRegForValue(ArgVal);
3036 if (!ArgReg)
3037 return false;
3038
3039 // Handle arg promotion: SExt, ZExt, AExt.
3040 switch (VA.getLocInfo()) {
3041 case CCValAssign::Full:
3042 break;
3043 case CCValAssign::SExt: {
3044 MVT DestVT = VA.getLocVT();
3045 MVT SrcVT = ArgVT;
3046 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3047 if (!ArgReg)
3048 return false;
3049 break;
3050 }
3051 case CCValAssign::AExt:
3052 // Intentional fall-through.
3053 case CCValAssign::ZExt: {
3054 MVT DestVT = VA.getLocVT();
3055 MVT SrcVT = ArgVT;
3056 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3057 if (!ArgReg)
3058 return false;
3059 break;
3060 }
3061 default:
3062 llvm_unreachable("Unknown arg promotion!");
3063 }
3064
3065 // Now copy/store arg to correct locations.
3066 if (VA.isRegLoc() && !VA.needsCustom()) {
3067 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3068 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3069 CLI.OutRegs.push_back(VA.getLocReg());
3070 } else if (VA.needsCustom()) {
3071 // FIXME: Handle custom args.
3072 return false;
3073 } else {
3074 assert(VA.isMemLoc() && "Assuming store on stack.");
3075
3076 // Don't emit stores for undef values.
3077 if (isa<UndefValue>(ArgVal))
3078 continue;
3079
3080 // Need to store on the stack.
3081 unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3082
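 // On big-endian targets an argument smaller than 8 bytes occupies the
 // high-addressed bytes of its 8-byte stack slot, so the store offset is
 // padded from the left.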
3083 unsigned BEAlign = 0;
3084 if (ArgSize < 8 && !Subtarget->isLittleEndian())
3085 BEAlign = 8 - ArgSize;
3086
3087 Address Addr;
3088 Addr.setKind(Address::RegBase);
3089 Addr.setReg(AArch64::SP);
3090 Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3091
3092 Align Alignment = DL.getABITypeAlign(ArgVal->getType());
3093 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3094 MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3095 MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3096
3097 if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3098 return false;
3099 }
3100 }
3101 return true;
3102}
3103
3104bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, unsigned NumBytes) {
3105 CallingConv::ID CC = CLI.CallConv;
3106
3107 // Issue CALLSEQ_END
3108 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3109 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp))
3110 .addImm(NumBytes).addImm(0);
3111
3112 // Now the return values.
3113 SmallVector<CCValAssign, 16> RVLocs;
3114 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3115 CCInfo.AnalyzeCallResult(CLI.Ins, CCAssignFnForCall(CC));
3116
3117 Register ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
3118 for (unsigned i = 0; i != RVLocs.size(); ++i) {
3119 CCValAssign &VA = RVLocs[i];
3120 MVT CopyVT = VA.getValVT();
3121 unsigned CopyReg = ResultReg + i;
3122
3123 // TODO: Handle big-endian results
3124 if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3125 return false;
3126
3127 // Copy result out of their specified physreg.
3128 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
3129 CopyReg)
3130 .addReg(VA.getLocReg());
3131 CLI.InRegs.push_back(VA.getLocReg());
3132 }
3133
3134 CLI.ResultReg = ResultReg;
3135 CLI.NumResultRegs = RVLocs.size();
3136
3137 return true;
3138}
3139
3140bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3141 CallingConv::ID CC = CLI.CallConv;
3142 bool IsTailCall = CLI.IsTailCall;
3143 bool IsVarArg = CLI.IsVarArg;
3144 const Value *Callee = CLI.Callee;
3145 MCSymbol *Symbol = CLI.Symbol;
3146
3147 if (!Callee && !Symbol)
3148 return false;
3149
3150 // Allow SelectionDAG isel to handle calls to functions like setjmp that need
3151 // a bti instruction following the call.
3152 if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
3153 !Subtarget->noBTIAtReturnTwice() &&
3154 MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
3155 return false;
3156
3157 // Allow SelectionDAG isel to handle indirect calls with KCFI checks.
3158 if (CLI.CB && CLI.CB->isIndirectCall() &&
3159 CLI.CB->getOperandBundle(LLVMContext::OB_kcfi))
3160 return false;
3161
3162 // Allow SelectionDAG isel to handle tail calls.
3163 if (IsTailCall)
3164 return false;
3165
3166 // FIXME: we could and should support this, but for now correctness at -O0 is
3167 // more important.
3168 if (Subtarget->isTargetILP32())
3169 return false;
3170
3171 CodeModel::Model CM = TM.getCodeModel();
3172 // Only support the small-addressing and large code models.
3173 if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3174 return false;
3175
3176 // FIXME: Add large code model support for ELF.
3177 if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3178 return false;
3179
3180 // ELF -fno-plt compiled intrinsic calls do not have the nonlazybind
3181 // attribute. Check "RtLibUseGOT" instead.
3182 if (MF->getFunction().getParent()->getRtLibUseGOT())
3183 return false;
3184
3185 // Let SDISel handle vararg functions.
3186 if (IsVarArg)
3187 return false;
3188
3189 if (Subtarget->isWindowsArm64EC())
3190 return false;
3191
3192 for (auto Flag : CLI.OutFlags)
3193 if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3194 Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError())
3195 return false;
3196
3197 // Set up the argument vectors.
3198 SmallVector<MVT, 16> OutVTs;
3199 OutVTs.reserve(CLI.OutVals.size());
3200
3201 for (auto *Val : CLI.OutVals) {
3202 MVT VT;
3203 if (!isTypeLegal(Val->getType(), VT) &&
3204 !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3205 return false;
3206
3207 // We don't handle vector parameters yet.
3208 if (VT.isVector() || VT.getSizeInBits() > 64)
3209 return false;
3210
3211 OutVTs.push_back(VT);
3212 }
3213
3214 Address Addr;
3215 if (Callee && !computeCallAddress(Callee, Addr))
3216 return false;
3217
3218 // The weak function target may be zero; in that case we must use indirect
3219 // addressing via a stub on windows as it may be out of range for a
3220 // PC-relative jump.
3221 if (Subtarget->isTargetWindows() && Addr.getGlobalValue() &&
3222 Addr.getGlobalValue()->hasExternalWeakLinkage())
3223 return false;
3224
3225 // Handle the arguments now that we've gotten them.
3226 unsigned NumBytes;
3227 if (!processCallArgs(CLI, OutVTs, NumBytes))
3228 return false;
3229
3230 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3231 if (RegInfo->isAnyArgRegReserved(*MF))
3232 RegInfo->emitReservedArgRegCallError(*MF);
3233
3234 // Issue the call.
3235 MachineInstrBuilder MIB;
3236 if (Subtarget->useSmallAddressing()) {
3237 const MCInstrDesc &II =
3238 TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL);
3239 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II);
3240 if (Symbol)
3241 MIB.addSym(Symbol, 0);
3242 else if (Addr.getGlobalValue())
3243 MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3244 else if (Addr.getReg()) {
3245 Register Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3246 MIB.addReg(Reg);
3247 } else
3248 return false;
3249 } else {
3250 unsigned CallReg = 0;
3251 if (Symbol) {
3252 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3253 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
3254 ADRPReg)
3255 .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3256
3257 CallReg = createResultReg(&AArch64::GPR64RegClass);
3258 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3259 TII.get(AArch64::LDRXui), CallReg)
3260 .addReg(ADRPReg)
3261 .addSym(Symbol,
3262 AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3263 } else if (Addr.getGlobalValue())
3264 CallReg = materializeGV(Addr.getGlobalValue());
3265 else if (Addr.getReg())
3266 CallReg = Addr.getReg();
3267
3268 if (!CallReg)
3269 return false;
3270
3271 const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF));
3272 CallReg = constrainOperandRegClass(II, CallReg, 0);
3273 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(CallReg);
3274 }
3275
3276 // Add implicit physical register uses to the call.
3277 for (auto Reg : CLI.OutRegs)
3278 MIB.addReg(Reg, RegState::Implicit);
3279
3280 // Add a register mask with the call-preserved registers.
3281 // Proper defs for return values will be added by setPhysRegsDeadExcept().
3282 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3283
3284 CLI.Call = MIB;
3285
3286 // Finish off the call including any return values.
3287 return finishCall(CLI, NumBytes);
3288}
3289
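/// A memcpy is treated as small enough to inline if, given a known alignment,
/// it needs at most four loads and stores of that width (Len / Alignment <= 4),
/// or, with an unknown alignment, if it copies fewer than 32 bytes.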
3290bool AArch64FastISel::isMemCpySmall(uint64_t Len, MaybeAlign Alignment) {
3291 if (Alignment)
3292 return Len / Alignment->value() <= 4;
3293 else
3294 return Len < 32;
3295}
3296
3297bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3298 uint64_t Len, MaybeAlign Alignment) {
3299 // Make sure we don't bloat code by inlining very large memcpy's.
3300 if (!isMemCpySmall(Len, Alignment))
3301 return false;
3302
3303 int64_t UnscaledOffset = 0;
3304 Address OrigDest = Dest;
3305 Address OrigSrc = Src;
3306
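 // Copy in the widest chunks the remaining length and the alignment allow:
 // emit a load/store pair per chunk and bump both addresses by the chunk
 // size until the whole length has been covered.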
3307 while (Len) {
3308 MVT VT;
3309 if (!Alignment || *Alignment >= 8) {
3310 if (Len >= 8)
3311 VT = MVT::i64;
3312 else if (Len >= 4)
3313 VT = MVT::i32;
3314 else if (Len >= 2)
3315 VT = MVT::i16;
3316 else {
3317 VT = MVT::i8;
3318 }
3319 } else {
3320 assert(Alignment && "Alignment is set in this branch");
3321 // Bound based on alignment.
3322 if (Len >= 4 && *Alignment == 4)
3323 VT = MVT::i32;
3324 else if (Len >= 2 && *Alignment == 2)
3325 VT = MVT::i16;
3326 else {
3327 VT = MVT::i8;
3328 }
3329 }
3330
3331 unsigned ResultReg = emitLoad(VT, VT, Src);
3332 if (!ResultReg)
3333 return false;
3334
3335 if (!emitStore(VT, ResultReg, Dest))
3336 return false;
3337
3338 int64_t Size = VT.getSizeInBits() / 8;
3339 Len -= Size;
3340 UnscaledOffset += Size;
3341
3342 // We need to recompute the unscaled offset for each iteration.
3343 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3344 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3345 }
3346
3347 return true;
3348}
3349
3350/// Check if it is possible to fold the condition from the XALU intrinsic
3351/// into the user. The condition code will only be updated on success.
3352bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3353 const Instruction *I,
3354 const Value *Cond) {
3355 if (!isa<ExtractValueInst>(Cond))
3356 return false;
3357
3358 const auto *EV = cast<ExtractValueInst>(Cond);
3359 if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3360 return false;
3361
3362 const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3363 MVT RetVT;
3364 const Function *Callee = II->getCalledFunction();
3365 Type *RetTy =
3366 cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3367 if (!isTypeLegal(RetTy, RetVT))
3368 return false;
3369
3370 if (RetVT != MVT::i32 && RetVT != MVT::i64)
3371 return false;
3372
3373 const Value *LHS = II->getArgOperand(0);
3374 const Value *RHS = II->getArgOperand(1);
3375
3376 // Canonicalize immediate to the RHS.
3377 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3378 std::swap(LHS, RHS);
3379
3380 // Simplify multiplies.
3381 Intrinsic::ID IID = II->getIntrinsicID();
3382 switch (IID) {
3383 default:
3384 break;
3385 case Intrinsic::smul_with_overflow:
3386 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3387 if (C->getValue() == 2)
3388 IID = Intrinsic::sadd_with_overflow;
3389 break;
3390 case Intrinsic::umul_with_overflow:
3391 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3392 if (C->getValue() == 2)
3393 IID = Intrinsic::uadd_with_overflow;
3394 break;
3395 }
3396
3397 AArch64CC::CondCode TmpCC;
3398 switch (IID) {
3399 default:
3400 return false;
3401 case Intrinsic::sadd_with_overflow:
3402 case Intrinsic::ssub_with_overflow:
3403 TmpCC = AArch64CC::VS;
3404 break;
3405 case Intrinsic::uadd_with_overflow:
3406 TmpCC = AArch64CC::HS;
3407 break;
3408 case Intrinsic::usub_with_overflow:
3409 TmpCC = AArch64CC::LO;
3410 break;
3411 case Intrinsic::smul_with_overflow:
3412 case Intrinsic::umul_with_overflow:
3413 TmpCC = AArch64CC::NE;
3414 break;
3415 }
3416
3417 // Check if both instructions are in the same basic block.
3418 if (!isValueAvailable(II))
3419 return false;
3420
3421 // Make sure nothing is in the way
3422 BasicBlock::const_iterator Start(I);
3423 BasicBlock::const_iterator End(II);
3424 for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3425 // We only expect extractvalue instructions between the intrinsic and the
3426 // instruction to be selected.
3427 if (!isa<ExtractValueInst>(Itr))
3428 return false;
3429
3430 // Check that the extractvalue operand comes from the intrinsic.
3431 const auto *EVI = cast<ExtractValueInst>(Itr);
3432 if (EVI->getAggregateOperand() != II)
3433 return false;
3434 }
3435
3436 CC = TmpCC;
3437 return true;
3438}
3439
3440bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3441 // FIXME: Handle more intrinsics.
3442 switch (II->getIntrinsicID()) {
3443 default: return false;
3444 case Intrinsic::frameaddress: {
3445 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3446 MFI.setFrameAddressIsTaken(true);
3447
3448 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3449 Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3450 Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3451 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3452 TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3453 // Recursively load frame address
3454 // ldr x0, [fp]
3455 // ldr x0, [x0]
3456 // ldr x0, [x0]
3457 // ...
3458 unsigned DestReg;
3459 unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3460 while (Depth--) {
3461 DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3462 SrcReg, 0);
3463 assert(DestReg && "Unexpected LDR instruction emission failure.");
3464 SrcReg = DestReg;
3465 }
3466
3467 updateValueMap(II, SrcReg);
3468 return true;
3469 }
3470 case Intrinsic::sponentry: {
3471 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3472
3473 // SP = FP + Fixed Object + 16
3474 int FI = MFI.CreateFixedObject(4, 0, false);
3475 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
3476 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3477 TII.get(AArch64::ADDXri), ResultReg)
3478 .addFrameIndex(FI)
3479 .addImm(0)
3480 .addImm(0);
3481
3482 updateValueMap(II, ResultReg);
3483 return true;
3484 }
3485 case Intrinsic::memcpy:
3486 case Intrinsic::memmove: {
3487 const auto *MTI = cast<MemTransferInst>(II);
3488 // Don't handle volatile.
3489 if (MTI->isVolatile())
3490 return false;
3491
3492 // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
3493 // we would emit dead code because we don't currently handle memmoves.
3494 bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3495 if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3496 // Small memcpy's are common enough that we want to do them without a call
3497 // if possible.
3498 uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3499 MaybeAlign Alignment;
3500 if (MTI->getDestAlign() || MTI->getSourceAlign())
3501 Alignment = std::min(MTI->getDestAlign().valueOrOne(),
3502 MTI->getSourceAlign().valueOrOne());
3503 if (isMemCpySmall(Len, Alignment)) {
3504 Address Dest, Src;
3505 if (!computeAddress(MTI->getRawDest(), Dest) ||
3506 !computeAddress(MTI->getRawSource(), Src))
3507 return false;
3508 if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3509 return true;
3510 }
3511 }
3512
3513 if (!MTI->getLength()->getType()->isIntegerTy(64))
3514 return false;
3515
3516 if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3517 // Fast instruction selection doesn't support the special
3518 // address spaces.
3519 return false;
3520
3521 const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3522 return lowerCallTo(II, IntrMemName, II->arg_size() - 1);
3523 }
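  // A rough illustration (register names are placeholders, not taken from the
  // code above, and x0/x1 stand for %dst/%src): a small, suitably aligned
  // constant-length copy such as
  //   call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 8, i1 false)
  // is expected to take the tryEmitSmallMemCpy fast path and become a single
  // load/store pair instead of a libcall, roughly:
  //   ldr x8, [x1]
  //   str x8, [x0]
  // Longer or non-constant lengths fall through to the lowerCallTo path.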
3524 case Intrinsic::memset: {
3525 const MemSetInst *MSI = cast<MemSetInst>(II);
3526 // Don't handle volatile.
3527 if (MSI->isVolatile())
3528 return false;
3529
3530 if (!MSI->getLength()->getType()->isIntegerTy(64))
3531 return false;
3532
3533 if (MSI->getDestAddressSpace() > 255)
3534 // Fast instruction selection doesn't support the special
3535 // address spaces.
3536 return false;
3537
3538 return lowerCallTo(II, "memset", II->arg_size() - 1);
3539 }
3540 case Intrinsic::sin:
3541 case Intrinsic::cos:
3542 case Intrinsic::tan:
3543 case Intrinsic::pow: {
3544 MVT RetVT;
3545 if (!isTypeLegal(II->getType(), RetVT))
3546 return false;
3547
3548 if (RetVT != MVT::f32 && RetVT != MVT::f64)
3549 return false;
3550
3551 static const RTLIB::Libcall LibCallTable[4][2] = {
3552 {RTLIB::SIN_F32, RTLIB::SIN_F64},
3553 {RTLIB::COS_F32, RTLIB::COS_F64},
3554 {RTLIB::TAN_F32, RTLIB::TAN_F64},
3555 {RTLIB::POW_F32, RTLIB::POW_F64}};
3556 RTLIB::Libcall LC;
3557 bool Is64Bit = RetVT == MVT::f64;
3558 switch (II->getIntrinsicID()) {
3559 default:
3560 llvm_unreachable("Unexpected intrinsic.");
3561 case Intrinsic::sin:
3562 LC = LibCallTable[0][Is64Bit];
3563 break;
3564 case Intrinsic::cos:
3565 LC = LibCallTable[1][Is64Bit];
3566 break;
3567 case Intrinsic::tan:
3568 LC = LibCallTable[2][Is64Bit];
3569 break;
3570 case Intrinsic::pow:
3571 LC = LibCallTable[3][Is64Bit];
3572 break;
3573 }
3574
3575 ArgListTy Args;
3576 Args.reserve(II->arg_size());
3577
3578 // Populate the argument list.
3579 for (auto &Arg : II->args()) {
3580 ArgListEntry Entry;
3581 Entry.Val = Arg;
3582 Entry.Ty = Arg->getType();
3583 Args.push_back(Entry);
3584 }
3585
3586 CallLoweringInfo CLI;
3587 MCContext &Ctx = MF->getContext();
3588 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3589 TLI.getLibcallName(LC), std::move(Args));
3590 if (!lowerCallTo(CLI))
3591 return false;
3592 updateValueMap(II, CLI.ResultReg);
3593 return true;
3594 }
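  // A rough illustration (not a verbatim transcript of the emitted code): an
  // f32 call such as
  //   %r = call float @llvm.sin.f32(float %x)
  // is lowered through lowerCallTo as an ordinary libcall, roughly
  //   bl sinf
  // with the result returned in s0 per the AAPCS; the f64 forms pick the
  // double-precision entry points (sin, cos, tan, pow) instead.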
3595 case Intrinsic::fabs: {
3596 MVT VT;
3597 if (!isTypeLegal(II->getType(), VT))
3598 return false;
3599
3600 unsigned Opc;
3601 switch (VT.SimpleTy) {
3602 default:
3603 return false;
3604 case MVT::f32:
3605 Opc = AArch64::FABSSr;
3606 break;
3607 case MVT::f64:
3608 Opc = AArch64::FABSDr;
3609 break;
3610 }
3611 Register SrcReg = getRegForValue(II->getOperand(0));
3612 if (!SrcReg)
3613 return false;
3614 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3615 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
3616 .addReg(SrcReg);
3617 updateValueMap(II, ResultReg);
3618 return true;
3619 }
3620 case Intrinsic::trap:
3621 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3622 .addImm(1);
3623 return true;
3624 case Intrinsic::debugtrap:
3625 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3626 .addImm(0xF000);
3627 return true;
3628
3629 case Intrinsic::sqrt: {
3630 Type *RetTy = II->getCalledFunction()->getReturnType();
3631
3632 MVT VT;
3633 if (!isTypeLegal(RetTy, VT))
3634 return false;
3635
3636 Register Op0Reg = getRegForValue(II->getOperand(0));
3637 if (!Op0Reg)
3638 return false;
3639
3640 unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg);
3641 if (!ResultReg)
3642 return false;
3643
3644 updateValueMap(II, ResultReg);
3645 return true;
3646 }
3647 case Intrinsic::sadd_with_overflow:
3648 case Intrinsic::uadd_with_overflow:
3649 case Intrinsic::ssub_with_overflow:
3650 case Intrinsic::usub_with_overflow:
3651 case Intrinsic::smul_with_overflow:
3652 case Intrinsic::umul_with_overflow: {
3653 // This implements the basic lowering of the xalu with overflow intrinsics.
3654 const Function *Callee = II->getCalledFunction();
3655 auto *Ty = cast<StructType>(Callee->getReturnType());
3656 Type *RetTy = Ty->getTypeAtIndex(0U);
3657
3658 MVT VT;
3659 if (!isTypeLegal(RetTy, VT))
3660 return false;
3661
3662 if (VT != MVT::i32 && VT != MVT::i64)
3663 return false;
3664
3665 const Value *LHS = II->getArgOperand(0);
3666 const Value *RHS = II->getArgOperand(1);
3667 // Canonicalize immediate to the RHS.
3668 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3669 std::swap(LHS, RHS);
3670
3671 // Simplify multiplies.
3672 Intrinsic::ID IID = II->getIntrinsicID();
3673 switch (IID) {
3674 default:
3675 break;
3676 case Intrinsic::smul_with_overflow:
3677 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3678 if (C->getValue() == 2) {
3679 IID = Intrinsic::sadd_with_overflow;
3680 RHS = LHS;
3681 }
3682 break;
3683 case Intrinsic::umul_with_overflow:
3684 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3685 if (C->getValue() == 2) {
3686 IID = Intrinsic::uadd_with_overflow;
3687 RHS = LHS;
3688 }
3689 break;
3690 }
3691
3692 unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3693 AArch64CC::CondCode CC = AArch64CC::Invalid;
3694 switch (IID) {
3695 default: llvm_unreachable("Unexpected intrinsic!");
3696 case Intrinsic::sadd_with_overflow:
3697 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3698 CC = AArch64CC::VS;
3699 break;
3700 case Intrinsic::uadd_with_overflow:
3701 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3702 CC = AArch64CC::HS;
3703 break;
3704 case Intrinsic::ssub_with_overflow:
3705 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3706 CC = AArch64CC::VS;
3707 break;
3708 case Intrinsic::usub_with_overflow:
3709 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3710 CC = AArch64CC::LO;
3711 break;
3712 case Intrinsic::smul_with_overflow: {
3713 CC = AArch64CC::NE;
3714 Register LHSReg = getRegForValue(LHS);
3715 if (!LHSReg)
3716 return false;
3717
3718 Register RHSReg = getRegForValue(RHS);
3719 if (!RHSReg)
3720 return false;
3721
3722 if (VT == MVT::i32) {
3723 MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg);
3724 Register MulSubReg =
3725 fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3726 // cmp xreg, wreg, sxtw
3727 emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg,
3728 AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true,
3729 /*WantResult=*/false);
3730 MulReg = MulSubReg;
3731 } else {
3732 assert(VT == MVT::i64 && "Unexpected value type.");
3733 // LHSReg and RHSReg cannot be killed by this Mul, since they are
3734 // reused in the next instruction.
3735 MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3736 unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg);
3737 emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63,
3738 /*WantResult=*/false);
3739 }
3740 break;
3741 }
3742 case Intrinsic::umul_with_overflow: {
3743 CC = AArch64CC::NE;
3744 Register LHSReg = getRegForValue(LHS);
3745 if (!LHSReg)
3746 return false;
3747
3748 Register RHSReg = getRegForValue(RHS);
3749 if (!RHSReg)
3750 return false;
3751
3752 if (VT == MVT::i32) {
3753 MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg);
3754 // tst xreg, #0xffffffff00000000
3755 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3756 TII.get(AArch64::ANDSXri), AArch64::XZR)
3757 .addReg(MulReg)
3758 .addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64));
3759 MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3760 } else {
3761 assert(VT == MVT::i64 && "Unexpected value type.");
3762 // LHSReg and RHSReg cannot be killed by this Mul, since they are
3763 // reused in the next instruction.
3764 MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3765 unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg);
3766 emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false);
3767 }
3768 break;
3769 }
3770 }
3771
3772 if (MulReg) {
3773 ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3774 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3775 TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3776 }
3777
3778 if (!ResultReg1)
3779 return false;
3780
3781 ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3782 AArch64::WZR, AArch64::WZR,
3783 getInvertedCondCode(CC));
3784 (void)ResultReg2;
3785 assert((ResultReg1 + 1) == ResultReg2 &&
3786 "Nonconsecutive result registers.");
3787 updateValueMap(II, ResultReg1, 2);
3788 return true;
3789 }
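  // A rough illustration (register numbers are placeholders): for
  //   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
  // the code above emits a flag-setting add followed by a CSINC of WZR with
  // the inverted condition (the cset idiom), roughly:
  //   adds w8, w0, w1
  //   cset w9, vs
  // The value and the overflow bit land in consecutive result registers, as
  // required by the updateValueMap(II, ResultReg1, 2) call.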
3790 case Intrinsic::aarch64_crc32b:
3791 case Intrinsic::aarch64_crc32h:
3792 case Intrinsic::aarch64_crc32w:
3793 case Intrinsic::aarch64_crc32x:
3794 case Intrinsic::aarch64_crc32cb:
3795 case Intrinsic::aarch64_crc32ch:
3796 case Intrinsic::aarch64_crc32cw:
3797 case Intrinsic::aarch64_crc32cx: {
3798 if (!Subtarget->hasCRC())
3799 return false;
3800
3801 unsigned Opc;
3802 switch (II->getIntrinsicID()) {
3803 default:
3804 llvm_unreachable("Unexpected intrinsic!");
3805 case Intrinsic::aarch64_crc32b:
3806 Opc = AArch64::CRC32Brr;
3807 break;
3808 case Intrinsic::aarch64_crc32h:
3809 Opc = AArch64::CRC32Hrr;
3810 break;
3811 case Intrinsic::aarch64_crc32w:
3812 Opc = AArch64::CRC32Wrr;
3813 break;
3814 case Intrinsic::aarch64_crc32x:
3815 Opc = AArch64::CRC32Xrr;
3816 break;
3817 case Intrinsic::aarch64_crc32cb:
3818 Opc = AArch64::CRC32CBrr;
3819 break;
3820 case Intrinsic::aarch64_crc32ch:
3821 Opc = AArch64::CRC32CHrr;
3822 break;
3823 case Intrinsic::aarch64_crc32cw:
3824 Opc = AArch64::CRC32CWrr;
3825 break;
3826 case Intrinsic::aarch64_crc32cx:
3827 Opc = AArch64::CRC32CXrr;
3828 break;
3829 }
3830
3831 Register LHSReg = getRegForValue(II->getArgOperand(0));
3832 Register RHSReg = getRegForValue(II->getArgOperand(1));
3833 if (!LHSReg || !RHSReg)
3834 return false;
3835
3836 Register ResultReg =
3837 fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, LHSReg, RHSReg);
3838 updateValueMap(II, ResultReg);
3839 return true;
3840 }
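  // A rough illustration (register numbers are placeholders): e.g.
  //   %crc = call i32 @llvm.aarch64.crc32b(i32 %acc, i32 %data)
  // selects directly to the corresponding two-source instruction
  //   crc32b w0, w0, w1
  // provided the subtarget reports the CRC extension.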
3841 }
3842 return false;
3843}
3844
3845bool AArch64FastISel::selectRet(const Instruction *I) {
3846 const ReturnInst *Ret = cast<ReturnInst>(I);
3847 const Function &F = *I->getParent()->getParent();
3848
3849 if (!FuncInfo.CanLowerReturn)
3850 return false;
3851
3852 if (F.isVarArg())
3853 return false;
3854
3855 if (TLI.supportSwiftError() &&
3856 F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3857 return false;
3858
3859 if (TLI.supportSplitCSR(FuncInfo.MF))
3860 return false;
3861
3862 // Build a list of return value registers.
3863 SmallVector<Register, 4> RetRegs;
3864
3865 if (Ret->getNumOperands() > 0) {
3866 CallingConv::ID CC = F.getCallingConv();
3867 SmallVector<ISD::OutputArg, 4> Outs;
3868 GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3869
3870 // Analyze operands of the call, assigning locations to each operand.
3871 SmallVector<CCValAssign, 16> ValLocs;
3872 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3873 CCInfo.AnalyzeReturn(Outs, RetCC_AArch64_AAPCS);
3874
3875 // Only handle a single return value for now.
3876 if (ValLocs.size() != 1)
3877 return false;
3878
3879 CCValAssign &VA = ValLocs[0];
3880 const Value *RV = Ret->getOperand(0);
3881
3882 // Don't bother handling odd stuff for now.
3883 if ((VA.getLocInfo() != CCValAssign::Full) &&
3884 (VA.getLocInfo() != CCValAssign::BCvt))
3885 return false;
3886
3887 // Only handle register returns for now.
3888 if (!VA.isRegLoc())
3889 return false;
3890
3891 Register Reg = getRegForValue(RV);
3892 if (Reg == 0)
3893 return false;
3894
3895 unsigned SrcReg = Reg + VA.getValNo();
3896 Register DestReg = VA.getLocReg();
3897 // Avoid a cross-class copy. This is very unlikely.
3898 if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3899 return false;
3900
3901 EVT RVEVT = TLI.getValueType(DL, RV->getType());
3902 if (!RVEVT.isSimple())
3903 return false;
3904
3905 // Vectors (of > 1 lane) in big endian need tricky handling.
3906 if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
3907 !Subtarget->isLittleEndian())
3908 return false;
3909
3910 MVT RVVT = RVEVT.getSimpleVT();
3911 if (RVVT == MVT::f128)
3912 return false;
3913
3914 MVT DestVT = VA.getValVT();
3915 // Special handling for extended integers.
3916 if (RVVT != DestVT) {
3917 if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3918 return false;
3919
3920 if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3921 return false;
3922
3923 bool IsZExt = Outs[0].Flags.isZExt();
3924 SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3925 if (SrcReg == 0)
3926 return false;
3927 }
3928
3929 // "Callee" (i.e. value producer) zero extends pointers at function
3930 // boundary.
3931 if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
3932 SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff);
3933
3934 // Make the copy.
3935 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3936 TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3937
3938 // Add register to return instruction.
3939 RetRegs.push_back(VA.getLocReg());
3940 }
3941
3942 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3943 TII.get(AArch64::RET_ReallyLR));
3944 for (unsigned RetReg : RetRegs)
3945 MIB.addReg(RetReg, RegState::Implicit);
3946 return true;
3947}
3948
3949bool AArch64FastISel::selectTrunc(const Instruction *I) {
3950 Type *DestTy = I->getType();
3951 Value *Op = I->getOperand(0);
3952 Type *SrcTy = Op->getType();
3953
3954 EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3955 EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3956 if (!SrcEVT.isSimple())
3957 return false;
3958 if (!DestEVT.isSimple())
3959 return false;
3960
3961 MVT SrcVT = SrcEVT.getSimpleVT();
3962 MVT DestVT = DestEVT.getSimpleVT();
3963
3964 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3965 SrcVT != MVT::i8)
3966 return false;
3967 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3968 DestVT != MVT::i1)
3969 return false;
3970
3971 Register SrcReg = getRegForValue(Op);
3972 if (!SrcReg)
3973 return false;
3974
3975 // If we're truncating from i64 to a smaller non-legal type then generate an
3976 // AND. Otherwise, we know the high bits are undefined and a truncate only
3977 // generate a COPY. We cannot mark the source register also as result
3978 // register, because this can incorrectly transfer the kill flag onto the
3979 // source register.
3980 unsigned ResultReg;
3981 if (SrcVT == MVT::i64) {
3982 uint64_t Mask = 0;
3983 switch (DestVT.SimpleTy) {
3984 default:
3985 // Trunc i64 to i32 is handled by the target-independent fast-isel.
3986 return false;
3987 case MVT::i1:
3988 Mask = 0x1;
3989 break;
3990 case MVT::i8:
3991 Mask = 0xff;
3992 break;
3993 case MVT::i16:
3994 Mask = 0xffff;
3995 break;
3996 }
3997 // Issue an extract_subreg to get the lower 32-bits.
3998 Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg,
3999 AArch64::sub_32);
4000 // Create the AND instruction which performs the actual truncation.
4001 ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask);
4002 assert(ResultReg && "Unexpected AND instruction emission failure.");
4003 } else {
4004 ResultReg = createResultReg(&AArch64::GPR32RegClass);
4005 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4006 TII.get(TargetOpcode::COPY), ResultReg)
4007 .addReg(SrcReg);
4008 }
4009
4010 updateValueMap(I, ResultReg);
4011 return true;
4012}
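// A rough illustration of the i64 path above (registers are placeholders):
//   %t = trunc i64 %x to i8
// becomes an extract of sub_32 followed by
//   and w8, w8, #0xff
// while truncations from i32 or narrower types only produce a COPY, since the
// high bits are left undefined for the consumer.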
4013
4014unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
4015 assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
4016 DestVT == MVT::i64) &&
4017 "Unexpected value type.");
4018 // Handle i8 and i16 as i32.
4019 if (DestVT == MVT::i8 || DestVT == MVT::i16)
4020 DestVT = MVT::i32;
4021
4022 if (IsZExt) {
4023 unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1);
4024 assert(ResultReg && "Unexpected AND instruction emission failure.");
4025 if (DestVT == MVT::i64) {
4026 // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
4027 // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
4028 Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4029 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4030 TII.get(AArch64::SUBREG_TO_REG), Reg64)
4031 .addImm(0)
4032 .addReg(ResultReg)
4033 .addImm(AArch64::sub_32);
4034 ResultReg = Reg64;
4035 }
4036 return ResultReg;
4037 } else {
4038 if (DestVT == MVT::i64) {
4039 // FIXME: We're SExt i1 to i64.
4040 return 0;
4041 }
4042 return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
4043 0, 0);
4044 }
4045}
4046
4047unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4048 unsigned Opc, ZReg;
4049 switch (RetVT.SimpleTy) {
4050 default: return 0;
4051 case MVT::i8:
4052 case MVT::i16:
4053 case MVT::i32:
4054 RetVT = MVT::i32;
4055 Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
4056 case MVT::i64:
4057 Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
4058 }
4059
4060 const TargetRegisterClass *RC =
4061 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4062 return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg);
4063}
4064
4065unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4066 if (RetVT != MVT::i64)
4067 return 0;
4068
4069 return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
4070 Op0, Op1, AArch64::XZR);
4071}
4072
4073unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4074 if (RetVT != MVT::i64)
4075 return 0;
4076
4077 return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
4078 Op0, Op1, AArch64::XZR);
4079}
4080
4081unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg,
4082 unsigned Op1Reg) {
4083 unsigned Opc = 0;
4084 bool NeedTrunc = false;
4085 uint64_t Mask = 0;
4086 switch (RetVT.SimpleTy) {
4087 default: return 0;
4088 case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
4089 case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4090 case MVT::i32: Opc = AArch64::LSLVWr; break;
4091 case MVT::i64: Opc = AArch64::LSLVXr; break;
4092 }
4093
4094 const TargetRegisterClass *RC =
4095 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4096 if (NeedTrunc)
4097 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4098
4099 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4100 if (NeedTrunc)
4101 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4102 return ResultReg;
4103}
4104
4105unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4106 uint64_t Shift, bool IsZExt) {
4107 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4108 "Unexpected source/return type pair.");
4109 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4110 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4111 "Unexpected source value type.");
4112 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4113 RetVT == MVT::i64) && "Unexpected return value type.");
4114
4115 bool Is64Bit = (RetVT == MVT::i64);
4116 unsigned RegSize = Is64Bit ? 64 : 32;
4117 unsigned DstBits = RetVT.getSizeInBits();
4118 unsigned SrcBits = SrcVT.getSizeInBits();
4119 const TargetRegisterClass *RC =
4120 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4121
4122 // Just emit a copy for "zero" shifts.
4123 if (Shift == 0) {
4124 if (RetVT == SrcVT) {
4125 Register ResultReg = createResultReg(RC);
4126 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4127 TII.get(TargetOpcode::COPY), ResultReg)
4128 .addReg(Op0);
4129 return ResultReg;
4130 } else
4131 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4132 }
4133
4134 // Don't deal with undefined shifts.
4135 if (Shift >= DstBits)
4136 return 0;
4137
4138 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4139 // {S|U}BFM Wd, Wn, #r, #s
4140 // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4141
4142 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4143 // %2 = shl i16 %1, 4
4144 // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4145 // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4146 // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4147 // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4148
4149 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4150 // %2 = shl i16 %1, 8
4151 // Wd<32+7-24,32-24> = Wn<7:0>
4152 // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4153 // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4154 // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4155
4156 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4157 // %2 = shl i16 %1, 12
4158 // Wd<32+3-20,32-20> = Wn<3:0>
4159 // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4160 // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4161 // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4162
4163 unsigned ImmR = RegSize - Shift;
4164 // Limit the width to the length of the source type.
4165 unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4166 static const unsigned OpcTable[2][2] = {
4167 {AArch64::SBFMWri, AArch64::SBFMXri},
4168 {AArch64::UBFMWri, AArch64::UBFMXri}
4169 };
4170 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4171 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4172 Register TmpReg = MRI.createVirtualRegister(RC);
4173 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4174 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4175 .addImm(0)
4176 .addReg(Op0)
4177 .addImm(AArch64::sub_32);
4178 Op0 = TmpReg;
4179 }
4180 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4181}
4182
4183unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg,
4184 unsigned Op1Reg) {
4185 unsigned Opc = 0;
4186 bool NeedTrunc = false;
4187 uint64_t Mask = 0;
4188 switch (RetVT.SimpleTy) {
4189 default: return 0;
4190 case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
4191 case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4192 case MVT::i32: Opc = AArch64::LSRVWr; break;
4193 case MVT::i64: Opc = AArch64::LSRVXr; break;
4194 }
4195
4196 const TargetRegisterClass *RC =
4197 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4198 if (NeedTrunc) {
4199 Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask);
4200 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4201 }
4202 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4203 if (NeedTrunc)
4204 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4205 return ResultReg;
4206}
4207
4208unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4209 uint64_t Shift, bool IsZExt) {
4210 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4211 "Unexpected source/return type pair.");
4212 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4213 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4214 "Unexpected source value type.");
4215 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4216 RetVT == MVT::i64) && "Unexpected return value type.");
4217
4218 bool Is64Bit = (RetVT == MVT::i64);
4219 unsigned RegSize = Is64Bit ? 64 : 32;
4220 unsigned DstBits = RetVT.getSizeInBits();
4221 unsigned SrcBits = SrcVT.getSizeInBits();
4222 const TargetRegisterClass *RC =
4223 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4224
4225 // Just emit a copy for "zero" shifts.
4226 if (Shift == 0) {
4227 if (RetVT == SrcVT) {
4228 Register ResultReg = createResultReg(RC);
4229 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4230 TII.get(TargetOpcode::COPY), ResultReg)
4231 .addReg(Op0);
4232 return ResultReg;
4233 } else
4234 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4235 }
4236
4237 // Don't deal with undefined shifts.
4238 if (Shift >= DstBits)
4239 return 0;
4240
4241 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4242 // {S|U}BFM Wd, Wn, #r, #s
4243 // Wd<s-r:0> = Wn<s:r> when r <= s
4244
4245 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4246 // %2 = lshr i16 %1, 4
4247 // Wd<7-4:0> = Wn<7:4>
4248 // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4249 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4250 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4251
4252 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4253 // %2 = lshr i16 %1, 8
4254 // Wd<7-7:0> = Wn<7:7>
4255 // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4256 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4257 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4258
4259 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4260 // %2 = lshr i16 %1, 12
4261 // Wd<7-7:0> = Wn<7:7> <- clamp r to 7
4262 // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4263 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4264 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4265
4266 if (Shift >= SrcBits && IsZExt)
4267 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4268
4269 // It is not possible to fold a sign-extend into the LShr instruction. In this
4270 // case emit a sign-extend.
4271 if (!IsZExt) {
4272 Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4273 if (!Op0)
4274 return 0;
4275 SrcVT = RetVT;
4276 SrcBits = SrcVT.getSizeInBits();
4277 IsZExt = true;
4278 }
4279
4280 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4281 unsigned ImmS = SrcBits - 1;
4282 static const unsigned OpcTable[2][2] = {
4283 {AArch64::SBFMWri, AArch64::SBFMXri},
4284 {AArch64::UBFMWri, AArch64::UBFMXri}
4285 };
4286 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4287 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4288 Register TmpReg = MRI.createVirtualRegister(RC);
4289 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4290 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4291 .addImm(0)
4292 .addReg(Op0)
4293 .addImm(AArch64::sub_32);
4294 Op0 = TmpReg;
4295 }
4296 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4297}
4298
4299unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg,
4300 unsigned Op1Reg) {
4301 unsigned Opc = 0;
4302 bool NeedTrunc = false;
4303 uint64_t Mask = 0;
4304 switch (RetVT.SimpleTy) {
4305 default: return 0;
4306 case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
4307 case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4308 case MVT::i32: Opc = AArch64::ASRVWr; break;
4309 case MVT::i64: Opc = AArch64::ASRVXr; break;
4310 }
4311
4312 const TargetRegisterClass *RC =
4313 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4314 if (NeedTrunc) {
4315 Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
4316 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4317 }
4318 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4319 if (NeedTrunc)
4320 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4321 return ResultReg;
4322}
4323
4324unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4325 uint64_t Shift, bool IsZExt) {
4326 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4327 "Unexpected source/return type pair.");
4328 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4329 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4330 "Unexpected source value type.");
4331 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4332 RetVT == MVT::i64) && "Unexpected return value type.");
4333
4334 bool Is64Bit = (RetVT == MVT::i64);
4335 unsigned RegSize = Is64Bit ? 64 : 32;
4336 unsigned DstBits = RetVT.getSizeInBits();
4337 unsigned SrcBits = SrcVT.getSizeInBits();
4338 const TargetRegisterClass *RC =
4339 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4340
4341 // Just emit a copy for "zero" shifts.
4342 if (Shift == 0) {
4343 if (RetVT == SrcVT) {
4344 Register ResultReg = createResultReg(RC);
4345 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4346 TII.get(TargetOpcode::COPY), ResultReg)
4347 .addReg(Op0);
4348 return ResultReg;
4349 } else
4350 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4351 }
4352
4353 // Don't deal with undefined shifts.
4354 if (Shift >= DstBits)
4355 return 0;
4356
4357 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4358 // {S|U}BFM Wd, Wn, #r, #s
4359 // Wd<s-r:0> = Wn<s:r> when r <= s
4360
4361 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4362 // %2 = ashr i16 %1, 4
4363 // Wd<7-4:0> = Wn<7:4>
4364 // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4365 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4366 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4367
4368 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4369 // %2 = ashr i16 %1, 8
4370 // Wd<7-7:0> = Wn<7:7>
4371 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4372 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4373 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4374
4375 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4376 // %2 = ashr i16 %1, 12
4377 // Wd<7-7:0> = Wn<7:7> <- clamp r to 7
4378 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4379 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4380 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4381
4382 if (Shift >= SrcBits && IsZExt)
4383 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4384
4385 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4386 unsigned ImmS = SrcBits - 1;
4387 static const unsigned OpcTable[2][2] = {
4388 {AArch64::SBFMWri, AArch64::SBFMXri},
4389 {AArch64::UBFMWri, AArch64::UBFMXri}
4390 };
4391 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4392 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4393 Register TmpReg = MRI.createVirtualRegister(RC);
4394 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4395 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4396 .addImm(0)
4397 .addReg(Op0)
4398 .addImm(AArch64::sub_32);
4399 Op0 = TmpReg;
4400 }
4401 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4402}
4403
4404unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4405 bool IsZExt) {
4406 assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4407
4408 // FastISel does not have plumbing to deal with extensions where the SrcVT or
4409 // DestVT are odd things, so test to make sure that they are both types we can
4410 // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4411 // bail out to SelectionDAG.
4412 if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4413 (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4414 ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4415 (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4416 return 0;
4417
4418 unsigned Opc;
4419 unsigned Imm = 0;
4420
4421 switch (SrcVT.SimpleTy) {
4422 default:
4423 return 0;
4424 case MVT::i1:
4425 return emiti1Ext(SrcReg, DestVT, IsZExt);
4426 case MVT::i8:
4427 if (DestVT == MVT::i64)
4428 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4429 else
4430 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4431 Imm = 7;
4432 break;
4433 case MVT::i16:
4434 if (DestVT == MVT::i64)
4435 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4436 else
4437 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4438 Imm = 15;
4439 break;
4440 case MVT::i32:
4441 assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4442 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4443 Imm = 31;
4444 break;
4445 }
4446
4447 // Handle i8 and i16 as i32.
4448 if (DestVT == MVT::i8 || DestVT == MVT::i16)
4449 DestVT = MVT::i32;
4450 else if (DestVT == MVT::i64) {
4451 Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4452 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4453 TII.get(AArch64::SUBREG_TO_REG), Src64)
4454 .addImm(0)
4455 .addReg(SrcReg)
4456 .addImm(AArch64::sub_32);
4457 SrcReg = Src64;
4458 }
4459
4460 const TargetRegisterClass *RC =
4461 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4462 return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm);
4463}
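// A rough illustration (registers are placeholders): the UBFM/SBFM forms
// emitted above correspond to the usual aliases, e.g. zero-extending an i16
// to i32 is
//   ubfm w8, w9, #0, #15    // i.e. uxth w8, w9
// and extending to i64 first widens the source with SUBREG_TO_REG so that the
// X-register bitfield variant can be used.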
4464
4465static bool isZExtLoad(const MachineInstr *LI) {
4466 switch (LI->getOpcode()) {
4467 default:
4468 return false;
4469 case AArch64::LDURBBi:
4470 case AArch64::LDURHHi:
4471 case AArch64::LDURWi:
4472 case AArch64::LDRBBui:
4473 case AArch64::LDRHHui:
4474 case AArch64::LDRWui:
4475 case AArch64::LDRBBroX:
4476 case AArch64::LDRHHroX:
4477 case AArch64::LDRWroX:
4478 case AArch64::LDRBBroW:
4479 case AArch64::LDRHHroW:
4480 case AArch64::LDRWroW:
4481 return true;
4482 }
4483}
4484
4485static bool isSExtLoad(const MachineInstr *LI) {
4486 switch (LI->getOpcode()) {
4487 default:
4488 return false;
4489 case AArch64::LDURSBWi:
4490 case AArch64::LDURSHWi:
4491 case AArch64::LDURSBXi:
4492 case AArch64::LDURSHXi:
4493 case AArch64::LDURSWi:
4494 case AArch64::LDRSBWui:
4495 case AArch64::LDRSHWui:
4496 case AArch64::LDRSBXui:
4497 case AArch64::LDRSHXui:
4498 case AArch64::LDRSWui:
4499 case AArch64::LDRSBWroX:
4500 case AArch64::LDRSHWroX:
4501 case AArch64::LDRSBXroX:
4502 case AArch64::LDRSHXroX:
4503 case AArch64::LDRSWroX:
4504 case AArch64::LDRSBWroW:
4505 case AArch64::LDRSHWroW:
4506 case AArch64::LDRSBXroW:
4507 case AArch64::LDRSHXroW:
4508 case AArch64::LDRSWroW:
4509 return true;
4510 }
4511}
4512
4513bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4514 MVT SrcVT) {
4515 const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4516 if (!LI || !LI->hasOneUse())
4517 return false;
4518
4519 // Check if the load instruction has already been selected.
4520 Register Reg = lookUpRegForValue(LI);
4521 if (!Reg)
4522 return false;
4523
4524 MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4525 if (!MI)
4526 return false;
4527
4528 // Check if the correct load instruction has been emitted - SelectionDAG might
4529 // have emitted a zero-extending load, but we need a sign-extending load.
4530 bool IsZExt = isa<ZExtInst>(I);
4531 const auto *LoadMI = MI;
4532 if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4533 LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4534 Register LoadReg = MI->getOperand(1).getReg();
4535 LoadMI = MRI.getUniqueVRegDef(LoadReg);
4536 assert(LoadMI && "Expected valid instruction");
4537 }
4538 if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4539 return false;
4540
4541 // Nothing to be done.
4542 if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4543 updateValueMap(I, Reg);
4544 return true;
4545 }
4546
4547 if (IsZExt) {
4548 Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
4549 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4550 TII.get(AArch64::SUBREG_TO_REG), Reg64)
4551 .addImm(0)
4552 .addReg(Reg, getKillRegState(true))
4553 .addImm(AArch64::sub_32);
4554 Reg = Reg64;
4555 } else {
4556 assert((MI->getOpcode() == TargetOpcode::COPY &&
4557 MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4558 "Expected copy instruction");
4559 Reg = MI->getOperand(1).getReg();
4560 MachineBasicBlock::iterator I(MI);
4561 removeDeadCode(I, std::next(I));
4562 }
4563 updateValueMap(I, Reg);
4564 return true;
4565}
4566
4567bool AArch64FastISel::selectIntExt(const Instruction *I) {
4568 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4569 "Unexpected integer extend instruction.");
4570 MVT RetVT;
4571 MVT SrcVT;
4572 if (!isTypeSupported(I->getType(), RetVT))
4573 return false;
4574
4575 if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4576 return false;
4577
4578 // Try to optimize already sign-/zero-extended values from load instructions.
4579 if (optimizeIntExtLoad(I, RetVT, SrcVT))
4580 return true;
4581
4582 Register SrcReg = getRegForValue(I->getOperand(0));
4583 if (!SrcReg)
4584 return false;
4585
4586 // Try to optimize already sign-/zero-extended values from function arguments.
4587 bool IsZExt = isa<ZExtInst>(I);
4588 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4589 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4590 if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4591 Register ResultReg = createResultReg(&AArch64::GPR64RegClass);
4592 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4593 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4594 .addImm(0)
4595 .addReg(SrcReg)
4596 .addImm(AArch64::sub_32);
4597 SrcReg = ResultReg;
4598 }
4599
4600 updateValueMap(I, SrcReg);
4601 return true;
4602 }
4603 }
4604
4605 unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4606 if (!ResultReg)
4607 return false;
4608
4609 updateValueMap(I, ResultReg);
4610 return true;
4611}
4612
4613bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4614 EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4615 if (!DestEVT.isSimple())
4616 return false;
4617
4618 MVT DestVT = DestEVT.getSimpleVT();
4619 if (DestVT != MVT::i64 && DestVT != MVT::i32)
4620 return false;
4621
4622 unsigned DivOpc;
4623 bool Is64bit = (DestVT == MVT::i64);
4624 switch (ISDOpcode) {
4625 default:
4626 return false;
4627 case ISD::SREM:
4628 DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4629 break;
4630 case ISD::UREM:
4631 DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4632 break;
4633 }
4634 unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4635 Register Src0Reg = getRegForValue(I->getOperand(0));
4636 if (!Src0Reg)
4637 return false;
4638
4639 Register Src1Reg = getRegForValue(I->getOperand(1));
4640 if (!Src1Reg)
4641 return false;
4642
4643 const TargetRegisterClass *RC =
4644 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4645 Register QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg);
4646 assert(QuotReg && "Unexpected DIV instruction emission failure.");
4647 // The remainder is computed as numerator - (quotient * denominator) using the
4648 // MSUB instruction.
4649 Register ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg);
4650 updateValueMap(I, ResultReg);
4651 return true;
4652}
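// A rough illustration (registers are placeholders): for
//   %r = srem i32 %a, %b
// the two instructions emitted above are
//   sdiv w8, w0, w1
//   msub w0, w8, w1, w0    // w0 - (w8 * w1)
// i.e. exactly numerator - quotient * denominator.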
4653
4654bool AArch64FastISel::selectMul(const Instruction *I) {
4655 MVT VT;
4656 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4657 return false;
4658
4659 if (VT.isVector())
4660 return selectBinaryOp(I, ISD::MUL);
4661
4662 const Value *Src0 = I->getOperand(0);
4663 const Value *Src1 = I->getOperand(1);
4664 if (const auto *C = dyn_cast<ConstantInt>(Src0))
4665 if (C->getValue().isPowerOf2())
4666 std::swap(Src0, Src1);
4667
4668 // Try to simplify to a shift instruction.
4669 if (const auto *C = dyn_cast<ConstantInt>(Src1))
4670 if (C->getValue().isPowerOf2()) {
4671 uint64_t ShiftVal = C->getValue().logBase2();
4672 MVT SrcVT = VT;
4673 bool IsZExt = true;
4674 if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4675 if (!isIntExtFree(ZExt)) {
4676 MVT VT;
4677 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4678 SrcVT = VT;
4679 IsZExt = true;
4680 Src0 = ZExt->getOperand(0);
4681 }
4682 }
4683 } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4684 if (!isIntExtFree(SExt)) {
4685 MVT VT;
4686 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4687 SrcVT = VT;
4688 IsZExt = false;
4689 Src0 = SExt->getOperand(0);
4690 }
4691 }
4692 }
4693
4694 Register Src0Reg = getRegForValue(Src0);
4695 if (!Src0Reg)
4696 return false;
4697
4698 unsigned ResultReg =
4699 emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt);
4700
4701 if (ResultReg) {
4702 updateValueMap(I, ResultReg);
4703 return true;
4704 }
4705 }
4706
4707 Register Src0Reg = getRegForValue(I->getOperand(0));
4708 if (!Src0Reg)
4709 return false;
4710
4711 Register Src1Reg = getRegForValue(I->getOperand(1));
4712 if (!Src1Reg)
4713 return false;
4714
4715 unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg);
4716
4717 if (!ResultReg)
4718 return false;
4719
4720 updateValueMap(I, ResultReg);
4721 return true;
4722}
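// A rough illustration (registers are placeholders): a power-of-two multiply
// such as
//   %p = mul i32 %x, 8
// is handled by emitLSL_ri above and becomes a single
//   lsl w8, w9, #3
// (a UBFM encoding), while the general case falls back to MADD with a zero
// addend via emitMul_rr.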
4723
4724bool AArch64FastISel::selectShift(const Instruction *I) {
4725 MVT RetVT;
4726 if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4727 return false;
4728
4729 if (RetVT.isVector())
4730 return selectOperator(I, I->getOpcode());
4731
4732 if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4733 unsigned ResultReg = 0;
4734 uint64_t ShiftVal = C->getZExtValue();
4735 MVT SrcVT = RetVT;
4736 bool IsZExt = I->getOpcode() != Instruction::AShr;
4737 const Value *Op0 = I->getOperand(0);
4738 if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4739 if (!isIntExtFree(ZExt)) {
4740 MVT TmpVT;
4741 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4742 SrcVT = TmpVT;
4743 IsZExt = true;
4744 Op0 = ZExt->getOperand(0);
4745 }
4746 }
4747 } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4748 if (!isIntExtFree(SExt)) {
4749 MVT TmpVT;
4750 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4751 SrcVT = TmpVT;
4752 IsZExt = false;
4753 Op0 = SExt->getOperand(0);
4754 }
4755 }
4756 }
4757
4758 Register Op0Reg = getRegForValue(Op0);
4759 if (!Op0Reg)
4760 return false;
4761
4762 switch (I->getOpcode()) {
4763 default: llvm_unreachable("Unexpected instruction.");
4764 case Instruction::Shl:
4765 ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4766 break;
4767 case Instruction::AShr:
4768 ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4769 break;
4770 case Instruction::LShr:
4771 ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4772 break;
4773 }
4774 if (!ResultReg)
4775 return false;
4776
4777 updateValueMap(I, ResultReg);
4778 return true;
4779 }
4780
4781 Register Op0Reg = getRegForValue(I->getOperand(0));
4782 if (!Op0Reg)
4783 return false;
4784
4785 Register Op1Reg = getRegForValue(I->getOperand(1));
4786 if (!Op1Reg)
4787 return false;
4788
4789 unsigned ResultReg = 0;
4790 switch (I->getOpcode()) {
4791 default: llvm_unreachable("Unexpected instruction.");
4792 case Instruction::Shl:
4793 ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg);
4794 break;
4795 case Instruction::AShr:
4796 ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg);
4797 break;
4798 case Instruction::LShr:
4799 ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg);
4800 break;
4801 }
4802
4803 if (!ResultReg)
4804 return false;
4805
4806 updateValueMap(I, ResultReg);
4807 return true;
4808}
4809
4810bool AArch64FastISel::selectBitCast(const Instruction *I) {
4811 MVT RetVT, SrcVT;
4812
4813 if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4814 return false;
4815 if (!isTypeLegal(I->getType(), RetVT))
4816 return false;
4817
4818 unsigned Opc;
4819 if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4820 Opc = AArch64::FMOVWSr;
4821 else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4822 Opc = AArch64::FMOVXDr;
4823 else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4824 Opc = AArch64::FMOVSWr;
4825 else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4826 Opc = AArch64::FMOVDXr;
4827 else
4828 return false;
4829
4830 const TargetRegisterClass *RC = nullptr;
4831 switch (RetVT.SimpleTy) {
4832 default: llvm_unreachable("Unexpected value type.");
4833 case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4834 case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4835 case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4836 case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4837 }
4838 Register Op0Reg = getRegForValue(I->getOperand(0));
4839 if (!Op0Reg)
4840 return false;
4841
4842 Register ResultReg = fastEmitInst_r(Opc, RC, Op0Reg);
4843 if (!ResultReg)
4844 return false;
4845
4846 updateValueMap(I, ResultReg);
4847 return true;
4848}
4849
4850bool AArch64FastISel::selectFRem(const Instruction *I) {
4851 MVT RetVT;
4852 if (!isTypeLegal(I->getType(), RetVT))
4853 return false;
4854
4855 RTLIB::Libcall LC;
4856 switch (RetVT.SimpleTy) {
4857 default:
4858 return false;
4859 case MVT::f32:
4860 LC = RTLIB::REM_F32;
4861 break;
4862 case MVT::f64:
4863 LC = RTLIB::REM_F64;
4864 break;
4865 }
4866
4867 ArgListTy Args;
4868 Args.reserve(I->getNumOperands());
4869
4870 // Populate the argument list.
4871 for (auto &Arg : I->operands()) {
4872 ArgListEntry Entry;
4873 Entry.Val = Arg;
4874 Entry.Ty = Arg->getType();
4875 Args.push_back(Entry);
4876 }
4877
4878 CallLoweringInfo CLI;
4879 MCContext &Ctx = MF->getContext();
4880 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4881 TLI.getLibcallName(LC), std::move(Args));
4882 if (!lowerCallTo(CLI))
4883 return false;
4884 updateValueMap(I, CLI.ResultReg);
4885 return true;
4886}
4887
4888bool AArch64FastISel::selectSDiv(const Instruction *I) {
4889 MVT VT;
4890 if (!isTypeLegal(I->getType(), VT))
4891 return false;
4892
4893 if (!isa<ConstantInt>(I->getOperand(1)))
4894 return selectBinaryOp(I, ISD::SDIV);
4895
4896 const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4897 if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4898 !(C.isPowerOf2() || C.isNegatedPowerOf2()))
4899 return selectBinaryOp(I, ISD::SDIV);
4900
4901 unsigned Lg2 = C.countr_zero();
4902 Register Src0Reg = getRegForValue(I->getOperand(0));
4903 if (!Src0Reg)
4904 return false;
4905
4906 if (cast<BinaryOperator>(I)->isExact()) {
4907 unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2);
4908 if (!ResultReg)
4909 return false;
4910 updateValueMap(I, ResultReg);
4911 return true;
4912 }
4913
4914 int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4915 unsigned AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne);
4916 if (!AddReg)
4917 return false;
4918
4919 // (Src0 < 0) ? Pow2 - 1 : 0;
4920 if (!emitICmp_ri(VT, Src0Reg, 0))
4921 return false;
4922
4923 unsigned SelectOpc;
4924 const TargetRegisterClass *RC;
4925 if (VT == MVT::i64) {
4926 SelectOpc = AArch64::CSELXr;
4927 RC = &AArch64::GPR64RegClass;
4928 } else {
4929 SelectOpc = AArch64::CSELWr;
4930 RC = &AArch64::GPR32RegClass;
4931 }
4932 Register SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg,
4933 AArch64CC::LT);
4934 if (!SelectReg)
4935 return false;
4936
4937 // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4938 // negate the result.
4939 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4940 unsigned ResultReg;
4941 if (C.isNegative())
4942 ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg,
4943 AArch64_AM::ASR, Lg2);
4944 else
4945 ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2);
4946
4947 if (!ResultReg)
4948 return false;
4949
4950 updateValueMap(I, ResultReg);
4951 return true;
4952}
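// A rough illustration (registers are placeholders): a non-exact signed
// division by a power of two, e.g.
//   %q = sdiv i32 %x, 8
// is built above so that rounding is toward zero: the bias of 7 is only
// selected for negative inputs, roughly
//   add  w8, w0, #7
//   cmp  w0, #0
//   csel w8, w8, w0, lt
//   asr  w0, w8, #3
// and a negative divisor additionally negates the result through the
// zero-register subtract with an ASR-shifted operand.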
4953
4954/// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4955/// have to duplicate it for AArch64, because otherwise we would fail during the
4956/// sign-extend emission.
4957unsigned AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4958 Register IdxN = getRegForValue(Idx);
4959 if (IdxN == 0)
4960 // Unhandled operand. Halt "fast" selection and bail.
4961 return 0;
4962
4963 // If the index is smaller or larger than intptr_t, truncate or extend it.
4964 MVT PtrVT = TLI.getPointerTy(DL);
4965 EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4966 if (IdxVT.bitsLT(PtrVT)) {
4967 IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
4968 } else if (IdxVT.bitsGT(PtrVT))
4969 llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4970 return IdxN;
4971}
4972
4973/// This is mostly a copy of the existing FastISel GEP code, but we have to
4974/// duplicate it for AArch64, because otherwise we would bail out even for
4975/// simple cases. This is because the standard fastEmit functions don't cover
4976 /// MUL at all and ADD is lowered very inefficiently.
4977bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4978 if (Subtarget->isTargetILP32())
4979 return false;
4980
4981 Register N = getRegForValue(I->getOperand(0));
4982 if (!N)
4983 return false;
4984
4985 // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4986 // into a single N = N + TotalOffset.
4987 uint64_t TotalOffs = 0;
4988 MVT VT = TLI.getPointerTy(DL);
4989 for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
4990 GTI != E; ++GTI) {
4991 const Value *Idx = GTI.getOperand();
4992 if (auto *StTy = GTI.getStructTypeOrNull()) {
4993 unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4994 // N = N + Offset
4995 if (Field)
4996 TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4997 } else {
4998 // If this is a constant subscript, handle it quickly.
4999 if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
5000 if (CI->isZero())
5001 continue;
5002 // N = N + Offset
5003 TotalOffs += GTI.getSequentialElementStride(DL) *
5004 cast<ConstantInt>(CI)->getSExtValue();
5005 continue;
5006 }
5007 if (TotalOffs) {
5008 N = emitAdd_ri_(VT, N, TotalOffs);
5009 if (!N)
5010 return false;
5011 TotalOffs = 0;
5012 }
5013
5014 // N = N + Idx * ElementSize;
5015 uint64_t ElementSize = GTI.getSequentialElementStride(DL);
5016 unsigned IdxN = getRegForGEPIndex(Idx);
5017 if (!IdxN)
5018 return false;
5019
5020 if (ElementSize != 1) {
5021 unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
5022 if (!C)
5023 return false;
5024 IdxN = emitMul_rr(VT, IdxN, C);
5025 if (!IdxN)
5026 return false;
5027 }
5028 N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
5029 if (!N)
5030 return false;
5031 }
5032 }
5033 if (TotalOffs) {
5034 N = emitAdd_ri_(VT, N, TotalOffs);
5035 if (!N)
5036 return false;
5037 }
5038 updateValueMap(I, N);
5039 return true;
5040}
5041
5042bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
5043 assert(TM.getOptLevel() == CodeGenOptLevel::None &&
5044 "cmpxchg survived AtomicExpand at optlevel > -O0");
5045
5046 auto *RetPairTy = cast<StructType>(I->getType());
5047 Type *RetTy = RetPairTy->getTypeAtIndex(0U);
5048 assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
5049 "cmpxchg has a non-i1 status result");
5050
5051 MVT VT;
5052 if (!isTypeLegal(RetTy, VT))
5053 return false;
5054
5055 const TargetRegisterClass *ResRC;
5056 unsigned Opc, CmpOpc;
5057 // This only supports i32/i64, because i8/i16 aren't legal, and the generic
5058 // extractvalue selection doesn't support that.
5059 if (VT == MVT::i32) {
5060 Opc = AArch64::CMP_SWAP_32;
5061 CmpOpc = AArch64::SUBSWrs;
5062 ResRC = &AArch64::GPR32RegClass;
5063 } else if (VT == MVT::i64) {
5064 Opc = AArch64::CMP_SWAP_64;
5065 CmpOpc = AArch64::SUBSXrs;
5066 ResRC = &AArch64::GPR64RegClass;
5067 } else {
5068 return false;
5069 }
5070
5071 const MCInstrDesc &II = TII.get(Opc);
5072
5073 const Register AddrReg = constrainOperandRegClass(
5074 II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
5075 const Register DesiredReg = constrainOperandRegClass(
5076 II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
5077 const Register NewReg = constrainOperandRegClass(
5078 II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
5079
5080 const Register ResultReg1 = createResultReg(ResRC);
5081 const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5082 const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5083
5084 // FIXME: MachineMemOperand doesn't support cmpxchg yet.
5085 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
5086 .addDef(ResultReg1)
5087 .addDef(ScratchReg)
5088 .addUse(AddrReg)
5089 .addUse(DesiredReg)
5090 .addUse(NewReg);
5091
5092 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc))
5093 .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5094 .addUse(ResultReg1)
5095 .addUse(DesiredReg)
5096 .addImm(0);
5097
5098 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr))
5099 .addDef(ResultReg2)
5100 .addUse(AArch64::WZR)
5101 .addUse(AArch64::WZR)
5102 .addImm(AArch64CC::NE);
5103
5104 assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5105 updateValueMap(I, ResultReg1, 2);
5106 return true;
5107}
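// A note on the sequence above (a sketch of the expected expansion, not a
// guarantee of the final code): the CMP_SWAP_32/64 pseudo is expanded after
// register allocation into the usual load-exclusive/store-exclusive retry
// loop, and the trailing SUBS plus CSINC rebuild the i1 success flag by
// comparing the value actually loaded against the expected value, so the two
// results match the { ty, i1 } return of cmpxchg.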
5108
5109bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5110 if (TLI.fallBackToDAGISel(*I))
5111 return false;
5112 switch (I->getOpcode()) {
5113 default:
5114 break;
5115 case Instruction::Add:
5116 case Instruction::Sub:
5117 return selectAddSub(I);
5118 case Instruction::Mul:
5119 return selectMul(I);
5120 case Instruction::SDiv:
5121 return selectSDiv(I);
5122 case Instruction::SRem:
5123 if (!selectBinaryOp(I, ISD::SREM))
5124 return selectRem(I, ISD::SREM);
5125 return true;
5126 case Instruction::URem:
5127 if (!selectBinaryOp(I, ISD::UREM))
5128 return selectRem(I, ISD::UREM);
5129 return true;
5130 case Instruction::Shl:
5131 case Instruction::LShr:
5132 case Instruction::AShr:
5133 return selectShift(I);
5134 case Instruction::And:
5135 case Instruction::Or:
5136 case Instruction::Xor:
5137 return selectLogicalOp(I);
5138 case Instruction::Br:
5139 return selectBranch(I);
5140 case Instruction::IndirectBr:
5141 return selectIndirectBr(I);
5142 case Instruction::BitCast:
5143 if (!selectCast(I, ISD::BITCAST))
5144 return selectBitCast(I);
5145 return true;
5146 case Instruction::FPToSI:
5147 if (!selectCast(I, ISD::FP_TO_SINT))
5148 return selectFPToInt(I, /*Signed=*/true);
5149 return true;
5150 case Instruction::FPToUI:
5151 return selectFPToInt(I, /*Signed=*/false);
5152 case Instruction::ZExt:
5153 case Instruction::SExt:
5154 return selectIntExt(I);
5155 case Instruction::Trunc:
5156 if (!selectCast(I, ISD::TRUNCATE))
5157 return selectTrunc(I);
5158 return true;
5159 case Instruction::FPExt:
5160 return selectFPExt(I);
5161 case Instruction::FPTrunc:
5162 return selectFPTrunc(I);
5163 case Instruction::SIToFP:
5164 if (!selectCast(I, ISD::SINT_TO_FP))
5165 return selectIntToFP(I, /*Signed=*/true);
5166 return true;
5167 case Instruction::UIToFP:
5168 return selectIntToFP(I, /*Signed=*/false);
5169 case Instruction::Load:
5170 return selectLoad(I);
5171 case Instruction::Store:
5172 return selectStore(I);
5173 case Instruction::FCmp:
5174 case Instruction::ICmp:
5175 return selectCmp(I);
5176 case Instruction::Select:
5177 return selectSelect(I);
5178 case Instruction::Ret:
5179 return selectRet(I);
5180 case Instruction::FRem:
5181 return selectFRem(I);
5182 case Instruction::GetElementPtr:
5183 return selectGetElementPtr(I);
5184 case Instruction::AtomicCmpXchg:
5185 return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5186 }
5187
5188 // Fall back to target-independent instruction selection.
5189 return selectOperator(I, I->getOpcode());
5190}
5191
5192 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
5193 const TargetLibraryInfo *LibInfo) {
5194
5195 SMEAttrs CallerAttrs(*FuncInfo.Fn);
5196 if (CallerAttrs.hasZAState() || CallerAttrs.hasZT0State() ||
5197 CallerAttrs.hasStreamingInterfaceOrBody() ||
5198 CallerAttrs.hasStreamingCompatibleInterface())
5199 return nullptr;
5200 return new AArch64FastISel(FuncInfo, LibInfo);
5201}