LLVM 23.0.0git
AArch64FastISel.cpp
Go to the documentation of this file.
1//===- AArch6464FastISel.cpp - AArch64 FastISel implementation ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the AArch64-specific support for the FastISel class. Some
10// of the target-specific code is generated by tablegen in the file
11// AArch64GenFastISel.inc, which is #included here.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AArch64.h"
18#include "AArch64RegisterInfo.h"
20#include "AArch64Subtarget.h"
23#include "llvm/ADT/APFloat.h"
24#include "llvm/ADT/APInt.h"
25#include "llvm/ADT/DenseMap.h"
41#include "llvm/IR/Argument.h"
42#include "llvm/IR/Attributes.h"
43#include "llvm/IR/BasicBlock.h"
44#include "llvm/IR/CallingConv.h"
45#include "llvm/IR/Constant.h"
46#include "llvm/IR/Constants.h"
47#include "llvm/IR/DataLayout.h"
49#include "llvm/IR/Function.h"
51#include "llvm/IR/GlobalValue.h"
52#include "llvm/IR/InstrTypes.h"
53#include "llvm/IR/Instruction.h"
56#include "llvm/IR/Intrinsics.h"
57#include "llvm/IR/IntrinsicsAArch64.h"
58#include "llvm/IR/Module.h"
59#include "llvm/IR/Operator.h"
60#include "llvm/IR/Type.h"
61#include "llvm/IR/User.h"
62#include "llvm/IR/Value.h"
63#include "llvm/MC/MCInstrDesc.h"
64#include "llvm/MC/MCSymbol.h"
71#include <algorithm>
72#include <cassert>
73#include <cstdint>
74#include <iterator>
75#include <utility>
76
77using namespace llvm;
78
79namespace {
80
81class AArch64FastISel final : public FastISel {
82 class Address {
83 public:
84 enum BaseKind { RegBase, FrameIndexBase };
85
86 private:
87 BaseKind Kind = RegBase;
89 union {
90 unsigned Reg;
91 int FI;
92 } Base;
93 Register OffsetReg;
94 unsigned Shift = 0;
95 int64_t Offset = 0;
96 const GlobalValue *GV = nullptr;
97
98 public:
99 Address() { Base.Reg = 0; }
100
101 void setKind(BaseKind K) { Kind = K; }
102 BaseKind getKind() const { return Kind; }
103 void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
104 AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
105 bool isRegBase() const { return Kind == RegBase; }
106 bool isFIBase() const { return Kind == FrameIndexBase; }
107
108 void setReg(Register Reg) {
109 assert(isRegBase() && "Invalid base register access!");
110 Base.Reg = Reg.id();
111 }
112
113 Register getReg() const {
114 assert(isRegBase() && "Invalid base register access!");
115 return Base.Reg;
116 }
117
118 void setOffsetReg(Register Reg) { OffsetReg = Reg; }
119
120 Register getOffsetReg() const { return OffsetReg; }
121
122 void setFI(unsigned FI) {
123 assert(isFIBase() && "Invalid base frame index access!");
124 Base.FI = FI;
125 }
126
127 unsigned getFI() const {
128 assert(isFIBase() && "Invalid base frame index access!");
129 return Base.FI;
130 }
131
132 void setOffset(int64_t O) { Offset = O; }
133 int64_t getOffset() { return Offset; }
134 void setShift(unsigned S) { Shift = S; }
135 unsigned getShift() { return Shift; }
136
137 void setGlobalValue(const GlobalValue *G) { GV = G; }
138 const GlobalValue *getGlobalValue() { return GV; }
139 };
140
141 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
142 /// make the right decision when generating code for different targets.
143 const AArch64Subtarget *Subtarget;
144 LLVMContext *Context;
145
146 bool fastLowerArguments() override;
147 bool fastLowerCall(CallLoweringInfo &CLI) override;
148 bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
149
150private:
151 // Selection routines.
152 bool selectAddSub(const Instruction *I);
153 bool selectLogicalOp(const Instruction *I);
154 bool selectLoad(const Instruction *I);
155 bool selectStore(const Instruction *I);
156 bool selectBranch(const Instruction *I);
157 bool selectIndirectBr(const Instruction *I);
158 bool selectCmp(const Instruction *I);
159 bool selectSelect(const Instruction *I);
160 bool selectFPExt(const Instruction *I);
161 bool selectFPTrunc(const Instruction *I);
162 bool selectFPToInt(const Instruction *I, bool Signed);
163 bool selectIntToFP(const Instruction *I, bool Signed);
164 bool selectRem(const Instruction *I, unsigned ISDOpcode);
165 bool selectRet(const Instruction *I);
166 bool selectTrunc(const Instruction *I);
167 bool selectIntExt(const Instruction *I);
168 bool selectMul(const Instruction *I);
169 bool selectShift(const Instruction *I);
170 bool selectBitCast(const Instruction *I);
171 bool selectFRem(const Instruction *I);
172 bool selectSDiv(const Instruction *I);
173 bool selectGetElementPtr(const Instruction *I);
174 bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
175
176 // Utility helper routines.
177 bool isTypeLegal(Type *Ty, MVT &VT);
178 bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
179 bool isValueAvailable(const Value *V) const;
180 bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
181 bool computeCallAddress(const Value *V, Address &Addr);
182 bool simplifyAddress(Address &Addr, MVT VT);
183 void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
185 unsigned ScaleFactor, MachineMemOperand *MMO);
186 bool isMemCpySmall(uint64_t Len, MaybeAlign Alignment);
187 bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
188 MaybeAlign Alignment);
189 bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
190 const Value *Cond);
191 bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
192 bool optimizeSelect(const SelectInst *SI);
193 Register getRegForGEPIndex(const Value *Idx);
194
195 // Emit helper routines.
196 Register emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
197 const Value *RHS, bool SetFlags = false,
198 bool WantResult = true, bool IsZExt = false);
199 Register emitAddSub_rr(bool UseAdd, MVT RetVT, Register LHSReg,
200 Register RHSReg, bool SetFlags = false,
201 bool WantResult = true);
202 Register emitAddSub_ri(bool UseAdd, MVT RetVT, Register LHSReg, uint64_t Imm,
203 bool SetFlags = false, bool WantResult = true);
204 Register emitAddSub_rs(bool UseAdd, MVT RetVT, Register LHSReg,
205 Register RHSReg, AArch64_AM::ShiftExtendType ShiftType,
206 uint64_t ShiftImm, bool SetFlags = false,
207 bool WantResult = true);
208 Register emitAddSub_rx(bool UseAdd, MVT RetVT, Register LHSReg,
210 uint64_t ShiftImm, bool SetFlags = false,
211 bool WantResult = true);
212
213 // Emit functions.
214 bool emitCompareAndBranch(const CondBrInst *BI);
215 bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
216 bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
217 bool emitICmp_ri(MVT RetVT, Register LHSReg, uint64_t Imm);
218 bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
219 Register emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
220 MachineMemOperand *MMO = nullptr);
221 bool emitStore(MVT VT, Register SrcReg, Address Addr,
222 MachineMemOperand *MMO = nullptr);
223 bool emitStoreRelease(MVT VT, Register SrcReg, Register AddrReg,
224 MachineMemOperand *MMO = nullptr);
225 Register emitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT, bool isZExt);
226 Register emiti1Ext(Register SrcReg, MVT DestVT, bool isZExt);
227 Register emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
228 bool SetFlags = false, bool WantResult = true,
229 bool IsZExt = false);
230 Register emitAdd_ri_(MVT VT, Register Op0, int64_t Imm);
231 Register emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
232 bool SetFlags = false, bool WantResult = true,
233 bool IsZExt = false);
234 Register emitSubs_rr(MVT RetVT, Register LHSReg, Register RHSReg,
235 bool WantResult = true);
236 Register emitSubs_rs(MVT RetVT, Register LHSReg, Register RHSReg,
237 AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
238 bool WantResult = true);
239 Register emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
240 const Value *RHS);
241 Register emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, Register LHSReg,
242 uint64_t Imm);
243 Register emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, Register LHSReg,
244 Register RHSReg, uint64_t ShiftImm);
245 Register emitAnd_ri(MVT RetVT, Register LHSReg, uint64_t Imm);
246 Register emitMul_rr(MVT RetVT, Register Op0, Register Op1);
247 Register emitSMULL_rr(MVT RetVT, Register Op0, Register Op1);
248 Register emitUMULL_rr(MVT RetVT, Register Op0, Register Op1);
249 Register emitLSL_rr(MVT RetVT, Register Op0Reg, Register Op1Reg);
250 Register emitLSL_ri(MVT RetVT, MVT SrcVT, Register Op0Reg, uint64_t Imm,
251 bool IsZExt = true);
252 Register emitLSR_rr(MVT RetVT, Register Op0Reg, Register Op1Reg);
253 Register emitLSR_ri(MVT RetVT, MVT SrcVT, Register Op0Reg, uint64_t Imm,
254 bool IsZExt = true);
255 Register emitASR_rr(MVT RetVT, Register Op0Reg, Register Op1Reg);
256 Register emitASR_ri(MVT RetVT, MVT SrcVT, Register Op0Reg, uint64_t Imm,
257 bool IsZExt = false);
258
259 Register materializeInt(const ConstantInt *CI, MVT VT);
260 Register materializeFP(const ConstantFP *CFP, MVT VT);
261 Register materializeGV(const GlobalValue *GV);
262
263 // Call handling routines.
264private:
265 CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
266 bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
267 SmallVectorImpl<Type *> &OrigTys, unsigned &NumBytes);
268 bool finishCall(CallLoweringInfo &CLI, unsigned NumBytes);
269
270public:
271 // Backend specific FastISel code.
272 Register fastMaterializeAlloca(const AllocaInst *AI) override;
273 Register fastMaterializeConstant(const Constant *C) override;
274 Register fastMaterializeFloatZero(const ConstantFP *CF) override;
275
276 explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
277 const TargetLibraryInfo *LibInfo,
278 const LibcallLoweringInfo *libcallLowering)
279 : FastISel(FuncInfo, LibInfo, libcallLowering,
280 /*SkipTargetIndependentISel=*/true) {
281 Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>();
282 Context = &FuncInfo.Fn->getContext();
283 }
284
285 bool fastSelectInstruction(const Instruction *I) override;
286
287#include "AArch64GenFastISel.inc"
288};
289
290} // end anonymous namespace
291
292/// Check if the sign-/zero-extend will be a noop.
293static bool isIntExtFree(const Instruction *I) {
295 "Unexpected integer extend instruction.");
296 assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
297 "Unexpected value type.");
298 bool IsZExt = isa<ZExtInst>(I);
299
300 if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
301 if (LI->hasOneUse())
302 return true;
303
304 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
305 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
306 return true;
307
308 return false;
309}
310
311/// Determine the implicit scale factor that is applied by a memory
312/// operation for a given value type.
313static unsigned getImplicitScaleFactor(MVT VT) {
314 switch (VT.SimpleTy) {
315 default:
316 return 0; // invalid
317 case MVT::i1: // fall-through
318 case MVT::i8:
319 return 1;
320 case MVT::i16:
321 return 2;
322 case MVT::i32: // fall-through
323 case MVT::f32:
324 return 4;
325 case MVT::i64: // fall-through
326 case MVT::f64:
327 return 8;
328 }
329}
330
331CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
332 if (CC == CallingConv::GHC)
333 return CC_AArch64_GHC;
334 if (CC == CallingConv::CFGuard_Check)
336 if (Subtarget->isTargetDarwin())
338 if (Subtarget->isTargetWindows())
339 return CC_AArch64_Win64PCS;
340 return CC_AArch64_AAPCS;
341}
342
343Register AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
344 assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
345 "Alloca should always return a pointer.");
346
347 // Don't handle dynamic allocas.
348 auto SI = FuncInfo.StaticAllocaMap.find(AI);
349 if (SI == FuncInfo.StaticAllocaMap.end())
350 return Register();
351
352 if (SI != FuncInfo.StaticAllocaMap.end()) {
353 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
354 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
355 ResultReg)
356 .addFrameIndex(SI->second)
357 .addImm(0)
358 .addImm(0);
359 return ResultReg;
360 }
361
362 return Register();
363}
364
365Register AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
366 if (VT > MVT::i64)
367 return Register();
368
369 if (!CI->isZero())
370 return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
371
372 // Create a copy from the zero register to materialize a "0" value.
373 const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
374 : &AArch64::GPR32RegClass;
375 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
376 Register ResultReg = createResultReg(RC);
377 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
378 ResultReg).addReg(ZeroReg, getKillRegState(true));
379 return ResultReg;
380}
381
382Register AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
383 // Positive zero (+0.0) has to be materialized with a fmov from the zero
384 // register, because the immediate version of fmov cannot encode zero.
385 if (CFP->isNullValue())
386 return fastMaterializeFloatZero(CFP);
387
388 if (VT != MVT::f32 && VT != MVT::f64)
389 return Register();
390
391 const APFloat Val = CFP->getValueAPF();
392 bool Is64Bit = (VT == MVT::f64);
393 // This checks to see if we can use FMOV instructions to materialize
394 // a constant, otherwise we have to materialize via the constant pool.
395 int Imm =
396 Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
397 if (Imm != -1) {
398 unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
399 return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
400 }
401
402 // For the large code model materialize the FP constant in code.
403 if (TM.getCodeModel() == CodeModel::Large) {
404 unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
405 const TargetRegisterClass *RC = Is64Bit ?
406 &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
407
408 Register TmpReg = createResultReg(RC);
409 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc1), TmpReg)
410 .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
411
412 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
413 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
414 TII.get(TargetOpcode::COPY), ResultReg)
415 .addReg(TmpReg, getKillRegState(true));
416
417 return ResultReg;
418 }
419
420 // Materialize via constant pool. MachineConstantPool wants an explicit
421 // alignment.
422 Align Alignment = DL.getPrefTypeAlign(CFP->getType());
423
424 unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
425 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
426 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
428
429 unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
430 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
431 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
432 .addReg(ADRPReg)
434 return ResultReg;
435}
436
437Register AArch64FastISel::materializeGV(const GlobalValue *GV) {
438 // We can't handle thread-local variables quickly yet.
439 if (GV->isThreadLocal())
440 return Register();
441
442 // MachO still uses GOT for large code-model accesses, but ELF requires
443 // movz/movk sequences, which FastISel doesn't handle yet.
444 if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
445 return Register();
446
447 if (FuncInfo.MF->getInfo<AArch64FunctionInfo>()->hasELFSignedGOT())
448 return Register();
449
450 unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
451
452 EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
453 if (!DestEVT.isSimple())
454 return Register();
455
456 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
457 Register ResultReg;
458
459 if (OpFlags & AArch64II::MO_GOT) {
460 // ADRP + LDRX
461 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
462 ADRPReg)
463 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
464
465 unsigned LdrOpc;
466 if (Subtarget->isTargetILP32()) {
467 ResultReg = createResultReg(&AArch64::GPR32RegClass);
468 LdrOpc = AArch64::LDRWui;
469 } else {
470 ResultReg = createResultReg(&AArch64::GPR64RegClass);
471 LdrOpc = AArch64::LDRXui;
472 }
473 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(LdrOpc),
474 ResultReg)
475 .addReg(ADRPReg)
477 AArch64II::MO_NC | OpFlags);
478 if (!Subtarget->isTargetILP32())
479 return ResultReg;
480
481 // LDRWui produces a 32-bit register, but pointers in-register are 64-bits
482 // so we must extend the result on ILP32.
483 Register Result64 = createResultReg(&AArch64::GPR64RegClass);
484 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
485 TII.get(TargetOpcode::SUBREG_TO_REG))
486 .addDef(Result64)
487 .addReg(ResultReg, RegState::Kill)
488 .addImm(AArch64::sub_32);
489 return Result64;
490 } else {
491 // ADRP + ADDX
492 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
493 ADRPReg)
494 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
495
496 if (OpFlags & AArch64II::MO_TAGGED) {
497 // MO_TAGGED on the page indicates a tagged address. Set the tag now.
498 // We do so by creating a MOVK that sets bits 48-63 of the register to
499 // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
500 // the small code model so we can assume a binary size of <= 4GB, which
501 // makes the untagged PC relative offset positive. The binary must also be
502 // loaded into address range [0, 2^48). Both of these properties need to
503 // be ensured at runtime when using tagged addresses.
504 //
505 // TODO: There is duplicate logic in AArch64ExpandPseudoInsts.cpp that
506 // also uses BuildMI for making an ADRP (+ MOVK) + ADD, but the operands
507 // are not exactly 1:1 with FastISel so we cannot easily abstract this
508 // out. At some point, it would be nice to find a way to not have this
509 // duplicate code.
510 Register DstReg = createResultReg(&AArch64::GPR64commonRegClass);
511 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::MOVKXi),
512 DstReg)
513 .addReg(ADRPReg)
514 .addGlobalAddress(GV, /*Offset=*/0x100000000,
516 .addImm(48);
517 ADRPReg = DstReg;
518 }
519
520 ResultReg = createResultReg(&AArch64::GPR64spRegClass);
521 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
522 ResultReg)
523 .addReg(ADRPReg)
524 .addGlobalAddress(GV, 0,
526 .addImm(0);
527 }
528 return ResultReg;
529}
530
531Register AArch64FastISel::fastMaterializeConstant(const Constant *C) {
532 EVT CEVT = TLI.getValueType(DL, C->getType(), true);
533
534 // Only handle simple types.
535 if (!CEVT.isSimple())
536 return Register();
537 MVT VT = CEVT.getSimpleVT();
538 // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
539 // 'null' pointers need to have a somewhat special treatment.
541 if (C->getType()->isVectorTy())
542 return Register();
543 assert(VT == MVT::i64 && "Expected 64-bit pointers");
544 return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
545 }
546
547 if (const auto *CI = dyn_cast<ConstantInt>(C))
548 return materializeInt(CI, VT);
549 else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
550 return materializeFP(CFP, VT);
551 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
552 return materializeGV(GV);
553
554 return Register();
555}
556
557Register AArch64FastISel::fastMaterializeFloatZero(const ConstantFP *CFP) {
558 assert(CFP->isNullValue() &&
559 "Floating-point constant is not a positive zero.");
560 MVT VT;
561 if (!isTypeLegal(CFP->getType(), VT))
562 return Register();
563
564 if (VT != MVT::f32 && VT != MVT::f64)
565 return Register();
566
567 bool Is64Bit = (VT == MVT::f64);
568 unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
569 unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
570 return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);
571}
572
573/// Check if the multiply is by a power-of-2 constant.
574static bool isMulPowOf2(const Value *I) {
575 if (const auto *MI = dyn_cast<MulOperator>(I)) {
576 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
577 if (C->getValue().isPowerOf2())
578 return true;
579 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
580 if (C->getValue().isPowerOf2())
581 return true;
582 }
583 return false;
584}
585
586// Computes the address to get to an object.
587bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
588{
589 const User *U = nullptr;
590 unsigned Opcode = Instruction::UserOp1;
591 if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
592 // Don't walk into other basic blocks unless the object is an alloca from
593 // another block, otherwise it may not have a virtual register assigned.
594 if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
595 FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
596 Opcode = I->getOpcode();
597 U = I;
598 }
599 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
600 Opcode = C->getOpcode();
601 U = C;
602 }
603
604 if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
605 if (Ty->getAddressSpace() > 255)
606 // Fast instruction selection doesn't support the special
607 // address spaces.
608 return false;
609
610 switch (Opcode) {
611 default:
612 break;
613 case Instruction::BitCast:
614 // Look through bitcasts.
615 return computeAddress(U->getOperand(0), Addr, Ty);
616
617 case Instruction::IntToPtr:
618 // Look past no-op inttoptrs.
619 if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
620 TLI.getPointerTy(DL))
621 return computeAddress(U->getOperand(0), Addr, Ty);
622 break;
623
624 case Instruction::PtrToInt:
625 // Look past no-op ptrtoints.
626 if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
627 return computeAddress(U->getOperand(0), Addr, Ty);
628 break;
629
630 case Instruction::GetElementPtr: {
631 Address SavedAddr = Addr;
632 uint64_t TmpOffset = Addr.getOffset();
633
634 // Iterate through the GEP folding the constants into offsets where
635 // we can.
637 GTI != E; ++GTI) {
638 const Value *Op = GTI.getOperand();
639 if (StructType *STy = GTI.getStructTypeOrNull()) {
640 const StructLayout *SL = DL.getStructLayout(STy);
641 unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
642 TmpOffset += SL->getElementOffset(Idx);
643 } else {
644 uint64_t S = GTI.getSequentialElementStride(DL);
645 while (true) {
646 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
647 // Constant-offset addressing.
648 TmpOffset += CI->getSExtValue() * S;
649 break;
650 }
651 if (canFoldAddIntoGEP(U, Op)) {
652 // A compatible add with a constant operand. Fold the constant.
653 ConstantInt *CI =
654 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
655 TmpOffset += CI->getSExtValue() * S;
656 // Iterate on the other operand.
657 Op = cast<AddOperator>(Op)->getOperand(0);
658 continue;
659 }
660 // Unsupported
661 goto unsupported_gep;
662 }
663 }
664 }
665
666 // Try to grab the base operand now.
667 Addr.setOffset(TmpOffset);
668 if (computeAddress(U->getOperand(0), Addr, Ty))
669 return true;
670
671 // We failed, restore everything and try the other options.
672 Addr = SavedAddr;
673
674 unsupported_gep:
675 break;
676 }
677 case Instruction::Alloca: {
678 const AllocaInst *AI = cast<AllocaInst>(Obj);
679 auto SI = FuncInfo.StaticAllocaMap.find(AI);
680 if (SI != FuncInfo.StaticAllocaMap.end()) {
681 Addr.setKind(Address::FrameIndexBase);
682 Addr.setFI(SI->second);
683 return true;
684 }
685 break;
686 }
687 case Instruction::Add: {
688 // Adds of constants are common and easy enough.
689 const Value *LHS = U->getOperand(0);
690 const Value *RHS = U->getOperand(1);
691
693 std::swap(LHS, RHS);
694
695 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
696 Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
697 return computeAddress(LHS, Addr, Ty);
698 }
699
700 Address Backup = Addr;
701 if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
702 return true;
703 Addr = Backup;
704
705 break;
706 }
707 case Instruction::Sub: {
708 // Subs of constants are common and easy enough.
709 const Value *LHS = U->getOperand(0);
710 const Value *RHS = U->getOperand(1);
711
712 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
713 Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
714 return computeAddress(LHS, Addr, Ty);
715 }
716 break;
717 }
718 case Instruction::Shl: {
719 if (Addr.getOffsetReg())
720 break;
721
722 const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
723 if (!CI)
724 break;
725
726 unsigned Val = CI->getZExtValue();
727 if (Val < 1 || Val > 3)
728 break;
729
730 uint64_t NumBytes = 0;
731 if (Ty && Ty->isSized()) {
732 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
733 NumBytes = NumBits / 8;
734 if (!isPowerOf2_64(NumBits))
735 NumBytes = 0;
736 }
737
738 if (NumBytes != (1ULL << Val))
739 break;
740
741 Addr.setShift(Val);
742 Addr.setExtendType(AArch64_AM::LSL);
743
744 const Value *Src = U->getOperand(0);
745 if (const auto *I = dyn_cast<Instruction>(Src)) {
746 if (FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
747 // Fold the zext or sext when it won't become a noop.
748 if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
749 if (!isIntExtFree(ZE) &&
750 ZE->getOperand(0)->getType()->isIntegerTy(32)) {
751 Addr.setExtendType(AArch64_AM::UXTW);
752 Src = ZE->getOperand(0);
753 }
754 } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
755 if (!isIntExtFree(SE) &&
756 SE->getOperand(0)->getType()->isIntegerTy(32)) {
757 Addr.setExtendType(AArch64_AM::SXTW);
758 Src = SE->getOperand(0);
759 }
760 }
761 }
762 }
763
764 if (const auto *AI = dyn_cast<BinaryOperator>(Src))
765 if (AI->getOpcode() == Instruction::And) {
766 const Value *LHS = AI->getOperand(0);
767 const Value *RHS = AI->getOperand(1);
768
769 if (const auto *C = dyn_cast<ConstantInt>(LHS))
770 if (C->getValue() == 0xffffffff)
771 std::swap(LHS, RHS);
772
773 if (const auto *C = dyn_cast<ConstantInt>(RHS))
774 if (C->getValue() == 0xffffffff) {
775 Addr.setExtendType(AArch64_AM::UXTW);
776 Register Reg = getRegForValue(LHS);
777 if (!Reg)
778 return false;
779 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
780 Addr.setOffsetReg(Reg);
781 return true;
782 }
783 }
784
785 Register Reg = getRegForValue(Src);
786 if (!Reg)
787 return false;
788 Addr.setOffsetReg(Reg);
789 return true;
790 }
791 case Instruction::Mul: {
792 if (Addr.getOffsetReg())
793 break;
794
795 if (!isMulPowOf2(U))
796 break;
797
798 const Value *LHS = U->getOperand(0);
799 const Value *RHS = U->getOperand(1);
800
801 // Canonicalize power-of-2 value to the RHS.
802 if (const auto *C = dyn_cast<ConstantInt>(LHS))
803 if (C->getValue().isPowerOf2())
804 std::swap(LHS, RHS);
805
806 assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt.");
807 const auto *C = cast<ConstantInt>(RHS);
808 unsigned Val = C->getValue().logBase2();
809 if (Val < 1 || Val > 3)
810 break;
811
812 uint64_t NumBytes = 0;
813 if (Ty && Ty->isSized()) {
814 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
815 NumBytes = NumBits / 8;
816 if (!isPowerOf2_64(NumBits))
817 NumBytes = 0;
818 }
819
820 if (NumBytes != (1ULL << Val))
821 break;
822
823 Addr.setShift(Val);
824 Addr.setExtendType(AArch64_AM::LSL);
825
826 const Value *Src = LHS;
827 if (const auto *I = dyn_cast<Instruction>(Src)) {
828 if (FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
829 // Fold the zext or sext when it won't become a noop.
830 if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
831 if (!isIntExtFree(ZE) &&
832 ZE->getOperand(0)->getType()->isIntegerTy(32)) {
833 Addr.setExtendType(AArch64_AM::UXTW);
834 Src = ZE->getOperand(0);
835 }
836 } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
837 if (!isIntExtFree(SE) &&
838 SE->getOperand(0)->getType()->isIntegerTy(32)) {
839 Addr.setExtendType(AArch64_AM::SXTW);
840 Src = SE->getOperand(0);
841 }
842 }
843 }
844 }
845
846 Register Reg = getRegForValue(Src);
847 if (!Reg)
848 return false;
849 Addr.setOffsetReg(Reg);
850 return true;
851 }
852 case Instruction::And: {
853 if (Addr.getOffsetReg())
854 break;
855
856 if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
857 break;
858
859 const Value *LHS = U->getOperand(0);
860 const Value *RHS = U->getOperand(1);
861
862 if (const auto *C = dyn_cast<ConstantInt>(LHS))
863 if (C->getValue() == 0xffffffff)
864 std::swap(LHS, RHS);
865
866 if (const auto *C = dyn_cast<ConstantInt>(RHS))
867 if (C->getValue() == 0xffffffff) {
868 Addr.setShift(0);
869 Addr.setExtendType(AArch64_AM::LSL);
870 Addr.setExtendType(AArch64_AM::UXTW);
871
872 Register Reg = getRegForValue(LHS);
873 if (!Reg)
874 return false;
875 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
876 Addr.setOffsetReg(Reg);
877 return true;
878 }
879 break;
880 }
881 case Instruction::SExt:
882 case Instruction::ZExt: {
883 if (!Addr.getReg() || Addr.getOffsetReg())
884 break;
885
886 const Value *Src = nullptr;
887 // Fold the zext or sext when it won't become a noop.
888 if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
889 if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
890 Addr.setExtendType(AArch64_AM::UXTW);
891 Src = ZE->getOperand(0);
892 }
893 } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
894 if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
895 Addr.setExtendType(AArch64_AM::SXTW);
896 Src = SE->getOperand(0);
897 }
898 }
899
900 if (!Src)
901 break;
902
903 Addr.setShift(0);
904 Register Reg = getRegForValue(Src);
905 if (!Reg)
906 return false;
907 Addr.setOffsetReg(Reg);
908 return true;
909 }
910 } // end switch
911
912 if (Addr.isRegBase() && !Addr.getReg()) {
913 Register Reg = getRegForValue(Obj);
914 if (!Reg)
915 return false;
916 Addr.setReg(Reg);
917 return true;
918 }
919
920 if (!Addr.getOffsetReg()) {
921 Register Reg = getRegForValue(Obj);
922 if (!Reg)
923 return false;
924 Addr.setOffsetReg(Reg);
925 return true;
926 }
927
928 return false;
929}
930
931bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
932 const User *U = nullptr;
933 unsigned Opcode = Instruction::UserOp1;
934 bool InMBB = true;
935
936 if (const auto *I = dyn_cast<Instruction>(V)) {
937 Opcode = I->getOpcode();
938 U = I;
939 InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
940 } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
941 Opcode = C->getOpcode();
942 U = C;
943 }
944
945 switch (Opcode) {
946 default: break;
947 case Instruction::BitCast:
948 // Look past bitcasts if its operand is in the same BB.
949 if (InMBB)
950 return computeCallAddress(U->getOperand(0), Addr);
951 break;
952 case Instruction::IntToPtr:
953 // Look past no-op inttoptrs if its operand is in the same BB.
954 if (InMBB &&
955 TLI.getValueType(DL, U->getOperand(0)->getType()) ==
956 TLI.getPointerTy(DL))
957 return computeCallAddress(U->getOperand(0), Addr);
958 break;
959 case Instruction::PtrToInt:
960 // Look past no-op ptrtoints if its operand is in the same BB.
961 if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
962 return computeCallAddress(U->getOperand(0), Addr);
963 break;
964 }
965
966 if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
967 Addr.setGlobalValue(GV);
968 return true;
969 }
970
971 // If all else fails, try to materialize the value in a register.
972 if (!Addr.getGlobalValue()) {
973 Addr.setReg(getRegForValue(V));
974 return Addr.getReg().isValid();
975 }
976
977 return false;
978}
979
980bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
981 EVT evt = TLI.getValueType(DL, Ty, true);
982
983 if (Subtarget->isTargetILP32() && Ty->isPointerTy())
984 return false;
985
986 // Only handle simple types.
987 if (evt == MVT::Other || !evt.isSimple())
988 return false;
989 VT = evt.getSimpleVT();
990
991 // This is a legal type, but it's not something we handle in fast-isel.
992 if (VT == MVT::f128)
993 return false;
994
995 // Handle all other legal types, i.e. a register that will directly hold this
996 // value.
997 return TLI.isTypeLegal(VT);
998}
999
1000/// Determine if the value type is supported by FastISel.
1001///
1002/// FastISel for AArch64 can handle more value types than are legal. This adds
1003/// simple value type such as i1, i8, and i16.
1004bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
1005 if (Ty->isVectorTy() && !IsVectorAllowed)
1006 return false;
1007
1008 if (isTypeLegal(Ty, VT))
1009 return true;
1010
1011 // If this is a type than can be sign or zero-extended to a basic operation
1012 // go ahead and accept it now.
1013 if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
1014 return true;
1015
1016 return false;
1017}
1018
1019bool AArch64FastISel::isValueAvailable(const Value *V) const {
1020 if (!isa<Instruction>(V))
1021 return true;
1022
1023 const auto *I = cast<Instruction>(V);
1024 return FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB;
1025}
1026
1027bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
1028 if (Subtarget->isTargetILP32())
1029 return false;
1030
1031 unsigned ScaleFactor = getImplicitScaleFactor(VT);
1032 if (!ScaleFactor)
1033 return false;
1034
1035 bool ImmediateOffsetNeedsLowering = false;
1036 bool RegisterOffsetNeedsLowering = false;
1037 int64_t Offset = Addr.getOffset();
1038 if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
1039 ImmediateOffsetNeedsLowering = true;
1040 else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1041 !isUInt<12>(Offset / ScaleFactor))
1042 ImmediateOffsetNeedsLowering = true;
1043
1044 // Cannot encode an offset register and an immediate offset in the same
1045 // instruction. Fold the immediate offset into the load/store instruction and
1046 // emit an additional add to take care of the offset register.
1047 if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1048 RegisterOffsetNeedsLowering = true;
1049
1050 // Cannot encode zero register as base.
1051 if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1052 RegisterOffsetNeedsLowering = true;
1053
1054 // If this is a stack pointer and the offset needs to be simplified then put
1055 // the alloca address into a register, set the base type back to register and
1056 // continue. This should almost never happen.
1057 if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1058 {
1059 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1060 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
1061 ResultReg)
1062 .addFrameIndex(Addr.getFI())
1063 .addImm(0)
1064 .addImm(0);
1065 Addr.setKind(Address::RegBase);
1066 Addr.setReg(ResultReg);
1067 }
1068
1069 if (RegisterOffsetNeedsLowering) {
1070 Register ResultReg;
1071 if (Addr.getReg()) {
1072 if (Addr.getExtendType() == AArch64_AM::SXTW ||
1073 Addr.getExtendType() == AArch64_AM::UXTW )
1074 ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1075 Addr.getOffsetReg(), Addr.getExtendType(),
1076 Addr.getShift());
1077 else
1078 ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1079 Addr.getOffsetReg(), AArch64_AM::LSL,
1080 Addr.getShift());
1081 } else {
1082 if (Addr.getExtendType() == AArch64_AM::UXTW)
1083 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1084 Addr.getShift(), /*IsZExt=*/true);
1085 else if (Addr.getExtendType() == AArch64_AM::SXTW)
1086 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1087 Addr.getShift(), /*IsZExt=*/false);
1088 else
1089 ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1090 Addr.getShift());
1091 }
1092 if (!ResultReg)
1093 return false;
1094
1095 Addr.setReg(ResultReg);
1096 Addr.setOffsetReg(0);
1097 Addr.setShift(0);
1098 Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1099 }
1100
1101 // Since the offset is too large for the load/store instruction get the
1102 // reg+offset into a register.
1103 if (ImmediateOffsetNeedsLowering) {
1104 Register ResultReg;
1105 if (Addr.getReg())
1106 // Try to fold the immediate into the add instruction.
1107 ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
1108 else
1109 ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1110
1111 if (!ResultReg)
1112 return false;
1113 Addr.setReg(ResultReg);
1114 Addr.setOffset(0);
1115 }
1116 return true;
1117}
1118
1119void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1120 const MachineInstrBuilder &MIB,
1122 unsigned ScaleFactor,
1123 MachineMemOperand *MMO) {
1124 int64_t Offset = Addr.getOffset() / ScaleFactor;
1125 // Frame base works a bit differently. Handle it separately.
1126 if (Addr.isFIBase()) {
1127 int FI = Addr.getFI();
1128 // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
1129 // and alignment should be based on the VT.
1130 MMO = FuncInfo.MF->getMachineMemOperand(
1131 MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1132 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
1133 // Now add the rest of the operands.
1134 MIB.addFrameIndex(FI).addImm(Offset);
1135 } else {
1136 assert(Addr.isRegBase() && "Unexpected address kind.");
1137 const MCInstrDesc &II = MIB->getDesc();
1138 unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1139 Addr.setReg(
1140 constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1141 Addr.setOffsetReg(
1142 constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1143 if (Addr.getOffsetReg()) {
1144 assert(Addr.getOffset() == 0 && "Unexpected offset");
1145 bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1146 Addr.getExtendType() == AArch64_AM::SXTX;
1147 MIB.addReg(Addr.getReg());
1148 MIB.addReg(Addr.getOffsetReg());
1149 MIB.addImm(IsSigned);
1150 MIB.addImm(Addr.getShift() != 0);
1151 } else
1152 MIB.addReg(Addr.getReg()).addImm(Offset);
1153 }
1154
1155 if (MMO)
1156 MIB.addMemOperand(MMO);
1157}
1158
1159Register AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1160 const Value *RHS, bool SetFlags,
1161 bool WantResult, bool IsZExt) {
1163 bool NeedExtend = false;
1164 switch (RetVT.SimpleTy) {
1165 default:
1166 return Register();
1167 case MVT::i1:
1168 NeedExtend = true;
1169 break;
1170 case MVT::i8:
1171 NeedExtend = true;
1172 ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1173 break;
1174 case MVT::i16:
1175 NeedExtend = true;
1176 ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1177 break;
1178 case MVT::i32: // fall-through
1179 case MVT::i64:
1180 break;
1181 }
1182 MVT SrcVT = RetVT;
1183 RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1184
1185 // Canonicalize immediates to the RHS first.
1186 if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1187 std::swap(LHS, RHS);
1188
1189 // Canonicalize mul by power of 2 to the RHS.
1190 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1191 if (isMulPowOf2(LHS))
1192 std::swap(LHS, RHS);
1193
1194 // Canonicalize shift immediate to the RHS.
1195 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1196 if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1197 if (isa<ConstantInt>(SI->getOperand(1)))
1198 if (SI->getOpcode() == Instruction::Shl ||
1199 SI->getOpcode() == Instruction::LShr ||
1200 SI->getOpcode() == Instruction::AShr )
1201 std::swap(LHS, RHS);
1202
1203 Register LHSReg = getRegForValue(LHS);
1204 if (!LHSReg)
1205 return Register();
1206
1207 if (NeedExtend)
1208 LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1209
1210 Register ResultReg;
1211 if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1212 uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1213 if (C->isNegative())
1214 ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
1215 WantResult);
1216 else
1217 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
1218 WantResult);
1219 } else if (const auto *C = dyn_cast<Constant>(RHS))
1220 if (C->isNullValue())
1221 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);
1222
1223 if (ResultReg)
1224 return ResultReg;
1225
1226 // Only extend the RHS within the instruction if there is a valid extend type.
1227 if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1228 isValueAvailable(RHS)) {
1229 Register RHSReg = getRegForValue(RHS);
1230 if (!RHSReg)
1231 return Register();
1232 return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,
1233 SetFlags, WantResult);
1234 }
1235
1236 // Check if the mul can be folded into the instruction.
1237 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1238 if (isMulPowOf2(RHS)) {
1239 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1240 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1241
1242 if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1243 if (C->getValue().isPowerOf2())
1244 std::swap(MulLHS, MulRHS);
1245
1246 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1247 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1248 Register RHSReg = getRegForValue(MulLHS);
1249 if (!RHSReg)
1250 return Register();
1251 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,
1252 ShiftVal, SetFlags, WantResult);
1253 if (ResultReg)
1254 return ResultReg;
1255 }
1256 }
1257
1258 // Check if the shift can be folded into the instruction.
1259 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1260 if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1261 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1263 switch (SI->getOpcode()) {
1264 default: break;
1265 case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
1266 case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1267 case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1268 }
1269 uint64_t ShiftVal = C->getZExtValue();
1270 if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1271 Register RHSReg = getRegForValue(SI->getOperand(0));
1272 if (!RHSReg)
1273 return Register();
1274 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
1275 ShiftVal, SetFlags, WantResult);
1276 if (ResultReg)
1277 return ResultReg;
1278 }
1279 }
1280 }
1281 }
1282
1283 Register RHSReg = getRegForValue(RHS);
1284 if (!RHSReg)
1285 return Register();
1286
1287 if (NeedExtend)
1288 RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1289
1290 return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
1291}
1292
1293Register AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, Register LHSReg,
1294 Register RHSReg, bool SetFlags,
1295 bool WantResult) {
1296 assert(LHSReg && RHSReg && "Invalid register number.");
1297
1298 if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1299 RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1300 return Register();
1301
1302 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1303 return Register();
1304
1305 static const unsigned OpcTable[2][2][2] = {
1306 { { AArch64::SUBWrr, AArch64::SUBXrr },
1307 { AArch64::ADDWrr, AArch64::ADDXrr } },
1308 { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1309 { AArch64::ADDSWrr, AArch64::ADDSXrr } }
1310 };
1311 bool Is64Bit = RetVT == MVT::i64;
1312 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1313 const TargetRegisterClass *RC =
1314 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1315 Register ResultReg;
1316 if (WantResult)
1317 ResultReg = createResultReg(RC);
1318 else
1319 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1320
1321 const MCInstrDesc &II = TII.get(Opc);
1322 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1323 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1324 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1325 .addReg(LHSReg)
1326 .addReg(RHSReg);
1327 return ResultReg;
1328}
1329
1330Register AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, Register LHSReg,
1331 uint64_t Imm, bool SetFlags,
1332 bool WantResult) {
1333 assert(LHSReg && "Invalid register number.");
1334
1335 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1336 return Register();
1337
1338 unsigned ShiftImm;
1339 if (isUInt<12>(Imm))
1340 ShiftImm = 0;
1341 else if ((Imm & 0xfff000) == Imm) {
1342 ShiftImm = 12;
1343 Imm >>= 12;
1344 } else
1345 return Register();
1346
1347 static const unsigned OpcTable[2][2][2] = {
1348 { { AArch64::SUBWri, AArch64::SUBXri },
1349 { AArch64::ADDWri, AArch64::ADDXri } },
1350 { { AArch64::SUBSWri, AArch64::SUBSXri },
1351 { AArch64::ADDSWri, AArch64::ADDSXri } }
1352 };
1353 bool Is64Bit = RetVT == MVT::i64;
1354 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1355 const TargetRegisterClass *RC;
1356 if (SetFlags)
1357 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1358 else
1359 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1360 Register ResultReg;
1361 if (WantResult)
1362 ResultReg = createResultReg(RC);
1363 else
1364 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1365
1366 const MCInstrDesc &II = TII.get(Opc);
1367 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1368 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1369 .addReg(LHSReg)
1370 .addImm(Imm)
1371 .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1372 return ResultReg;
1373}
1374
1375Register AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, Register LHSReg,
1376 Register RHSReg,
1378 uint64_t ShiftImm, bool SetFlags,
1379 bool WantResult) {
1380 assert(LHSReg && RHSReg && "Invalid register number.");
1381 assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1382 RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1383
1384 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1385 return Register();
1386
1387 // Don't deal with undefined shifts.
1388 if (ShiftImm >= RetVT.getSizeInBits())
1389 return Register();
1390
1391 static const unsigned OpcTable[2][2][2] = {
1392 { { AArch64::SUBWrs, AArch64::SUBXrs },
1393 { AArch64::ADDWrs, AArch64::ADDXrs } },
1394 { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1395 { AArch64::ADDSWrs, AArch64::ADDSXrs } }
1396 };
1397 bool Is64Bit = RetVT == MVT::i64;
1398 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1399 const TargetRegisterClass *RC =
1400 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1401 Register ResultReg;
1402 if (WantResult)
1403 ResultReg = createResultReg(RC);
1404 else
1405 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1406
1407 const MCInstrDesc &II = TII.get(Opc);
1408 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1409 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1410 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1411 .addReg(LHSReg)
1412 .addReg(RHSReg)
1413 .addImm(getShifterImm(ShiftType, ShiftImm));
1414 return ResultReg;
1415}
1416
1417Register AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, Register LHSReg,
1418 Register RHSReg,
1420 uint64_t ShiftImm, bool SetFlags,
1421 bool WantResult) {
1422 assert(LHSReg && RHSReg && "Invalid register number.");
1423 assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1424 RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1425
1426 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1427 return Register();
1428
1429 if (ShiftImm >= 4)
1430 return Register();
1431
1432 static const unsigned OpcTable[2][2][2] = {
1433 { { AArch64::SUBWrx, AArch64::SUBXrx },
1434 { AArch64::ADDWrx, AArch64::ADDXrx } },
1435 { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1436 { AArch64::ADDSWrx, AArch64::ADDSXrx } }
1437 };
1438 bool Is64Bit = RetVT == MVT::i64;
1439 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1440 const TargetRegisterClass *RC = nullptr;
1441 if (SetFlags)
1442 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1443 else
1444 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1445 Register ResultReg;
1446 if (WantResult)
1447 ResultReg = createResultReg(RC);
1448 else
1449 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1450
1451 const MCInstrDesc &II = TII.get(Opc);
1452 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1453 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1454 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1455 .addReg(LHSReg)
1456 .addReg(RHSReg)
1457 .addImm(getArithExtendImm(ExtType, ShiftImm));
1458 return ResultReg;
1459}
1460
1461bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1462 Type *Ty = LHS->getType();
1463 EVT EVT = TLI.getValueType(DL, Ty, true);
1464 if (!EVT.isSimple())
1465 return false;
1466 MVT VT = EVT.getSimpleVT();
1467
1468 switch (VT.SimpleTy) {
1469 default:
1470 return false;
1471 case MVT::i1:
1472 case MVT::i8:
1473 case MVT::i16:
1474 case MVT::i32:
1475 case MVT::i64:
1476 return emitICmp(VT, LHS, RHS, IsZExt);
1477 case MVT::f32:
1478 case MVT::f64:
1479 return emitFCmp(VT, LHS, RHS);
1480 }
1481}
1482
1483bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1484 bool IsZExt) {
1485 return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1486 IsZExt)
1487 .isValid();
1488}
1489
1490bool AArch64FastISel::emitICmp_ri(MVT RetVT, Register LHSReg, uint64_t Imm) {
1491 return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,
1492 /*SetFlags=*/true, /*WantResult=*/false)
1493 .isValid();
1494}
1495
1496bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1497 if (RetVT != MVT::f32 && RetVT != MVT::f64)
1498 return false;
1499
1500 // Check to see if the 2nd operand is a constant that we can encode directly
1501 // in the compare.
1502 bool UseImm = false;
1503 if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1504 if (CFP->isZero() && !CFP->isNegative())
1505 UseImm = true;
1506
1507 Register LHSReg = getRegForValue(LHS);
1508 if (!LHSReg)
1509 return false;
1510
1511 if (UseImm) {
1512 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1513 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1514 .addReg(LHSReg);
1515 return true;
1516 }
1517
1518 Register RHSReg = getRegForValue(RHS);
1519 if (!RHSReg)
1520 return false;
1521
1522 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1523 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1524 .addReg(LHSReg)
1525 .addReg(RHSReg);
1526 return true;
1527}
1528
1529Register AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1530 bool SetFlags, bool WantResult, bool IsZExt) {
1531 return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1532 IsZExt);
1533}
1534
1535/// This method is a wrapper to simplify add emission.
1536///
1537/// First try to emit an add with an immediate operand using emitAddSub_ri. If
1538/// that fails, then try to materialize the immediate into a register and use
1539/// emitAddSub_rr instead.
1540Register AArch64FastISel::emitAdd_ri_(MVT VT, Register Op0, int64_t Imm) {
1541 Register ResultReg;
1542 if (Imm < 0)
1543 ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);
1544 else
1545 ResultReg = emitAddSub_ri(true, VT, Op0, Imm);
1546
1547 if (ResultReg)
1548 return ResultReg;
1549
1550 Register CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1551 if (!CReg)
1552 return Register();
1553
1554 ResultReg = emitAddSub_rr(true, VT, Op0, CReg);
1555 return ResultReg;
1556}
1557
1558Register AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1559 bool SetFlags, bool WantResult, bool IsZExt) {
1560 return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1561 IsZExt);
1562}
1563
1564Register AArch64FastISel::emitSubs_rr(MVT RetVT, Register LHSReg,
1565 Register RHSReg, bool WantResult) {
1566 return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg,
1567 /*SetFlags=*/true, WantResult);
1568}
1569
1570Register AArch64FastISel::emitSubs_rs(MVT RetVT, Register LHSReg,
1571 Register RHSReg,
1573 uint64_t ShiftImm, bool WantResult) {
1574 return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType,
1575 ShiftImm, /*SetFlags=*/true, WantResult);
1576}
1577
1578Register AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1579 const Value *LHS, const Value *RHS) {
1580 // Canonicalize immediates to the RHS first.
1582 std::swap(LHS, RHS);
1583
1584 // Canonicalize mul by power-of-2 to the RHS.
1585 if (LHS->hasOneUse() && isValueAvailable(LHS))
1586 if (isMulPowOf2(LHS))
1587 std::swap(LHS, RHS);
1588
1589 // Canonicalize shift immediate to the RHS.
1590 if (LHS->hasOneUse() && isValueAvailable(LHS))
1591 if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1592 if (isa<ConstantInt>(SI->getOperand(1)))
1593 std::swap(LHS, RHS);
1594
1595 Register LHSReg = getRegForValue(LHS);
1596 if (!LHSReg)
1597 return Register();
1598
1599 Register ResultReg;
1600 if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1601 uint64_t Imm = C->getZExtValue();
1602 ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);
1603 }
1604 if (ResultReg)
1605 return ResultReg;
1606
1607 // Check if the mul can be folded into the instruction.
1608 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1609 if (isMulPowOf2(RHS)) {
1610 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1611 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1612
1613 if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1614 if (C->getValue().isPowerOf2())
1615 std::swap(MulLHS, MulRHS);
1616
1617 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1618 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1619
1620 Register RHSReg = getRegForValue(MulLHS);
1621 if (!RHSReg)
1622 return Register();
1623 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1624 if (ResultReg)
1625 return ResultReg;
1626 }
1627 }
1628
1629 // Check if the shift can be folded into the instruction.
1630 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1631 if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1632 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1633 uint64_t ShiftVal = C->getZExtValue();
1634 Register RHSReg = getRegForValue(SI->getOperand(0));
1635 if (!RHSReg)
1636 return Register();
1637 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1638 if (ResultReg)
1639 return ResultReg;
1640 }
1641 }
1642
1643 Register RHSReg = getRegForValue(RHS);
1644 if (!RHSReg)
1645 return Register();
1646
1647 MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1648 ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);
1649 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1650 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1651 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1652 }
1653 return ResultReg;
1654}
1655
1656Register AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1657 Register LHSReg, uint64_t Imm) {
1658 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1659 "ISD nodes are not consecutive!");
1660 static const unsigned OpcTable[3][2] = {
1661 { AArch64::ANDWri, AArch64::ANDXri },
1662 { AArch64::ORRWri, AArch64::ORRXri },
1663 { AArch64::EORWri, AArch64::EORXri }
1664 };
1665 const TargetRegisterClass *RC;
1666 unsigned Opc;
1667 unsigned RegSize;
1668 switch (RetVT.SimpleTy) {
1669 default:
1670 return Register();
1671 case MVT::i1:
1672 case MVT::i8:
1673 case MVT::i16:
1674 case MVT::i32: {
1675 unsigned Idx = ISDOpc - ISD::AND;
1676 Opc = OpcTable[Idx][0];
1677 RC = &AArch64::GPR32spRegClass;
1678 RegSize = 32;
1679 break;
1680 }
1681 case MVT::i64:
1682 Opc = OpcTable[ISDOpc - ISD::AND][1];
1683 RC = &AArch64::GPR64spRegClass;
1684 RegSize = 64;
1685 break;
1686 }
1687
1689 return Register();
1690
1691 Register ResultReg =
1692 fastEmitInst_ri(Opc, RC, LHSReg,
1694 if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1695 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1696 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1697 }
1698 return ResultReg;
1699}
1700
1701Register AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1702 Register LHSReg, Register RHSReg,
1703 uint64_t ShiftImm) {
1704 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1705 "ISD nodes are not consecutive!");
1706 static const unsigned OpcTable[3][2] = {
1707 { AArch64::ANDWrs, AArch64::ANDXrs },
1708 { AArch64::ORRWrs, AArch64::ORRXrs },
1709 { AArch64::EORWrs, AArch64::EORXrs }
1710 };
1711
1712 // Don't deal with undefined shifts.
1713 if (ShiftImm >= RetVT.getSizeInBits())
1714 return Register();
1715
1716 const TargetRegisterClass *RC;
1717 unsigned Opc;
1718 switch (RetVT.SimpleTy) {
1719 default:
1720 return Register();
1721 case MVT::i1:
1722 case MVT::i8:
1723 case MVT::i16:
1724 case MVT::i32:
1725 Opc = OpcTable[ISDOpc - ISD::AND][0];
1726 RC = &AArch64::GPR32RegClass;
1727 break;
1728 case MVT::i64:
1729 Opc = OpcTable[ISDOpc - ISD::AND][1];
1730 RC = &AArch64::GPR64RegClass;
1731 break;
1732 }
1733 Register ResultReg =
1734 fastEmitInst_rri(Opc, RC, LHSReg, RHSReg,
1736 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1737 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1738 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1739 }
1740 return ResultReg;
1741}
1742
1743Register AArch64FastISel::emitAnd_ri(MVT RetVT, Register LHSReg, uint64_t Imm) {
1744 return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm);
1745}
1746
1747Register AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1748 bool WantZExt, MachineMemOperand *MMO) {
1749 if (!TLI.allowsMisalignedMemoryAccesses(VT))
1750 return Register();
1751
1752 // Simplify this down to something we can handle.
1753 if (!simplifyAddress(Addr, VT))
1754 return Register();
1755
1756 unsigned ScaleFactor = getImplicitScaleFactor(VT);
1757 if (!ScaleFactor)
1758 llvm_unreachable("Unexpected value type.");
1759
1760 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1761 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1762 bool UseScaled = true;
1763 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1764 UseScaled = false;
1765 ScaleFactor = 1;
1766 }
1767
1768 static const unsigned GPOpcTable[2][8][4] = {
1769 // Sign-extend.
1770 { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
1771 AArch64::LDURXi },
1772 { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
1773 AArch64::LDURXi },
1774 { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
1775 AArch64::LDRXui },
1776 { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
1777 AArch64::LDRXui },
1778 { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1779 AArch64::LDRXroX },
1780 { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1781 AArch64::LDRXroX },
1782 { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1783 AArch64::LDRXroW },
1784 { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1785 AArch64::LDRXroW }
1786 },
1787 // Zero-extend.
1788 { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1789 AArch64::LDURXi },
1790 { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1791 AArch64::LDURXi },
1792 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1793 AArch64::LDRXui },
1794 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1795 AArch64::LDRXui },
1796 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1797 AArch64::LDRXroX },
1798 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1799 AArch64::LDRXroX },
1800 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1801 AArch64::LDRXroW },
1802 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1803 AArch64::LDRXroW }
1804 }
1805 };
1806
1807 static const unsigned FPOpcTable[4][2] = {
1808 { AArch64::LDURSi, AArch64::LDURDi },
1809 { AArch64::LDRSui, AArch64::LDRDui },
1810 { AArch64::LDRSroX, AArch64::LDRDroX },
1811 { AArch64::LDRSroW, AArch64::LDRDroW }
1812 };
1813
1814 unsigned Opc;
1815 const TargetRegisterClass *RC;
1816 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1817 Addr.getOffsetReg();
1818 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1819 if (Addr.getExtendType() == AArch64_AM::UXTW ||
1820 Addr.getExtendType() == AArch64_AM::SXTW)
1821 Idx++;
1822
1823 bool IsRet64Bit = RetVT == MVT::i64;
1824 switch (VT.SimpleTy) {
1825 default:
1826 llvm_unreachable("Unexpected value type.");
1827 case MVT::i1: // Intentional fall-through.
1828 case MVT::i8:
1829 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1830 RC = (IsRet64Bit && !WantZExt) ?
1831 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1832 break;
1833 case MVT::i16:
1834 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1835 RC = (IsRet64Bit && !WantZExt) ?
1836 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1837 break;
1838 case MVT::i32:
1839 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1840 RC = (IsRet64Bit && !WantZExt) ?
1841 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1842 break;
1843 case MVT::i64:
1844 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1845 RC = &AArch64::GPR64RegClass;
1846 break;
1847 case MVT::f32:
1848 Opc = FPOpcTable[Idx][0];
1849 RC = &AArch64::FPR32RegClass;
1850 break;
1851 case MVT::f64:
1852 Opc = FPOpcTable[Idx][1];
1853 RC = &AArch64::FPR64RegClass;
1854 break;
1855 }
1856
1857 // Create the base instruction, then add the operands.
1858 Register ResultReg = createResultReg(RC);
1859 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1860 TII.get(Opc), ResultReg);
1861 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1862
1863 // Loading an i1 requires special handling.
1864 if (VT == MVT::i1) {
1865 Register ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1);
1866 assert(ANDReg && "Unexpected AND instruction emission failure.");
1867 ResultReg = ANDReg;
1868 }
1869
1870 // For zero-extending loads to 64bit we emit a 32bit load and then convert
1871 // the 32bit reg to a 64bit reg.
1872 if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1873 Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
1874 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1875 TII.get(AArch64::SUBREG_TO_REG), Reg64)
1876 .addReg(ResultReg, getKillRegState(true))
1877 .addImm(AArch64::sub_32);
1878 ResultReg = Reg64;
1879 }
1880 return ResultReg;
1881}
1882
1883bool AArch64FastISel::selectAddSub(const Instruction *I) {
1884 MVT VT;
1885 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1886 return false;
1887
1888 if (VT.isVector())
1889 return selectOperator(I, I->getOpcode());
1890
1891 Register ResultReg;
1892 switch (I->getOpcode()) {
1893 default:
1894 llvm_unreachable("Unexpected instruction.");
1895 case Instruction::Add:
1896 ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1897 break;
1898 case Instruction::Sub:
1899 ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1900 break;
1901 }
1902 if (!ResultReg)
1903 return false;
1904
1905 updateValueMap(I, ResultReg);
1906 return true;
1907}
1908
1909bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1910 MVT VT;
1911 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1912 return false;
1913
1914 if (VT.isVector())
1915 return selectOperator(I, I->getOpcode());
1916
1917 Register ResultReg;
1918 switch (I->getOpcode()) {
1919 default:
1920 llvm_unreachable("Unexpected instruction.");
1921 case Instruction::And:
1922 ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1923 break;
1924 case Instruction::Or:
1925 ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1926 break;
1927 case Instruction::Xor:
1928 ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1929 break;
1930 }
1931 if (!ResultReg)
1932 return false;
1933
1934 updateValueMap(I, ResultReg);
1935 return true;
1936}
1937
1938bool AArch64FastISel::selectLoad(const Instruction *I) {
1939 MVT VT;
1940 // Verify we have a legal type before going any further. Currently, we handle
1941 // simple types that will directly fit in a register (i32/f32/i64/f64) or
1942 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1943 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1944 cast<LoadInst>(I)->isAtomic())
1945 return false;
1946
1947 const Value *SV = I->getOperand(0);
1948 if (TLI.supportSwiftError()) {
1949 // Swifterror values can come from either a function parameter with
1950 // swifterror attribute or an alloca with swifterror attribute.
1951 if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1952 if (Arg->hasSwiftErrorAttr())
1953 return false;
1954 }
1955
1956 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1957 if (Alloca->isSwiftError())
1958 return false;
1959 }
1960 }
1961
1962 // See if we can handle this address.
1963 Address Addr;
1964 if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1965 return false;
1966
1967 // Fold the following sign-/zero-extend into the load instruction.
1968 bool WantZExt = true;
1969 MVT RetVT = VT;
1970 const Value *IntExtVal = nullptr;
1971 if (I->hasOneUse()) {
1972 if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1973 if (isTypeSupported(ZE->getType(), RetVT))
1974 IntExtVal = ZE;
1975 else
1976 RetVT = VT;
1977 } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1978 if (isTypeSupported(SE->getType(), RetVT))
1979 IntExtVal = SE;
1980 else
1981 RetVT = VT;
1982 WantZExt = false;
1983 }
1984 }
1985
1986 Register ResultReg =
1987 emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1988 if (!ResultReg)
1989 return false;
1990
1991 // There are a few different cases we have to handle, because the load or the
1992 // sign-/zero-extend might not be selected by FastISel if we fall-back to
1993 // SelectionDAG. There is also an ordering issue when both instructions are in
1994 // different basic blocks.
1995 // 1.) The load instruction is selected by FastISel, but the integer extend
1996 // not. This usually happens when the integer extend is in a different
1997 // basic block and SelectionDAG took over for that basic block.
1998 // 2.) The load instruction is selected before the integer extend. This only
1999 // happens when the integer extend is in a different basic block.
2000 // 3.) The load instruction is selected by SelectionDAG and the integer extend
2001 // by FastISel. This happens if there are instructions between the load
2002 // and the integer extend that couldn't be selected by FastISel.
2003 if (IntExtVal) {
2004 // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2005 // could select it. Emit a copy to subreg if necessary. FastISel will remove
2006 // it when it selects the integer extend.
2007 Register Reg = lookUpRegForValue(IntExtVal);
2008 auto *MI = MRI.getUniqueVRegDef(Reg);
2009 if (!MI) {
2010 if (RetVT == MVT::i64 && VT <= MVT::i32) {
2011 if (WantZExt) {
2012 // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2013 MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
2014 ResultReg = std::prev(I)->getOperand(0).getReg();
2015 removeDeadCode(I, std::next(I));
2016 } else
2017 ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2018 AArch64::sub_32);
2019 }
2020 updateValueMap(I, ResultReg);
2021 return true;
2022 }
2023
2024 // The integer extend has already been emitted - delete all the instructions
2025 // that have been emitted by the integer extend lowering code and use the
2026 // result from the load instruction directly.
2027 while (MI) {
2028 Reg = 0;
2029 for (auto &Opnd : MI->uses()) {
2030 if (Opnd.isReg()) {
2031 Reg = Opnd.getReg();
2032 break;
2033 }
2034 }
2036 removeDeadCode(I, std::next(I));
2037 MI = nullptr;
2038 if (Reg)
2039 MI = MRI.getUniqueVRegDef(Reg);
2040 }
2041 updateValueMap(IntExtVal, ResultReg);
2042 return true;
2043 }
2044
2045 updateValueMap(I, ResultReg);
2046 return true;
2047}
2048
2049bool AArch64FastISel::emitStoreRelease(MVT VT, Register SrcReg,
2050 Register AddrReg,
2051 MachineMemOperand *MMO) {
2052 unsigned Opc;
2053 switch (VT.SimpleTy) {
2054 default: return false;
2055 case MVT::i8: Opc = AArch64::STLRB; break;
2056 case MVT::i16: Opc = AArch64::STLRH; break;
2057 case MVT::i32: Opc = AArch64::STLRW; break;
2058 case MVT::i64: Opc = AArch64::STLRX; break;
2059 }
2060
2061 const MCInstrDesc &II = TII.get(Opc);
2062 SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2063 AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2064 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2065 .addReg(SrcReg)
2066 .addReg(AddrReg)
2067 .addMemOperand(MMO);
2068 return true;
2069}
2070
2071bool AArch64FastISel::emitStore(MVT VT, Register SrcReg, Address Addr,
2072 MachineMemOperand *MMO) {
2073 if (!TLI.allowsMisalignedMemoryAccesses(VT))
2074 return false;
2075
2076 // Simplify this down to something we can handle.
2077 if (!simplifyAddress(Addr, VT))
2078 return false;
2079
2080 unsigned ScaleFactor = getImplicitScaleFactor(VT);
2081 if (!ScaleFactor)
2082 llvm_unreachable("Unexpected value type.");
2083
2084 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2085 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2086 bool UseScaled = true;
2087 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2088 UseScaled = false;
2089 ScaleFactor = 1;
2090 }
2091
2092 static const unsigned OpcTable[4][6] = {
2093 { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
2094 AArch64::STURSi, AArch64::STURDi },
2095 { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
2096 AArch64::STRSui, AArch64::STRDui },
2097 { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2098 AArch64::STRSroX, AArch64::STRDroX },
2099 { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2100 AArch64::STRSroW, AArch64::STRDroW }
2101 };
2102
2103 unsigned Opc;
2104 bool VTIsi1 = false;
2105 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2106 Addr.getOffsetReg();
2107 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2108 if (Addr.getExtendType() == AArch64_AM::UXTW ||
2109 Addr.getExtendType() == AArch64_AM::SXTW)
2110 Idx++;
2111
2112 switch (VT.SimpleTy) {
2113 default: llvm_unreachable("Unexpected value type.");
2114 case MVT::i1: VTIsi1 = true; [[fallthrough]];
2115 case MVT::i8: Opc = OpcTable[Idx][0]; break;
2116 case MVT::i16: Opc = OpcTable[Idx][1]; break;
2117 case MVT::i32: Opc = OpcTable[Idx][2]; break;
2118 case MVT::i64: Opc = OpcTable[Idx][3]; break;
2119 case MVT::f32: Opc = OpcTable[Idx][4]; break;
2120 case MVT::f64: Opc = OpcTable[Idx][5]; break;
2121 }
2122
2123 // Storing an i1 requires special handling.
2124 if (VTIsi1 && SrcReg != AArch64::WZR) {
2125 Register ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1);
2126 assert(ANDReg && "Unexpected AND instruction emission failure.");
2127 SrcReg = ANDReg;
2128 }
2129 // Create the base instruction, then add the operands.
2130 const MCInstrDesc &II = TII.get(Opc);
2131 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2132 MachineInstrBuilder MIB =
2133 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(SrcReg);
2134 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2135
2136 return true;
2137}
2138
2139bool AArch64FastISel::selectStore(const Instruction *I) {
2140 MVT VT;
2141 const Value *Op0 = I->getOperand(0);
2142 // Verify we have a legal type before going any further. Currently, we handle
2143 // simple types that will directly fit in a register (i32/f32/i64/f64) or
2144 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2145 if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2146 return false;
2147
2148 const Value *PtrV = I->getOperand(1);
2149 if (TLI.supportSwiftError()) {
2150 // Swifterror values can come from either a function parameter with
2151 // swifterror attribute or an alloca with swifterror attribute.
2152 if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2153 if (Arg->hasSwiftErrorAttr())
2154 return false;
2155 }
2156
2157 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2158 if (Alloca->isSwiftError())
2159 return false;
2160 }
2161 }
2162
2163 // Get the value to be stored into a register. Use the zero register directly
2164 // when possible to avoid an unnecessary copy and a wasted register.
2165 Register SrcReg;
2166 if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2167 if (CI->isZero())
2168 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2169 } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2170 if (CF->isZero() && !CF->isNegative()) {
2172 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2173 }
2174 }
2175
2176 if (!SrcReg)
2177 SrcReg = getRegForValue(Op0);
2178
2179 if (!SrcReg)
2180 return false;
2181
2182 auto *SI = cast<StoreInst>(I);
2183
2184 // Try to emit a STLR for seq_cst/release.
2185 if (SI->isAtomic()) {
2186 AtomicOrdering Ord = SI->getOrdering();
2187 // The non-atomic instructions are sufficient for relaxed stores.
2188 if (isReleaseOrStronger(Ord)) {
2189 // The STLR addressing mode only supports a base reg; pass that directly.
2190 Register AddrReg = getRegForValue(PtrV);
2191 if (!AddrReg)
2192 return false;
2193 return emitStoreRelease(VT, SrcReg, AddrReg,
2194 createMachineMemOperandFor(I));
2195 }
2196 }
2197
2198 // See if we can handle this address.
2199 Address Addr;
2200 if (!computeAddress(PtrV, Addr, Op0->getType()))
2201 return false;
2202
2203 if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2204 return false;
2205 return true;
2206}
2207
2209 switch (Pred) {
2210 case CmpInst::FCMP_ONE:
2211 case CmpInst::FCMP_UEQ:
2212 default:
2213 // AL is our "false" for now. The other two need more compares.
2214 return AArch64CC::AL;
2215 case CmpInst::ICMP_EQ:
2216 case CmpInst::FCMP_OEQ:
2217 return AArch64CC::EQ;
2218 case CmpInst::ICMP_SGT:
2219 case CmpInst::FCMP_OGT:
2220 return AArch64CC::GT;
2221 case CmpInst::ICMP_SGE:
2222 case CmpInst::FCMP_OGE:
2223 return AArch64CC::GE;
2224 case CmpInst::ICMP_UGT:
2225 case CmpInst::FCMP_UGT:
2226 return AArch64CC::HI;
2227 case CmpInst::FCMP_OLT:
2228 return AArch64CC::MI;
2229 case CmpInst::ICMP_ULE:
2230 case CmpInst::FCMP_OLE:
2231 return AArch64CC::LS;
2232 case CmpInst::FCMP_ORD:
2233 return AArch64CC::VC;
2234 case CmpInst::FCMP_UNO:
2235 return AArch64CC::VS;
2236 case CmpInst::FCMP_UGE:
2237 return AArch64CC::PL;
2238 case CmpInst::ICMP_SLT:
2239 case CmpInst::FCMP_ULT:
2240 return AArch64CC::LT;
2241 case CmpInst::ICMP_SLE:
2242 case CmpInst::FCMP_ULE:
2243 return AArch64CC::LE;
2244 case CmpInst::FCMP_UNE:
2245 case CmpInst::ICMP_NE:
2246 return AArch64CC::NE;
2247 case CmpInst::ICMP_UGE:
2248 return AArch64CC::HS;
2249 case CmpInst::ICMP_ULT:
2250 return AArch64CC::LO;
2251 }
2252}
2253
2254/// Try to emit a combined compare-and-branch instruction.
2255bool AArch64FastISel::emitCompareAndBranch(const CondBrInst *BI) {
2256 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
2257 // will not be produced, as they are conditional branch instructions that do
2258 // not set flags.
2259 if (FuncInfo.MF->getFunction().hasFnAttribute(
2260 Attribute::SpeculativeLoadHardening))
2261 return false;
2262
2263 assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2264 const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2265 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2266
2267 const Value *LHS = CI->getOperand(0);
2268 const Value *RHS = CI->getOperand(1);
2269
2270 MVT VT;
2271 if (!isTypeSupported(LHS->getType(), VT))
2272 return false;
2273
2274 unsigned BW = VT.getSizeInBits();
2275 if (BW > 64)
2276 return false;
2277
2278 MachineBasicBlock *TBB = FuncInfo.getMBB(BI->getSuccessor(0));
2279 MachineBasicBlock *FBB = FuncInfo.getMBB(BI->getSuccessor(1));
2280
2281 // Try to take advantage of fallthrough opportunities.
2282 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2283 std::swap(TBB, FBB);
2285 }
2286
2287 int TestBit = -1;
2288 bool IsCmpNE;
2289 switch (Predicate) {
2290 default:
2291 return false;
2292 case CmpInst::ICMP_EQ:
2293 case CmpInst::ICMP_NE:
2294 if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2295 std::swap(LHS, RHS);
2296
2297 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2298 return false;
2299
2300 if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2301 if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2302 const Value *AndLHS = AI->getOperand(0);
2303 const Value *AndRHS = AI->getOperand(1);
2304
2305 if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2306 if (C->getValue().isPowerOf2())
2307 std::swap(AndLHS, AndRHS);
2308
2309 if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2310 if (C->getValue().isPowerOf2()) {
2311 TestBit = C->getValue().logBase2();
2312 LHS = AndLHS;
2313 }
2314 }
2315
2316 if (VT == MVT::i1)
2317 TestBit = 0;
2318
2319 IsCmpNE = Predicate == CmpInst::ICMP_NE;
2320 break;
2321 case CmpInst::ICMP_SLT:
2322 case CmpInst::ICMP_SGE:
2323 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2324 return false;
2325
2326 TestBit = BW - 1;
2327 IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2328 break;
2329 case CmpInst::ICMP_SGT:
2330 case CmpInst::ICMP_SLE:
2331 if (!isa<ConstantInt>(RHS))
2332 return false;
2333
2334 if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2335 return false;
2336
2337 TestBit = BW - 1;
2338 IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2339 break;
2340 } // end switch
2341
2342 static const unsigned OpcTable[2][2][2] = {
2343 { {AArch64::CBZW, AArch64::CBZX },
2344 {AArch64::CBNZW, AArch64::CBNZX} },
2345 { {AArch64::TBZW, AArch64::TBZX },
2346 {AArch64::TBNZW, AArch64::TBNZX} }
2347 };
2348
2349 bool IsBitTest = TestBit != -1;
2350 bool Is64Bit = BW == 64;
2351 if (TestBit < 32 && TestBit >= 0)
2352 Is64Bit = false;
2353
2354 unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2355 const MCInstrDesc &II = TII.get(Opc);
2356
2357 Register SrcReg = getRegForValue(LHS);
2358 if (!SrcReg)
2359 return false;
2360
2361 if (BW == 64 && !Is64Bit)
2362 SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32);
2363
2364 if ((BW < 32) && !IsBitTest)
2365 SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
2366
2367 // Emit the combined compare and branch instruction.
2368 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2369 MachineInstrBuilder MIB =
2370 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
2371 .addReg(SrcReg);
2372 if (IsBitTest)
2373 MIB.addImm(TestBit);
2374 MIB.addMBB(TBB);
2375
2376 finishCondBranch(BI->getParent(), TBB, FBB);
2377 return true;
2378}
2379
2380bool AArch64FastISel::selectBranch(const Instruction *I) {
2381 const CondBrInst *BI = cast<CondBrInst>(I);
2382
2383 MachineBasicBlock *TBB = FuncInfo.getMBB(BI->getSuccessor(0));
2384 MachineBasicBlock *FBB = FuncInfo.getMBB(BI->getSuccessor(1));
2385
2386 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2387 if (CI->hasOneUse() && isValueAvailable(CI)) {
2388 // Try to optimize or fold the cmp.
2389 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2390 switch (Predicate) {
2391 default:
2392 break;
2394 fastEmitBranch(FBB, MIMD.getDL());
2395 return true;
2396 case CmpInst::FCMP_TRUE:
2397 fastEmitBranch(TBB, MIMD.getDL());
2398 return true;
2399 }
2400
2401 // Try to emit a combined compare-and-branch first.
2402 if (emitCompareAndBranch(BI))
2403 return true;
2404
2405 // Try to take advantage of fallthrough opportunities.
2406 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2407 std::swap(TBB, FBB);
2409 }
2410
2411 // Emit the cmp.
2412 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2413 return false;
2414
2415 // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2416 // instruction.
2417 AArch64CC::CondCode CC = getCompareCC(Predicate);
2419 switch (Predicate) {
2420 default:
2421 break;
2422 case CmpInst::FCMP_UEQ:
2423 ExtraCC = AArch64CC::EQ;
2424 CC = AArch64CC::VS;
2425 break;
2426 case CmpInst::FCMP_ONE:
2427 ExtraCC = AArch64CC::MI;
2428 CC = AArch64CC::GT;
2429 break;
2430 }
2431 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2432
2433 // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2434 if (ExtraCC != AArch64CC::AL) {
2435 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2436 .addImm(ExtraCC)
2437 .addMBB(TBB);
2438 }
2439
2440 // Emit the branch.
2441 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2442 .addImm(CC)
2443 .addMBB(TBB);
2444
2445 finishCondBranch(BI->getParent(), TBB, FBB);
2446 return true;
2447 }
2448 } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2449 uint64_t Imm = CI->getZExtValue();
2450 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2451 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::B))
2452 .addMBB(Target);
2453
2454 // Obtain the branch probability and add the target to the successor list.
2455 if (FuncInfo.BPI) {
2456 auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2457 BI->getParent(), Target->getBasicBlock());
2458 FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2459 } else
2460 FuncInfo.MBB->addSuccessorWithoutProb(Target);
2461 return true;
2462 } else {
2464 if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2465 // Fake request the condition, otherwise the intrinsic might be completely
2466 // optimized away.
2467 Register CondReg = getRegForValue(BI->getCondition());
2468 if (!CondReg)
2469 return false;
2470
2471 // Emit the branch.
2472 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2473 .addImm(CC)
2474 .addMBB(TBB);
2475
2476 finishCondBranch(BI->getParent(), TBB, FBB);
2477 return true;
2478 }
2479 }
2480
2481 Register CondReg = getRegForValue(BI->getCondition());
2482 if (!CondReg)
2483 return false;
2484
2485 // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2486 unsigned Opcode = AArch64::TBNZW;
2487 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2488 std::swap(TBB, FBB);
2489 Opcode = AArch64::TBZW;
2490 }
2491
2492 const MCInstrDesc &II = TII.get(Opcode);
2493 Register ConstrainedCondReg
2494 = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2495 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2496 .addReg(ConstrainedCondReg)
2497 .addImm(0)
2498 .addMBB(TBB);
2499
2500 finishCondBranch(BI->getParent(), TBB, FBB);
2501 return true;
2502}
2503
2504bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2505 const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2506 Register AddrReg = getRegForValue(BI->getOperand(0));
2507 if (!AddrReg)
2508 return false;
2509
2510 // Authenticated indirectbr is not implemented yet.
2511 if (FuncInfo.MF->getFunction().hasFnAttribute("ptrauth-indirect-gotos"))
2512 return false;
2513
2514 // Emit the indirect branch.
2515 const MCInstrDesc &II = TII.get(AArch64::BR);
2516 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
2517 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(AddrReg);
2518
2519 // Make sure the CFG is up-to-date.
2520 for (const auto *Succ : BI->successors())
2521 FuncInfo.MBB->addSuccessor(FuncInfo.getMBB(Succ));
2522
2523 return true;
2524}
2525
2526bool AArch64FastISel::selectCmp(const Instruction *I) {
2527 const CmpInst *CI = cast<CmpInst>(I);
2528
2529 // Vectors of i1 are weird: bail out.
2530 if (CI->getType()->isVectorTy())
2531 return false;
2532
2533 // Try to optimize or fold the cmp.
2534 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2535 Register ResultReg;
2536 switch (Predicate) {
2537 default:
2538 break;
2540 ResultReg = createResultReg(&AArch64::GPR32RegClass);
2541 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2542 TII.get(TargetOpcode::COPY), ResultReg)
2543 .addReg(AArch64::WZR, getKillRegState(true));
2544 break;
2545 case CmpInst::FCMP_TRUE:
2546 ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2547 break;
2548 }
2549
2550 if (ResultReg) {
2551 updateValueMap(I, ResultReg);
2552 return true;
2553 }
2554
2555 // Emit the cmp.
2556 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2557 return false;
2558
2559 ResultReg = createResultReg(&AArch64::GPR32RegClass);
2560
2561 // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2562 // condition codes are inverted, because they are used by CSINC.
2563 static unsigned CondCodeTable[2][2] = {
2566 };
2567 unsigned *CondCodes = nullptr;
2568 switch (Predicate) {
2569 default:
2570 break;
2571 case CmpInst::FCMP_UEQ:
2572 CondCodes = &CondCodeTable[0][0];
2573 break;
2574 case CmpInst::FCMP_ONE:
2575 CondCodes = &CondCodeTable[1][0];
2576 break;
2577 }
2578
2579 if (CondCodes) {
2580 Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2581 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2582 TmpReg1)
2583 .addReg(AArch64::WZR, getKillRegState(true))
2584 .addReg(AArch64::WZR, getKillRegState(true))
2585 .addImm(CondCodes[0]);
2586 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2587 ResultReg)
2588 .addReg(TmpReg1, getKillRegState(true))
2589 .addReg(AArch64::WZR, getKillRegState(true))
2590 .addImm(CondCodes[1]);
2591
2592 updateValueMap(I, ResultReg);
2593 return true;
2594 }
2595
2596 // Now set a register based on the comparison.
2597 AArch64CC::CondCode CC = getCompareCC(Predicate);
2598 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2599 AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2600 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2601 ResultReg)
2602 .addReg(AArch64::WZR, getKillRegState(true))
2603 .addReg(AArch64::WZR, getKillRegState(true))
2604 .addImm(invertedCC);
2605
2606 updateValueMap(I, ResultReg);
2607 return true;
2608}
2609
2610/// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2611/// value.
2612bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2613 if (!SI->getType()->isIntegerTy(1))
2614 return false;
2615
2616 const Value *Src1Val, *Src2Val;
2617 unsigned Opc = 0;
2618 bool NeedExtraOp = false;
2619 if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2620 if (CI->isOne()) {
2621 Src1Val = SI->getCondition();
2622 Src2Val = SI->getFalseValue();
2623 Opc = AArch64::ORRWrr;
2624 } else {
2625 assert(CI->isZero());
2626 Src1Val = SI->getFalseValue();
2627 Src2Val = SI->getCondition();
2628 Opc = AArch64::BICWrr;
2629 }
2630 } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2631 if (CI->isOne()) {
2632 Src1Val = SI->getCondition();
2633 Src2Val = SI->getTrueValue();
2634 Opc = AArch64::ORRWrr;
2635 NeedExtraOp = true;
2636 } else {
2637 assert(CI->isZero());
2638 Src1Val = SI->getCondition();
2639 Src2Val = SI->getTrueValue();
2640 Opc = AArch64::ANDWrr;
2641 }
2642 }
2643
2644 if (!Opc)
2645 return false;
2646
2647 Register Src1Reg = getRegForValue(Src1Val);
2648 if (!Src1Reg)
2649 return false;
2650
2651 Register Src2Reg = getRegForValue(Src2Val);
2652 if (!Src2Reg)
2653 return false;
2654
2655 if (NeedExtraOp)
2656 Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1);
2657
2658 Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2659 Src2Reg);
2660 updateValueMap(SI, ResultReg);
2661 return true;
2662}
2663
2664bool AArch64FastISel::selectSelect(const Instruction *I) {
2665 assert(isa<SelectInst>(I) && "Expected a select instruction.");
2666 MVT VT;
2667 if (!isTypeSupported(I->getType(), VT))
2668 return false;
2669
2670 unsigned Opc;
2671 const TargetRegisterClass *RC;
2672 switch (VT.SimpleTy) {
2673 default:
2674 return false;
2675 case MVT::i1:
2676 case MVT::i8:
2677 case MVT::i16:
2678 case MVT::i32:
2679 Opc = AArch64::CSELWr;
2680 RC = &AArch64::GPR32RegClass;
2681 break;
2682 case MVT::i64:
2683 Opc = AArch64::CSELXr;
2684 RC = &AArch64::GPR64RegClass;
2685 break;
2686 case MVT::f32:
2687 Opc = AArch64::FCSELSrrr;
2688 RC = &AArch64::FPR32RegClass;
2689 break;
2690 case MVT::f64:
2691 Opc = AArch64::FCSELDrrr;
2692 RC = &AArch64::FPR64RegClass;
2693 break;
2694 }
2695
2696 const SelectInst *SI = cast<SelectInst>(I);
2697 const Value *Cond = SI->getCondition();
2700
2701 if (optimizeSelect(SI))
2702 return true;
2703
2704 // Try to pickup the flags, so we don't have to emit another compare.
2705 if (foldXALUIntrinsic(CC, I, Cond)) {
2706 // Fake request the condition to force emission of the XALU intrinsic.
2707 Register CondReg = getRegForValue(Cond);
2708 if (!CondReg)
2709 return false;
2710 } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2711 isValueAvailable(Cond)) {
2712 const auto *Cmp = cast<CmpInst>(Cond);
2713 // Try to optimize or fold the cmp.
2714 CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2715 const Value *FoldSelect = nullptr;
2716 switch (Predicate) {
2717 default:
2718 break;
2720 FoldSelect = SI->getFalseValue();
2721 break;
2722 case CmpInst::FCMP_TRUE:
2723 FoldSelect = SI->getTrueValue();
2724 break;
2725 }
2726
2727 if (FoldSelect) {
2728 Register SrcReg = getRegForValue(FoldSelect);
2729 if (!SrcReg)
2730 return false;
2731
2732 updateValueMap(I, SrcReg);
2733 return true;
2734 }
2735
2736 // Emit the cmp.
2737 if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2738 return false;
2739
2740 // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2741 CC = getCompareCC(Predicate);
2742 switch (Predicate) {
2743 default:
2744 break;
2745 case CmpInst::FCMP_UEQ:
2746 ExtraCC = AArch64CC::EQ;
2747 CC = AArch64CC::VS;
2748 break;
2749 case CmpInst::FCMP_ONE:
2750 ExtraCC = AArch64CC::MI;
2751 CC = AArch64CC::GT;
2752 break;
2753 }
2754 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2755 } else {
2756 Register CondReg = getRegForValue(Cond);
2757 if (!CondReg)
2758 return false;
2759
2760 const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2761 CondReg = constrainOperandRegClass(II, CondReg, 1);
2762
2763 // Emit a TST instruction (ANDS wzr, reg, #imm).
2764 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II,
2765 AArch64::WZR)
2766 .addReg(CondReg)
2768 }
2769
2770 Register Src1Reg = getRegForValue(SI->getTrueValue());
2771 Register Src2Reg = getRegForValue(SI->getFalseValue());
2772
2773 if (!Src1Reg || !Src2Reg)
2774 return false;
2775
2776 if (ExtraCC != AArch64CC::AL)
2777 Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC);
2778
2779 Register ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC);
2780 updateValueMap(I, ResultReg);
2781 return true;
2782}
2783
2784bool AArch64FastISel::selectFPExt(const Instruction *I) {
2785 Value *V = I->getOperand(0);
2786 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2787 return false;
2788
2789 Register Op = getRegForValue(V);
2790 if (Op == 0)
2791 return false;
2792
2793 Register ResultReg = createResultReg(&AArch64::FPR64RegClass);
2794 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTDSr),
2795 ResultReg).addReg(Op);
2796 updateValueMap(I, ResultReg);
2797 return true;
2798}
2799
2800bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2801 Value *V = I->getOperand(0);
2802 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2803 return false;
2804
2805 Register Op = getRegForValue(V);
2806 if (Op == 0)
2807 return false;
2808
2809 Register ResultReg = createResultReg(&AArch64::FPR32RegClass);
2810 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTSDr),
2811 ResultReg).addReg(Op);
2812 updateValueMap(I, ResultReg);
2813 return true;
2814}
2815
2816// FPToUI and FPToSI
2817bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2818 MVT DestVT;
2819 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2820 return false;
2821
2822 Register SrcReg = getRegForValue(I->getOperand(0));
2823 if (!SrcReg)
2824 return false;
2825
2826 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2827 if (SrcVT == MVT::f128 || SrcVT == MVT::f16 || SrcVT == MVT::bf16)
2828 return false;
2829
2830 unsigned Opc;
2831 if (SrcVT == MVT::f64) {
2832 if (Signed)
2833 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2834 else
2835 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2836 } else {
2837 if (Signed)
2838 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2839 else
2840 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2841 }
2842 Register ResultReg = createResultReg(
2843 DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2844 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
2845 .addReg(SrcReg);
2846 updateValueMap(I, ResultReg);
2847 return true;
2848}
2849
2850bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2851 MVT DestVT;
2852 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2853 return false;
2854 // Let regular ISEL handle FP16
2855 if (DestVT == MVT::f16 || DestVT == MVT::bf16)
2856 return false;
2857
2858 assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2859 "Unexpected value type.");
2860
2861 Register SrcReg = getRegForValue(I->getOperand(0));
2862 if (!SrcReg)
2863 return false;
2864
2865 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2866
2867 // Handle sign-extension.
2868 if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2869 SrcReg =
2870 emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2871 if (!SrcReg)
2872 return false;
2873 }
2874
2875 unsigned Opc;
2876 if (SrcVT == MVT::i64) {
2877 if (Signed)
2878 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2879 else
2880 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2881 } else {
2882 if (Signed)
2883 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2884 else
2885 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2886 }
2887
2888 Register ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg);
2889 updateValueMap(I, ResultReg);
2890 return true;
2891}
2892
2893bool AArch64FastISel::fastLowerArguments() {
2894 if (!FuncInfo.CanLowerReturn)
2895 return false;
2896
2897 const Function *F = FuncInfo.Fn;
2898 if (F->isVarArg())
2899 return false;
2900
2901 CallingConv::ID CC = F->getCallingConv();
2902 if (CC != CallingConv::C && CC != CallingConv::Swift)
2903 return false;
2904
2905 if (Subtarget->hasCustomCallingConv())
2906 return false;
2907
2908 // Only handle simple cases of up to 8 GPR and FPR each.
2909 unsigned GPRCnt = 0;
2910 unsigned FPRCnt = 0;
2911 for (auto const &Arg : F->args()) {
2912 if (Arg.hasAttribute(Attribute::ByVal) ||
2913 Arg.hasAttribute(Attribute::InReg) ||
2914 Arg.hasAttribute(Attribute::StructRet) ||
2915 Arg.hasAttribute(Attribute::SwiftSelf) ||
2916 Arg.hasAttribute(Attribute::SwiftAsync) ||
2917 Arg.hasAttribute(Attribute::SwiftError) ||
2918 Arg.hasAttribute(Attribute::Nest))
2919 return false;
2920
2921 Type *ArgTy = Arg.getType();
2922 if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2923 return false;
2924
2925 EVT ArgVT = TLI.getValueType(DL, ArgTy);
2926 if (!ArgVT.isSimple())
2927 return false;
2928
2929 MVT VT = ArgVT.getSimpleVT().SimpleTy;
2930 if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2931 return false;
2932
2933 if (VT.isVector() &&
2934 (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2935 return false;
2936
2937 if (VT >= MVT::i1 && VT <= MVT::i64)
2938 ++GPRCnt;
2939 else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2940 VT.is128BitVector())
2941 ++FPRCnt;
2942 else
2943 return false;
2944
2945 if (GPRCnt > 8 || FPRCnt > 8)
2946 return false;
2947 }
2948
2949 static const MCPhysReg Registers[6][8] = {
2950 { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2951 AArch64::W5, AArch64::W6, AArch64::W7 },
2952 { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2953 AArch64::X5, AArch64::X6, AArch64::X7 },
2954 { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2955 AArch64::H5, AArch64::H6, AArch64::H7 },
2956 { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2957 AArch64::S5, AArch64::S6, AArch64::S7 },
2958 { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2959 AArch64::D5, AArch64::D6, AArch64::D7 },
2960 { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2961 AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2962 };
2963
2964 unsigned GPRIdx = 0;
2965 unsigned FPRIdx = 0;
2966 for (auto const &Arg : F->args()) {
2967 MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2968 unsigned SrcReg;
2969 const TargetRegisterClass *RC;
2970 if (VT >= MVT::i1 && VT <= MVT::i32) {
2971 SrcReg = Registers[0][GPRIdx++];
2972 RC = &AArch64::GPR32RegClass;
2973 VT = MVT::i32;
2974 } else if (VT == MVT::i64) {
2975 SrcReg = Registers[1][GPRIdx++];
2976 RC = &AArch64::GPR64RegClass;
2977 } else if (VT == MVT::f16 || VT == MVT::bf16) {
2978 SrcReg = Registers[2][FPRIdx++];
2979 RC = &AArch64::FPR16RegClass;
2980 } else if (VT == MVT::f32) {
2981 SrcReg = Registers[3][FPRIdx++];
2982 RC = &AArch64::FPR32RegClass;
2983 } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2984 SrcReg = Registers[4][FPRIdx++];
2985 RC = &AArch64::FPR64RegClass;
2986 } else if (VT.is128BitVector()) {
2987 SrcReg = Registers[5][FPRIdx++];
2988 RC = &AArch64::FPR128RegClass;
2989 } else
2990 llvm_unreachable("Unexpected value type.");
2991
2992 Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
2993 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
2994 // Without this, EmitLiveInCopies may eliminate the livein if its only
2995 // use is a bitcast (which isn't turned into an instruction).
2996 Register ResultReg = createResultReg(RC);
2997 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2998 TII.get(TargetOpcode::COPY), ResultReg)
2999 .addReg(DstReg, getKillRegState(true));
3000 updateValueMap(&Arg, ResultReg);
3001 }
3002 return true;
3003}
3004
3005bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3006 SmallVectorImpl<MVT> &OutVTs,
3007 SmallVectorImpl<Type *> &OrigTys,
3008 unsigned &NumBytes) {
3009 CallingConv::ID CC = CLI.CallConv;
3011 CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3012 CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, OrigTys,
3013 CCAssignFnForCall(CC));
3014
3015 // Get a count of how many bytes are to be pushed on the stack.
3016 NumBytes = CCInfo.getStackSize();
3017
3018 // Issue CALLSEQ_START
3019 unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3020 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackDown))
3021 .addImm(NumBytes).addImm(0);
3022
3023 // Process the args.
3024 for (CCValAssign &VA : ArgLocs) {
3025 const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3026 MVT ArgVT = OutVTs[VA.getValNo()];
3027
3028 Register ArgReg = getRegForValue(ArgVal);
3029 if (!ArgReg)
3030 return false;
3031
3032 // Handle arg promotion: SExt, ZExt, AExt.
3033 switch (VA.getLocInfo()) {
3034 case CCValAssign::Full:
3035 break;
3036 case CCValAssign::SExt: {
3037 MVT DestVT = VA.getLocVT();
3038 MVT SrcVT = ArgVT;
3039 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3040 if (!ArgReg)
3041 return false;
3042 break;
3043 }
3044 case CCValAssign::AExt:
3045 // Intentional fall-through.
3046 case CCValAssign::ZExt: {
3047 MVT DestVT = VA.getLocVT();
3048 MVT SrcVT = ArgVT;
3049 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3050 if (!ArgReg)
3051 return false;
3052 break;
3053 }
3054 default:
3055 llvm_unreachable("Unknown arg promotion!");
3056 }
3057
3058 // Now copy/store arg to correct locations.
3059 if (VA.isRegLoc() && !VA.needsCustom()) {
3060 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3061 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3062 CLI.OutRegs.push_back(VA.getLocReg());
3063 } else if (VA.needsCustom()) {
3064 // FIXME: Handle custom args.
3065 return false;
3066 } else {
3067 assert(VA.isMemLoc() && "Assuming store on stack.");
3068
3069 // Don't emit stores for undef values.
3070 if (isa<UndefValue>(ArgVal))
3071 continue;
3072
3073 // Need to store on the stack.
3074 unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3075
3076 unsigned BEAlign = 0;
3077 if (ArgSize < 8 && !Subtarget->isLittleEndian())
3078 BEAlign = 8 - ArgSize;
3079
3080 Address Addr;
3081 Addr.setKind(Address::RegBase);
3082 Addr.setReg(AArch64::SP);
3083 Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3084
3085 Align Alignment = DL.getABITypeAlign(ArgVal->getType());
3086 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3087 MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3088 MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3089
3090 if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3091 return false;
3092 }
3093 }
3094 return true;
3095}
3096
3097bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, unsigned NumBytes) {
3098 CallingConv::ID CC = CLI.CallConv;
3099
3100 // Issue CALLSEQ_END
3101 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3102 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp))
3103 .addImm(NumBytes).addImm(0);
3104
3105 // Now the return values.
3107 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3108 CCInfo.AnalyzeCallResult(CLI.Ins, CCAssignFnForCall(CC));
3109
3110 Register ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
3111 for (unsigned i = 0; i != RVLocs.size(); ++i) {
3112 CCValAssign &VA = RVLocs[i];
3113 MVT CopyVT = VA.getValVT();
3114 Register CopyReg = ResultReg + i;
3115
3116 // TODO: Handle big-endian results
3117 if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3118 return false;
3119
3120 // Copy result out of their specified physreg.
3121 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
3122 CopyReg)
3123 .addReg(VA.getLocReg());
3124 CLI.InRegs.push_back(VA.getLocReg());
3125 }
3126
3127 CLI.ResultReg = ResultReg;
3128 CLI.NumResultRegs = RVLocs.size();
3129
3130 return true;
3131}
3132
3133bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3134 CallingConv::ID CC = CLI.CallConv;
3135 bool IsTailCall = CLI.IsTailCall;
3136 bool IsVarArg = CLI.IsVarArg;
3137 const Value *Callee = CLI.Callee;
3138 MCSymbol *Symbol = CLI.Symbol;
3139
3140 if (!Callee && !Symbol)
3141 return false;
3142
3143 // Allow SelectionDAG isel to handle calls to functions like setjmp that need
3144 // a bti instruction following the call.
3145 if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
3146 !Subtarget->noBTIAtReturnTwice() &&
3147 MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
3148 return false;
3149
3150 // Allow SelectionDAG isel to handle indirect calls with KCFI checks.
3151 if (CLI.CB && CLI.CB->isIndirectCall() &&
3152 CLI.CB->getOperandBundle(LLVMContext::OB_kcfi))
3153 return false;
3154
3155 // Allow SelectionDAG isel to handle tail calls.
3156 if (IsTailCall)
3157 return false;
3158
3159 // FIXME: we could and should support this, but for now correctness at -O0 is
3160 // more important.
3161 if (Subtarget->isTargetILP32())
3162 return false;
3163
3164 CodeModel::Model CM = TM.getCodeModel();
3165 // Only support the small-addressing and large code models.
3166 if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3167 return false;
3168
3169 // FIXME: Add large code model support for ELF.
3170 if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3171 return false;
3172
3173 // ELF -fno-plt compiled intrinsic calls do not have the nonlazybind
3174 // attribute. Check "RtLibUseGOT" instead.
3175 if (MF->getFunction().getParent()->getRtLibUseGOT())
3176 return false;
3177
3178 // Let SDISel handle vararg functions.
3179 if (IsVarArg)
3180 return false;
3181
3182 if (Subtarget->isWindowsArm64EC())
3183 return false;
3184
3185 for (auto Flag : CLI.OutFlags)
3186 if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3187 Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError())
3188 return false;
3189
3190 // Set up the argument vectors.
3191 SmallVector<MVT, 16> OutVTs;
3193 OutVTs.reserve(CLI.OutVals.size());
3194
3195 for (auto *Val : CLI.OutVals) {
3196 MVT VT;
3197 if (!isTypeLegal(Val->getType(), VT) &&
3198 !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3199 return false;
3200
3201 // We don't handle vector parameters yet.
3202 if (VT.isVector() || VT.getSizeInBits() > 64)
3203 return false;
3204
3205 OutVTs.push_back(VT);
3206 OrigTys.push_back(Val->getType());
3207 }
3208
3209 Address Addr;
3210 if (Callee && !computeCallAddress(Callee, Addr))
3211 return false;
3212
3213 // The weak function target may be zero; in that case we must use indirect
3214 // addressing via a stub on windows as it may be out of range for a
3215 // PC-relative jump.
3216 if (Subtarget->isTargetWindows() && Addr.getGlobalValue() &&
3217 Addr.getGlobalValue()->hasExternalWeakLinkage())
3218 return false;
3219
3220 // Handle the arguments now that we've gotten them.
3221 unsigned NumBytes;
3222 if (!processCallArgs(CLI, OutVTs, OrigTys, NumBytes))
3223 return false;
3224
3225 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3226 if (RegInfo->isAnyArgRegReserved(*MF))
3227 RegInfo->emitReservedArgRegCallError(*MF);
3228
3229 // Issue the call.
3230 MachineInstrBuilder MIB;
3231 if (Subtarget->useSmallAddressing()) {
3232 const MCInstrDesc &II =
3233 TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL);
3234 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II);
3235 if (Symbol)
3236 MIB.addSym(Symbol, 0);
3237 else if (Addr.getGlobalValue())
3238 MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3239 else if (Addr.getReg()) {
3240 Register Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3241 MIB.addReg(Reg);
3242 } else
3243 return false;
3244 } else {
3245 Register CallReg;
3246 if (Symbol) {
3247 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3248 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
3249 ADRPReg)
3251
3252 CallReg = createResultReg(&AArch64::GPR64RegClass);
3253 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3254 TII.get(AArch64::LDRXui), CallReg)
3255 .addReg(ADRPReg)
3256 .addSym(Symbol,
3258 } else if (Addr.getGlobalValue())
3259 CallReg = materializeGV(Addr.getGlobalValue());
3260 else if (Addr.getReg())
3261 CallReg = Addr.getReg();
3262
3263 if (!CallReg)
3264 return false;
3265
3266 const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF));
3267 CallReg = constrainOperandRegClass(II, CallReg, 0);
3268 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(CallReg);
3269 }
3270
3271 // Add implicit physical register uses to the call.
3272 for (auto Reg : CLI.OutRegs)
3273 MIB.addReg(Reg, RegState::Implicit);
3274
3275 // Add a register mask with the call-preserved registers.
3276 // Proper defs for return values will be added by setPhysRegsDeadExcept().
3277 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3278
3279 CLI.Call = MIB;
3280
3281 // Finish off the call including any return values.
3282 return finishCall(CLI, NumBytes);
3283}
3284
3285bool AArch64FastISel::isMemCpySmall(uint64_t Len, MaybeAlign Alignment) {
3286 if (Alignment)
3287 return Len / Alignment->value() <= 4;
3288 else
3289 return Len < 32;
3290}
3291
3292bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3293 uint64_t Len, MaybeAlign Alignment) {
3294 // Make sure we don't bloat code by inlining very large memcpy's.
3295 if (!isMemCpySmall(Len, Alignment))
3296 return false;
3297
3298 int64_t UnscaledOffset = 0;
3299 Address OrigDest = Dest;
3300 Address OrigSrc = Src;
3301
3302 while (Len) {
3303 MVT VT;
3304 if (!Alignment || *Alignment >= 8) {
3305 if (Len >= 8)
3306 VT = MVT::i64;
3307 else if (Len >= 4)
3308 VT = MVT::i32;
3309 else if (Len >= 2)
3310 VT = MVT::i16;
3311 else {
3312 VT = MVT::i8;
3313 }
3314 } else {
3315 assert(Alignment && "Alignment is set in this branch");
3316 // Bound based on alignment.
3317 if (Len >= 4 && *Alignment == 4)
3318 VT = MVT::i32;
3319 else if (Len >= 2 && *Alignment == 2)
3320 VT = MVT::i16;
3321 else {
3322 VT = MVT::i8;
3323 }
3324 }
3325
3326 Register ResultReg = emitLoad(VT, VT, Src);
3327 if (!ResultReg)
3328 return false;
3329
3330 if (!emitStore(VT, ResultReg, Dest))
3331 return false;
3332
3333 int64_t Size = VT.getSizeInBits() / 8;
3334 Len -= Size;
3335 UnscaledOffset += Size;
3336
3337 // We need to recompute the unscaled offset for each iteration.
3338 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3339 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3340 }
3341
3342 return true;
3343}
3344
3345/// Check if it is possible to fold the condition from the XALU intrinsic
3346/// into the user. The condition code will only be updated on success.
3347bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3348 const Instruction *I,
3349 const Value *Cond) {
3351 return false;
3352
3353 const auto *EV = cast<ExtractValueInst>(Cond);
3354 if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3355 return false;
3356
3357 const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3358 MVT RetVT;
3359 const Function *Callee = II->getCalledFunction();
3360 Type *RetTy =
3361 cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3362 if (!isTypeLegal(RetTy, RetVT))
3363 return false;
3364
3365 if (RetVT != MVT::i32 && RetVT != MVT::i64)
3366 return false;
3367
3368 const Value *LHS = II->getArgOperand(0);
3369 const Value *RHS = II->getArgOperand(1);
3370
3371 // Canonicalize immediate to the RHS.
3372 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3373 std::swap(LHS, RHS);
3374
3375 // Simplify multiplies.
3376 Intrinsic::ID IID = II->getIntrinsicID();
3377 switch (IID) {
3378 default:
3379 break;
3380 case Intrinsic::smul_with_overflow:
3381 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3382 if (C->getValue() == 2)
3383 IID = Intrinsic::sadd_with_overflow;
3384 break;
3385 case Intrinsic::umul_with_overflow:
3386 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3387 if (C->getValue() == 2)
3388 IID = Intrinsic::uadd_with_overflow;
3389 break;
3390 }
3391
3392 AArch64CC::CondCode TmpCC;
3393 switch (IID) {
3394 default:
3395 return false;
3396 case Intrinsic::sadd_with_overflow:
3397 case Intrinsic::ssub_with_overflow:
3398 TmpCC = AArch64CC::VS;
3399 break;
3400 case Intrinsic::uadd_with_overflow:
3401 TmpCC = AArch64CC::HS;
3402 break;
3403 case Intrinsic::usub_with_overflow:
3404 TmpCC = AArch64CC::LO;
3405 break;
3406 case Intrinsic::smul_with_overflow:
3407 case Intrinsic::umul_with_overflow:
3408 TmpCC = AArch64CC::NE;
3409 break;
3410 }
3411
3412 // Check if both instructions are in the same basic block.
3413 if (!isValueAvailable(II))
3414 return false;
3415
3416 // Make sure nothing is in the way
3419 for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3420 // We only expect extractvalue instructions between the intrinsic and the
3421 // instruction to be selected.
3422 if (!isa<ExtractValueInst>(Itr))
3423 return false;
3424
3425 // Check that the extractvalue operand comes from the intrinsic.
3426 const auto *EVI = cast<ExtractValueInst>(Itr);
3427 if (EVI->getAggregateOperand() != II)
3428 return false;
3429 }
3430
3431 CC = TmpCC;
3432 return true;
3433}
3434
3435bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3436 // FIXME: Handle more intrinsics.
3437 switch (II->getIntrinsicID()) {
3438 default: return false;
3439 case Intrinsic::frameaddress: {
3440 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3441 MFI.setFrameAddressIsTaken(true);
3442
3443 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3444 Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3445 Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3446 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3447 TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3448 // Recursively load frame address
3449 // ldr x0, [fp]
3450 // ldr x0, [x0]
3451 // ldr x0, [x0]
3452 // ...
3453 Register DestReg;
3454 unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3455 while (Depth--) {
3456 DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3457 SrcReg, 0);
3458 assert(DestReg && "Unexpected LDR instruction emission failure.");
3459 SrcReg = DestReg;
3460 }
3461
3462 updateValueMap(II, SrcReg);
3463 return true;
3464 }
3465 case Intrinsic::sponentry: {
3466 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3467
3468 // SP = FP + Fixed Object + 16
3469 int FI = MFI.CreateFixedObject(4, 0, false);
3470 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
3471 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3472 TII.get(AArch64::ADDXri), ResultReg)
3473 .addFrameIndex(FI)
3474 .addImm(0)
3475 .addImm(0);
3476
3477 updateValueMap(II, ResultReg);
3478 return true;
3479 }
3480 case Intrinsic::memcpy:
3481 case Intrinsic::memmove: {
3482 const auto *MTI = cast<MemTransferInst>(II);
3483 // Don't handle volatile.
3484 if (MTI->isVolatile())
3485 return false;
3486
3487 // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
3488 // we would emit dead code because we don't currently handle memmoves.
3489 bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3490 if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3491 // Small memcpy's are common enough that we want to do them without a call
3492 // if possible.
3493 uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3494 MaybeAlign Alignment;
3495 if (MTI->getDestAlign() || MTI->getSourceAlign())
3496 Alignment = std::min(MTI->getDestAlign().valueOrOne(),
3497 MTI->getSourceAlign().valueOrOne());
3498 if (isMemCpySmall(Len, Alignment)) {
3499 Address Dest, Src;
3500 if (!computeAddress(MTI->getRawDest(), Dest) ||
3501 !computeAddress(MTI->getRawSource(), Src))
3502 return false;
3503 if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3504 return true;
3505 }
3506 }
3507
3508 if (!MTI->getLength()->getType()->isIntegerTy(64))
3509 return false;
3510
3511 if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3512 // Fast instruction selection doesn't support the special
3513 // address spaces.
3514 return false;
3515
3516 const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3517 return lowerCallTo(II, IntrMemName, II->arg_size() - 1);
3518 }
3519 case Intrinsic::memset: {
3520 const MemSetInst *MSI = cast<MemSetInst>(II);
3521 // Don't handle volatile.
3522 if (MSI->isVolatile())
3523 return false;
3524
3525 if (!MSI->getLength()->getType()->isIntegerTy(64))
3526 return false;
3527
3528 if (MSI->getDestAddressSpace() > 255)
3529 // Fast instruction selection doesn't support the special
3530 // address spaces.
3531 return false;
3532
3533 return lowerCallTo(II, "memset", II->arg_size() - 1);
3534 }
3535 case Intrinsic::sin:
3536 case Intrinsic::cos:
3537 case Intrinsic::tan:
3538 case Intrinsic::pow: {
3539 MVT RetVT;
3540 if (!isTypeLegal(II->getType(), RetVT))
3541 return false;
3542
3543 if (RetVT != MVT::f32 && RetVT != MVT::f64)
3544 return false;
3545
3546 static const RTLIB::Libcall LibCallTable[4][2] = {
3547 {RTLIB::SIN_F32, RTLIB::SIN_F64},
3548 {RTLIB::COS_F32, RTLIB::COS_F64},
3549 {RTLIB::TAN_F32, RTLIB::TAN_F64},
3550 {RTLIB::POW_F32, RTLIB::POW_F64}};
3551 RTLIB::Libcall LC;
3552 bool Is64Bit = RetVT == MVT::f64;
3553 switch (II->getIntrinsicID()) {
3554 default:
3555 llvm_unreachable("Unexpected intrinsic.");
3556 case Intrinsic::sin:
3557 LC = LibCallTable[0][Is64Bit];
3558 break;
3559 case Intrinsic::cos:
3560 LC = LibCallTable[1][Is64Bit];
3561 break;
3562 case Intrinsic::tan:
3563 LC = LibCallTable[2][Is64Bit];
3564 break;
3565 case Intrinsic::pow:
3566 LC = LibCallTable[3][Is64Bit];
3567 break;
3568 }
3569
3570 ArgListTy Args;
3571 Args.reserve(II->arg_size());
3572
3573 // Populate the argument list.
3574 for (auto &Arg : II->args())
3575 Args.emplace_back(Arg);
3576
3577 CallLoweringInfo CLI;
3578 MCContext &Ctx = MF->getContext();
3579
3580 RTLIB::LibcallImpl LCImpl = LibcallLowering->getLibcallImpl(LC);
3581 if (LCImpl == RTLIB::Unsupported)
3582 return false;
3583
3584 CallingConv::ID CC = LibcallLowering->getLibcallImplCallingConv(LCImpl);
3585 StringRef FuncName = RTLIB::RuntimeLibcallsInfo::getLibcallImplName(LCImpl);
3586 CLI.setCallee(DL, Ctx, CC, II->getType(), FuncName, std::move(Args));
3587 if (!lowerCallTo(CLI))
3588 return false;
3589 updateValueMap(II, CLI.ResultReg);
3590 return true;
3591 }
3592 case Intrinsic::fabs: {
3593 MVT VT;
3594 if (!isTypeLegal(II->getType(), VT))
3595 return false;
3596
3597 unsigned Opc;
3598 switch (VT.SimpleTy) {
3599 default:
3600 return false;
3601 case MVT::f32:
3602 Opc = AArch64::FABSSr;
3603 break;
3604 case MVT::f64:
3605 Opc = AArch64::FABSDr;
3606 break;
3607 }
3608 Register SrcReg = getRegForValue(II->getOperand(0));
3609 if (!SrcReg)
3610 return false;
3611 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3612 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
3613 .addReg(SrcReg);
3614 updateValueMap(II, ResultReg);
3615 return true;
3616 }
3617 case Intrinsic::trap:
3618 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3619 .addImm(1);
3620 return true;
3621 case Intrinsic::debugtrap:
3622 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3623 .addImm(0xF000);
3624 return true;
3625
3626 case Intrinsic::sqrt: {
3627 Type *RetTy = II->getCalledFunction()->getReturnType();
3628
3629 MVT VT;
3630 if (!isTypeLegal(RetTy, VT))
3631 return false;
3632
3633 Register Op0Reg = getRegForValue(II->getOperand(0));
3634 if (!Op0Reg)
3635 return false;
3636
3637 Register ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg);
3638 if (!ResultReg)
3639 return false;
3640
3641 updateValueMap(II, ResultReg);
3642 return true;
3643 }
3644 case Intrinsic::sadd_with_overflow:
3645 case Intrinsic::uadd_with_overflow:
3646 case Intrinsic::ssub_with_overflow:
3647 case Intrinsic::usub_with_overflow:
3648 case Intrinsic::smul_with_overflow:
3649 case Intrinsic::umul_with_overflow: {
3650 // This implements the basic lowering of the xalu with overflow intrinsics.
3651 const Function *Callee = II->getCalledFunction();
3652 auto *Ty = cast<StructType>(Callee->getReturnType());
3653 Type *RetTy = Ty->getTypeAtIndex(0U);
3654
3655 MVT VT;
3656 if (!isTypeLegal(RetTy, VT))
3657 return false;
3658
3659 if (VT != MVT::i32 && VT != MVT::i64)
3660 return false;
3661
3662 const Value *LHS = II->getArgOperand(0);
3663 const Value *RHS = II->getArgOperand(1);
3664 // Canonicalize immediate to the RHS.
3665 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3666 std::swap(LHS, RHS);
3667
3668 // Simplify multiplies.
3669 Intrinsic::ID IID = II->getIntrinsicID();
3670 switch (IID) {
3671 default:
3672 break;
3673 case Intrinsic::smul_with_overflow:
3674 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3675 if (C->getValue() == 2) {
3676 IID = Intrinsic::sadd_with_overflow;
3677 RHS = LHS;
3678 }
3679 break;
3680 case Intrinsic::umul_with_overflow:
3681 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3682 if (C->getValue() == 2) {
3683 IID = Intrinsic::uadd_with_overflow;
3684 RHS = LHS;
3685 }
3686 break;
3687 }
3688
3689 Register ResultReg1, ResultReg2, MulReg;
3691 switch (IID) {
3692 default: llvm_unreachable("Unexpected intrinsic!");
3693 case Intrinsic::sadd_with_overflow:
3694 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3695 CC = AArch64CC::VS;
3696 break;
3697 case Intrinsic::uadd_with_overflow:
3698 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3699 CC = AArch64CC::HS;
3700 break;
3701 case Intrinsic::ssub_with_overflow:
3702 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3703 CC = AArch64CC::VS;
3704 break;
3705 case Intrinsic::usub_with_overflow:
3706 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3707 CC = AArch64CC::LO;
3708 break;
3709 case Intrinsic::smul_with_overflow: {
3710 CC = AArch64CC::NE;
3711 Register LHSReg = getRegForValue(LHS);
3712 if (!LHSReg)
3713 return false;
3714
3715 Register RHSReg = getRegForValue(RHS);
3716 if (!RHSReg)
3717 return false;
3718
3719 if (VT == MVT::i32) {
3720 MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg);
3721 Register MulSubReg =
3722 fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3723 // cmp xreg, wreg, sxtw
3724 emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg,
3725 AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true,
3726 /*WantResult=*/false);
3727 MulReg = MulSubReg;
3728 } else {
3729 assert(VT == MVT::i64 && "Unexpected value type.");
3730 // LHSReg and RHSReg cannot be killed by this Mul, since they are
3731 // reused in the next instruction.
3732 MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3733 Register SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg);
3734 emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63,
3735 /*WantResult=*/false);
3736 }
3737 break;
3738 }
3739 case Intrinsic::umul_with_overflow: {
3740 CC = AArch64CC::NE;
3741 Register LHSReg = getRegForValue(LHS);
3742 if (!LHSReg)
3743 return false;
3744
3745 Register RHSReg = getRegForValue(RHS);
3746 if (!RHSReg)
3747 return false;
3748
3749 if (VT == MVT::i32) {
3750 MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg);
3751 // tst xreg, #0xffffffff00000000
3752 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3753 TII.get(AArch64::ANDSXri), AArch64::XZR)
3754 .addReg(MulReg)
3755 .addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64));
3756 MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3757 } else {
3758 assert(VT == MVT::i64 && "Unexpected value type.");
3759 // LHSReg and RHSReg cannot be killed by this Mul, since they are
3760 // reused in the next instruction.
3761 MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3762 Register UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg);
3763 emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false);
3764 }
3765 break;
3766 }
3767 }
3768
3769 if (MulReg) {
3770 ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3771 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3772 TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3773 }
3774
3775 if (!ResultReg1)
3776 return false;
3777
3778 ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3779 AArch64::WZR, AArch64::WZR,
3780 getInvertedCondCode(CC));
3781 (void)ResultReg2;
3782 assert((ResultReg1 + 1) == ResultReg2 &&
3783 "Nonconsecutive result registers.");
3784 updateValueMap(II, ResultReg1, 2);
3785 return true;
3786 }
3787 case Intrinsic::aarch64_crc32b:
3788 case Intrinsic::aarch64_crc32h:
3789 case Intrinsic::aarch64_crc32w:
3790 case Intrinsic::aarch64_crc32x:
3791 case Intrinsic::aarch64_crc32cb:
3792 case Intrinsic::aarch64_crc32ch:
3793 case Intrinsic::aarch64_crc32cw:
3794 case Intrinsic::aarch64_crc32cx: {
3795 if (!Subtarget->hasCRC())
3796 return false;
3797
3798 unsigned Opc;
3799 switch (II->getIntrinsicID()) {
3800 default:
3801 llvm_unreachable("Unexpected intrinsic!");
3802 case Intrinsic::aarch64_crc32b:
3803 Opc = AArch64::CRC32Brr;
3804 break;
3805 case Intrinsic::aarch64_crc32h:
3806 Opc = AArch64::CRC32Hrr;
3807 break;
3808 case Intrinsic::aarch64_crc32w:
3809 Opc = AArch64::CRC32Wrr;
3810 break;
3811 case Intrinsic::aarch64_crc32x:
3812 Opc = AArch64::CRC32Xrr;
3813 break;
3814 case Intrinsic::aarch64_crc32cb:
3815 Opc = AArch64::CRC32CBrr;
3816 break;
3817 case Intrinsic::aarch64_crc32ch:
3818 Opc = AArch64::CRC32CHrr;
3819 break;
3820 case Intrinsic::aarch64_crc32cw:
3821 Opc = AArch64::CRC32CWrr;
3822 break;
3823 case Intrinsic::aarch64_crc32cx:
3824 Opc = AArch64::CRC32CXrr;
3825 break;
3826 }
3827
3828 Register LHSReg = getRegForValue(II->getArgOperand(0));
3829 Register RHSReg = getRegForValue(II->getArgOperand(1));
3830 if (!LHSReg || !RHSReg)
3831 return false;
3832
3833 Register ResultReg =
3834 fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, LHSReg, RHSReg);
3835 updateValueMap(II, ResultReg);
3836 return true;
3837 }
3838 }
3839 return false;
3840}
3841
3842bool AArch64FastISel::selectRet(const Instruction *I) {
3843 const ReturnInst *Ret = cast<ReturnInst>(I);
3844 const Function &F = *I->getParent()->getParent();
3845
3846 if (!FuncInfo.CanLowerReturn)
3847 return false;
3848
3849 if (F.isVarArg())
3850 return false;
3851
3852 if (TLI.supportSwiftError() &&
3853 F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3854 return false;
3855
3856 if (TLI.supportSplitCSR(FuncInfo.MF))
3857 return false;
3858
3859 // Build a list of return value registers.
3861
3862 if (Ret->getNumOperands() > 0) {
3863 CallingConv::ID CC = F.getCallingConv();
3865 GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3866
3867 // Analyze operands of the call, assigning locations to each operand.
3869 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3870 CCInfo.AnalyzeReturn(Outs, RetCC_AArch64_AAPCS);
3871
3872 // Only handle a single return value for now.
3873 if (ValLocs.size() != 1)
3874 return false;
3875
3876 CCValAssign &VA = ValLocs[0];
3877 const Value *RV = Ret->getOperand(0);
3878
3879 // Don't bother handling odd stuff for now.
3880 if ((VA.getLocInfo() != CCValAssign::Full) &&
3881 (VA.getLocInfo() != CCValAssign::BCvt))
3882 return false;
3883
3884 // Only handle register returns for now.
3885 if (!VA.isRegLoc())
3886 return false;
3887
3888 Register Reg = getRegForValue(RV);
3889 if (!Reg)
3890 return false;
3891
3892 Register SrcReg = Reg + VA.getValNo();
3893 Register DestReg = VA.getLocReg();
3894 // Avoid a cross-class copy. This is very unlikely.
3895 if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3896 return false;
3897
3898 EVT RVEVT = TLI.getValueType(DL, RV->getType());
3899 if (!RVEVT.isSimple())
3900 return false;
3901
3902 // Vectors (of > 1 lane) in big endian need tricky handling.
3903 if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
3904 !Subtarget->isLittleEndian())
3905 return false;
3906
3907 MVT RVVT = RVEVT.getSimpleVT();
3908 if (RVVT == MVT::f128)
3909 return false;
3910
3911 MVT DestVT = VA.getValVT();
3912 // Special handling for extended integers.
3913 if (RVVT != DestVT) {
3914 if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3915 return false;
3916
3917 if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3918 return false;
3919
3920 bool IsZExt = Outs[0].Flags.isZExt();
3921 SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3922 if (!SrcReg)
3923 return false;
3924 }
3925
3926 // "Callee" (i.e. value producer) zero extends pointers at function
3927 // boundary.
3928 if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
3929 SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff);
3930
3931 // Make the copy.
3932 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3933 TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3934
3935 // Add register to return instruction.
3936 RetRegs.push_back(VA.getLocReg());
3937 }
3938
3939 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3940 TII.get(AArch64::RET_ReallyLR));
3941 for (Register RetReg : RetRegs)
3942 MIB.addReg(RetReg, RegState::Implicit);
3943 return true;
3944}
3945
3946bool AArch64FastISel::selectTrunc(const Instruction *I) {
3947 Type *DestTy = I->getType();
3948 Value *Op = I->getOperand(0);
3949 Type *SrcTy = Op->getType();
3950
3951 EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3952 EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3953 if (!SrcEVT.isSimple())
3954 return false;
3955 if (!DestEVT.isSimple())
3956 return false;
3957
3958 MVT SrcVT = SrcEVT.getSimpleVT();
3959 MVT DestVT = DestEVT.getSimpleVT();
3960
3961 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3962 SrcVT != MVT::i8)
3963 return false;
3964 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3965 DestVT != MVT::i1)
3966 return false;
3967
3968 Register SrcReg = getRegForValue(Op);
3969 if (!SrcReg)
3970 return false;
3971
3972 // If we're truncating from i64 to a smaller non-legal type then generate an
3973 // AND. Otherwise, we know the high bits are undefined and a truncate only
3974 // generate a COPY. We cannot mark the source register also as result
3975 // register, because this can incorrectly transfer the kill flag onto the
3976 // source register.
3977 Register ResultReg;
3978 if (SrcVT == MVT::i64) {
3979 uint64_t Mask = 0;
3980 switch (DestVT.SimpleTy) {
3981 default:
3982 // Trunc i64 to i32 is handled by the target-independent fast-isel.
3983 return false;
3984 case MVT::i1:
3985 Mask = 0x1;
3986 break;
3987 case MVT::i8:
3988 Mask = 0xff;
3989 break;
3990 case MVT::i16:
3991 Mask = 0xffff;
3992 break;
3993 }
3994 // Issue an extract_subreg to get the lower 32-bits.
3995 Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg,
3996 AArch64::sub_32);
3997 // Create the AND instruction which performs the actual truncation.
3998 ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask);
3999 assert(ResultReg && "Unexpected AND instruction emission failure.");
4000 } else {
4001 ResultReg = createResultReg(&AArch64::GPR32RegClass);
4002 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4003 TII.get(TargetOpcode::COPY), ResultReg)
4004 .addReg(SrcReg);
4005 }
4006
4007 updateValueMap(I, ResultReg);
4008 return true;
4009}
4010
4011Register AArch64FastISel::emiti1Ext(Register SrcReg, MVT DestVT, bool IsZExt) {
4012 assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
4013 DestVT == MVT::i64) &&
4014 "Unexpected value type.");
4015 // Handle i8 and i16 as i32.
4016 if (DestVT == MVT::i8 || DestVT == MVT::i16)
4017 DestVT = MVT::i32;
4018
4019 if (IsZExt) {
4020 Register ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1);
4021 assert(ResultReg && "Unexpected AND instruction emission failure.");
4022 if (DestVT == MVT::i64) {
4023 // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
4024 // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
4025 Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4026 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4027 TII.get(AArch64::SUBREG_TO_REG), Reg64)
4028 .addReg(ResultReg)
4029 .addImm(AArch64::sub_32);
4030 ResultReg = Reg64;
4031 }
4032 return ResultReg;
4033 } else {
4034 if (DestVT == MVT::i64) {
4035 // FIXME: We're SExt i1 to i64.
4036 return Register();
4037 }
4038 return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
4039 0, 0);
4040 }
4041}
4042
4043Register AArch64FastISel::emitMul_rr(MVT RetVT, Register Op0, Register Op1) {
4044 unsigned Opc;
4045 Register ZReg;
4046 switch (RetVT.SimpleTy) {
4047 default:
4048 return Register();
4049 case MVT::i8:
4050 case MVT::i16:
4051 case MVT::i32:
4052 RetVT = MVT::i32;
4053 Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
4054 case MVT::i64:
4055 Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
4056 }
4057
4058 const TargetRegisterClass *RC =
4059 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4060 return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg);
4061}
4062
4063Register AArch64FastISel::emitSMULL_rr(MVT RetVT, Register Op0, Register Op1) {
4064 if (RetVT != MVT::i64)
4065 return Register();
4066
4067 return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
4068 Op0, Op1, AArch64::XZR);
4069}
4070
4071Register AArch64FastISel::emitUMULL_rr(MVT RetVT, Register Op0, Register Op1) {
4072 if (RetVT != MVT::i64)
4073 return Register();
4074
4075 return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
4076 Op0, Op1, AArch64::XZR);
4077}
4078
4079Register AArch64FastISel::emitLSL_rr(MVT RetVT, Register Op0Reg,
4080 Register Op1Reg) {
4081 unsigned Opc = 0;
4082 bool NeedTrunc = false;
4083 uint64_t Mask = 0;
4084 switch (RetVT.SimpleTy) {
4085 default:
4086 return Register();
4087 case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
4088 case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4089 case MVT::i32: Opc = AArch64::LSLVWr; break;
4090 case MVT::i64: Opc = AArch64::LSLVXr; break;
4091 }
4092
4093 const TargetRegisterClass *RC =
4094 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4095 if (NeedTrunc)
4096 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4097
4098 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4099 if (NeedTrunc)
4100 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4101 return ResultReg;
4102}
4103
4104Register AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, Register Op0,
4105 uint64_t Shift, bool IsZExt) {
4106 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4107 "Unexpected source/return type pair.");
4108 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4109 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4110 "Unexpected source value type.");
4111 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4112 RetVT == MVT::i64) && "Unexpected return value type.");
4113
4114 bool Is64Bit = (RetVT == MVT::i64);
4115 unsigned RegSize = Is64Bit ? 64 : 32;
4116 unsigned DstBits = RetVT.getSizeInBits();
4117 unsigned SrcBits = SrcVT.getSizeInBits();
4118 const TargetRegisterClass *RC =
4119 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4120
4121 // Just emit a copy for "zero" shifts.
4122 if (Shift == 0) {
4123 if (RetVT == SrcVT) {
4124 Register ResultReg = createResultReg(RC);
4125 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4126 TII.get(TargetOpcode::COPY), ResultReg)
4127 .addReg(Op0);
4128 return ResultReg;
4129 } else
4130 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4131 }
4132
4133 // Don't deal with undefined shifts.
4134 if (Shift >= DstBits)
4135 return Register();
4136
4137 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4138 // {S|U}BFM Wd, Wn, #r, #s
4139 // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4140
4141 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4142 // %2 = shl i16 %1, 4
4143 // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4144 // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4145 // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4146 // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4147
4148 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4149 // %2 = shl i16 %1, 8
4150 // Wd<32+7-24,32-24> = Wn<7:0>
4151 // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4152 // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4153 // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4154
4155 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4156 // %2 = shl i16 %1, 12
4157 // Wd<32+3-20,32-20> = Wn<3:0>
4158 // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4159 // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4160 // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4161
4162 unsigned ImmR = RegSize - Shift;
4163 // Limit the width to the length of the source type.
4164 unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4165 static const unsigned OpcTable[2][2] = {
4166 {AArch64::SBFMWri, AArch64::SBFMXri},
4167 {AArch64::UBFMWri, AArch64::UBFMXri}
4168 };
4169 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4170 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4171 Register TmpReg = MRI.createVirtualRegister(RC);
4172 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4173 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4174 .addReg(Op0)
4175 .addImm(AArch64::sub_32);
4176 Op0 = TmpReg;
4177 }
4178 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4179}
4180
4181Register AArch64FastISel::emitLSR_rr(MVT RetVT, Register Op0Reg,
4182 Register Op1Reg) {
4183 unsigned Opc = 0;
4184 bool NeedTrunc = false;
4185 uint64_t Mask = 0;
4186 switch (RetVT.SimpleTy) {
4187 default:
4188 return Register();
4189 case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
4190 case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4191 case MVT::i32: Opc = AArch64::LSRVWr; break;
4192 case MVT::i64: Opc = AArch64::LSRVXr; break;
4193 }
4194
4195 const TargetRegisterClass *RC =
4196 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4197 if (NeedTrunc) {
4198 Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask);
4199 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4200 }
4201 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4202 if (NeedTrunc)
4203 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4204 return ResultReg;
4205}
4206
4207Register AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, Register Op0,
4208 uint64_t Shift, bool IsZExt) {
4209 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4210 "Unexpected source/return type pair.");
4211 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4212 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4213 "Unexpected source value type.");
4214 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4215 RetVT == MVT::i64) && "Unexpected return value type.");
4216
4217 bool Is64Bit = (RetVT == MVT::i64);
4218 unsigned RegSize = Is64Bit ? 64 : 32;
4219 unsigned DstBits = RetVT.getSizeInBits();
4220 unsigned SrcBits = SrcVT.getSizeInBits();
4221 const TargetRegisterClass *RC =
4222 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4223
4224 // Just emit a copy for "zero" shifts.
4225 if (Shift == 0) {
4226 if (RetVT == SrcVT) {
4227 Register ResultReg = createResultReg(RC);
4228 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4229 TII.get(TargetOpcode::COPY), ResultReg)
4230 .addReg(Op0);
4231 return ResultReg;
4232 } else
4233 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4234 }
4235
4236 // Don't deal with undefined shifts.
4237 if (Shift >= DstBits)
4238 return Register();
4239
4240 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4241 // {S|U}BFM Wd, Wn, #r, #s
4242 // Wd<s-r:0> = Wn<s:r> when r <= s
4243
4244 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4245 // %2 = lshr i16 %1, 4
4246 // Wd<7-4:0> = Wn<7:4>
4247 // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4248 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4249 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4250
4251 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4252 // %2 = lshr i16 %1, 8
4253 // Wd<7-7,0> = Wn<7:7>
4254 // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4255 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4256 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4257
4258 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4259 // %2 = lshr i16 %1, 12
4260 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4261 // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4262 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4263 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4264
4265 if (Shift >= SrcBits && IsZExt)
4266 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4267
4268 // It is not possible to fold a sign-extend into the LShr instruction. In this
4269 // case emit a sign-extend.
4270 if (!IsZExt) {
4271 Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4272 if (!Op0)
4273 return Register();
4274 SrcVT = RetVT;
4275 SrcBits = SrcVT.getSizeInBits();
4276 IsZExt = true;
4277 }
4278
4279 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4280 unsigned ImmS = SrcBits - 1;
4281 static const unsigned OpcTable[2][2] = {
4282 {AArch64::SBFMWri, AArch64::SBFMXri},
4283 {AArch64::UBFMWri, AArch64::UBFMXri}
4284 };
4285 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4286 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4287 Register TmpReg = MRI.createVirtualRegister(RC);
4288 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4289 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4290 .addReg(Op0)
4291 .addImm(AArch64::sub_32);
4292 Op0 = TmpReg;
4293 }
4294 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4295}
4296
4297Register AArch64FastISel::emitASR_rr(MVT RetVT, Register Op0Reg,
4298 Register Op1Reg) {
4299 unsigned Opc = 0;
4300 bool NeedTrunc = false;
4301 uint64_t Mask = 0;
4302 switch (RetVT.SimpleTy) {
4303 default:
4304 return Register();
4305 case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
4306 case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4307 case MVT::i32: Opc = AArch64::ASRVWr; break;
4308 case MVT::i64: Opc = AArch64::ASRVXr; break;
4309 }
4310
4311 const TargetRegisterClass *RC =
4312 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4313 if (NeedTrunc) {
4314 Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
4315 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4316 }
4317 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4318 if (NeedTrunc)
4319 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4320 return ResultReg;
4321}
4322
4323Register AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, Register Op0,
4324 uint64_t Shift, bool IsZExt) {
4325 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4326 "Unexpected source/return type pair.");
4327 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4328 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4329 "Unexpected source value type.");
4330 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4331 RetVT == MVT::i64) && "Unexpected return value type.");
4332
4333 bool Is64Bit = (RetVT == MVT::i64);
4334 unsigned RegSize = Is64Bit ? 64 : 32;
4335 unsigned DstBits = RetVT.getSizeInBits();
4336 unsigned SrcBits = SrcVT.getSizeInBits();
4337 const TargetRegisterClass *RC =
4338 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4339
4340 // Just emit a copy for "zero" shifts.
4341 if (Shift == 0) {
4342 if (RetVT == SrcVT) {
4343 Register ResultReg = createResultReg(RC);
4344 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4345 TII.get(TargetOpcode::COPY), ResultReg)
4346 .addReg(Op0);
4347 return ResultReg;
4348 } else
4349 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4350 }
4351
4352 // Don't deal with undefined shifts.
4353 if (Shift >= DstBits)
4354 return Register();
4355
4356 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4357 // {S|U}BFM Wd, Wn, #r, #s
4358 // Wd<s-r:0> = Wn<s:r> when r <= s
4359
4360 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4361 // %2 = ashr i16 %1, 4
4362 // Wd<7-4:0> = Wn<7:4>
4363 // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4364 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4365 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4366
4367 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4368 // %2 = ashr i16 %1, 8
4369 // Wd<7-7,0> = Wn<7:7>
4370 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4371 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4372 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4373
4374 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4375 // %2 = ashr i16 %1, 12
4376 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4377 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4378 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4379 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4380
4381 if (Shift >= SrcBits && IsZExt)
4382 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4383
4384 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4385 unsigned ImmS = SrcBits - 1;
4386 static const unsigned OpcTable[2][2] = {
4387 {AArch64::SBFMWri, AArch64::SBFMXri},
4388 {AArch64::UBFMWri, AArch64::UBFMXri}
4389 };
4390 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4391 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4392 Register TmpReg = MRI.createVirtualRegister(RC);
4393 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4394 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4395 .addReg(Op0)
4396 .addImm(AArch64::sub_32);
4397 Op0 = TmpReg;
4398 }
4399 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4400}
4401
4402Register AArch64FastISel::emitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT,
4403 bool IsZExt) {
4404 assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4405
4406 // FastISel does not have plumbing to deal with extensions where the SrcVT or
4407 // DestVT are odd things, so test to make sure that they are both types we can
4408 // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4409 // bail out to SelectionDAG.
4410 if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4411 (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4412 ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4413 (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4414 return Register();
4415
4416 unsigned Opc;
4417 unsigned Imm = 0;
4418
4419 switch (SrcVT.SimpleTy) {
4420 default:
4421 return Register();
4422 case MVT::i1:
4423 return emiti1Ext(SrcReg, DestVT, IsZExt);
4424 case MVT::i8:
4425 if (DestVT == MVT::i64)
4426 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4427 else
4428 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4429 Imm = 7;
4430 break;
4431 case MVT::i16:
4432 if (DestVT == MVT::i64)
4433 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4434 else
4435 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4436 Imm = 15;
4437 break;
4438 case MVT::i32:
4439 assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4440 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4441 Imm = 31;
4442 break;
4443 }
4444
4445 // Handle i8 and i16 as i32.
4446 if (DestVT == MVT::i8 || DestVT == MVT::i16)
4447 DestVT = MVT::i32;
4448 else if (DestVT == MVT::i64) {
4449 Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4450 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4451 TII.get(AArch64::SUBREG_TO_REG), Src64)
4452 .addReg(SrcReg)
4453 .addImm(AArch64::sub_32);
4454 SrcReg = Src64;
4455 }
4456
4457 const TargetRegisterClass *RC =
4458 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4459 return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm);
4460}
4461
4462bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4463 MVT SrcVT) {
4464 const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4465 if (!LI || !LI->hasOneUse())
4466 return false;
4467
4468 // Check if the load instruction has already been selected.
4469 Register Reg = lookUpRegForValue(LI);
4470 if (!Reg)
4471 return false;
4472
4473 MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4474 if (!MI)
4475 return false;
4476
4477 // Check if the correct load instruction has been emitted - SelectionDAG might
4478 // have emitted a zero-extending load, but we need a sign-extending load.
4479 bool IsZExt = isa<ZExtInst>(I);
4480 const auto *LoadMI = MI;
4481 if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4482 LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4483 Register LoadReg = MI->getOperand(1).getReg();
4484 LoadMI = MRI.getUniqueVRegDef(LoadReg);
4485 assert(LoadMI && "Expected valid instruction");
4486 }
4487 if (!(IsZExt && AArch64InstrInfo::isZExtLoad(*LoadMI)) &&
4488 !(!IsZExt && AArch64InstrInfo::isSExtLoad(*LoadMI)))
4489 return false;
4490
4491 // Nothing to be done.
4492 if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4493 updateValueMap(I, Reg);
4494 return true;
4495 }
4496
4497 if (IsZExt) {
4498 Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
4499 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4500 TII.get(AArch64::SUBREG_TO_REG), Reg64)
4501 .addReg(Reg, getKillRegState(true))
4502 .addImm(AArch64::sub_32);
4503 Reg = Reg64;
4504 } else {
4505 assert((MI->getOpcode() == TargetOpcode::COPY &&
4506 MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4507 "Expected copy instruction");
4508 Reg = MI->getOperand(1).getReg();
4510 removeDeadCode(I, std::next(I));
4511 }
4512 updateValueMap(I, Reg);
4513 return true;
4514}
4515
4516bool AArch64FastISel::selectIntExt(const Instruction *I) {
4518 "Unexpected integer extend instruction.");
4519 MVT RetVT;
4520 MVT SrcVT;
4521 if (!isTypeSupported(I->getType(), RetVT))
4522 return false;
4523
4524 if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4525 return false;
4526
4527 // Try to optimize already sign-/zero-extended values from load instructions.
4528 if (optimizeIntExtLoad(I, RetVT, SrcVT))
4529 return true;
4530
4531 Register SrcReg = getRegForValue(I->getOperand(0));
4532 if (!SrcReg)
4533 return false;
4534
4535 // Try to optimize already sign-/zero-extended values from function arguments.
4536 bool IsZExt = isa<ZExtInst>(I);
4537 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4538 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4539 if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4540 Register ResultReg = createResultReg(&AArch64::GPR64RegClass);
4541 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4542 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4543 .addReg(SrcReg)
4544 .addImm(AArch64::sub_32);
4545 SrcReg = ResultReg;
4546 }
4547
4548 updateValueMap(I, SrcReg);
4549 return true;
4550 }
4551 }
4552
4553 Register ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4554 if (!ResultReg)
4555 return false;
4556
4557 updateValueMap(I, ResultReg);
4558 return true;
4559}
4560
4561bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4562 EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4563 if (!DestEVT.isSimple())
4564 return false;
4565
4566 MVT DestVT = DestEVT.getSimpleVT();
4567 if (DestVT != MVT::i64 && DestVT != MVT::i32)
4568 return false;
4569
4570 unsigned DivOpc;
4571 bool Is64bit = (DestVT == MVT::i64);
4572 switch (ISDOpcode) {
4573 default:
4574 return false;
4575 case ISD::SREM:
4576 DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4577 break;
4578 case ISD::UREM:
4579 DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4580 break;
4581 }
4582 unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4583 Register Src0Reg = getRegForValue(I->getOperand(0));
4584 if (!Src0Reg)
4585 return false;
4586
4587 Register Src1Reg = getRegForValue(I->getOperand(1));
4588 if (!Src1Reg)
4589 return false;
4590
4591 const TargetRegisterClass *RC =
4592 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4593 Register QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg);
4594 assert(QuotReg && "Unexpected DIV instruction emission failure.");
4595 // The remainder is computed as numerator - (quotient * denominator) using the
4596 // MSUB instruction.
4597 Register ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg);
4598 updateValueMap(I, ResultReg);
4599 return true;
4600}
4601
4602bool AArch64FastISel::selectMul(const Instruction *I) {
4603 MVT VT;
4604 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4605 return false;
4606
4607 if (VT.isVector())
4608 return selectBinaryOp(I, ISD::MUL);
4609
4610 const Value *Src0 = I->getOperand(0);
4611 const Value *Src1 = I->getOperand(1);
4612 if (const auto *C = dyn_cast<ConstantInt>(Src0))
4613 if (C->getValue().isPowerOf2())
4614 std::swap(Src0, Src1);
4615
4616 // Try to simplify to a shift instruction.
4617 if (const auto *C = dyn_cast<ConstantInt>(Src1))
4618 if (C->getValue().isPowerOf2()) {
4619 uint64_t ShiftVal = C->getValue().logBase2();
4620 MVT SrcVT = VT;
4621 bool IsZExt = true;
4622 if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4623 if (!isIntExtFree(ZExt)) {
4624 MVT VT;
4625 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4626 SrcVT = VT;
4627 IsZExt = true;
4628 Src0 = ZExt->getOperand(0);
4629 }
4630 }
4631 } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4632 if (!isIntExtFree(SExt)) {
4633 MVT VT;
4634 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4635 SrcVT = VT;
4636 IsZExt = false;
4637 Src0 = SExt->getOperand(0);
4638 }
4639 }
4640 }
4641
4642 Register Src0Reg = getRegForValue(Src0);
4643 if (!Src0Reg)
4644 return false;
4645
4646 Register ResultReg = emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt);
4647
4648 if (ResultReg) {
4649 updateValueMap(I, ResultReg);
4650 return true;
4651 }
4652 }
4653
4654 Register Src0Reg = getRegForValue(I->getOperand(0));
4655 if (!Src0Reg)
4656 return false;
4657
4658 Register Src1Reg = getRegForValue(I->getOperand(1));
4659 if (!Src1Reg)
4660 return false;
4661
4662 Register ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg);
4663
4664 if (!ResultReg)
4665 return false;
4666
4667 updateValueMap(I, ResultReg);
4668 return true;
4669}
4670
4671bool AArch64FastISel::selectShift(const Instruction *I) {
4672 MVT RetVT;
4673 if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4674 return false;
4675
4676 if (RetVT.isVector())
4677 return selectOperator(I, I->getOpcode());
4678
4679 if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4680 Register ResultReg;
4681 uint64_t ShiftVal = C->getZExtValue();
4682 MVT SrcVT = RetVT;
4683 bool IsZExt = I->getOpcode() != Instruction::AShr;
4684 const Value *Op0 = I->getOperand(0);
4685 if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4686 if (!isIntExtFree(ZExt)) {
4687 MVT TmpVT;
4688 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4689 SrcVT = TmpVT;
4690 IsZExt = true;
4691 Op0 = ZExt->getOperand(0);
4692 }
4693 }
4694 } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4695 if (!isIntExtFree(SExt)) {
4696 MVT TmpVT;
4697 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4698 SrcVT = TmpVT;
4699 IsZExt = false;
4700 Op0 = SExt->getOperand(0);
4701 }
4702 }
4703 }
4704
4705 Register Op0Reg = getRegForValue(Op0);
4706 if (!Op0Reg)
4707 return false;
4708
4709 switch (I->getOpcode()) {
4710 default: llvm_unreachable("Unexpected instruction.");
4711 case Instruction::Shl:
4712 ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4713 break;
4714 case Instruction::AShr:
4715 ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4716 break;
4717 case Instruction::LShr:
4718 ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4719 break;
4720 }
4721 if (!ResultReg)
4722 return false;
4723
4724 updateValueMap(I, ResultReg);
4725 return true;
4726 }
4727
4728 Register Op0Reg = getRegForValue(I->getOperand(0));
4729 if (!Op0Reg)
4730 return false;
4731
4732 Register Op1Reg = getRegForValue(I->getOperand(1));
4733 if (!Op1Reg)
4734 return false;
4735
4736 Register ResultReg;
4737 switch (I->getOpcode()) {
4738 default: llvm_unreachable("Unexpected instruction.");
4739 case Instruction::Shl:
4740 ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg);
4741 break;
4742 case Instruction::AShr:
4743 ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg);
4744 break;
4745 case Instruction::LShr:
4746 ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg);
4747 break;
4748 }
4749
4750 if (!ResultReg)
4751 return false;
4752
4753 updateValueMap(I, ResultReg);
4754 return true;
4755}
4756
4757bool AArch64FastISel::selectBitCast(const Instruction *I) {
4758 MVT RetVT, SrcVT;
4759
4760 if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4761 return false;
4762 if (!isTypeLegal(I->getType(), RetVT))
4763 return false;
4764
4765 unsigned Opc;
4766 if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4767 Opc = AArch64::FMOVWSr;
4768 else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4769 Opc = AArch64::FMOVXDr;
4770 else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4771 Opc = AArch64::FMOVSWr;
4772 else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4773 Opc = AArch64::FMOVDXr;
4774 else
4775 return false;
4776
4777 const TargetRegisterClass *RC = nullptr;
4778 switch (RetVT.SimpleTy) {
4779 default: llvm_unreachable("Unexpected value type.");
4780 case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4781 case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4782 case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4783 case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4784 }
4785 Register Op0Reg = getRegForValue(I->getOperand(0));
4786 if (!Op0Reg)
4787 return false;
4788
4789 Register ResultReg = fastEmitInst_r(Opc, RC, Op0Reg);
4790 if (!ResultReg)
4791 return false;
4792
4793 updateValueMap(I, ResultReg);
4794 return true;
4795}
4796
4797bool AArch64FastISel::selectFRem(const Instruction *I) {
4798 MVT RetVT;
4799 if (!isTypeLegal(I->getType(), RetVT))
4800 return false;
4801
4802 RTLIB::LibcallImpl LCImpl =
4803 LibcallLowering->getLibcallImpl(RTLIB::getREM(RetVT));
4804 if (LCImpl == RTLIB::Unsupported)
4805 return false;
4806
4807 ArgListTy Args;
4808 Args.reserve(I->getNumOperands());
4809
4810 // Populate the argument list.
4811 for (auto &Arg : I->operands())
4812 Args.emplace_back(Arg);
4813
4814 CallLoweringInfo CLI;
4815 MCContext &Ctx = MF->getContext();
4816 CallingConv::ID CC = LibcallLowering->getLibcallImplCallingConv(LCImpl);
4817 StringRef FuncName = RTLIB::RuntimeLibcallsInfo::getLibcallImplName(LCImpl);
4818
4819 CLI.setCallee(DL, Ctx, CC, I->getType(), FuncName, std::move(Args));
4820 if (!lowerCallTo(CLI))
4821 return false;
4822 updateValueMap(I, CLI.ResultReg);
4823 return true;
4824}
4825
4826bool AArch64FastISel::selectSDiv(const Instruction *I) {
4827 MVT VT;
4828 if (!isTypeLegal(I->getType(), VT))
4829 return false;
4830
4831 if (!isa<ConstantInt>(I->getOperand(1)))
4832 return selectBinaryOp(I, ISD::SDIV);
4833
4834 const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4835 if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4836 !(C.isPowerOf2() || C.isNegatedPowerOf2()))
4837 return selectBinaryOp(I, ISD::SDIV);
4838
4839 unsigned Lg2 = C.countr_zero();
4840 Register Src0Reg = getRegForValue(I->getOperand(0));
4841 if (!Src0Reg)
4842 return false;
4843
4844 if (cast<BinaryOperator>(I)->isExact()) {
4845 Register ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2);
4846 if (!ResultReg)
4847 return false;
4848 updateValueMap(I, ResultReg);
4849 return true;
4850 }
4851
4852 int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4853 Register AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne);
4854 if (!AddReg)
4855 return false;
4856
4857 // (Src0 < 0) ? Pow2 - 1 : 0;
4858 if (!emitICmp_ri(VT, Src0Reg, 0))
4859 return false;
4860
4861 unsigned SelectOpc;
4862 const TargetRegisterClass *RC;
4863 if (VT == MVT::i64) {
4864 SelectOpc = AArch64::CSELXr;
4865 RC = &AArch64::GPR64RegClass;
4866 } else {
4867 SelectOpc = AArch64::CSELWr;
4868 RC = &AArch64::GPR32RegClass;
4869 }
4870 Register SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg,
4872 if (!SelectReg)
4873 return false;
4874
4875 // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4876 // negate the result.
4877 Register ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4878 Register ResultReg;
4879 if (C.isNegative())
4880 ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg,
4881 AArch64_AM::ASR, Lg2);
4882 else
4883 ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2);
4884
4885 if (!ResultReg)
4886 return false;
4887
4888 updateValueMap(I, ResultReg);
4889 return true;
4890}
4891
4892/// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4893/// have to duplicate it for AArch64, because otherwise we would fail during the
4894/// sign-extend emission.
4895Register AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4896 Register IdxN = getRegForValue(Idx);
4897 if (!IdxN)
4898 // Unhandled operand. Halt "fast" selection and bail.
4899 return Register();
4900
4901 // If the index is smaller or larger than intptr_t, truncate or extend it.
4902 MVT PtrVT = TLI.getPointerTy(DL);
4903 EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4904 if (IdxVT.bitsLT(PtrVT)) {
4905 IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
4906 } else if (IdxVT.bitsGT(PtrVT))
4907 llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4908 return IdxN;
4909}
4910
4911/// This is mostly a copy of the existing FastISel GEP code, but we have to
4912/// duplicate it for AArch64, because otherwise we would bail out even for
4913/// simple cases. This is because the standard fastEmit functions don't cover
4914/// MUL at all and ADD is lowered very inefficientily.
4915bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4916 if (Subtarget->isTargetILP32())
4917 return false;
4918
4919 Register N = getRegForValue(I->getOperand(0));
4920 if (!N)
4921 return false;
4922
4923 // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4924 // into a single N = N + TotalOffset.
4925 uint64_t TotalOffs = 0;
4926 MVT VT = TLI.getPointerTy(DL);
4928 GTI != E; ++GTI) {
4929 const Value *Idx = GTI.getOperand();
4930 if (auto *StTy = GTI.getStructTypeOrNull()) {
4931 unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4932 // N = N + Offset
4933 if (Field)
4934 TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4935 } else {
4936 // If this is a constant subscript, handle it quickly.
4937 if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4938 if (CI->isZero())
4939 continue;
4940 // N = N + Offset
4941 TotalOffs += GTI.getSequentialElementStride(DL) *
4942 cast<ConstantInt>(CI)->getSExtValue();
4943 continue;
4944 }
4945 if (TotalOffs) {
4946 N = emitAdd_ri_(VT, N, TotalOffs);
4947 if (!N)
4948 return false;
4949 TotalOffs = 0;
4950 }
4951
4952 // N = N + Idx * ElementSize;
4953 uint64_t ElementSize = GTI.getSequentialElementStride(DL);
4954 Register IdxN = getRegForGEPIndex(Idx);
4955 if (!IdxN)
4956 return false;
4957
4958 if (ElementSize != 1) {
4959 Register C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
4960 if (!C)
4961 return false;
4962 IdxN = emitMul_rr(VT, IdxN, C);
4963 if (!IdxN)
4964 return false;
4965 }
4966 N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
4967 if (!N)
4968 return false;
4969 }
4970 }
4971 if (TotalOffs) {
4972 N = emitAdd_ri_(VT, N, TotalOffs);
4973 if (!N)
4974 return false;
4975 }
4976 updateValueMap(I, N);
4977 return true;
4978}
4979
4980bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
4981 assert(TM.getOptLevel() == CodeGenOptLevel::None &&
4982 "cmpxchg survived AtomicExpand at optlevel > -O0");
4983
4984 auto *RetPairTy = cast<StructType>(I->getType());
4985 Type *RetTy = RetPairTy->getTypeAtIndex(0U);
4986 assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
4987 "cmpxchg has a non-i1 status result");
4988
4989 MVT VT;
4990 if (!isTypeLegal(RetTy, VT))
4991 return false;
4992
4993 const TargetRegisterClass *ResRC;
4994 unsigned Opc, CmpOpc;
4995 // This only supports i32/i64, because i8/i16 aren't legal, and the generic
4996 // extractvalue selection doesn't support that.
4997 if (VT == MVT::i32) {
4998 Opc = AArch64::CMP_SWAP_32;
4999 CmpOpc = AArch64::SUBSWrs;
5000 ResRC = &AArch64::GPR32RegClass;
5001 } else if (VT == MVT::i64) {
5002 Opc = AArch64::CMP_SWAP_64;
5003 CmpOpc = AArch64::SUBSXrs;
5004 ResRC = &AArch64::GPR64RegClass;
5005 } else {
5006 return false;
5007 }
5008
5009 const MCInstrDesc &II = TII.get(Opc);
5010
5011 Register AddrReg = getRegForValue(I->getPointerOperand());
5012 Register DesiredReg = getRegForValue(I->getCompareOperand());
5013 Register NewReg = getRegForValue(I->getNewValOperand());
5014
5015 if (!AddrReg || !DesiredReg || !NewReg)
5016 return false;
5017
5018 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
5019 DesiredReg = constrainOperandRegClass(II, DesiredReg, II.getNumDefs() + 1);
5020 NewReg = constrainOperandRegClass(II, NewReg, II.getNumDefs() + 2);
5021
5022 const Register ResultReg1 = createResultReg(ResRC);
5023 const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5024 const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5025
5026 // FIXME: MachineMemOperand doesn't support cmpxchg yet.
5027 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
5028 .addDef(ResultReg1)
5029 .addDef(ScratchReg)
5030 .addUse(AddrReg)
5031 .addUse(DesiredReg)
5032 .addUse(NewReg);
5033
5034 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc))
5035 .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5036 .addUse(ResultReg1)
5037 .addUse(DesiredReg)
5038 .addImm(0);
5039
5040 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr))
5041 .addDef(ResultReg2)
5042 .addUse(AArch64::WZR)
5043 .addUse(AArch64::WZR)
5045
5046 assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5047 updateValueMap(I, ResultReg1, 2);
5048 return true;
5049}
5050
5051bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5052 if (TLI.fallBackToDAGISel(*I))
5053 return false;
5054 switch (I->getOpcode()) {
5055 default:
5056 break;
5057 case Instruction::Add:
5058 case Instruction::Sub:
5059 return selectAddSub(I);
5060 case Instruction::Mul:
5061 return selectMul(I);
5062 case Instruction::SDiv:
5063 return selectSDiv(I);
5064 case Instruction::SRem:
5065 if (!selectBinaryOp(I, ISD::SREM))
5066 return selectRem(I, ISD::SREM);
5067 return true;
5068 case Instruction::URem:
5069 if (!selectBinaryOp(I, ISD::UREM))
5070 return selectRem(I, ISD::UREM);
5071 return true;
5072 case Instruction::Shl:
5073 case Instruction::LShr:
5074 case Instruction::AShr:
5075 return selectShift(I);
5076 case Instruction::And:
5077 case Instruction::Or:
5078 case Instruction::Xor:
5079 return selectLogicalOp(I);
5080 case Instruction::CondBr:
5081 return selectBranch(I);
5082 case Instruction::IndirectBr:
5083 return selectIndirectBr(I);
5084 case Instruction::BitCast:
5086 return selectBitCast(I);
5087 return true;
5088 case Instruction::FPToSI:
5089 if (!selectCast(I, ISD::FP_TO_SINT))
5090 return selectFPToInt(I, /*Signed=*/true);
5091 return true;
5092 case Instruction::FPToUI:
5093 return selectFPToInt(I, /*Signed=*/false);
5094 case Instruction::ZExt:
5095 case Instruction::SExt:
5096 return selectIntExt(I);
5097 case Instruction::Trunc:
5098 if (!selectCast(I, ISD::TRUNCATE))
5099 return selectTrunc(I);
5100 return true;
5101 case Instruction::FPExt:
5102 return selectFPExt(I);
5103 case Instruction::FPTrunc:
5104 return selectFPTrunc(I);
5105 case Instruction::SIToFP:
5106 if (!selectCast(I, ISD::SINT_TO_FP))
5107 return selectIntToFP(I, /*Signed=*/true);
5108 return true;
5109 case Instruction::UIToFP:
5110 return selectIntToFP(I, /*Signed=*/false);
5111 case Instruction::Load:
5112 return selectLoad(I);
5113 case Instruction::Store:
5114 return selectStore(I);
5115 case Instruction::FCmp:
5116 case Instruction::ICmp:
5117 return selectCmp(I);
5118 case Instruction::Select:
5119 return selectSelect(I);
5120 case Instruction::Ret:
5121 return selectRet(I);
5122 case Instruction::FRem:
5123 return selectFRem(I);
5124 case Instruction::GetElementPtr:
5125 return selectGetElementPtr(I);
5126 case Instruction::AtomicCmpXchg:
5127 return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5128 }
5129
5130 // fall-back to target-independent instruction selection.
5131 return selectOperator(I, I->getOpcode());
5132}
5133
5135 const TargetLibraryInfo *LibInfo,
5136 const LibcallLoweringInfo *LibcallLowering) {
5137
5138 SMEAttrs CallerAttrs =
5139 FuncInfo.MF->getInfo<AArch64FunctionInfo>()->getSMEFnAttrs();
5140 if (CallerAttrs.hasZAState() || CallerAttrs.hasZT0State() ||
5141 CallerAttrs.hasStreamingInterfaceOrBody() ||
5142 CallerAttrs.hasStreamingCompatibleInterface() ||
5143 CallerAttrs.hasAgnosticZAInterface())
5144 return nullptr;
5145 return new AArch64FastISel(FuncInfo, LibInfo, LibcallLowering);
5146}
static bool isIntExtFree(const Instruction *I)
Check if the sign-/zero-extend will be a noop.
static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred)
static bool isMulPowOf2(const Value *I)
Check if the multiply is by a power-of-2 constant.
static unsigned getImplicitScaleFactor(MVT VT)
Determine the implicit scale factor that is applied by a memory operation for a given value type.
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the basic binary operation GenericOpc (such as G_OR or G_SDIV),...
static void emitLoad(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos, const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2, int Offset, bool IsPostDec)
Emit a load-pair instruction for frame-destroy.
static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos, const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2, int Offset, bool IsPreDec)
Emit a store-pair instruction for frame-setup.
unsigned RegSize
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
basic Basic Alias true
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
This file defines the FastISel class.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
uint64_t IntrinsicInst * II
OptimizedStructLayoutField Field
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
SI Pre allocate WWM Registers
This file defines the SmallVector class.
static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
Value * RHS
Value * LHS
static const unsigned FramePtr
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
static bool isZExtLoad(const MachineInstr &MI)
Returns whether the instruction is a zero-extending load.
static bool isSExtLoad(const MachineInstr &MI)
Returns whether the instruction is a sign-extending load.
bool isAnyArgRegReserved(const MachineFunction &MF) const
void emitReservedArgRegCallError(const MachineFunction &MF) const
Register getFrameRegister(const MachineFunction &MF) const override
const AArch64RegisterInfo * getRegisterInfo() const override
unsigned ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const
ClassifyGlobalReference - Find the target operand flags that describe how a global value should be re...
bool hasCustomCallingConv() const
PointerType * getType() const
Overload to return most specific pointer type.
InstListType::const_iterator const_iterator
Definition BasicBlock.h:171
Register getLocReg() const
LocInfo getLocInfo() const
unsigned getValNo() const
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition InstrTypes.h:679
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
Definition InstrTypes.h:693
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:682
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition InstrTypes.h:691
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:680
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:681
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:700
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition InstrTypes.h:690
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:684
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:687
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:688
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:683
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition InstrTypes.h:685
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:692
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:689
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition InstrTypes.h:678
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:686
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
bool isUnsigned() const
Definition InstrTypes.h:936
Value * getCondition() const
BasicBlock * getSuccessor(unsigned i) const
const APFloat & getValueAPF() const
Definition Constants.h:463
bool isNegative() const
Return true if the sign bit is set.
Definition Constants.h:473
bool isZero() const
Return true if the value is positive or negative zero.
Definition Constants.h:467
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition Constants.h:174
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:90
constexpr bool isVector() const
One or more elements.
Definition TypeSize.h:324
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition FastISel.h:66
bool selectBitCast(const User *I)
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
PointerType * getType() const
Global values are always pointers.
iterator_range< succ_iterator > successors()
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Tracks which library functions to use for a particular subtarget.
Machine Value Type.
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
bool isVector() const
Return true if this is a vector value type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
bool isValid() const
Return true if this is a valid simple valuetype.
static MVT getIntegerVT(unsigned BitWidth)
bool is64BitVector() const
Return true if this is a 64-bit vector type.
MachineInstrBundleIterator< MachineInstr > iterator
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setFrameAddressIsTaken(bool T)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Flags
Flags values. These may be or'd together.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
Value * getLength() const
unsigned getDestAddressSpace() const
bool isVolatile() const
constexpr unsigned id() const
Definition Register.h:100
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasStreamingCompatibleInterface() const
bool hasAgnosticZAInterface() const
bool hasStreamingInterfaceOrBody() const
void reserve(size_type N)
void push_back(const T &Elt)
TypeSize getElementOffset(unsigned Idx) const
Definition DataLayout.h:774
Provides information about what library functions are available for the current target.
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:290
bool isArrayTy() const
True if this is an instance of ArrayType.
Definition Type.h:281
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:284
bool isStructTy() const
True if this is an instance of StructType.
Definition Type.h:278
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:328
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_PREL
MO_PREL - Indicates that the bits of the symbol operand represented by MO_G0 etc are PC relative.
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo, const LibcallLoweringInfo *libcallLowering)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:880
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:926
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
LLVM_ABI Libcall getREM(EVT VT)
@ User
could "use" a pointer
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:557
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed...
Definition Utils.cpp:57
LLVM_ABI void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
bool CC_AArch64_Win64PCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
constexpr RegState getKillRegState(bool B)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
bool CC_AArch64_DarwinPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
gep_type_iterator gep_type_end(const User *GEP)
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
bool CC_AArch64_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
generic_gep_type_iterator<> gep_type_iterator
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool CC_AArch64_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
bool RetCC_AArch64_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
gep_type_iterator gep_type_begin(const User *GEP)
bool CC_AArch64_Win64_CFGuard_Check(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:876
#define N
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:292
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:308
ElementCount getVectorElementCount() const
Definition ValueTypes.h:358
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
static StringRef getLibcallImplName(RTLIB::LibcallImpl CallImpl)
Get the libcall routine name for the specified libcall implementation.