LLVM 18.0.0git
AArch64FastISel.cpp
1//===- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the AArch64-specific support for the FastISel class. Some
10// of the target-specific code is generated by tablegen in the file
11// AArch64GenFastISel.inc, which is #included here.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AArch64.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
22#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/APInt.h"
24#include "llvm/ADT/DenseMap.h"
41#include "llvm/IR/Argument.h"
42#include "llvm/IR/Attributes.h"
43#include "llvm/IR/BasicBlock.h"
44#include "llvm/IR/CallingConv.h"
45#include "llvm/IR/Constant.h"
46#include "llvm/IR/Constants.h"
47#include "llvm/IR/DataLayout.h"
49#include "llvm/IR/Function.h"
51#include "llvm/IR/GlobalValue.h"
52#include "llvm/IR/InstrTypes.h"
53#include "llvm/IR/Instruction.h"
56#include "llvm/IR/Intrinsics.h"
57#include "llvm/IR/IntrinsicsAArch64.h"
58#include "llvm/IR/Operator.h"
59#include "llvm/IR/Type.h"
60#include "llvm/IR/User.h"
61#include "llvm/IR/Value.h"
62#include "llvm/MC/MCInstrDesc.h"
64#include "llvm/MC/MCSymbol.h"
71#include <algorithm>
72#include <cassert>
73#include <cstdint>
74#include <iterator>
75#include <utility>
76
77using namespace llvm;
78
79namespace {
80
81class AArch64FastISel final : public FastISel {
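 // Address - Describes an addressing mode computed by computeAddress(): a
 // base (register or frame index), an optional offset register with a
 // shift/extend, a constant byte offset, and an optional GlobalValue used
 // for call targets.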
82 class Address {
83 public:
84 using BaseKind = enum {
85 RegBase,
86 FrameIndexBase
87 };
88
89 private:
90 BaseKind Kind = RegBase;
91 AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
92 union {
93 unsigned Reg;
94 int FI;
95 } Base;
96 unsigned OffsetReg = 0;
97 unsigned Shift = 0;
98 int64_t Offset = 0;
99 const GlobalValue *GV = nullptr;
100
101 public:
102 Address() { Base.Reg = 0; }
103
104 void setKind(BaseKind K) { Kind = K; }
105 BaseKind getKind() const { return Kind; }
106 void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
107 AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
108 bool isRegBase() const { return Kind == RegBase; }
109 bool isFIBase() const { return Kind == FrameIndexBase; }
110
111 void setReg(unsigned Reg) {
112 assert(isRegBase() && "Invalid base register access!");
113 Base.Reg = Reg;
114 }
115
116 unsigned getReg() const {
117 assert(isRegBase() && "Invalid base register access!");
118 return Base.Reg;
119 }
120
121 void setOffsetReg(unsigned Reg) {
122 OffsetReg = Reg;
123 }
124
125 unsigned getOffsetReg() const {
126 return OffsetReg;
127 }
128
129 void setFI(unsigned FI) {
130 assert(isFIBase() && "Invalid base frame index access!");
131 Base.FI = FI;
132 }
133
134 unsigned getFI() const {
135 assert(isFIBase() && "Invalid base frame index access!");
136 return Base.FI;
137 }
138
139 void setOffset(int64_t O) { Offset = O; }
140 int64_t getOffset() { return Offset; }
141 void setShift(unsigned S) { Shift = S; }
142 unsigned getShift() { return Shift; }
143
144 void setGlobalValue(const GlobalValue *G) { GV = G; }
145 const GlobalValue *getGlobalValue() { return GV; }
146 };
147
148 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
149 /// make the right decision when generating code for different targets.
150 const AArch64Subtarget *Subtarget;
151 LLVMContext *Context;
152
153 bool fastLowerArguments() override;
154 bool fastLowerCall(CallLoweringInfo &CLI) override;
155 bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
156
157private:
158 // Selection routines.
159 bool selectAddSub(const Instruction *I);
160 bool selectLogicalOp(const Instruction *I);
161 bool selectLoad(const Instruction *I);
162 bool selectStore(const Instruction *I);
163 bool selectBranch(const Instruction *I);
164 bool selectIndirectBr(const Instruction *I);
165 bool selectCmp(const Instruction *I);
166 bool selectSelect(const Instruction *I);
167 bool selectFPExt(const Instruction *I);
168 bool selectFPTrunc(const Instruction *I);
169 bool selectFPToInt(const Instruction *I, bool Signed);
170 bool selectIntToFP(const Instruction *I, bool Signed);
171 bool selectRem(const Instruction *I, unsigned ISDOpcode);
172 bool selectRet(const Instruction *I);
173 bool selectTrunc(const Instruction *I);
174 bool selectIntExt(const Instruction *I);
175 bool selectMul(const Instruction *I);
176 bool selectShift(const Instruction *I);
177 bool selectBitCast(const Instruction *I);
178 bool selectFRem(const Instruction *I);
179 bool selectSDiv(const Instruction *I);
180 bool selectGetElementPtr(const Instruction *I);
181 bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
182
183 // Utility helper routines.
184 bool isTypeLegal(Type *Ty, MVT &VT);
185 bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
186 bool isValueAvailable(const Value *V) const;
187 bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
188 bool computeCallAddress(const Value *V, Address &Addr);
189 bool simplifyAddress(Address &Addr, MVT VT);
190 void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
191 MachineMemOperand::Flags Flags,
192 unsigned ScaleFactor, MachineMemOperand *MMO);
193 bool isMemCpySmall(uint64_t Len, MaybeAlign Alignment);
194 bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
195 MaybeAlign Alignment);
196 bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
197 const Value *Cond);
198 bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
199 bool optimizeSelect(const SelectInst *SI);
200 unsigned getRegForGEPIndex(const Value *Idx);
201
202 // Emit helper routines.
203 unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
204 const Value *RHS, bool SetFlags = false,
205 bool WantResult = true, bool IsZExt = false);
206 unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
207 unsigned RHSReg, bool SetFlags = false,
208 bool WantResult = true);
209 unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
210 uint64_t Imm, bool SetFlags = false,
211 bool WantResult = true);
212 unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
213 unsigned RHSReg, AArch64_AM::ShiftExtendType ShiftType,
214 uint64_t ShiftImm, bool SetFlags = false,
215 bool WantResult = true);
216 unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
217 unsigned RHSReg, AArch64_AM::ShiftExtendType ExtType,
218 uint64_t ShiftImm, bool SetFlags = false,
219 bool WantResult = true);
220
221 // Emit functions.
222 bool emitCompareAndBranch(const BranchInst *BI);
223 bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
224 bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
225 bool emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
226 bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
227 unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
228 MachineMemOperand *MMO = nullptr);
229 bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
230 MachineMemOperand *MMO = nullptr);
231 bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
232 MachineMemOperand *MMO = nullptr);
233 unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
234 unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
235 unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
236 bool SetFlags = false, bool WantResult = true,
237 bool IsZExt = false);
238 unsigned emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm);
239 unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
240 bool SetFlags = false, bool WantResult = true,
241 bool IsZExt = false);
242 unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
243 bool WantResult = true);
244 unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
245 AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
246 bool WantResult = true);
247 unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
248 const Value *RHS);
249 unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
250 uint64_t Imm);
251 unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
252 unsigned RHSReg, uint64_t ShiftImm);
253 unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
254 unsigned emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1);
255 unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
256 unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
257 unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
258 unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
259 bool IsZExt = true);
260 unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
261 unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
262 bool IsZExt = true);
263 unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
264 unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
265 bool IsZExt = false);
266
267 unsigned materializeInt(const ConstantInt *CI, MVT VT);
268 unsigned materializeFP(const ConstantFP *CFP, MVT VT);
269 unsigned materializeGV(const GlobalValue *GV);
270
271 // Call handling routines.
272private:
273 CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
274 bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
275 unsigned &NumBytes);
276 bool finishCall(CallLoweringInfo &CLI, unsigned NumBytes);
277
278public:
279 // Backend specific FastISel code.
280 unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
281 unsigned fastMaterializeConstant(const Constant *C) override;
282 unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
283
284 explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
285 const TargetLibraryInfo *LibInfo)
286 : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
287 Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>();
288 Context = &FuncInfo.Fn->getContext();
289 }
290
291 bool fastSelectInstruction(const Instruction *I) override;
292
293#include "AArch64GenFastISel.inc"
294};
295
296} // end anonymous namespace
297
298/// Check if the sign-/zero-extend will be a noop.
299static bool isIntExtFree(const Instruction *I) {
300 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
301 "Unexpected integer extend instruction.");
302 assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
303 "Unexpected value type.");
304 bool IsZExt = isa<ZExtInst>(I);
305
306 if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
307 if (LI->hasOneUse())
308 return true;
309
310 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
311 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
312 return true;
313
314 return false;
315}
316
317/// Determine the implicit scale factor that is applied by a memory
318/// operation for a given value type.
319static unsigned getImplicitScaleFactor(MVT VT) {
320 switch (VT.SimpleTy) {
321 default:
322 return 0; // invalid
323 case MVT::i1: // fall-through
324 case MVT::i8:
325 return 1;
326 case MVT::i16:
327 return 2;
328 case MVT::i32: // fall-through
329 case MVT::f32:
330 return 4;
331 case MVT::i64: // fall-through
332 case MVT::f64:
333 return 8;
334 }
335}
336
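// Select the calling-convention assignment function that processCallArgs and
// finishCall will use for the given calling convention.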
337CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
338 if (CC == CallingConv::GHC)
339 return CC_AArch64_GHC;
340 if (CC == CallingConv::CFGuard_Check)
341 return CC_AArch64_Win64_CFGuard_Check;
342 if (Subtarget->isTargetDarwin())
343 return CC_AArch64_DarwinPCS;
344 if (Subtarget->isTargetWindows())
345 return CC_AArch64_Win64PCS;
346 return CC_AArch64_AAPCS;
347}
348
349unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
350 assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
351 "Alloca should always return a pointer.");
352
353 // Don't handle dynamic allocas.
354 if (!FuncInfo.StaticAllocaMap.count(AI))
355 return 0;
356
357 DenseMap<const AllocaInst *, int>::iterator SI =
358 FuncInfo.StaticAllocaMap.find(AI);
359
360 if (SI != FuncInfo.StaticAllocaMap.end()) {
361 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
362 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
363 ResultReg)
364 .addFrameIndex(SI->second)
365 .addImm(0)
366 .addImm(0);
367 return ResultReg;
368 }
369
370 return 0;
371}
372
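// Materialize an integer constant into a register. Zero is handled as a copy
// from WZR/XZR; all other values go through the tablegen-generated
// fastEmit_i path.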
373unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
374 if (VT > MVT::i64)
375 return 0;
376
377 if (!CI->isZero())
378 return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
379
380 // Create a copy from the zero register to materialize a "0" value.
381 const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
382 : &AArch64::GPR32RegClass;
383 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
384 Register ResultReg = createResultReg(RC);
385 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
386 ResultReg).addReg(ZeroReg, getKillRegState(true));
387 return ResultReg;
388}
389
390unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
391 // Positive zero (+0.0) has to be materialized with a fmov from the zero
392 // register, because the immediate version of fmov cannot encode zero.
393 if (CFP->isNullValue())
394 return fastMaterializeFloatZero(CFP);
395
396 if (VT != MVT::f32 && VT != MVT::f64)
397 return 0;
398
399 const APFloat Val = CFP->getValueAPF();
400 bool Is64Bit = (VT == MVT::f64);
401 // This checks to see if we can use FMOV instructions to materialize
402 // a constant, otherwise we have to materialize via the constant pool.
403 int Imm =
404 Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
405 if (Imm != -1) {
406 unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
407 return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
408 }
409
410 // For the large code model materialize the FP constant in code.
411 if (TM.getCodeModel() == CodeModel::Large) {
412 unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
413 const TargetRegisterClass *RC = Is64Bit ?
414 &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
415
416 Register TmpReg = createResultReg(RC);
417 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc1), TmpReg)
418 .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
419
420 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
421 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
422 TII.get(TargetOpcode::COPY), ResultReg)
423 .addReg(TmpReg, getKillRegState(true));
424
425 return ResultReg;
426 }
427
428 // Materialize via constant pool. MachineConstantPool wants an explicit
429 // alignment.
430 Align Alignment = DL.getPrefTypeAlign(CFP->getType());
431
432 unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
433 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
434 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
435 ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
436
437 unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
438 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
439 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
440 .addReg(ADRPReg)
441 .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
442 return ResultReg;
443}
444
445unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
446 // We can't handle thread-local variables quickly yet.
447 if (GV->isThreadLocal())
448 return 0;
449
450 // MachO still uses GOT for large code-model accesses, but ELF requires
451 // movz/movk sequences, which FastISel doesn't handle yet.
452 if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
453 return 0;
454
455 unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
456
457 EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
458 if (!DestEVT.isSimple())
459 return 0;
460
461 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
462 unsigned ResultReg;
463
464 if (OpFlags & AArch64II::MO_GOT) {
465 // ADRP + LDRX
466 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
467 ADRPReg)
468 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
469
470 unsigned LdrOpc;
471 if (Subtarget->isTargetILP32()) {
472 ResultReg = createResultReg(&AArch64::GPR32RegClass);
473 LdrOpc = AArch64::LDRWui;
474 } else {
475 ResultReg = createResultReg(&AArch64::GPR64RegClass);
476 LdrOpc = AArch64::LDRXui;
477 }
478 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(LdrOpc),
479 ResultReg)
480 .addReg(ADRPReg)
481 .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
482 AArch64II::MO_NC | OpFlags);
483 if (!Subtarget->isTargetILP32())
484 return ResultReg;
485
486 // LDRWui produces a 32-bit register, but pointers in-register are 64-bits
487 // so we must extend the result on ILP32.
488 Register Result64 = createResultReg(&AArch64::GPR64RegClass);
489 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
490 TII.get(TargetOpcode::SUBREG_TO_REG))
491 .addDef(Result64)
492 .addImm(0)
493 .addReg(ResultReg, RegState::Kill)
494 .addImm(AArch64::sub_32);
495 return Result64;
496 } else {
497 // ADRP + ADDX
498 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
499 ADRPReg)
500 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
501
502 if (OpFlags & AArch64II::MO_TAGGED) {
503 // MO_TAGGED on the page indicates a tagged address. Set the tag now.
504 // We do so by creating a MOVK that sets bits 48-63 of the register to
505 // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
506 // the small code model so we can assume a binary size of <= 4GB, which
507 // makes the untagged PC relative offset positive. The binary must also be
508 // loaded into address range [0, 2^48). Both of these properties need to
509 // be ensured at runtime when using tagged addresses.
510 //
511 // TODO: There is duplicate logic in AArch64ExpandPseudoInsts.cpp that
512 // also uses BuildMI for making an ADRP (+ MOVK) + ADD, but the operands
513 // are not exactly 1:1 with FastISel so we cannot easily abstract this
514 // out. At some point, it would be nice to find a way to not have this
515 // duplicate code.
516 unsigned DstReg = createResultReg(&AArch64::GPR64commonRegClass);
517 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::MOVKXi),
518 DstReg)
519 .addReg(ADRPReg)
520 .addGlobalAddress(GV, /*Offset=*/0x100000000,
521 AArch64II::MO_PREL | AArch64II::MO_G3)
522 .addImm(48);
523 ADRPReg = DstReg;
524 }
525
526 ResultReg = createResultReg(&AArch64::GPR64spRegClass);
527 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
528 ResultReg)
529 .addReg(ADRPReg)
530 .addGlobalAddress(GV, 0,
531 AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
532 .addImm(0);
533 }
534 return ResultReg;
535}
536
537unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
538 EVT CEVT = TLI.getValueType(DL, C->getType(), true);
539
540 // Only handle simple types.
541 if (!CEVT.isSimple())
542 return 0;
543 MVT VT = CEVT.getSimpleVT();
544 // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
545 // 'null' pointers need somewhat special treatment.
546 if (isa<ConstantPointerNull>(C)) {
547 assert(VT == MVT::i64 && "Expected 64-bit pointers");
548 return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
549 }
550
551 if (const auto *CI = dyn_cast<ConstantInt>(C))
552 return materializeInt(CI, VT);
553 else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
554 return materializeFP(CFP, VT);
555 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
556 return materializeGV(GV);
557
558 return 0;
559}
560
561unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
562 assert(CFP->isNullValue() &&
563 "Floating-point constant is not a positive zero.");
564 MVT VT;
565 if (!isTypeLegal(CFP->getType(), VT))
566 return 0;
567
568 if (VT != MVT::f32 && VT != MVT::f64)
569 return 0;
570
571 bool Is64Bit = (VT == MVT::f64);
572 unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
573 unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
574 return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);
575}
576
577/// Check if the multiply is by a power-of-2 constant.
578static bool isMulPowOf2(const Value *I) {
579 if (const auto *MI = dyn_cast<MulOperator>(I)) {
580 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
581 if (C->getValue().isPowerOf2())
582 return true;
583 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
584 if (C->getValue().isPowerOf2())
585 return true;
586 }
587 return false;
588}
589
590// Computes the address to get to an object.
591bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
592{
593 const User *U = nullptr;
594 unsigned Opcode = Instruction::UserOp1;
595 if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
596 // Don't walk into other basic blocks unless the object is an alloca from
597 // another block, otherwise it may not have a virtual register assigned.
598 if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
599 FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
600 Opcode = I->getOpcode();
601 U = I;
602 }
603 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
604 Opcode = C->getOpcode();
605 U = C;
606 }
607
608 if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
609 if (Ty->getAddressSpace() > 255)
610 // Fast instruction selection doesn't support the special
611 // address spaces.
612 return false;
613
614 switch (Opcode) {
615 default:
616 break;
617 case Instruction::BitCast:
618 // Look through bitcasts.
619 return computeAddress(U->getOperand(0), Addr, Ty);
620
621 case Instruction::IntToPtr:
622 // Look past no-op inttoptrs.
623 if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
624 TLI.getPointerTy(DL))
625 return computeAddress(U->getOperand(0), Addr, Ty);
626 break;
627
628 case Instruction::PtrToInt:
629 // Look past no-op ptrtoints.
630 if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
631 return computeAddress(U->getOperand(0), Addr, Ty);
632 break;
633
634 case Instruction::GetElementPtr: {
635 Address SavedAddr = Addr;
636 uint64_t TmpOffset = Addr.getOffset();
637
638 // Iterate through the GEP folding the constants into offsets where
639 // we can.
640 for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
641 GTI != E; ++GTI) {
642 const Value *Op = GTI.getOperand();
643 if (StructType *STy = GTI.getStructTypeOrNull()) {
644 const StructLayout *SL = DL.getStructLayout(STy);
645 unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
646 TmpOffset += SL->getElementOffset(Idx);
647 } else {
648 uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
649 while (true) {
650 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
651 // Constant-offset addressing.
652 TmpOffset += CI->getSExtValue() * S;
653 break;
654 }
655 if (canFoldAddIntoGEP(U, Op)) {
656 // A compatible add with a constant operand. Fold the constant.
657 ConstantInt *CI =
658 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
659 TmpOffset += CI->getSExtValue() * S;
660 // Iterate on the other operand.
661 Op = cast<AddOperator>(Op)->getOperand(0);
662 continue;
663 }
664 // Unsupported
665 goto unsupported_gep;
666 }
667 }
668 }
669
670 // Try to grab the base operand now.
671 Addr.setOffset(TmpOffset);
672 if (computeAddress(U->getOperand(0), Addr, Ty))
673 return true;
674
675 // We failed, restore everything and try the other options.
676 Addr = SavedAddr;
677
678 unsupported_gep:
679 break;
680 }
681 case Instruction::Alloca: {
682 const AllocaInst *AI = cast<AllocaInst>(Obj);
683 DenseMap<const AllocaInst *, int>::iterator SI =
684 FuncInfo.StaticAllocaMap.find(AI);
685 if (SI != FuncInfo.StaticAllocaMap.end()) {
686 Addr.setKind(Address::FrameIndexBase);
687 Addr.setFI(SI->second);
688 return true;
689 }
690 break;
691 }
692 case Instruction::Add: {
693 // Adds of constants are common and easy enough.
694 const Value *LHS = U->getOperand(0);
695 const Value *RHS = U->getOperand(1);
696
697 if (isa<ConstantInt>(LHS))
698 std::swap(LHS, RHS);
699
700 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
701 Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
702 return computeAddress(LHS, Addr, Ty);
703 }
704
705 Address Backup = Addr;
706 if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
707 return true;
708 Addr = Backup;
709
710 break;
711 }
712 case Instruction::Sub: {
713 // Subs of constants are common and easy enough.
714 const Value *LHS = U->getOperand(0);
715 const Value *RHS = U->getOperand(1);
716
717 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
718 Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
719 return computeAddress(LHS, Addr, Ty);
720 }
721 break;
722 }
723 case Instruction::Shl: {
724 if (Addr.getOffsetReg())
725 break;
726
727 const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
728 if (!CI)
729 break;
730
731 unsigned Val = CI->getZExtValue();
732 if (Val < 1 || Val > 3)
733 break;
734
735 uint64_t NumBytes = 0;
736 if (Ty && Ty->isSized()) {
737 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
738 NumBytes = NumBits / 8;
739 if (!isPowerOf2_64(NumBits))
740 NumBytes = 0;
741 }
742
743 if (NumBytes != (1ULL << Val))
744 break;
745
746 Addr.setShift(Val);
747 Addr.setExtendType(AArch64_AM::LSL);
748
749 const Value *Src = U->getOperand(0);
750 if (const auto *I = dyn_cast<Instruction>(Src)) {
751 if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
752 // Fold the zext or sext when it won't become a noop.
753 if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
754 if (!isIntExtFree(ZE) &&
755 ZE->getOperand(0)->getType()->isIntegerTy(32)) {
756 Addr.setExtendType(AArch64_AM::UXTW);
757 Src = ZE->getOperand(0);
758 }
759 } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
760 if (!isIntExtFree(SE) &&
761 SE->getOperand(0)->getType()->isIntegerTy(32)) {
762 Addr.setExtendType(AArch64_AM::SXTW);
763 Src = SE->getOperand(0);
764 }
765 }
766 }
767 }
768
769 if (const auto *AI = dyn_cast<BinaryOperator>(Src))
770 if (AI->getOpcode() == Instruction::And) {
771 const Value *LHS = AI->getOperand(0);
772 const Value *RHS = AI->getOperand(1);
773
774 if (const auto *C = dyn_cast<ConstantInt>(LHS))
775 if (C->getValue() == 0xffffffff)
776 std::swap(LHS, RHS);
777
778 if (const auto *C = dyn_cast<ConstantInt>(RHS))
779 if (C->getValue() == 0xffffffff) {
780 Addr.setExtendType(AArch64_AM::UXTW);
781 Register Reg = getRegForValue(LHS);
782 if (!Reg)
783 return false;
784 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
785 Addr.setOffsetReg(Reg);
786 return true;
787 }
788 }
789
790 Register Reg = getRegForValue(Src);
791 if (!Reg)
792 return false;
793 Addr.setOffsetReg(Reg);
794 return true;
795 }
796 case Instruction::Mul: {
797 if (Addr.getOffsetReg())
798 break;
799
800 if (!isMulPowOf2(U))
801 break;
802
803 const Value *LHS = U->getOperand(0);
804 const Value *RHS = U->getOperand(1);
805
806 // Canonicalize power-of-2 value to the RHS.
807 if (const auto *C = dyn_cast<ConstantInt>(LHS))
808 if (C->getValue().isPowerOf2())
809 std::swap(LHS, RHS);
810
811 assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
812 const auto *C = cast<ConstantInt>(RHS);
813 unsigned Val = C->getValue().logBase2();
814 if (Val < 1 || Val > 3)
815 break;
816
817 uint64_t NumBytes = 0;
818 if (Ty && Ty->isSized()) {
819 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
820 NumBytes = NumBits / 8;
821 if (!isPowerOf2_64(NumBits))
822 NumBytes = 0;
823 }
824
825 if (NumBytes != (1ULL << Val))
826 break;
827
828 Addr.setShift(Val);
829 Addr.setExtendType(AArch64_AM::LSL);
830
831 const Value *Src = LHS;
832 if (const auto *I = dyn_cast<Instruction>(Src)) {
833 if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
834 // Fold the zext or sext when it won't become a noop.
835 if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
836 if (!isIntExtFree(ZE) &&
837 ZE->getOperand(0)->getType()->isIntegerTy(32)) {
838 Addr.setExtendType(AArch64_AM::UXTW);
839 Src = ZE->getOperand(0);
840 }
841 } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
842 if (!isIntExtFree(SE) &&
843 SE->getOperand(0)->getType()->isIntegerTy(32)) {
844 Addr.setExtendType(AArch64_AM::SXTW);
845 Src = SE->getOperand(0);
846 }
847 }
848 }
849 }
850
851 Register Reg = getRegForValue(Src);
852 if (!Reg)
853 return false;
854 Addr.setOffsetReg(Reg);
855 return true;
856 }
857 case Instruction::And: {
858 if (Addr.getOffsetReg())
859 break;
860
861 if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
862 break;
863
864 const Value *LHS = U->getOperand(0);
865 const Value *RHS = U->getOperand(1);
866
867 if (const auto *C = dyn_cast<ConstantInt>(LHS))
868 if (C->getValue() == 0xffffffff)
869 std::swap(LHS, RHS);
870
871 if (const auto *C = dyn_cast<ConstantInt>(RHS))
872 if (C->getValue() == 0xffffffff) {
873 Addr.setShift(0);
874 Addr.setExtendType(AArch64_AM::LSL);
875 Addr.setExtendType(AArch64_AM::UXTW);
876
877 Register Reg = getRegForValue(LHS);
878 if (!Reg)
879 return false;
880 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
881 Addr.setOffsetReg(Reg);
882 return true;
883 }
884 break;
885 }
886 case Instruction::SExt:
887 case Instruction::ZExt: {
888 if (!Addr.getReg() || Addr.getOffsetReg())
889 break;
890
891 const Value *Src = nullptr;
892 // Fold the zext or sext when it won't become a noop.
893 if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
894 if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
895 Addr.setExtendType(AArch64_AM::UXTW);
896 Src = ZE->getOperand(0);
897 }
898 } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
899 if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
900 Addr.setExtendType(AArch64_AM::SXTW);
901 Src = SE->getOperand(0);
902 }
903 }
904
905 if (!Src)
906 break;
907
908 Addr.setShift(0);
909 Register Reg = getRegForValue(Src);
910 if (!Reg)
911 return false;
912 Addr.setOffsetReg(Reg);
913 return true;
914 }
915 } // end switch
916
917 if (Addr.isRegBase() && !Addr.getReg()) {
918 Register Reg = getRegForValue(Obj);
919 if (!Reg)
920 return false;
921 Addr.setReg(Reg);
922 return true;
923 }
924
925 if (!Addr.getOffsetReg()) {
926 Register Reg = getRegForValue(Obj);
927 if (!Reg)
928 return false;
929 Addr.setOffsetReg(Reg);
930 return true;
931 }
932
933 return false;
934}
935
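// Compute the address of a call target. Unlike computeAddress, this only
// looks through casts whose operand is in the current block and prefers
// recording a GlobalValue; otherwise the callee is materialized into a
// register.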
936bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
937 const User *U = nullptr;
938 unsigned Opcode = Instruction::UserOp1;
939 bool InMBB = true;
940
941 if (const auto *I = dyn_cast<Instruction>(V)) {
942 Opcode = I->getOpcode();
943 U = I;
944 InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
945 } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
946 Opcode = C->getOpcode();
947 U = C;
948 }
949
950 switch (Opcode) {
951 default: break;
952 case Instruction::BitCast:
953 // Look past bitcasts if its operand is in the same BB.
954 if (InMBB)
955 return computeCallAddress(U->getOperand(0), Addr);
956 break;
957 case Instruction::IntToPtr:
958 // Look past no-op inttoptrs if its operand is in the same BB.
959 if (InMBB &&
960 TLI.getValueType(DL, U->getOperand(0)->getType()) ==
961 TLI.getPointerTy(DL))
962 return computeCallAddress(U->getOperand(0), Addr);
963 break;
964 case Instruction::PtrToInt:
965 // Look past no-op ptrtoints if its operand is in the same BB.
966 if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
967 return computeCallAddress(U->getOperand(0), Addr);
968 break;
969 }
970
971 if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
972 Addr.setGlobalValue(GV);
973 return true;
974 }
975
976 // If all else fails, try to materialize the value in a register.
977 if (!Addr.getGlobalValue()) {
978 Addr.setReg(getRegForValue(V));
979 return Addr.getReg() != 0;
980 }
981
982 return false;
983}
984
985bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
986 EVT evt = TLI.getValueType(DL, Ty, true);
987
988 if (Subtarget->isTargetILP32() && Ty->isPointerTy())
989 return false;
990
991 // Only handle simple types.
992 if (evt == MVT::Other || !evt.isSimple())
993 return false;
994 VT = evt.getSimpleVT();
995
996 // This is a legal type, but it's not something we handle in fast-isel.
997 if (VT == MVT::f128)
998 return false;
999
1000 // Handle all other legal types, i.e. a register that will directly hold this
1001 // value.
1002 return TLI.isTypeLegal(VT);
1003}
1004
1005/// Determine if the value type is supported by FastISel.
1006///
1007/// FastISel for AArch64 can handle more value types than are legal. This adds
1008/// simple value types such as i1, i8, and i16.
1009bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
1010 if (Ty->isVectorTy() && !IsVectorAllowed)
1011 return false;
1012
1013 if (isTypeLegal(Ty, VT))
1014 return true;
1015
1016 // If this is a type that can be sign- or zero-extended to a basic operation
1017 // go ahead and accept it now.
1018 if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
1019 return true;
1020
1021 return false;
1022}
1023
1024bool AArch64FastISel::isValueAvailable(const Value *V) const {
1025 if (!isa<Instruction>(V))
1026 return true;
1027
1028 const auto *I = cast<Instruction>(V);
1029 return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
1030}
1031
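// Rewrite the computed address into a form the selected load/store can
// encode: fold a frame index into a base register when necessary, lower
// register offsets that cannot be combined with an immediate, and move
// out-of-range immediates into the base register via an extra add.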
1032bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
1033 if (Subtarget->isTargetILP32())
1034 return false;
1035
1036 unsigned ScaleFactor = getImplicitScaleFactor(VT);
1037 if (!ScaleFactor)
1038 return false;
1039
1040 bool ImmediateOffsetNeedsLowering = false;
1041 bool RegisterOffsetNeedsLowering = false;
1042 int64_t Offset = Addr.getOffset();
1043 if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
1044 ImmediateOffsetNeedsLowering = true;
1045 else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1046 !isUInt<12>(Offset / ScaleFactor))
1047 ImmediateOffsetNeedsLowering = true;
1048
1049 // Cannot encode an offset register and an immediate offset in the same
1050 // instruction. Fold the immediate offset into the load/store instruction and
1051 // emit an additional add to take care of the offset register.
1052 if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1053 RegisterOffsetNeedsLowering = true;
1054
1055 // Cannot encode zero register as base.
1056 if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1057 RegisterOffsetNeedsLowering = true;
1058
1059 // If this is a stack pointer and the offset needs to be simplified then put
1060 // the alloca address into a register, set the base type back to register and
1061 // continue. This should almost never happen.
1062 if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1063 {
1064 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1065 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
1066 ResultReg)
1067 .addFrameIndex(Addr.getFI())
1068 .addImm(0)
1069 .addImm(0);
1070 Addr.setKind(Address::RegBase);
1071 Addr.setReg(ResultReg);
1072 }
1073
1074 if (RegisterOffsetNeedsLowering) {
1075 unsigned ResultReg = 0;
1076 if (Addr.getReg()) {
1077 if (Addr.getExtendType() == AArch64_AM::SXTW ||
1078 Addr.getExtendType() == AArch64_AM::UXTW )
1079 ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1080 Addr.getOffsetReg(), Addr.getExtendType(),
1081 Addr.getShift());
1082 else
1083 ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1084 Addr.getOffsetReg(), AArch64_AM::LSL,
1085 Addr.getShift());
1086 } else {
1087 if (Addr.getExtendType() == AArch64_AM::UXTW)
1088 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1089 Addr.getShift(), /*IsZExt=*/true);
1090 else if (Addr.getExtendType() == AArch64_AM::SXTW)
1091 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1092 Addr.getShift(), /*IsZExt=*/false);
1093 else
1094 ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1095 Addr.getShift());
1096 }
1097 if (!ResultReg)
1098 return false;
1099
1100 Addr.setReg(ResultReg);
1101 Addr.setOffsetReg(0);
1102 Addr.setShift(0);
1103 Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1104 }
1105
1106 // Since the offset is too large for the load/store instruction get the
1107 // reg+offset into a register.
1108 if (ImmediateOffsetNeedsLowering) {
1109 unsigned ResultReg;
1110 if (Addr.getReg())
1111 // Try to fold the immediate into the add instruction.
1112 ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
1113 else
1114 ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1115
1116 if (!ResultReg)
1117 return false;
1118 Addr.setReg(ResultReg);
1119 Addr.setOffset(0);
1120 }
1121 return true;
1122}
1123
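// Append the addressing-mode operands (frame index + immediate, or base and
// offset registers with extend/shift flags) and the memory operand to an
// already-created load/store instruction.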
1124void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1125 const MachineInstrBuilder &MIB,
1126 MachineMemOperand::Flags Flags,
1127 unsigned ScaleFactor,
1128 MachineMemOperand *MMO) {
1129 int64_t Offset = Addr.getOffset() / ScaleFactor;
1130 // Frame base works a bit differently. Handle it separately.
1131 if (Addr.isFIBase()) {
1132 int FI = Addr.getFI();
1133 // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
1134 // and alignment should be based on the VT.
1135 MMO = FuncInfo.MF->getMachineMemOperand(
1136 MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1137 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
1138 // Now add the rest of the operands.
1139 MIB.addFrameIndex(FI).addImm(Offset);
1140 } else {
1141 assert(Addr.isRegBase() && "Unexpected address kind.");
1142 const MCInstrDesc &II = MIB->getDesc();
1143 unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1144 Addr.setReg(
1145 constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1146 Addr.setOffsetReg(
1147 constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1148 if (Addr.getOffsetReg()) {
1149 assert(Addr.getOffset() == 0 && "Unexpected offset");
1150 bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1151 Addr.getExtendType() == AArch64_AM::SXTX;
1152 MIB.addReg(Addr.getReg());
1153 MIB.addReg(Addr.getOffsetReg());
1154 MIB.addImm(IsSigned);
1155 MIB.addImm(Addr.getShift() != 0);
1156 } else
1157 MIB.addReg(Addr.getReg()).addImm(Offset);
1158 }
1159
1160 if (MMO)
1161 MIB.addMemOperand(MMO);
1162}
1163
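// Common ADD/SUB emission helper: canonicalize constants, power-of-two
// multiplies, and shifts to the RHS, then try the immediate,
// extended-register, and shifted-register forms before falling back to the
// plain register-register encoding.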
1164unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1165 const Value *RHS, bool SetFlags,
1166 bool WantResult, bool IsZExt) {
1167 AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1168 bool NeedExtend = false;
1169 switch (RetVT.SimpleTy) {
1170 default:
1171 return 0;
1172 case MVT::i1:
1173 NeedExtend = true;
1174 break;
1175 case MVT::i8:
1176 NeedExtend = true;
1177 ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1178 break;
1179 case MVT::i16:
1180 NeedExtend = true;
1181 ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1182 break;
1183 case MVT::i32: // fall-through
1184 case MVT::i64:
1185 break;
1186 }
1187 MVT SrcVT = RetVT;
1188 RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1189
1190 // Canonicalize immediates to the RHS first.
1191 if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1192 std::swap(LHS, RHS);
1193
1194 // Canonicalize mul by power of 2 to the RHS.
1195 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1196 if (isMulPowOf2(LHS))
1197 std::swap(LHS, RHS);
1198
1199 // Canonicalize shift immediate to the RHS.
1200 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1201 if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1202 if (isa<ConstantInt>(SI->getOperand(1)))
1203 if (SI->getOpcode() == Instruction::Shl ||
1204 SI->getOpcode() == Instruction::LShr ||
1205 SI->getOpcode() == Instruction::AShr )
1206 std::swap(LHS, RHS);
1207
1208 Register LHSReg = getRegForValue(LHS);
1209 if (!LHSReg)
1210 return 0;
1211
1212 if (NeedExtend)
1213 LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1214
1215 unsigned ResultReg = 0;
1216 if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1217 uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1218 if (C->isNegative())
1219 ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
1220 WantResult);
1221 else
1222 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
1223 WantResult);
1224 } else if (const auto *C = dyn_cast<Constant>(RHS))
1225 if (C->isNullValue())
1226 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);
1227
1228 if (ResultReg)
1229 return ResultReg;
1230
1231 // Only extend the RHS within the instruction if there is a valid extend type.
1232 if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1233 isValueAvailable(RHS)) {
1234 if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
1235 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
1236 if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
1237 Register RHSReg = getRegForValue(SI->getOperand(0));
1238 if (!RHSReg)
1239 return 0;
1240 return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType,
1241 C->getZExtValue(), SetFlags, WantResult);
1242 }
1243 Register RHSReg = getRegForValue(RHS);
1244 if (!RHSReg)
1245 return 0;
1246 return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,
1247 SetFlags, WantResult);
1248 }
1249
1250 // Check if the mul can be folded into the instruction.
1251 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1252 if (isMulPowOf2(RHS)) {
1253 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1254 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1255
1256 if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1257 if (C->getValue().isPowerOf2())
1258 std::swap(MulLHS, MulRHS);
1259
1260 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1261 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1262 Register RHSReg = getRegForValue(MulLHS);
1263 if (!RHSReg)
1264 return 0;
1265 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,
1266 ShiftVal, SetFlags, WantResult);
1267 if (ResultReg)
1268 return ResultReg;
1269 }
1270 }
1271
1272 // Check if the shift can be folded into the instruction.
1273 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1274 if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1275 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1276 AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1277 switch (SI->getOpcode()) {
1278 default: break;
1279 case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
1280 case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1281 case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1282 }
1283 uint64_t ShiftVal = C->getZExtValue();
1284 if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1285 Register RHSReg = getRegForValue(SI->getOperand(0));
1286 if (!RHSReg)
1287 return 0;
1288 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
1289 ShiftVal, SetFlags, WantResult);
1290 if (ResultReg)
1291 return ResultReg;
1292 }
1293 }
1294 }
1295 }
1296
1297 Register RHSReg = getRegForValue(RHS);
1298 if (!RHSReg)
1299 return 0;
1300
1301 if (NeedExtend)
1302 RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1303
1304 return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
1305}
1306
1307unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1308 unsigned RHSReg, bool SetFlags,
1309 bool WantResult) {
1310 assert(LHSReg && RHSReg && "Invalid register number.");
1311
1312 if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1313 RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1314 return 0;
1315
1316 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1317 return 0;
1318
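// Opcode table indexed as OpcTable[SetFlags][UseAdd][Is64Bit].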
1319 static const unsigned OpcTable[2][2][2] = {
1320 { { AArch64::SUBWrr, AArch64::SUBXrr },
1321 { AArch64::ADDWrr, AArch64::ADDXrr } },
1322 { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1323 { AArch64::ADDSWrr, AArch64::ADDSXrr } }
1324 };
1325 bool Is64Bit = RetVT == MVT::i64;
1326 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1327 const TargetRegisterClass *RC =
1328 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1329 unsigned ResultReg;
1330 if (WantResult)
1331 ResultReg = createResultReg(RC);
1332 else
1333 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1334
1335 const MCInstrDesc &II = TII.get(Opc);
1336 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1337 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1338 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1339 .addReg(LHSReg)
1340 .addReg(RHSReg);
1341 return ResultReg;
1342}
1343
1344unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1345 uint64_t Imm, bool SetFlags,
1346 bool WantResult) {
1347 assert(LHSReg && "Invalid register number.");
1348
1349 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1350 return 0;
1351
1352 unsigned ShiftImm;
1353 if (isUInt<12>(Imm))
1354 ShiftImm = 0;
1355 else if ((Imm & 0xfff000) == Imm) {
1356 ShiftImm = 12;
1357 Imm >>= 12;
1358 } else
1359 return 0;
1360
1361 static const unsigned OpcTable[2][2][2] = {
1362 { { AArch64::SUBWri, AArch64::SUBXri },
1363 { AArch64::ADDWri, AArch64::ADDXri } },
1364 { { AArch64::SUBSWri, AArch64::SUBSXri },
1365 { AArch64::ADDSWri, AArch64::ADDSXri } }
1366 };
1367 bool Is64Bit = RetVT == MVT::i64;
1368 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1369 const TargetRegisterClass *RC;
1370 if (SetFlags)
1371 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1372 else
1373 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1374 unsigned ResultReg;
1375 if (WantResult)
1376 ResultReg = createResultReg(RC);
1377 else
1378 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1379
1380 const MCInstrDesc &II = TII.get(Opc);
1381 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1382 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1383 .addReg(LHSReg)
1384 .addImm(Imm)
1385 .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1386 return ResultReg;
1387}
1388
1389unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1390 unsigned RHSReg,
1391 AArch64_AM::ShiftExtendType ShiftType,
1392 uint64_t ShiftImm, bool SetFlags,
1393 bool WantResult) {
1394 assert(LHSReg && RHSReg && "Invalid register number.");
1395 assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1396 RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1397
1398 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1399 return 0;
1400
1401 // Don't deal with undefined shifts.
1402 if (ShiftImm >= RetVT.getSizeInBits())
1403 return 0;
1404
1405 static const unsigned OpcTable[2][2][2] = {
1406 { { AArch64::SUBWrs, AArch64::SUBXrs },
1407 { AArch64::ADDWrs, AArch64::ADDXrs } },
1408 { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1409 { AArch64::ADDSWrs, AArch64::ADDSXrs } }
1410 };
1411 bool Is64Bit = RetVT == MVT::i64;
1412 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1413 const TargetRegisterClass *RC =
1414 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1415 unsigned ResultReg;
1416 if (WantResult)
1417 ResultReg = createResultReg(RC);
1418 else
1419 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1420
1421 const MCInstrDesc &II = TII.get(Opc);
1422 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1423 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1424 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1425 .addReg(LHSReg)
1426 .addReg(RHSReg)
1427 .addImm(getShifterImm(ShiftType, ShiftImm));
1428 return ResultReg;
1429}
1430
1431unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1432 unsigned RHSReg,
1433 AArch64_AM::ShiftExtendType ExtType,
1434 uint64_t ShiftImm, bool SetFlags,
1435 bool WantResult) {
1436 assert(LHSReg && RHSReg && "Invalid register number.");
1437 assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1438 RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1439
1440 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1441 return 0;
1442
1443 if (ShiftImm >= 4)
1444 return 0;
1445
1446 static const unsigned OpcTable[2][2][2] = {
1447 { { AArch64::SUBWrx, AArch64::SUBXrx },
1448 { AArch64::ADDWrx, AArch64::ADDXrx } },
1449 { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1450 { AArch64::ADDSWrx, AArch64::ADDSXrx } }
1451 };
1452 bool Is64Bit = RetVT == MVT::i64;
1453 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1454 const TargetRegisterClass *RC = nullptr;
1455 if (SetFlags)
1456 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1457 else
1458 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1459 unsigned ResultReg;
1460 if (WantResult)
1461 ResultReg = createResultReg(RC);
1462 else
1463 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1464
1465 const MCInstrDesc &II = TII.get(Opc);
1466 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1467 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1468 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1469 .addReg(LHSReg)
1470 .addReg(RHSReg)
1471 .addImm(getArithExtendImm(ExtType, ShiftImm));
1472 return ResultReg;
1473}
1474
1475bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1476 Type *Ty = LHS->getType();
1477 EVT EVT = TLI.getValueType(DL, Ty, true);
1478 if (!EVT.isSimple())
1479 return false;
1480 MVT VT = EVT.getSimpleVT();
1481
1482 switch (VT.SimpleTy) {
1483 default:
1484 return false;
1485 case MVT::i1:
1486 case MVT::i8:
1487 case MVT::i16:
1488 case MVT::i32:
1489 case MVT::i64:
1490 return emitICmp(VT, LHS, RHS, IsZExt);
1491 case MVT::f32:
1492 case MVT::f64:
1493 return emitFCmp(VT, LHS, RHS);
1494 }
1495}
1496
1497bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1498 bool IsZExt) {
1499 return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1500 IsZExt) != 0;
1501}
1502
1503bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) {
1504 return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,
1505 /*SetFlags=*/true, /*WantResult=*/false) != 0;
1506}
1507
1508bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1509 if (RetVT != MVT::f32 && RetVT != MVT::f64)
1510 return false;
1511
1512 // Check to see if the 2nd operand is a constant that we can encode directly
1513 // in the compare.
1514 bool UseImm = false;
1515 if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1516 if (CFP->isZero() && !CFP->isNegative())
1517 UseImm = true;
1518
1519 Register LHSReg = getRegForValue(LHS);
1520 if (!LHSReg)
1521 return false;
1522
1523 if (UseImm) {
1524 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1525 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1526 .addReg(LHSReg);
1527 return true;
1528 }
1529
1530 Register RHSReg = getRegForValue(RHS);
1531 if (!RHSReg)
1532 return false;
1533
1534 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1535 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1536 .addReg(LHSReg)
1537 .addReg(RHSReg);
1538 return true;
1539}
1540
1541unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1542 bool SetFlags, bool WantResult, bool IsZExt) {
1543 return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1544 IsZExt);
1545}
1546
1547/// This method is a wrapper to simplify add emission.
1548///
1549/// First try to emit an add with an immediate operand using emitAddSub_ri. If
1550/// that fails, then try to materialize the immediate into a register and use
1551/// emitAddSub_rr instead.
1552unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm) {
1553 unsigned ResultReg;
1554 if (Imm < 0)
1555 ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);
1556 else
1557 ResultReg = emitAddSub_ri(true, VT, Op0, Imm);
1558
1559 if (ResultReg)
1560 return ResultReg;
1561
1562 unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1563 if (!CReg)
1564 return 0;
1565
1566 ResultReg = emitAddSub_rr(true, VT, Op0, CReg);
1567 return ResultReg;
1568}
1569
1570unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1571 bool SetFlags, bool WantResult, bool IsZExt) {
1572 return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1573 IsZExt);
1574}
1575
1576unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1577 unsigned RHSReg, bool WantResult) {
1578 return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg,
1579 /*SetFlags=*/true, WantResult);
1580}
1581
1582unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1583 unsigned RHSReg,
1584 AArch64_AM::ShiftExtendType ShiftType,
1585 uint64_t ShiftImm, bool WantResult) {
1586 return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType,
1587 ShiftImm, /*SetFlags=*/true, WantResult);
1588}
1589
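// Shared lowering for the logical operations (ISD::AND/OR/XOR): canonicalize
// constants, power-of-two multiplies, and shift-by-immediate to the RHS so
// they can be folded into the immediate or shifted-register instruction
// forms.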
1590unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1591 const Value *LHS, const Value *RHS) {
1592 // Canonicalize immediates to the RHS first.
1593 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1594 std::swap(LHS, RHS);
1595
1596 // Canonicalize mul by power-of-2 to the RHS.
1597 if (LHS->hasOneUse() && isValueAvailable(LHS))
1598 if (isMulPowOf2(LHS))
1599 std::swap(LHS, RHS);
1600
1601 // Canonicalize shift immediate to the RHS.
1602 if (LHS->hasOneUse() && isValueAvailable(LHS))
1603 if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1604 if (isa<ConstantInt>(SI->getOperand(1)))
1605 std::swap(LHS, RHS);
1606
1607 Register LHSReg = getRegForValue(LHS);
1608 if (!LHSReg)
1609 return 0;
1610
1611 unsigned ResultReg = 0;
1612 if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1613 uint64_t Imm = C->getZExtValue();
1614 ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);
1615 }
1616 if (ResultReg)
1617 return ResultReg;
1618
1619 // Check if the mul can be folded into the instruction.
1620 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1621 if (isMulPowOf2(RHS)) {
1622 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1623 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1624
1625 if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1626 if (C->getValue().isPowerOf2())
1627 std::swap(MulLHS, MulRHS);
1628
1629 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1630 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1631
1632 Register RHSReg = getRegForValue(MulLHS);
1633 if (!RHSReg)
1634 return 0;
1635 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1636 if (ResultReg)
1637 return ResultReg;
1638 }
1639 }
1640
1641 // Check if the shift can be folded into the instruction.
1642 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1643 if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1644 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1645 uint64_t ShiftVal = C->getZExtValue();
1646 Register RHSReg = getRegForValue(SI->getOperand(0));
1647 if (!RHSReg)
1648 return 0;
1649 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1650 if (ResultReg)
1651 return ResultReg;
1652 }
1653 }
1654
1655 Register RHSReg = getRegForValue(RHS);
1656 if (!RHSReg)
1657 return 0;
1658
1659 MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1660 ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);
1661 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1662 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1663 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1664 }
1665 return ResultReg;
1666}
1667
1668unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1669 unsigned LHSReg, uint64_t Imm) {
1670 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1671 "ISD nodes are not consecutive!");
1672 static const unsigned OpcTable[3][2] = {
1673 { AArch64::ANDWri, AArch64::ANDXri },
1674 { AArch64::ORRWri, AArch64::ORRXri },
1675 { AArch64::EORWri, AArch64::EORXri }
1676 };
1677 const TargetRegisterClass *RC;
1678 unsigned Opc;
1679 unsigned RegSize;
1680 switch (RetVT.SimpleTy) {
1681 default:
1682 return 0;
1683 case MVT::i1:
1684 case MVT::i8:
1685 case MVT::i16:
1686 case MVT::i32: {
1687 unsigned Idx = ISDOpc - ISD::AND;
1688 Opc = OpcTable[Idx][0];
1689 RC = &AArch64::GPR32spRegClass;
1690 RegSize = 32;
1691 break;
1692 }
1693 case MVT::i64:
1694 Opc = OpcTable[ISDOpc - ISD::AND][1];
1695 RC = &AArch64::GPR64spRegClass;
1696 RegSize = 64;
1697 break;
1698 }
1699
1700 if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1701 return 0;
1702
1703 Register ResultReg =
1704 fastEmitInst_ri(Opc, RC, LHSReg,
1705 AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1706 if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1707 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1708 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1709 }
1710 return ResultReg;
1711}
1712
1713unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1714 unsigned LHSReg, unsigned RHSReg,
1715 uint64_t ShiftImm) {
1716 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1717 "ISD nodes are not consecutive!");
1718 static const unsigned OpcTable[3][2] = {
1719 { AArch64::ANDWrs, AArch64::ANDXrs },
1720 { AArch64::ORRWrs, AArch64::ORRXrs },
1721 { AArch64::EORWrs, AArch64::EORXrs }
1722 };
1723
1724 // Don't deal with undefined shifts.
1725 if (ShiftImm >= RetVT.getSizeInBits())
1726 return 0;
1727
1728 const TargetRegisterClass *RC;
1729 unsigned Opc;
1730 switch (RetVT.SimpleTy) {
1731 default:
1732 return 0;
1733 case MVT::i1:
1734 case MVT::i8:
1735 case MVT::i16:
1736 case MVT::i32:
1737 Opc = OpcTable[ISDOpc - ISD::AND][0];
1738 RC = &AArch64::GPR32RegClass;
1739 break;
1740 case MVT::i64:
1741 Opc = OpcTable[ISDOpc - ISD::AND][1];
1742 RC = &AArch64::GPR64RegClass;
1743 break;
1744 }
1745 Register ResultReg =
1746 fastEmitInst_rri(Opc, RC, LHSReg, RHSReg,
1747 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1748 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1749 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1750 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1751 }
1752 return ResultReg;
1753}
1754
1755unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg,
1756 uint64_t Imm) {
1757 return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm);
1758}
1759
1760unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1761 bool WantZExt, MachineMemOperand *MMO) {
1762 if (!TLI.allowsMisalignedMemoryAccesses(VT))
1763 return 0;
1764
1765 // Simplify this down to something we can handle.
1766 if (!simplifyAddress(Addr, VT))
1767 return 0;
1768
1769 unsigned ScaleFactor = getImplicitScaleFactor(VT);
1770 if (!ScaleFactor)
1771 llvm_unreachable("Unexpected value type.");
1772
1773 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1774 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1775 bool UseScaled = true;
1776 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1777 UseScaled = false;
1778 ScaleFactor = 1;
1779 }
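// For example, an i32 load at offset 20 keeps the scaled form (LDRWui with
// immediate 20 / 4 = 5), while offsets such as -4 or 3 fall back to the
// unscaled LDURWi form with its signed 9-bit byte offset.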
1780
1781 static const unsigned GPOpcTable[2][8][4] = {
1782 // Sign-extend.
1783 { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
1784 AArch64::LDURXi },
1785 { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
1786 AArch64::LDURXi },
1787 { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
1788 AArch64::LDRXui },
1789 { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
1790 AArch64::LDRXui },
1791 { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1792 AArch64::LDRXroX },
1793 { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1794 AArch64::LDRXroX },
1795 { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1796 AArch64::LDRXroW },
1797 { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1798 AArch64::LDRXroW }
1799 },
1800 // Zero-extend.
1801 { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1802 AArch64::LDURXi },
1803 { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1804 AArch64::LDURXi },
1805 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1806 AArch64::LDRXui },
1807 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1808 AArch64::LDRXui },
1809 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1810 AArch64::LDRXroX },
1811 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1812 AArch64::LDRXroX },
1813 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1814 AArch64::LDRXroW },
1815 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1816 AArch64::LDRXroW }
1817 }
1818 };
1819
1820 static const unsigned FPOpcTable[4][2] = {
1821 { AArch64::LDURSi, AArch64::LDURDi },
1822 { AArch64::LDRSui, AArch64::LDRDui },
1823 { AArch64::LDRSroX, AArch64::LDRDroX },
1824 { AArch64::LDRSroW, AArch64::LDRDroW }
1825 };
1826
1827 unsigned Opc;
1828 const TargetRegisterClass *RC;
1829 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1830 Addr.getOffsetReg();
1831 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1832 if (Addr.getExtendType() == AArch64_AM::UXTW ||
1833 Addr.getExtendType() == AArch64_AM::SXTW)
1834 Idx++;
1835
1836 bool IsRet64Bit = RetVT == MVT::i64;
1837 switch (VT.SimpleTy) {
1838 default:
1839 llvm_unreachable("Unexpected value type.");
1840 case MVT::i1: // Intentional fall-through.
1841 case MVT::i8:
1842 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1843 RC = (IsRet64Bit && !WantZExt) ?
1844 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1845 break;
1846 case MVT::i16:
1847 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1848 RC = (IsRet64Bit && !WantZExt) ?
1849 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1850 break;
1851 case MVT::i32:
1852 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1853 RC = (IsRet64Bit && !WantZExt) ?
1854 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1855 break;
1856 case MVT::i64:
1857 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1858 RC = &AArch64::GPR64RegClass;
1859 break;
1860 case MVT::f32:
1861 Opc = FPOpcTable[Idx][0];
1862 RC = &AArch64::FPR32RegClass;
1863 break;
1864 case MVT::f64:
1865 Opc = FPOpcTable[Idx][1];
1866 RC = &AArch64::FPR64RegClass;
1867 break;
1868 }
1869
1870 // Create the base instruction, then add the operands.
1871 Register ResultReg = createResultReg(RC);
1872 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1873 TII.get(Opc), ResultReg);
1874 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1875
1876 // Loading an i1 requires special handling.
1877 if (VT == MVT::i1) {
1878 unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1);
1879 assert(ANDReg && "Unexpected AND instruction emission failure.");
1880 ResultReg = ANDReg;
1881 }
1882
1883 // For zero-extending loads to 64bit we emit a 32bit load and then convert
1884 // the 32bit reg to a 64bit reg.
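// This conversion is free: any write to a W register already zeroes the upper
// 32 bits of the corresponding X register, so the SUBREG_TO_REG below merely
// reinterprets the 32-bit result as a 64-bit value.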
1885 if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1886 Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
1887 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1888 TII.get(AArch64::SUBREG_TO_REG), Reg64)
1889 .addImm(0)
1890 .addReg(ResultReg, getKillRegState(true))
1891 .addImm(AArch64::sub_32);
1892 ResultReg = Reg64;
1893 }
1894 return ResultReg;
1895}
1896
1897bool AArch64FastISel::selectAddSub(const Instruction *I) {
1898 MVT VT;
1899 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1900 return false;
1901
1902 if (VT.isVector())
1903 return selectOperator(I, I->getOpcode());
1904
1905 unsigned ResultReg;
1906 switch (I->getOpcode()) {
1907 default:
1908 llvm_unreachable("Unexpected instruction.");
1909 case Instruction::Add:
1910 ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1911 break;
1912 case Instruction::Sub:
1913 ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1914 break;
1915 }
1916 if (!ResultReg)
1917 return false;
1918
1919 updateValueMap(I, ResultReg);
1920 return true;
1921}
1922
1923bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1924 MVT VT;
1925 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1926 return false;
1927
1928 if (VT.isVector())
1929 return selectOperator(I, I->getOpcode());
1930
1931 unsigned ResultReg;
1932 switch (I->getOpcode()) {
1933 default:
1934 llvm_unreachable("Unexpected instruction.");
1935 case Instruction::And:
1936 ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1937 break;
1938 case Instruction::Or:
1939 ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1940 break;
1941 case Instruction::Xor:
1942 ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1943 break;
1944 }
1945 if (!ResultReg)
1946 return false;
1947
1948 updateValueMap(I, ResultReg);
1949 return true;
1950}
1951
1952bool AArch64FastISel::selectLoad(const Instruction *I) {
1953 MVT VT;
1954 // Verify we have a legal type before going any further. Currently, we handle
1955 // simple types that will directly fit in a register (i32/f32/i64/f64) or
1956 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1957 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1958 cast<LoadInst>(I)->isAtomic())
1959 return false;
1960
1961 const Value *SV = I->getOperand(0);
1962 if (TLI.supportSwiftError()) {
1963 // Swifterror values can come from either a function parameter with
1964 // swifterror attribute or an alloca with swifterror attribute.
1965 if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1966 if (Arg->hasSwiftErrorAttr())
1967 return false;
1968 }
1969
1970 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1971 if (Alloca->isSwiftError())
1972 return false;
1973 }
1974 }
1975
1976 // See if we can handle this address.
1977 Address Addr;
1978 if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1979 return false;
1980
1981 // Fold the following sign-/zero-extend into the load instruction.
1982 bool WantZExt = true;
1983 MVT RetVT = VT;
1984 const Value *IntExtVal = nullptr;
1985 if (I->hasOneUse()) {
1986 if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1987 if (isTypeSupported(ZE->getType(), RetVT))
1988 IntExtVal = ZE;
1989 else
1990 RetVT = VT;
1991 } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1992 if (isTypeSupported(SE->getType(), RetVT))
1993 IntExtVal = SE;
1994 else
1995 RetVT = VT;
1996 WantZExt = false;
1997 }
1998 }
1999
2000 unsigned ResultReg =
2001 emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
2002 if (!ResultReg)
2003 return false;
2004
2005 // There are a few different cases we have to handle, because the load or the
2006 // sign-/zero-extend might not be selected by FastISel if we fall back to
2007 // SelectionDAG. There is also an ordering issue when both instructions are in
2008 // different basic blocks.
2009 // 1.) The load instruction is selected by FastISel, but the integer extend
2010 // is not. This usually happens when the integer extend is in a different
2011 // basic block and SelectionDAG took over for that basic block.
2012 // 2.) The load instruction is selected before the integer extend. This only
2013 // happens when the integer extend is in a different basic block.
2014 // 3.) The load instruction is selected by SelectionDAG and the integer extend
2015 // by FastISel. This happens if there are instructions between the load
2016 // and the integer extend that couldn't be selected by FastISel.
2017 if (IntExtVal) {
2018 // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2019 // could select it. Emit a copy to subreg if necessary. FastISel will remove
2020 // it when it selects the integer extend.
2021 Register Reg = lookUpRegForValue(IntExtVal);
2022 auto *MI = MRI.getUniqueVRegDef(Reg);
2023 if (!MI) {
2024 if (RetVT == MVT::i64 && VT <= MVT::i32) {
2025 if (WantZExt) {
2026 // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2027 MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
2028 ResultReg = std::prev(I)->getOperand(0).getReg();
2029 removeDeadCode(I, std::next(I));
2030 } else
2031 ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2032 AArch64::sub_32);
2033 }
2034 updateValueMap(I, ResultReg);
2035 return true;
2036 }
2037
2038 // The integer extend has already been emitted - delete all the instructions
2039 // that have been emitted by the integer extend lowering code and use the
2040 // result from the load instruction directly.
2041 while (MI) {
2042 Reg = 0;
2043 for (auto &Opnd : MI->uses()) {
2044 if (Opnd.isReg()) {
2045 Reg = Opnd.getReg();
2046 break;
2047 }
2048 }
2049 MachineBasicBlock::iterator I(MI);
2050 removeDeadCode(I, std::next(I));
2051 MI = nullptr;
2052 if (Reg)
2053 MI = MRI.getUniqueVRegDef(Reg);
2054 }
2055 updateValueMap(IntExtVal, ResultReg);
2056 return true;
2057 }
2058
2059 updateValueMap(I, ResultReg);
2060 return true;
2061}
2062
2063bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2064 unsigned AddrReg,
2065 MachineMemOperand *MMO) {
2066 unsigned Opc;
2067 switch (VT.SimpleTy) {
2068 default: return false;
2069 case MVT::i8: Opc = AArch64::STLRB; break;
2070 case MVT::i16: Opc = AArch64::STLRH; break;
2071 case MVT::i32: Opc = AArch64::STLRW; break;
2072 case MVT::i64: Opc = AArch64::STLRX; break;
2073 }
2074
2075 const MCInstrDesc &II = TII.get(Opc);
2076 SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2077 AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2078 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2079 .addReg(SrcReg)
2080 .addReg(AddrReg)
2081 .addMemOperand(MMO);
2082 return true;
2083}
2084
2085bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2086 MachineMemOperand *MMO) {
2087 if (!TLI.allowsMisalignedMemoryAccesses(VT))
2088 return false;
2089
2090 // Simplify this down to something we can handle.
2091 if (!simplifyAddress(Addr, VT))
2092 return false;
2093
2094 unsigned ScaleFactor = getImplicitScaleFactor(VT);
2095 if (!ScaleFactor)
2096 llvm_unreachable("Unexpected value type.");
2097
2098 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2099 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2100 bool UseScaled = true;
2101 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2102 UseScaled = false;
2103 ScaleFactor = 1;
2104 }
2105
2106 static const unsigned OpcTable[4][6] = {
2107 { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
2108 AArch64::STURSi, AArch64::STURDi },
2109 { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
2110 AArch64::STRSui, AArch64::STRDui },
2111 { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2112 AArch64::STRSroX, AArch64::STRDroX },
2113 { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2114 AArch64::STRSroW, AArch64::STRDroW }
2115 };
2116
2117 unsigned Opc;
2118 bool VTIsi1 = false;
2119 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2120 Addr.getOffsetReg();
2121 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2122 if (Addr.getExtendType() == AArch64_AM::UXTW ||
2123 Addr.getExtendType() == AArch64_AM::SXTW)
2124 Idx++;
2125
2126 switch (VT.SimpleTy) {
2127 default: llvm_unreachable("Unexpected value type.");
2128 case MVT::i1: VTIsi1 = true; [[fallthrough]];
2129 case MVT::i8: Opc = OpcTable[Idx][0]; break;
2130 case MVT::i16: Opc = OpcTable[Idx][1]; break;
2131 case MVT::i32: Opc = OpcTable[Idx][2]; break;
2132 case MVT::i64: Opc = OpcTable[Idx][3]; break;
2133 case MVT::f32: Opc = OpcTable[Idx][4]; break;
2134 case MVT::f64: Opc = OpcTable[Idx][5]; break;
2135 }
2136
2137 // Storing an i1 requires special handling.
2138 if (VTIsi1 && SrcReg != AArch64::WZR) {
2139 unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1);
2140 assert(ANDReg && "Unexpected AND instruction emission failure.");
2141 SrcReg = ANDReg;
2142 }
2143 // Create the base instruction, then add the operands.
2144 const MCInstrDesc &II = TII.get(Opc);
2145 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2146 MachineInstrBuilder MIB =
2147 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(SrcReg);
2148 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2149
2150 return true;
2151}
2152
2153bool AArch64FastISel::selectStore(const Instruction *I) {
2154 MVT VT;
2155 const Value *Op0 = I->getOperand(0);
2156 // Verify we have a legal type before going any further. Currently, we handle
2157 // simple types that will directly fit in a register (i32/f32/i64/f64) or
2158 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2159 if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2160 return false;
2161
2162 const Value *PtrV = I->getOperand(1);
2163 if (TLI.supportSwiftError()) {
2164 // Swifterror values can come from either a function parameter with
2165 // swifterror attribute or an alloca with swifterror attribute.
2166 if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2167 if (Arg->hasSwiftErrorAttr())
2168 return false;
2169 }
2170
2171 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2172 if (Alloca->isSwiftError())
2173 return false;
2174 }
2175 }
2176
2177 // Get the value to be stored into a register. Use the zero register directly
2178 // when possible to avoid an unnecessary copy and a wasted register.
2179 unsigned SrcReg = 0;
2180 if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2181 if (CI->isZero())
2182 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2183 } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2184 if (CF->isZero() && !CF->isNegative()) {
2185 VT = MVT::getIntegerVT(VT.getSizeInBits());
2186 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2187 }
2188 }
2189
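// E.g. 'store i64 0, ptr %p' is emitted as 'str xzr, [x0]', and a positive
// floating-point zero is likewise stored through WZR/XZR after retyping VT to
// the equally sized integer type above.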
2190 if (!SrcReg)
2191 SrcReg = getRegForValue(Op0);
2192
2193 if (!SrcReg)
2194 return false;
2195
2196 auto *SI = cast<StoreInst>(I);
2197
2198 // Try to emit a STLR for seq_cst/release.
2199 if (SI->isAtomic()) {
2200 AtomicOrdering Ord = SI->getOrdering();
2201 // The non-atomic instructions are sufficient for relaxed stores.
2202 if (isReleaseOrStronger(Ord)) {
2203 // The STLR addressing mode only supports a base reg; pass that directly.
2204 Register AddrReg = getRegForValue(PtrV);
2205 return emitStoreRelease(VT, SrcReg, AddrReg,
2206 createMachineMemOperandFor(I));
2207 }
2208 }
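// For example, 'store atomic i32 %v, ptr %p release, align 4' becomes
// 'stlr w0, [x1]' via the path above, while monotonic/unordered stores fall
// through to the plain store emission below.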
2209
2210 // See if we can handle this address.
2211 Address Addr;
2212 if (!computeAddress(PtrV, Addr, Op0->getType()))
2213 return false;
2214
2215 if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2216 return false;
2217 return true;
2218}
2219
2220 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2221 switch (Pred) {
2222 case CmpInst::FCMP_ONE:
2223 case CmpInst::FCMP_UEQ:
2224 default:
2225 // AL is our "false" for now. The other two need more compares.
2226 return AArch64CC::AL;
2227 case CmpInst::ICMP_EQ:
2228 case CmpInst::FCMP_OEQ:
2229 return AArch64CC::EQ;
2230 case CmpInst::ICMP_SGT:
2231 case CmpInst::FCMP_OGT:
2232 return AArch64CC::GT;
2233 case CmpInst::ICMP_SGE:
2234 case CmpInst::FCMP_OGE:
2235 return AArch64CC::GE;
2236 case CmpInst::ICMP_UGT:
2237 case CmpInst::FCMP_UGT:
2238 return AArch64CC::HI;
2239 case CmpInst::FCMP_OLT:
2240 return AArch64CC::MI;
2241 case CmpInst::ICMP_ULE:
2242 case CmpInst::FCMP_OLE:
2243 return AArch64CC::LS;
2244 case CmpInst::FCMP_ORD:
2245 return AArch64CC::VC;
2246 case CmpInst::FCMP_UNO:
2247 return AArch64CC::VS;
2248 case CmpInst::FCMP_UGE:
2249 return AArch64CC::PL;
2250 case CmpInst::ICMP_SLT:
2251 case CmpInst::FCMP_ULT:
2252 return AArch64CC::LT;
2253 case CmpInst::ICMP_SLE:
2254 case CmpInst::FCMP_ULE:
2255 return AArch64CC::LE;
2256 case CmpInst::FCMP_UNE:
2257 case CmpInst::ICMP_NE:
2258 return AArch64CC::NE;
2259 case CmpInst::ICMP_UGE:
2260 return AArch64CC::HS;
2261 case CmpInst::ICMP_ULT:
2262 return AArch64CC::LO;
2263 }
2264}
2265
2266/// Try to emit a combined compare-and-branch instruction.
2267bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2268 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
2269 // will not be produced, as they are conditional branch instructions that do
2270 // not set flags.
2271 if (FuncInfo.MF->getFunction().hasFnAttribute(
2272 Attribute::SpeculativeLoadHardening))
2273 return false;
2274
2275 assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2276 const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2277 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2278
2279 const Value *LHS = CI->getOperand(0);
2280 const Value *RHS = CI->getOperand(1);
2281
2282 MVT VT;
2283 if (!isTypeSupported(LHS->getType(), VT))
2284 return false;
2285
2286 unsigned BW = VT.getSizeInBits();
2287 if (BW > 64)
2288 return false;
2289
2290 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2291 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2292
2293 // Try to take advantage of fallthrough opportunities.
2294 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2295 std::swap(TBB, FBB);
2296 Predicate = CmpInst::getInversePredicate(Predicate);
2297 }
2298
2299 int TestBit = -1;
2300 bool IsCmpNE;
2301 switch (Predicate) {
2302 default:
2303 return false;
2304 case CmpInst::ICMP_EQ:
2305 case CmpInst::ICMP_NE:
2306 if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2307 std::swap(LHS, RHS);
2308
2309 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2310 return false;
2311
2312 if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2313 if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2314 const Value *AndLHS = AI->getOperand(0);
2315 const Value *AndRHS = AI->getOperand(1);
2316
2317 if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2318 if (C->getValue().isPowerOf2())
2319 std::swap(AndLHS, AndRHS);
2320
2321 if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2322 if (C->getValue().isPowerOf2()) {
2323 TestBit = C->getValue().logBase2();
2324 LHS = AndLHS;
2325 }
2326 }
2327
2328 if (VT == MVT::i1)
2329 TestBit = 0;
2330
2331 IsCmpNE = Predicate == CmpInst::ICMP_NE;
2332 break;
2333 case CmpInst::ICMP_SLT:
2334 case CmpInst::ICMP_SGE:
2335 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2336 return false;
2337
2338 TestBit = BW - 1;
2339 IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2340 break;
2341 case CmpInst::ICMP_SGT:
2342 case CmpInst::ICMP_SLE:
2343 if (!isa<ConstantInt>(RHS))
2344 return false;
2345
2346 if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2347 return false;
2348
2349 TestBit = BW - 1;
2350 IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2351 break;
2352 } // end switch
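// Examples of the cases handled above: 'icmp eq i64 %x, 0' selects CBZ,
// 'icmp ne i32 (and %x, 16), 0' becomes TBNZ #4, and 'icmp slt i64 %x, 0'
// tests the sign bit with TBNZ #63.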
2353
2354 static const unsigned OpcTable[2][2][2] = {
2355 { {AArch64::CBZW, AArch64::CBZX },
2356 {AArch64::CBNZW, AArch64::CBNZX} },
2357 { {AArch64::TBZW, AArch64::TBZX },
2358 {AArch64::TBNZW, AArch64::TBNZX} }
2359 };
2360
2361 bool IsBitTest = TestBit != -1;
2362 bool Is64Bit = BW == 64;
2363 if (TestBit < 32 && TestBit >= 0)
2364 Is64Bit = false;
2365
2366 unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2367 const MCInstrDesc &II = TII.get(Opc);
2368
2369 Register SrcReg = getRegForValue(LHS);
2370 if (!SrcReg)
2371 return false;
2372
2373 if (BW == 64 && !Is64Bit)
2374 SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32);
2375
2376 if ((BW < 32) && !IsBitTest)
2377 SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
2378
2379 // Emit the combined compare and branch instruction.
2380 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2381 MachineInstrBuilder MIB =
2382 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
2383 .addReg(SrcReg);
2384 if (IsBitTest)
2385 MIB.addImm(TestBit);
2386 MIB.addMBB(TBB);
2387
2388 finishCondBranch(BI->getParent(), TBB, FBB);
2389 return true;
2390}
2391
2392bool AArch64FastISel::selectBranch(const Instruction *I) {
2393 const BranchInst *BI = cast<BranchInst>(I);
2394 if (BI->isUnconditional()) {
2395 MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2396 fastEmitBranch(MSucc, BI->getDebugLoc());
2397 return true;
2398 }
2399
2400 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2401 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2402
2403 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2404 if (CI->hasOneUse() && isValueAvailable(CI)) {
2405 // Try to optimize or fold the cmp.
2406 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2407 switch (Predicate) {
2408 default:
2409 break;
2410 case CmpInst::FCMP_FALSE:
2411 fastEmitBranch(FBB, MIMD.getDL());
2412 return true;
2413 case CmpInst::FCMP_TRUE:
2414 fastEmitBranch(TBB, MIMD.getDL());
2415 return true;
2416 }
2417
2418 // Try to emit a combined compare-and-branch first.
2419 if (emitCompareAndBranch(BI))
2420 return true;
2421
2422 // Try to take advantage of fallthrough opportunities.
2423 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2424 std::swap(TBB, FBB);
2425 Predicate = CmpInst::getInversePredicate(Predicate);
2426 }
2427
2428 // Emit the cmp.
2429 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2430 return false;
2431
2432 // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2433 // instruction.
2434 AArch64CC::CondCode CC = getCompareCC(Predicate);
2435 AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2436 switch (Predicate) {
2437 default:
2438 break;
2439 case CmpInst::FCMP_UEQ:
2440 ExtraCC = AArch64CC::EQ;
2441 CC = AArch64CC::VS;
2442 break;
2443 case CmpInst::FCMP_ONE:
2444 ExtraCC = AArch64CC::MI;
2445 CC = AArch64CC::GT;
2446 break;
2447 }
2448 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2449
2450 // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2451 if (ExtraCC != AArch64CC::AL) {
2452 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2453 .addImm(ExtraCC)
2454 .addMBB(TBB);
2455 }
2456
2457 // Emit the branch.
2458 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2459 .addImm(CC)
2460 .addMBB(TBB);
2461
2462 finishCondBranch(BI->getParent(), TBB, FBB);
2463 return true;
2464 }
2465 } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2466 uint64_t Imm = CI->getZExtValue();
2467 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2468 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::B))
2469 .addMBB(Target);
2470
2471 // Obtain the branch probability and add the target to the successor list.
2472 if (FuncInfo.BPI) {
2473 auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2474 BI->getParent(), Target->getBasicBlock());
2475 FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2476 } else
2477 FuncInfo.MBB->addSuccessorWithoutProb(Target);
2478 return true;
2479 } else {
2480 AArch64CC::CondCode CC = AArch64CC::NE;
2481 if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2482 // Fake request the condition; otherwise the intrinsic might be completely
2483 // optimized away.
2484 Register CondReg = getRegForValue(BI->getCondition());
2485 if (!CondReg)
2486 return false;
2487
2488 // Emit the branch.
2489 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2490 .addImm(CC)
2491 .addMBB(TBB);
2492
2493 finishCondBranch(BI->getParent(), TBB, FBB);
2494 return true;
2495 }
2496 }
2497
2498 Register CondReg = getRegForValue(BI->getCondition());
2499 if (CondReg == 0)
2500 return false;
2501
2502 // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2503 unsigned Opcode = AArch64::TBNZW;
2504 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2505 std::swap(TBB, FBB);
2506 Opcode = AArch64::TBZW;
2507 }
2508
2509 const MCInstrDesc &II = TII.get(Opcode);
2510 Register ConstrainedCondReg
2511 = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2512 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2513 .addReg(ConstrainedCondReg)
2514 .addImm(0)
2515 .addMBB(TBB);
2516
2517 finishCondBranch(BI->getParent(), TBB, FBB);
2518 return true;
2519}
2520
2521bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2522 const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2523 Register AddrReg = getRegForValue(BI->getOperand(0));
2524 if (AddrReg == 0)
2525 return false;
2526
2527 // Emit the indirect branch.
2528 const MCInstrDesc &II = TII.get(AArch64::BR);
2529 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
2530 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(AddrReg);
2531
2532 // Make sure the CFG is up-to-date.
2533 for (const auto *Succ : BI->successors())
2534 FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2535
2536 return true;
2537}
2538
2539bool AArch64FastISel::selectCmp(const Instruction *I) {
2540 const CmpInst *CI = cast<CmpInst>(I);
2541
2542 // Vectors of i1 are weird: bail out.
2543 if (CI->getType()->isVectorTy())
2544 return false;
2545
2546 // Try to optimize or fold the cmp.
2547 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2548 unsigned ResultReg = 0;
2549 switch (Predicate) {
2550 default:
2551 break;
2552 case CmpInst::FCMP_FALSE:
2553 ResultReg = createResultReg(&AArch64::GPR32RegClass);
2554 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2555 TII.get(TargetOpcode::COPY), ResultReg)
2556 .addReg(AArch64::WZR, getKillRegState(true));
2557 break;
2558 case CmpInst::FCMP_TRUE:
2559 ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2560 break;
2561 }
2562
2563 if (ResultReg) {
2564 updateValueMap(I, ResultReg);
2565 return true;
2566 }
2567
2568 // Emit the cmp.
2569 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2570 return false;
2571
2572 ResultReg = createResultReg(&AArch64::GPR32RegClass);
2573
2574 // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2575 // condition codes are inverted, because they are used by CSINC.
2576 static unsigned CondCodeTable[2][2] = {
2577 { AArch64CC::NE, AArch64CC::VC },
2578 { AArch64CC::PL, AArch64CC::LE }
2579 };
2580 unsigned *CondCodes = nullptr;
2581 switch (Predicate) {
2582 default:
2583 break;
2584 case CmpInst::FCMP_UEQ:
2585 CondCodes = &CondCodeTable[0][0];
2586 break;
2587 case CmpInst::FCMP_ONE:
2588 CondCodes = &CondCodeTable[1][0];
2589 break;
2590 }
2591
2592 if (CondCodes) {
2593 Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2594 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2595 TmpReg1)
2596 .addReg(AArch64::WZR, getKillRegState(true))
2597 .addReg(AArch64::WZR, getKillRegState(true))
2598 .addImm(CondCodes[0]);
2599 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2600 ResultReg)
2601 .addReg(TmpReg1, getKillRegState(true))
2602 .addReg(AArch64::WZR, getKillRegState(true))
2603 .addImm(CondCodes[1]);
2604
2605 updateValueMap(I, ResultReg);
2606 return true;
2607 }
2608
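// CSINC Wd, WZR, WZR, cond produces 0 when cond holds and WZR + 1 = 1 when it
// does not, so emitting it with the inverted condition code below yields 1
// exactly when the original predicate is satisfied.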
2609 // Now set a register based on the comparison.
2610 AArch64CC::CondCode CC = getCompareCC(Predicate);
2611 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2612 AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2613 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2614 ResultReg)
2615 .addReg(AArch64::WZR, getKillRegState(true))
2616 .addReg(AArch64::WZR, getKillRegState(true))
2617 .addImm(invertedCC);
2618
2619 updateValueMap(I, ResultReg);
2620 return true;
2621}
2622
2623/// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2624/// value.
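/// For example, 'select i1 %c, i1 true, i1 %b' lowers to an ORR of the
/// condition and %b, and 'select i1 %c, i1 %b, i1 false' lowers to an AND.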
2625bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2626 if (!SI->getType()->isIntegerTy(1))
2627 return false;
2628
2629 const Value *Src1Val, *Src2Val;
2630 unsigned Opc = 0;
2631 bool NeedExtraOp = false;
2632 if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2633 if (CI->isOne()) {
2634 Src1Val = SI->getCondition();
2635 Src2Val = SI->getFalseValue();
2636 Opc = AArch64::ORRWrr;
2637 } else {
2638 assert(CI->isZero());
2639 Src1Val = SI->getFalseValue();
2640 Src2Val = SI->getCondition();
2641 Opc = AArch64::BICWrr;
2642 }
2643 } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2644 if (CI->isOne()) {
2645 Src1Val = SI->getCondition();
2646 Src2Val = SI->getTrueValue();
2647 Opc = AArch64::ORRWrr;
2648 NeedExtraOp = true;
2649 } else {
2650 assert(CI->isZero());
2651 Src1Val = SI->getCondition();
2652 Src2Val = SI->getTrueValue();
2653 Opc = AArch64::ANDWrr;
2654 }
2655 }
2656
2657 if (!Opc)
2658 return false;
2659
2660 Register Src1Reg = getRegForValue(Src1Val);
2661 if (!Src1Reg)
2662 return false;
2663
2664 Register Src2Reg = getRegForValue(Src2Val);
2665 if (!Src2Reg)
2666 return false;
2667
2668 if (NeedExtraOp)
2669 Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1);
2670
2671 Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2672 Src2Reg);
2673 updateValueMap(SI, ResultReg);
2674 return true;
2675}
2676
2677bool AArch64FastISel::selectSelect(const Instruction *I) {
2678 assert(isa<SelectInst>(I) && "Expected a select instruction.");
2679 MVT VT;
2680 if (!isTypeSupported(I->getType(), VT))
2681 return false;
2682
2683 unsigned Opc;
2684 const TargetRegisterClass *RC;
2685 switch (VT.SimpleTy) {
2686 default:
2687 return false;
2688 case MVT::i1:
2689 case MVT::i8:
2690 case MVT::i16:
2691 case MVT::i32:
2692 Opc = AArch64::CSELWr;
2693 RC = &AArch64::GPR32RegClass;
2694 break;
2695 case MVT::i64:
2696 Opc = AArch64::CSELXr;
2697 RC = &AArch64::GPR64RegClass;
2698 break;
2699 case MVT::f32:
2700 Opc = AArch64::FCSELSrrr;
2701 RC = &AArch64::FPR32RegClass;
2702 break;
2703 case MVT::f64:
2704 Opc = AArch64::FCSELDrrr;
2705 RC = &AArch64::FPR64RegClass;
2706 break;
2707 }
2708
2709 const SelectInst *SI = cast<SelectInst>(I);
2710 const Value *Cond = SI->getCondition();
2711 AArch64CC::CondCode CC = AArch64CC::NE;
2712 AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2713
2714 if (optimizeSelect(SI))
2715 return true;
2716
2717 // Try to pickup the flags, so we don't have to emit another compare.
2718 if (foldXALUIntrinsic(CC, I, Cond)) {
2719 // Fake request the condition to force emission of the XALU intrinsic.
2720 Register CondReg = getRegForValue(Cond);
2721 if (!CondReg)
2722 return false;
2723 } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2724 isValueAvailable(Cond)) {
2725 const auto *Cmp = cast<CmpInst>(Cond);
2726 // Try to optimize or fold the cmp.
2727 CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2728 const Value *FoldSelect = nullptr;
2729 switch (Predicate) {
2730 default:
2731 break;
2732 case CmpInst::FCMP_FALSE:
2733 FoldSelect = SI->getFalseValue();
2734 break;
2735 case CmpInst::FCMP_TRUE:
2736 FoldSelect = SI->getTrueValue();
2737 break;
2738 }
2739
2740 if (FoldSelect) {
2741 Register SrcReg = getRegForValue(FoldSelect);
2742 if (!SrcReg)
2743 return false;
2744
2745 updateValueMap(I, SrcReg);
2746 return true;
2747 }
2748
2749 // Emit the cmp.
2750 if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2751 return false;
2752
2753 // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2754 CC = getCompareCC(Predicate);
2755 switch (Predicate) {
2756 default:
2757 break;
2758 case CmpInst::FCMP_UEQ:
2759 ExtraCC = AArch64CC::EQ;
2760 CC = AArch64CC::VS;
2761 break;
2762 case CmpInst::FCMP_ONE:
2763 ExtraCC = AArch64CC::MI;
2764 CC = AArch64CC::GT;
2765 break;
2766 }
2767 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2768 } else {
2769 Register CondReg = getRegForValue(Cond);
2770 if (!CondReg)
2771 return false;
2772
2773 const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2774 CondReg = constrainOperandRegClass(II, CondReg, 1);
2775
2776 // Emit a TST instruction (ANDS wzr, reg, #imm).
2777 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II,
2778 AArch64::WZR)
2779 .addReg(CondReg)
2780 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2781 }
2782
2783 Register Src1Reg = getRegForValue(SI->getTrueValue());
2784 Register Src2Reg = getRegForValue(SI->getFalseValue());
2785
2786 if (!Src1Reg || !Src2Reg)
2787 return false;
2788
2789 if (ExtraCC != AArch64CC::AL)
2790 Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC);
2791
2792 Register ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC);
2793 updateValueMap(I, ResultReg);
2794 return true;
2795}
2796
2797bool AArch64FastISel::selectFPExt(const Instruction *I) {
2798 Value *V = I->getOperand(0);
2799 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2800 return false;
2801
2802 Register Op = getRegForValue(V);
2803 if (Op == 0)
2804 return false;
2805
2806 Register ResultReg = createResultReg(&AArch64::FPR64RegClass);
2807 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTDSr),
2808 ResultReg).addReg(Op);
2809 updateValueMap(I, ResultReg);
2810 return true;
2811}
2812
2813bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2814 Value *V = I->getOperand(0);
2815 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2816 return false;
2817
2818 Register Op = getRegForValue(V);
2819 if (Op == 0)
2820 return false;
2821
2822 Register ResultReg = createResultReg(&AArch64::FPR32RegClass);
2823 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTSDr),
2824 ResultReg).addReg(Op);
2825 updateValueMap(I, ResultReg);
2826 return true;
2827}
2828
2829// FPToUI and FPToSI
2830bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2831 MVT DestVT;
2832 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2833 return false;
2834
2835 Register SrcReg = getRegForValue(I->getOperand(0));
2836 if (SrcReg == 0)
2837 return false;
2838
2839 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2840 if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
2841 return false;
2842
2843 unsigned Opc;
2844 if (SrcVT == MVT::f64) {
2845 if (Signed)
2846 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2847 else
2848 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2849 } else {
2850 if (Signed)
2851 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2852 else
2853 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2854 }
2855 Register ResultReg = createResultReg(
2856 DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2857 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
2858 .addReg(SrcReg);
2859 updateValueMap(I, ResultReg);
2860 return true;
2861}
2862
2863bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2864 MVT DestVT;
2865 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2866 return false;
2867 // Let regular ISEL handle FP16
2868 if (DestVT == MVT::f16)
2869 return false;
2870
2871 assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2872 "Unexpected value type.");
2873
2874 Register SrcReg = getRegForValue(I->getOperand(0));
2875 if (!SrcReg)
2876 return false;
2877
2878 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2879
2880 // Handle sign-extension.
2881 if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2882 SrcReg =
2883 emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2884 if (!SrcReg)
2885 return false;
2886 }
2887
2888 unsigned Opc;
2889 if (SrcVT == MVT::i64) {
2890 if (Signed)
2891 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2892 else
2893 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2894 } else {
2895 if (Signed)
2896 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2897 else
2898 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2899 }
2900
2901 Register ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg);
2902 updateValueMap(I, ResultReg);
2903 return true;
2904}
2905
2906bool AArch64FastISel::fastLowerArguments() {
2907 if (!FuncInfo.CanLowerReturn)
2908 return false;
2909
2910 const Function *F = FuncInfo.Fn;
2911 if (F->isVarArg())
2912 return false;
2913
2914 CallingConv::ID CC = F->getCallingConv();
2915 if (CC != CallingConv::C && CC != CallingConv::Swift)
2916 return false;
2917
2918 if (Subtarget->hasCustomCallingConv())
2919 return false;
2920
2921 // Only handle simple cases of up to 8 GPR and FPR each.
2922 unsigned GPRCnt = 0;
2923 unsigned FPRCnt = 0;
2924 for (auto const &Arg : F->args()) {
2925 if (Arg.hasAttribute(Attribute::ByVal) ||
2926 Arg.hasAttribute(Attribute::InReg) ||
2927 Arg.hasAttribute(Attribute::StructRet) ||
2928 Arg.hasAttribute(Attribute::SwiftSelf) ||
2929 Arg.hasAttribute(Attribute::SwiftAsync) ||
2930 Arg.hasAttribute(Attribute::SwiftError) ||
2931 Arg.hasAttribute(Attribute::Nest))
2932 return false;
2933
2934 Type *ArgTy = Arg.getType();
2935 if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2936 return false;
2937
2938 EVT ArgVT = TLI.getValueType(DL, ArgTy);
2939 if (!ArgVT.isSimple())
2940 return false;
2941
2942 MVT VT = ArgVT.getSimpleVT().SimpleTy;
2943 if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2944 return false;
2945
2946 if (VT.isVector() &&
2947 (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2948 return false;
2949
2950 if (VT >= MVT::i1 && VT <= MVT::i64)
2951 ++GPRCnt;
2952 else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2953 VT.is128BitVector())
2954 ++FPRCnt;
2955 else
2956 return false;
2957
2958 if (GPRCnt > 8 || FPRCnt > 8)
2959 return false;
2960 }
2961
2962 static const MCPhysReg Registers[6][8] = {
2963 { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2964 AArch64::W5, AArch64::W6, AArch64::W7 },
2965 { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2966 AArch64::X5, AArch64::X6, AArch64::X7 },
2967 { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2968 AArch64::H5, AArch64::H6, AArch64::H7 },
2969 { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2970 AArch64::S5, AArch64::S6, AArch64::S7 },
2971 { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2972 AArch64::D5, AArch64::D6, AArch64::D7 },
2973 { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2974 AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2975 };
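// Per AAPCS64, the first eight integer arguments live in X0-X7 (accessed as
// W0-W7 for 32-bit and smaller values) and the first eight FP/SIMD arguments
// in V0-V7, viewed here as H, S, D or Q registers depending on the type.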
2976
2977 unsigned GPRIdx = 0;
2978 unsigned FPRIdx = 0;
2979 for (auto const &Arg : F->args()) {
2980 MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2981 unsigned SrcReg;
2982 const TargetRegisterClass *RC;
2983 if (VT >= MVT::i1 && VT <= MVT::i32) {
2984 SrcReg = Registers[0][GPRIdx++];
2985 RC = &AArch64::GPR32RegClass;
2986 VT = MVT::i32;
2987 } else if (VT == MVT::i64) {
2988 SrcReg = Registers[1][GPRIdx++];
2989 RC = &AArch64::GPR64RegClass;
2990 } else if (VT == MVT::f16) {
2991 SrcReg = Registers[2][FPRIdx++];
2992 RC = &AArch64::FPR16RegClass;
2993 } else if (VT == MVT::f32) {
2994 SrcReg = Registers[3][FPRIdx++];
2995 RC = &AArch64::FPR32RegClass;
2996 } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2997 SrcReg = Registers[4][FPRIdx++];
2998 RC = &AArch64::FPR64RegClass;
2999 } else if (VT.is128BitVector()) {
3000 SrcReg = Registers[5][FPRIdx++];
3001 RC = &AArch64::FPR128RegClass;
3002 } else
3003 llvm_unreachable("Unexpected value type.");
3004
3005 Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3006 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3007 // Without this, EmitLiveInCopies may eliminate the livein if its only
3008 // use is a bitcast (which isn't turned into an instruction).
3009 Register ResultReg = createResultReg(RC);
3010 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3011 TII.get(TargetOpcode::COPY), ResultReg)
3012 .addReg(DstReg, getKillRegState(true));
3013 updateValueMap(&Arg, ResultReg);
3014 }
3015 return true;
3016}
3017
3018bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3019 SmallVectorImpl<MVT> &OutVTs,
3020 unsigned &NumBytes) {
3021 CallingConv::ID CC = CLI.CallConv;
3022 SmallVector<CCValAssign, 16> ArgLocs;
3023 CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3024 CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
3025
3026 // Get a count of how many bytes are to be pushed on the stack.
3027 NumBytes = CCInfo.getStackSize();
3028
3029 // Issue CALLSEQ_START
3030 unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3031 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackDown))
3032 .addImm(NumBytes).addImm(0);
3033
3034 // Process the args.
3035 for (CCValAssign &VA : ArgLocs) {
3036 const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3037 MVT ArgVT = OutVTs[VA.getValNo()];
3038
3039 Register ArgReg = getRegForValue(ArgVal);
3040 if (!ArgReg)
3041 return false;
3042
3043 // Handle arg promotion: SExt, ZExt, AExt.
3044 switch (VA.getLocInfo()) {
3045 case CCValAssign::Full:
3046 break;
3047 case CCValAssign::SExt: {
3048 MVT DestVT = VA.getLocVT();
3049 MVT SrcVT = ArgVT;
3050 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3051 if (!ArgReg)
3052 return false;
3053 break;
3054 }
3055 case CCValAssign::AExt:
3056 // Intentional fall-through.
3057 case CCValAssign::ZExt: {
3058 MVT DestVT = VA.getLocVT();
3059 MVT SrcVT = ArgVT;
3060 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3061 if (!ArgReg)
3062 return false;
3063 break;
3064 }
3065 default:
3066 llvm_unreachable("Unknown arg promotion!");
3067 }
3068
3069 // Now copy/store arg to correct locations.
3070 if (VA.isRegLoc() && !VA.needsCustom()) {
3071 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3072 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3073 CLI.OutRegs.push_back(VA.getLocReg());
3074 } else if (VA.needsCustom()) {
3075 // FIXME: Handle custom args.
3076 return false;
3077 } else {
3078 assert(VA.isMemLoc() && "Assuming store on stack.");
3079
3080 // Don't emit stores for undef values.
3081 if (isa<UndefValue>(ArgVal))
3082 continue;
3083
3084 // Need to store on the stack.
3085 unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3086
3087 unsigned BEAlign = 0;
3088 if (ArgSize < 8 && !Subtarget->isLittleEndian())
3089 BEAlign = 8 - ArgSize;
3090
3091 Address Addr;
3092 Addr.setKind(Address::RegBase);
3093 Addr.setReg(AArch64::SP);
3094 Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3095
3096 Align Alignment = DL.getABITypeAlign(ArgVal->getType());
3097 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3098 MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3099 MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3100
3101 if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3102 return false;
3103 }
3104 }
3105 return true;
3106}
3107
3108bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, unsigned NumBytes) {
3109 CallingConv::ID CC = CLI.CallConv;
3110
3111 // Issue CALLSEQ_END
3112 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3113 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp))
3114 .addImm(NumBytes).addImm(0);
3115
3116 // Now the return values.
3117 SmallVector<CCValAssign, 16> RVLocs;
3118 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3119 CCInfo.AnalyzeCallResult(CLI.Ins, CCAssignFnForCall(CC));
3120
3121 Register ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
3122 for (unsigned i = 0; i != RVLocs.size(); ++i) {
3123 CCValAssign &VA = RVLocs[i];
3124 MVT CopyVT = VA.getValVT();
3125 unsigned CopyReg = ResultReg + i;
3126
3127 // TODO: Handle big-endian results
3128 if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3129 return false;
3130
3131 // Copy result out of their specified physreg.
3132 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
3133 CopyReg)
3134 .addReg(VA.getLocReg());
3135 CLI.InRegs.push_back(VA.getLocReg());
3136 }
3137
3138 CLI.ResultReg = ResultReg;
3139 CLI.NumResultRegs = RVLocs.size();
3140
3141 return true;
3142}
3143
3144bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3145 CallingConv::ID CC = CLI.CallConv;
3146 bool IsTailCall = CLI.IsTailCall;
3147 bool IsVarArg = CLI.IsVarArg;
3148 const Value *Callee = CLI.Callee;
3149 MCSymbol *Symbol = CLI.Symbol;
3150
3151 if (!Callee && !Symbol)
3152 return false;
3153
3154 // Allow SelectionDAG isel to handle calls to functions like setjmp that need
3155 // a bti instruction following the call.
3156 if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
3157 !Subtarget->noBTIAtReturnTwice() &&
3158 MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
3159 return false;
3160
3161 // Allow SelectionDAG isel to handle indirect calls with KCFI checks.
3162 if (CLI.CB && CLI.CB->isIndirectCall() &&
3163 CLI.CB->getOperandBundle(LLVMContext::OB_kcfi))
3164 return false;
3165
3166 // Allow SelectionDAG isel to handle tail calls.
3167 if (IsTailCall)
3168 return false;
3169
3170 // FIXME: we could and should support this, but for now correctness at -O0 is
3171 // more important.
3172 if (Subtarget->isTargetILP32())
3173 return false;
3174
3175 CodeModel::Model CM = TM.getCodeModel();
3176 // Only support the small-addressing and large code models.
3177 if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3178 return false;
3179
3180 // FIXME: Add large code model support for ELF.
3181 if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3182 return false;
3183
3184 // Let SDISel handle vararg functions.
3185 if (IsVarArg)
3186 return false;
3187
3188 for (auto Flag : CLI.OutFlags)
3189 if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3190 Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError())
3191 return false;
3192
3193 // Set up the argument vectors.
3194 SmallVector<MVT, 16> OutVTs;
3195 OutVTs.reserve(CLI.OutVals.size());
3196
3197 for (auto *Val : CLI.OutVals) {
3198 MVT VT;
3199 if (!isTypeLegal(Val->getType(), VT) &&
3200 !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3201 return false;
3202
3203 // We don't handle vector parameters yet.
3204 if (VT.isVector() || VT.getSizeInBits() > 64)
3205 return false;
3206
3207 OutVTs.push_back(VT);
3208 }
3209
3210 Address Addr;
3211 if (Callee && !computeCallAddress(Callee, Addr))
3212 return false;
3213
3214 // The weak function target may be zero; in that case we must use indirect
3215 // addressing via a stub on Windows as it may be out of range for a
3216 // PC-relative jump.
3217 if (Subtarget->isTargetWindows() && Addr.getGlobalValue() &&
3218 Addr.getGlobalValue()->hasExternalWeakLinkage())
3219 return false;
3220
3221 // Handle the arguments now that we've gotten them.
3222 unsigned NumBytes;
3223 if (!processCallArgs(CLI, OutVTs, NumBytes))
3224 return false;
3225
3226 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3227 if (RegInfo->isAnyArgRegReserved(*MF))
3228 RegInfo->emitReservedArgRegCallError(*MF);
3229
3230 // Issue the call.
3231 MachineInstrBuilder MIB;
3232 if (Subtarget->useSmallAddressing()) {
3233 const MCInstrDesc &II =
3234 TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL);
3235 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II);
3236 if (Symbol)
3237 MIB.addSym(Symbol, 0);
3238 else if (Addr.getGlobalValue())
3239 MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3240 else if (Addr.getReg()) {
3241 Register Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3242 MIB.addReg(Reg);
3243 } else
3244 return false;
3245 } else {
3246 unsigned CallReg = 0;
3247 if (Symbol) {
3248 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3249 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
3250 ADRPReg)
3251 .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3252
3253 CallReg = createResultReg(&AArch64::GPR64RegClass);
3254 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3255 TII.get(AArch64::LDRXui), CallReg)
3256 .addReg(ADRPReg)
3257 .addSym(Symbol,
3258 AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3259 } else if (Addr.getGlobalValue())
3260 CallReg = materializeGV(Addr.getGlobalValue());
3261 else if (Addr.getReg())
3262 CallReg = Addr.getReg();
3263
3264 if (!CallReg)
3265 return false;
3266
3267 const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF));
3268 CallReg = constrainOperandRegClass(II, CallReg, 0);
3269 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(CallReg);
3270 }
3271
3272 // Add implicit physical register uses to the call.
3273 for (auto Reg : CLI.OutRegs)
3274 MIB.addReg(Reg, RegState::Implicit);
3275
3276 // Add a register mask with the call-preserved registers.
3277 // Proper defs for return values will be added by setPhysRegsDeadExcept().
3278 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3279
3280 CLI.Call = MIB;
3281
3282 // Finish off the call including any return values.
3283 return finishCall(CLI, NumBytes);
3284}
3285
3286bool AArch64FastISel::isMemCpySmall(uint64_t Len, MaybeAlign Alignment) {
3287 if (Alignment)
3288 return Len / Alignment->value() <= 4;
3289 else
3290 return Len < 32;
3291}
3292
3293bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3294 uint64_t Len, MaybeAlign Alignment) {
3295 // Make sure we don't bloat code by inlining very large memcpy's.
3296 if (!isMemCpySmall(Len, Alignment))
3297 return false;
3298
3299 int64_t UnscaledOffset = 0;
3300 Address OrigDest = Dest;
3301 Address OrigSrc = Src;
3302
3303 while (Len) {
3304 MVT VT;
3305 if (!Alignment || *Alignment >= 8) {
3306 if (Len >= 8)
3307 VT = MVT::i64;
3308 else if (Len >= 4)
3309 VT = MVT::i32;
3310 else if (Len >= 2)
3311 VT = MVT::i16;
3312 else {
3313 VT = MVT::i8;
3314 }
3315 } else {
3316 assert(Alignment && "Alignment is set in this branch");
3317 // Bound based on alignment.
3318 if (Len >= 4 && *Alignment == 4)
3319 VT = MVT::i32;
3320 else if (Len >= 2 && *Alignment == 2)
3321 VT = MVT::i16;
3322 else {
3323 VT = MVT::i8;
3324 }
3325 }
3326
3327 unsigned ResultReg = emitLoad(VT, VT, Src);
3328 if (!ResultReg)
3329 return false;
3330
3331 if (!emitStore(VT, ResultReg, Dest))
3332 return false;
3333
3334 int64_t Size = VT.getSizeInBits() / 8;
3335 Len -= Size;
3336 UnscaledOffset += Size;
3337
3338 // We need to recompute the unscaled offset for each iteration.
3339 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3340 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3341 }
3342
3343 return true;
3344}
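// For example, a 16-byte memcpy with 8-byte aligned operands passes
// isMemCpySmall (16 / 8 = 2 <= 4) and is expanded above into two i64
// load/store pairs instead of a libcall.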
3345
3346/// Check if it is possible to fold the condition from the XALU intrinsic
3347/// into the user. The condition code will only be updated on success.
3348bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3349 const Instruction *I,
3350 const Value *Cond) {
3351 if (!isa<ExtractValueInst>(Cond))
3352 return false;
3353
3354 const auto *EV = cast<ExtractValueInst>(Cond);
3355 if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3356 return false;
3357
3358 const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3359 MVT RetVT;
3360 const Function *Callee = II->getCalledFunction();
3361 Type *RetTy =
3362 cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3363 if (!isTypeLegal(RetTy, RetVT))
3364 return false;
3365
3366 if (RetVT != MVT::i32 && RetVT != MVT::i64)
3367 return false;
3368
3369 const Value *LHS = II->getArgOperand(0);
3370 const Value *RHS = II->getArgOperand(1);
3371
3372 // Canonicalize immediate to the RHS.
3373 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3374 std::swap(LHS, RHS);
3375
3376 // Simplify multiplies.
3377 Intrinsic::ID IID = II->getIntrinsicID();
3378 switch (IID) {
3379 default:
3380 break;
3381 case Intrinsic::smul_with_overflow:
3382 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3383 if (C->getValue() == 2)
3384 IID = Intrinsic::sadd_with_overflow;
3385 break;
3386 case Intrinsic::umul_with_overflow:
3387 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3388 if (C->getValue() == 2)
3389 IID = Intrinsic::uadd_with_overflow;
3390 break;
3391 }
3392
3393 AArch64CC::CondCode TmpCC;
3394 switch (IID) {
3395 default:
3396 return false;
3397 case Intrinsic::sadd_with_overflow:
3398 case Intrinsic::ssub_with_overflow:
3399 TmpCC = AArch64CC::VS;
3400 break;
3401 case Intrinsic::uadd_with_overflow:
3402 TmpCC = AArch64CC::HS;
3403 break;
3404 case Intrinsic::usub_with_overflow:
3405 TmpCC = AArch64CC::LO;
3406 break;
3407 case Intrinsic::smul_with_overflow:
3408 case Intrinsic::umul_with_overflow:
3409 TmpCC = AArch64CC::NE;
3410 break;
3411 }
3412
3413 // Check if both instructions are in the same basic block.
3414 if (!isValueAvailable(II))
3415 return false;
3416
3417 // Make sure nothing is in the way
3418 BasicBlock::const_iterator Start(I);
3419 BasicBlock::const_iterator End(II);
3420 for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3421 // We only expect extractvalue instructions between the intrinsic and the
3422 // instruction to be selected.
3423 if (!isa<ExtractValueInst>(Itr))
3424 return false;
3425
3426 // Check that the extractvalue operand comes from the intrinsic.
3427 const auto *EVI = cast<ExtractValueInst>(Itr);
3428 if (EVI->getAggregateOperand() != II)
3429 return false;
3430 }
3431
3432 CC = TmpCC;
3433 return true;
3434}
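// E.g. for 'llvm.sadd.with.overflow.i32' whose overflow bit is consumed (via
// extractvalue only) in the same block, the user can branch or select on VS
// and reuse the NZCV flags set by the ADDS instead of re-emitting a compare.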
3435
3436bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3437 // FIXME: Handle more intrinsics.
3438 switch (II->getIntrinsicID()) {
3439 default: return false;
3440 case Intrinsic::frameaddress: {
3441 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3442 MFI.setFrameAddressIsTaken(true);
3443
3444 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3445 Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3446 Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3447 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3448 TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3449 // Recursively load frame address
3450 // ldr x0, [fp]
3451 // ldr x0, [x0]
3452 // ldr x0, [x0]
3453 // ...
3454 unsigned DestReg;
3455 unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3456 while (Depth--) {
3457 DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3458 SrcReg, 0);
3459 assert(DestReg && "Unexpected LDR instruction emission failure.");
3460 SrcReg = DestReg;
3461 }
3462
3463 updateValueMap(II, SrcReg);
3464 return true;
3465 }
3466 case Intrinsic::sponentry: {
3467 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3468
3469 // SP = FP + Fixed Object + 16
3470 int FI = MFI.CreateFixedObject(4, 0, false);
3471 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
3472 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3473 TII.get(AArch64::ADDXri), ResultReg)
3474 .addFrameIndex(FI)
3475 .addImm(0)
3476 .addImm(0);
3477
3478 updateValueMap(II, ResultReg);
3479 return true;
3480 }
3481 case Intrinsic::memcpy:
3482 case Intrinsic::memmove: {
3483 const auto *MTI = cast<MemTransferInst>(II);
3484 // Don't handle volatile.
3485 if (MTI->isVolatile())
3486 return false;
3487
3488 // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
3489 // we would emit dead code because we don't currently handle memmoves.
3490 bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3491 if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3492 // Small memcpy's are common enough that we want to do them without a call
3493 // if possible.
3494 uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3495 MaybeAlign Alignment;
3496 if (MTI->getDestAlign() || MTI->getSourceAlign())
3497 Alignment = std::min(MTI->getDestAlign().valueOrOne(),
3498 MTI->getSourceAlign().valueOrOne());
3499 if (isMemCpySmall(Len, Alignment)) {
3500 Address Dest, Src;
3501 if (!computeAddress(MTI->getRawDest(), Dest) ||
3502 !computeAddress(MTI->getRawSource(), Src))
3503 return false;
3504 if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3505 return true;
3506 }
3507 }
3508
3509 if (!MTI->getLength()->getType()->isIntegerTy(64))
3510 return false;
3511
3512 if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3513 // Fast instruction selection doesn't support the special
3514 // address spaces.
3515 return false;
3516
3517 const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3518 return lowerCallTo(II, IntrMemName, II->arg_size() - 1);
3519 }
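// A minimal sketch of what the small-memcpy path above typically emits for an
// 8-byte, sufficiently aligned llvm.memcpy (register names are invented for
// the illustration):
//   ldr x8, [x9]       ; load from the source address
//   str x8, [x10]      ; store to the destination address
// Copies that are too large or unaligned, and all memmoves, fall back to the
// libcall path via lowerCallTo above.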
3520 case Intrinsic::memset: {
3521 const MemSetInst *MSI = cast<MemSetInst>(II);
3522 // Don't handle volatile.
3523 if (MSI->isVolatile())
3524 return false;
3525
3526 if (!MSI->getLength()->getType()->isIntegerTy(64))
3527 return false;
3528
3529 if (MSI->getDestAddressSpace() > 255)
3530 // Fast instruction selection doesn't support the special
3531 // address spaces.
3532 return false;
3533
3534 return lowerCallTo(II, "memset", II->arg_size() - 1);
3535 }
3536 case Intrinsic::sin:
3537 case Intrinsic::cos:
3538 case Intrinsic::pow: {
3539 MVT RetVT;
3540 if (!isTypeLegal(II->getType(), RetVT))
3541 return false;
3542
3543 if (RetVT != MVT::f32 && RetVT != MVT::f64)
3544 return false;
3545
3546 static const RTLIB::Libcall LibCallTable[3][2] = {
3547 { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3548 { RTLIB::COS_F32, RTLIB::COS_F64 },
3549 { RTLIB::POW_F32, RTLIB::POW_F64 }
3550 };
3551 RTLIB::Libcall LC;
3552 bool Is64Bit = RetVT == MVT::f64;
3553 switch (II->getIntrinsicID()) {
3554 default:
3555 llvm_unreachable("Unexpected intrinsic.");
3556 case Intrinsic::sin:
3557 LC = LibCallTable[0][Is64Bit];
3558 break;
3559 case Intrinsic::cos:
3560 LC = LibCallTable[1][Is64Bit];
3561 break;
3562 case Intrinsic::pow:
3563 LC = LibCallTable[2][Is64Bit];
3564 break;
3565 }
3566
3567 ArgListTy Args;
3568 Args.reserve(II->arg_size());
3569
3570 // Populate the argument list.
3571 for (auto &Arg : II->args()) {
3572 ArgListEntry Entry;
3573 Entry.Val = Arg;
3574 Entry.Ty = Arg->getType();
3575 Args.push_back(Entry);
3576 }
3577
3578 CallLoweringInfo CLI;
3579 MCContext &Ctx = MF->getContext();
3580 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3581 TLI.getLibcallName(LC), std::move(Args));
3582 if (!lowerCallTo(CLI))
3583 return false;
3584 updateValueMap(II, CLI.ResultReg);
3585 return true;
3586 }
3587 case Intrinsic::fabs: {
3588 MVT VT;
3589 if (!isTypeLegal(II->getType(), VT))
3590 return false;
3591
3592 unsigned Opc;
3593 switch (VT.SimpleTy) {
3594 default:
3595 return false;
3596 case MVT::f32:
3597 Opc = AArch64::FABSSr;
3598 break;
3599 case MVT::f64:
3600 Opc = AArch64::FABSDr;
3601 break;
3602 }
3603 Register SrcReg = getRegForValue(II->getOperand(0));
3604 if (!SrcReg)
3605 return false;
3606 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3607 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
3608 .addReg(SrcReg);
3609 updateValueMap(II, ResultReg);
3610 return true;
3611 }
3612 case Intrinsic::trap:
3613 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3614 .addImm(1);
3615 return true;
3616 case Intrinsic::debugtrap:
3617 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3618 .addImm(0xF000);
3619 return true;
3620
3621 case Intrinsic::sqrt: {
3622    Type *RetTy = II->getCalledFunction()->getReturnType();
3623
3624 MVT VT;
3625 if (!isTypeLegal(RetTy, VT))
3626 return false;
3627
3628 Register Op0Reg = getRegForValue(II->getOperand(0));
3629 if (!Op0Reg)
3630 return false;
3631
3632 unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg);
3633 if (!ResultReg)
3634 return false;
3635
3636 updateValueMap(II, ResultReg);
3637 return true;
3638 }
3639 case Intrinsic::sadd_with_overflow:
3640 case Intrinsic::uadd_with_overflow:
3641 case Intrinsic::ssub_with_overflow:
3642 case Intrinsic::usub_with_overflow:
3643 case Intrinsic::smul_with_overflow:
3644 case Intrinsic::umul_with_overflow: {
3645 // This implements the basic lowering of the xalu with overflow intrinsics.
3646 const Function *Callee = II->getCalledFunction();
3647 auto *Ty = cast<StructType>(Callee->getReturnType());
3648 Type *RetTy = Ty->getTypeAtIndex(0U);
3649
3650 MVT VT;
3651 if (!isTypeLegal(RetTy, VT))
3652 return false;
3653
3654 if (VT != MVT::i32 && VT != MVT::i64)
3655 return false;
3656
3657 const Value *LHS = II->getArgOperand(0);
3658 const Value *RHS = II->getArgOperand(1);
3659 // Canonicalize immediate to the RHS.
3660 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3661 std::swap(LHS, RHS);
3662
3663 // Simplify multiplies.
3664 Intrinsic::ID IID = II->getIntrinsicID();
3665 switch (IID) {
3666 default:
3667 break;
3668 case Intrinsic::smul_with_overflow:
3669 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3670 if (C->getValue() == 2) {
3671 IID = Intrinsic::sadd_with_overflow;
3672 RHS = LHS;
3673 }
3674 break;
3675 case Intrinsic::umul_with_overflow:
3676 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3677 if (C->getValue() == 2) {
3678 IID = Intrinsic::uadd_with_overflow;
3679 RHS = LHS;
3680 }
3681 break;
3682 }
3683
3684 unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3685    AArch64CC::CondCode CC = AArch64CC::AL;
3686    switch (IID) {
3687 default: llvm_unreachable("Unexpected intrinsic!");
3688 case Intrinsic::sadd_with_overflow:
3689 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3690 CC = AArch64CC::VS;
3691 break;
3692 case Intrinsic::uadd_with_overflow:
3693 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3694 CC = AArch64CC::HS;
3695 break;
3696 case Intrinsic::ssub_with_overflow:
3697 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3698 CC = AArch64CC::VS;
3699 break;
3700 case Intrinsic::usub_with_overflow:
3701 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3702 CC = AArch64CC::LO;
3703 break;
3704 case Intrinsic::smul_with_overflow: {
3705 CC = AArch64CC::NE;
3706 Register LHSReg = getRegForValue(LHS);
3707 if (!LHSReg)
3708 return false;
3709
3710 Register RHSReg = getRegForValue(RHS);
3711 if (!RHSReg)
3712 return false;
3713
3714 if (VT == MVT::i32) {
3715 MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg);
3716 Register MulSubReg =
3717 fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3718 // cmp xreg, wreg, sxtw
3719 emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg,
3720 AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true,
3721 /*WantResult=*/false);
3722 MulReg = MulSubReg;
3723 } else {
3724 assert(VT == MVT::i64 && "Unexpected value type.");
3725 // LHSReg and RHSReg cannot be killed by this Mul, since they are
3726 // reused in the next instruction.
3727 MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3728 unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg);
3729 emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63,
3730 /*WantResult=*/false);
3731 }
3732 break;
3733 }
3734 case Intrinsic::umul_with_overflow: {
3735 CC = AArch64CC::NE;
3736 Register LHSReg = getRegForValue(LHS);
3737 if (!LHSReg)
3738 return false;
3739
3740 Register RHSReg = getRegForValue(RHS);
3741 if (!RHSReg)
3742 return false;
3743
3744 if (VT == MVT::i32) {
3745 MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg);
3746 // tst xreg, #0xffffffff00000000
3747 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3748 TII.get(AArch64::ANDSXri), AArch64::XZR)
3749 .addReg(MulReg)
3750 .addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64));
3751 MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3752 } else {
3753 assert(VT == MVT::i64 && "Unexpected value type.");
3754 // LHSReg and RHSReg cannot be killed by this Mul, since they are
3755 // reused in the next instruction.
3756 MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3757 unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg);
3758 emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false);
3759 }
3760 break;
3761 }
3762 }
3763
3764 if (MulReg) {
3765 ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3766 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3767 TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3768 }
3769
3770 if (!ResultReg1)
3771 return false;
3772
3773 ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3774 AArch64::WZR, AArch64::WZR,
3775 getInvertedCondCode(CC));
3776 (void)ResultReg2;
3777 assert((ResultReg1 + 1) == ResultReg2 &&
3778 "Nonconsecutive result registers.");
3779 updateValueMap(II, ResultReg1, 2);
3780 return true;
3781 }
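// As a rough illustration (registers here are hypothetical), a 32-bit
// llvm.uadd.with.overflow handled by the code above becomes:
//   adds w8, w0, w1      ; emitAdd with SetFlags=true
//   cset w9, hs          ; CSINCWr wzr, wzr with the inverted condition
// where w8 holds the sum and w9 the i1 overflow bit.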
3782 case Intrinsic::aarch64_crc32b:
3783 case Intrinsic::aarch64_crc32h:
3784 case Intrinsic::aarch64_crc32w:
3785 case Intrinsic::aarch64_crc32x:
3786 case Intrinsic::aarch64_crc32cb:
3787 case Intrinsic::aarch64_crc32ch:
3788 case Intrinsic::aarch64_crc32cw:
3789 case Intrinsic::aarch64_crc32cx: {
3790 if (!Subtarget->hasCRC())
3791 return false;
3792
3793 unsigned Opc;
3794 switch (II->getIntrinsicID()) {
3795 default:
3796 llvm_unreachable("Unexpected intrinsic!");
3797 case Intrinsic::aarch64_crc32b:
3798 Opc = AArch64::CRC32Brr;
3799 break;
3800 case Intrinsic::aarch64_crc32h:
3801 Opc = AArch64::CRC32Hrr;
3802 break;
3803 case Intrinsic::aarch64_crc32w:
3804 Opc = AArch64::CRC32Wrr;
3805 break;
3806 case Intrinsic::aarch64_crc32x:
3807 Opc = AArch64::CRC32Xrr;
3808 break;
3809 case Intrinsic::aarch64_crc32cb:
3810 Opc = AArch64::CRC32CBrr;
3811 break;
3812 case Intrinsic::aarch64_crc32ch:
3813 Opc = AArch64::CRC32CHrr;
3814 break;
3815 case Intrinsic::aarch64_crc32cw:
3816 Opc = AArch64::CRC32CWrr;
3817 break;
3818 case Intrinsic::aarch64_crc32cx:
3819 Opc = AArch64::CRC32CXrr;
3820 break;
3821 }
3822
3823 Register LHSReg = getRegForValue(II->getArgOperand(0));
3824 Register RHSReg = getRegForValue(II->getArgOperand(1));
3825 if (!LHSReg || !RHSReg)
3826 return false;
3827
3828 Register ResultReg =
3829 fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, LHSReg, RHSReg);
3830 updateValueMap(II, ResultReg);
3831 return true;
3832 }
3833 }
3834 return false;
3835}
3836
3837bool AArch64FastISel::selectRet(const Instruction *I) {
3838 const ReturnInst *Ret = cast<ReturnInst>(I);
3839 const Function &F = *I->getParent()->getParent();
3840
3841 if (!FuncInfo.CanLowerReturn)
3842 return false;
3843
3844 if (F.isVarArg())
3845 return false;
3846
3847 if (TLI.supportSwiftError() &&
3848 F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3849 return false;
3850
3851 if (TLI.supportSplitCSR(FuncInfo.MF))
3852 return false;
3853
3854 // Build a list of return value registers.
3855  SmallVector<unsigned, 4> RetRegs;
3856  if (Ret->getNumOperands() > 0) {
3857 if (Ret->getNumOperands() > 0) {
3858 CallingConv::ID CC = F.getCallingConv();
3859    SmallVector<ISD::OutputArg, 4> Outs;
3860    GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3861
3862 // Analyze operands of the call, assigning locations to each operand.
3863    SmallVector<CCValAssign, 16> ValLocs;
3864    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3865 CCInfo.AnalyzeReturn(Outs, RetCC_AArch64_AAPCS);
3866
3867 // Only handle a single return value for now.
3868 if (ValLocs.size() != 1)
3869 return false;
3870
3871 CCValAssign &VA = ValLocs[0];
3872 const Value *RV = Ret->getOperand(0);
3873
3874 // Don't bother handling odd stuff for now.
3875 if ((VA.getLocInfo() != CCValAssign::Full) &&
3876 (VA.getLocInfo() != CCValAssign::BCvt))
3877 return false;
3878
3879 // Only handle register returns for now.
3880 if (!VA.isRegLoc())
3881 return false;
3882
3883 Register Reg = getRegForValue(RV);
3884 if (Reg == 0)
3885 return false;
3886
3887 unsigned SrcReg = Reg + VA.getValNo();
3888 Register DestReg = VA.getLocReg();
3889 // Avoid a cross-class copy. This is very unlikely.
3890 if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3891 return false;
3892
3893 EVT RVEVT = TLI.getValueType(DL, RV->getType());
3894 if (!RVEVT.isSimple())
3895 return false;
3896
3897 // Vectors (of > 1 lane) in big endian need tricky handling.
3898 if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
3899 !Subtarget->isLittleEndian())
3900 return false;
3901
3902 MVT RVVT = RVEVT.getSimpleVT();
3903 if (RVVT == MVT::f128)
3904 return false;
3905
3906 MVT DestVT = VA.getValVT();
3907 // Special handling for extended integers.
3908 if (RVVT != DestVT) {
3909 if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3910 return false;
3911
3912 if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3913 return false;
3914
3915 bool IsZExt = Outs[0].Flags.isZExt();
3916 SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3917 if (SrcReg == 0)
3918 return false;
3919 }
3920
3921 // "Callee" (i.e. value producer) zero extends pointers at function
3922 // boundary.
3923 if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
3924 SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff);
3925
3926 // Make the copy.
3927 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3928 TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3929
3930 // Add register to return instruction.
3931 RetRegs.push_back(VA.getLocReg());
3932 }
3933
3934 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3935 TII.get(AArch64::RET_ReallyLR));
3936 for (unsigned RetReg : RetRegs)
3937 MIB.addReg(RetReg, RegState::Implicit);
3938 return true;
3939}
3940
3941bool AArch64FastISel::selectTrunc(const Instruction *I) {
3942 Type *DestTy = I->getType();
3943 Value *Op = I->getOperand(0);
3944 Type *SrcTy = Op->getType();
3945
3946 EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3947 EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3948 if (!SrcEVT.isSimple())
3949 return false;
3950 if (!DestEVT.isSimple())
3951 return false;
3952
3953 MVT SrcVT = SrcEVT.getSimpleVT();
3954 MVT DestVT = DestEVT.getSimpleVT();
3955
3956 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3957 SrcVT != MVT::i8)
3958 return false;
3959 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3960 DestVT != MVT::i1)
3961 return false;
3962
3963 Register SrcReg = getRegForValue(Op);
3964 if (!SrcReg)
3965 return false;
3966
3967  // If we're truncating from i64 to a smaller non-legal type then generate an
3968  // AND. Otherwise, we know the high bits are undefined and a truncate only
3969  // generates a COPY. We cannot also mark the source register as the result
3970  // register, because this can incorrectly transfer the kill flag onto the
3971  // source register.
3972 unsigned ResultReg;
3973 if (SrcVT == MVT::i64) {
3974 uint64_t Mask = 0;
3975 switch (DestVT.SimpleTy) {
3976 default:
3977 // Trunc i64 to i32 is handled by the target-independent fast-isel.
3978 return false;
3979 case MVT::i1:
3980 Mask = 0x1;
3981 break;
3982 case MVT::i8:
3983 Mask = 0xff;
3984 break;
3985 case MVT::i16:
3986 Mask = 0xffff;
3987 break;
3988 }
3989 // Issue an extract_subreg to get the lower 32-bits.
3990 Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg,
3991 AArch64::sub_32);
3992 // Create the AND instruction which performs the actual truncation.
3993 ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask);
3994 assert(ResultReg && "Unexpected AND instruction emission failure.");
3995 } else {
3996 ResultReg = createResultReg(&AArch64::GPR32RegClass);
3997 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3998 TII.get(TargetOpcode::COPY), ResultReg)
3999 .addReg(SrcReg);
4000 }
4001
4002 updateValueMap(I, ResultReg);
4003 return true;
4004}
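// Sketch of what selectTrunc emits for a trunc from i64 to i8 (register names
// are made up for the example): a copy of the sub_32 subregister followed by a
// mask to the destination width, e.g.
//   and w8, w9, #0xff
// Truncations from i32 need only the plain COPY in the else-branch above.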
4005
4006unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
4007 assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
4008 DestVT == MVT::i64) &&
4009 "Unexpected value type.");
4010 // Handle i8 and i16 as i32.
4011 if (DestVT == MVT::i8 || DestVT == MVT::i16)
4012 DestVT = MVT::i32;
4013
4014 if (IsZExt) {
4015 unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1);
4016 assert(ResultReg && "Unexpected AND instruction emission failure.");
4017 if (DestVT == MVT::i64) {
4018 // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
4019 // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
4020 Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4021 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4022 TII.get(AArch64::SUBREG_TO_REG), Reg64)
4023 .addImm(0)
4024 .addReg(ResultReg)
4025 .addImm(AArch64::sub_32);
4026 ResultReg = Reg64;
4027 }
4028 return ResultReg;
4029 } else {
4030 if (DestVT == MVT::i64) {
4031 // FIXME: We're SExt i1 to i64.
4032 return 0;
4033 }
4034 return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
4035 0, 0);
4036 }
4037}
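// Illustrative results of emiti1Ext (assumed register names): a zero-extend of
// an i1 is a single mask, a sign-extend is a one-bit signed bitfield extract:
//   and  w8, w0, #0x1        ; IsZExt
//   sbfx w8, w0, #0, #1      ; !IsZExt, i.e. SBFMWri with ImmR=0, ImmS=0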
4038
4039unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4040 unsigned Opc, ZReg;
4041 switch (RetVT.SimpleTy) {
4042 default: return 0;
4043 case MVT::i8:
4044 case MVT::i16:
4045 case MVT::i32:
4046 RetVT = MVT::i32;
4047 Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
4048 case MVT::i64:
4049 Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
4050 }
4051
4052 const TargetRegisterClass *RC =
4053 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4054 return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg);
4055}
4056
4057unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4058 if (RetVT != MVT::i64)
4059 return 0;
4060
4061 return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
4062 Op0, Op1, AArch64::XZR);
4063}
4064
4065unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4066 if (RetVT != MVT::i64)
4067 return 0;
4068
4069 return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
4070 Op0, Op1, AArch64::XZR);
4071}
4072
4073unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg,
4074 unsigned Op1Reg) {
4075 unsigned Opc = 0;
4076 bool NeedTrunc = false;
4077 uint64_t Mask = 0;
4078 switch (RetVT.SimpleTy) {
4079 default: return 0;
4080 case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
4081 case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4082 case MVT::i32: Opc = AArch64::LSLVWr; break;
4083 case MVT::i64: Opc = AArch64::LSLVXr; break;
4084 }
4085
4086 const TargetRegisterClass *RC =
4087 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4088 if (NeedTrunc)
4089 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4090
4091 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4092 if (NeedTrunc)
4093 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4094 return ResultReg;
4095}
4096
4097unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4098 uint64_t Shift, bool IsZExt) {
4099 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4100 "Unexpected source/return type pair.");
4101 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4102 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4103 "Unexpected source value type.");
4104 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4105 RetVT == MVT::i64) && "Unexpected return value type.");
4106
4107 bool Is64Bit = (RetVT == MVT::i64);
4108 unsigned RegSize = Is64Bit ? 64 : 32;
4109 unsigned DstBits = RetVT.getSizeInBits();
4110 unsigned SrcBits = SrcVT.getSizeInBits();
4111 const TargetRegisterClass *RC =
4112 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4113
4114 // Just emit a copy for "zero" shifts.
4115 if (Shift == 0) {
4116 if (RetVT == SrcVT) {
4117 Register ResultReg = createResultReg(RC);
4118 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4119 TII.get(TargetOpcode::COPY), ResultReg)
4120 .addReg(Op0);
4121 return ResultReg;
4122 } else
4123 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4124 }
4125
4126 // Don't deal with undefined shifts.
4127 if (Shift >= DstBits)
4128 return 0;
4129
4130 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4131 // {S|U}BFM Wd, Wn, #r, #s
4132 // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4133
4134 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4135 // %2 = shl i16 %1, 4
4136 // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4137 // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4138 // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4139 // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4140
4141 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4142 // %2 = shl i16 %1, 8
4143 // Wd<32+7-24,32-24> = Wn<7:0>
4144 // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4145 // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4146 // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4147
4148 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4149 // %2 = shl i16 %1, 12
4150 // Wd<32+3-20,32-20> = Wn<3:0>
4151 // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4152 // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4153 // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4154
4155 unsigned ImmR = RegSize - Shift;
4156 // Limit the width to the length of the source type.
4157 unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4158 static const unsigned OpcTable[2][2] = {
4159 {AArch64::SBFMWri, AArch64::SBFMXri},
4160 {AArch64::UBFMWri, AArch64::UBFMXri}
4161 };
4162 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4163 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4164 Register TmpReg = MRI.createVirtualRegister(RC);
4165 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4166 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4167 .addImm(0)
4168 .addReg(Op0)
4169 .addImm(AArch64::sub_32);
4170 Op0 = TmpReg;
4171 }
4172 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4173}
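// For example (registers hypothetical), shifting an i8 value that was
// zero-extended to i32 left by 4 folds the extension and the shift into a
// single bitfield move:
//   ubfiz w8, w0, #4, #8     ; UBFMWri with ImmR=28, ImmS=7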
4174
4175unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg,
4176 unsigned Op1Reg) {
4177 unsigned Opc = 0;
4178 bool NeedTrunc = false;
4179 uint64_t Mask = 0;
4180 switch (RetVT.SimpleTy) {
4181 default: return 0;
4182 case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
4183 case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4184 case MVT::i32: Opc = AArch64::LSRVWr; break;
4185 case MVT::i64: Opc = AArch64::LSRVXr; break;
4186 }
4187
4188 const TargetRegisterClass *RC =
4189 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4190 if (NeedTrunc) {
4191 Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask);
4192 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4193 }
4194 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4195 if (NeedTrunc)
4196 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4197 return ResultReg;
4198}
4199
4200unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4201 uint64_t Shift, bool IsZExt) {
4202 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4203 "Unexpected source/return type pair.");
4204 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4205 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4206 "Unexpected source value type.");
4207 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4208 RetVT == MVT::i64) && "Unexpected return value type.");
4209
4210 bool Is64Bit = (RetVT == MVT::i64);
4211 unsigned RegSize = Is64Bit ? 64 : 32;
4212 unsigned DstBits = RetVT.getSizeInBits();
4213 unsigned SrcBits = SrcVT.getSizeInBits();
4214 const TargetRegisterClass *RC =
4215 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4216
4217 // Just emit a copy for "zero" shifts.
4218 if (Shift == 0) {
4219 if (RetVT == SrcVT) {
4220 Register ResultReg = createResultReg(RC);
4221 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4222 TII.get(TargetOpcode::COPY), ResultReg)
4223 .addReg(Op0);
4224 return ResultReg;
4225 } else
4226 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4227 }
4228
4229 // Don't deal with undefined shifts.
4230 if (Shift >= DstBits)
4231 return 0;
4232
4233 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4234 // {S|U}BFM Wd, Wn, #r, #s
4235 // Wd<s-r:0> = Wn<s:r> when r <= s
4236
4237 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4238 // %2 = lshr i16 %1, 4
4239 // Wd<7-4:0> = Wn<7:4>
4240 // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4241 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4242 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4243
4244 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4245 // %2 = lshr i16 %1, 8
4246 // Wd<7-7,0> = Wn<7:7>
4247 // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4248 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4249 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4250
4251 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4252 // %2 = lshr i16 %1, 12
4253 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4254 // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4255 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4256 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4257
4258 if (Shift >= SrcBits && IsZExt)
4259 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4260
4261 // It is not possible to fold a sign-extend into the LShr instruction. In this
4262 // case emit a sign-extend.
4263 if (!IsZExt) {
4264 Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4265 if (!Op0)
4266 return 0;
4267 SrcVT = RetVT;
4268 SrcBits = SrcVT.getSizeInBits();
4269 IsZExt = true;
4270 }
4271
4272 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4273 unsigned ImmS = SrcBits - 1;
4274 static const unsigned OpcTable[2][2] = {
4275 {AArch64::SBFMWri, AArch64::SBFMXri},
4276 {AArch64::UBFMWri, AArch64::UBFMXri}
4277 };
4278 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4279 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4280 Register TmpReg = MRI.createVirtualRegister(RC);
4281 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4282 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4283 .addImm(0)
4284 .addReg(Op0)
4285 .addImm(AArch64::sub_32);
4286 Op0 = TmpReg;
4287 }
4288 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4289}
4290
4291unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg,
4292 unsigned Op1Reg) {
4293 unsigned Opc = 0;
4294 bool NeedTrunc = false;
4295 uint64_t Mask = 0;
4296 switch (RetVT.SimpleTy) {
4297 default: return 0;
4298 case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
4299 case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4300 case MVT::i32: Opc = AArch64::ASRVWr; break;
4301 case MVT::i64: Opc = AArch64::ASRVXr; break;
4302 }
4303
4304 const TargetRegisterClass *RC =
4305 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4306 if (NeedTrunc) {
4307 Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
4308 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4309 }
4310 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4311 if (NeedTrunc)
4312 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4313 return ResultReg;
4314}
4315
4316unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4317 uint64_t Shift, bool IsZExt) {
4318 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4319 "Unexpected source/return type pair.");
4320 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4321 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4322 "Unexpected source value type.");
4323 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4324 RetVT == MVT::i64) && "Unexpected return value type.");
4325
4326 bool Is64Bit = (RetVT == MVT::i64);
4327 unsigned RegSize = Is64Bit ? 64 : 32;
4328 unsigned DstBits = RetVT.getSizeInBits();
4329 unsigned SrcBits = SrcVT.getSizeInBits();
4330 const TargetRegisterClass *RC =
4331 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4332
4333 // Just emit a copy for "zero" shifts.
4334 if (Shift == 0) {
4335 if (RetVT == SrcVT) {
4336 Register ResultReg = createResultReg(RC);
4337 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4338 TII.get(TargetOpcode::COPY), ResultReg)
4339 .addReg(Op0);
4340 return ResultReg;
4341 } else
4342 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4343 }
4344
4345 // Don't deal with undefined shifts.
4346 if (Shift >= DstBits)
4347 return 0;
4348
4349 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4350 // {S|U}BFM Wd, Wn, #r, #s
4351 // Wd<s-r:0> = Wn<s:r> when r <= s
4352
4353 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4354 // %2 = ashr i16 %1, 4
4355 // Wd<7-4:0> = Wn<7:4>
4356 // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4357 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4358 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4359
4360 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4361 // %2 = ashr i16 %1, 8
4362 // Wd<7-7,0> = Wn<7:7>
4363 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4364 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4365 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4366
4367 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4368 // %2 = ashr i16 %1, 12
4369 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4370 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4371 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4372 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4373
4374 if (Shift >= SrcBits && IsZExt)
4375 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4376
4377 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4378 unsigned ImmS = SrcBits - 1;
4379 static const unsigned OpcTable[2][2] = {
4380 {AArch64::SBFMWri, AArch64::SBFMXri},
4381 {AArch64::UBFMWri, AArch64::UBFMXri}
4382 };
4383 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4384 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4385 Register TmpReg = MRI.createVirtualRegister(RC);
4386 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4387 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4388 .addImm(0)
4389 .addReg(Op0)
4390 .addImm(AArch64::sub_32);
4391 Op0 = TmpReg;
4392 }
4393 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4394}
4395
4396unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4397 bool IsZExt) {
4398 assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4399
4400 // FastISel does not have plumbing to deal with extensions where the SrcVT or
4401 // DestVT are odd things, so test to make sure that they are both types we can
4402 // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4403 // bail out to SelectionDAG.
4404 if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4405 (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4406 ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4407 (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4408 return 0;
4409
4410 unsigned Opc;
4411 unsigned Imm = 0;
4412
4413 switch (SrcVT.SimpleTy) {
4414 default:
4415 return 0;
4416 case MVT::i1:
4417 return emiti1Ext(SrcReg, DestVT, IsZExt);
4418 case MVT::i8:
4419 if (DestVT == MVT::i64)
4420 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4421 else
4422 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4423 Imm = 7;
4424 break;
4425 case MVT::i16:
4426 if (DestVT == MVT::i64)
4427 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4428 else
4429 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4430 Imm = 15;
4431 break;
4432 case MVT::i32:
4433 assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4434 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4435 Imm = 31;
4436 break;
4437 }
4438
4439 // Handle i8 and i16 as i32.
4440 if (DestVT == MVT::i8 || DestVT == MVT::i16)
4441 DestVT = MVT::i32;
4442 else if (DestVT == MVT::i64) {
4443 Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4444 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4445 TII.get(AArch64::SUBREG_TO_REG), Src64)
4446 .addImm(0)
4447 .addReg(SrcReg)
4448 .addImm(AArch64::sub_32);
4449 SrcReg = Src64;
4450 }
4451
4452 const TargetRegisterClass *RC =
4453 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4454 return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm);
4455}
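// Two illustrative results of emitIntExt (register names assumed):
//   i8  -> i32, zero-extend:  uxtb w8, w0    ; UBFMWri with Imm=7
//   i16 -> i64, sign-extend:  sxth x8, w0    ; SBFMXri with Imm=15, after the
//                                            ; SUBREG_TO_REG widening above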
4456
4457static bool isZExtLoad(const MachineInstr *LI) {
4458 switch (LI->getOpcode()) {
4459 default:
4460 return false;
4461 case AArch64::LDURBBi:
4462 case AArch64::LDURHHi:
4463 case AArch64::LDURWi:
4464 case AArch64::LDRBBui:
4465 case AArch64::LDRHHui:
4466 case AArch64::LDRWui:
4467 case AArch64::LDRBBroX:
4468 case AArch64::LDRHHroX:
4469 case AArch64::LDRWroX:
4470 case AArch64::LDRBBroW:
4471 case AArch64::LDRHHroW:
4472 case AArch64::LDRWroW:
4473 return true;
4474 }
4475}
4476
4477static bool isSExtLoad(const MachineInstr *LI) {
4478 switch (LI->getOpcode()) {
4479 default:
4480 return false;
4481 case AArch64::LDURSBWi:
4482 case AArch64::LDURSHWi:
4483 case AArch64::LDURSBXi:
4484 case AArch64::LDURSHXi:
4485 case AArch64::LDURSWi:
4486 case AArch64::LDRSBWui:
4487 case AArch64::LDRSHWui:
4488 case AArch64::LDRSBXui:
4489 case AArch64::LDRSHXui:
4490 case AArch64::LDRSWui:
4491 case AArch64::LDRSBWroX:
4492 case AArch64::LDRSHWroX:
4493 case AArch64::LDRSBXroX:
4494 case AArch64::LDRSHXroX:
4495 case AArch64::LDRSWroX:
4496 case AArch64::LDRSBWroW:
4497 case AArch64::LDRSHWroW:
4498 case AArch64::LDRSBXroW:
4499 case AArch64::LDRSHXroW:
4500 case AArch64::LDRSWroW:
4501 return true;
4502 }
4503}
4504
4505bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4506 MVT SrcVT) {
4507 const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4508 if (!LI || !LI->hasOneUse())
4509 return false;
4510
4511 // Check if the load instruction has already been selected.
4512 Register Reg = lookUpRegForValue(LI);
4513 if (!Reg)
4514 return false;
4515
4516 MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4517 if (!MI)
4518 return false;
4519
4520 // Check if the correct load instruction has been emitted - SelectionDAG might
4521 // have emitted a zero-extending load, but we need a sign-extending load.
4522 bool IsZExt = isa<ZExtInst>(I);
4523 const auto *LoadMI = MI;
4524 if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4525 LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4526 Register LoadReg = MI->getOperand(1).getReg();
4527 LoadMI = MRI.getUniqueVRegDef(LoadReg);
4528 assert(LoadMI && "Expected valid instruction");
4529 }
4530 if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4531 return false;
4532
4533 // Nothing to be done.
4534 if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4535 updateValueMap(I, Reg);
4536 return true;
4537 }
4538
4539 if (IsZExt) {
4540 Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
4541 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4542 TII.get(AArch64::SUBREG_TO_REG), Reg64)
4543 .addImm(0)
4544 .addReg(Reg, getKillRegState(true))
4545 .addImm(AArch64::sub_32);
4546 Reg = Reg64;
4547 } else {
4548 assert((MI->getOpcode() == TargetOpcode::COPY &&
4549 MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4550 "Expected copy instruction");
4551 Reg = MI->getOperand(1).getReg();
4552    MachineBasicBlock::iterator I(MI);
4553    removeDeadCode(I, std::next(I));
4554 }
4555 updateValueMap(I, Reg);
4556 return true;
4557}
4558
4559bool AArch64FastISel::selectIntExt(const Instruction *I) {
4560 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4561 "Unexpected integer extend instruction.");
4562 MVT RetVT;
4563 MVT SrcVT;
4564 if (!isTypeSupported(I->getType(), RetVT))
4565 return false;
4566
4567 if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4568 return false;
4569
4570 // Try to optimize already sign-/zero-extended values from load instructions.
4571 if (optimizeIntExtLoad(I, RetVT, SrcVT))
4572 return true;
4573
4574 Register SrcReg = getRegForValue(I->getOperand(0));
4575 if (!SrcReg)
4576 return false;
4577
4578 // Try to optimize already sign-/zero-extended values from function arguments.
4579 bool IsZExt = isa<ZExtInst>(I);
4580 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4581 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4582 if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4583 Register ResultReg = createResultReg(&AArch64::GPR64RegClass);
4584 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4585 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4586 .addImm(0)
4587 .addReg(SrcReg)
4588 .addImm(AArch64::sub_32);
4589 SrcReg = ResultReg;
4590 }
4591
4592 updateValueMap(I, SrcReg);
4593 return true;
4594 }
4595 }
4596
4597 unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4598 if (!ResultReg)
4599 return false;
4600
4601 updateValueMap(I, ResultReg);
4602 return true;
4603}
4604
4605bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4606 EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4607 if (!DestEVT.isSimple())
4608 return false;
4609
4610 MVT DestVT = DestEVT.getSimpleVT();
4611 if (DestVT != MVT::i64 && DestVT != MVT::i32)
4612 return false;
4613
4614 unsigned DivOpc;
4615 bool Is64bit = (DestVT == MVT::i64);
4616 switch (ISDOpcode) {
4617 default:
4618 return false;
4619 case ISD::SREM:
4620 DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4621 break;
4622 case ISD::UREM:
4623 DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4624 break;
4625 }
4626 unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4627 Register Src0Reg = getRegForValue(I->getOperand(0));
4628 if (!Src0Reg)
4629 return false;
4630
4631 Register Src1Reg = getRegForValue(I->getOperand(1));
4632 if (!Src1Reg)
4633 return false;
4634
4635 const TargetRegisterClass *RC =
4636 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4637 Register QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg);
4638 assert(QuotReg && "Unexpected DIV instruction emission failure.");
4639 // The remainder is computed as numerator - (quotient * denominator) using the
4640 // MSUB instruction.
4641 Register ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg);
4642 updateValueMap(I, ResultReg);
4643 return true;
4644}
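// Sketch of the i32 srem lowering above (registers are illustrative):
//   sdiv w8, w0, w1          ; quotient
//   msub w0, w8, w1, w0      ; remainder = w0 - (w8 * w1)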
4645
4646bool AArch64FastISel::selectMul(const Instruction *I) {
4647 MVT VT;
4648 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4649 return false;
4650
4651 if (VT.isVector())
4652 return selectBinaryOp(I, ISD::MUL);
4653
4654 const Value *Src0 = I->getOperand(0);
4655 const Value *Src1 = I->getOperand(1);
4656 if (const auto *C = dyn_cast<ConstantInt>(Src0))
4657 if (C->getValue().isPowerOf2())
4658 std::swap(Src0, Src1);
4659
4660 // Try to simplify to a shift instruction.
4661 if (const auto *C = dyn_cast<ConstantInt>(Src1))
4662 if (C->getValue().isPowerOf2()) {
4663 uint64_t ShiftVal = C->getValue().logBase2();
4664 MVT SrcVT = VT;
4665 bool IsZExt = true;
4666 if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4667 if (!isIntExtFree(ZExt)) {
4668 MVT VT;
4669 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4670 SrcVT = VT;
4671 IsZExt = true;
4672 Src0 = ZExt->getOperand(0);
4673 }
4674 }
4675 } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4676 if (!isIntExtFree(SExt)) {
4677 MVT VT;
4678 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4679 SrcVT = VT;
4680 IsZExt = false;
4681 Src0 = SExt->getOperand(0);
4682 }
4683 }
4684 }
4685
4686 Register Src0Reg = getRegForValue(Src0);
4687 if (!Src0Reg)
4688 return false;
4689
4690 unsigned ResultReg =
4691 emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt);
4692
4693 if (ResultReg) {
4694 updateValueMap(I, ResultReg);
4695 return true;
4696 }
4697 }
4698
4699 Register Src0Reg = getRegForValue(I->getOperand(0));
4700 if (!Src0Reg)
4701 return false;
4702
4703 Register Src1Reg = getRegForValue(I->getOperand(1));
4704 if (!Src1Reg)
4705 return false;
4706
4707 unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg);
4708
4709 if (!ResultReg)
4710 return false;
4711
4712 updateValueMap(I, ResultReg);
4713 return true;
4714}
4715
4716bool AArch64FastISel::selectShift(const Instruction *I) {
4717 MVT RetVT;
4718 if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4719 return false;
4720
4721 if (RetVT.isVector())
4722 return selectOperator(I, I->getOpcode());
4723
4724 if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4725 unsigned ResultReg = 0;
4726 uint64_t ShiftVal = C->getZExtValue();
4727 MVT SrcVT = RetVT;
4728 bool IsZExt = I->getOpcode() != Instruction::AShr;
4729 const Value *Op0 = I->getOperand(0);
4730 if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4731 if (!isIntExtFree(ZExt)) {
4732 MVT TmpVT;
4733 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4734 SrcVT = TmpVT;
4735 IsZExt = true;
4736 Op0 = ZExt->getOperand(0);
4737 }
4738 }
4739 } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4740 if (!isIntExtFree(SExt)) {
4741 MVT TmpVT;
4742 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4743 SrcVT = TmpVT;
4744 IsZExt = false;
4745 Op0 = SExt->getOperand(0);
4746 }
4747 }
4748 }
4749
4750 Register Op0Reg = getRegForValue(Op0);
4751 if (!Op0Reg)
4752 return false;
4753
4754 switch (I->getOpcode()) {
4755 default: llvm_unreachable("Unexpected instruction.");
4756 case Instruction::Shl:
4757 ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4758 break;
4759 case Instruction::AShr:
4760 ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4761 break;
4762 case Instruction::LShr:
4763 ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4764 break;
4765 }
4766 if (!ResultReg)
4767 return false;
4768
4769 updateValueMap(I, ResultReg);
4770 return true;
4771 }
4772
4773 Register Op0Reg = getRegForValue(I->getOperand(0));
4774 if (!Op0Reg)
4775 return false;
4776
4777 Register Op1Reg = getRegForValue(I->getOperand(1));
4778 if (!Op1Reg)
4779 return false;
4780
4781 unsigned ResultReg = 0;
4782 switch (I->getOpcode()) {
4783 default: llvm_unreachable("Unexpected instruction.");
4784 case Instruction::Shl:
4785 ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg);
4786 break;
4787 case Instruction::AShr:
4788 ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg);
4789 break;
4790 case Instruction::LShr:
4791 ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg);
4792 break;
4793 }
4794
4795 if (!ResultReg)
4796 return false;
4797
4798 updateValueMap(I, ResultReg);
4799 return true;
4800}
4801
4802bool AArch64FastISel::selectBitCast(const Instruction *I) {
4803 MVT RetVT, SrcVT;
4804
4805 if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4806 return false;
4807 if (!isTypeLegal(I->getType(), RetVT))
4808 return false;
4809
4810 unsigned Opc;
4811 if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4812 Opc = AArch64::FMOVWSr;
4813 else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4814 Opc = AArch64::FMOVXDr;
4815 else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4816 Opc = AArch64::FMOVSWr;
4817 else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4818 Opc = AArch64::FMOVDXr;
4819 else
4820 return false;
4821
4822 const TargetRegisterClass *RC = nullptr;
4823 switch (RetVT.SimpleTy) {
4824 default: llvm_unreachable("Unexpected value type.");
4825 case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4826 case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4827 case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4828 case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4829 }
4830 Register Op0Reg = getRegForValue(I->getOperand(0));
4831 if (!Op0Reg)
4832 return false;
4833
4834 Register ResultReg = fastEmitInst_r(Opc, RC, Op0Reg);
4835 if (!ResultReg)
4836 return false;
4837
4838 updateValueMap(I, ResultReg);
4839 return true;
4840}
4841
4842bool AArch64FastISel::selectFRem(const Instruction *I) {
4843 MVT RetVT;
4844 if (!isTypeLegal(I->getType(), RetVT))
4845 return false;
4846
4847 RTLIB::Libcall LC;
4848 switch (RetVT.SimpleTy) {
4849 default:
4850 return false;
4851 case MVT::f32:
4852 LC = RTLIB::REM_F32;
4853 break;
4854 case MVT::f64:
4855 LC = RTLIB::REM_F64;
4856 break;
4857 }
4858
4859 ArgListTy Args;
4860 Args.reserve(I->getNumOperands());
4861
4862 // Populate the argument list.
4863 for (auto &Arg : I->operands()) {
4864 ArgListEntry Entry;
4865 Entry.Val = Arg;
4866 Entry.Ty = Arg->getType();
4867 Args.push_back(Entry);
4868 }
4869
4870 CallLoweringInfo CLI;
4871 MCContext &Ctx = MF->getContext();
4872 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4873 TLI.getLibcallName(LC), std::move(Args));
4874 if (!lowerCallTo(CLI))
4875 return false;
4876 updateValueMap(I, CLI.ResultReg);
4877 return true;
4878}
4879
4880bool AArch64FastISel::selectSDiv(const Instruction *I) {
4881 MVT VT;
4882 if (!isTypeLegal(I->getType(), VT))
4883 return false;
4884
4885 if (!isa<ConstantInt>(I->getOperand(1)))
4886 return selectBinaryOp(I, ISD::SDIV);
4887
4888 const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4889 if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4890 !(C.isPowerOf2() || C.isNegatedPowerOf2()))
4891 return selectBinaryOp(I, ISD::SDIV);
4892
4893 unsigned Lg2 = C.countr_zero();
4894 Register Src0Reg = getRegForValue(I->getOperand(0));
4895 if (!Src0Reg)
4896 return false;
4897
4898 if (cast<BinaryOperator>(I)->isExact()) {
4899 unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2);
4900 if (!ResultReg)
4901 return false;
4902 updateValueMap(I, ResultReg);
4903 return true;
4904 }
4905
4906 int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4907 unsigned AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne);
4908 if (!AddReg)
4909 return false;
4910
4911 // (Src0 < 0) ? Pow2 - 1 : 0;
4912 if (!emitICmp_ri(VT, Src0Reg, 0))
4913 return false;
4914
4915 unsigned SelectOpc;
4916 const TargetRegisterClass *RC;
4917 if (VT == MVT::i64) {
4918 SelectOpc = AArch64::CSELXr;
4919 RC = &AArch64::GPR64RegClass;
4920 } else {
4921 SelectOpc = AArch64::CSELWr;
4922 RC = &AArch64::GPR32RegClass;
4923 }
4924 Register SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg,
4925                                        AArch64CC::LT);
4926  if (!SelectReg)
4927 return false;
4928
4929 // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4930 // negate the result.
4931 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4932 unsigned ResultReg;
4933 if (C.isNegative())
4934 ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg,
4935 AArch64_AM::ASR, Lg2);
4936 else
4937 ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2);
4938
4939 if (!ResultReg)
4940 return false;
4941
4942 updateValueMap(I, ResultReg);
4943 return true;
4944}
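// Rough sketch of the non-exact sdiv-by-8 sequence built above (register names
// invented for the example):
//   add  w8, w0, #7          ; Src0 + (Pow2 - 1)
//   cmp  w0, #0
//   csel w8, w8, w0, lt      ; keep the biased value only for negative Src0
//   asr  w0, w8, #3          ; or "neg w0, w8, asr #3" for a divisor of -8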
4945
4946/// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4947/// have to duplicate it for AArch64, because otherwise we would fail during the
4948/// sign-extend emission.
4949unsigned AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4950 Register IdxN = getRegForValue(Idx);
4951 if (IdxN == 0)
4952 // Unhandled operand. Halt "fast" selection and bail.
4953 return 0;
4954
4955 // If the index is smaller or larger than intptr_t, truncate or extend it.
4956 MVT PtrVT = TLI.getPointerTy(DL);
4957 EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4958 if (IdxVT.bitsLT(PtrVT)) {
4959 IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
4960 } else if (IdxVT.bitsGT(PtrVT))
4961 llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4962 return IdxN;
4963}
4964
4965/// This is mostly a copy of the existing FastISel GEP code, but we have to
4966/// duplicate it for AArch64, because otherwise we would bail out even for
4967/// simple cases. This is because the standard fastEmit functions don't cover
4968/// MUL at all and ADD is lowered very inefficiently.
4969bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4970 if (Subtarget->isTargetILP32())
4971 return false;
4972
4973 Register N = getRegForValue(I->getOperand(0));
4974 if (!N)
4975 return false;
4976
4977 // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4978 // into a single N = N + TotalOffset.
4979 uint64_t TotalOffs = 0;
4980 MVT VT = TLI.getPointerTy(DL);
4981  for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
4982       GTI != E; ++GTI) {
4983 const Value *Idx = GTI.getOperand();
4984 if (auto *StTy = GTI.getStructTypeOrNull()) {
4985 unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4986 // N = N + Offset
4987 if (Field)
4988 TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4989 } else {
4990 Type *Ty = GTI.getIndexedType();
4991
4992 // If this is a constant subscript, handle it quickly.
4993 if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4994 if (CI->isZero())
4995 continue;
4996 // N = N + Offset
4997 TotalOffs +=
4998 DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
4999 continue;
5000 }
5001 if (TotalOffs) {
5002 N = emitAdd_ri_(VT, N, TotalOffs);
5003 if (!N)
5004 return false;
5005 TotalOffs = 0;
5006 }
5007
5008 // N = N + Idx * ElementSize;
5009 uint64_t ElementSize = DL.getTypeAllocSize(Ty);
5010 unsigned IdxN = getRegForGEPIndex(Idx);
5011 if (!IdxN)
5012 return false;
5013
5014 if (ElementSize != 1) {
5015 unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
5016 if (!C)
5017 return false;
5018 IdxN = emitMul_rr(VT, IdxN, C);
5019 if (!IdxN)
5020 return false;
5021 }
5022 N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
5023 if (!N)
5024 return false;
5025 }
5026 }
5027 if (TotalOffs) {
5028 N = emitAdd_ri_(VT, N, TotalOffs);
5029 if (!N)
5030 return false;
5031 }
5032 updateValueMap(I, N);
5033 return true;
5034}
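// Illustrative lowering of "getelementptr i32, ptr %p, i64 %i" by the loop
// above (registers hypothetical): constant offsets are folded into a single
// add, while variable indices are scaled with an explicit multiply:
//   mov x8, #4               ; ElementSize
//   mul x8, x9, x8           ; Idx * ElementSize
//   add x0, x10, x8          ; N + scaled index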
5035
5036bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
5037 assert(TM.getOptLevel() == CodeGenOptLevel::None &&
5038 "cmpxchg survived AtomicExpand at optlevel > -O0");
5039
5040 auto *RetPairTy = cast<StructType>(I->getType());
5041 Type *RetTy = RetPairTy->getTypeAtIndex(0U);
5042 assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
5043 "cmpxchg has a non-i1 status result");
5044
5045 MVT VT;
5046 if (!isTypeLegal(RetTy, VT))
5047 return false;
5048
5049 const TargetRegisterClass *ResRC;
5050 unsigned Opc, CmpOpc;
5051 // This only supports i32/i64, because i8/i16 aren't legal, and the generic
5052 // extractvalue selection doesn't support that.
5053 if (VT == MVT::i32) {
5054 Opc = AArch64::CMP_SWAP_32;
5055 CmpOpc = AArch64::SUBSWrs;
5056 ResRC = &AArch64::GPR32RegClass;
5057 } else if (VT == MVT::i64) {
5058 Opc = AArch64::CMP_SWAP_64;
5059 CmpOpc = AArch64::SUBSXrs;
5060 ResRC = &AArch64::GPR64RegClass;
5061 } else {
5062 return false;
5063 }
5064
5065 const MCInstrDesc &II = TII.get(Opc);
5066
5067 const Register AddrReg = constrainOperandRegClass(
5068 II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
5069 const Register DesiredReg = constrainOperandRegClass(
5070 II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
5071 const Register NewReg = constrainOperandRegClass(
5072 II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
5073
5074 const Register ResultReg1 = createResultReg(ResRC);
5075 const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5076 const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5077
5078 // FIXME: MachineMemOperand doesn't support cmpxchg yet.
5079 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
5080 .addDef(ResultReg1)
5081 .addDef(ScratchReg)
5082 .addUse(AddrReg)
5083 .addUse(DesiredReg)
5084 .addUse(NewReg);
5085
5086 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc))
5087 .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5088 .addUse(ResultReg1)
5089 .addUse(DesiredReg)
5090 .addImm(0);
5091
5092 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr))
5093 .addDef(ResultReg2)
5094 .addUse(AArch64::WZR)
5095 .addUse(AArch64::WZR)
5096      .addImm(AArch64CC::NE);
5097
5098 assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5099 updateValueMap(I, ResultReg1, 2);
5100 return true;
5101}
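// The pseudo built above is expanded after FastISel; roughly, for i32
// (registers are illustrative):
//   CMP_SWAP_32              ; later becomes a load-exclusive/store-exclusive
//                            ; retry loop
//   subs wzr, w8, w1         ; compare the loaded value with the expected one
//   cset w9, eq              ; i1 success flag (CSINCWr wzr, wzr, ne)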
5102
5103bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5104 if (TLI.fallBackToDAGISel(*I))
5105 return false;
5106 switch (I->getOpcode()) {
5107 default:
5108 break;
5109 case Instruction::Add:
5110 case Instruction::Sub:
5111 return selectAddSub(I);
5112 case Instruction::Mul:
5113 return selectMul(I);
5114 case Instruction::SDiv:
5115 return selectSDiv(I);
5116 case Instruction::SRem:
5117 if (!selectBinaryOp(I, ISD::SREM))
5118 return selectRem(I, ISD::SREM);
5119 return true;
5120 case Instruction::URem:
5121 if (!selectBinaryOp(I, ISD::UREM))
5122 return selectRem(I, ISD::UREM);
5123 return true;
5124 case Instruction::Shl:
5125 case Instruction::LShr:
5126 case Instruction::AShr:
5127 return selectShift(I);
5128 case Instruction::And:
5129 case Instruction::Or:
5130 case Instruction::Xor:
5131 return selectLogicalOp(I);
5132 case Instruction::Br:
5133 return selectBranch(I);
5134 case Instruction::IndirectBr:
5135 return selectIndirectBr(I);
5136 case Instruction::BitCast:
5137    if (!selectCast(I, ISD::BITCAST))
5138      return selectBitCast(I);
5139 return true;
5140 case Instruction::FPToSI:
5141 if (!selectCast(I, ISD::FP_TO_SINT))
5142 return selectFPToInt(I, /*Signed=*/true);
5143 return true;
5144 case Instruction::FPToUI:
5145 return selectFPToInt(I, /*Signed=*/false);
5146 case Instruction::ZExt:
5147 case Instruction::SExt:
5148 return selectIntExt(I);
5149 case Instruction::Trunc:
5150 if (!selectCast(I, ISD::TRUNCATE))
5151 return selectTrunc(I);
5152 return true;
5153 case Instruction::FPExt:
5154 return selectFPExt(I);
5155 case Instruction::FPTrunc:
5156 return selectFPTrunc(I);
5157 case Instruction::SIToFP:
5158 if (!selectCast(I, ISD::SINT_TO_FP))
5159 return selectIntToFP(I, /*Signed=*/true);
5160 return true;
5161 case Instruction::UIToFP:
5162 return selectIntToFP(I, /*Signed=*/false);
5163 case Instruction::Load:
5164 return selectLoad(I);
5165 case Instruction::Store:
5166 return selectStore(I);
5167 case Instruction::FCmp:
5168 case Instruction::ICmp:
5169 return selectCmp(I);
5170 case Instruction::Select:
5171 return selectSelect(I);
5172 case Instruction::Ret:
5173 return selectRet(I);
5174 case Instruction::FRem:
5175 return selectFRem(I);
5176 case Instruction::GetElementPtr:
5177 return selectGetElementPtr(I);
5178 case Instruction::AtomicCmpXchg:
5179 return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5180 }
5181
5182 // fall-back to target-independent instruction selection.
5183 return selectOperator(I, I->getOpcode());
5184}
5185
5186FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
5187                                        const TargetLibraryInfo *LibInfo) {
5188
5189 SMEAttrs CallerAttrs(*FuncInfo.Fn);
5190 if (CallerAttrs.hasZAState() || CallerAttrs.hasStreamingInterfaceOrBody() ||
5191 CallerAttrs.hasStreamingCompatibleInterface())
5192 return nullptr;
5193 return new AArch64FastISel(FuncInfo, LibInfo);
5194}