AArch64FastISel.cpp
1//===- AArch64FastISel.cpp - AArch64 FastISel implementation ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the AArch64-specific support for the FastISel class. Some
10// of the target-specific code is generated by tablegen in the file
11// AArch64GenFastISel.inc, which is #included here.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AArch64.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
22#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/APInt.h"
24#include "llvm/ADT/DenseMap.h"
41#include "llvm/IR/Argument.h"
42#include "llvm/IR/Attributes.h"
43#include "llvm/IR/BasicBlock.h"
44#include "llvm/IR/CallingConv.h"
45#include "llvm/IR/Constant.h"
46#include "llvm/IR/Constants.h"
47#include "llvm/IR/DataLayout.h"
49#include "llvm/IR/Function.h"
51#include "llvm/IR/GlobalValue.h"
52#include "llvm/IR/InstrTypes.h"
53#include "llvm/IR/Instruction.h"
56#include "llvm/IR/Intrinsics.h"
57#include "llvm/IR/IntrinsicsAArch64.h"
58#include "llvm/IR/Operator.h"
59#include "llvm/IR/Type.h"
60#include "llvm/IR/User.h"
61#include "llvm/IR/Value.h"
62#include "llvm/MC/MCInstrDesc.h"
64#include "llvm/MC/MCSymbol.h"
71#include <algorithm>
72#include <cassert>
73#include <cstdint>
74#include <iterator>
75#include <utility>
76
77using namespace llvm;
78
79namespace {
80
81class AArch64FastISel final : public FastISel {
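 // Address abstracts the forms of memory operand FastISel can build here: a
 // register or frame-index base, an optional register offset with a
 // shift/extend, a constant byte offset, and (for calls/globals) a GlobalValue.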
82 class Address {
83 public:
84 using BaseKind = enum {
85 RegBase,
86 FrameIndexBase
87 };
88
89 private:
 90 BaseKind Kind = RegBase;
 91 AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
92 union {
93 unsigned Reg;
94 int FI;
95 } Base;
96 unsigned OffsetReg = 0;
97 unsigned Shift = 0;
98 int64_t Offset = 0;
99 const GlobalValue *GV = nullptr;
100
101 public:
102 Address() { Base.Reg = 0; }
103
104 void setKind(BaseKind K) { Kind = K; }
105 BaseKind getKind() const { return Kind; }
106 void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
107 AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
108 bool isRegBase() const { return Kind == RegBase; }
109 bool isFIBase() const { return Kind == FrameIndexBase; }
110
111 void setReg(unsigned Reg) {
112 assert(isRegBase() && "Invalid base register access!");
113 Base.Reg = Reg;
114 }
115
116 unsigned getReg() const {
117 assert(isRegBase() && "Invalid base register access!");
118 return Base.Reg;
119 }
120
121 void setOffsetReg(unsigned Reg) {
122 OffsetReg = Reg;
123 }
124
125 unsigned getOffsetReg() const {
126 return OffsetReg;
127 }
128
129 void setFI(unsigned FI) {
130 assert(isFIBase() && "Invalid base frame index access!");
131 Base.FI = FI;
132 }
133
134 unsigned getFI() const {
135 assert(isFIBase() && "Invalid base frame index access!");
136 return Base.FI;
137 }
138
139 void setOffset(int64_t O) { Offset = O; }
140 int64_t getOffset() { return Offset; }
141 void setShift(unsigned S) { Shift = S; }
142 unsigned getShift() { return Shift; }
143
144 void setGlobalValue(const GlobalValue *G) { GV = G; }
145 const GlobalValue *getGlobalValue() { return GV; }
146 };
147
148 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
149 /// make the right decision when generating code for different targets.
150 const AArch64Subtarget *Subtarget;
 151 LLVMContext *Context;
 152
153 bool fastLowerArguments() override;
154 bool fastLowerCall(CallLoweringInfo &CLI) override;
155 bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
156
157private:
158 // Selection routines.
159 bool selectAddSub(const Instruction *I);
160 bool selectLogicalOp(const Instruction *I);
161 bool selectLoad(const Instruction *I);
162 bool selectStore(const Instruction *I);
163 bool selectBranch(const Instruction *I);
164 bool selectIndirectBr(const Instruction *I);
165 bool selectCmp(const Instruction *I);
166 bool selectSelect(const Instruction *I);
167 bool selectFPExt(const Instruction *I);
168 bool selectFPTrunc(const Instruction *I);
169 bool selectFPToInt(const Instruction *I, bool Signed);
170 bool selectIntToFP(const Instruction *I, bool Signed);
171 bool selectRem(const Instruction *I, unsigned ISDOpcode);
172 bool selectRet(const Instruction *I);
173 bool selectTrunc(const Instruction *I);
174 bool selectIntExt(const Instruction *I);
175 bool selectMul(const Instruction *I);
176 bool selectShift(const Instruction *I);
177 bool selectBitCast(const Instruction *I);
178 bool selectFRem(const Instruction *I);
179 bool selectSDiv(const Instruction *I);
180 bool selectGetElementPtr(const Instruction *I);
181 bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
182
183 // Utility helper routines.
184 bool isTypeLegal(Type *Ty, MVT &VT);
185 bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
186 bool isValueAvailable(const Value *V) const;
187 bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
188 bool computeCallAddress(const Value *V, Address &Addr);
189 bool simplifyAddress(Address &Addr, MVT VT);
190 void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
 191 MachineMemOperand::Flags Flags,
 192 unsigned ScaleFactor, MachineMemOperand *MMO);
193 bool isMemCpySmall(uint64_t Len, MaybeAlign Alignment);
194 bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
195 MaybeAlign Alignment);
196 bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
197 const Value *Cond);
198 bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
199 bool optimizeSelect(const SelectInst *SI);
200 unsigned getRegForGEPIndex(const Value *Idx);
201
202 // Emit helper routines.
203 unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
204 const Value *RHS, bool SetFlags = false,
205 bool WantResult = true, bool IsZExt = false);
206 unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
207 unsigned RHSReg, bool SetFlags = false,
208 bool WantResult = true);
209 unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
210 uint64_t Imm, bool SetFlags = false,
211 bool WantResult = true);
212 unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
213 unsigned RHSReg, AArch64_AM::ShiftExtendType ShiftType,
214 uint64_t ShiftImm, bool SetFlags = false,
215 bool WantResult = true);
216 unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
217 unsigned RHSReg, AArch64_AM::ShiftExtendType ExtType,
218 uint64_t ShiftImm, bool SetFlags = false,
219 bool WantResult = true);
220
221 // Emit functions.
222 bool emitCompareAndBranch(const BranchInst *BI);
223 bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
224 bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
225 bool emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
226 bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
227 unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
228 MachineMemOperand *MMO = nullptr);
229 bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
230 MachineMemOperand *MMO = nullptr);
231 bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
232 MachineMemOperand *MMO = nullptr);
233 unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
234 unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
235 unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
236 bool SetFlags = false, bool WantResult = true,
237 bool IsZExt = false);
238 unsigned emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm);
239 unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
240 bool SetFlags = false, bool WantResult = true,
241 bool IsZExt = false);
242 unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
243 bool WantResult = true);
244 unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
245 AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
246 bool WantResult = true);
247 unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
248 const Value *RHS);
249 unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
250 uint64_t Imm);
251 unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
252 unsigned RHSReg, uint64_t ShiftImm);
253 unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
254 unsigned emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1);
255 unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
256 unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
257 unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
258 unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
259 bool IsZExt = true);
260 unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
261 unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
262 bool IsZExt = true);
263 unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
264 unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
265 bool IsZExt = false);
266
267 unsigned materializeInt(const ConstantInt *CI, MVT VT);
268 unsigned materializeFP(const ConstantFP *CFP, MVT VT);
269 unsigned materializeGV(const GlobalValue *GV);
270
271 // Call handling routines.
272private:
273 CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
274 bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
275 unsigned &NumBytes);
276 bool finishCall(CallLoweringInfo &CLI, unsigned NumBytes);
277
278public:
279 // Backend specific FastISel code.
280 unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
281 unsigned fastMaterializeConstant(const Constant *C) override;
282 unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
283
284 explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
285 const TargetLibraryInfo *LibInfo)
286 : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
287 Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>();
288 Context = &FuncInfo.Fn->getContext();
289 }
290
291 bool fastSelectInstruction(const Instruction *I) override;
292
293#include "AArch64GenFastISel.inc"
294};
295
296} // end anonymous namespace
297
298/// Check if the sign-/zero-extend will be a noop.
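/// For example, a zero-/sign-extend of a single-use load can be folded into
/// the load itself, and an argument that already carries the matching
/// zeroext/signext attribute needs no extra instruction.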
299static bool isIntExtFree(const Instruction *I) {
300 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
301 "Unexpected integer extend instruction.");
302 assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
303 "Unexpected value type.");
304 bool IsZExt = isa<ZExtInst>(I);
305
306 if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
307 if (LI->hasOneUse())
308 return true;
309
310 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
311 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
312 return true;
313
314 return false;
315}
316
317/// Determine the implicit scale factor that is applied by a memory
318/// operation for a given value type.
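/// For example, a scaled i32 load (LDRWui) multiplies its unsigned 12-bit
/// immediate offset by 4, so MVT::i32 and MVT::f32 return 4.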
319static unsigned getImplicitScaleFactor(MVT VT) {
320 switch (VT.SimpleTy) {
321 default:
322 return 0; // invalid
323 case MVT::i1: // fall-through
324 case MVT::i8:
325 return 1;
326 case MVT::i16:
327 return 2;
328 case MVT::i32: // fall-through
329 case MVT::f32:
330 return 4;
331 case MVT::i64: // fall-through
332 case MVT::f64:
333 return 8;
334 }
335}
336
337CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
338 if (CC == CallingConv::GHC)
339 return CC_AArch64_GHC;
 342 if (Subtarget->isTargetDarwin())
 343 return CC_AArch64_DarwinPCS;
344 if (Subtarget->isTargetWindows())
345 return CC_AArch64_Win64PCS;
346 return CC_AArch64_AAPCS;
347}
348
349unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
350 assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
351 "Alloca should always return a pointer.");
352
353 // Don't handle dynamic allocas.
354 if (!FuncInfo.StaticAllocaMap.count(AI))
355 return 0;
356
 357 DenseMap<const AllocaInst *, int>::iterator SI =
 358 FuncInfo.StaticAllocaMap.find(AI);
359
360 if (SI != FuncInfo.StaticAllocaMap.end()) {
361 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
362 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
363 ResultReg)
364 .addFrameIndex(SI->second)
365 .addImm(0)
366 .addImm(0);
367 return ResultReg;
368 }
369
370 return 0;
371}
372
373unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
374 if (VT > MVT::i64)
375 return 0;
376
377 if (!CI->isZero())
378 return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
379
380 // Create a copy from the zero register to materialize a "0" value.
381 const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
382 : &AArch64::GPR32RegClass;
383 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
384 Register ResultReg = createResultReg(RC);
385 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
386 ResultReg).addReg(ZeroReg, getKillRegState(true));
387 return ResultReg;
388}
389
390unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
391 // Positive zero (+0.0) has to be materialized with a fmov from the zero
392 // register, because the immediate version of fmov cannot encode zero.
393 if (CFP->isNullValue())
394 return fastMaterializeFloatZero(CFP);
395
396 if (VT != MVT::f32 && VT != MVT::f64)
397 return 0;
398
399 const APFloat Val = CFP->getValueAPF();
400 bool Is64Bit = (VT == MVT::f64);
401 // This checks to see if we can use FMOV instructions to materialize
402 // a constant, otherwise we have to materialize via the constant pool.
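 // (The FMOV immediate form only covers values with a 4-bit mantissa and a
 // 3-bit exponent, e.g. 1.0, 2.0 or 0.5; getFP32Imm/getFP64Imm return -1 for
 // anything else, including 0.0.)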
403 int Imm =
404 Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
405 if (Imm != -1) {
406 unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
407 return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
408 }
409
410 // For the large code model materialize the FP constant in code.
411 if (TM.getCodeModel() == CodeModel::Large) {
412 unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
413 const TargetRegisterClass *RC = Is64Bit ?
414 &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
415
416 Register TmpReg = createResultReg(RC);
417 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc1), TmpReg)
 418 .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
 419
420 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
421 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
422 TII.get(TargetOpcode::COPY), ResultReg)
423 .addReg(TmpReg, getKillRegState(true));
424
425 return ResultReg;
426 }
427
428 // Materialize via constant pool. MachineConstantPool wants an explicit
429 // alignment.
430 Align Alignment = DL.getPrefTypeAlign(CFP->getType());
431
432 unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
433 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
434 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
 435 ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
 436
437 unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
438 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
439 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
440 .addReg(ADRPReg)
 441 .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
 442 return ResultReg;
443}
444
445unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
446 // We can't handle thread-local variables quickly yet.
447 if (GV->isThreadLocal())
448 return 0;
449
450 // MachO still uses GOT for large code-model accesses, but ELF requires
451 // movz/movk sequences, which FastISel doesn't handle yet.
452 if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
453 return 0;
454
455 unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
456
457 EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
458 if (!DestEVT.isSimple())
459 return 0;
460
461 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
462 unsigned ResultReg;
463
464 if (OpFlags & AArch64II::MO_GOT) {
465 // ADRP + LDRX
466 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
467 ADRPReg)
468 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
469
470 unsigned LdrOpc;
471 if (Subtarget->isTargetILP32()) {
472 ResultReg = createResultReg(&AArch64::GPR32RegClass);
473 LdrOpc = AArch64::LDRWui;
474 } else {
475 ResultReg = createResultReg(&AArch64::GPR64RegClass);
476 LdrOpc = AArch64::LDRXui;
477 }
478 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(LdrOpc),
479 ResultReg)
480 .addReg(ADRPReg)
 481 .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
 482 AArch64II::MO_NC | OpFlags);
483 if (!Subtarget->isTargetILP32())
484 return ResultReg;
485
486 // LDRWui produces a 32-bit register, but pointers in-register are 64-bits
487 // so we must extend the result on ILP32.
488 Register Result64 = createResultReg(&AArch64::GPR64RegClass);
489 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
490 TII.get(TargetOpcode::SUBREG_TO_REG))
491 .addDef(Result64)
492 .addImm(0)
493 .addReg(ResultReg, RegState::Kill)
494 .addImm(AArch64::sub_32);
495 return Result64;
496 } else {
497 // ADRP + ADDX
498 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
499 ADRPReg)
500 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
501
502 if (OpFlags & AArch64II::MO_TAGGED) {
503 // MO_TAGGED on the page indicates a tagged address. Set the tag now.
504 // We do so by creating a MOVK that sets bits 48-63 of the register to
505 // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
506 // the small code model so we can assume a binary size of <= 4GB, which
507 // makes the untagged PC relative offset positive. The binary must also be
508 // loaded into address range [0, 2^48). Both of these properties need to
509 // be ensured at runtime when using tagged addresses.
510 //
511 // TODO: There is duplicate logic in AArch64ExpandPseudoInsts.cpp that
512 // also uses BuildMI for making an ADRP (+ MOVK) + ADD, but the operands
513 // are not exactly 1:1 with FastISel so we cannot easily abstract this
514 // out. At some point, it would be nice to find a way to not have this
 515 // duplicate code.
516 unsigned DstReg = createResultReg(&AArch64::GPR64commonRegClass);
517 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::MOVKXi),
518 DstReg)
519 .addReg(ADRPReg)
520 .addGlobalAddress(GV, /*Offset=*/0x100000000,
 521 AArch64II::MO_PREL | AArch64II::MO_G3)
 522 .addImm(48);
523 ADRPReg = DstReg;
524 }
525
526 ResultReg = createResultReg(&AArch64::GPR64spRegClass);
527 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
528 ResultReg)
529 .addReg(ADRPReg)
530 .addGlobalAddress(GV, 0,
 531 AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
 532 .addImm(0);
533 }
534 return ResultReg;
535}
536
537unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
538 EVT CEVT = TLI.getValueType(DL, C->getType(), true);
539
540 // Only handle simple types.
541 if (!CEVT.isSimple())
542 return 0;
543 MVT VT = CEVT.getSimpleVT();
544 // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
545 // 'null' pointers need to have a somewhat special treatment.
546 if (isa<ConstantPointerNull>(C)) {
547 assert(VT == MVT::i64 && "Expected 64-bit pointers");
548 return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
549 }
550
551 if (const auto *CI = dyn_cast<ConstantInt>(C))
552 return materializeInt(CI, VT);
553 else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
554 return materializeFP(CFP, VT);
555 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
556 return materializeGV(GV);
557
558 return 0;
559}
560
561unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
562 assert(CFP->isNullValue() &&
563 "Floating-point constant is not a positive zero.");
564 MVT VT;
565 if (!isTypeLegal(CFP->getType(), VT))
566 return 0;
567
568 if (VT != MVT::f32 && VT != MVT::f64)
569 return 0;
570
571 bool Is64Bit = (VT == MVT::f64);
572 unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
573 unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
574 return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);
575}
576
577/// Check if the multiply is by a power-of-2 constant.
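/// e.g. 'mul i64 %idx, 8', which computeAddress can turn into a register
/// offset shifted left by 3.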
578static bool isMulPowOf2(const Value *I) {
579 if (const auto *MI = dyn_cast<MulOperator>(I)) {
580 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
581 if (C->getValue().isPowerOf2())
582 return true;
583 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
584 if (C->getValue().isPowerOf2())
585 return true;
586 }
587 return false;
588}
589
590// Computes the address to get to an object.
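// Roughly, for an address computed as 'add i64 %base, (shl i64 %idx, 2)' this
// produces Addr = { base reg = %base, offset reg = %idx, extend = LSL,
// shift = 2 }, which later maps onto a register-offset access such as LDRWroX.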
591bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
592{
593 const User *U = nullptr;
594 unsigned Opcode = Instruction::UserOp1;
595 if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
596 // Don't walk into other basic blocks unless the object is an alloca from
597 // another block, otherwise it may not have a virtual register assigned.
598 if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
599 FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
600 Opcode = I->getOpcode();
601 U = I;
602 }
603 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
604 Opcode = C->getOpcode();
605 U = C;
606 }
607
608 if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
609 if (Ty->getAddressSpace() > 255)
610 // Fast instruction selection doesn't support the special
611 // address spaces.
612 return false;
613
614 switch (Opcode) {
615 default:
616 break;
617 case Instruction::BitCast:
618 // Look through bitcasts.
619 return computeAddress(U->getOperand(0), Addr, Ty);
620
621 case Instruction::IntToPtr:
622 // Look past no-op inttoptrs.
623 if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
624 TLI.getPointerTy(DL))
625 return computeAddress(U->getOperand(0), Addr, Ty);
626 break;
627
628 case Instruction::PtrToInt:
629 // Look past no-op ptrtoints.
630 if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
631 return computeAddress(U->getOperand(0), Addr, Ty);
632 break;
633
634 case Instruction::GetElementPtr: {
635 Address SavedAddr = Addr;
636 uint64_t TmpOffset = Addr.getOffset();
637
638 // Iterate through the GEP folding the constants into offsets where
639 // we can.
640 for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
641 GTI != E; ++GTI) {
642 const Value *Op = GTI.getOperand();
643 if (StructType *STy = GTI.getStructTypeOrNull()) {
644 const StructLayout *SL = DL.getStructLayout(STy);
645 unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
646 TmpOffset += SL->getElementOffset(Idx);
647 } else {
648 uint64_t S = GTI.getSequentialElementStride(DL);
649 while (true) {
650 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
651 // Constant-offset addressing.
652 TmpOffset += CI->getSExtValue() * S;
653 break;
654 }
655 if (canFoldAddIntoGEP(U, Op)) {
656 // A compatible add with a constant operand. Fold the constant.
657 ConstantInt *CI =
658 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
659 TmpOffset += CI->getSExtValue() * S;
660 // Iterate on the other operand.
661 Op = cast<AddOperator>(Op)->getOperand(0);
662 continue;
663 }
664 // Unsupported
665 goto unsupported_gep;
666 }
667 }
668 }
669
670 // Try to grab the base operand now.
671 Addr.setOffset(TmpOffset);
672 if (computeAddress(U->getOperand(0), Addr, Ty))
673 return true;
674
675 // We failed, restore everything and try the other options.
676 Addr = SavedAddr;
677
678 unsupported_gep:
679 break;
680 }
681 case Instruction::Alloca: {
682 const AllocaInst *AI = cast<AllocaInst>(Obj);
 683 DenseMap<const AllocaInst *, int>::iterator SI =
 684 FuncInfo.StaticAllocaMap.find(AI);
685 if (SI != FuncInfo.StaticAllocaMap.end()) {
686 Addr.setKind(Address::FrameIndexBase);
687 Addr.setFI(SI->second);
688 return true;
689 }
690 break;
691 }
692 case Instruction::Add: {
693 // Adds of constants are common and easy enough.
694 const Value *LHS = U->getOperand(0);
695 const Value *RHS = U->getOperand(1);
696
697 if (isa<ConstantInt>(LHS))
698 std::swap(LHS, RHS);
699
700 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
701 Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
702 return computeAddress(LHS, Addr, Ty);
703 }
704
705 Address Backup = Addr;
706 if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
707 return true;
708 Addr = Backup;
709
710 break;
711 }
712 case Instruction::Sub: {
713 // Subs of constants are common and easy enough.
714 const Value *LHS = U->getOperand(0);
715 const Value *RHS = U->getOperand(1);
716
717 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
718 Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
719 return computeAddress(LHS, Addr, Ty);
720 }
721 break;
722 }
723 case Instruction::Shl: {
724 if (Addr.getOffsetReg())
725 break;
726
727 const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
728 if (!CI)
729 break;
730
731 unsigned Val = CI->getZExtValue();
732 if (Val < 1 || Val > 3)
733 break;
734
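 // A register offset can only be folded if the shift amount matches the
 // access size (the register-offset addressing modes scale the index by the
 // size of the access), so work out how many bytes are being accessed.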
735 uint64_t NumBytes = 0;
736 if (Ty && Ty->isSized()) {
737 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
738 NumBytes = NumBits / 8;
739 if (!isPowerOf2_64(NumBits))
740 NumBytes = 0;
741 }
742
743 if (NumBytes != (1ULL << Val))
744 break;
745
746 Addr.setShift(Val);
747 Addr.setExtendType(AArch64_AM::LSL);
748
749 const Value *Src = U->getOperand(0);
750 if (const auto *I = dyn_cast<Instruction>(Src)) {
751 if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
752 // Fold the zext or sext when it won't become a noop.
753 if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
754 if (!isIntExtFree(ZE) &&
755 ZE->getOperand(0)->getType()->isIntegerTy(32)) {
756 Addr.setExtendType(AArch64_AM::UXTW);
757 Src = ZE->getOperand(0);
758 }
759 } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
760 if (!isIntExtFree(SE) &&
761 SE->getOperand(0)->getType()->isIntegerTy(32)) {
762 Addr.setExtendType(AArch64_AM::SXTW);
763 Src = SE->getOperand(0);
764 }
765 }
766 }
767 }
768
769 if (const auto *AI = dyn_cast<BinaryOperator>(Src))
770 if (AI->getOpcode() == Instruction::And) {
771 const Value *LHS = AI->getOperand(0);
772 const Value *RHS = AI->getOperand(1);
773
774 if (const auto *C = dyn_cast<ConstantInt>(LHS))
775 if (C->getValue() == 0xffffffff)
776 std::swap(LHS, RHS);
777
778 if (const auto *C = dyn_cast<ConstantInt>(RHS))
779 if (C->getValue() == 0xffffffff) {
780 Addr.setExtendType(AArch64_AM::UXTW);
781 Register Reg = getRegForValue(LHS);
782 if (!Reg)
783 return false;
784 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
785 Addr.setOffsetReg(Reg);
786 return true;
787 }
788 }
789
790 Register Reg = getRegForValue(Src);
791 if (!Reg)
792 return false;
793 Addr.setOffsetReg(Reg);
794 return true;
795 }
796 case Instruction::Mul: {
797 if (Addr.getOffsetReg())
798 break;
799
800 if (!isMulPowOf2(U))
801 break;
802
803 const Value *LHS = U->getOperand(0);
804 const Value *RHS = U->getOperand(1);
805
806 // Canonicalize power-of-2 value to the RHS.
807 if (const auto *C = dyn_cast<ConstantInt>(LHS))
808 if (C->getValue().isPowerOf2())
809 std::swap(LHS, RHS);
810
 811 assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
812 const auto *C = cast<ConstantInt>(RHS);
813 unsigned Val = C->getValue().logBase2();
814 if (Val < 1 || Val > 3)
815 break;
816
817 uint64_t NumBytes = 0;
818 if (Ty && Ty->isSized()) {
819 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
820 NumBytes = NumBits / 8;
821 if (!isPowerOf2_64(NumBits))
822 NumBytes = 0;
823 }
824
825 if (NumBytes != (1ULL << Val))
826 break;
827
828 Addr.setShift(Val);
829 Addr.setExtendType(AArch64_AM::LSL);
830
831 const Value *Src = LHS;
832 if (const auto *I = dyn_cast<Instruction>(Src)) {
833 if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
834 // Fold the zext or sext when it won't become a noop.
835 if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
836 if (!isIntExtFree(ZE) &&
837 ZE->getOperand(0)->getType()->isIntegerTy(32)) {
838 Addr.setExtendType(AArch64_AM::UXTW);
839 Src = ZE->getOperand(0);
840 }
841 } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
842 if (!isIntExtFree(SE) &&
843 SE->getOperand(0)->getType()->isIntegerTy(32)) {
844 Addr.setExtendType(AArch64_AM::SXTW);
845 Src = SE->getOperand(0);
846 }
847 }
848 }
849 }
850
851 Register Reg = getRegForValue(Src);
852 if (!Reg)
853 return false;
854 Addr.setOffsetReg(Reg);
855 return true;
856 }
857 case Instruction::And: {
858 if (Addr.getOffsetReg())
859 break;
860
861 if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
862 break;
863
864 const Value *LHS = U->getOperand(0);
865 const Value *RHS = U->getOperand(1);
866
867 if (const auto *C = dyn_cast<ConstantInt>(LHS))
868 if (C->getValue() == 0xffffffff)
869 std::swap(LHS, RHS);
870
871 if (const auto *C = dyn_cast<ConstantInt>(RHS))
872 if (C->getValue() == 0xffffffff) {
873 Addr.setShift(0);
874 Addr.setExtendType(AArch64_AM::LSL);
875 Addr.setExtendType(AArch64_AM::UXTW);
876
877 Register Reg = getRegForValue(LHS);
878 if (!Reg)
879 return false;
880 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
881 Addr.setOffsetReg(Reg);
882 return true;
883 }
884 break;
885 }
886 case Instruction::SExt:
887 case Instruction::ZExt: {
888 if (!Addr.getReg() || Addr.getOffsetReg())
889 break;
890
891 const Value *Src = nullptr;
892 // Fold the zext or sext when it won't become a noop.
893 if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
894 if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
895 Addr.setExtendType(AArch64_AM::UXTW);
896 Src = ZE->getOperand(0);
897 }
898 } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
899 if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
900 Addr.setExtendType(AArch64_AM::SXTW);
901 Src = SE->getOperand(0);
902 }
903 }
904
905 if (!Src)
906 break;
907
908 Addr.setShift(0);
909 Register Reg = getRegForValue(Src);
910 if (!Reg)
911 return false;
912 Addr.setOffsetReg(Reg);
913 return true;
914 }
915 } // end switch
916
917 if (Addr.isRegBase() && !Addr.getReg()) {
918 Register Reg = getRegForValue(Obj);
919 if (!Reg)
920 return false;
921 Addr.setReg(Reg);
922 return true;
923 }
924
925 if (!Addr.getOffsetReg()) {
926 Register Reg = getRegForValue(Obj);
927 if (!Reg)
928 return false;
929 Addr.setOffsetReg(Reg);
930 return true;
931 }
932
933 return false;
934}
935
936bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
937 const User *U = nullptr;
938 unsigned Opcode = Instruction::UserOp1;
939 bool InMBB = true;
940
941 if (const auto *I = dyn_cast<Instruction>(V)) {
942 Opcode = I->getOpcode();
943 U = I;
944 InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
945 } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
946 Opcode = C->getOpcode();
947 U = C;
948 }
949
950 switch (Opcode) {
951 default: break;
952 case Instruction::BitCast:
953 // Look past bitcasts if its operand is in the same BB.
954 if (InMBB)
955 return computeCallAddress(U->getOperand(0), Addr);
956 break;
957 case Instruction::IntToPtr:
958 // Look past no-op inttoptrs if its operand is in the same BB.
959 if (InMBB &&
960 TLI.getValueType(DL, U->getOperand(0)->getType()) ==
961 TLI.getPointerTy(DL))
962 return computeCallAddress(U->getOperand(0), Addr);
963 break;
964 case Instruction::PtrToInt:
965 // Look past no-op ptrtoints if its operand is in the same BB.
966 if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
967 return computeCallAddress(U->getOperand(0), Addr);
968 break;
969 }
970
971 if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
972 Addr.setGlobalValue(GV);
973 return true;
974 }
975
976 // If all else fails, try to materialize the value in a register.
977 if (!Addr.getGlobalValue()) {
978 Addr.setReg(getRegForValue(V));
979 return Addr.getReg() != 0;
980 }
981
982 return false;
983}
984
985bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
986 EVT evt = TLI.getValueType(DL, Ty, true);
987
988 if (Subtarget->isTargetILP32() && Ty->isPointerTy())
989 return false;
990
991 // Only handle simple types.
992 if (evt == MVT::Other || !evt.isSimple())
993 return false;
994 VT = evt.getSimpleVT();
995
996 // This is a legal type, but it's not something we handle in fast-isel.
997 if (VT == MVT::f128)
998 return false;
999
1000 // Handle all other legal types, i.e. a register that will directly hold this
1001 // value.
1002 return TLI.isTypeLegal(VT);
1003}
1004
1005/// Determine if the value type is supported by FastISel.
1006///
1007/// FastISel for AArch64 can handle more value types than are legal. This adds
1008/// simple value types such as i1, i8, and i16.
1009bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
1010 if (Ty->isVectorTy() && !IsVectorAllowed)
1011 return false;
1012
1013 if (isTypeLegal(Ty, VT))
1014 return true;
1015
 1016 // If this is a type that can be sign- or zero-extended to a basic operation
1017 // go ahead and accept it now.
1018 if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
1019 return true;
1020
1021 return false;
1022}
1023
1024bool AArch64FastISel::isValueAvailable(const Value *V) const {
1025 if (!isa<Instruction>(V))
1026 return true;
1027
1028 const auto *I = cast<Instruction>(V);
1029 return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
1030}
1031
1032bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
1033 if (Subtarget->isTargetILP32())
1034 return false;
1035
1036 unsigned ScaleFactor = getImplicitScaleFactor(VT);
1037 if (!ScaleFactor)
1038 return false;
1039
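 // A constant offset can be folded directly only if it fits one of the two
 // immediate encodings: a scaled, unsigned 12-bit offset (LDR/STR) or an
 // unscaled, signed 9-bit offset (LDUR/STUR).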
1040 bool ImmediateOffsetNeedsLowering = false;
1041 bool RegisterOffsetNeedsLowering = false;
1042 int64_t Offset = Addr.getOffset();
1043 if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
1044 ImmediateOffsetNeedsLowering = true;
1045 else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1046 !isUInt<12>(Offset / ScaleFactor))
1047 ImmediateOffsetNeedsLowering = true;
1048
1049 // Cannot encode an offset register and an immediate offset in the same
1050 // instruction. Fold the immediate offset into the load/store instruction and
1051 // emit an additional add to take care of the offset register.
1052 if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1053 RegisterOffsetNeedsLowering = true;
1054
1055 // Cannot encode zero register as base.
1056 if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1057 RegisterOffsetNeedsLowering = true;
1058
1059 // If this is a stack pointer and the offset needs to be simplified then put
1060 // the alloca address into a register, set the base type back to register and
1061 // continue. This should almost never happen.
1062 if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1063 {
1064 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1065 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
1066 ResultReg)
1067 .addFrameIndex(Addr.getFI())
1068 .addImm(0)
1069 .addImm(0);
1070 Addr.setKind(Address::RegBase);
1071 Addr.setReg(ResultReg);
1072 }
1073
1074 if (RegisterOffsetNeedsLowering) {
1075 unsigned ResultReg = 0;
1076 if (Addr.getReg()) {
1077 if (Addr.getExtendType() == AArch64_AM::SXTW ||
1078 Addr.getExtendType() == AArch64_AM::UXTW )
1079 ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1080 Addr.getOffsetReg(), Addr.getExtendType(),
1081 Addr.getShift());
1082 else
1083 ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1084 Addr.getOffsetReg(), AArch64_AM::LSL,
1085 Addr.getShift());
1086 } else {
1087 if (Addr.getExtendType() == AArch64_AM::UXTW)
1088 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1089 Addr.getShift(), /*IsZExt=*/true);
1090 else if (Addr.getExtendType() == AArch64_AM::SXTW)
1091 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1092 Addr.getShift(), /*IsZExt=*/false);
1093 else
1094 ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1095 Addr.getShift());
1096 }
1097 if (!ResultReg)
1098 return false;
1099
1100 Addr.setReg(ResultReg);
1101 Addr.setOffsetReg(0);
1102 Addr.setShift(0);
1103 Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1104 }
1105
1106 // Since the offset is too large for the load/store instruction get the
1107 // reg+offset into a register.
1108 if (ImmediateOffsetNeedsLowering) {
1109 unsigned ResultReg;
1110 if (Addr.getReg())
1111 // Try to fold the immediate into the add instruction.
1112 ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
1113 else
1114 ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1115
1116 if (!ResultReg)
1117 return false;
1118 Addr.setReg(ResultReg);
1119 Addr.setOffset(0);
1120 }
1121 return true;
1122}
1123
1124void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1125 const MachineInstrBuilder &MIB,
 1126 MachineMemOperand::Flags Flags,
 1127 unsigned ScaleFactor,
1128 MachineMemOperand *MMO) {
1129 int64_t Offset = Addr.getOffset() / ScaleFactor;
1130 // Frame base works a bit differently. Handle it separately.
1131 if (Addr.isFIBase()) {
1132 int FI = Addr.getFI();
1133 // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
1134 // and alignment should be based on the VT.
1135 MMO = FuncInfo.MF->getMachineMemOperand(
1136 MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1137 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
1138 // Now add the rest of the operands.
1139 MIB.addFrameIndex(FI).addImm(Offset);
1140 } else {
1141 assert(Addr.isRegBase() && "Unexpected address kind.");
1142 const MCInstrDesc &II = MIB->getDesc();
1143 unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1144 Addr.setReg(
1145 constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1146 Addr.setOffsetReg(
1147 constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1148 if (Addr.getOffsetReg()) {
1149 assert(Addr.getOffset() == 0 && "Unexpected offset");
1150 bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1151 Addr.getExtendType() == AArch64_AM::SXTX;
1152 MIB.addReg(Addr.getReg());
1153 MIB.addReg(Addr.getOffsetReg());
1154 MIB.addImm(IsSigned);
1155 MIB.addImm(Addr.getShift() != 0);
1156 } else
1157 MIB.addReg(Addr.getReg()).addImm(Offset);
1158 }
1159
1160 if (MMO)
1161 MIB.addMemOperand(MMO);
1162}
1163
1164unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1165 const Value *RHS, bool SetFlags,
1166 bool WantResult, bool IsZExt) {
 1167 AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
 1168 bool NeedExtend = false;
1169 switch (RetVT.SimpleTy) {
1170 default:
1171 return 0;
1172 case MVT::i1:
1173 NeedExtend = true;
1174 break;
1175 case MVT::i8:
1176 NeedExtend = true;
1177 ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1178 break;
1179 case MVT::i16:
1180 NeedExtend = true;
1181 ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1182 break;
1183 case MVT::i32: // fall-through
1184 case MVT::i64:
1185 break;
1186 }
1187 MVT SrcVT = RetVT;
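 // There are no 8- or 16-bit ADD/SUB instructions, so narrow operands are
 // extended and the arithmetic is performed on a 32-bit register.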
1188 RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1189
1190 // Canonicalize immediates to the RHS first.
1191 if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1192 std::swap(LHS, RHS);
1193
1194 // Canonicalize mul by power of 2 to the RHS.
1195 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1196 if (isMulPowOf2(LHS))
1197 std::swap(LHS, RHS);
1198
1199 // Canonicalize shift immediate to the RHS.
1200 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1201 if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1202 if (isa<ConstantInt>(SI->getOperand(1)))
1203 if (SI->getOpcode() == Instruction::Shl ||
1204 SI->getOpcode() == Instruction::LShr ||
1205 SI->getOpcode() == Instruction::AShr )
1206 std::swap(LHS, RHS);
1207
1208 Register LHSReg = getRegForValue(LHS);
1209 if (!LHSReg)
1210 return 0;
1211
1212 if (NeedExtend)
1213 LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1214
1215 unsigned ResultReg = 0;
1216 if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1217 uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1218 if (C->isNegative())
1219 ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
1220 WantResult);
1221 else
1222 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
1223 WantResult);
1224 } else if (const auto *C = dyn_cast<Constant>(RHS))
1225 if (C->isNullValue())
1226 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);
1227
1228 if (ResultReg)
1229 return ResultReg;
1230
1231 // Only extend the RHS within the instruction if there is a valid extend type.
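 // (This uses the extended-register ADD/SUB form, which extends a byte,
 // halfword or word operand as part of the instruction.)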
1232 if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1233 isValueAvailable(RHS)) {
1234 Register RHSReg = getRegForValue(RHS);
1235 if (!RHSReg)
1236 return 0;
1237 return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,
1238 SetFlags, WantResult);
1239 }
1240
1241 // Check if the mul can be folded into the instruction.
1242 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1243 if (isMulPowOf2(RHS)) {
1244 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1245 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1246
1247 if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1248 if (C->getValue().isPowerOf2())
1249 std::swap(MulLHS, MulRHS);
1250
1251 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1252 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1253 Register RHSReg = getRegForValue(MulLHS);
1254 if (!RHSReg)
1255 return 0;
1256 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,
1257 ShiftVal, SetFlags, WantResult);
1258 if (ResultReg)
1259 return ResultReg;
1260 }
1261 }
1262
1263 // Check if the shift can be folded into the instruction.
1264 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1265 if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1266 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
 1267 AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
 1268 switch (SI->getOpcode()) {
1269 default: break;
1270 case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
1271 case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1272 case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1273 }
1274 uint64_t ShiftVal = C->getZExtValue();
1275 if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1276 Register RHSReg = getRegForValue(SI->getOperand(0));
1277 if (!RHSReg)
1278 return 0;
1279 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
1280 ShiftVal, SetFlags, WantResult);
1281 if (ResultReg)
1282 return ResultReg;
1283 }
1284 }
1285 }
1286 }
1287
1288 Register RHSReg = getRegForValue(RHS);
1289 if (!RHSReg)
1290 return 0;
1291
1292 if (NeedExtend)
1293 RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1294
1295 return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
1296}
1297
1298unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1299 unsigned RHSReg, bool SetFlags,
1300 bool WantResult) {
1301 assert(LHSReg && RHSReg && "Invalid register number.");
1302
1303 if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1304 RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1305 return 0;
1306
1307 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1308 return 0;
1309
1310 static const unsigned OpcTable[2][2][2] = {
1311 { { AArch64::SUBWrr, AArch64::SUBXrr },
1312 { AArch64::ADDWrr, AArch64::ADDXrr } },
1313 { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1314 { AArch64::ADDSWrr, AArch64::ADDSXrr } }
1315 };
1316 bool Is64Bit = RetVT == MVT::i64;
1317 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1318 const TargetRegisterClass *RC =
1319 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1320 unsigned ResultReg;
1321 if (WantResult)
1322 ResultReg = createResultReg(RC);
1323 else
1324 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1325
1326 const MCInstrDesc &II = TII.get(Opc);
1327 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1328 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1329 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1330 .addReg(LHSReg)
1331 .addReg(RHSReg);
1332 return ResultReg;
1333}
1334
1335unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1336 uint64_t Imm, bool SetFlags,
1337 bool WantResult) {
1338 assert(LHSReg && "Invalid register number.");
1339
1340 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1341 return 0;
1342
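 // ADD/SUB (immediate) encode a 12-bit unsigned immediate, optionally shifted
 // left by 12, i.e. values of the form 0xYYY or 0xYYY000.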
1343 unsigned ShiftImm;
1344 if (isUInt<12>(Imm))
1345 ShiftImm = 0;
1346 else if ((Imm & 0xfff000) == Imm) {
1347 ShiftImm = 12;
1348 Imm >>= 12;
1349 } else
1350 return 0;
1351
1352 static const unsigned OpcTable[2][2][2] = {
1353 { { AArch64::SUBWri, AArch64::SUBXri },
1354 { AArch64::ADDWri, AArch64::ADDXri } },
1355 { { AArch64::SUBSWri, AArch64::SUBSXri },
1356 { AArch64::ADDSWri, AArch64::ADDSXri } }
1357 };
1358 bool Is64Bit = RetVT == MVT::i64;
1359 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1360 const TargetRegisterClass *RC;
1361 if (SetFlags)
1362 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1363 else
1364 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1365 unsigned ResultReg;
1366 if (WantResult)
1367 ResultReg = createResultReg(RC);
1368 else
1369 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1370
1371 const MCInstrDesc &II = TII.get(Opc);
1372 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1373 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1374 .addReg(LHSReg)
1375 .addImm(Imm)
1376 .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1377 return ResultReg;
1378}
1379
1380unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1381 unsigned RHSReg,
 1382 AArch64_AM::ShiftExtendType ShiftType,
 1383 uint64_t ShiftImm, bool SetFlags,
1384 bool WantResult) {
1385 assert(LHSReg && RHSReg && "Invalid register number.");
1386 assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1387 RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1388
1389 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1390 return 0;
1391
1392 // Don't deal with undefined shifts.
1393 if (ShiftImm >= RetVT.getSizeInBits())
1394 return 0;
1395
1396 static const unsigned OpcTable[2][2][2] = {
1397 { { AArch64::SUBWrs, AArch64::SUBXrs },
1398 { AArch64::ADDWrs, AArch64::ADDXrs } },
1399 { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1400 { AArch64::ADDSWrs, AArch64::ADDSXrs } }
1401 };
1402 bool Is64Bit = RetVT == MVT::i64;
1403 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1404 const TargetRegisterClass *RC =
1405 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1406 unsigned ResultReg;
1407 if (WantResult)
1408 ResultReg = createResultReg(RC);
1409 else
1410 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1411
1412 const MCInstrDesc &II = TII.get(Opc);
1413 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1414 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1415 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1416 .addReg(LHSReg)
1417 .addReg(RHSReg)
1418 .addImm(getShifterImm(ShiftType, ShiftImm));
1419 return ResultReg;
1420}
1421
1422unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1423 unsigned RHSReg,
 1424 AArch64_AM::ShiftExtendType ExtType,
 1425 uint64_t ShiftImm, bool SetFlags,
1426 bool WantResult) {
1427 assert(LHSReg && RHSReg && "Invalid register number.");
1428 assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1429 RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1430
1431 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1432 return 0;
1433
1434 if (ShiftImm >= 4)
1435 return 0;
1436
1437 static const unsigned OpcTable[2][2][2] = {
1438 { { AArch64::SUBWrx, AArch64::SUBXrx },
1439 { AArch64::ADDWrx, AArch64::ADDXrx } },
1440 { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1441 { AArch64::ADDSWrx, AArch64::ADDSXrx } }
1442 };
1443 bool Is64Bit = RetVT == MVT::i64;
1444 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1445 const TargetRegisterClass *RC = nullptr;
1446 if (SetFlags)
1447 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1448 else
1449 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1450 unsigned ResultReg;
1451 if (WantResult)
1452 ResultReg = createResultReg(RC);
1453 else
1454 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1455
1456 const MCInstrDesc &II = TII.get(Opc);
1457 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1458 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1459 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1460 .addReg(LHSReg)
1461 .addReg(RHSReg)
1462 .addImm(getArithExtendImm(ExtType, ShiftImm));
1463 return ResultReg;
1464}
1465
1466bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1467 Type *Ty = LHS->getType();
1468 EVT EVT = TLI.getValueType(DL, Ty, true);
1469 if (!EVT.isSimple())
1470 return false;
1471 MVT VT = EVT.getSimpleVT();
1472
1473 switch (VT.SimpleTy) {
1474 default:
1475 return false;
1476 case MVT::i1:
1477 case MVT::i8:
1478 case MVT::i16:
1479 case MVT::i32:
1480 case MVT::i64:
1481 return emitICmp(VT, LHS, RHS, IsZExt);
1482 case MVT::f32:
1483 case MVT::f64:
1484 return emitFCmp(VT, LHS, RHS);
1485 }
1486}
1487
1488bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1489 bool IsZExt) {
1490 return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1491 IsZExt) != 0;
1492}
1493
1494bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) {
1495 return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,
1496 /*SetFlags=*/true, /*WantResult=*/false) != 0;
1497}
1498
1499bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1500 if (RetVT != MVT::f32 && RetVT != MVT::f64)
1501 return false;
1502
1503 // Check to see if the 2nd operand is a constant that we can encode directly
1504 // in the compare.
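 // (FCMP only has an immediate form that compares against +0.0, used below
 // via FCMPSri/FCMPDri.)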
1505 bool UseImm = false;
1506 if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1507 if (CFP->isZero() && !CFP->isNegative())
1508 UseImm = true;
1509
1510 Register LHSReg = getRegForValue(LHS);
1511 if (!LHSReg)
1512 return false;
1513
1514 if (UseImm) {
1515 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1516 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1517 .addReg(LHSReg);
1518 return true;
1519 }
1520
1521 Register RHSReg = getRegForValue(RHS);
1522 if (!RHSReg)
1523 return false;
1524
1525 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1526 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1527 .addReg(LHSReg)
1528 .addReg(RHSReg);
1529 return true;
1530}
1531
1532unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1533 bool SetFlags, bool WantResult, bool IsZExt) {
1534 return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1535 IsZExt);
1536}
1537
1538/// This method is a wrapper to simplify add emission.
1539///
1540/// First try to emit an add with an immediate operand using emitAddSub_ri. If
1541/// that fails, then try to materialize the immediate into a register and use
1542/// emitAddSub_rr instead.
1543unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm) {
1544 unsigned ResultReg;
1545 if (Imm < 0)
1546 ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);
1547 else
1548 ResultReg = emitAddSub_ri(true, VT, Op0, Imm);
1549
1550 if (ResultReg)
1551 return ResultReg;
1552
1553 unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1554 if (!CReg)
1555 return 0;
1556
1557 ResultReg = emitAddSub_rr(true, VT, Op0, CReg);
1558 return ResultReg;
1559}
1560
1561unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1562 bool SetFlags, bool WantResult, bool IsZExt) {
1563 return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1564 IsZExt);
1565}
1566
1567unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1568 unsigned RHSReg, bool WantResult) {
1569 return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg,
1570 /*SetFlags=*/true, WantResult);
1571}
1572
1573unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1574 unsigned RHSReg,
 1575 AArch64_AM::ShiftExtendType ShiftType,
 1576 uint64_t ShiftImm, bool WantResult) {
1577 return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType,
1578 ShiftImm, /*SetFlags=*/true, WantResult);
1579}
1580
1581unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1582 const Value *LHS, const Value *RHS) {
1583 // Canonicalize immediates to the RHS first.
1584 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1585 std::swap(LHS, RHS);
1586
1587 // Canonicalize mul by power-of-2 to the RHS.
1588 if (LHS->hasOneUse() && isValueAvailable(LHS))
1589 if (isMulPowOf2(LHS))
1590 std::swap(LHS, RHS);
1591
1592 // Canonicalize shift immediate to the RHS.
1593 if (LHS->hasOneUse() && isValueAvailable(LHS))
1594 if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1595 if (isa<ConstantInt>(SI->getOperand(1)))
1596 std::swap(LHS, RHS);
1597
1598 Register LHSReg = getRegForValue(LHS);
1599 if (!LHSReg)
1600 return 0;
1601
1602 unsigned ResultReg = 0;
1603 if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1604 uint64_t Imm = C->getZExtValue();
1605 ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);
1606 }
1607 if (ResultReg)
1608 return ResultReg;
1609
1610 // Check if the mul can be folded into the instruction.
1611 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1612 if (isMulPowOf2(RHS)) {
1613 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1614 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1615
1616 if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1617 if (C->getValue().isPowerOf2())
1618 std::swap(MulLHS, MulRHS);
1619
1620 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1621 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1622
1623 Register RHSReg = getRegForValue(MulLHS);
1624 if (!RHSReg)
1625 return 0;
1626 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1627 if (ResultReg)
1628 return ResultReg;
1629 }
1630 }
1631
1632 // Check if the shift can be folded into the instruction.
1633 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1634 if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1635 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1636 uint64_t ShiftVal = C->getZExtValue();
1637 Register RHSReg = getRegForValue(SI->getOperand(0));
1638 if (!RHSReg)
1639 return 0;
1640 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1641 if (ResultReg)
1642 return ResultReg;
1643 }
1644 }
1645
1646 Register RHSReg = getRegForValue(RHS);
1647 if (!RHSReg)
1648 return 0;
1649
1650 MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1651 ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);
1652 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1653 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1654 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1655 }
1656 return ResultReg;
1657}
1658
1659unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1660 unsigned LHSReg, uint64_t Imm) {
1661 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1662 "ISD nodes are not consecutive!");
1663 static const unsigned OpcTable[3][2] = {
1664 { AArch64::ANDWri, AArch64::ANDXri },
1665 { AArch64::ORRWri, AArch64::ORRXri },
1666 { AArch64::EORWri, AArch64::EORXri }
1667 };
1668 const TargetRegisterClass *RC;
1669 unsigned Opc;
1670 unsigned RegSize;
1671 switch (RetVT.SimpleTy) {
1672 default:
1673 return 0;
1674 case MVT::i1:
1675 case MVT::i8:
1676 case MVT::i16:
1677 case MVT::i32: {
1678 unsigned Idx = ISDOpc - ISD::AND;
1679 Opc = OpcTable[Idx][0];
1680 RC = &AArch64::GPR32spRegClass;
1681 RegSize = 32;
1682 break;
1683 }
1684 case MVT::i64:
1685 Opc = OpcTable[ISDOpc - ISD::AND][1];
1686 RC = &AArch64::GPR64spRegClass;
1687 RegSize = 64;
1688 break;
1689 }
1690
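 // AND/ORR/EOR (immediate) can only encode "bitmask immediates" (a rotated,
 // replicated run of contiguous set bits), so give up on anything that cannot
 // be represented that way.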
 1691 if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
 1692 return 0;
1693
1694 Register ResultReg =
 1695 fastEmitInst_ri(Opc, RC, LHSReg,
 1696 AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1697 if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1698 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1699 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1700 }
1701 return ResultReg;
1702}
1703
1704unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1705 unsigned LHSReg, unsigned RHSReg,
1706 uint64_t ShiftImm) {
1707 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1708 "ISD nodes are not consecutive!");
1709 static const unsigned OpcTable[3][2] = {
1710 { AArch64::ANDWrs, AArch64::ANDXrs },
1711 { AArch64::ORRWrs, AArch64::ORRXrs },
1712 { AArch64::EORWrs, AArch64::EORXrs }
1713 };
1714
1715 // Don't deal with undefined shifts.
1716 if (ShiftImm >= RetVT.getSizeInBits())
1717 return 0;
1718
1719 const TargetRegisterClass *RC;
1720 unsigned Opc;
1721 switch (RetVT.SimpleTy) {
1722 default:
1723 return 0;
1724 case MVT::i1:
1725 case MVT::i8:
1726 case MVT::i16:
1727 case MVT::i32:
1728 Opc = OpcTable[ISDOpc - ISD::AND][0];
1729 RC = &AArch64::GPR32RegClass;
1730 break;
1731 case MVT::i64:
1732 Opc = OpcTable[ISDOpc - ISD::AND][1];
1733 RC = &AArch64::GPR64RegClass;
1734 break;
1735 }
1736 Register ResultReg =
1737      fastEmitInst_rri(Opc, RC, LHSReg, RHSReg,
1738                       AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1739 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1740 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1741 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1742 }
1743 return ResultReg;
1744}
1745
1746unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg,
1747 uint64_t Imm) {
1748 return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm);
1749}
1750
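/// Emit a load of type VT that is extended to RetVT. After simplifying the
/// address, one of three forms is chosen: unscaled 9-bit signed offset, scaled
/// 12-bit unsigned offset, or register offset (optionally sign-/zero-extended).
/// i1 loads are masked down to a single bit, and zero-extending loads to i64
/// are widened afterwards with SUBREG_TO_REG.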
1751unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1752 bool WantZExt, MachineMemOperand *MMO) {
1753 if (!TLI.allowsMisalignedMemoryAccesses(VT))
1754 return 0;
1755
1756 // Simplify this down to something we can handle.
1757 if (!simplifyAddress(Addr, VT))
1758 return 0;
1759
1760 unsigned ScaleFactor = getImplicitScaleFactor(VT);
1761 if (!ScaleFactor)
1762 llvm_unreachable("Unexpected value type.");
1763
1764 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1765 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1766 bool UseScaled = true;
1767 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1768 UseScaled = false;
1769 ScaleFactor = 1;
1770 }
1771
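  // GPR load opcodes indexed as [WantZExt][2 * AddrKind + IsRet64Bit][log2(bytes)],
  // where AddrKind is 0 = unscaled, 1 = scaled, 2 = register offset (LSL),
  // 3 = register offset with a W-register extend.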
1772 static const unsigned GPOpcTable[2][8][4] = {
1773 // Sign-extend.
1774 { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
1775 AArch64::LDURXi },
1776 { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
1777 AArch64::LDURXi },
1778 { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
1779 AArch64::LDRXui },
1780 { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
1781 AArch64::LDRXui },
1782 { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1783 AArch64::LDRXroX },
1784 { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1785 AArch64::LDRXroX },
1786 { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1787 AArch64::LDRXroW },
1788 { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1789 AArch64::LDRXroW }
1790 },
1791 // Zero-extend.
1792 { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1793 AArch64::LDURXi },
1794 { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1795 AArch64::LDURXi },
1796 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1797 AArch64::LDRXui },
1798 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1799 AArch64::LDRXui },
1800 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1801 AArch64::LDRXroX },
1802 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1803 AArch64::LDRXroX },
1804 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1805 AArch64::LDRXroW },
1806 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1807 AArch64::LDRXroW }
1808 }
1809 };
1810
1811 static const unsigned FPOpcTable[4][2] = {
1812 { AArch64::LDURSi, AArch64::LDURDi },
1813 { AArch64::LDRSui, AArch64::LDRDui },
1814 { AArch64::LDRSroX, AArch64::LDRDroX },
1815 { AArch64::LDRSroW, AArch64::LDRDroW }
1816 };
1817
1818 unsigned Opc;
1819 const TargetRegisterClass *RC;
1820 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1821 Addr.getOffsetReg();
1822 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1823 if (Addr.getExtendType() == AArch64_AM::UXTW ||
1824 Addr.getExtendType() == AArch64_AM::SXTW)
1825 Idx++;
1826
1827 bool IsRet64Bit = RetVT == MVT::i64;
1828 switch (VT.SimpleTy) {
1829 default:
1830 llvm_unreachable("Unexpected value type.");
1831 case MVT::i1: // Intentional fall-through.
1832 case MVT::i8:
1833 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1834 RC = (IsRet64Bit && !WantZExt) ?
1835 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1836 break;
1837 case MVT::i16:
1838 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1839 RC = (IsRet64Bit && !WantZExt) ?
1840 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1841 break;
1842 case MVT::i32:
1843 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1844 RC = (IsRet64Bit && !WantZExt) ?
1845 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1846 break;
1847 case MVT::i64:
1848 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1849 RC = &AArch64::GPR64RegClass;
1850 break;
1851 case MVT::f32:
1852 Opc = FPOpcTable[Idx][0];
1853 RC = &AArch64::FPR32RegClass;
1854 break;
1855 case MVT::f64:
1856 Opc = FPOpcTable[Idx][1];
1857 RC = &AArch64::FPR64RegClass;
1858 break;
1859 }
1860
1861 // Create the base instruction, then add the operands.
1862 Register ResultReg = createResultReg(RC);
1863 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1864 TII.get(Opc), ResultReg);
1865 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1866
1867 // Loading an i1 requires special handling.
1868 if (VT == MVT::i1) {
1869 unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1);
1870 assert(ANDReg && "Unexpected AND instruction emission failure.");
1871 ResultReg = ANDReg;
1872 }
1873
1874 // For zero-extending loads to 64bit we emit a 32bit load and then convert
1875 // the 32bit reg to a 64bit reg.
1876 if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1877 Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
1878 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1879 TII.get(AArch64::SUBREG_TO_REG), Reg64)
1880 .addImm(0)
1881 .addReg(ResultReg, getKillRegState(true))
1882 .addImm(AArch64::sub_32);
1883 ResultReg = Reg64;
1884 }
1885 return ResultReg;
1886}
1887
1888bool AArch64FastISel::selectAddSub(const Instruction *I) {
1889 MVT VT;
1890 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1891 return false;
1892
1893 if (VT.isVector())
1894 return selectOperator(I, I->getOpcode());
1895
1896 unsigned ResultReg;
1897 switch (I->getOpcode()) {
1898 default:
1899 llvm_unreachable("Unexpected instruction.");
1900 case Instruction::Add:
1901 ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1902 break;
1903 case Instruction::Sub:
1904 ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1905 break;
1906 }
1907 if (!ResultReg)
1908 return false;
1909
1910 updateValueMap(I, ResultReg);
1911 return true;
1912}
1913
1914bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1915 MVT VT;
1916 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1917 return false;
1918
1919 if (VT.isVector())
1920 return selectOperator(I, I->getOpcode());
1921
1922 unsigned ResultReg;
1923 switch (I->getOpcode()) {
1924 default:
1925 llvm_unreachable("Unexpected instruction.");
1926 case Instruction::And:
1927 ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1928 break;
1929 case Instruction::Or:
1930 ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1931 break;
1932 case Instruction::Xor:
1933 ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1934 break;
1935 }
1936 if (!ResultReg)
1937 return false;
1938
1939 updateValueMap(I, ResultReg);
1940 return true;
1941}
1942
1943bool AArch64FastISel::selectLoad(const Instruction *I) {
1944 MVT VT;
1945 // Verify we have a legal type before going any further. Currently, we handle
1946 // simple types that will directly fit in a register (i32/f32/i64/f64) or
1947 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1948 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1949 cast<LoadInst>(I)->isAtomic())
1950 return false;
1951
1952 const Value *SV = I->getOperand(0);
1953 if (TLI.supportSwiftError()) {
1954 // Swifterror values can come from either a function parameter with
1955 // swifterror attribute or an alloca with swifterror attribute.
1956 if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1957 if (Arg->hasSwiftErrorAttr())
1958 return false;
1959 }
1960
1961 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1962 if (Alloca->isSwiftError())
1963 return false;
1964 }
1965 }
1966
1967 // See if we can handle this address.
1968 Address Addr;
1969 if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1970 return false;
1971
1972 // Fold the following sign-/zero-extend into the load instruction.
1973 bool WantZExt = true;
1974 MVT RetVT = VT;
1975 const Value *IntExtVal = nullptr;
1976 if (I->hasOneUse()) {
1977 if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1978 if (isTypeSupported(ZE->getType(), RetVT))
1979 IntExtVal = ZE;
1980 else
1981 RetVT = VT;
1982 } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1983 if (isTypeSupported(SE->getType(), RetVT))
1984 IntExtVal = SE;
1985 else
1986 RetVT = VT;
1987 WantZExt = false;
1988 }
1989 }
1990
1991 unsigned ResultReg =
1992 emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1993 if (!ResultReg)
1994 return false;
1995
1996 // There are a few different cases we have to handle, because the load or the
1997  // sign-/zero-extend might not be selected by FastISel if we fall back to
1998 // SelectionDAG. There is also an ordering issue when both instructions are in
1999 // different basic blocks.
2000 // 1.) The load instruction is selected by FastISel, but the integer extend
2001 // not. This usually happens when the integer extend is in a different
2002 // basic block and SelectionDAG took over for that basic block.
2003 // 2.) The load instruction is selected before the integer extend. This only
2004 // happens when the integer extend is in a different basic block.
2005 // 3.) The load instruction is selected by SelectionDAG and the integer extend
2006 // by FastISel. This happens if there are instructions between the load
2007 // and the integer extend that couldn't be selected by FastISel.
2008 if (IntExtVal) {
2009 // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2010 // could select it. Emit a copy to subreg if necessary. FastISel will remove
2011 // it when it selects the integer extend.
2012 Register Reg = lookUpRegForValue(IntExtVal);
2013 auto *MI = MRI.getUniqueVRegDef(Reg);
2014 if (!MI) {
2015 if (RetVT == MVT::i64 && VT <= MVT::i32) {
2016 if (WantZExt) {
2017 // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2018 MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
2019 ResultReg = std::prev(I)->getOperand(0).getReg();
2020 removeDeadCode(I, std::next(I));
2021 } else
2022 ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2023 AArch64::sub_32);
2024 }
2025 updateValueMap(I, ResultReg);
2026 return true;
2027 }
2028
2029 // The integer extend has already been emitted - delete all the instructions
2030 // that have been emitted by the integer extend lowering code and use the
2031 // result from the load instruction directly.
2032 while (MI) {
2033 Reg = 0;
2034 for (auto &Opnd : MI->uses()) {
2035 if (Opnd.isReg()) {
2036 Reg = Opnd.getReg();
2037 break;
2038 }
2039 }
2040      MachineBasicBlock::iterator I(MI);
2041      removeDeadCode(I, std::next(I));
2042 MI = nullptr;
2043 if (Reg)
2044 MI = MRI.getUniqueVRegDef(Reg);
2045 }
2046 updateValueMap(IntExtVal, ResultReg);
2047 return true;
2048 }
2049
2050 updateValueMap(I, ResultReg);
2051 return true;
2052}
2053
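/// Emit a store-release (STLRB/STLRH/STLRW/STLRX) for an atomic store. The
/// STLR family only takes a plain base register, so the caller passes the
/// address register directly.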
2054bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2055 unsigned AddrReg,
2056 MachineMemOperand *MMO) {
2057 unsigned Opc;
2058 switch (VT.SimpleTy) {
2059 default: return false;
2060 case MVT::i8: Opc = AArch64::STLRB; break;
2061 case MVT::i16: Opc = AArch64::STLRH; break;
2062 case MVT::i32: Opc = AArch64::STLRW; break;
2063 case MVT::i64: Opc = AArch64::STLRX; break;
2064 }
2065
2066 const MCInstrDesc &II = TII.get(Opc);
2067 SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2068 AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2069 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2070 .addReg(SrcReg)
2071 .addReg(AddrReg)
2072 .addMemOperand(MMO);
2073 return true;
2074}
2075
2076bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2077 MachineMemOperand *MMO) {
2078 if (!TLI.allowsMisalignedMemoryAccesses(VT))
2079 return false;
2080
2081 // Simplify this down to something we can handle.
2082 if (!simplifyAddress(Addr, VT))
2083 return false;
2084
2085 unsigned ScaleFactor = getImplicitScaleFactor(VT);
2086 if (!ScaleFactor)
2087 llvm_unreachable("Unexpected value type.");
2088
2089 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2090 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2091 bool UseScaled = true;
2092 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2093 UseScaled = false;
2094 ScaleFactor = 1;
2095 }
2096
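  // Store opcodes indexed by addressing mode (unscaled, scaled, register
  // offset LSL, register offset with W-register extend) and by the stored
  // type (i8, i16, i32, i64, f32, f64).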
2097 static const unsigned OpcTable[4][6] = {
2098 { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
2099 AArch64::STURSi, AArch64::STURDi },
2100 { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
2101 AArch64::STRSui, AArch64::STRDui },
2102 { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2103 AArch64::STRSroX, AArch64::STRDroX },
2104 { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2105 AArch64::STRSroW, AArch64::STRDroW }
2106 };
2107
2108 unsigned Opc;
2109 bool VTIsi1 = false;
2110 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2111 Addr.getOffsetReg();
2112 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2113 if (Addr.getExtendType() == AArch64_AM::UXTW ||
2114 Addr.getExtendType() == AArch64_AM::SXTW)
2115 Idx++;
2116
2117 switch (VT.SimpleTy) {
2118 default: llvm_unreachable("Unexpected value type.");
2119 case MVT::i1: VTIsi1 = true; [[fallthrough]];
2120 case MVT::i8: Opc = OpcTable[Idx][0]; break;
2121 case MVT::i16: Opc = OpcTable[Idx][1]; break;
2122 case MVT::i32: Opc = OpcTable[Idx][2]; break;
2123 case MVT::i64: Opc = OpcTable[Idx][3]; break;
2124 case MVT::f32: Opc = OpcTable[Idx][4]; break;
2125 case MVT::f64: Opc = OpcTable[Idx][5]; break;
2126 }
2127
2128 // Storing an i1 requires special handling.
2129 if (VTIsi1 && SrcReg != AArch64::WZR) {
2130 unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1);
2131 assert(ANDReg && "Unexpected AND instruction emission failure.");
2132 SrcReg = ANDReg;
2133 }
2134 // Create the base instruction, then add the operands.
2135 const MCInstrDesc &II = TII.get(Opc);
2136 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2137  MachineInstrBuilder MIB =
2138      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(SrcReg);
2139 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2140
2141 return true;
2142}
2143
2144bool AArch64FastISel::selectStore(const Instruction *I) {
2145 MVT VT;
2146 const Value *Op0 = I->getOperand(0);
2147 // Verify we have a legal type before going any further. Currently, we handle
2148 // simple types that will directly fit in a register (i32/f32/i64/f64) or
2149 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2150 if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2151 return false;
2152
2153 const Value *PtrV = I->getOperand(1);
2154 if (TLI.supportSwiftError()) {
2155 // Swifterror values can come from either a function parameter with
2156 // swifterror attribute or an alloca with swifterror attribute.
2157 if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2158 if (Arg->hasSwiftErrorAttr())
2159 return false;
2160 }
2161
2162 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2163 if (Alloca->isSwiftError())
2164 return false;
2165 }
2166 }
2167
2168 // Get the value to be stored into a register. Use the zero register directly
2169 // when possible to avoid an unnecessary copy and a wasted register.
2170 unsigned SrcReg = 0;
2171 if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2172 if (CI->isZero())
2173 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2174 } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2175 if (CF->isZero() && !CF->isNegative()) {
2176      VT = MVT::getIntegerVT(VT.getSizeInBits());
2177      SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2178 }
2179 }
2180
2181 if (!SrcReg)
2182 SrcReg = getRegForValue(Op0);
2183
2184 if (!SrcReg)
2185 return false;
2186
2187 auto *SI = cast<StoreInst>(I);
2188
2189 // Try to emit a STLR for seq_cst/release.
2190 if (SI->isAtomic()) {
2191 AtomicOrdering Ord = SI->getOrdering();
2192 // The non-atomic instructions are sufficient for relaxed stores.
2193 if (isReleaseOrStronger(Ord)) {
2194 // The STLR addressing mode only supports a base reg; pass that directly.
2195 Register AddrReg = getRegForValue(PtrV);
2196 return emitStoreRelease(VT, SrcReg, AddrReg,
2197 createMachineMemOperandFor(I));
2198 }
2199 }
2200
2201 // See if we can handle this address.
2202 Address Addr;
2203 if (!computeAddress(PtrV, Addr, Op0->getType()))
2204 return false;
2205
2206 if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2207 return false;
2208 return true;
2209}
2210
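/// Map an IR integer/FP compare predicate to the AArch64 condition code that
/// tests it. FCMP_ONE and FCMP_UEQ need an extra check and map to AL, which
/// callers treat as "not directly encodable".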
2211static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2212  switch (Pred) {
2213 case CmpInst::FCMP_ONE:
2214 case CmpInst::FCMP_UEQ:
2215 default:
2216 // AL is our "false" for now. The other two need more compares.
2217 return AArch64CC::AL;
2218 case CmpInst::ICMP_EQ:
2219 case CmpInst::FCMP_OEQ:
2220 return AArch64CC::EQ;
2221 case CmpInst::ICMP_SGT:
2222 case CmpInst::FCMP_OGT:
2223 return AArch64CC::GT;
2224 case CmpInst::ICMP_SGE:
2225 case CmpInst::FCMP_OGE:
2226 return AArch64CC::GE;
2227 case CmpInst::ICMP_UGT:
2228 case CmpInst::FCMP_UGT:
2229 return AArch64CC::HI;
2230 case CmpInst::FCMP_OLT:
2231 return AArch64CC::MI;
2232 case CmpInst::ICMP_ULE:
2233 case CmpInst::FCMP_OLE:
2234 return AArch64CC::LS;
2235 case CmpInst::FCMP_ORD:
2236 return AArch64CC::VC;
2237 case CmpInst::FCMP_UNO:
2238 return AArch64CC::VS;
2239 case CmpInst::FCMP_UGE:
2240 return AArch64CC::PL;
2241 case CmpInst::ICMP_SLT:
2242 case CmpInst::FCMP_ULT:
2243 return AArch64CC::LT;
2244 case CmpInst::ICMP_SLE:
2245 case CmpInst::FCMP_ULE:
2246 return AArch64CC::LE;
2247 case CmpInst::FCMP_UNE:
2248 case CmpInst::ICMP_NE:
2249 return AArch64CC::NE;
2250 case CmpInst::ICMP_UGE:
2251 return AArch64CC::HS;
2252 case CmpInst::ICMP_ULT:
2253 return AArch64CC::LO;
2254 }
2255}
2256
2257/// Try to emit a combined compare-and-branch instruction.
2258bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2259 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
2260 // will not be produced, as they are conditional branch instructions that do
2261 // not set flags.
2262 if (FuncInfo.MF->getFunction().hasFnAttribute(
2263 Attribute::SpeculativeLoadHardening))
2264 return false;
2265
2266 assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2267 const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2268 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2269
2270 const Value *LHS = CI->getOperand(0);
2271 const Value *RHS = CI->getOperand(1);
2272
2273 MVT VT;
2274 if (!isTypeSupported(LHS->getType(), VT))
2275 return false;
2276
2277 unsigned BW = VT.getSizeInBits();
2278 if (BW > 64)
2279 return false;
2280
2281 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2282 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2283
2284 // Try to take advantage of fallthrough opportunities.
2285 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2286 std::swap(TBB, FBB);
2287    Predicate = CmpInst::getInversePredicate(Predicate);
2288  }
2289
2290 int TestBit = -1;
2291 bool IsCmpNE;
2292 switch (Predicate) {
2293 default:
2294 return false;
2295 case CmpInst::ICMP_EQ:
2296 case CmpInst::ICMP_NE:
2297 if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2298 std::swap(LHS, RHS);
2299
2300 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2301 return false;
2302
2303 if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2304 if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2305 const Value *AndLHS = AI->getOperand(0);
2306 const Value *AndRHS = AI->getOperand(1);
2307
2308 if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2309 if (C->getValue().isPowerOf2())
2310 std::swap(AndLHS, AndRHS);
2311
2312 if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2313 if (C->getValue().isPowerOf2()) {
2314 TestBit = C->getValue().logBase2();
2315 LHS = AndLHS;
2316 }
2317 }
2318
2319 if (VT == MVT::i1)
2320 TestBit = 0;
2321
2322 IsCmpNE = Predicate == CmpInst::ICMP_NE;
2323 break;
2324 case CmpInst::ICMP_SLT:
2325 case CmpInst::ICMP_SGE:
2326 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2327 return false;
2328
2329 TestBit = BW - 1;
2330 IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2331 break;
2332 case CmpInst::ICMP_SGT:
2333 case CmpInst::ICMP_SLE:
2334 if (!isa<ConstantInt>(RHS))
2335 return false;
2336
2337 if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2338 return false;
2339
2340 TestBit = BW - 1;
2341 IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2342 break;
2343 } // end switch
2344
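  // Branch opcodes indexed as [IsBitTest][IsCmpNE][Is64Bit]: CBZ/CBNZ compare
  // the whole register against zero, TBZ/TBNZ test a single bit.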
2345 static const unsigned OpcTable[2][2][2] = {
2346 { {AArch64::CBZW, AArch64::CBZX },
2347 {AArch64::CBNZW, AArch64::CBNZX} },
2348 { {AArch64::TBZW, AArch64::TBZX },
2349 {AArch64::TBNZW, AArch64::TBNZX} }
2350 };
2351
2352 bool IsBitTest = TestBit != -1;
2353 bool Is64Bit = BW == 64;
2354 if (TestBit < 32 && TestBit >= 0)
2355 Is64Bit = false;
2356
2357 unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2358 const MCInstrDesc &II = TII.get(Opc);
2359
2360 Register SrcReg = getRegForValue(LHS);
2361 if (!SrcReg)
2362 return false;
2363
2364 if (BW == 64 && !Is64Bit)
2365 SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32);
2366
2367 if ((BW < 32) && !IsBitTest)
2368 SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
2369
2370 // Emit the combined compare and branch instruction.
2371 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2372  MachineInstrBuilder MIB =
2373      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
2374 .addReg(SrcReg);
2375 if (IsBitTest)
2376 MIB.addImm(TestBit);
2377 MIB.addMBB(TBB);
2378
2379 finishCondBranch(BI->getParent(), TBB, FBB);
2380 return true;
2381}
2382
2383bool AArch64FastISel::selectBranch(const Instruction *I) {
2384 const BranchInst *BI = cast<BranchInst>(I);
2385 if (BI->isUnconditional()) {
2386 MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2387 fastEmitBranch(MSucc, BI->getDebugLoc());
2388 return true;
2389 }
2390
2391 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2392 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2393
2394 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2395 if (CI->hasOneUse() && isValueAvailable(CI)) {
2396 // Try to optimize or fold the cmp.
2397 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2398 switch (Predicate) {
2399 default:
2400 break;
2401      case CmpInst::FCMP_FALSE:
2402        fastEmitBranch(FBB, MIMD.getDL());
2403 return true;
2404 case CmpInst::FCMP_TRUE:
2405 fastEmitBranch(TBB, MIMD.getDL());
2406 return true;
2407 }
2408
2409 // Try to emit a combined compare-and-branch first.
2410 if (emitCompareAndBranch(BI))
2411 return true;
2412
2413 // Try to take advantage of fallthrough opportunities.
2414 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2415 std::swap(TBB, FBB);
2416        Predicate = CmpInst::getInversePredicate(Predicate);
2417      }
2418
2419 // Emit the cmp.
2420 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2421 return false;
2422
2423 // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2424 // instruction.
2425 AArch64CC::CondCode CC = getCompareCC(Predicate);
2426      AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2427      switch (Predicate) {
2428 default:
2429 break;
2430 case CmpInst::FCMP_UEQ:
2431 ExtraCC = AArch64CC::EQ;
2432 CC = AArch64CC::VS;
2433 break;
2434 case CmpInst::FCMP_ONE:
2435 ExtraCC = AArch64CC::MI;
2436 CC = AArch64CC::GT;
2437 break;
2438 }
2439 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2440
2441 // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2442 if (ExtraCC != AArch64CC::AL) {
2443 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2444 .addImm(ExtraCC)
2445 .addMBB(TBB);
2446 }
2447
2448 // Emit the branch.
2449 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2450 .addImm(CC)
2451 .addMBB(TBB);
2452
2453 finishCondBranch(BI->getParent(), TBB, FBB);
2454 return true;
2455 }
2456 } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2457 uint64_t Imm = CI->getZExtValue();
2458 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2459 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::B))
2460 .addMBB(Target);
2461
2462 // Obtain the branch probability and add the target to the successor list.
2463 if (FuncInfo.BPI) {
2464 auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2465 BI->getParent(), Target->getBasicBlock());
2466 FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2467 } else
2468 FuncInfo.MBB->addSuccessorWithoutProb(Target);
2469 return true;
2470 } else {
2471    AArch64CC::CondCode CC = AArch64CC::NE;
2472    if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2473      // Fake request the condition; otherwise the intrinsic might be completely
2474      // optimized away.
2475 Register CondReg = getRegForValue(BI->getCondition());
2476 if (!CondReg)
2477 return false;
2478
2479 // Emit the branch.
2480 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2481 .addImm(CC)
2482 .addMBB(TBB);
2483
2484 finishCondBranch(BI->getParent(), TBB, FBB);
2485 return true;
2486 }
2487 }
2488
2489 Register CondReg = getRegForValue(BI->getCondition());
2490 if (CondReg == 0)
2491 return false;
2492
2493  // i1 conditions come as i32 values; test the lowest bit with tb(n)z.
2494 unsigned Opcode = AArch64::TBNZW;
2495 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2496 std::swap(TBB, FBB);
2497 Opcode = AArch64::TBZW;
2498 }
2499
2500 const MCInstrDesc &II = TII.get(Opcode);
2501 Register ConstrainedCondReg
2502 = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2503 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2504 .addReg(ConstrainedCondReg)
2505 .addImm(0)
2506 .addMBB(TBB);
2507
2508 finishCondBranch(BI->getParent(), TBB, FBB);
2509 return true;
2510}
2511
2512bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2513 const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2514 Register AddrReg = getRegForValue(BI->getOperand(0));
2515 if (AddrReg == 0)
2516 return false;
2517
2518 // Emit the indirect branch.
2519 const MCInstrDesc &II = TII.get(AArch64::BR);
2520 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
2521 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(AddrReg);
2522
2523 // Make sure the CFG is up-to-date.
2524 for (const auto *Succ : BI->successors())
2525 FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2526
2527 return true;
2528}
2529
2530bool AArch64FastISel::selectCmp(const Instruction *I) {
2531 const CmpInst *CI = cast<CmpInst>(I);
2532
2533 // Vectors of i1 are weird: bail out.
2534 if (CI->getType()->isVectorTy())
2535 return false;
2536
2537 // Try to optimize or fold the cmp.
2538 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2539 unsigned ResultReg = 0;
2540 switch (Predicate) {
2541 default:
2542 break;
2543  case CmpInst::FCMP_FALSE:
2544    ResultReg = createResultReg(&AArch64::GPR32RegClass);
2545 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2546 TII.get(TargetOpcode::COPY), ResultReg)
2547 .addReg(AArch64::WZR, getKillRegState(true));
2548 break;
2549 case CmpInst::FCMP_TRUE:
2550 ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2551 break;
2552 }
2553
2554 if (ResultReg) {
2555 updateValueMap(I, ResultReg);
2556 return true;
2557 }
2558
2559 // Emit the cmp.
2560 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2561 return false;
2562
2563 ResultReg = createResultReg(&AArch64::GPR32RegClass);
2564
2565 // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2566 // condition codes are inverted, because they are used by CSINC.
2567 static unsigned CondCodeTable[2][2] = {
2568    { AArch64CC::NE, AArch64CC::VC },
2569    { AArch64CC::PL, AArch64CC::LE }
2570  };
2571 unsigned *CondCodes = nullptr;
2572 switch (Predicate) {
2573 default:
2574 break;
2575 case CmpInst::FCMP_UEQ:
2576 CondCodes = &CondCodeTable[0][0];
2577 break;
2578 case CmpInst::FCMP_ONE:
2579 CondCodes = &CondCodeTable[1][0];
2580 break;
2581 }
2582
2583 if (CondCodes) {
2584 Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2585 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2586 TmpReg1)
2587 .addReg(AArch64::WZR, getKillRegState(true))
2588 .addReg(AArch64::WZR, getKillRegState(true))
2589 .addImm(CondCodes[0]);
2590 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2591 ResultReg)
2592 .addReg(TmpReg1, getKillRegState(true))
2593 .addReg(AArch64::WZR, getKillRegState(true))
2594 .addImm(CondCodes[1]);
2595
2596 updateValueMap(I, ResultReg);
2597 return true;
2598 }
2599
2600 // Now set a register based on the comparison.
2601 AArch64CC::CondCode CC = getCompareCC(Predicate);
2602 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2603 AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2604 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2605 ResultReg)
2606 .addReg(AArch64::WZR, getKillRegState(true))
2607 .addReg(AArch64::WZR, getKillRegState(true))
2608 .addImm(invertedCC);
2609
2610 updateValueMap(I, ResultReg);
2611 return true;
2612}
2613
2614/// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2615/// value.
2616bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2617 if (!SI->getType()->isIntegerTy(1))
2618 return false;
2619
2620 const Value *Src1Val, *Src2Val;
2621 unsigned Opc = 0;
2622 bool NeedExtraOp = false;
2623 if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2624 if (CI->isOne()) {
2625 Src1Val = SI->getCondition();
2626 Src2Val = SI->getFalseValue();
2627 Opc = AArch64::ORRWrr;
2628 } else {
2629 assert(CI->isZero());
2630 Src1Val = SI->getFalseValue();
2631 Src2Val = SI->getCondition();
2632 Opc = AArch64::BICWrr;
2633 }
2634 } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2635 if (CI->isOne()) {
2636 Src1Val = SI->getCondition();
2637 Src2Val = SI->getTrueValue();
2638 Opc = AArch64::ORRWrr;
2639 NeedExtraOp = true;
2640 } else {
2641 assert(CI->isZero());
2642 Src1Val = SI->getCondition();
2643 Src2Val = SI->getTrueValue();
2644 Opc = AArch64::ANDWrr;
2645 }
2646 }
2647
2648 if (!Opc)
2649 return false;
2650
2651 Register Src1Reg = getRegForValue(Src1Val);
2652 if (!Src1Reg)
2653 return false;
2654
2655 Register Src2Reg = getRegForValue(Src2Val);
2656 if (!Src2Reg)
2657 return false;
2658
2659 if (NeedExtraOp)
2660 Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1);
2661
2662 Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2663 Src2Reg);
2664 updateValueMap(SI, ResultReg);
2665 return true;
2666}
2667
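/// Lower a select into CSEL/FCSEL. The condition flags come either from a
/// foldable overflow intrinsic, from a single-use compare emitted here, or
/// from a TST of the boolean condition register.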
2668bool AArch64FastISel::selectSelect(const Instruction *I) {
2669 assert(isa<SelectInst>(I) && "Expected a select instruction.");
2670 MVT VT;
2671 if (!isTypeSupported(I->getType(), VT))
2672 return false;
2673
2674 unsigned Opc;
2675 const TargetRegisterClass *RC;
2676 switch (VT.SimpleTy) {
2677 default:
2678 return false;
2679 case MVT::i1:
2680 case MVT::i8:
2681 case MVT::i16:
2682 case MVT::i32:
2683 Opc = AArch64::CSELWr;
2684 RC = &AArch64::GPR32RegClass;
2685 break;
2686 case MVT::i64:
2687 Opc = AArch64::CSELXr;
2688 RC = &AArch64::GPR64RegClass;
2689 break;
2690 case MVT::f32:
2691 Opc = AArch64::FCSELSrrr;
2692 RC = &AArch64::FPR32RegClass;
2693 break;
2694 case MVT::f64:
2695 Opc = AArch64::FCSELDrrr;
2696 RC = &AArch64::FPR64RegClass;
2697 break;
2698 }
2699
2700 const SelectInst *SI = cast<SelectInst>(I);
2701 const Value *Cond = SI->getCondition();
2702  AArch64CC::CondCode CC = AArch64CC::NE;
2703  AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2704
2705 if (optimizeSelect(SI))
2706 return true;
2707
2708 // Try to pickup the flags, so we don't have to emit another compare.
2709 if (foldXALUIntrinsic(CC, I, Cond)) {
2710 // Fake request the condition to force emission of the XALU intrinsic.
2711 Register CondReg = getRegForValue(Cond);
2712 if (!CondReg)
2713 return false;
2714 } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2715 isValueAvailable(Cond)) {
2716 const auto *Cmp = cast<CmpInst>(Cond);
2717 // Try to optimize or fold the cmp.
2718 CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2719 const Value *FoldSelect = nullptr;
2720 switch (Predicate) {
2721 default:
2722 break;
2723    case CmpInst::FCMP_FALSE:
2724      FoldSelect = SI->getFalseValue();
2725 break;
2726 case CmpInst::FCMP_TRUE:
2727 FoldSelect = SI->getTrueValue();
2728 break;
2729 }
2730
2731 if (FoldSelect) {
2732 Register SrcReg = getRegForValue(FoldSelect);
2733 if (!SrcReg)
2734 return false;
2735
2736 updateValueMap(I, SrcReg);
2737 return true;
2738 }
2739
2740 // Emit the cmp.
2741 if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2742 return false;
2743
2744 // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2745 CC = getCompareCC(Predicate);
2746 switch (Predicate) {
2747 default:
2748 break;
2749 case CmpInst::FCMP_UEQ:
2750 ExtraCC = AArch64CC::EQ;
2751 CC = AArch64CC::VS;
2752 break;
2753 case CmpInst::FCMP_ONE:
2754 ExtraCC = AArch64CC::MI;
2755 CC = AArch64CC::GT;
2756 break;
2757 }
2758 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2759 } else {
2760 Register CondReg = getRegForValue(Cond);
2761 if (!CondReg)
2762 return false;
2763
2764 const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2765 CondReg = constrainOperandRegClass(II, CondReg, 1);
2766
2767 // Emit a TST instruction (ANDS wzr, reg, #imm).
2768 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II,
2769 AArch64::WZR)
2770 .addReg(CondReg)
2771        .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2772  }
2773
2774 Register Src1Reg = getRegForValue(SI->getTrueValue());
2775 Register Src2Reg = getRegForValue(SI->getFalseValue());
2776
2777 if (!Src1Reg || !Src2Reg)
2778 return false;
2779
2780 if (ExtraCC != AArch64CC::AL)
2781 Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC);
2782
2783 Register ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC);
2784 updateValueMap(I, ResultReg);
2785 return true;
2786}
2787
2788bool AArch64FastISel::selectFPExt(const Instruction *I) {
2789 Value *V = I->getOperand(0);
2790 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2791 return false;
2792
2793 Register Op = getRegForValue(V);
2794 if (Op == 0)
2795 return false;
2796
2797 Register ResultReg = createResultReg(&AArch64::FPR64RegClass);
2798 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTDSr),
2799 ResultReg).addReg(Op);
2800 updateValueMap(I, ResultReg);
2801 return true;
2802}
2803
2804bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2805 Value *V = I->getOperand(0);
2806 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2807 return false;
2808
2809 Register Op = getRegForValue(V);
2810 if (Op == 0)
2811 return false;
2812
2813 Register ResultReg = createResultReg(&AArch64::FPR32RegClass);
2814 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTSDr),
2815 ResultReg).addReg(Op);
2816 updateValueMap(I, ResultReg);
2817 return true;
2818}
2819
2820// FPToUI and FPToSI
2821bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2822 MVT DestVT;
2823 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2824 return false;
2825
2826 Register SrcReg = getRegForValue(I->getOperand(0));
2827 if (SrcReg == 0)
2828 return false;
2829
2830 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2831 if (SrcVT == MVT::f128 || SrcVT == MVT::f16 || SrcVT == MVT::bf16)
2832 return false;
2833
2834 unsigned Opc;
2835 if (SrcVT == MVT::f64) {
2836 if (Signed)
2837 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2838 else
2839 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2840 } else {
2841 if (Signed)
2842 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2843 else
2844 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2845 }
2846 Register ResultReg = createResultReg(
2847 DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2848 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
2849 .addReg(SrcReg);
2850 updateValueMap(I, ResultReg);
2851 return true;
2852}
2853
2854bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2855 MVT DestVT;
2856 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2857 return false;
2858 // Let regular ISEL handle FP16
2859 if (DestVT == MVT::f16 || DestVT == MVT::bf16)
2860 return false;
2861
2862 assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2863 "Unexpected value type.");
2864
2865 Register SrcReg = getRegForValue(I->getOperand(0));
2866 if (!SrcReg)
2867 return false;
2868
2869 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2870
2871 // Handle sign-extension.
2872 if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2873 SrcReg =
2874 emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2875 if (!SrcReg)
2876 return false;
2877 }
2878
2879 unsigned Opc;
2880 if (SrcVT == MVT::i64) {
2881 if (Signed)
2882 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2883 else
2884 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2885 } else {
2886 if (Signed)
2887 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2888 else
2889 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2890 }
2891
2892 Register ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg);
2893 updateValueMap(I, ResultReg);
2894 return true;
2895}
2896
2897bool AArch64FastISel::fastLowerArguments() {
2898 if (!FuncInfo.CanLowerReturn)
2899 return false;
2900
2901 const Function *F = FuncInfo.Fn;
2902 if (F->isVarArg())
2903 return false;
2904
2905 CallingConv::ID CC = F->getCallingConv();
2906  if (CC != CallingConv::C && CC != CallingConv::Swift)
2907    return false;
2908
2909 if (Subtarget->hasCustomCallingConv())
2910 return false;
2911
2912 // Only handle simple cases of up to 8 GPR and FPR each.
2913 unsigned GPRCnt = 0;
2914 unsigned FPRCnt = 0;
2915 for (auto const &Arg : F->args()) {
2916 if (Arg.hasAttribute(Attribute::ByVal) ||
2917 Arg.hasAttribute(Attribute::InReg) ||
2918 Arg.hasAttribute(Attribute::StructRet) ||
2919 Arg.hasAttribute(Attribute::SwiftSelf) ||
2920 Arg.hasAttribute(Attribute::SwiftAsync) ||
2921 Arg.hasAttribute(Attribute::SwiftError) ||
2922 Arg.hasAttribute(Attribute::Nest))
2923 return false;
2924
2925 Type *ArgTy = Arg.getType();
2926 if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2927 return false;
2928
2929 EVT ArgVT = TLI.getValueType(DL, ArgTy);
2930 if (!ArgVT.isSimple())
2931 return false;
2932
2933 MVT VT = ArgVT.getSimpleVT().SimpleTy;
2934 if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2935 return false;
2936
2937 if (VT.isVector() &&
2938 (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2939 return false;
2940
2941 if (VT >= MVT::i1 && VT <= MVT::i64)
2942 ++GPRCnt;
2943 else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2944 VT.is128BitVector())
2945 ++FPRCnt;
2946 else
2947 return false;
2948
2949 if (GPRCnt > 8 || FPRCnt > 8)
2950 return false;
2951 }
2952
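  // Argument registers by class (W, X, H, S, D and Q, numbers 0-7); the row
  // is picked from the argument's value type below.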
2953 static const MCPhysReg Registers[6][8] = {
2954 { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2955 AArch64::W5, AArch64::W6, AArch64::W7 },
2956 { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2957 AArch64::X5, AArch64::X6, AArch64::X7 },
2958 { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2959 AArch64::H5, AArch64::H6, AArch64::H7 },
2960 { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2961 AArch64::S5, AArch64::S6, AArch64::S7 },
2962 { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2963 AArch64::D5, AArch64::D6, AArch64::D7 },
2964 { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2965 AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2966 };
2967
2968 unsigned GPRIdx = 0;
2969 unsigned FPRIdx = 0;
2970 for (auto const &Arg : F->args()) {
2971 MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2972 unsigned SrcReg;
2973 const TargetRegisterClass *RC;
2974 if (VT >= MVT::i1 && VT <= MVT::i32) {
2975 SrcReg = Registers[0][GPRIdx++];
2976 RC = &AArch64::GPR32RegClass;
2977 VT = MVT::i32;
2978 } else if (VT == MVT::i64) {
2979 SrcReg = Registers[1][GPRIdx++];
2980 RC = &AArch64::GPR64RegClass;
2981 } else if (VT == MVT::f16 || VT == MVT::bf16) {
2982 SrcReg = Registers[2][FPRIdx++];
2983 RC = &AArch64::FPR16RegClass;
2984 } else if (VT == MVT::f32) {
2985 SrcReg = Registers[3][FPRIdx++];
2986 RC = &AArch64::FPR32RegClass;
2987 } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2988 SrcReg = Registers[4][FPRIdx++];
2989 RC = &AArch64::FPR64RegClass;
2990 } else if (VT.is128BitVector()) {
2991 SrcReg = Registers[5][FPRIdx++];
2992 RC = &AArch64::FPR128RegClass;
2993 } else
2994 llvm_unreachable("Unexpected value type.");
2995
2996 Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
2997 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
2998 // Without this, EmitLiveInCopies may eliminate the livein if its only
2999 // use is a bitcast (which isn't turned into an instruction).
3000 Register ResultReg = createResultReg(RC);
3001 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3002 TII.get(TargetOpcode::COPY), ResultReg)
3003 .addReg(DstReg, getKillRegState(true));
3004 updateValueMap(&Arg, ResultReg);
3005 }
3006 return true;
3007}
3008
3009bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3010 SmallVectorImpl<MVT> &OutVTs,
3011 unsigned &NumBytes) {
3012 CallingConv::ID CC = CLI.CallConv;
3013  SmallVector<CCValAssign, 16> ArgLocs;
3014  CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3015 CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
3016
3017 // Get a count of how many bytes are to be pushed on the stack.
3018 NumBytes = CCInfo.getStackSize();
3019
3020 // Issue CALLSEQ_START
3021 unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3022 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackDown))
3023 .addImm(NumBytes).addImm(0);
3024
3025 // Process the args.
3026 for (CCValAssign &VA : ArgLocs) {
3027 const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3028 MVT ArgVT = OutVTs[VA.getValNo()];
3029
3030 Register ArgReg = getRegForValue(ArgVal);
3031 if (!ArgReg)
3032 return false;
3033
3034 // Handle arg promotion: SExt, ZExt, AExt.
3035 switch (VA.getLocInfo()) {
3036 case CCValAssign::Full:
3037 break;
3038 case CCValAssign::SExt: {
3039 MVT DestVT = VA.getLocVT();
3040 MVT SrcVT = ArgVT;
3041 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3042 if (!ArgReg)
3043 return false;
3044 break;
3045 }
3046 case CCValAssign::AExt:
3047 // Intentional fall-through.
3048 case CCValAssign::ZExt: {
3049 MVT DestVT = VA.getLocVT();
3050 MVT SrcVT = ArgVT;
3051 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3052 if (!ArgReg)
3053 return false;
3054 break;
3055 }
3056 default:
3057 llvm_unreachable("Unknown arg promotion!");
3058 }
3059
3060 // Now copy/store arg to correct locations.
3061 if (VA.isRegLoc() && !VA.needsCustom()) {
3062 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3063 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3064 CLI.OutRegs.push_back(VA.getLocReg());
3065 } else if (VA.needsCustom()) {
3066 // FIXME: Handle custom args.
3067 return false;
3068 } else {
3069 assert(VA.isMemLoc() && "Assuming store on stack.");
3070
3071 // Don't emit stores for undef values.
3072 if (isa<UndefValue>(ArgVal))
3073 continue;
3074
3075 // Need to store on the stack.
3076 unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3077
3078 unsigned BEAlign = 0;
3079 if (ArgSize < 8 && !Subtarget->isLittleEndian())
3080 BEAlign = 8 - ArgSize;
3081
3082 Address Addr;
3083 Addr.setKind(Address::RegBase);
3084 Addr.setReg(AArch64::SP);
3085 Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3086
3087 Align Alignment = DL.getABITypeAlign(ArgVal->getType());
3088 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3089 MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3090 MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3091
3092 if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3093 return false;
3094 }
3095 }
3096 return true;
3097}
3098
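/// Tear down the call frame (CALLSEQ_END) and copy the return values out of
/// their assigned physical registers into freshly created virtual registers.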
3099bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, unsigned NumBytes) {
3100 CallingConv::ID CC = CLI.CallConv;
3101
3102 // Issue CALLSEQ_END
3103 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3104 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp))
3105 .addImm(NumBytes).addImm(0);
3106
3107 // Now the return values.
3108  SmallVector<CCValAssign, 16> RVLocs;
3109  CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3110 CCInfo.AnalyzeCallResult(CLI.Ins, CCAssignFnForCall(CC));
3111
3112 Register ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
3113 for (unsigned i = 0; i != RVLocs.size(); ++i) {
3114 CCValAssign &VA = RVLocs[i];
3115 MVT CopyVT = VA.getValVT();
3116 unsigned CopyReg = ResultReg + i;
3117
3118 // TODO: Handle big-endian results
3119 if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3120 return false;
3121
3122 // Copy result out of their specified physreg.
3123 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
3124 CopyReg)
3125 .addReg(VA.getLocReg());
3126 CLI.InRegs.push_back(VA.getLocReg());
3127 }
3128
3129 CLI.ResultReg = ResultReg;
3130 CLI.NumResultRegs = RVLocs.size();
3131
3132 return true;
3133}
3134
3135bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3136 CallingConv::ID CC = CLI.CallConv;
3137 bool IsTailCall = CLI.IsTailCall;
3138 bool IsVarArg = CLI.IsVarArg;
3139 const Value *Callee = CLI.Callee;
3140 MCSymbol *Symbol = CLI.Symbol;
3141
3142 if (!Callee && !Symbol)
3143 return false;
3144
3145 // Allow SelectionDAG isel to handle calls to functions like setjmp that need
3146 // a bti instruction following the call.
3147 if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
3148 !Subtarget->noBTIAtReturnTwice() &&
3149      MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
3150    return false;
3151
3152 // Allow SelectionDAG isel to handle indirect calls with KCFI checks.
3153 if (CLI.CB && CLI.CB->isIndirectCall() &&
3154 CLI.CB->getOperandBundle(LLVMContext::OB_kcfi))
3155 return false;
3156
3157 // Allow SelectionDAG isel to handle tail calls.
3158 if (IsTailCall)
3159 return false;
3160
3161 // FIXME: we could and should support this, but for now correctness at -O0 is
3162 // more important.
3163 if (Subtarget->isTargetILP32())
3164 return false;
3165
3166 CodeModel::Model CM = TM.getCodeModel();
3167 // Only support the small-addressing and large code models.
3168 if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3169 return false;
3170
3171 // FIXME: Add large code model support for ELF.
3172 if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3173 return false;
3174
3175 // ELF -fno-plt compiled intrinsic calls do not have the nonlazybind
3176 // attribute. Check "RtLibUseGOT" instead.
3177 if (MF->getFunction().getParent()->getRtLibUseGOT())
3178 return false;
3179
3180 // Let SDISel handle vararg functions.
3181 if (IsVarArg)
3182 return false;
3183
3184 if (Subtarget->isWindowsArm64EC())
3185 return false;
3186
3187 for (auto Flag : CLI.OutFlags)
3188 if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3189 Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError())
3190 return false;
3191
3192 // Set up the argument vectors.
3193 SmallVector<MVT, 16> OutVTs;
3194 OutVTs.reserve(CLI.OutVals.size());
3195
3196 for (auto *Val : CLI.OutVals) {
3197 MVT VT;
3198 if (!isTypeLegal(Val->getType(), VT) &&
3199 !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3200 return false;
3201
3202 // We don't handle vector parameters yet.
3203 if (VT.isVector() || VT.getSizeInBits() > 64)
3204 return false;
3205
3206 OutVTs.push_back(VT);
3207 }
3208
3209 Address Addr;
3210 if (Callee && !computeCallAddress(Callee, Addr))
3211 return false;
3212
3213 // The weak function target may be zero; in that case we must use indirect
3214  // addressing via a stub on Windows, as it may be out of range for a
3215 // PC-relative jump.
3216 if (Subtarget->isTargetWindows() && Addr.getGlobalValue() &&
3217 Addr.getGlobalValue()->hasExternalWeakLinkage())
3218 return false;
3219
3220 // Handle the arguments now that we've gotten them.
3221 unsigned NumBytes;
3222 if (!processCallArgs(CLI, OutVTs, NumBytes))
3223 return false;
3224
3225 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3226 if (RegInfo->isAnyArgRegReserved(*MF))
3227 RegInfo->emitReservedArgRegCallError(*MF);
3228
3229 // Issue the call.
3230  MachineInstrBuilder MIB;
3231  if (Subtarget->useSmallAddressing()) {
3232 const MCInstrDesc &II =
3233 TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL);
3234 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II);
3235 if (Symbol)
3236 MIB.addSym(Symbol, 0);
3237 else if (Addr.getGlobalValue())
3238 MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3239 else if (Addr.getReg()) {
3240 Register Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3241 MIB.addReg(Reg);
3242 } else
3243 return false;
3244 } else {
3245 unsigned CallReg = 0;
3246 if (Symbol) {
3247 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3248 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
3249 ADRPReg)
3250          .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3251
3252 CallReg = createResultReg(&AArch64::GPR64RegClass);
3253 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3254 TII.get(AArch64::LDRXui), CallReg)
3255 .addReg(ADRPReg)
3256 .addSym(Symbol,
3257                  AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3258    } else if (Addr.getGlobalValue())
3259 CallReg = materializeGV(Addr.getGlobalValue());
3260 else if (Addr.getReg())
3261 CallReg = Addr.getReg();
3262
3263 if (!CallReg)
3264 return false;
3265
3266 const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF));
3267 CallReg = constrainOperandRegClass(II, CallReg, 0);
3268 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(CallReg);
3269 }
3270
3271 // Add implicit physical register uses to the call.
3272 for (auto Reg : CLI.OutRegs)
3273 MIB.addReg(Reg, RegState::Implicit);
3274
3275 // Add a register mask with the call-preserved registers.
3276 // Proper defs for return values will be added by setPhysRegsDeadExcept().
3277 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3278
3279 CLI.Call = MIB;
3280
3281 // Finish off the call including any return values.
3282 return finishCall(CLI, NumBytes);
3283}
3284
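/// A memcpy is small enough to inline when it needs at most four chunks of
/// the given alignment, or is shorter than 32 bytes when no alignment is
/// known.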
3285bool AArch64FastISel::isMemCpySmall(uint64_t Len, MaybeAlign Alignment) {
3286 if (Alignment)
3287 return Len / Alignment->value() <= 4;
3288 else
3289 return Len < 32;
3290}
3291
3292bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3293 uint64_t Len, MaybeAlign Alignment) {
3294 // Make sure we don't bloat code by inlining very large memcpy's.
3295 if (!isMemCpySmall(Len, Alignment))
3296 return false;
3297
3298 int64_t UnscaledOffset = 0;
3299 Address OrigDest = Dest;
3300 Address OrigSrc = Src;
3301
3302 while (Len) {
3303 MVT VT;
3304 if (!Alignment || *Alignment >= 8) {
3305 if (Len >= 8)
3306 VT = MVT::i64;
3307 else if (Len >= 4)
3308 VT = MVT::i32;
3309 else if (Len >= 2)
3310 VT = MVT::i16;
3311 else {
3312 VT = MVT::i8;
3313 }
3314 } else {
3315 assert(Alignment && "Alignment is set in this branch");
3316 // Bound based on alignment.
3317 if (Len >= 4 && *Alignment == 4)
3318 VT = MVT::i32;
3319 else if (Len >= 2 && *Alignment == 2)
3320 VT = MVT::i16;
3321 else {
3322 VT = MVT::i8;
3323 }
3324 }
3325
3326 unsigned ResultReg = emitLoad(VT, VT, Src);
3327 if (!ResultReg)
3328 return false;
3329
3330 if (!emitStore(VT, ResultReg, Dest))
3331 return false;
3332
3333 int64_t Size = VT.getSizeInBits() / 8;
3334 Len -= Size;
3335 UnscaledOffset += Size;
3336
3337 // We need to recompute the unscaled offset for each iteration.
3338 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3339 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3340 }
3341
3342 return true;
3343}
3344
3345/// Check if it is possible to fold the condition from the XALU intrinsic
3346/// into the user. The condition code will only be updated on success.
3347bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3348 const Instruction *I,
3349 const Value *Cond) {
3350 if (!isa<ExtractValueInst>(Cond))
3351 return false;
3352
3353 const auto *EV = cast<ExtractValueInst>(Cond);
3354 if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3355 return false;
3356
3357 const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3358 MVT RetVT;
3359 const Function *Callee = II->getCalledFunction();
3360 Type *RetTy =
3361 cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3362 if (!isTypeLegal(RetTy, RetVT))
3363 return false;
3364
3365 if (RetVT != MVT::i32 && RetVT != MVT::i64)
3366 return false;
3367
3368 const Value *LHS = II->getArgOperand(0);
3369 const Value *RHS = II->getArgOperand(1);
3370
3371 // Canonicalize immediate to the RHS.
3372 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3373 std::swap(LHS, RHS);
3374
3375 // Simplify multiplies.
3376 Intrinsic::ID IID = II->getIntrinsicID();
3377 switch (IID) {
3378 default:
3379 break;
3380 case Intrinsic::smul_with_overflow:
3381 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3382 if (C->getValue() == 2)
3383 IID = Intrinsic::sadd_with_overflow;
3384 break;
3385 case Intrinsic::umul_with_overflow:
3386 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3387 if (C->getValue() == 2)
3388 IID = Intrinsic::uadd_with_overflow;
3389 break;
3390 }
3391
3392 AArch64CC::CondCode TmpCC;
3393 switch (IID) {
3394 default:
3395 return false;
3396 case Intrinsic::sadd_with_overflow:
3397 case Intrinsic::ssub_with_overflow:
3398 TmpCC = AArch64CC::VS;
3399 break;
3400 case Intrinsic::uadd_with_overflow:
3401 TmpCC = AArch64CC::HS;
3402 break;
3403 case Intrinsic::usub_with_overflow:
3404 TmpCC = AArch64CC::LO;
3405 break;
3406 case Intrinsic::smul_with_overflow:
3407 case Intrinsic::umul_with_overflow:
3408 TmpCC = AArch64CC::NE;
3409 break;
3410 }
3411
3412 // Check if both instructions are in the same basic block.
3413 if (!isValueAvailable(II))
3414 return false;
3415
3416 // Make sure nothing is in the way
3417  BasicBlock::const_iterator Start(I);
3418  BasicBlock::const_iterator End(II);
3419  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3420 // We only expect extractvalue instructions between the intrinsic and the
3421 // instruction to be selected.
3422 if (!isa<ExtractValueInst>(Itr))
3423 return false;
3424
3425 // Check that the extractvalue operand comes from the intrinsic.
3426 const auto *EVI = cast<ExtractValueInst>(Itr);
3427 if (EVI->getAggregateOperand() != II)
3428 return false;
3429 }
3430
3431 CC = TmpCC;
3432 return true;
3433}
3434
3435bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3436 // FIXME: Handle more intrinsics.
3437 switch (II->getIntrinsicID()) {
3438 default: return false;
3439 case Intrinsic::frameaddress: {
3440 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3441 MFI.setFrameAddressIsTaken(true);
3442
3443 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3444 Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3445 Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3446 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3447 TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3448 // Recursively load frame address
3449 // ldr x0, [fp]
3450 // ldr x0, [x0]
3451 // ldr x0, [x0]
3452 // ...
3453 unsigned DestReg;
3454 unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3455 while (Depth--) {
3456 DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3457 SrcReg, 0);
3458 assert(DestReg && "Unexpected LDR instruction emission failure.");
3459 SrcReg = DestReg;
3460 }
3461
3462 updateValueMap(II, SrcReg);
3463 return true;
3464 }
3465 case Intrinsic::sponentry: {
3466 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3467
3468 // SP = FP + Fixed Object + 16
3469 int FI = MFI.CreateFixedObject(4, 0, false);
3470 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
3471 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3472 TII.get(AArch64::ADDXri), ResultReg)
3473 .addFrameIndex(FI)
3474 .addImm(0)
3475 .addImm(0);
3476
3477 updateValueMap(II, ResultReg);
3478 return true;
3479 }
3480 case Intrinsic::memcpy:
3481 case Intrinsic::memmove: {
3482 const auto *MTI = cast<MemTransferInst>(II);
3483 // Don't handle volatile.
3484 if (MTI->isVolatile())
3485 return false;
3486
3487 // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
3488 // we would emit dead code because we don't currently handle memmoves.
3489 bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3490 if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3491 // Small memcpy's are common enough that we want to do them without a call
3492 // if possible.
3493 uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3494 MaybeAlign Alignment;
3495 if (MTI->getDestAlign() || MTI->getSourceAlign())
3496 Alignment = std::min(MTI->getDestAlign().valueOrOne(),
3497 MTI->getSourceAlign().valueOrOne());
3498 if (isMemCpySmall(Len, Alignment)) {
3499 Address Dest, Src;
3500 if (!computeAddress(MTI->getRawDest(), Dest) ||
3501 !computeAddress(MTI->getRawSource(), Src))
3502 return false;
3503 if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3504 return true;
3505 }
3506 }
3507
3508 if (!MTI->getLength()->getType()->isIntegerTy(64))
3509 return false;
3510
3511 if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3512 // Fast instruction selection doesn't support the special
3513 // address spaces.
3514 return false;
3515
3516 const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3517 return lowerCallTo(II, IntrMemName, II->arg_size() - 1);
3518 }
3519 case Intrinsic::memset: {
3520 const MemSetInst *MSI = cast<MemSetInst>(II);
3521 // Don't handle volatile.
3522 if (MSI->isVolatile())
3523 return false;
3524
3525 if (!MSI->getLength()->getType()->isIntegerTy(64))
3526 return false;
3527
3528 if (MSI->getDestAddressSpace() > 255)
3529 // Fast instruction selection doesn't support the special
3530 // address spaces.
3531 return false;
3532
3533 return lowerCallTo(II, "memset", II->arg_size() - 1);
3534 }
3535 case Intrinsic::sin:
3536 case Intrinsic::cos:
3537 case Intrinsic::pow: {
3538 MVT RetVT;
3539 if (!isTypeLegal(II->getType(), RetVT))
3540 return false;
3541
3542 if (RetVT != MVT::f32 && RetVT != MVT::f64)
3543 return false;
3544
3545 static const RTLIB::Libcall LibCallTable[3][2] = {
3546 { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3547 { RTLIB::COS_F32, RTLIB::COS_F64 },
3548 { RTLIB::POW_F32, RTLIB::POW_F64 }
3549 };
3550 RTLIB::Libcall LC;
3551 bool Is64Bit = RetVT == MVT::f64;
3552 switch (II->getIntrinsicID()) {
3553 default:
3554 llvm_unreachable("Unexpected intrinsic.");
3555 case Intrinsic::sin:
3556 LC = LibCallTable[0][Is64Bit];
3557 break;
3558 case Intrinsic::cos:
3559 LC = LibCallTable[1][Is64Bit];
3560 break;
3561 case Intrinsic::pow:
3562 LC = LibCallTable[2][Is64Bit];
3563 break;
3564 }
3565
3566 ArgListTy Args;
3567 Args.reserve(II->arg_size());
3568
3569 // Populate the argument list.
3570 for (auto &Arg : II->args()) {
3571 ArgListEntry Entry;
3572 Entry.Val = Arg;
3573 Entry.Ty = Arg->getType();
3574 Args.push_back(Entry);
3575 }
3576
3577 CallLoweringInfo CLI;
3578 MCContext &Ctx = MF->getContext();
3579 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3580 TLI.getLibcallName(LC), std::move(Args));
3581 if (!lowerCallTo(CLI))
3582 return false;
3583 updateValueMap(II, CLI.ResultReg);
3584 return true;
3585 }
3586 case Intrinsic::fabs: {
3587 MVT VT;
3588 if (!isTypeLegal(II->getType(), VT))
3589 return false;
3590
3591 unsigned Opc;
3592 switch (VT.SimpleTy) {
3593 default:
3594 return false;
3595 case MVT::f32:
3596 Opc = AArch64::FABSSr;
3597 break;
3598 case MVT::f64:
3599 Opc = AArch64::FABSDr;
3600 break;
3601 }
3602 Register SrcReg = getRegForValue(II->getOperand(0));
3603 if (!SrcReg)
3604 return false;
3605 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3606 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
3607 .addReg(SrcReg);
3608 updateValueMap(II, ResultReg);
3609 return true;
3610 }
3611 case Intrinsic::trap:
3612 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3613 .addImm(1);
3614 return true;
3615 case Intrinsic::debugtrap:
3616 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3617 .addImm(0xF000);
3618 return true;
3619
3620  case Intrinsic::sqrt: {
3621    Type *RetTy = II->getCalledFunction()->getReturnType();
3622
3623 MVT VT;
3624 if (!isTypeLegal(RetTy, VT))
3625 return false;
3626
3627 Register Op0Reg = getRegForValue(II->getOperand(0));
3628 if (!Op0Reg)
3629 return false;
3630
3631 unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg);
3632 if (!ResultReg)
3633 return false;
3634
3635 updateValueMap(II, ResultReg);
3636 return true;
3637 }
3638 case Intrinsic::sadd_with_overflow:
3639 case Intrinsic::uadd_with_overflow:
3640 case Intrinsic::ssub_with_overflow:
3641 case Intrinsic::usub_with_overflow:
3642 case Intrinsic::smul_with_overflow:
3643 case Intrinsic::umul_with_overflow: {
3644 // This implements the basic lowering of the xalu with overflow intrinsics.
3645 const Function *Callee = II->getCalledFunction();
3646 auto *Ty = cast<StructType>(Callee->getReturnType());
3647 Type *RetTy = Ty->getTypeAtIndex(0U);
3648
3649 MVT VT;
3650 if (!isTypeLegal(RetTy, VT))
3651 return false;
3652
3653 if (VT != MVT::i32 && VT != MVT::i64)
3654 return false;
3655
3656 const Value *LHS = II->getArgOperand(0);
3657 const Value *RHS = II->getArgOperand(1);
3658 // Canonicalize immediate to the RHS.
3659 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3660 std::swap(LHS, RHS);
3661
3662 // Simplify multiplies.
3663 Intrinsic::ID IID = II->getIntrinsicID();
3664 switch (IID) {
3665 default:
3666 break;
3667 case Intrinsic::smul_with_overflow:
3668 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3669 if (C->getValue() == 2) {
3670 IID = Intrinsic::sadd_with_overflow;
3671 RHS = LHS;
3672 }
3673 break;
3674 case Intrinsic::umul_with_overflow:
3675 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3676 if (C->getValue() == 2) {
3677 IID = Intrinsic::uadd_with_overflow;
3678 RHS = LHS;
3679 }
3680 break;
3681 }
3682
3683    unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3684    AArch64CC::CondCode CC = AArch64CC::AL;
3685    switch (IID) {
3686 default: llvm_unreachable("Unexpected intrinsic!");
3687 case Intrinsic::sadd_with_overflow:
3688 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3689 CC = AArch64CC::VS;
3690 break;
3691 case Intrinsic::uadd_with_overflow:
3692 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3693 CC = AArch64CC::HS;
3694 break;
3695 case Intrinsic::ssub_with_overflow:
3696 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3697 CC = AArch64CC::VS;
3698 break;
3699 case Intrinsic::usub_with_overflow:
3700 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3701 CC = AArch64CC::LO;
3702 break;
3703 case Intrinsic::smul_with_overflow: {
3704 CC = AArch64CC::NE;
3705 Register LHSReg = getRegForValue(LHS);
3706 if (!LHSReg)
3707 return false;
3708
3709 Register RHSReg = getRegForValue(RHS);
3710 if (!RHSReg)
3711 return false;
3712
3713 if (VT == MVT::i32) {
3714 MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg);
3715 Register MulSubReg =
3716 fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3717 // cmp xreg, wreg, sxtw
3718 emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg,
3719 AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true,
3720 /*WantResult=*/false);
3721 MulReg = MulSubReg;
3722 } else {
3723 assert(VT == MVT::i64 && "Unexpected value type.");
3724 // LHSReg and RHSReg cannot be killed by this Mul, since they are
3725 // reused in the next instruction.
3726 MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3727 unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg);
3728 emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63,
3729 /*WantResult=*/false);
3730 }
3731 break;
3732 }
3733 case Intrinsic::umul_with_overflow: {
3734 CC = AArch64CC::NE;
3735 Register LHSReg = getRegForValue(LHS);
3736 if (!LHSReg)
3737 return false;
3738
3739 Register RHSReg = getRegForValue(RHS);
3740 if (!RHSReg)
3741 return false;
3742
3743 if (VT == MVT::i32) {
3744 MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg);
3745 // tst xreg, #0xffffffff00000000
3746 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3747 TII.get(AArch64::ANDSXri), AArch64::XZR)
3748 .addReg(MulReg)
3749 .addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64));
3750 MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3751 } else {
3752 assert(VT == MVT::i64 && "Unexpected value type.");
3753 // LHSReg and RHSReg cannot be killed by this Mul, since they are
3754 // reused in the next instruction.
3755 MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3756 unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg);
3757 emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false);
3758 }
3759 break;
3760 }
3761 }
3762
3763 if (MulReg) {
3764 ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3765 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3766 TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3767 }
3768
3769 if (!ResultReg1)
3770 return false;
3771
3772 ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3773 AArch64::WZR, AArch64::WZR,
3774 getInvertedCondCode(CC));
3775 (void)ResultReg2;
3776 assert((ResultReg1 + 1) == ResultReg2 &&
3777 "Nonconsecutive result registers.");
3778 updateValueMap(II, ResultReg1, 2);
3779 return true;
3780 }
3781 case Intrinsic::aarch64_crc32b:
3782 case Intrinsic::aarch64_crc32h:
3783 case Intrinsic::aarch64_crc32w:
3784 case Intrinsic::aarch64_crc32x:
3785 case Intrinsic::aarch64_crc32cb:
3786 case Intrinsic::aarch64_crc32ch:
3787 case Intrinsic::aarch64_crc32cw:
3788 case Intrinsic::aarch64_crc32cx: {
3789 if (!Subtarget->hasCRC())
3790 return false;
3791
3792 unsigned Opc;
3793 switch (II->getIntrinsicID()) {
3794 default:
3795 llvm_unreachable("Unexpected intrinsic!");
3796 case Intrinsic::aarch64_crc32b:
3797 Opc = AArch64::CRC32Brr;
3798 break;
3799 case Intrinsic::aarch64_crc32h:
3800 Opc = AArch64::CRC32Hrr;
3801 break;
3802 case Intrinsic::aarch64_crc32w:
3803 Opc = AArch64::CRC32Wrr;
3804 break;
3805 case Intrinsic::aarch64_crc32x:
3806 Opc = AArch64::CRC32Xrr;
3807 break;
3808 case Intrinsic::aarch64_crc32cb:
3809 Opc = AArch64::CRC32CBrr;
3810 break;
3811 case Intrinsic::aarch64_crc32ch:
3812 Opc = AArch64::CRC32CHrr;
3813 break;
3814 case Intrinsic::aarch64_crc32cw:
3815 Opc = AArch64::CRC32CWrr;
3816 break;
3817 case Intrinsic::aarch64_crc32cx:
3818 Opc = AArch64::CRC32CXrr;
3819 break;
3820 }
3821
3822 Register LHSReg = getRegForValue(II->getArgOperand(0));
3823 Register RHSReg = getRegForValue(II->getArgOperand(1));
3824 if (!LHSReg || !RHSReg)
3825 return false;
3826
3827 Register ResultReg =
3828 fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, LHSReg, RHSReg);
3829 updateValueMap(II, ResultReg);
3830 return true;
3831 }
3832 }
3833 return false;
3834}
3835
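// A hedged, self-contained sketch (helper names are illustrative, not part of
// this file; two's-complement narrowing assumed): scalar C++ models of the
// 32-bit multiply-with-overflow checks selected above. For umul, the 64-bit
// UMULL product overflows i32 iff its high 32 bits are non-zero (the
// ANDS/"tst" with 0xffffffff00000000); for smul, the SMULL product overflows
// iff it differs from the sign-extension of its low half (the "cmp xreg,
// wreg, sxtw").
static bool wouldUMulOverflowI32(unsigned A, unsigned B) {
  unsigned long long Prod = (unsigned long long)A * B; // UMULL
  return (Prod & 0xFFFFFFFF00000000ULL) != 0;          // tst of the high half
}
static bool wouldSMulOverflowI32(int A, int B) {
  long long Prod = (long long)A * B;                   // SMULL
  return Prod != (long long)(int)Prod;                 // cmp against sxtw(lo32)
}
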
3836bool AArch64FastISel::selectRet(const Instruction *I) {
3837 const ReturnInst *Ret = cast<ReturnInst>(I);
3838 const Function &F = *I->getParent()->getParent();
3839
3840 if (!FuncInfo.CanLowerReturn)
3841 return false;
3842
3843 if (F.isVarArg())
3844 return false;
3845
3846 if (TLI.supportSwiftError() &&
3847 F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3848 return false;
3849
3850 if (TLI.supportSplitCSR(FuncInfo.MF))
3851 return false;
3852
3853  // Build a list of return value registers.
3854  SmallVector<unsigned, 4> RetRegs;
3855
3856 if (Ret->getNumOperands() > 0) {
3857    CallingConv::ID CC = F.getCallingConv();
3858    SmallVector<ISD::OutputArg, 4> Outs;
3859    GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3860
3861    // Analyze operands of the call, assigning locations to each operand.
3862    SmallVector<CCValAssign, 16> ValLocs;
3863    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3864 CCInfo.AnalyzeReturn(Outs, RetCC_AArch64_AAPCS);
3865
3866 // Only handle a single return value for now.
3867 if (ValLocs.size() != 1)
3868 return false;
3869
3870 CCValAssign &VA = ValLocs[0];
3871 const Value *RV = Ret->getOperand(0);
3872
3873 // Don't bother handling odd stuff for now.
3874 if ((VA.getLocInfo() != CCValAssign::Full) &&
3875 (VA.getLocInfo() != CCValAssign::BCvt))
3876 return false;
3877
3878 // Only handle register returns for now.
3879 if (!VA.isRegLoc())
3880 return false;
3881
3882 Register Reg = getRegForValue(RV);
3883 if (Reg == 0)
3884 return false;
3885
3886 unsigned SrcReg = Reg + VA.getValNo();
3887 Register DestReg = VA.getLocReg();
3888 // Avoid a cross-class copy. This is very unlikely.
3889 if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3890 return false;
3891
3892 EVT RVEVT = TLI.getValueType(DL, RV->getType());
3893 if (!RVEVT.isSimple())
3894 return false;
3895
3896 // Vectors (of > 1 lane) in big endian need tricky handling.
3897 if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
3898 !Subtarget->isLittleEndian())
3899 return false;
3900
3901 MVT RVVT = RVEVT.getSimpleVT();
3902 if (RVVT == MVT::f128)
3903 return false;
3904
3905 MVT DestVT = VA.getValVT();
3906 // Special handling for extended integers.
3907 if (RVVT != DestVT) {
3908 if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3909 return false;
3910
3911 if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3912 return false;
3913
3914 bool IsZExt = Outs[0].Flags.isZExt();
3915 SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3916 if (SrcReg == 0)
3917 return false;
3918 }
3919
3920 // "Callee" (i.e. value producer) zero extends pointers at function
3921 // boundary.
3922 if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
3923 SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff);
3924
3925 // Make the copy.
3926 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3927 TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3928
3929 // Add register to return instruction.
3930 RetRegs.push_back(VA.getLocReg());
3931 }
3932
3933 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3934 TII.get(AArch64::RET_ReallyLR));
3935 for (unsigned RetReg : RetRegs)
3936 MIB.addReg(RetReg, RegState::Implicit);
3937 return true;
3938}
3939
3940bool AArch64FastISel::selectTrunc(const Instruction *I) {
3941 Type *DestTy = I->getType();
3942 Value *Op = I->getOperand(0);
3943 Type *SrcTy = Op->getType();
3944
3945 EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3946 EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3947 if (!SrcEVT.isSimple())
3948 return false;
3949 if (!DestEVT.isSimple())
3950 return false;
3951
3952 MVT SrcVT = SrcEVT.getSimpleVT();
3953 MVT DestVT = DestEVT.getSimpleVT();
3954
3955 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3956 SrcVT != MVT::i8)
3957 return false;
3958 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3959 DestVT != MVT::i1)
3960 return false;
3961
3962 Register SrcReg = getRegForValue(Op);
3963 if (!SrcReg)
3964 return false;
3965
3966 // If we're truncating from i64 to a smaller non-legal type then generate an
3967 // AND. Otherwise, we know the high bits are undefined and a truncate only
3968  // generates a COPY. We cannot also mark the source register as the result
3969  // register, because this can incorrectly transfer the kill flag onto the
3970 // source register.
3971 unsigned ResultReg;
3972 if (SrcVT == MVT::i64) {
3973 uint64_t Mask = 0;
3974 switch (DestVT.SimpleTy) {
3975 default:
3976 // Trunc i64 to i32 is handled by the target-independent fast-isel.
3977 return false;
3978 case MVT::i1:
3979 Mask = 0x1;
3980 break;
3981 case MVT::i8:
3982 Mask = 0xff;
3983 break;
3984 case MVT::i16:
3985 Mask = 0xffff;
3986 break;
3987 }
3988 // Issue an extract_subreg to get the lower 32-bits.
3989 Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg,
3990 AArch64::sub_32);
3991 // Create the AND instruction which performs the actual truncation.
3992 ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask);
3993 assert(ResultReg && "Unexpected AND instruction emission failure.");
3994 } else {
3995 ResultReg = createResultReg(&AArch64::GPR32RegClass);
3996 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3997 TII.get(TargetOpcode::COPY), ResultReg)
3998 .addReg(SrcReg);
3999 }
4000
4001 updateValueMap(I, ResultReg);
4002 return true;
4003}
4004
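// A hedged scalar model of the i64 truncation selected above (helper name is
// illustrative): take the low 32 bits via the sub_32 subregister and apply the
// destination-width mask (0x1, 0xff or 0xffff) with a single AND.
static unsigned truncFromI64Model(unsigned long long Src, unsigned Mask) {
  unsigned Lo32 = (unsigned)Src; // EXTRACT_SUBREG ..., sub_32
  return Lo32 & Mask;            // ANDWri with the width mask
}
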
4005unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
4006 assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
4007 DestVT == MVT::i64) &&
4008 "Unexpected value type.");
4009 // Handle i8 and i16 as i32.
4010 if (DestVT == MVT::i8 || DestVT == MVT::i16)
4011 DestVT = MVT::i32;
4012
4013 if (IsZExt) {
4014 unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1);
4015 assert(ResultReg && "Unexpected AND instruction emission failure.");
4016 if (DestVT == MVT::i64) {
4017 // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
4018 // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
4019 Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4020 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4021 TII.get(AArch64::SUBREG_TO_REG), Reg64)
4022 .addImm(0)
4023 .addReg(ResultReg)
4024 .addImm(AArch64::sub_32);
4025 ResultReg = Reg64;
4026 }
4027 return ResultReg;
4028 } else {
4029 if (DestVT == MVT::i64) {
4030 // FIXME: We're SExt i1 to i64.
4031 return 0;
4032 }
4033 return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
4034 0, 0);
4035 }
4036}
4037
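// A minimal sketch of emiti1Ext's semantics (illustrative helper, assuming
// two's complement): zero-extension keeps only bit 0 (the ANDWri #1 above),
// while SBFM #0, #0 replicates bit 0 across the register, i.e. yields 0 or -1.
static long long i1ExtModel(unsigned Src, bool IsZExt) {
  unsigned Bit = Src & 1;                           // ANDWri Wd, Ws, #1
  return IsZExt ? (long long)Bit : -(long long)Bit; // SBFMWri #0, #0
}
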
4038unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4039 unsigned Opc, ZReg;
4040 switch (RetVT.SimpleTy) {
4041 default: return 0;
4042 case MVT::i8:
4043 case MVT::i16:
4044 case MVT::i32:
4045 RetVT = MVT::i32;
4046 Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
4047 case MVT::i64:
4048 Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
4049 }
4050
4051 const TargetRegisterClass *RC =
4052 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4053 return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg);
4054}
4055
4056unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4057 if (RetVT != MVT::i64)
4058 return 0;
4059
4060 return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
4061 Op0, Op1, AArch64::XZR);
4062}
4063
4064unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4065 if (RetVT != MVT::i64)
4066 return 0;
4067
4068 return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
4069 Op0, Op1, AArch64::XZR);
4070}
4071
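// The three multiply helpers above lean on the multiply-add forms with a zero
// addend: MADD against WZR/XZR is a plain multiply, and {S,U}MADDL widen two
// 32-bit operands into a 64-bit product. Hedged scalar equivalents
// (illustrative names only):
static long long smullModel(int A, int B) {
  return (long long)A * (long long)B; // SMADDL Xd, Wn, Wm, XZR
}
static unsigned long long umullModel(unsigned A, unsigned B) {
  return (unsigned long long)A * B;   // UMADDL Xd, Wn, Wm, XZR
}
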
4072unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg,
4073 unsigned Op1Reg) {
4074 unsigned Opc = 0;
4075 bool NeedTrunc = false;
4076 uint64_t Mask = 0;
4077 switch (RetVT.SimpleTy) {
4078 default: return 0;
4079 case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
4080 case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4081 case MVT::i32: Opc = AArch64::LSLVWr; break;
4082 case MVT::i64: Opc = AArch64::LSLVXr; break;
4083 }
4084
4085 const TargetRegisterClass *RC =
4086 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4087 if (NeedTrunc)
4088 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4089
4090 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4091 if (NeedTrunc)
4092 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4093 return ResultReg;
4094}
4095
4096unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4097 uint64_t Shift, bool IsZExt) {
4098 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4099 "Unexpected source/return type pair.");
4100 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4101 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4102 "Unexpected source value type.");
4103 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4104 RetVT == MVT::i64) && "Unexpected return value type.");
4105
4106 bool Is64Bit = (RetVT == MVT::i64);
4107 unsigned RegSize = Is64Bit ? 64 : 32;
4108 unsigned DstBits = RetVT.getSizeInBits();
4109 unsigned SrcBits = SrcVT.getSizeInBits();
4110 const TargetRegisterClass *RC =
4111 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4112
4113 // Just emit a copy for "zero" shifts.
4114 if (Shift == 0) {
4115 if (RetVT == SrcVT) {
4116 Register ResultReg = createResultReg(RC);
4117 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4118 TII.get(TargetOpcode::COPY), ResultReg)
4119 .addReg(Op0);
4120 return ResultReg;
4121 } else
4122 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4123 }
4124
4125 // Don't deal with undefined shifts.
4126 if (Shift >= DstBits)
4127 return 0;
4128
4129 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4130 // {S|U}BFM Wd, Wn, #r, #s
4131 // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4132
4133 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4134 // %2 = shl i16 %1, 4
4135 // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4136 // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4137 // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4138 // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4139
4140 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4141 // %2 = shl i16 %1, 8
4142 // Wd<32+7-24,32-24> = Wn<7:0>
4143 // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4144 // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4145 // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4146
4147 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4148 // %2 = shl i16 %1, 12
4149 // Wd<32+3-20,32-20> = Wn<3:0>
4150 // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4151 // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4152 // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4153
4154 unsigned ImmR = RegSize - Shift;
4155 // Limit the width to the length of the source type.
4156 unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4157 static const unsigned OpcTable[2][2] = {
4158 {AArch64::SBFMWri, AArch64::SBFMXri},
4159 {AArch64::UBFMWri, AArch64::UBFMXri}
4160 };
4161 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4162 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4163 Register TmpReg = MRI.createVirtualRegister(RC);
4164 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4165 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4166 .addImm(0)
4167 .addReg(Op0)
4168 .addImm(AArch64::sub_32);
4169 Op0 = TmpReg;
4170 }
4171 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4172}
4173
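// A hedged sketch of what the immediate-shift fold above achieves for the
// zero-extend case (illustrative helper; Shift < 64 is guaranteed by the
// Shift >= DstBits bail-out, and SrcBits < 64 is assumed for the mask):
// extending an SrcBits-wide value and shifting it left by Shift collapses into
// one UBFM with ImmR = RegSize - Shift, ImmS = min(SrcBits-1, DstBits-1-Shift).
static unsigned long long zextThenShlModel(unsigned long long Src,
                                           unsigned SrcBits, unsigned Shift) {
  unsigned long long ZExt = Src & ((1ULL << SrcBits) - 1); // zero-extend
  return ZExt << Shift;                                    // shl
}
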
4174unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg,
4175 unsigned Op1Reg) {
4176 unsigned Opc = 0;
4177 bool NeedTrunc = false;
4178 uint64_t Mask = 0;
4179 switch (RetVT.SimpleTy) {
4180 default: return 0;
4181 case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
4182 case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4183 case MVT::i32: Opc = AArch64::LSRVWr; break;
4184 case MVT::i64: Opc = AArch64::LSRVXr; break;
4185 }
4186
4187 const TargetRegisterClass *RC =
4188 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4189 if (NeedTrunc) {
4190 Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask);
4191 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4192 }
4193 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4194 if (NeedTrunc)
4195 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4196 return ResultReg;
4197}
4198
4199unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4200 uint64_t Shift, bool IsZExt) {
4201 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4202 "Unexpected source/return type pair.");
4203 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4204 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4205 "Unexpected source value type.");
4206 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4207 RetVT == MVT::i64) && "Unexpected return value type.");
4208
4209 bool Is64Bit = (RetVT == MVT::i64);
4210 unsigned RegSize = Is64Bit ? 64 : 32;
4211 unsigned DstBits = RetVT.getSizeInBits();
4212 unsigned SrcBits = SrcVT.getSizeInBits();
4213 const TargetRegisterClass *RC =
4214 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4215
4216 // Just emit a copy for "zero" shifts.
4217 if (Shift == 0) {
4218 if (RetVT == SrcVT) {
4219 Register ResultReg = createResultReg(RC);
4220 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4221 TII.get(TargetOpcode::COPY), ResultReg)
4222 .addReg(Op0);
4223 return ResultReg;
4224 } else
4225 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4226 }
4227
4228 // Don't deal with undefined shifts.
4229 if (Shift >= DstBits)
4230 return 0;
4231
4232 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4233 // {S|U}BFM Wd, Wn, #r, #s
4234 // Wd<s-r:0> = Wn<s:r> when r <= s
4235
4236 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4237 // %2 = lshr i16 %1, 4
4238 // Wd<7-4:0> = Wn<7:4>
4239 // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4240 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4241 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4242
4243 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4244 // %2 = lshr i16 %1, 8
4245 // Wd<7-7,0> = Wn<7:7>
4246 // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4247 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4248 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4249
4250 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4251 // %2 = lshr i16 %1, 12
4252 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4253 // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4254 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4255 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4256
4257 if (Shift >= SrcBits && IsZExt)
4258 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4259
4260 // It is not possible to fold a sign-extend into the LShr instruction. In this
4261 // case emit a sign-extend.
4262 if (!IsZExt) {
4263 Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4264 if (!Op0)
4265 return 0;
4266 SrcVT = RetVT;
4267 SrcBits = SrcVT.getSizeInBits();
4268 IsZExt = true;
4269 }
4270
4271 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4272 unsigned ImmS = SrcBits - 1;
4273 static const unsigned OpcTable[2][2] = {
4274 {AArch64::SBFMWri, AArch64::SBFMXri},
4275 {AArch64::UBFMWri, AArch64::UBFMXri}
4276 };
4277 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4278 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4279 Register TmpReg = MRI.createVirtualRegister(RC);
4280 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4281 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4282 .addImm(0)
4283 .addReg(Op0)
4284 .addImm(AArch64::sub_32);
4285 Op0 = TmpReg;
4286 }
4287 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4288}
4289
4290unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg,
4291 unsigned Op1Reg) {
4292 unsigned Opc = 0;
4293 bool NeedTrunc = false;
4294 uint64_t Mask = 0;
4295 switch (RetVT.SimpleTy) {
4296 default: return 0;
4297 case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
4298 case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4299 case MVT::i32: Opc = AArch64::ASRVWr; break;
4300 case MVT::i64: Opc = AArch64::ASRVXr; break;
4301 }
4302
4303 const TargetRegisterClass *RC =
4304 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4305 if (NeedTrunc) {
4306 Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
4307 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4308 }
4309 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4310 if (NeedTrunc)
4311 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4312 return ResultReg;
4313}
4314
4315unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4316 uint64_t Shift, bool IsZExt) {
4317 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4318 "Unexpected source/return type pair.");
4319 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4320 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4321 "Unexpected source value type.");
4322 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4323 RetVT == MVT::i64) && "Unexpected return value type.");
4324
4325 bool Is64Bit = (RetVT == MVT::i64);
4326 unsigned RegSize = Is64Bit ? 64 : 32;
4327 unsigned DstBits = RetVT.getSizeInBits();
4328 unsigned SrcBits = SrcVT.getSizeInBits();
4329 const TargetRegisterClass *RC =
4330 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4331
4332 // Just emit a copy for "zero" shifts.
4333 if (Shift == 0) {
4334 if (RetVT == SrcVT) {
4335 Register ResultReg = createResultReg(RC);
4336 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4337 TII.get(TargetOpcode::COPY), ResultReg)
4338 .addReg(Op0);
4339 return ResultReg;
4340 } else
4341 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4342 }
4343
4344 // Don't deal with undefined shifts.
4345 if (Shift >= DstBits)
4346 return 0;
4347
4348 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4349 // {S|U}BFM Wd, Wn, #r, #s
4350 // Wd<s-r:0> = Wn<s:r> when r <= s
4351
4352 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4353 // %2 = ashr i16 %1, 4
4354 // Wd<7-4:0> = Wn<7:4>
4355 // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4356 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4357 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4358
4359 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4360 // %2 = ashr i16 %1, 8
4361 // Wd<7-7,0> = Wn<7:7>
4362 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4363 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4364 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4365
4366 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4367 // %2 = ashr i16 %1, 12
4368 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4369 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4370 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4371 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4372
4373 if (Shift >= SrcBits && IsZExt)
4374 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4375
4376 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4377 unsigned ImmS = SrcBits - 1;
4378 static const unsigned OpcTable[2][2] = {
4379 {AArch64::SBFMWri, AArch64::SBFMXri},
4380 {AArch64::UBFMWri, AArch64::UBFMXri}
4381 };
4382 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4383 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4384 Register TmpReg = MRI.createVirtualRegister(RC);
4385 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4386 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4387 .addImm(0)
4388 .addReg(Op0)
4389 .addImm(AArch64::sub_32);
4390 Op0 = TmpReg;
4391 }
4392 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4393}
4394
4395unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4396 bool IsZExt) {
4397 assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4398
4399 // FastISel does not have plumbing to deal with extensions where the SrcVT or
4400 // DestVT are odd things, so test to make sure that they are both types we can
4401 // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4402 // bail out to SelectionDAG.
4403 if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4404 (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4405 ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4406 (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4407 return 0;
4408
4409 unsigned Opc;
4410 unsigned Imm = 0;
4411
4412 switch (SrcVT.SimpleTy) {
4413 default:
4414 return 0;
4415 case MVT::i1:
4416 return emiti1Ext(SrcReg, DestVT, IsZExt);
4417 case MVT::i8:
4418 if (DestVT == MVT::i64)
4419 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4420 else
4421 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4422 Imm = 7;
4423 break;
4424 case MVT::i16:
4425 if (DestVT == MVT::i64)
4426 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4427 else
4428 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4429 Imm = 15;
4430 break;
4431 case MVT::i32:
4432 assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4433 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4434 Imm = 31;
4435 break;
4436 }
4437
4438 // Handle i8 and i16 as i32.
4439 if (DestVT == MVT::i8 || DestVT == MVT::i16)
4440 DestVT = MVT::i32;
4441 else if (DestVT == MVT::i64) {
4442 Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4443 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4444 TII.get(AArch64::SUBREG_TO_REG), Src64)
4445 .addImm(0)
4446 .addReg(SrcReg)
4447 .addImm(AArch64::sub_32);
4448 SrcReg = Src64;
4449 }
4450
4451 const TargetRegisterClass *RC =
4452 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4453 return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm);
4454}
4455
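// A hedged scalar model of the UBFM/SBFM-based extensions above (illustrative
// helper; Width is 8, 16 or 32 here): with ImmR = 0 and ImmS = Width - 1,
// UBFM zero-extends the low Width bits and SBFM sign-extends them.
static long long intExtModel(unsigned long long Src, unsigned Width,
                             bool IsZExt) {
  long long Field = (long long)(Src & ((1ULL << Width) - 1)); // low Width bits
  if (IsZExt)
    return Field;                                             // UBFM
  long long SignBit = 1LL << (Width - 1);
  return (Field ^ SignBit) - SignBit;                         // SBFM
}
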
4456static bool isZExtLoad(const MachineInstr *LI) {
4457 switch (LI->getOpcode()) {
4458 default:
4459 return false;
4460 case AArch64::LDURBBi:
4461 case AArch64::LDURHHi:
4462 case AArch64::LDURWi:
4463 case AArch64::LDRBBui:
4464 case AArch64::LDRHHui:
4465 case AArch64::LDRWui:
4466 case AArch64::LDRBBroX:
4467 case AArch64::LDRHHroX:
4468 case AArch64::LDRWroX:
4469 case AArch64::LDRBBroW:
4470 case AArch64::LDRHHroW:
4471 case AArch64::LDRWroW:
4472 return true;
4473 }
4474}
4475
4476static bool isSExtLoad(const MachineInstr *LI) {
4477 switch (LI->getOpcode()) {
4478 default:
4479 return false;
4480 case AArch64::LDURSBWi:
4481 case AArch64::LDURSHWi:
4482 case AArch64::LDURSBXi:
4483 case AArch64::LDURSHXi:
4484 case AArch64::LDURSWi:
4485 case AArch64::LDRSBWui:
4486 case AArch64::LDRSHWui:
4487 case AArch64::LDRSBXui:
4488 case AArch64::LDRSHXui:
4489 case AArch64::LDRSWui:
4490 case AArch64::LDRSBWroX:
4491 case AArch64::LDRSHWroX:
4492 case AArch64::LDRSBXroX:
4493 case AArch64::LDRSHXroX:
4494 case AArch64::LDRSWroX:
4495 case AArch64::LDRSBWroW:
4496 case AArch64::LDRSHWroW:
4497 case AArch64::LDRSBXroW:
4498 case AArch64::LDRSHXroW:
4499 case AArch64::LDRSWroW:
4500 return true;
4501 }
4502}
4503
4504bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4505 MVT SrcVT) {
4506 const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4507 if (!LI || !LI->hasOneUse())
4508 return false;
4509
4510 // Check if the load instruction has already been selected.
4511 Register Reg = lookUpRegForValue(LI);
4512 if (!Reg)
4513 return false;
4514
4515 MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4516 if (!MI)
4517 return false;
4518
4519 // Check if the correct load instruction has been emitted - SelectionDAG might
4520 // have emitted a zero-extending load, but we need a sign-extending load.
4521 bool IsZExt = isa<ZExtInst>(I);
4522 const auto *LoadMI = MI;
4523 if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4524 LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4525 Register LoadReg = MI->getOperand(1).getReg();
4526 LoadMI = MRI.getUniqueVRegDef(LoadReg);
4527 assert(LoadMI && "Expected valid instruction");
4528 }
4529 if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4530 return false;
4531
4532 // Nothing to be done.
4533 if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4534 updateValueMap(I, Reg);
4535 return true;
4536 }
4537
4538 if (IsZExt) {
4539 Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
4540 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4541 TII.get(AArch64::SUBREG_TO_REG), Reg64)
4542 .addImm(0)
4543 .addReg(Reg, getKillRegState(true))
4544 .addImm(AArch64::sub_32);
4545 Reg = Reg64;
4546 } else {
4547 assert((MI->getOpcode() == TargetOpcode::COPY &&
4548 MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4549 "Expected copy instruction");
4550    Reg = MI->getOperand(1).getReg();
4551    MachineBasicBlock::iterator I(MI);
4552    removeDeadCode(I, std::next(I));
4553 }
4554 updateValueMap(I, Reg);
4555 return true;
4556}
4557
4558bool AArch64FastISel::selectIntExt(const Instruction *I) {
4559 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4560 "Unexpected integer extend instruction.");
4561 MVT RetVT;
4562 MVT SrcVT;
4563 if (!isTypeSupported(I->getType(), RetVT))
4564 return false;
4565
4566 if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4567 return false;
4568
4569 // Try to optimize already sign-/zero-extended values from load instructions.
4570 if (optimizeIntExtLoad(I, RetVT, SrcVT))
4571 return true;
4572
4573 Register SrcReg = getRegForValue(I->getOperand(0));
4574 if (!SrcReg)
4575 return false;
4576
4577 // Try to optimize already sign-/zero-extended values from function arguments.
4578 bool IsZExt = isa<ZExtInst>(I);
4579 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4580 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4581 if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4582 Register ResultReg = createResultReg(&AArch64::GPR64RegClass);
4583 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4584 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4585 .addImm(0)
4586 .addReg(SrcReg)
4587 .addImm(AArch64::sub_32);
4588 SrcReg = ResultReg;
4589 }
4590
4591 updateValueMap(I, SrcReg);
4592 return true;
4593 }
4594 }
4595
4596 unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4597 if (!ResultReg)
4598 return false;
4599
4600 updateValueMap(I, ResultReg);
4601 return true;
4602}
4603
4604bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4605 EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4606 if (!DestEVT.isSimple())
4607 return false;
4608
4609 MVT DestVT = DestEVT.getSimpleVT();
4610 if (DestVT != MVT::i64 && DestVT != MVT::i32)
4611 return false;
4612
4613 unsigned DivOpc;
4614 bool Is64bit = (DestVT == MVT::i64);
4615 switch (ISDOpcode) {
4616 default:
4617 return false;
4618 case ISD::SREM:
4619 DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4620 break;
4621 case ISD::UREM:
4622 DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4623 break;
4624 }
4625 unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4626 Register Src0Reg = getRegForValue(I->getOperand(0));
4627 if (!Src0Reg)
4628 return false;
4629
4630 Register Src1Reg = getRegForValue(I->getOperand(1));
4631 if (!Src1Reg)
4632 return false;
4633
4634 const TargetRegisterClass *RC =
4635 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4636 Register QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg);
4637 assert(QuotReg && "Unexpected DIV instruction emission failure.");
4638 // The remainder is computed as numerator - (quotient * denominator) using the
4639 // MSUB instruction.
4640 Register ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg);
4641 updateValueMap(I, ResultReg);
4642 return true;
4643}
4644
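// The MSUB-based remainder above as a hedged scalar identity (illustrative
// helper; D != 0 assumed): MSUB Xd, Xn, Xm, Xa computes Xa - Xn * Xm, so with
// Xa = numerator, Xn = quotient and Xm = denominator it yields the remainder.
static long long sremModel(long long N, long long D) {
  long long Quot = N / D; // SDIV (UDIV for the unsigned variant)
  return N - Quot * D;    // MSUB
}
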
4645bool AArch64FastISel::selectMul(const Instruction *I) {
4646 MVT VT;
4647 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4648 return false;
4649
4650 if (VT.isVector())
4651 return selectBinaryOp(I, ISD::MUL);
4652
4653 const Value *Src0 = I->getOperand(0);
4654 const Value *Src1 = I->getOperand(1);
4655 if (const auto *C = dyn_cast<ConstantInt>(Src0))
4656 if (C->getValue().isPowerOf2())
4657 std::swap(Src0, Src1);
4658
4659 // Try to simplify to a shift instruction.
4660 if (const auto *C = dyn_cast<ConstantInt>(Src1))
4661 if (C->getValue().isPowerOf2()) {
4662 uint64_t ShiftVal = C->getValue().logBase2();
4663 MVT SrcVT = VT;
4664 bool IsZExt = true;
4665 if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4666 if (!isIntExtFree(ZExt)) {
4667 MVT VT;
4668 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4669 SrcVT = VT;
4670 IsZExt = true;
4671 Src0 = ZExt->getOperand(0);
4672 }
4673 }
4674 } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4675 if (!isIntExtFree(SExt)) {
4676 MVT VT;
4677 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4678 SrcVT = VT;
4679 IsZExt = false;
4680 Src0 = SExt->getOperand(0);
4681 }
4682 }
4683 }
4684
4685 Register Src0Reg = getRegForValue(Src0);
4686 if (!Src0Reg)
4687 return false;
4688
4689 unsigned ResultReg =
4690 emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt);
4691
4692 if (ResultReg) {
4693 updateValueMap(I, ResultReg);
4694 return true;
4695 }
4696 }
4697
4698 Register Src0Reg = getRegForValue(I->getOperand(0));
4699 if (!Src0Reg)
4700 return false;
4701
4702 Register Src1Reg = getRegForValue(I->getOperand(1));
4703 if (!Src1Reg)
4704 return false;
4705
4706 unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg);
4707
4708 if (!ResultReg)
4709 return false;
4710
4711 updateValueMap(I, ResultReg);
4712 return true;
4713}
4714
4715bool AArch64FastISel::selectShift(const Instruction *I) {
4716 MVT RetVT;
4717 if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4718 return false;
4719
4720 if (RetVT.isVector())
4721 return selectOperator(I, I->getOpcode());
4722
4723 if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4724 unsigned ResultReg = 0;
4725 uint64_t ShiftVal = C->getZExtValue();
4726 MVT SrcVT = RetVT;
4727 bool IsZExt = I->getOpcode() != Instruction::AShr;
4728 const Value *Op0 = I->getOperand(0);
4729 if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4730 if (!isIntExtFree(ZExt)) {
4731 MVT TmpVT;
4732 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4733 SrcVT = TmpVT;
4734 IsZExt = true;
4735 Op0 = ZExt->getOperand(0);
4736 }
4737 }
4738 } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4739 if (!isIntExtFree(SExt)) {
4740 MVT TmpVT;
4741 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4742 SrcVT = TmpVT;
4743 IsZExt = false;
4744 Op0 = SExt->getOperand(0);
4745 }
4746 }
4747 }
4748
4749 Register Op0Reg = getRegForValue(Op0);
4750 if (!Op0Reg)
4751 return false;
4752
4753 switch (I->getOpcode()) {
4754 default: llvm_unreachable("Unexpected instruction.");
4755 case Instruction::Shl:
4756 ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4757 break;
4758 case Instruction::AShr:
4759 ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4760 break;
4761 case Instruction::LShr:
4762 ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4763 break;
4764 }
4765 if (!ResultReg)
4766 return false;
4767
4768 updateValueMap(I, ResultReg);
4769 return true;
4770 }
4771
4772 Register Op0Reg = getRegForValue(I->getOperand(0));
4773 if (!Op0Reg)
4774 return false;
4775
4776 Register Op1Reg = getRegForValue(I->getOperand(1));
4777 if (!Op1Reg)
4778 return false;
4779
4780 unsigned ResultReg = 0;
4781 switch (I->getOpcode()) {
4782 default: llvm_unreachable("Unexpected instruction.");
4783 case Instruction::Shl:
4784 ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg);
4785 break;
4786 case Instruction::AShr:
4787 ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg);
4788 break;
4789 case Instruction::LShr:
4790 ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg);
4791 break;
4792 }
4793
4794 if (!ResultReg)
4795 return false;
4796
4797 updateValueMap(I, ResultReg);
4798 return true;
4799}
4800
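// For the sub-32-bit variable shifts selected above, value and shift amount
// both live in W registers, so the emit helpers mask the shift amount,
// normalize the shifted operand (AND for logical shifts, sign-extension for
// ASR) and mask the result back to the narrow width. A hedged i8 model
// (illustrative helper; LSRVWr shifts modulo the register width, mirrored by
// the % 32 below):
static unsigned lshrI8Model(unsigned Val, unsigned Amt) {
  unsigned Narrow = Val & 0xff;                 // emitAnd_ri on the operand
  unsigned Res = Narrow >> ((Amt & 0xff) % 32); // LSRVWr
  return Res & 0xff;                            // emitAnd_ri on the result
}
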
4801bool AArch64FastISel::selectBitCast(const Instruction *I) {
4802 MVT RetVT, SrcVT;
4803
4804 if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4805 return false;
4806 if (!isTypeLegal(I->getType(), RetVT))
4807 return false;
4808
4809 unsigned Opc;
4810 if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4811 Opc = AArch64::FMOVWSr;
4812 else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4813 Opc = AArch64::FMOVXDr;
4814 else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4815 Opc = AArch64::FMOVSWr;
4816 else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4817 Opc = AArch64::FMOVDXr;
4818 else
4819 return false;
4820
4821 const TargetRegisterClass *RC = nullptr;
4822 switch (RetVT.SimpleTy) {
4823 default: llvm_unreachable("Unexpected value type.");
4824 case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4825 case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4826 case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4827 case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4828 }
4829 Register Op0Reg = getRegForValue(I->getOperand(0));
4830 if (!Op0Reg)
4831 return false;
4832
4833 Register ResultReg = fastEmitInst_r(Opc, RC, Op0Reg);
4834 if (!ResultReg)
4835 return false;
4836
4837 updateValueMap(I, ResultReg);
4838 return true;
4839}
4840
4841bool AArch64FastISel::selectFRem(const Instruction *I) {
4842 MVT RetVT;
4843 if (!isTypeLegal(I->getType(), RetVT))
4844 return false;
4845
4846 RTLIB::Libcall LC;
4847 switch (RetVT.SimpleTy) {
4848 default:
4849 return false;
4850 case MVT::f32:
4851 LC = RTLIB::REM_F32;
4852 break;
4853 case MVT::f64:
4854 LC = RTLIB::REM_F64;
4855 break;
4856 }
4857
4858 ArgListTy Args;
4859 Args.reserve(I->getNumOperands());
4860
4861 // Populate the argument list.
4862 for (auto &Arg : I->operands()) {
4863 ArgListEntry Entry;
4864 Entry.Val = Arg;
4865 Entry.Ty = Arg->getType();
4866 Args.push_back(Entry);
4867 }
4868
4869 CallLoweringInfo CLI;
4870 MCContext &Ctx = MF->getContext();
4871 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4872 TLI.getLibcallName(LC), std::move(Args));
4873 if (!lowerCallTo(CLI))
4874 return false;
4875 updateValueMap(I, CLI.ResultReg);
4876 return true;
4877}
4878
4879bool AArch64FastISel::selectSDiv(const Instruction *I) {
4880 MVT VT;
4881 if (!isTypeLegal(I->getType(), VT))
4882 return false;
4883
4884 if (!isa<ConstantInt>(I->getOperand(1)))
4885 return selectBinaryOp(I, ISD::SDIV);
4886
4887 const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4888 if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4889 !(C.isPowerOf2() || C.isNegatedPowerOf2()))
4890 return selectBinaryOp(I, ISD::SDIV);
4891
4892 unsigned Lg2 = C.countr_zero();
4893 Register Src0Reg = getRegForValue(I->getOperand(0));
4894 if (!Src0Reg)
4895 return false;
4896
4897 if (cast<BinaryOperator>(I)->isExact()) {
4898 unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2);
4899 if (!ResultReg)
4900 return false;
4901 updateValueMap(I, ResultReg);
4902 return true;
4903 }
4904
4905 int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4906 unsigned AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne);
4907 if (!AddReg)
4908 return false;
4909
4910 // (Src0 < 0) ? Pow2 - 1 : 0;
4911 if (!emitICmp_ri(VT, Src0Reg, 0))
4912 return false;
4913
4914 unsigned SelectOpc;
4915 const TargetRegisterClass *RC;
4916 if (VT == MVT::i64) {
4917 SelectOpc = AArch64::CSELXr;
4918 RC = &AArch64::GPR64RegClass;
4919 } else {
4920 SelectOpc = AArch64::CSELWr;
4921 RC = &AArch64::GPR32RegClass;
4922 }
4923  Register SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg,
4924                                        AArch64CC::LT);
4925  if (!SelectReg)
4926 return false;
4927
4928 // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4929 // negate the result.
4930 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4931 unsigned ResultReg;
4932 if (C.isNegative())
4933 ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg,
4934 AArch64_AM::ASR, Lg2);
4935 else
4936 ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2);
4937
4938 if (!ResultReg)
4939 return false;
4940
4941 updateValueMap(I, ResultReg);
4942 return true;
4943}
4944
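// A hedged scalar model of the power-of-two signed division emitted above
// (illustrative helper): bias negative dividends by 2^Lg2 - 1 (the
// ADD/CMP/CSEL sequence), shift right arithmetically, then negate when the
// original divisor was negative.
static long long sdivByPow2Model(long long N, unsigned Lg2, bool NegDivisor) {
  long long Bias = (1LL << Lg2) - 1;
  long long Adjusted = N < 0 ? N + Bias : N; // ADD + CMP #0 + CSEL (lt)
  long long Quot = Adjusted >> Lg2;          // ASR
  return NegDivisor ? -Quot : Quot;          // SUB from the zero register
}
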
4945/// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4946/// have to duplicate it for AArch64, because otherwise we would fail during the
4947/// sign-extend emission.
4948unsigned AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4949 Register IdxN = getRegForValue(Idx);
4950 if (IdxN == 0)
4951 // Unhandled operand. Halt "fast" selection and bail.
4952 return 0;
4953
4954 // If the index is smaller or larger than intptr_t, truncate or extend it.
4955 MVT PtrVT = TLI.getPointerTy(DL);
4956 EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4957 if (IdxVT.bitsLT(PtrVT)) {
4958 IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
4959 } else if (IdxVT.bitsGT(PtrVT))
4960 llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4961 return IdxN;
4962}
4963
4964/// This is mostly a copy of the existing FastISel GEP code, but we have to
4965/// duplicate it for AArch64, because otherwise we would bail out even for
4966/// simple cases. This is because the standard fastEmit functions don't cover
4967/// MUL at all and ADD is lowered very inefficiently.
4968bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4969 if (Subtarget->isTargetILP32())
4970 return false;
4971
4972 Register N = getRegForValue(I->getOperand(0));
4973 if (!N)
4974 return false;
4975
4976 // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4977 // into a single N = N + TotalOffset.
4978 uint64_t TotalOffs = 0;
4979  MVT VT = TLI.getPointerTy(DL);
4980  for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
4981       GTI != E; ++GTI) {
4982 const Value *Idx = GTI.getOperand();
4983 if (auto *StTy = GTI.getStructTypeOrNull()) {
4984 unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4985 // N = N + Offset
4986 if (Field)
4987 TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4988 } else {
4989 // If this is a constant subscript, handle it quickly.
4990 if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4991 if (CI->isZero())
4992 continue;
4993 // N = N + Offset
4994 TotalOffs += GTI.getSequentialElementStride(DL) *
4995 cast<ConstantInt>(CI)->getSExtValue();
4996 continue;
4997 }
4998 if (TotalOffs) {
4999 N = emitAdd_ri_(VT, N, TotalOffs);
5000 if (!N)
5001 return false;
5002 TotalOffs = 0;
5003 }
5004
5005 // N = N + Idx * ElementSize;
5006 uint64_t ElementSize = GTI.getSequentialElementStride(DL);
5007 unsigned IdxN = getRegForGEPIndex(Idx);
5008 if (!IdxN)
5009 return false;
5010
5011 if (ElementSize != 1) {
5012 unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
5013 if (!C)
5014 return false;
5015 IdxN = emitMul_rr(VT, IdxN, C);
5016 if (!IdxN)
5017 return false;
5018 }
5019 N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
5020 if (!N)
5021 return false;
5022 }
5023 }
5024 if (TotalOffs) {
5025 N = emitAdd_ri_(VT, N, TotalOffs);
5026 if (!N)
5027 return false;
5028 }
5029 updateValueMap(I, N);
5030 return true;
5031}
5032
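// The GEP lowering above folds all constant indices into one running byte
// offset and adds Idx * ElementSize for each variable index. A hedged scalar
// picture of the address it materializes (illustrative helper):
static unsigned long long gepAddressModel(unsigned long long Base,
                                          unsigned long long ConstByteOffset,
                                          long long VarIdx,
                                          unsigned long long ElementSize) {
  return Base + ConstByteOffset +
         (unsigned long long)VarIdx * ElementSize; // emitAdd_ri_ / emitMul_rr
}
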
5033bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
5034 assert(TM.getOptLevel() == CodeGenOptLevel::None &&
5035 "cmpxchg survived AtomicExpand at optlevel > -O0");
5036
5037 auto *RetPairTy = cast<StructType>(I->getType());
5038 Type *RetTy = RetPairTy->getTypeAtIndex(0U);
5039 assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
5040 "cmpxchg has a non-i1 status result");
5041
5042 MVT VT;
5043 if (!isTypeLegal(RetTy, VT))
5044 return false;
5045
5046 const TargetRegisterClass *ResRC;
5047 unsigned Opc, CmpOpc;
5048 // This only supports i32/i64, because i8/i16 aren't legal, and the generic
5049 // extractvalue selection doesn't support that.
5050 if (VT == MVT::i32) {
5051 Opc = AArch64::CMP_SWAP_32;
5052 CmpOpc = AArch64::SUBSWrs;
5053 ResRC = &AArch64::GPR32RegClass;
5054 } else if (VT == MVT::i64) {
5055 Opc = AArch64::CMP_SWAP_64;
5056 CmpOpc = AArch64::SUBSXrs;
5057 ResRC = &AArch64::GPR64RegClass;
5058 } else {
5059 return false;
5060 }
5061
5062 const MCInstrDesc &II = TII.get(Opc);
5063
5064 const Register AddrReg = constrainOperandRegClass(
5065 II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
5066 const Register DesiredReg = constrainOperandRegClass(
5067 II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
5068 const Register NewReg = constrainOperandRegClass(
5069 II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
5070
5071 const Register ResultReg1 = createResultReg(ResRC);
5072 const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5073 const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5074
5075 // FIXME: MachineMemOperand doesn't support cmpxchg yet.
5076 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
5077 .addDef(ResultReg1)
5078 .addDef(ScratchReg)
5079 .addUse(AddrReg)
5080 .addUse(DesiredReg)
5081 .addUse(NewReg);
5082
5083 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc))
5084 .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5085 .addUse(ResultReg1)
5086 .addUse(DesiredReg)
5087 .addImm(0);
5088
5089 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr))
5090 .addDef(ResultReg2)
5091 .addUse(AArch64::WZR)
5092      .addUse(AArch64::WZR)
5093      .addImm(AArch64CC::NE);
5094
5095 assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5096 updateValueMap(I, ResultReg1, 2);
5097 return true;
5098}
5099
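// The i1 status result above is not read out of the CMP_SWAP pseudo directly;
// it is re-derived from the returned old value via SUBS and a CSINC against
// WZR. A hedged statement of that flag (illustrative helper):
static bool cmpxchgSuccessModel(unsigned long long OldValue,
                                unsigned long long Desired) {
  return OldValue == Desired; // SUBS ...; CSINC Wd, WZR, WZR, ne
}
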
5100bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5101 if (TLI.fallBackToDAGISel(*I))
5102 return false;
5103 switch (I->getOpcode()) {
5104 default:
5105 break;
5106 case Instruction::Add:
5107 case Instruction::Sub:
5108 return selectAddSub(I);
5109 case Instruction::Mul:
5110 return selectMul(I);
5111 case Instruction::SDiv:
5112 return selectSDiv(I);
5113 case Instruction::SRem:
5114 if (!selectBinaryOp(I, ISD::SREM))
5115 return selectRem(I, ISD::SREM);
5116 return true;
5117 case Instruction::URem:
5118 if (!selectBinaryOp(I, ISD::UREM))
5119 return selectRem(I, ISD::UREM);
5120 return true;
5121 case Instruction::Shl:
5122 case Instruction::LShr:
5123 case Instruction::AShr:
5124 return selectShift(I);
5125 case Instruction::And:
5126 case Instruction::Or:
5127 case Instruction::Xor:
5128 return selectLogicalOp(I);
5129 case Instruction::Br:
5130 return selectBranch(I);
5131 case Instruction::IndirectBr:
5132 return selectIndirectBr(I);
5133  case Instruction::BitCast:
5134    if (!selectCast(I, ISD::BITCAST))
5135      return selectBitCast(I);
5136 return true;
5137 case Instruction::FPToSI:
5138 if (!selectCast(I, ISD::FP_TO_SINT))
5139 return selectFPToInt(I, /*Signed=*/true);
5140 return true;
5141 case Instruction::FPToUI:
5142 return selectFPToInt(I, /*Signed=*/false);
5143 case Instruction::ZExt:
5144 case Instruction::SExt:
5145 return selectIntExt(I);
5146 case Instruction::Trunc:
5147 if (!selectCast(I, ISD::TRUNCATE))
5148 return selectTrunc(I);
5149 return true;
5150 case Instruction::FPExt:
5151 return selectFPExt(I);
5152 case Instruction::FPTrunc:
5153 return selectFPTrunc(I);
5154 case Instruction::SIToFP:
5155 if (!selectCast(I, ISD::SINT_TO_FP))
5156 return selectIntToFP(I, /*Signed=*/true);
5157 return true;
5158 case Instruction::UIToFP:
5159 return selectIntToFP(I, /*Signed=*/false);
5160 case Instruction::Load:
5161 return selectLoad(I);
5162 case Instruction::Store:
5163 return selectStore(I);
5164 case Instruction::FCmp:
5165 case Instruction::ICmp:
5166 return selectCmp(I);
5167 case Instruction::Select:
5168 return selectSelect(I);
5169 case Instruction::Ret:
5170 return selectRet(I);
5171 case Instruction::FRem:
5172 return selectFRem(I);
5173 case Instruction::GetElementPtr:
5174 return selectGetElementPtr(I);
5175 case Instruction::AtomicCmpXchg:
5176 return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5177 }
5178
5179 // fall-back to target-independent instruction selection.
5180 return selectOperator(I, I->getOpcode());
5181}
5182
5183FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
5184                                  const TargetLibraryInfo *LibInfo) {
5185
5186 SMEAttrs CallerAttrs(*FuncInfo.Fn);
5187 if (CallerAttrs.hasZAState() || CallerAttrs.hasZT0State() ||
5188 CallerAttrs.hasStreamingInterfaceOrBody() ||
5189 CallerAttrs.hasStreamingCompatibleInterface())
5190 return nullptr;
5191 return new AArch64FastISel(FuncInfo, LibInfo);
5192}
Definition: InstrTypes.h:981
@ ICMP_NE
not equal
Definition: InstrTypes.h:982
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:988
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:976
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:986
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:973
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition: InstrTypes.h:962
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:970
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:1096
bool isUnsigned() const
Definition: InstrTypes.h:1238
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1017
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:268
const APFloat & getValueAPF() const
Definition: Constants.h:311
bool isNegative() const
Return true if the sign bit is set.
Definition: Constants.h:318
bool isZero() const
Return true if the value is positive or negative zero.
Definition: Constants.h:315
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:205
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:160
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:154
This is an important base class in LLVM.
Definition: Constant.h:41
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
This class represents an Operation in the Expression.
constexpr bool isVector() const
One or more elements.
Definition: TypeSize.h:311
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:66
bool selectGetElementPtr(const User *I)
Definition: FastISel.cpp:531
virtual unsigned fastMaterializeFloatZero(const ConstantFP *CF)
Emit the floating-point constant +0.0 in a register using target- specific logic.
Definition: FastISel.h:480
virtual bool fastLowerIntrinsicCall(const IntrinsicInst *II)
This method is called by target-independent code to do target- specific intrinsic lowering.
Definition: FastISel.cpp:1939
virtual unsigned fastMaterializeConstant(const Constant *C)
Emit a constant in a register using target-specific logic, such as constant pool loads.
Definition: FastISel.h:473
virtual bool fastLowerCall(CallLoweringInfo &CLI)
This method is called by target-independent code to do target- specific call lowering.
Definition: FastISel.cpp:1937
virtual bool fastLowerArguments()
This method is called by target-independent code to do target- specific argument lowering.
Definition: FastISel.cpp:1935
Register getRegForGEPIndex(const Value *Idx)
This is a wrapper around getRegForValue that also takes care of truncating or sign-extending the give...
Definition: FastISel.cpp:383
virtual bool fastSelectInstruction(const Instruction *I)=0
This method is called by target-independent code when the normal FastISel process fails to select an ...
bool selectBitCast(const User *I)
Definition: FastISel.cpp:1519
virtual unsigned fastMaterializeAlloca(const AllocaInst *C)
Emit an alloca address in a register using target-specific logic.
Definition: FastISel.h:476
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:350
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:205
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:263
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:294
Indirect Branch Instruction.
iterator_range< succ_op_iterator > successors()
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:454
const BasicBlock * getParent() const
Definition: Instruction.h:152
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:252
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:54
bool isCommutative() const
Return true if swapping the first two arguments to the intrinsic produces the same result.
Definition: IntrinsicInst.h:72
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Context object for machine code objects.
Definition: MCContext.h:76
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:248
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:40
Machine Value Type.
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
bool isVector() const
Return true if this is a vector value type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
bool is64BitVector() const
Return true if this is a 64-bit vector type.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setFrameAddressIsTaken(bool T)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:546
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:543
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
Value * getLength() const
unsigned getDestAddressSpace() const
bool isVolatile() const
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Return a value (possibly void), from a function.
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasStreamingCompatibleInterface() const
bool hasStreamingInterfaceOrBody() const
bool hasZAState() const
bool hasZT0State() const
This class represents the LLVM 'select' instruction.
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void reserve(size_type N)
Definition: SmallVector.h:676
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition: DataLayout.h:622
TypeSize getElementOffset(unsigned Idx) const
Definition: DataLayout.h:651
Class to represent struct types.
Definition: DerivedTypes.h:216
Provides information about what library functions are available for the current target.
Target - Wrapper for Target specific information.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
bool isArrayTy() const
True if this is an instance of ArrayType.
Definition: Type.h:252
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:255
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:249
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:302
static IntegerType * getInt64Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_PREL
MO_PREL - Indicates that the bits of the symbol operand represented by MO_G0 etc are PC relative.
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo)
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ Swift
Calling convention for Swift.
Definition: CallingConv.h:69
@ CFGuard_Check
Special calling convention on Windows for calling the Control Guard Check ICall funtion.
Definition: CallingConv.h:82
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:790
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:651
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:836
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:680
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:786
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:148
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
Reg
All possible values of the reg field in the ModR/M byte.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool RetCC_AArch64_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
@ Offset
Definition: DWP.cpp:456
bool CC_AArch64_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed...
Definition: Utils.cpp:54
void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool CC_AArch64_Win64PCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool CC_AArch64_Win64_CFGuard_Check(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:269
gep_type_iterator gep_type_end(const User *GEP)
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
bool CC_AArch64_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
unsigned getKillRegState(bool B)
bool CC_AArch64_DarwinPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
gep_type_iterator gep_type_begin(const User *GEP)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:34
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:290
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:340
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:628
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117