class AArch64FastISel final : public FastISel {
  class Address {
    using BaseKind = enum {
      RegBase,
      FrameIndexBase
    };

    BaseKind Kind = RegBase;
    unsigned OffsetReg = 0;

  public:
    void setKind(BaseKind K) { Kind = K; }
    BaseKind getKind() const { return Kind; }
    bool isRegBase() const { return Kind == RegBase; }
    bool isFIBase() const { return Kind == FrameIndexBase; }

    void setReg(unsigned Reg) {
      assert(isRegBase() && "Invalid base register access!");

    unsigned getReg() const {
      assert(isRegBase() && "Invalid base register access!");

    void setOffsetReg(unsigned Reg) {
    unsigned getOffsetReg() const {

    void setFI(unsigned FI) {
      assert(isFIBase() && "Invalid base frame index access!");
    unsigned getFI() const {
      assert(isFIBase() && "Invalid base frame index access!");

    void setOffset(int64_t O) { Offset = O; }
    void setShift(unsigned S) { Shift = S; }
    unsigned getShift() { return Shift; }
  bool fastLowerArguments() override;
  bool fastLowerCall(CallLoweringInfo &CLI) override;
  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;

  bool selectRem(const Instruction *I, unsigned ISDOpcode);

  bool isTypeLegal(Type *Ty, MVT &VT);
  bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
  bool isValueAvailable(const Value *V) const;
  bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
  bool computeCallAddress(const Value *V, Address &Addr);
  bool simplifyAddress(Address &Addr, MVT VT);
  bool isMemCpySmall(uint64_t Len, unsigned Alignment);
  bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
  unsigned getRegForGEPIndex(const Value *Idx);

  unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                      const Value *RHS, bool SetFlags = false,
                      bool WantResult = true, bool IsZExt = false);
  unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         unsigned RHSReg, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         uint64_t Imm, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         uint64_t ShiftImm, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         uint64_t ShiftImm, bool SetFlags = false,
                         bool WantResult = true);

  bool emitCompareAndBranch(const BranchInst *BI);
  bool emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
  bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
  unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
                   bool SetFlags = false, bool WantResult = true,
                   bool IsZExt = false);
  unsigned emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm);
                   bool SetFlags = false, bool WantResult = true,
                   bool IsZExt = false);
  unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
                       bool WantResult = true);
  unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
                       bool WantResult = true);
  unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
  unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
  unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
                            unsigned RHSReg, uint64_t ShiftImm);
  unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
  unsigned emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1);
  unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
  unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
  unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
  unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
  unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
  unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
  unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
  unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
                      bool IsZExt = false);

  bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);

  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
  unsigned fastMaterializeConstant(const Constant *C) override;
  unsigned fastMaterializeFloatZero(const ConstantFP *CF) override;
  bool fastSelectInstruction(const Instruction *I) override;

#include "AArch64GenFastISel.inc"
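// An integer extend is "free" when it can be folded into the producing load or
// is already guaranteed by the argument's zext/sext attribute.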
  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
         "Unexpected integer extend instruction.");
  assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
         "Unexpected value type.");
  bool IsZExt = isa<ZExtInst>(I);

  if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))

  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
    if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
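// Materialize the address of a static alloca as an ADDXri of its frame index.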
unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
         "Alloca should always return a pointer.");

  if (!FuncInfo.StaticAllocaMap.count(AI))

      FuncInfo.StaticAllocaMap.find(AI);

  if (SI != FuncInfo.StaticAllocaMap.end()) {
    Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
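// Materialize an integer constant; a zero constant is just a COPY from WZR/XZR.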
unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
                                                   : &AArch64::GPR32RegClass;
  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
  Register ResultReg = createResultReg(RC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
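// Materialize an FP constant: positive zero via fastMaterializeFloatZero, small
// immediates via FMOVSi/FMOVDi, a MOVi bit pattern copied into an FP register,
// or otherwise a constant-pool load (ADRP + LDRSui/LDRDui).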
unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
    return fastMaterializeFloatZero(CFP);

    unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
    return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);

    unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
        &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    Register TmpReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)

    Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ResultReg)

  unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
  Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);

  unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
  Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
  EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);

  Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);

      ResultReg = createResultReg(&AArch64::GPR32RegClass);
      LdrOpc = AArch64::LDRWui;
      ResultReg = createResultReg(&AArch64::GPR64RegClass);
      LdrOpc = AArch64::LDRXui;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(LdrOpc),

      Register Result64 = createResultReg(&AArch64::GPR64RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::SUBREG_TO_REG))

    ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
  EVT CEVT = TLI.getValueType(DL, C->getType(), true);

  if (isa<ConstantPointerNull>(C)) {

  if (const auto *CI = dyn_cast<ConstantInt>(C))
    return materializeInt(CI, VT);
  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    return materializeFP(CFP, VT);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    return materializeGV(GV);
unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP *CFP) {
         "Floating-point constant is not a positive zero.");
  if (!isTypeLegal(CFP->getType(), VT))

  unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
  unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
  return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);
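// Helper: a multiply by a power-of-two constant on either operand can be
// rewritten as a shift, which the address and add/sub folders exploit below.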
  if (const auto *MI = dyn_cast<MulOperator>(I)) {
    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
      if (C->getValue().isPowerOf2())
    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
      if (C->getValue().isPowerOf2())
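// Try to fold the address computation for Obj (GEPs, adds, shifts, extends,
// static allocas) into an Address record usable by the load/store emitters.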
bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr,
                                     Type *Ty)
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    Opcode = C->getOpcode();

  if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
    if (Ty->getAddressSpace() > 255)

  case Instruction::BitCast:

  case Instruction::IntToPtr:
        TLI.getPointerTy(DL))

  case Instruction::PtrToInt:
    if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))

  case Instruction::GetElementPtr: {
      const Value *Op = GTI.getOperand();
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
        uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
          if (canFoldAddIntoGEP(U, Op)) {
                cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
            Op = cast<AddOperator>(Op)->getOperand(0);
            goto unsupported_gep;

    Addr.setOffset(TmpOffset);

  case Instruction::Alloca: {
        FuncInfo.StaticAllocaMap.find(AI);
    if (SI != FuncInfo.StaticAllocaMap.end()) {
      Addr.setKind(Address::FrameIndexBase);

    if (isa<ConstantInt>(LHS))
      return computeAddress(LHS, Addr, Ty);

    if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))

  case Instruction::Sub: {
      return computeAddress(LHS, Addr, Ty);

  case Instruction::Shl: {
    if (Addr.getOffsetReg())

    const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
    if (Val < 1 || Val > 3)

      NumBytes = NumBits / 8;
    if (NumBytes != (1ULL << Val))

    if (const auto *I = dyn_cast<Instruction>(Src)) {
      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
        if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
              ZE->getOperand(0)->getType()->isIntegerTy(32)) {
            Src = ZE->getOperand(0);
        } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
              SE->getOperand(0)->getType()->isIntegerTy(32)) {
            Src = SE->getOperand(0);

    if (const auto *AI = dyn_cast<BinaryOperator>(Src))
      if (AI->getOpcode() == Instruction::And) {
        if (const auto *C = dyn_cast<ConstantInt>(LHS))
          if (C->getValue() == 0xffffffff)
        if (const auto *C = dyn_cast<ConstantInt>(RHS))
          if (C->getValue() == 0xffffffff) {

    if (Addr.getOffsetReg())

    if (const auto *C = dyn_cast<ConstantInt>(LHS))
      if (C->getValue().isPowerOf2())

    assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt.");
    const auto *C = cast<ConstantInt>(RHS);
    unsigned Val = C->getValue().logBase2();
    if (Val < 1 || Val > 3)

      NumBytes = NumBits / 8;
    if (NumBytes != (1ULL << Val))

    if (const auto *I = dyn_cast<Instruction>(Src)) {
      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
        if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
              ZE->getOperand(0)->getType()->isIntegerTy(32)) {
            Src = ZE->getOperand(0);
        } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
              SE->getOperand(0)->getType()->isIntegerTy(32)) {
            Src = SE->getOperand(0);

  case Instruction::And: {
    if (Addr.getOffsetReg())

    if (!Ty || DL.getTypeSizeInBits(Ty) != 8)

    if (const auto *C = dyn_cast<ConstantInt>(LHS))
      if (C->getValue() == 0xffffffff)
    if (const auto *C = dyn_cast<ConstantInt>(RHS))
      if (C->getValue() == 0xffffffff) {

  case Instruction::SExt:
  case Instruction::ZExt: {
    if (!Addr.getReg() || Addr.getOffsetReg())

    const Value *Src = nullptr;
    if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
      if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
        Src = ZE->getOperand(0);
    } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
      if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
        Src = SE->getOperand(0);

  if (Addr.isRegBase() && !Addr.getReg()) {

  if (!Addr.getOffsetReg()) {
bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;

  if (const auto *I = dyn_cast<Instruction>(V)) {
    Opcode = I->getOpcode();
    InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
  } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();

  case Instruction::BitCast:

  case Instruction::IntToPtr:
            TLI.getPointerTy(DL))

  case Instruction::PtrToInt:
    if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))

  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    Addr.setGlobalValue(GV);

  if (!Addr.getGlobalValue()) {
    Addr.setReg(getRegForValue(V));
    return Addr.getReg() != 0;
bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
  EVT evt = TLI.getValueType(DL, Ty, true);

  return TLI.isTypeLegal(VT);

bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
  if (isTypeLegal(Ty, VT))

bool AArch64FastISel::isValueAvailable(const Value *V) const {
  if (!isa<Instruction>(V))

  const auto *I = cast<Instruction>(V);
  return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
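// If the address cannot be encoded directly in a load/store, lower the
// offending register or immediate offset into the base register first.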
bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
  bool ImmediateOffsetNeedsLowering = false;
  bool RegisterOffsetNeedsLowering = false;

  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
    ImmediateOffsetNeedsLowering = true;
  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
           !isUInt<12>(Offset / ScaleFactor))
    ImmediateOffsetNeedsLowering = true;

  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
    RegisterOffsetNeedsLowering = true;

  if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
    RegisterOffsetNeedsLowering = true;

  if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
    Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
    Addr.setKind(Address::RegBase);
    Addr.setReg(ResultReg);

  if (RegisterOffsetNeedsLowering) {
    unsigned ResultReg = 0;
    if (Addr.getReg()) {
        ResultReg = emitAddSub_rx(true, MVT::i64, Addr.getReg(),
                                  Addr.getOffsetReg(), Addr.getExtendType(),
        ResultReg = emitAddSub_rs(true, MVT::i64, Addr.getReg(),
                               Addr.getShift(), true);
                               Addr.getShift(), false);

    Addr.setReg(ResultReg);
    Addr.setOffsetReg(0);

  if (ImmediateOffsetNeedsLowering) {
      ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
    Addr.setReg(ResultReg);
void AArch64FastISel::addLoadStoreOperands(Address &Addr,
                                           unsigned ScaleFactor,
  int64_t Offset = Addr.getOffset() / ScaleFactor;

  if (Addr.isFIBase()) {
    int FI = Addr.getFI();
    MMO = FuncInfo.MF->getMachineMemOperand(
        MFI.getObjectSize(FI), MFI.getObjectAlign(FI));

    assert(Addr.isRegBase() && "Unexpected address kind.");

    if (Addr.getOffsetReg()) {
      assert(Addr.getOffset() == 0 && "Unexpected offset");
unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                                     bool WantResult, bool IsZExt) {
  bool NeedExtend = false;

  if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))

  if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
    if (isa<ConstantInt>(SI->getOperand(1)))
      if (SI->getOpcode() == Instruction::Shl ||
          SI->getOpcode() == Instruction::LShr ||
          SI->getOpcode() == Instruction::AShr)

    LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);

  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
    if (C->isNegative())
      ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
  } else if (const auto *C = dyn_cast<Constant>(RHS))
    if (C->isNullValue())
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);

      isValueAvailable(RHS)) {
    if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
        if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
          Register RHSReg = getRegForValue(SI->getOperand(0));
          return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType,
                               C->getZExtValue(), SetFlags, WantResult);

    return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,
                         SetFlags, WantResult);

    const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
    const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);

    if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
      if (C->getValue().isPowerOf2())

    assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
    uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
    Register RHSReg = getRegForValue(MulLHS);
    ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,
                              ShiftVal, SetFlags, WantResult);

  if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
    if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
      switch (SI->getOpcode()) {
        Register RHSReg = getRegForValue(SI->getOperand(0));
        ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
                                  ShiftVal, SetFlags, WantResult);

    RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);

  return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
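// The _rr/_ri/_rs/_rx variants below emit the register, immediate,
// shifted-register, and extended-register encodings of ADD/SUB, optionally
// setting flags (ADDS/SUBS) or discarding the result into WZR/XZR.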
unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        unsigned RHSReg, bool SetFlags,
  assert(LHSReg && RHSReg && "Invalid register number.");

  if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
      RHSReg == AArch64::SP || RHSReg == AArch64::WSP)

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrr,  AArch64::SUBXrr  },
      { AArch64::ADDWrr,  AArch64::ADDXrr  }  },
    { { AArch64::SUBSWrr, AArch64::SUBSXrr },
      { AArch64::ADDSWrr, AArch64::ADDSXrr }  }

  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    ResultReg = createResultReg(RC);
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
  assert(LHSReg && "Invalid register number.");

  if (isUInt<12>(Imm))
  else if ((Imm & 0xfff000) == Imm) {

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWri,  AArch64::SUBXri  },
      { AArch64::ADDWri,  AArch64::ADDXri  }  },
    { { AArch64::SUBSWri, AArch64::SUBSXri },
      { AArch64::ADDSWri, AArch64::ADDSXri }  }

  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
    RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
    ResultReg = createResultReg(RC);
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
  assert(LHSReg && RHSReg && "Invalid register number.");
  assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
         RHSReg != AArch64::SP && RHSReg != AArch64::WSP);

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrs,  AArch64::SUBXrs  },
      { AArch64::ADDWrs,  AArch64::ADDXrs  }  },
    { { AArch64::SUBSWrs, AArch64::SUBSXrs },
      { AArch64::ADDSWrs, AArch64::ADDSXrs }  }

  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    ResultReg = createResultReg(RC);
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
  assert(LHSReg && RHSReg && "Invalid register number.");
  assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
         RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrx,  AArch64::SUBXrx  },
      { AArch64::ADDWrx,  AArch64::ADDXrx  }  },
    { { AArch64::SUBSWrx, AArch64::SUBSXrx },
      { AArch64::ADDSWrx, AArch64::ADDSXrx }  }

  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
    RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
    ResultReg = createResultReg(RC);
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
  EVT EVT = TLI.getValueType(DL, Ty, true);

    return emitICmp(VT, LHS, RHS, IsZExt);
    return emitFCmp(VT, LHS, RHS);

  return emitSub(RetVT, LHS, RHS, true, false,

bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) {
  return emitAddSub_ri(false, RetVT, LHSReg, Imm,

  bool UseImm = false;
  if (const auto *CFP = dyn_cast<ConstantFP>(RHS))

    unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))

  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
                                  bool SetFlags, bool WantResult, bool IsZExt) {
  return emitAddSub(true, RetVT, LHS, RHS, SetFlags, WantResult,

unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm) {
    ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);
    ResultReg = emitAddSub_ri(true, VT, Op0, Imm);
    ResultReg = emitAddSub_rr(true, VT, Op0, CReg);

                                  bool SetFlags, bool WantResult, bool IsZExt) {
  return emitAddSub(false, RetVT, LHS, RHS, SetFlags, WantResult,

unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
                                      unsigned RHSReg, bool WantResult) {
  return emitAddSub_rr(false, RetVT, LHSReg, RHSReg,

unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
                                      uint64_t ShiftImm, bool WantResult) {
  return emitAddSub_rs(false, RetVT, LHSReg, RHSReg, ShiftType,
                       ShiftImm, true, WantResult);
unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))

  if (const auto *SI = dyn_cast<ShlOperator>(LHS))
    if (isa<ConstantInt>(SI->getOperand(1)))

  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);

    const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
    const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);

    if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
      if (C->getValue().isPowerOf2())

    assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
    uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();

    Register RHSReg = getRegForValue(MulLHS);
    ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);

  if (const auto *SI = dyn_cast<ShlOperator>(RHS))
    if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
      Register RHSReg = getRegForValue(SI->getOperand(0));
      ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);

  ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);
unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
                "ISD nodes are not consecutive!");
  static const unsigned OpcTable[3][2] = {
    { AArch64::ANDWri, AArch64::ANDXri },
    { AArch64::ORRWri, AArch64::ORRXri },
    { AArch64::EORWri, AArch64::EORXri }

    Opc = OpcTable[Idx][0];
    RC = &AArch64::GPR32spRegClass;
    Opc = OpcTable[ISDOpc - ISD::AND][1];
    RC = &AArch64::GPR64spRegClass;

      fastEmitInst_ri(Opc, RC, LHSReg,
unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
                                           unsigned LHSReg, unsigned RHSReg,
                "ISD nodes are not consecutive!");
  static const unsigned OpcTable[3][2] = {
    { AArch64::ANDWrs, AArch64::ANDXrs },
    { AArch64::ORRWrs, AArch64::ORRXrs },
    { AArch64::EORWrs, AArch64::EORXrs }

    Opc = OpcTable[ISDOpc - ISD::AND][0];
    RC = &AArch64::GPR32RegClass;
    Opc = OpcTable[ISDOpc - ISD::AND][1];
    RC = &AArch64::GPR64RegClass;

      fastEmitInst_rri(Opc, RC, LHSReg, RHSReg,
unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg,
  return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm);
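// Load emission: the opcode is chosen from scaled/unscaled/register-offset
// tables keyed on the value type and addressing mode, and the result is
// constrained to the matching GPR/FPR register class.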
    if (!TLI.allowsMisalignedMemoryAccesses(VT))

  if (!simplifyAddress(Addr, VT))

  bool UseScaled = true;
  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {

  static const unsigned GPOpcTable[2][8][4] = {
    { { AArch64::LDURSBWi,  AArch64::LDURSHWi,  AArch64::LDURWi,
      { AArch64::LDURSBXi,  AArch64::LDURSHXi,  AArch64::LDURSWi,
      { AArch64::LDRSBWui,  AArch64::LDRSHWui,  AArch64::LDRWui,
      { AArch64::LDRSBXui,  AArch64::LDRSHXui,  AArch64::LDRSWui,
      { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
      { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
      { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
      { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
    { { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
      { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
      { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
      { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
      { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
      { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
      { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
      { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,

  static const unsigned FPOpcTable[4][2] = {
    { AArch64::LDURSi,  AArch64::LDURDi  },
    { AArch64::LDRSui,  AArch64::LDRDui  },
    { AArch64::LDRSroX, AArch64::LDRDroX },
    { AArch64::LDRSroW, AArch64::LDRDroW }

  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
                      Addr.getOffsetReg();
  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;

  bool IsRet64Bit = RetVT == MVT::i64;
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
    RC = (IsRet64Bit && !WantZExt) ?
             &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
    RC = (IsRet64Bit && !WantZExt) ?
             &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
    RC = (IsRet64Bit && !WantZExt) ?
             &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
    RC = &AArch64::GPR64RegClass;
    Opc = FPOpcTable[Idx][0];
    RC = &AArch64::FPR32RegClass;
    Opc = FPOpcTable[Idx][1];
    RC = &AArch64::FPR64RegClass;

  Register ResultReg = createResultReg(RC);
                                    TII.get(Opc), ResultReg);

    unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1);
    assert(ANDReg && "Unexpected AND instruction emission failure.");

    Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(AArch64::SUBREG_TO_REG), Reg64)
        .addImm(AArch64::sub_32);
bool AArch64FastISel::selectAddSub(const Instruction *I) {
  if (!isTypeSupported(I->getType(), VT, true))

    return selectOperator(I, I->getOpcode());

  switch (I->getOpcode()) {
    ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
  case Instruction::Sub:
    ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));

  updateValueMap(I, ResultReg);
bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
  if (!isTypeSupported(I->getType(), VT, true))

    return selectOperator(I, I->getOpcode());

  switch (I->getOpcode()) {
  case Instruction::And:
    ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
  case Instruction::Or:
    ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
  case Instruction::Xor:
    ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));

  updateValueMap(I, ResultReg);
bool AArch64FastISel::selectLoad(const Instruction *I) {
  if (!isTypeSupported(I->getType(), VT, true) ||
      cast<LoadInst>(I)->isAtomic())

  const Value *SV = I->getOperand(0);
  if (TLI.supportSwiftError()) {
    if (const Argument *Arg = dyn_cast<Argument>(SV)) {
      if (Arg->hasSwiftErrorAttr())
    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
      if (Alloca->isSwiftError())

  if (!computeAddress(I->getOperand(0), Addr, I->getType()))

  bool WantZExt = true;
  const Value *IntExtVal = nullptr;
  if (I->hasOneUse()) {
    if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
      if (isTypeSupported(ZE->getType(), RetVT))
    } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
      if (isTypeSupported(SE->getType(), RetVT))

  unsigned ResultReg =
      emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));

      ResultReg = std::prev(I)->getOperand(0).getReg();
      removeDeadCode(I, std::next(I));
      ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
    updateValueMap(I, ResultReg);

    for (auto &Opnd : MI->uses()) {
        Reg = Opnd.getReg();
    removeDeadCode(I, std::next(I));

    updateValueMap(IntExtVal, ResultReg);

  updateValueMap(I, ResultReg);
bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
  default: return false;
  case MVT::i8:  Opc = AArch64::STLRB; break;
  case MVT::i16: Opc = AArch64::STLRH; break;
  case MVT::i32: Opc = AArch64::STLRW; break;
  case MVT::i64: Opc = AArch64::STLRX; break;

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
    if (!TLI.allowsMisalignedMemoryAccesses(VT))

  if (!simplifyAddress(Addr, VT))

  bool UseScaled = true;
  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {

  static const unsigned OpcTable[4][6] = {
    { AArch64::STURBBi,  AArch64::STURHHi,  AArch64::STURWi,  AArch64::STURXi,
      AArch64::STURSi,   AArch64::STURDi },
    { AArch64::STRBBui,  AArch64::STRHHui,  AArch64::STRWui,  AArch64::STRXui,
      AArch64::STRSui,   AArch64::STRDui },
    { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
      AArch64::STRSroX,  AArch64::STRDroX },
    { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
      AArch64::STRSroW,  AArch64::STRDroW }

  bool VTIsi1 = false;
  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
                      Addr.getOffsetReg();
  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;

  case MVT::i8:  Opc = OpcTable[Idx][0]; break;
  case MVT::i16: Opc = OpcTable[Idx][1]; break;
  case MVT::i32: Opc = OpcTable[Idx][2]; break;
  case MVT::i64: Opc = OpcTable[Idx][3]; break;
  case MVT::f32: Opc = OpcTable[Idx][4]; break;
  case MVT::f64: Opc = OpcTable[Idx][5]; break;

  if (VTIsi1 && SrcReg != AArch64::WZR) {
    unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1);
    assert(ANDReg && "Unexpected AND instruction emission failure.");

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
bool AArch64FastISel::selectStore(const Instruction *I) {
  const Value *Op0 = I->getOperand(0);
  if (!isTypeSupported(Op0->getType(), VT, true))

  const Value *PtrV = I->getOperand(1);
  if (TLI.supportSwiftError()) {
    if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
      if (Arg->hasSwiftErrorAttr())
    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
      if (Alloca->isSwiftError())

  unsigned SrcReg = 0;
  if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
      SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
  } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
    if (CF->isZero() && !CF->isNegative()) {
      SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;

    SrcReg = getRegForValue(Op0);

  auto *SI = cast<StoreInst>(I);
  if (SI->isAtomic()) {
    Register AddrReg = getRegForValue(PtrV);
    return emitStoreRelease(VT, SrcReg, AddrReg,
                            createMachineMemOperandFor(I));
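// Fold a compare against zero, or against an and-with-power-of-two, into a
// CBZ/CBNZ or TBZ/TBNZ instead of a separate compare plus conditional branch.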
bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
  if (FuncInfo.MF->getFunction().hasFnAttribute(
          Attribute::SpeculativeLoadHardening))

  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {

    if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
    if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())

    if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
      if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
        if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
          if (C->getValue().isPowerOf2())
        if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
          if (C->getValue().isPowerOf2()) {
            TestBit = C->getValue().logBase2();

    if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())

    if (!isa<ConstantInt>(RHS))
    if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))

  static const unsigned OpcTable[2][2][2] = {
    { {AArch64::CBZW,  AArch64::CBZX },
      {AArch64::CBNZW, AArch64::CBNZX} },
    { {AArch64::TBZW,  AArch64::TBZX },
      {AArch64::TBNZW, AArch64::TBNZX} }

  bool IsBitTest = TestBit != -1;
  bool Is64Bit = BW == 64;
  if (TestBit < 32 && TestBit >= 0)

  unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];

  if (BW == 64 && !Is64Bit)
    SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32);

  if ((BW < 32) && !IsBitTest)
    SrcReg = emitIntExt(VT, SrcReg, MVT::i32, true);

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))

  finishCondBranch(BI->getParent(), TBB, FBB);
bool AArch64FastISel::selectBranch(const Instruction *I) {
    if (CI->hasOneUse() && isValueAvailable(CI)) {
        fastEmitBranch(FBB, DbgLoc);
        fastEmitBranch(TBB, DbgLoc);

      if (emitCompareAndBranch(BI))

      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {

      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))

      finishCondBranch(BI->getParent(), TBB, FBB);
  } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
    FuncInfo.MBB->addSuccessorWithoutProb(Target);

      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))

      finishCondBranch(BI->getParent(), TBB, FBB);

  unsigned Opcode = AArch64::TBNZW;
  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
    Opcode = AArch64::TBZW;

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
      .addReg(ConstrainedCondReg)

  finishCondBranch(BI->getParent(), TBB, FBB);
bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);

    FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
bool AArch64FastISel::selectCmp(const Instruction *I) {
  const CmpInst *CI = cast<CmpInst>(I);
  unsigned ResultReg = 0;

    ResultReg = createResultReg(&AArch64::GPR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ResultReg)

    updateValueMap(I, ResultReg);

  ResultReg = createResultReg(&AArch64::GPR32RegClass);
  static unsigned CondCodeTable[2][2] = {

    Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),

    updateValueMap(I, ResultReg);

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),

  updateValueMap(I, ResultReg);
  if (!SI->getType()->isIntegerTy(1))

  const Value *Src1Val, *Src2Val;
  bool NeedExtraOp = false;
  if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
      Src1Val = SI->getCondition();
      Src2Val = SI->getFalseValue();
      Opc = AArch64::ORRWrr;
      Src1Val = SI->getFalseValue();
      Src2Val = SI->getCondition();
      Opc = AArch64::BICWrr;
  } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
      Src1Val = SI->getCondition();
      Src2Val = SI->getTrueValue();
      Opc = AArch64::ORRWrr;
      Src1Val = SI->getCondition();
      Src2Val = SI->getTrueValue();
      Opc = AArch64::ANDWrr;

  Register Src1Reg = getRegForValue(Src1Val);
  Register Src2Reg = getRegForValue(Src2Val);

  Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
  updateValueMap(SI, ResultReg);
bool AArch64FastISel::selectSelect(const Instruction *I) {
  assert(isa<SelectInst>(I) && "Expected a select instruction.");
  if (!isTypeSupported(I->getType(), VT))

    Opc = AArch64::CSELWr;
    RC = &AArch64::GPR32RegClass;
    Opc = AArch64::CSELXr;
    RC = &AArch64::GPR64RegClass;
    Opc = AArch64::FCSELSrrr;
    RC = &AArch64::FPR32RegClass;
    Opc = AArch64::FCSELDrrr;
    RC = &AArch64::FPR64RegClass;

  if (foldXALUIntrinsic(CC, I, Cond)) {
             isValueAvailable(Cond)) {
    const auto *Cmp = cast<CmpInst>(Cond);
    const Value *FoldSelect = nullptr;
      FoldSelect = SI->getFalseValue();
      FoldSelect = SI->getTrueValue();

      Register SrcReg = getRegForValue(FoldSelect);
      updateValueMap(I, SrcReg);

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,

  Register Src1Reg = getRegForValue(SI->getTrueValue());
  Register Src2Reg = getRegForValue(SI->getFalseValue());

  if (!Src1Reg || !Src2Reg)

    Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC);
  Register ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC);
  updateValueMap(I, ResultReg);
bool AArch64FastISel::selectFPExt(const Instruction *I) {
  Value *V = I->getOperand(0);

  Register ResultReg = createResultReg(&AArch64::FPR64RegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
  updateValueMap(I, ResultReg);

bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
  Value *V = I->getOperand(0);

  Register ResultReg = createResultReg(&AArch64::FPR32RegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
  updateValueMap(I, ResultReg);
  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())

  Register SrcReg = getRegForValue(I->getOperand(0));

  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);

      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;

  Register ResultReg = createResultReg(
      DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
  updateValueMap(I, ResultReg);
  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
         "Unexpected value type.");

  Register SrcReg = getRegForValue(I->getOperand(0));

  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);

      Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
      Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
      Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
      Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;

  Register ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg);
  updateValueMap(I, ResultReg);
bool AArch64FastISel::fastLowerArguments() {
  if (!FuncInfo.CanLowerReturn)

  unsigned GPRCnt = 0;
  unsigned FPRCnt = 0;
  for (auto const &Arg : F->args()) {
    if (Arg.hasAttribute(Attribute::ByVal) ||
        Arg.hasAttribute(Attribute::InReg) ||
        Arg.hasAttribute(Attribute::StructRet) ||
        Arg.hasAttribute(Attribute::SwiftSelf) ||
        Arg.hasAttribute(Attribute::SwiftAsync) ||
        Arg.hasAttribute(Attribute::SwiftError) ||
        Arg.hasAttribute(Attribute::Nest))

    EVT ArgVT = TLI.getValueType(DL, ArgTy);

  if (GPRCnt > 8 || FPRCnt > 8)

    { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
      AArch64::W5, AArch64::W6, AArch64::W7 },
    { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
      AArch64::X5, AArch64::X6, AArch64::X7 },
    { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
      AArch64::H5, AArch64::H6, AArch64::H7 },
    { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
      AArch64::S5, AArch64::S6, AArch64::S7 },
    { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
      AArch64::D5, AArch64::D6, AArch64::D7 },
    { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
      AArch64::Q5, AArch64::Q6, AArch64::Q7 }

  unsigned FPRIdx = 0;
  for (auto const &Arg : F->args()) {
    MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
      RC = &AArch64::GPR32RegClass;
      RC = &AArch64::GPR64RegClass;
      RC = &AArch64::FPR16RegClass;
      RC = &AArch64::FPR32RegClass;
      RC = &AArch64::FPR64RegClass;
      RC = &AArch64::FPR128RegClass;

    Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
    Register ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ResultReg)
    updateValueMap(&Arg, ResultReg);
bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
                                      unsigned &NumBytes) {
  CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));

  NumBytes = CCInfo.getNextStackOffset();

  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))

    const Value *ArgVal = CLI.OutVals[VA.getValNo()];
    MVT ArgVT = OutVTs[VA.getValNo()];

    Register ArgReg = getRegForValue(ArgVal);

    switch (VA.getLocInfo()) {
      MVT DestVT = VA.getLocVT();
      ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, false);
      MVT DestVT = VA.getLocVT();
      ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, true);

    if (VA.isRegLoc() && !VA.needsCustom()) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
      CLI.OutRegs.push_back(VA.getLocReg());
    } else if (VA.needsCustom()) {
      assert(VA.isMemLoc() && "Assuming store on stack.");

      if (isa<UndefValue>(ArgVal))

      unsigned BEAlign = 0;
      if (ArgSize < 8 && !Subtarget->isLittleEndian())
        BEAlign = 8 - ArgSize;

      Addr.setKind(Address::RegBase);
      Addr.setReg(AArch64::SP);
      Addr.setOffset(VA.getLocMemOffset() + BEAlign);
bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
                                 unsigned NumBytes) {
  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))

  CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));

  if (RVLocs.size() != 1)

  MVT CopyVT = RVLocs[0].getValVT();

  Register ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
          TII.get(TargetOpcode::COPY), ResultReg)
      .addReg(RVLocs[0].getLocReg());
  CLI.InRegs.push_back(RVLocs[0].getLocReg());

  CLI.ResultReg = ResultReg;
  CLI.NumResultRegs = 1;
bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
  bool IsTailCall = CLI.IsTailCall;
  bool IsVarArg = CLI.IsVarArg;

  if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
      !Subtarget->noBTIAtReturnTwice() &&

  if (CLI.RetTy->isVoidTy())
  else if (!isTypeLegal(CLI.RetTy, RetVT))

  for (auto Flag : CLI.OutFlags)
        Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError())

  OutVTs.reserve(CLI.OutVals.size());

  for (auto *Val : CLI.OutVals) {
    if (!isTypeLegal(Val->getType(), VT) &&
    OutVTs.push_back(VT);

  if (Callee && !computeCallAddress(Callee, Addr))
      Addr.getGlobalValue()->hasExternalWeakLinkage())

  if (!processCallArgs(CLI, OutVTs, NumBytes))

    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
    else if (Addr.getGlobalValue())
    else if (Addr.getReg()) {

    unsigned CallReg = 0;
      Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);

      CallReg = createResultReg(&AArch64::GPR64RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(AArch64::LDRXui), CallReg)
    } else if (Addr.getGlobalValue())
      CallReg = materializeGV(Addr.getGlobalValue());
    else if (Addr.getReg())
      CallReg = Addr.getReg();

    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);

  for (auto Reg : CLI.OutRegs)

  return finishCall(CLI, RetVT, NumBytes);
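// Small memcpys are inlined as a sequence of loads and stores whose width is
// chosen from the remaining length and the alignment of the operands.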
bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {

bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
                                         uint64_t Len, unsigned Alignment) {
  if (!isMemCpySmall(Len, Alignment))

  int64_t UnscaledOffset = 0;

    if (!Alignment || Alignment >= 8) {
      if (Len >= 4 && Alignment == 4)
      else if (Len >= 2 && Alignment == 2)

    unsigned ResultReg = emitLoad(VT, VT, Src);

    UnscaledOffset += Size;

    Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
    Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
  if (!isa<ExtractValueInst>(Cond))

  const auto *EV = cast<ExtractValueInst>(Cond);
  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))

  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
      cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
  if (!isTypeLegal(RetTy, RetVT))

  const Value *LHS = II->getArgOperand(0);
  const Value *RHS = II->getArgOperand(1);

  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())

  case Intrinsic::smul_with_overflow:
    if (const auto *C = dyn_cast<ConstantInt>(RHS))
      if (C->getValue() == 2)
        IID = Intrinsic::sadd_with_overflow;
  case Intrinsic::umul_with_overflow:
    if (const auto *C = dyn_cast<ConstantInt>(RHS))
      if (C->getValue() == 2)
        IID = Intrinsic::uadd_with_overflow;

  case Intrinsic::sadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow:

  if (!isValueAvailable(II))

  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
    if (!isa<ExtractValueInst>(Itr))

    const auto *EVI = cast<ExtractValueInst>(Itr);
    if (EVI->getAggregateOperand() != II)
bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
  default: return false;
  case Intrinsic::frameaddress: {
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);

    unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
      DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
      assert(DestReg && "Unexpected LDR instruction emission failure.");

    updateValueMap(II, SrcReg);
  case Intrinsic::sponentry: {
    Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(AArch64::ADDXri), ResultReg)

    updateValueMap(II, ResultReg);
  case Intrinsic::memmove: {
    const auto *MTI = cast<MemTransferInst>(II);
    if (MTI->isVolatile())

    if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
      uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
                                        MTI->getSourceAlignment());
      if (isMemCpySmall(Len, Alignment)) {
        if (!computeAddress(MTI->getRawDest(), Dest) ||
            !computeAddress(MTI->getRawSource(), Src))
        if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))

    if (!MTI->getLength()->getType()->isIntegerTy(64))

    if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)

    const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
    return lowerCallTo(II, IntrMemName, II->arg_size() - 1);
  case Intrinsic::memset: {
    const MemSetInst *MSI = cast<MemSetInst>(II);
    return lowerCallTo(II, "memset", II->arg_size() - 1);
  case Intrinsic::sin:
  case Intrinsic::cos:
  case Intrinsic::pow: {
    if (!isTypeLegal(II->getType(), RetVT))

      { RTLIB::SIN_F32, RTLIB::SIN_F64 },
      { RTLIB::COS_F32, RTLIB::COS_F64 },
      { RTLIB::POW_F32, RTLIB::POW_F64 }

    case Intrinsic::sin:
      LC = LibCallTable[0][Is64Bit];
    case Intrinsic::cos:
      LC = LibCallTable[1][Is64Bit];
    case Intrinsic::pow:
      LC = LibCallTable[2][Is64Bit];

    for (auto &Arg : II->args()) {
      Entry.Ty = Arg->getType();
      Args.push_back(Entry);

    CallLoweringInfo CLI;
    CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
    if (!lowerCallTo(CLI))

    updateValueMap(II, CLI.ResultReg);
  case Intrinsic::fabs: {
    if (!isTypeLegal(II->getType(), VT))

      Opc = AArch64::FABSSr;
      Opc = AArch64::FABSDr;

    Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
    updateValueMap(II, ResultReg);
  case Intrinsic::trap:
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
  case Intrinsic::debugtrap:
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))

  case Intrinsic::sqrt: {
    if (!isTypeLegal(RetTy, VT))

    unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg);

    updateValueMap(II, ResultReg);
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow: {
    auto *Ty = cast<StructType>(Callee->getReturnType());
    Type *RetTy = Ty->getTypeAtIndex(0U);

    if (!isTypeLegal(RetTy, VT))

    case Intrinsic::smul_with_overflow:
      if (const auto *C = dyn_cast<ConstantInt>(RHS))
        if (C->getValue() == 2) {
          IID = Intrinsic::sadd_with_overflow;
    case Intrinsic::umul_with_overflow:
      if (const auto *C = dyn_cast<ConstantInt>(RHS))
        if (C->getValue() == 2) {
          IID = Intrinsic::uadd_with_overflow;

    unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
    case Intrinsic::sadd_with_overflow:
      ResultReg1 = emitAdd(VT, LHS, RHS, true);
    case Intrinsic::uadd_with_overflow:
      ResultReg1 = emitAdd(VT, LHS, RHS, true);
    case Intrinsic::ssub_with_overflow:
      ResultReg1 = emitSub(VT, LHS, RHS, true);
    case Intrinsic::usub_with_overflow:
      ResultReg1 = emitSub(VT, LHS, RHS, true);
    case Intrinsic::smul_with_overflow: {
        MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg);
            fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
        emitAddSub_rx(false, MVT::i64, MulReg, MulSubReg,
        MulReg = emitMul_rr(VT, LHSReg, RHSReg);
        unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg);
    case Intrinsic::umul_with_overflow: {
        MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg);
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                TII.get(AArch64::ANDSXri), AArch64::XZR)
        MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
        MulReg = emitMul_rr(VT, LHSReg, RHSReg);
        unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg);
        emitSubs_rr(VT, AArch64::XZR, UMULHReg, false);

      ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);

    ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
                                  AArch64::WZR, AArch64::WZR,

    assert((ResultReg1 + 1) == ResultReg2 &&
           "Nonconsecutive result registers.");
    updateValueMap(II, ResultReg1, 2);
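// Return lowering: copy the (possibly extended) return value into the location
// assigned by AnalyzeReturn and emit RET_ReallyLR.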
bool AArch64FastISel::selectRet(const Instruction *I) {
  const Function &F = *I->getParent()->getParent();

  if (!FuncInfo.CanLowerReturn)

  if (TLI.supportSwiftError() &&
      F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))

  if (TLI.supportSplitCSR(FuncInfo.MF))

  if (Ret->getNumOperands() > 0) {
    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
    CCInfo.AnalyzeReturn(Outs, RetCC);

    if (ValLocs.size() != 1)

    const Value *RV = Ret->getOperand(0);

    if (RVVT != DestVT) {
      if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())

      bool IsZExt = Outs[0].Flags.isZExt();
      SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);

      SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff);

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);

                    TII.get(AArch64::RET_ReallyLR));
  for (unsigned RetReg : RetRegs)
bool AArch64FastISel::selectTrunc(const Instruction *I) {
  Type *DestTy = I->getType();
  Type *SrcTy = Op->getType();

  EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
  EVT DestEVT = TLI.getValueType(DL, DestTy, true);

    assert(ResultReg && "Unexpected AND instruction emission failure.");

    ResultReg = createResultReg(&AArch64::GPR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ResultReg)

  updateValueMap(I, ResultReg);
unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
         "Unexpected value type.");

    unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1);
    assert(ResultReg && "Unexpected AND instruction emission failure.");

      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(AArch64::SUBREG_TO_REG), Reg64)
          .addImm(AArch64::sub_32);

    return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
    Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
    Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;

      (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg);

unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
  return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
                          Op0, Op1, AArch64::XZR);

unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
  return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
                          Op0, Op1, AArch64::XZR);
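// Shift helpers: the *_rr forms use the LSLV/LSRV/ASRV variable-shift
// instructions and mask sub-32-bit operands first; the *_ri forms are emitted
// as SBFM/UBFM bitfield moves.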
unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg,
  bool NeedTrunc = false;
  case MVT::i8:  Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff;   break;
  case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
  case MVT::i32: Opc = AArch64::LSLVWr;                                  break;
  case MVT::i64: Opc = AArch64::LSLVXr;                                  break;

      (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);

unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
         "Unexpected source/return type pair.");
         "Unexpected source value type.");
          RetVT == MVT::i64) && "Unexpected return value type.");

  bool Is64Bit = (RetVT == MVT::i64);
  unsigned RegSize = Is64Bit ? 64 : 32;
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

  if (RetVT == SrcVT) {
    Register ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ResultReg)
    return emitIntExt(SrcVT, Op0, RetVT, IsZExt);

  if (Shift >= DstBits)

  unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
  static const unsigned OpcTable[2][2] = {
    {AArch64::SBFMWri, AArch64::SBFMXri},
    {AArch64::UBFMWri, AArch64::UBFMXri}
  unsigned Opc = OpcTable[IsZExt][Is64Bit];
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
        .addImm(AArch64::sub_32);
  return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg,
                                     unsigned Op1Reg) {
  unsigned Opc = 0;
  bool NeedTrunc = false;
  uint64_t Mask = 0;
  switch (RetVT.SimpleTy) {
  default:
    return 0;
  case MVT::i8:  Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff;   break;
  case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
  case MVT::i32: Opc = AArch64::LSRVWr; break;
  case MVT::i64: Opc = AArch64::LSRVXr; break;
  }

  const TargetRegisterClass *RC =
      (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  // For i8/i16 clear the bits above the narrow width in both the value and
  // the shift amount, so no stray high bits are shifted into the result.
  if (NeedTrunc) {
    Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask);
    Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
  }

  Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
  if (NeedTrunc)
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
  return ResultReg;
}
unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
                                     uint64_t Shift, bool IsZExt) {
  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
         "Unexpected source/return type pair.");
  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
         "Unexpected source value type.");
  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
          RetVT == MVT::i64) &&
         "Unexpected return value type.");

  bool Is64Bit = (RetVT == MVT::i64);
  unsigned RegSize = Is64Bit ? 64 : 32;
  unsigned DstBits = RetVT.getSizeInBits();
  unsigned SrcBits = SrcVT.getSizeInBits();
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

  // A shift of zero is just a copy (or an extension if the types differ).
  if (Shift == 0) {
    if (RetVT == SrcVT) {
      Register ResultReg = createResultReg(RC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), ResultReg)
          .addReg(Op0);
      return ResultReg;
    }
    return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
  }

  // Shifting the entire value out is undefined; bail.
  if (Shift >= DstBits)
    return 0;

  // Shifting out all source bits of a zero-extended value yields zero.
  if (Shift >= SrcBits && IsZExt)
    return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);

  // A sign-extend cannot be folded into the logical shift right, so extend
  // the source explicitly and shift the widened value instead.
  if (!IsZExt) {
    Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
    if (!Op0)
      return 0;
    SrcVT = RetVT;
    SrcBits = SrcVT.getSizeInBits();
    IsZExt = true;
  }

  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
  unsigned ImmS = SrcBits - 1;
  static const unsigned OpcTable[2][2] = {
      {AArch64::SBFMWri, AArch64::SBFMXri},
      {AArch64::UBFMWri, AArch64::UBFMXri}
  };
  unsigned Opc = OpcTable[IsZExt][Is64Bit];

  // Widen a narrow source to 64 bits before the 64-bit bitfield move.
  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
    Register TmpReg = MRI.createVirtualRegister(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
        .addImm(0)
        .addReg(Op0)
        .addImm(AArch64::sub_32);
    Op0 = TmpReg;
  }
  return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
}
unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg,
                                     unsigned Op1Reg) {
  unsigned Opc = 0;
  bool NeedTrunc = false;
  uint64_t Mask = 0;
  switch (RetVT.SimpleTy) {
  default:
    return 0;
  case MVT::i8:  Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff;   break;
  case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
  case MVT::i32: Opc = AArch64::ASRVWr; break;
  case MVT::i64: Opc = AArch64::ASRVXr; break;
  }

  const TargetRegisterClass *RC =
      (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  // For i8/i16, sign-extend the value to i32 and mask the shift amount so
  // the arithmetic shift sees the correct sign bit.
  if (NeedTrunc) {
    Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
    Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
  }

  Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
  if (NeedTrunc)
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
  return ResultReg;
}
unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
                                     uint64_t Shift, bool IsZExt) {
  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
         "Unexpected source/return type pair.");
  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
         "Unexpected source value type.");
  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
          RetVT == MVT::i64) &&
         "Unexpected return value type.");

  bool Is64Bit = (RetVT == MVT::i64);
  unsigned RegSize = Is64Bit ? 64 : 32;
  unsigned DstBits = RetVT.getSizeInBits();
  unsigned SrcBits = SrcVT.getSizeInBits();
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

  // A shift of zero is just a copy (or an extension if the types differ).
  if (Shift == 0) {
    if (RetVT == SrcVT) {
      Register ResultReg = createResultReg(RC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), ResultReg)
          .addReg(Op0);
      return ResultReg;
    }
    return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
  }

  // Shifting the entire value out is undefined; bail.
  if (Shift >= DstBits)
    return 0;

  // An arithmetic shift of a zero-extended (non-negative) value by at least
  // its width produces zero.
  if (Shift >= SrcBits && IsZExt)
    return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);

  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
  unsigned ImmS = SrcBits - 1;
  static const unsigned OpcTable[2][2] = {
      {AArch64::SBFMWri, AArch64::SBFMXri},
      {AArch64::UBFMWri, AArch64::UBFMXri}
  };
  unsigned Opc = OpcTable[IsZExt][Is64Bit];

  // Widen a narrow source to 64 bits before the 64-bit bitfield move.
  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
    Register TmpReg = MRI.createVirtualRegister(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
        .addImm(0)
        .addReg(Op0)
        .addImm(AArch64::sub_32);
    Op0 = TmpReg;
  }
  return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
}
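
// Integer extension is also a bitfield move: UBFM/SBFM with ImmR = 0 and
// ImmS = SrcBits - 1 is exactly uxtb/uxth/uxtw respectively sxtb/sxth/sxtw,
// e.g. uxtb w0, w1 == ubfm w0, w1, #0, #7.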
unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
                                     bool IsZExt) {
  bool Is64Bit = (DestVT == MVT::i64);
  unsigned Opc, Imm = 0;
  switch (SrcVT.SimpleTy) {
  default: return 0;
  case MVT::i1:
    return emiti1Ext(SrcReg, DestVT, IsZExt);
  case MVT::i8:   // uxtb/sxtb: keep bits [7:0]
    Opc = IsZExt ? (Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri)
                 : (Is64Bit ? AArch64::SBFMXri : AArch64::SBFMWri);
    Imm = 7;
    break;
  case MVT::i16:  // uxth/sxth: keep bits [15:0]
    Opc = IsZExt ? (Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri)
                 : (Is64Bit ? AArch64::SBFMXri : AArch64::SBFMWri);
    Imm = 15;
    break;
  case MVT::i32:  // uxtw/sxtw: keep bits [31:0]
    Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    Imm = 31;
    break;
  }
  // The 64-bit UBFM/SBFM forms read a 64-bit source, so widen the input.
  if (Is64Bit) {
    Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(AArch64::SUBREG_TO_REG), Src64)
        .addImm(0)
        .addReg(SrcReg)
        .addImm(AArch64::sub_32);
    SrcReg = Src64;
  }
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm);
}
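
// The two helpers below classify the loads FastISel may already have
// emitted: the byte/half/word loads in the first list zero-extend into their
// destination (writing a W register clears the upper 32 bits of the X
// register), and the LDRS* forms in the second list sign-extend. A zext or
// sext user of such a load is therefore already satisfied by the load itself.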
static bool isZExtLoad(const MachineInstr *LI) {
  switch (LI->getOpcode()) {
  default:
    return false;
  case AArch64::LDURBBi:
  case AArch64::LDURHHi:
  case AArch64::LDURWi:
  case AArch64::LDRBBui:
  case AArch64::LDRHHui:
  case AArch64::LDRWui:
  case AArch64::LDRBBroX:
  case AArch64::LDRHHroX:
  case AArch64::LDRWroX:
  case AArch64::LDRBBroW:
  case AArch64::LDRHHroW:
  case AArch64::LDRWroW:
    return true;
  }
}

static bool isSExtLoad(const MachineInstr *LI) {
  switch (LI->getOpcode()) {
  default:
    return false;
  case AArch64::LDURSBWi:
  case AArch64::LDURSHWi:
  case AArch64::LDURSBXi:
  case AArch64::LDURSHXi:
  case AArch64::LDURSWi:
  case AArch64::LDRSBWui:
  case AArch64::LDRSHWui:
  case AArch64::LDRSBXui:
  case AArch64::LDRSHXui:
  case AArch64::LDRSWui:
  case AArch64::LDRSBWroX:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroX:
  case AArch64::LDRSBWroW:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSWroW:
    return true;
  }
}
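
// optimizeIntExtLoad: if the operand of a zext/sext is a load whose selected
// machine instruction already performs the matching extension, reuse the
// load's result register instead of emitting a separate extend, and drop the
// now-dead sub-register copy.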
bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
                                         MVT SrcVT) {
  const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
  if (!LI || !LI->hasOneUse())
    return false;

  // The load must already have been selected.
  Register Reg = lookUpRegForValue(LI);
  if (!Reg)
    return false;

  MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
  if (!MI)
    return false;

  // Check that the emitted load performs the extension we need.
  bool IsZExt = isa<ZExtInst>(I);
  const auto *LoadMI = MI;
  if (LoadMI->getOpcode() == TargetOpcode::COPY &&
      LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
    Register LoadReg = MI->getOperand(1).getReg();
    LoadMI = MRI.getUniqueVRegDef(LoadReg);
    assert(LoadMI && "Expected valid instruction");
  }
  if ((IsZExt && !isZExtLoad(LoadMI)) || (!IsZExt && !isSExtLoad(LoadMI)))
    return false;

  // Nothing more to do when no widening to i64 is required.
  if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
    updateValueMap(I, Reg);
    return true;
  }

  if (IsZExt) {
    // The load already zeroed the upper bits; just widen the register.
    Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(AArch64::SUBREG_TO_REG), Reg64)
        .addImm(0)
        .addReg(Reg, getKillRegState(true))
        .addImm(AArch64::sub_32);
    Reg = Reg64;
  } else {
    assert((MI->getOpcode() == TargetOpcode::COPY &&
            MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
           "Expected copy instruction");
    Reg = MI->getOperand(1).getReg();
    MachineBasicBlock::iterator I(MI);
    removeDeadCode(I, std::next(I));
  }
  updateValueMap(I, Reg);
  return true;
}
bool AArch64FastISel::selectIntExt(const Instruction *I) {
  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
         "Unexpected integer extend instruction.");
  MVT RetVT;
  MVT SrcVT;
  if (!isTypeSupported(I->getType(), RetVT))
    return false;
  if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
    return false;

  // Try to fold the extension into a preceding load.
  if (optimizeIntExtLoad(I, RetVT, SrcVT))
    return true;

  Register SrcReg = getRegForValue(I->getOperand(0));
  if (!SrcReg)
    return false;

  bool IsZExt = isa<ZExtInst>(I);
  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
    if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
      // The argument is already extended by the ABI; at most widen the
      // register to 64 bits with SUBREG_TO_REG.
      if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
        Register ResultReg = createResultReg(&AArch64::GPR64RegClass);
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                TII.get(AArch64::SUBREG_TO_REG), ResultReg)
            .addImm(0)
            .addReg(SrcReg)
            .addImm(AArch64::sub_32);
        SrcReg = ResultReg;
      }
      updateValueMap(I, SrcReg);
      return true;
    }
  }

  unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}
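
// Integer remainder has no dedicated instruction; it is lowered using the
// identity a % b == a - (a / b) * b. SDIV/UDIV produces the quotient and
// MSUB (Rd = Ra - Rn * Rm) recovers the remainder in one instruction.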
bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
  EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
  if (!DestEVT.isSimple())
    return false;
  MVT DestVT = DestEVT.getSimpleVT();

  unsigned DivOpc;
  bool Is64bit = (DestVT == MVT::i64);
  switch (ISDOpcode) {
  default:
    return false;
  case ISD::SREM:
    DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
    break;
  case ISD::UREM:
    DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
    break;
  }
  unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
  Register Src0Reg = getRegForValue(I->getOperand(0));
  if (!Src0Reg)
    return false;

  Register Src1Reg = getRegForValue(I->getOperand(1));
  if (!Src1Reg)
    return false;

  const TargetRegisterClass *RC =
      (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  Register QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg);
  assert(QuotReg && "Unexpected DIV instruction emission failure.");
  // MSUB computes Src0 - QuotReg * Src1, i.e. the remainder.
  Register ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg);
  updateValueMap(I, ResultReg);
  return true;
}
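
// Multiplies by a power of two are strength-reduced to a left shift. When the
// multiplied operand is itself a zext/sext whose source type is supported,
// the extension is folded into the shift (emitLSL_ri handles the combined
// extend-and-shift); otherwise a plain MADD-based multiply is emitted.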
bool AArch64FastISel::selectMul(const Instruction *I) {
  MVT VT;
  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
    return false;

  const Value *Src0 = I->getOperand(0);
  const Value *Src1 = I->getOperand(1);
  if (const auto *C = dyn_cast<ConstantInt>(Src0))
    if (C->getValue().isPowerOf2())
      std::swap(Src0, Src1);

  // Try to simplify a multiply by a power of two into a left shift.
  if (const auto *C = dyn_cast<ConstantInt>(Src1))
    if (C->getValue().isPowerOf2()) {
      uint64_t ShiftVal = C->getValue().logBase2();
      MVT SrcVT = VT;
      bool IsZExt = true;
      // Fold a surrounding zero/sign extension into the shift.
      if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
        MVT VT;  // shadows the result type; only used to query the source type
        if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
          SrcVT = VT;
          IsZExt = true;
          Src0 = ZExt->getOperand(0);
        }
      } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
        MVT VT;
        if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
          SrcVT = VT;
          IsZExt = false;
          Src0 = SExt->getOperand(0);
        }
      }

      Register Src0Reg = getRegForValue(Src0);
      if (!Src0Reg)
        return false;

      unsigned ResultReg =
          emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt);
      if (ResultReg) {
        updateValueMap(I, ResultReg);
        return true;
      }
    }

  Register Src0Reg = getRegForValue(I->getOperand(0));
  if (!Src0Reg)
    return false;

  Register Src1Reg = getRegForValue(I->getOperand(1));
  if (!Src1Reg)
    return false;

  unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg);
  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}
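
// Shifts with a constant amount go through the *_ri helpers so that a
// surrounding zext/sext of the shifted value can be folded into the bitfield
// move; shifts by a register amount use the LSLV/LSRV/ASRV forms above.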
bool AArch64FastISel::selectShift(const Instruction *I) {
  MVT RetVT;
  if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
    return false;

  if (RetVT.isVector())
    return selectOperator(I, I->getOpcode());

  // Shift by a constant amount: fold the operand extension into the shift.
  if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
    unsigned ResultReg = 0;
    uint64_t ShiftVal = C->getZExtValue();
    MVT SrcVT = RetVT;
    bool IsZExt = I->getOpcode() != Instruction::AShr;
    const Value *Op0 = I->getOperand(0);
    if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
      MVT TmpVT;
      if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
        SrcVT = TmpVT;
        IsZExt = true;
        Op0 = ZExt->getOperand(0);
      }
    } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
      MVT TmpVT;
      if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
        SrcVT = TmpVT;
        IsZExt = false;
        Op0 = SExt->getOperand(0);
      }
    }

    Register Op0Reg = getRegForValue(Op0);
    if (!Op0Reg)
      return false;

    switch (I->getOpcode()) {
    default: llvm_unreachable("Unexpected instruction.");
    case Instruction::Shl:
      ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
      break;
    case Instruction::AShr:
      ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
      break;
    case Instruction::LShr:
      ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
      break;
    }
    if (!ResultReg)
      return false;

    updateValueMap(I, ResultReg);
    return true;
  }

  // Shift by a register amount.
  Register Op0Reg = getRegForValue(I->getOperand(0));
  if (!Op0Reg)
    return false;

  Register Op1Reg = getRegForValue(I->getOperand(1));
  if (!Op1Reg)
    return false;

  unsigned ResultReg = 0;
  switch (I->getOpcode()) {
  default: llvm_unreachable("Unexpected instruction.");
  case Instruction::Shl:
    ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg);
    break;
  case Instruction::AShr:
    ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg);
    break;
  case Instruction::LShr:
    ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg);
    break;
  }
  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}
bool AArch64FastISel::selectBitCast(const Instruction *I) {
  MVT SrcVT, RetVT;
  if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
    return false;
  if (!isTypeLegal(I->getType(), RetVT))
    return false;

  // Only int<->fp bitcasts of matching width are handled here.
  unsigned Opc;
  if (RetVT == MVT::f32 && SrcVT == MVT::i32)
    Opc = AArch64::FMOVWSr;
  else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
    Opc = AArch64::FMOVXDr;
  else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
    Opc = AArch64::FMOVSWr;
  else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
    Opc = AArch64::FMOVDXr;
  else
    return false;

  const TargetRegisterClass *RC = nullptr;
  switch (RetVT.SimpleTy) {
  default: llvm_unreachable("Unexpected value type.");
  case MVT::i32: RC = &AArch64::GPR32RegClass; break;
  case MVT::i64: RC = &AArch64::GPR64RegClass; break;
  case MVT::f32: RC = &AArch64::FPR32RegClass; break;
  case MVT::f64: RC = &AArch64::FPR64RegClass; break;
  }
  Register Op0Reg = getRegForValue(I->getOperand(0));
  if (!Op0Reg)
    return false;

  Register ResultReg = fastEmitInst_r(Opc, RC, Op0Reg);
  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}
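
// There is no AArch64 floating-point remainder instruction, so frem is
// lowered to a libcall (fmodf for f32, fmod for f64) via RTLIB::REM_F32/F64.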
bool AArch64FastISel::selectFRem(const Instruction *I) {
  MVT RetVT;
  if (!isTypeLegal(I->getType(), RetVT))
    return false;

  RTLIB::Libcall LC;
  switch (RetVT.SimpleTy) {
  default:
    return false;
  case MVT::f32:
    LC = RTLIB::REM_F32;
    break;
  case MVT::f64:
    LC = RTLIB::REM_F64;
    break;
  }

  ArgListTy Args;
  Args.reserve(I->getNumOperands());

  // Populate the argument list.
  for (auto &Arg : I->operands()) {
    ArgListEntry Entry;
    Entry.Val = Arg;
    Entry.Ty = Arg->getType();
    Args.push_back(Entry);
  }

  CallLoweringInfo CLI;
  MCContext &Ctx = MF->getContext();
  CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
                TLI.getLibcallName(LC), std::move(Args));
  if (!lowerCallTo(CLI))
    return false;
  updateValueMap(I, CLI.ResultReg);
  return true;
}
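
// Signed division by a (possibly negated) power of two 2^k avoids SDIV:
//   q = (x + ((x < 0) ? (2^k - 1) : 0)) >> k    (arithmetic shift)
// which rounds towards zero as sdiv requires. An 'exact' division needs only
// the arithmetic shift, and a negative divisor additionally negates the
// result (a subtract from the zero register with an ASR-shifted operand).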
bool AArch64FastISel::selectSDiv(const Instruction *I) {
  MVT VT;
  if (!isTypeLegal(I->getType(), VT))
    return false;

  if (!isa<ConstantInt>(I->getOperand(1)))
    return false;

  const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
  if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
      !(C.isPowerOf2() || C.isNegatedPowerOf2()))
    return false;

  unsigned Lg2 = C.countTrailingZeros();
  Register Src0Reg = getRegForValue(I->getOperand(0));
  if (!Src0Reg)
    return false;

  // An 'exact' sdiv never rounds, so a single arithmetic shift suffices.
  if (cast<BinaryOperator>(I)->isExact()) {
    unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2);
    if (!ResultReg)
      return false;
    updateValueMap(I, ResultReg);
    return true;
  }

  // Add 2^k - 1 to negative dividends so the shift rounds towards zero.
  int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
  unsigned AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne);
  if (!AddReg)
    return false;

  if (!emitICmp_ri(VT, Src0Reg, 0))
    return false;

  unsigned SelectOpc;
  const TargetRegisterClass *RC;
  if (VT == MVT::i64) {
    SelectOpc = AArch64::CSELXr;
    RC = &AArch64::GPR64RegClass;
  } else {
    SelectOpc = AArch64::CSELWr;
    RC = &AArch64::GPR32RegClass;
  }
  Register SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg,
                                        AArch64CC::LT);
  if (!SelectReg)
    return false;

  // Divide by the power of two with an arithmetic shift; negate the result
  // when the divisor was negative.
  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
  unsigned ResultReg;
  if (C.isNegatedPowerOf2())
    ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg,
                              AArch64_AM::ASR, Lg2);
  else
    ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2);
  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}
unsigned AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
  Register IdxN = getRegForValue(Idx);
  if (!IdxN)
    return 0;

  // If the index is narrower than the pointer width, sign-extend it.
  MVT PtrVT = TLI.getPointerTy(DL);
  EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
  if (IdxVT.bitsLT(PtrVT)) {
    IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
  } else if (IdxVT.bitsGT(PtrVT))
    llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
  return IdxN;
}
bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
  Register N = getRegForValue(I->getOperand(0));
  if (!N)
    return false;

  // Keep a running offset of constant contributions and fold them in lazily.
  uint64_t TotalOffs = 0;
  MVT VT = TLI.getPointerTy(DL);
  for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
       GTI != E; ++GTI) {
    const Value *Idx = GTI.getOperand();
    if (auto *StTy = GTI.getStructTypeOrNull()) {
      unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
      if (Field)
        TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
    } else {
      Type *Ty = GTI.getIndexedType();
      // Constant array subscripts just add to the running offset.
      if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
        if (CI->isZero())
          continue;
        TotalOffs +=
            DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
        continue;
      }
      if (TotalOffs) {
        N = emitAdd_ri_(VT, N, TotalOffs);
        if (!N)
          return false;
        TotalOffs = 0;
      }

      // Variable index: scale it by the element size and add it to N.
      uint64_t ElementSize = DL.getTypeAllocSize(Ty);
      unsigned IdxN = getRegForGEPIndex(Idx);
      if (!IdxN)
        return false;

      if (ElementSize != 1) {
        unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
        if (!C)
          return false;
        IdxN = emitMul_rr(VT, IdxN, C);
        if (!IdxN)
          return false;
      }
      N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
      if (!N)
        return false;
    }
  }
  if (TotalOffs) {
    N = emitAdd_ri_(VT, N, TotalOffs);
    if (!N)
      return false;
  }
  updateValueMap(I, N);
  return true;
}
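
// cmpxchg at -O0 is selected as the CMP_SWAP_32/64 pseudo (expanded to an
// exclusive load/store loop later). The loaded value is then compared against
// the expected value with SUBS, and a CSINC of the zero register materializes
// the i1 "success" field of the result pair.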
4965 "cmpxchg survived AtomicExpand at optlevel > -O0");
4967 auto *RetPairTy = cast<StructType>(
I->getType());
4968 Type *RetTy = RetPairTy->getTypeAtIndex(0U);
4969 assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
4970 "cmpxchg has a non-i1 status result");
4973 if (!isTypeLegal(RetTy, VT))
4977 unsigned Opc, CmpOpc;
4981 Opc = AArch64::CMP_SWAP_32;
4982 CmpOpc = AArch64::SUBSWrs;
4983 ResRC = &AArch64::GPR32RegClass;
4985 Opc = AArch64::CMP_SWAP_64;
4986 CmpOpc = AArch64::SUBSXrs;
4987 ResRC = &AArch64::GPR64RegClass;
4995 II, getRegForValue(
I->getPointerOperand()), II.
getNumDefs());
4997 II, getRegForValue(
I->getCompareOperand()), II.
getNumDefs() + 1);
4999 II, getRegForValue(
I->getNewValOperand()), II.
getNumDefs() + 2);
5001 const Register ResultReg1 = createResultReg(ResRC);
5002 const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5003 const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5006 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
5013 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(CmpOpc))
5019 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(AArch64::CSINCWr))
5025 assert((ResultReg1 + 1) == ResultReg2 &&
"Nonconsecutive result registers.");
5026 updateValueMap(
I, ResultReg1, 2);
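
// Top-level FastISel hook: dispatch on the IR opcode to the select* routines
// above and fall back to the target-independent selectOperator for anything
// not handled here.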
bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
  switch (I->getOpcode()) {
  default:
    break;
  case Instruction::Add:
  case Instruction::Sub:
    return selectAddSub(I);
  case Instruction::Mul:
    return selectMul(I);
  case Instruction::SDiv:
    return selectSDiv(I);
  case Instruction::SRem:
    return selectRem(I, ISD::SREM);
  case Instruction::URem:
    return selectRem(I, ISD::UREM);
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return selectShift(I);
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    return selectLogicalOp(I);
  case Instruction::Br:
    return selectBranch(I);
  case Instruction::IndirectBr:
    return selectIndirectBr(I);
  case Instruction::BitCast:
    return selectBitCast(I);
  case Instruction::FPToSI:
    return selectFPToInt(I, /*Signed=*/true);
  case Instruction::FPToUI:
    return selectFPToInt(I, /*Signed=*/false);
  case Instruction::ZExt:
  case Instruction::SExt:
    return selectIntExt(I);
  case Instruction::Trunc:
    return selectTrunc(I);
  case Instruction::FPExt:
    return selectFPExt(I);
  case Instruction::FPTrunc:
    return selectFPTrunc(I);
  case Instruction::SIToFP:
    return selectIntToFP(I, /*Signed=*/true);
  case Instruction::UIToFP:
    return selectIntToFP(I, /*Signed=*/false);
  case Instruction::Load:
    return selectLoad(I);
  case Instruction::Store:
    return selectStore(I);
  case Instruction::FCmp:
  case Instruction::ICmp:
    return selectCmp(I);
  case Instruction::Select:
    return selectSelect(I);
  case Instruction::Ret:
    return selectRet(I);
  case Instruction::FRem:
    return selectFRem(I);
  case Instruction::GetElementPtr:
    return selectGetElementPtr(I);
  case Instruction::AtomicCmpXchg:
    return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
  }

  // Fall back to target-independent instruction selection.
  return selectOperator(I, I->getOpcode());
}
namespace llvm {

FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
                                  const TargetLibraryInfo *LibInfo) {
  return new AArch64FastISel(FuncInfo, LibInfo);
}

} // end namespace llvm