#include "llvm/IR/IntrinsicsX86.h"

class X86FastISel final : public FastISel {
  bool fastSelectInstruction(const Instruction *I) override;

  bool fastLowerArguments() override;
  bool fastLowerCall(CallLoweringInfo &CLI) override;
  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;

#include "X86GenFastISel.inc"
                        unsigned &ResultReg, unsigned Alignment = 1);

  bool X86SelectFPExtOrFPTrunc(const Instruction *I, unsigned Opc,

  bool X86SelectIntToFP(const Instruction *I, bool IsSigned);

    return Subtarget->getInstrInfo();

  unsigned fastMaterializeConstant(const Constant *C) override;

  unsigned fastMaterializeAlloca(const AllocaInst *C) override;

  unsigned fastMaterializeFloatZero(const ConstantFP *CF) override;

  bool isScalarFPTypeInSSEReg(EVT VT) const {
    return (VT == MVT::f64 && Subtarget->hasSSE2()) ||
           (VT == MVT::f32 && Subtarget->hasSSE1()) ||
           (VT == MVT::f16 && Subtarget->hasFP16());

  bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false);

  unsigned fastEmitInst_rrrr(unsigned MachineInstOpcode,
                             unsigned Op1, unsigned Op2, unsigned Op3);
static std::pair<unsigned, bool>
  bool NeedSwap = false;
  return std::make_pair(CC, NeedSwap);
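
// foldX86XALUIntrinsic (fragment below): fold the overflow bit of a
// *.with.overflow intrinsic into a condition code so the branch or select
// that uses it can test EFLAGS directly: COND_O for the signed add/sub and
// the multiply variants, COND_B for unsigned add/sub.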
  if (!isa<ExtractValueInst>(Cond))
  const auto *EV = cast<ExtractValueInst>(Cond);
  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
      cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
  if (!isTypeLegal(RetTy, RetVT))

  switch (II->getIntrinsicID()) {
  default: return false;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow: TmpCC = X86::COND_O; break;
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::usub_with_overflow: TmpCC = X86::COND_B; break;

  if (II->getParent() != I->getParent())
  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
    if (!isa<ExtractValueInst>(Itr))
    const auto *EVI = cast<ExtractValueInst>(Itr);
    if (EVI->getAggregateOperand() != II)
bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
  EVT evt = TLI.getValueType(DL, Ty, true);

  if (VT == MVT::f64 && !Subtarget->hasSSE2())
  if (VT == MVT::f32 && !Subtarget->hasSSE1())

  return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
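
// X86FastEmitLoad: choose a load opcode for VT from the available ISA level.
// Non-temporal loads (MOVNTDQA and friends) are used when the access is
// marked non-temporal and sufficiently aligned, aligned moves when the
// alignment covers the vector width, and unaligned moves otherwise.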
                                unsigned Alignment) {
  bool HasSSE1 = Subtarget->hasSSE1();
  bool HasSSE2 = Subtarget->hasSSE2();
  bool HasSSE41 = Subtarget->hasSSE41();
  bool HasAVX = Subtarget->hasAVX();
  bool HasAVX2 = Subtarget->hasAVX2();
  bool HasAVX512 = Subtarget->hasAVX512();
  bool HasVLX = Subtarget->hasVLX();

  default:
    return false;
    Opc = HasAVX512 ? X86::VMOVSSZrm_alt
          : HasAVX  ? X86::VMOVSSrm_alt
          : HasSSE1 ? X86::MOVSSrm_alt
    Opc = HasAVX512 ? X86::VMOVSDZrm_alt
          : HasAVX  ? X86::VMOVSDrm_alt
          : HasSSE2 ? X86::MOVSDrm_alt
    if (IsNonTemporal && Alignment >= 16 && HasSSE41)
      Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
            HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
    else if (Alignment >= 16)
      Opc = HasVLX ? X86::VMOVAPSZ128rm :
            HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm;
      Opc = HasVLX ? X86::VMOVUPSZ128rm :
            HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm;
    if (IsNonTemporal && Alignment >= 16 && HasSSE41)
      Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
            HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
    else if (Alignment >= 16)
      Opc = HasVLX ? X86::VMOVAPDZ128rm :
            HasAVX ? X86::VMOVAPDrm : X86::MOVAPDrm;
      Opc = HasVLX ? X86::VMOVUPDZ128rm :
            HasAVX ? X86::VMOVUPDrm : X86::MOVUPDrm;
    if (IsNonTemporal && Alignment >= 16 && HasSSE41)
      Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
            HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
    else if (Alignment >= 16)
      Opc = HasVLX ? X86::VMOVDQA64Z128rm :
            HasAVX ? X86::VMOVDQArm : X86::MOVDQArm;
      Opc = HasVLX ? X86::VMOVDQU64Z128rm :
            HasAVX ? X86::VMOVDQUrm : X86::MOVDQUrm;
    if (IsNonTemporal && Alignment >= 32 && HasAVX2)
      Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
    else if (IsNonTemporal && Alignment >= 16)
    else if (Alignment >= 32)
      Opc = HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm;
      Opc = HasVLX ? X86::VMOVUPSZ256rm : X86::VMOVUPSYrm;
    if (IsNonTemporal && Alignment >= 32 && HasAVX2)
      Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
    else if (IsNonTemporal && Alignment >= 16)
    else if (Alignment >= 32)
      Opc = HasVLX ? X86::VMOVAPDZ256rm : X86::VMOVAPDYrm;
      Opc = HasVLX ? X86::VMOVUPDZ256rm : X86::VMOVUPDYrm;
    if (IsNonTemporal && Alignment >= 32 && HasAVX2)
      Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
    else if (IsNonTemporal && Alignment >= 16)
    else if (Alignment >= 32)
      Opc = HasVLX ? X86::VMOVDQA64Z256rm : X86::VMOVDQAYrm;
      Opc = HasVLX ? X86::VMOVDQU64Z256rm : X86::VMOVDQUYrm;
    if (IsNonTemporal && Alignment >= 64)
      Opc = X86::VMOVNTDQAZrm;
      Opc = (Alignment >= 64) ? X86::VMOVAPSZrm : X86::VMOVUPSZrm;
    if (IsNonTemporal && Alignment >= 64)
      Opc = X86::VMOVNTDQAZrm;
      Opc = (Alignment >= 64) ? X86::VMOVAPDZrm : X86::VMOVUPDZrm;
    if (IsNonTemporal && Alignment >= 64)
      Opc = X86::VMOVNTDQAZrm;
      Opc = (Alignment >= 64) ? X86::VMOVDQA64Zrm : X86::VMOVDQU64Zrm;

  ResultReg = createResultReg(RC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
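
// X86FastEmitStore: opcode selection mirrors the load path above; MOVNT*
// variants are picked for non-temporal stores when the subtarget supports
// them, otherwise aligned or unaligned MOV forms are used.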
  bool HasSSE1 = Subtarget->hasSSE1();
  bool HasSSE2 = Subtarget->hasSSE2();
  bool HasSSE4A = Subtarget->hasSSE4A();
  bool HasAVX = Subtarget->hasAVX();
  bool HasAVX512 = Subtarget->hasAVX512();
  bool HasVLX = Subtarget->hasVLX();

  default:
    return false;
    Register AndResult = createResultReg(&X86::GR8RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(X86::AND8ri), AndResult)
  case MVT::i8:  Opc = X86::MOV8mr;  break;
  case MVT::i16: Opc = X86::MOV16mr; break;
    Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTImr : X86::MOV32mr;
    Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTI_64mr : X86::MOV64mr;
    if (IsNonTemporal && HasSSE4A)
      Opc = HasAVX512 ? X86::VMOVSSZmr :
            HasAVX ? X86::VMOVSSmr : X86::MOVSSmr;
    if (IsNonTemporal && HasSSE4A)
      Opc = HasAVX512 ? X86::VMOVSDZmr :
            HasAVX ? X86::VMOVSDmr : X86::MOVSDmr;
    Opc = (IsNonTemporal && HasSSE1) ? X86::MMX_MOVNTQmr : X86::MMX_MOVQ64mr;
      Opc = HasVLX ? X86::VMOVNTPSZ128mr :
            HasAVX ? X86::VMOVNTPSmr : X86::MOVNTPSmr;
      Opc = HasVLX ? X86::VMOVAPSZ128mr :
            HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr;
      Opc = HasVLX ? X86::VMOVUPSZ128mr :
            HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr;
      Opc = HasVLX ? X86::VMOVNTPDZ128mr :
            HasAVX ? X86::VMOVNTPDmr : X86::MOVNTPDmr;
      Opc = HasVLX ? X86::VMOVAPDZ128mr :
            HasAVX ? X86::VMOVAPDmr : X86::MOVAPDmr;
      Opc = HasVLX ? X86::VMOVUPDZ128mr :
            HasAVX ? X86::VMOVUPDmr : X86::MOVUPDmr;
      Opc = HasVLX ? X86::VMOVNTDQZ128mr :
            HasAVX ? X86::VMOVNTDQmr : X86::MOVNTDQmr;
      Opc = HasVLX ? X86::VMOVDQA64Z128mr :
            HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr;
      Opc = HasVLX ? X86::VMOVDQU64Z128mr :
            HasAVX ? X86::VMOVDQUmr : X86::MOVDQUmr;
      Opc = HasVLX ? X86::VMOVNTPSZ256mr : X86::VMOVNTPSYmr;
      Opc = HasVLX ? X86::VMOVAPSZ256mr : X86::VMOVAPSYmr;
      Opc = HasVLX ? X86::VMOVUPSZ256mr : X86::VMOVUPSYmr;
      Opc = HasVLX ? X86::VMOVNTPDZ256mr : X86::VMOVNTPDYmr;
      Opc = HasVLX ? X86::VMOVAPDZ256mr : X86::VMOVAPDYmr;
      Opc = HasVLX ? X86::VMOVUPDZ256mr : X86::VMOVUPDYmr;
      Opc = HasVLX ? X86::VMOVNTDQZ256mr : X86::VMOVNTDQYmr;
      Opc = HasVLX ? X86::VMOVDQA64Z256mr : X86::VMOVDQAYmr;
      Opc = HasVLX ? X86::VMOVDQU64Z256mr : X86::VMOVDQUYmr;
      Opc = IsNonTemporal ? X86::VMOVNTPSZmr : X86::VMOVAPSZmr;
      Opc = X86::VMOVUPSZmr;
      Opc = IsNonTemporal ? X86::VMOVNTPDZmr : X86::VMOVAPDZmr;
      Opc = X86::VMOVUPDZmr;
      Opc = IsNonTemporal ? X86::VMOVNTDQZmr : X86::VMOVDQA64Zmr;
      Opc = X86::VMOVDQU64Zmr;

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, Desc);
bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
  if (isa<ConstantPointerNull>(Val))

  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
    case MVT::i8:  Opc = X86::MOV8mi;  break;
    case MVT::i16: Opc = X86::MOV16mi; break;
    case MVT::i32: Opc = X86::MOV32mi; break;
      Opc = X86::MOV64mi32;
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
                        : CI->getZExtValue());

  Register ValReg = getRegForValue(Val);
  return X86FastEmitStore(VT, ValReg, AM, MMO, Aligned);

                                    unsigned Src, EVT SrcVT,
                                    unsigned &ResultReg) {
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    if (GV->isThreadLocal())
    if (GV->isAbsoluteSymbolRef())
    if (!Subtarget->isPICStyleRIPRel() ||
      unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);
        AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
      if (Subtarget->isPICStyleRIPRel()) {
      if (I != LocalValueMap.end() && I->second) {
        SavePoint SaveInsertPt = enterLocalValueArea();
          RC = &X86::GR64RegClass;
          RC = &X86::GR32RegClass;
        LoadReg = createResultReg(RC);
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), LoadReg);
        leaveLocalValueArea(SaveInsertPt);
        LocalValueMap[V] = LoadReg;

  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
    AM.Base.Reg = getRegForValue(V);
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();

    if (Ty->getAddressSpace() > 255)

  case Instruction::BitCast:
  case Instruction::IntToPtr:
        TLI.getPointerTy(DL))
  case Instruction::PtrToInt:
    if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
  case Instruction::Alloca: {
        FuncInfo.StaticAllocaMap.find(A);
    if (SI != FuncInfo.StaticAllocaMap.end()) {
  case Instruction::GetElementPtr: {
    unsigned Scale = AM.Scale;
         i != e; ++i, ++GTI) {
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
          Disp += CI->getSExtValue() * S;
          if (canFoldAddIntoGEP(U, Op)) {
                cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
            Op = cast<AddOperator>(Op)->getOperand(0);
            (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
            (S == 1 || S == 2 || S == 4 || S == 8)) {
          IndexReg = getRegForGEPIndex(Op);
          goto unsupported_gep;
          dyn_cast<GetElementPtrInst>(U->getOperand(0))) {
    if (handleConstantAddresses(I, AM))

  return handleConstantAddresses(V, AM);
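
// X86SelectCallAddress: a restricted variant of X86SelectAddress used for the
// call target. On x32 (64-bit mode with 32-bit pointers) the address register
// is zero-extended to 64 bits via MOV32rr + SUBREG_TO_REG before use.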
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
    Opcode = I->getOpcode();
    InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();

  case Instruction::BitCast:
      return X86SelectCallAddress(U->getOperand(0), AM);
  case Instruction::IntToPtr:
            TLI.getPointerTy(DL))
      return X86SelectCallAddress(U->getOperand(0), AM);
  case Instruction::PtrToInt:
    if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
      return X86SelectCallAddress(U->getOperand(0), AM);

  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    if (Subtarget->isPICStyleRIPRel() &&
      if (GVar->isThreadLocal())
    if (Subtarget->isPICStyleRIPRel()) {
      AM.GVOpFlags = Subtarget->classifyLocalReference(nullptr);

  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
    auto GetCallRegForValue = [this](const Value *V) {
      if (Reg && Subtarget->isTarget64BitILP32()) {
        Register CopyReg = createResultReg(&X86::GR32RegClass);
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV32rr),
        Register ExtReg = createResultReg(&X86::GR64RegClass);
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                TII.get(TargetOpcode::SUBREG_TO_REG), ExtReg)
      AM.Base.Reg = GetCallRegForValue(V);
      AM.IndexReg = GetCallRegForValue(V);
bool X86FastISel::X86SelectStore(const Instruction *I) {
  const Value *PtrV = I->getOperand(1);
  if (TLI.supportSwiftError()) {
    if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
      if (Arg->hasSwiftErrorAttr())
    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
      if (Alloca->isSwiftError())

  const Value *Val = S->getValueOperand();
  const Value *Ptr = S->getPointerOperand();

  if (!isTypeLegal(Val->getType(), VT, true))

  return X86FastEmitStore(VT, Val, AM, createMachineMemOperandFor(I), Aligned);
  const Function &F = *I->getParent()->getParent();

  if (!FuncInfo.CanLowerReturn)
  if (TLI.supportSwiftError() &&
      F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
  if (TLI.supportSplitCSR(FuncInfo.MF))

  if (Ret->getNumOperands() > 0) {
    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());

    const Value *RV = Ret->getOperand(0);
    if (ValLocs.size() != 1)
    if (SrcVT != DstVT) {
      if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
      if (Outs[0].Flags.isSExt())
        SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg);

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg);

           "SRetReturnReg should have been set in LowerFormalArguments()!");
    unsigned RetReg = Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    RetRegs.push_back(RetReg);

    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                  TII.get(Subtarget->is64Bit() ? X86::RETI64 : X86::RETI32))
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                  TII.get(Subtarget->is64Bit() ? X86::RET64 : X86::RET32));
  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
bool X86FastISel::X86SelectLoad(const Instruction *I) {
  const Value *SV = I->getOperand(0);
  if (TLI.supportSwiftError()) {
    if (const Argument *Arg = dyn_cast<Argument>(SV)) {
      if (Arg->hasSwiftErrorAttr())
    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
      if (Alloca->isSwiftError())

  if (!isTypeLegal(LI->getType(), VT, true))

  unsigned ResultReg = 0;
  if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg,

  updateValueMap(I, ResultReg);
  bool HasAVX512 = Subtarget->hasAVX512();
  bool HasAVX = Subtarget->hasAVX();
  bool HasSSE1 = Subtarget->hasSSE1();
  bool HasSSE2 = Subtarget->hasSSE2();

  case MVT::i8:  return X86::CMP8rr;
  case MVT::i16: return X86::CMP16rr;
  case MVT::i32: return X86::CMP32rr;
  case MVT::i64: return X86::CMP64rr;
    return HasAVX512 ? X86::VUCOMISSZrr
           : HasAVX  ? X86::VUCOMISSrr
           : HasSSE1 ? X86::UCOMISSrr
    return HasAVX512 ? X86::VUCOMISDZrr
           : HasAVX  ? X86::VUCOMISDrr
           : HasSSE2 ? X86::UCOMISDrr

      return X86::CMP16ri8;
    return X86::CMP16ri;
      return X86::CMP32ri8;
    return X86::CMP32ri;
      return X86::CMP64ri8;
    return X86::CMP64ri32;
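
// X86FastEmitCompare: emit a CMP against an immediate when the right-hand
// side is a ConstantInt, otherwise materialize both operands and emit the
// register-register CMP/UCOMIS form chosen above.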
bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1, EVT VT,
  Register Op0Reg = getRegForValue(Op0);
  if (Op0Reg == 0)
    return false;

  if (isa<ConstantPointerNull>(Op1))

  if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareImmOpc))
          .addImm(Op1C->getSExtValue());

  if (CompareOpc == 0)
    return false;

  Register Op1Reg = getRegForValue(Op1);
  if (Op1Reg == 0)
    return false;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareOpc))
  const CmpInst *CI = cast<CmpInst>(I);

  if (!isTypeLegal(I->getOperand(0)->getType(), VT))

  unsigned ResultReg = 0;
      ResultReg = createResultReg(&X86::GR32RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV32r0),
      ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg, X86::sub_8bit);
      ResultReg = createResultReg(&X86::GR8RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
    updateValueMap(I, ResultReg);

  const auto *RHSC = dyn_cast<ConstantFP>(RHS);
  if (RHSC && RHSC->isNullValue())

    static const uint16_t SETFOpcTable[2][3] = {
    ResultReg = createResultReg(&X86::GR8RegClass);

    if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))

    Register FlagReg1 = createResultReg(&X86::GR8RegClass);
    Register FlagReg2 = createResultReg(&X86::GR8RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
            FlagReg1).addImm(SETFOpc[0]);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
            FlagReg2).addImm(SETFOpc[1]);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[2]),
    updateValueMap(I, ResultReg);

  if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
  updateValueMap(I, ResultReg);
bool X86FastISel::X86SelectZExt(const Instruction *I) {
  EVT DstVT = TLI.getValueType(DL, I->getType());
  if (!TLI.isTypeLegal(DstVT))

  Register ResultReg = getRegForValue(I->getOperand(0));

  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
    ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg);

    case MVT::i8:  MovInst = X86::MOVZX32rr8;  break;
    case MVT::i16: MovInst = X86::MOVZX32rr16; break;
    case MVT::i32: MovInst = X86::MOV32rr;     break;

    Register Result32 = createResultReg(&X86::GR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovInst), Result32)

    ResultReg = createResultReg(&X86::GR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::SUBREG_TO_REG),
    Register Result32 = createResultReg(&X86::GR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOVZX32rr8),
            Result32).addReg(ResultReg);

    ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, X86::sub_16bit);
  } else if (DstVT != MVT::i8) {

  updateValueMap(I, ResultReg);
bool X86FastISel::X86SelectSExt(const Instruction *I) {
  EVT DstVT = TLI.getValueType(DL, I->getType());
  if (!TLI.isTypeLegal(DstVT))

  Register ResultReg = getRegForValue(I->getOperand(0));

  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
    ResultReg = createResultReg(&X86::GR8RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::NEG8r),
            ResultReg).addReg(ZExtReg);

    Register Result32 = createResultReg(&X86::GR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOVSX32rr8),
            Result32).addReg(ResultReg);

    ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, X86::sub_16bit);
  } else if (DstVT != MVT::i8) {

  updateValueMap(I, ResultReg);
bool X86FastISel::X86SelectBranch(const Instruction *I) {
      const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
      if (CmpRHSC && CmpRHSC->isNullValue())

      if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {

      bool NeedExtraBranch = false;
        NeedExtraBranch = true;

      if (!X86FastEmitCompare(CmpLHS, CmpRHS, VT, CI->getDebugLoc()))

      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))

      if (NeedExtraBranch) {
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))

      finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);

      if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
          isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) {
        unsigned TestOpc = 0;
        case MVT::i8:  TestOpc = X86::TEST8ri;    break;
        case MVT::i16: TestOpc = X86::TEST16ri;   break;
        case MVT::i32: TestOpc = X86::TEST32ri;   break;
        case MVT::i64: TestOpc = X86::TEST64ri32; break;

          Register OpReg = getRegForValue(TI->getOperand(0));
          if (OpReg == 0)
            return false;

          BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TestOpc))

          if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {

          BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))

          finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
  } else if (foldX86XALUIntrinsic(CC, BI, BI->getCondition())) {
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
    finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);

  if (OpReg == 0)
    return false;

    unsigned KOpReg = OpReg;
    OpReg = createResultReg(&X86::GR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), OpReg)
    OpReg = fastEmitInst_extractsubreg(MVT::i8, OpReg, X86::sub_8bit);

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
bool X86FastISel::X86SelectShift(const Instruction *I) {
  unsigned CReg = 0, OpReg = 0;
  if (I->getType()->isIntegerTy(8)) {
    RC = &X86::GR8RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR8rCL; break;
    case Instruction::AShr: OpReg = X86::SAR8rCL; break;
    case Instruction::Shl:  OpReg = X86::SHL8rCL; break;
    default: return false;
  } else if (I->getType()->isIntegerTy(16)) {
    RC = &X86::GR16RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR16rCL; break;
    case Instruction::AShr: OpReg = X86::SAR16rCL; break;
    case Instruction::Shl:  OpReg = X86::SHL16rCL; break;
  } else if (I->getType()->isIntegerTy(32)) {
    RC = &X86::GR32RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR32rCL; break;
    case Instruction::AShr: OpReg = X86::SAR32rCL; break;
    case Instruction::Shl:  OpReg = X86::SHL32rCL; break;
  } else if (I->getType()->isIntegerTy(64)) {
    RC = &X86::GR64RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR64rCL; break;
    case Instruction::AShr: OpReg = X86::SAR64rCL; break;
    case Instruction::Shl:  OpReg = X86::SHL64rCL; break;

  if (!isTypeLegal(I->getType(), VT))

  Register Op0Reg = getRegForValue(I->getOperand(0));
  if (Op0Reg == 0)
    return false;

  Register Op1Reg = getRegForValue(I->getOperand(1));
  if (Op1Reg == 0)
    return false;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),

  if (CReg != X86::CL)
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::KILL), X86::CL)

  Register ResultReg = createResultReg(RC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(OpReg), ResultReg)
  updateValueMap(I, ResultReg);
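
// X86SelectDivRem: division and remainder are driven by a table indexed by
// type (i8/i16/i32/i64) and operation (sdiv/srem/udiv/urem). Each entry names
// the DIV/IDIV opcode, how the high half of the dividend is prepared (sign
// extension or a zeroed register), and which physical register holds the
// result afterwards.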
bool X86FastISel::X86SelectDivRem(const Instruction *I) {
  const static unsigned NumTypes = 4;
  const static unsigned NumOps = 4;
  const static bool S = true;
  const static bool U = false;
  const static unsigned Copy = TargetOpcode::COPY;

  const static struct DivRemEntry {
    struct DivRemResult {
      unsigned OpSignExtend;
      unsigned DivRemResultReg;
    } ResultTable[NumOps];
  } OpTable[NumTypes] = {
    { &X86::GR8RegClass, X86::AX, 0, {
        { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S },
        { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S },
        { X86::DIV8r,  0, X86::MOVZX16rr8, X86::AL, U },
        { X86::DIV8r,  0, X86::MOVZX16rr8, X86::AH, U },
    { &X86::GR16RegClass, X86::AX, X86::DX, {
        { X86::IDIV16r, X86::CWD,     Copy, X86::AX, S },
        { X86::IDIV16r, X86::CWD,     Copy, X86::DX, S },
        { X86::DIV16r,  X86::MOV32r0, Copy, X86::AX, U },
        { X86::DIV16r,  X86::MOV32r0, Copy, X86::DX, U },
    { &X86::GR64RegClass, X86::RAX, X86::RDX, {
        { X86::IDIV64r, X86::CQO,     Copy, X86::RAX, S },
        { X86::IDIV64r, X86::CQO,     Copy, X86::RDX, S },
        { X86::DIV64r,  X86::MOV32r0, Copy, X86::RAX, U },
        { X86::DIV64r,  X86::MOV32r0, Copy, X86::RDX, U },

  if (!isTypeLegal(I->getType(), VT))

  default: return false;
  case MVT::i8:  TypeIndex = 0; break;
  case MVT::i16: TypeIndex = 1; break;
  case MVT::i32: TypeIndex = 2; break;
    if (!Subtarget->is64Bit())

  switch (I->getOpcode()) {
  case Instruction::SDiv: OpIndex = 0; break;
  case Instruction::SRem: OpIndex = 1; break;
  case Instruction::UDiv: OpIndex = 2; break;
  case Instruction::URem: OpIndex = 3; break;

  const DivRemEntry &TypeEntry = OpTable[TypeIndex];
  const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
  Register Op0Reg = getRegForValue(I->getOperand(0));
  Register Op1Reg = getRegForValue(I->getOperand(1));

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
          TII.get(OpEntry.OpCopy), TypeEntry.LowInReg).addReg(Op0Reg);

  if (OpEntry.OpSignExtend) {
    if (OpEntry.IsOpSigned)
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(OpEntry.OpSignExtend));
      Register Zero32 = createResultReg(&X86::GR32RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(X86::MOV32r0), Zero32);

        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                TII.get(Copy), TypeEntry.HighInReg)
            .addReg(Zero32, 0, X86::sub_16bit);
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                TII.get(Copy), TypeEntry.HighInReg)
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
          TII.get(OpEntry.OpDivRem)).addReg(Op1Reg);

  unsigned ResultReg = 0;
  if ((I->getOpcode() == Instruction::SRem ||
       I->getOpcode() == Instruction::URem) &&
      OpEntry.DivRemResultReg == X86::AH && Subtarget->is64Bit()) {
    Register SourceSuperReg = createResultReg(&X86::GR16RegClass);
    Register ResultSuperReg = createResultReg(&X86::GR16RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(Copy), SourceSuperReg).addReg(X86::AX);

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SHR16ri),

    ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultSuperReg,

    ResultReg = createResultReg(TypeEntry.RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Copy), ResultReg)
        .addReg(OpEntry.DivRemResultReg);
  updateValueMap(I, ResultReg);
bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
  if (!Subtarget->canUseCMOV())

  if (RetVT < MVT::i16 || RetVT > MVT::i64)

  bool NeedTest = true;

  const auto *CI = dyn_cast<CmpInst>(Cond);
  if (CI && (CI->getParent() == I->getParent())) {

    static const uint16_t SETFOpcTable[2][3] = {
      SETFOpc = &SETFOpcTable[0][0];
      SETFOpc = &SETFOpcTable[1][0];

    if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))

      Register FlagReg1 = createResultReg(&X86::GR8RegClass);
      Register FlagReg2 = createResultReg(&X86::GR8RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
              FlagReg1).addImm(SETFOpc[0]);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
              FlagReg2).addImm(SETFOpc[1]);
      auto const &II = TII.get(SETFOpc[2]);
      if (II.getNumDefs()) {
        Register TmpReg = createResultReg(&X86::GR8RegClass);
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, TmpReg)
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
  } else if (foldX86XALUIntrinsic(CC, I, Cond)) {

      unsigned KCondReg = CondReg;
      CondReg = createResultReg(&X86::GR32RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), CondReg)
      CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, X86::sub_8bit);

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))

  if (!LHSReg || !RHSReg)

  Register ResultReg = fastEmitInst_rri(Opc, RC, RHSReg, LHSReg, CC);
  updateValueMap(I, ResultReg);
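
// X86FastEmitSSESelect: lower a select whose condition is an FCmp in the same
// block using SSE/AVX compare-and-blend: AND/ANDN/OR on plain SSE, VBLENDV on
// AVX, and a mask-predicated VMOVSS/VMOVSD on AVX-512.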
bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
  const auto *CI = dyn_cast<FCmpInst>(I->getOperand(0));
  if (!CI || (CI->getParent() != I->getParent()))

      !((Subtarget->hasSSE1() && RetVT == MVT::f32) ||
        (Subtarget->hasSSE2() && RetVT == MVT::f64)))

  const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
  if (CmpRHSC && CmpRHSC->isNullValue())

  if (CC > 7 && !Subtarget->hasAVX())

  Register CmpLHSReg = getRegForValue(CmpLHS);
  Register CmpRHSReg = getRegForValue(CmpRHS);
  if (!LHSReg || !RHSReg || !CmpLHSReg || !CmpRHSReg)

  if (Subtarget->hasAVX512()) {
    unsigned CmpOpcode =
        (RetVT == MVT::f32) ? X86::VCMPSSZrr : X86::VCMPSDZrr;
    Register CmpReg = fastEmitInst_rri(CmpOpcode, VK1, CmpLHSReg, CmpRHSReg,

    Register ImplicitDefReg = createResultReg(VR128X);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);

    unsigned MovOpcode =
        (RetVT == MVT::f32) ? X86::VMOVSSZrrk : X86::VMOVSDZrrk;
    unsigned MovReg = fastEmitInst_rrrr(MovOpcode, VR128X, RHSReg, CmpReg,
                                        ImplicitDefReg, LHSReg);

    ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ResultReg).addReg(MovReg);
  } else if (Subtarget->hasAVX()) {
    unsigned CmpOpcode =
        (RetVT == MVT::f32) ? X86::VCMPSSrr : X86::VCMPSDrr;
    unsigned BlendOpcode =
        (RetVT == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr;

    Register CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpRHSReg,
    Register VBlendReg = fastEmitInst_rrr(BlendOpcode, VR128, RHSReg, LHSReg,
    ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ResultReg).addReg(VBlendReg);
    static const uint16_t OpcTable[2][4] = {
      { X86::CMPSSrr, X86::ANDPSrr, X86::ANDNPSrr, X86::ORPSrr },
      { X86::CMPSDrr, X86::ANDPDrr, X86::ANDNPDrr, X86::ORPDrr }

    default: return false;
    case MVT::f32: Opc = &OpcTable[0][0]; break;
    case MVT::f64: Opc = &OpcTable[1][0]; break;

    Register CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpRHSReg, CC);
    Register AndReg = fastEmitInst_rr(Opc[1], VR128, CmpReg, LHSReg);
    Register AndNReg = fastEmitInst_rr(Opc[2], VR128, CmpReg, RHSReg);
    Register OrReg = fastEmitInst_rr(Opc[3], VR128, AndNReg, AndReg);
    ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ResultReg).addReg(OrReg);
  updateValueMap(I, ResultReg);
bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
  default: return false;
  case MVT::i8:  Opc = X86::CMOV_GR8;   break;
  case MVT::i16: Opc = X86::CMOV_GR16;  break;
  case MVT::f16: Opc = X86::CMOV_FR16X; break;
  case MVT::i32: Opc = X86::CMOV_GR32;  break;
  case MVT::f32: Opc = Subtarget->hasAVX512() ? X86::CMOV_FR32X
                                              : X86::CMOV_FR32; break;
  case MVT::f64: Opc = Subtarget->hasAVX512() ? X86::CMOV_FR64X
                                              : X86::CMOV_FR64; break;

  const auto *CI = dyn_cast<CmpInst>(Cond);
  if (CI && (CI->getParent() == I->getParent())) {

    if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))

      unsigned KCondReg = CondReg;
      CondReg = createResultReg(&X86::GR32RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), CondReg)
      CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, X86::sub_8bit);

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))

  if (!LHSReg || !RHSReg)

      fastEmitInst_rri(Opc, RC, RHSReg, LHSReg, CC);
  updateValueMap(I, ResultReg);

bool X86FastISel::X86SelectSelect(const Instruction *I) {
  if (!isTypeLegal(I->getType(), RetVT))

  if (const auto *CI = dyn_cast<CmpInst>(I->getOperand(0))) {
    const Value *Opnd = nullptr;
      Register OpReg = getRegForValue(Opnd);
      Register ResultReg = createResultReg(RC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), ResultReg)
      updateValueMap(I, ResultReg);

  if (X86FastEmitCMoveSelect(RetVT, I))
  if (X86FastEmitSSESelect(RetVT, I))
  if (X86FastEmitPseudoSelect(RetVT, I))
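
// X86SelectIntToFP: signed int-to-fp conversion requires AVX (VCVTSI2SS/SD);
// the unsigned form additionally requires AVX-512 (VCVTUSI2SS/SD).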
bool X86FastISel::X86SelectIntToFP(const Instruction *I, bool IsSigned) {
  bool HasAVX512 = Subtarget->hasAVX512();
  if (!Subtarget->hasAVX() || (!IsSigned && !HasAVX512))

  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());

  Register OpReg = getRegForValue(I->getOperand(0));

  static const uint16_t SCvtOpc[2][2][2] = {
    { { X86::VCVTSI2SSrr,  X86::VCVTSI642SSrr },
      { X86::VCVTSI2SDrr,  X86::VCVTSI642SDrr } },
    { { X86::VCVTSI2SSZrr, X86::VCVTSI642SSZrr },
      { X86::VCVTSI2SDZrr, X86::VCVTSI642SDZrr } },
  static const uint16_t UCvtOpc[2][2] = {
    { X86::VCVTUSI2SSZrr, X86::VCVTUSI642SSZrr },
    { X86::VCVTUSI2SDZrr, X86::VCVTUSI642SDZrr },

  if (I->getType()->isDoubleTy()) {
    Opcode = IsSigned ? SCvtOpc[HasAVX512][1][Is64Bit] : UCvtOpc[1][Is64Bit];
  } else if (I->getType()->isFloatTy()) {
    Opcode = IsSigned ? SCvtOpc[HasAVX512][0][Is64Bit] : UCvtOpc[0][Is64Bit];

  Register ImplicitDefReg = createResultReg(RC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
          TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
  Register ResultReg = fastEmitInst_rr(Opcode, RC, ImplicitDefReg, OpReg);
  updateValueMap(I, ResultReg);

bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
  return X86SelectIntToFP(I, /*IsSigned=*/true);

bool X86FastISel::X86SelectUIToFP(const Instruction *I) {
  return X86SelectIntToFP(I, /*IsSigned=*/false);
bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I,
  assert((I->getOpcode() == Instruction::FPExt ||
          I->getOpcode() == Instruction::FPTrunc) &&
         "Instruction must be an FPExt or FPTrunc!");
  bool HasAVX = Subtarget->hasAVX();

  Register OpReg = getRegForValue(I->getOperand(0));

  unsigned ImplicitDefReg;
    ImplicitDefReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);

  Register ResultReg = createResultReg(RC);
  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpc),

    MIB.addReg(ImplicitDefReg);

  updateValueMap(I, ResultReg);

bool X86FastISel::X86SelectFPExt(const Instruction *I) {
  if (Subtarget->hasSSE2() && I->getType()->isDoubleTy() &&
      I->getOperand(0)->getType()->isFloatTy()) {
    bool HasAVX512 = Subtarget->hasAVX512();
        HasAVX512 ? X86::VCVTSS2SDZrr
        : Subtarget->hasAVX() ? X86::VCVTSS2SDrr : X86::CVTSS2SDrr;
    return X86SelectFPExtOrFPTrunc(I, Opc, TLI.getRegClassFor(MVT::f64));

bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
  if (Subtarget->hasSSE2() && I->getType()->isFloatTy() &&
      I->getOperand(0)->getType()->isDoubleTy()) {
    bool HasAVX512 = Subtarget->hasAVX512();
        HasAVX512 ? X86::VCVTSD2SSZrr
        : Subtarget->hasAVX() ? X86::VCVTSD2SSrr : X86::CVTSD2SSrr;
    return X86SelectFPExtOrFPTrunc(I, Opc, TLI.getRegClassFor(MVT::f32));

bool X86FastISel::X86SelectTrunc(const Instruction *I) {
  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
  EVT DstVT = TLI.getValueType(DL, I->getType());

  if (!TLI.isTypeLegal(SrcVT))

  Register InputReg = getRegForValue(I->getOperand(0));

    updateValueMap(I, InputReg);

  updateValueMap(I, ResultReg);

bool X86FastISel::IsMemcpySmall(uint64_t Len) {
  return Len <= (Subtarget->is64Bit() ? 32 : 16);

  if (!IsMemcpySmall(Len))

  bool i64Legal = Subtarget->is64Bit();
    if (Len >= 8 && i64Legal)

    bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg);
    RV &= X86FastEmitStore(VT, Reg, DestAM);
    assert(RV && "Failed to emit load or store??");
bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
  default: return false;
  case Intrinsic::convert_from_fp16:
  case Intrinsic::convert_to_fp16: {
    if (Subtarget->useSoftFloat() || !Subtarget->hasF16C())

    bool IsFloatToHalf = II->getIntrinsicID() == Intrinsic::convert_to_fp16;
    if (IsFloatToHalf) {
      if (!Op->getType()->isFloatTy())

    unsigned ResultReg = 0;
    if (IsFloatToHalf) {
      unsigned Opc = Subtarget->hasVLX() ? X86::VCVTPS2PHZ128rr
      InputReg = fastEmitInst_ri(Opc, RC, InputReg, 4);

      Opc = Subtarget->hasAVX512() ? X86::VMOVPDI2DIZrr
                                   : X86::VMOVPDI2DIrr;
      ResultReg = createResultReg(&X86::GR32RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)

      unsigned RegIdx = X86::sub_16bit;
      ResultReg = fastEmitInst_extractsubreg(MVT::i16, ResultReg, RegIdx);
      assert(Op->getType()->isIntegerTy(16) && "Expected a 16-bit integer!");

      unsigned Opc = Subtarget->hasVLX() ? X86::VCVTPH2PSZ128rr
      InputReg = fastEmitInst_r(Opc, RC, InputReg);

      ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), ResultReg)

    updateValueMap(II, ResultReg);
  case Intrinsic::frameaddress: {
    if (!isTypeLegal(RetTy, VT))

    case MVT::i32: Opc = X86::MOV32rm; RC = &X86::GR32RegClass; break;
    case MVT::i64: Opc = X86::MOV64rm; RC = &X86::GR64RegClass; break;

            (FrameReg == X86::EBP && VT == MVT::i32)) &&
           "Invalid Frame Register!");

    Register SrcReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), SrcReg).addReg(FrameReg);

    unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
      Register DestReg = createResultReg(RC);
                             TII.get(Opc), DestReg), SrcReg);

    updateValueMap(II, SrcReg);

    const MemCpyInst *MCI = cast<MemCpyInst>(II);
    if (isa<ConstantInt>(MCI->getLength())) {
      if (IsMemcpySmall(Len)) {
        TryEmitSmallMemcpy(DestAM, SrcAM, Len);

    unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;

    return lowerCallTo(II, "memcpy", II->arg_size() - 1);
  case Intrinsic::memset: {
    const MemSetInst *MSI = cast<MemSetInst>(II);

    unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;

    return lowerCallTo(II, "memset", II->arg_size() - 1);
  case Intrinsic::stackprotector: {
    EVT PtrTy = TLI.getPointerTy(DL);

    if (!X86FastEmitStore(PtrTy, Op1, AM))
      return false;
  case Intrinsic::dbg_declare: {
           "Expected inlined-at fields to agree");
  case Intrinsic::trap: {
  case Intrinsic::sqrt: {
    if (!Subtarget->hasSSE1())

    Type *RetTy = II->getCalledFunction()->getReturnType();
    if (!isTypeLegal(RetTy, VT))

    static const uint16_t SqrtOpc[3][2] = {
      { X86::SQRTSSr,   X86::SQRTSDr },
      { X86::VSQRTSSr,  X86::VSQRTSDr },
      { X86::VSQRTSSZr, X86::VSQRTSDZr },
    unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
                        Subtarget->hasAVX()    ? 1 :

    default: return false;
    case MVT::f32: Opc = SqrtOpc[AVXLevel][0]; break;
    case MVT::f64: Opc = SqrtOpc[AVXLevel][1]; break;

    const Value *SrcVal = II->getArgOperand(0);
    Register SrcReg = getRegForValue(SrcVal);

    unsigned ImplicitDefReg = 0;
      ImplicitDefReg = createResultReg(RC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);

    Register ResultReg = createResultReg(RC);
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),

      MIB.addReg(ImplicitDefReg);

    updateValueMap(II, ResultReg);
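
  // The *.with.overflow intrinsics below are lowered to the matching ALU
  // operation (INC/DEC when the right-hand side is a suitable constant, and
  // single-operand MUL/IMUL for the multiply forms), followed by a SETCC of
  // the overflow/carry condition into a second, adjacent result register.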
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow: {
    auto *Ty = cast<StructType>(Callee->getReturnType());
    Type *RetTy = Ty->getTypeAtIndex(0U);
           "Overflow value expected to be an i1");

    if (!isTypeLegal(RetTy, VT))

    const Value *LHS = II->getArgOperand(0);
    const Value *RHS = II->getArgOperand(1);

    if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())

    switch (II->getIntrinsicID()) {
    case Intrinsic::sadd_with_overflow:
    case Intrinsic::uadd_with_overflow:
    case Intrinsic::ssub_with_overflow:
    case Intrinsic::usub_with_overflow:
    case Intrinsic::smul_with_overflow:
    case Intrinsic::umul_with_overflow:

    unsigned ResultReg = 0;
    if (const auto *CI = dyn_cast<ConstantInt>(RHS)) {
      static const uint16_t Opc[2][4] = {
        { X86::INC8r, X86::INC16r, X86::INC32r, X86::INC64r },
        { X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r }

        ResultReg = createResultReg(TLI.getRegClassFor(VT));
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
        ResultReg = fastEmit_ri(VT, VT, BaseOpc, LHSReg, CI->getZExtValue());

      RHSReg = getRegForValue(RHS);
      ResultReg = fastEmit_rr(VT, VT, BaseOpc, LHSReg, RHSReg);

        { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                   TLI.getRegClassFor(VT), RHSReg);
        { X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr };
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
        ResultReg = fastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg);
                                   TLI.getRegClassFor(VT), LHSReg, RHSReg);

    Register ResultReg2 = createResultReg(&X86::GR8RegClass);
    assert((ResultReg + 1) == ResultReg2 && "Nonconsecutive result registers.");
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),

    updateValueMap(II, ResultReg, 2);
  case Intrinsic::x86_sse_cvttss2si:
  case Intrinsic::x86_sse_cvttss2si64:
  case Intrinsic::x86_sse2_cvttsd2si:
  case Intrinsic::x86_sse2_cvttsd2si64: {
    switch (II->getIntrinsicID()) {
    case Intrinsic::x86_sse_cvttss2si:
    case Intrinsic::x86_sse_cvttss2si64:
      if (!Subtarget->hasSSE1())
      IsInputDouble = false;
    case Intrinsic::x86_sse2_cvttsd2si:
    case Intrinsic::x86_sse2_cvttsd2si64:
      if (!Subtarget->hasSSE2())
      IsInputDouble = true;

    Type *RetTy = II->getCalledFunction()->getReturnType();
    if (!isTypeLegal(RetTy, VT))

    static const uint16_t CvtOpc[3][2][2] = {
      { { X86::CVTTSS2SIrr,   X86::CVTTSS2SI64rr },
        { X86::CVTTSD2SIrr,   X86::CVTTSD2SI64rr } },
      { { X86::VCVTTSS2SIrr,  X86::VCVTTSS2SI64rr },
        { X86::VCVTTSD2SIrr,  X86::VCVTTSD2SI64rr } },
      { { X86::VCVTTSS2SIZrr, X86::VCVTTSS2SI64Zrr },
        { X86::VCVTTSD2SIZrr, X86::VCVTTSD2SI64Zrr } },
    unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
                        Subtarget->hasAVX()    ? 1 :

    case MVT::i32: Opc = CvtOpc[AVXLevel][IsInputDouble][0]; break;
    case MVT::i64: Opc = CvtOpc[AVXLevel][IsInputDouble][1]; break;

    const Value *Op = II->getArgOperand(0);
    while (auto *IE = dyn_cast<InsertElementInst>(Op)) {
      if (!isa<ConstantInt>(Index))
      unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
        Op = IE->getOperand(1);
        Op = IE->getOperand(0);

    Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)

    updateValueMap(II, ResultReg);
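
// fastLowerArguments: handle only the simple 64-bit, non-Win64 case, with
// integer arguments in RDI/RSI/RDX/RCX/R8/R9 and FP arguments in XMM0-XMM7,
// and none of the byval/inreg/sret/swift*/nest attributes; anything else
// returns false so the normal argument lowering runs.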
bool X86FastISel::fastLowerArguments() {
  if (!FuncInfo.CanLowerReturn)

  if (Subtarget->isCallingConvWin64(CC))
  if (!Subtarget->is64Bit())
  if (Subtarget->useSoftFloat())

  unsigned GPRCnt = 0;
  unsigned FPRCnt = 0;
  for (auto const &Arg : F->args()) {
    if (Arg.hasAttribute(Attribute::ByVal) ||
        Arg.hasAttribute(Attribute::InReg) ||
        Arg.hasAttribute(Attribute::StructRet) ||
        Arg.hasAttribute(Attribute::SwiftSelf) ||
        Arg.hasAttribute(Attribute::SwiftAsync) ||
        Arg.hasAttribute(Attribute::SwiftError) ||
        Arg.hasAttribute(Attribute::Nest))

    EVT ArgVT = TLI.getValueType(DL, ArgTy);
    if (!ArgVT.isSimple())
      return false;
    default:
      return false;
      if (!Subtarget->hasSSE1())

  static const MCPhysReg GPR32ArgRegs[] = {
  static const MCPhysReg GPR64ArgRegs[] = {
    X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
    X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7

  unsigned FPRIdx = 0;
  for (auto const &Arg : F->args()) {
    MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
    case MVT::f64: SrcReg = XMMArgRegs[FPRIdx++]; break;
    Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
    Register ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ResultReg)
    updateValueMap(&Arg, ResultReg);

  if (Subtarget->is64Bit())
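
// fastLowerCall: the general call-lowering path. Outgoing arguments are
// analyzed with CC_X86, the call frame is set up, register arguments are
// copied and stack arguments stored, the CALL is emitted (register-indirect
// or direct/memory form), and the results are copied out of the ABI return
// registers.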
bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
  auto &OutVals = CLI.OutVals;
  auto &OutFlags = CLI.OutFlags;
  auto &OutRegs = CLI.OutRegs;
  auto &Ins = CLI.Ins;
  auto &InRegs = CLI.InRegs;
  bool &IsTailCall = CLI.IsTailCall;
  bool IsVarArg = CLI.IsVarArg;
  const auto *CB = CLI.CB;

  bool Is64Bit = Subtarget->is64Bit();
  bool IsWin64 = Subtarget->isCallingConvWin64(CC);

  if (CB && CB->doesNoCfCheck())
  if ((CB && isa<CallInst>(CB) && CB->hasFnAttr("no_caller_saved_registers")))
  if ((CB && CB->hasFnAttr("no_callee_saved_registers")))
  if (Subtarget->useIndirectThunkCalls())

  default:
    return false;

  if (IsVarArg && IsWin64)

  if (CLI.CB && CLI.CB->hasInAllocaArgument())

  for (auto Flag : CLI.OutFlags)
    if (Flag.isSwiftError() || Flag.isPreallocated())

  for (int i = 0, e = OutVals.size(); i != e; ++i) {
    Value *&Val = OutVals[i];
    if (auto *CI = dyn_cast<ConstantInt>(Val)) {
      if (CI->getBitWidth() < 32) {

    auto *TI = dyn_cast<TruncInst>(Val);
    if (TI && TI->getType()->isIntegerTy(1) && CLI.CB &&
        (TI->getParent() == CLI.CB->getParent()) && TI->hasOneUse()) {
      Value *PrevVal = TI->getOperand(0);
      ResultReg = getRegForValue(PrevVal);

      if (!isTypeLegal(PrevVal->getType(), VT))

      ResultReg = fastEmit_ri(VT, VT, ISD::AND, ResultReg, 1);
      if (!isTypeLegal(Val->getType(), VT) ||

      ResultReg = getRegForValue(Val);

    ArgRegs.push_back(ResultReg);
    OutVTs.push_back(VT);
  CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, CLI.RetTy->getContext());

    CCInfo.AllocateStack(32, Align(8));

  CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86);

  unsigned NumBytes = CCInfo.getAlignedCallFrameSize();

  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    unsigned ArgReg = ArgRegs[VA.getValNo()];
             "Unexpected extend");
      assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
             "Unexpected extend");
        ArgReg = fastEmitZExtFromI1(MVT::i8, ArgReg);
      assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
             "Unexpected extend");
      assert(Emitted && "Failed to emit a aext!"); (void)Emitted;
      assert(ArgReg && "Failed to emit a bitcast!");

      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
      if (isa<UndefValue>(ArgVal))

      AM.Disp = LocMemOffset;
        if (!TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize()))
      } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) {
        if (!X86FastEmitStore(ArgVT, ArgVal, AM, MMO))
        if (!X86FastEmitStore(ArgVT, ArgReg, AM, MMO))

  if (Subtarget->isPICStyleGOT()) {
    unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,

  if (Is64Bit && IsVarArg && !IsWin64) {
        X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
        X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
    assert((Subtarget->hasSSE1() || !NumXMMRegs) &&
           "SSE registers cannot be used when SSE is disabled");
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
  if (!X86SelectCallAddress(Callee, CalleeAM))

  unsigned CalleeOp = 0;
  if (CalleeAM.GV != nullptr) {
  } else if (CalleeAM.Base.Reg != 0) {

    unsigned CallOpc = Is64Bit ? X86::CALL64r : X86::CALL32r;
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc))

    assert(GV && "Not a direct call");

    unsigned char OpFlags = Subtarget->classifyGlobalFunctionReference(GV);

    unsigned CallOpc = NeedLoad
                           ? (Is64Bit ? X86::CALL64m : X86::CALL32m)
                           : (Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32);

    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));

  if (Subtarget->isPICStyleGOT())
  if (Is64Bit && IsVarArg && !IsWin64)

  for (auto Reg : OutRegs)

  unsigned NumBytesForCalleeToPop =
      TM.Options.GuaranteedTailCallOpt)

  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
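
  // Copy the return values out of their ABI-assigned registers. x87 results
  // (FP0/FP1) that are expected in SSE registers are spilled through a stack
  // slot with ST_Fp80m32/m64 and reloaded with MOVSSrm_alt/MOVSDrm_alt.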
  CCState CCRetInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs,
                    CLI.RetTy->getContext());

  Register ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    unsigned CopyReg = ResultReg + i;
        ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {

    if ((SrcReg == X86::FP0 || SrcReg == X86::FP1) &&
        isScalarFPTypeInSSEReg(VA.getValVT())) {
      CopyReg = createResultReg(&X86::RFP80RegClass);

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), CopyReg).addReg(SrcReg);

      unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
      Opc = ResVT == MVT::f32 ? X86::MOVSSrm_alt : X86::MOVSDrm_alt;
                                TII.get(Opc), ResultReg + i), FI);

  CLI.ResultReg = ResultReg;
  CLI.NumResultRegs = RVLocs.size();
X86FastISel::fastSelectInstruction(const Instruction *I) {
  switch (I->getOpcode()) {
    return X86SelectLoad(I);
    return X86SelectStore(I);
    return X86SelectRet(I);
  case Instruction::ICmp:
  case Instruction::FCmp:
    return X86SelectCmp(I);
  case Instruction::ZExt:
    return X86SelectZExt(I);
  case Instruction::SExt:
    return X86SelectSExt(I);
  case Instruction::Br:
    return X86SelectBranch(I);
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::Shl:
    return X86SelectShift(I);
  case Instruction::SDiv:
  case Instruction::UDiv:
  case Instruction::SRem:
  case Instruction::URem:
    return X86SelectDivRem(I);
    return X86SelectSelect(I);
  case Instruction::Trunc:
    return X86SelectTrunc(I);
  case Instruction::FPExt:
    return X86SelectFPExt(I);
  case Instruction::FPTrunc:
    return X86SelectFPTrunc(I);
  case Instruction::SIToFP:
    return X86SelectSIToFP(I);
  case Instruction::UIToFP:
    return X86SelectUIToFP(I);
  case Instruction::IntToPtr:
  case Instruction::PtrToInt: {
    EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
    EVT DstVT = TLI.getValueType(DL, I->getType());
      return X86SelectZExt(I);
      return X86SelectTrunc(I);
    if (Reg == 0)
      return false;
    updateValueMap(I, Reg);
  case Instruction::BitCast: {
    if (!Subtarget->hasSSE2())

    if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT) ||
        !isTypeLegal(I->getType(), DstVT))

    Register ResultReg = createResultReg(DstClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    updateValueMap(I, ResultReg);
unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
    Register SrcReg = fastEmitInst_(X86::MOV32r0, &X86::GR32RegClass);
      return fastEmitInst_extractsubreg(MVT::i8, SrcReg, X86::sub_8bit);
      return fastEmitInst_extractsubreg(MVT::i16, SrcReg, X86::sub_16bit);
      Register ResultReg = createResultReg(&X86::GR64RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)

  case MVT::i8:  Opc = X86::MOV8ri;  break;
  case MVT::i16: Opc = X86::MOV16ri; break;
  case MVT::i32: Opc = X86::MOV32ri; break;
      Opc = X86::MOV32ri64;
      Opc = X86::MOV64ri32;

  return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);

unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
    return fastMaterializeFloatZero(CFP);

  bool HasSSE1 = Subtarget->hasSSE1();
  bool HasSSE2 = Subtarget->hasSSE2();
  bool HasAVX = Subtarget->hasAVX();
  bool HasAVX512 = Subtarget->hasAVX512();
    Opc = HasAVX512 ? X86::VMOVSSZrm_alt
          : HasAVX  ? X86::VMOVSSrm_alt
          : HasSSE1 ? X86::MOVSSrm_alt
    Opc = HasAVX512 ? X86::VMOVSDZrm_alt
          : HasAVX  ? X86::VMOVSDrm_alt
          : HasSSE2 ? X86::MOVSDrm_alt

  unsigned PICBase = 0;
  unsigned char OpFlag = Subtarget->classifyLocalReference(nullptr);
    PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
    PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);

  unsigned CPI = MCP.getConstantPoolIndex(CFP, Alignment);

    Register AddrReg = createResultReg(&X86::GR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
                                      TII.get(Opc), ResultReg);
    addRegReg(MIB, AddrReg, false, PICBase, false);

                                    TII.get(Opc), ResultReg),
                           CPI, PICBase, OpFlag);
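
// X86MaterializeGV: global addresses are materialized either with a MOV64ri
// of the absolute address or with an LEA (LEA32r/LEA64_32r/LEA64r) of the
// computed address mode; the choice is made by logic elided from this excerpt.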
unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) {
    Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
              ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
                             TII.get(Opc), ResultReg), AM);

unsigned X86FastISel::fastMaterializeConstant(const Constant *C) {
  EVT CEVT = TLI.getValueType(DL, C->getType(), true);

  if (const auto *CI = dyn_cast<ConstantInt>(C))
    return X86MaterializeInt(CI, VT);
  if (const auto *CFP = dyn_cast<ConstantFP>(C))
    return X86MaterializeFP(CFP, VT);
  if (const auto *GV = dyn_cast<GlobalValue>(C))
    return X86MaterializeGV(GV, VT);
  if (isa<UndefValue>(C)) {
      if (!Subtarget->hasSSE1())
        Opc = X86::LD_Fp032;
      if (!Subtarget->hasSSE2())
        Opc = X86::LD_Fp064;
      Opc = X86::LD_Fp080;

      Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),

unsigned X86FastISel::fastMaterializeAlloca(const AllocaInst *C) {
  if (!FuncInfo.StaticAllocaMap.count(C))
  assert(C->isStaticAlloca() && "dynamic alloca in the static alloca map?");
      ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
  Register ResultReg = createResultReg(RC);
                         TII.get(Opc), ResultReg), AM);

unsigned X86FastISel::fastMaterializeFloatZero(const ConstantFP *CF) {
  if (!isTypeLegal(CF->getType(), VT))

  bool HasSSE1 = Subtarget->hasSSE1();
  bool HasSSE2 = Subtarget->hasSSE2();
  bool HasAVX512 = Subtarget->hasAVX512();
    Opc = HasAVX512 ? X86::AVX512_FsFLD0SS
          : HasSSE1 ? X86::FsFLD0SS
    Opc = HasAVX512 ? X86::AVX512_FsFLD0SD
          : HasSSE2 ? X86::FsFLD0SD

  Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
      *FuncInfo.MF, *MI, OpNo, AddrOps, FuncInfo.InsertPt, Size, LI->getAlign(),

  unsigned OperandNo = 0;
       E = Result->operands_end(); I != E; ++I, ++OperandNo) {
    if (IndexReg == MO.getReg())

  Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
  Result->cloneInstrSymbols(*FuncInfo.MF, *MI);
  removeDeadCode(I, std::next(I));

unsigned X86FastISel::fastEmitInst_rrrr(unsigned MachineInstOpcode,
                                        unsigned Op0, unsigned Op1,
                                        unsigned Op2, unsigned Op3) {
  Register ResultReg = createResultReg(RC);

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,

  return new X86FastISel(funcInfo, libInfo);