 37 #include "llvm/IR/IntrinsicsX86.h"
 47 class X86FastISel final : public FastISel {
 72 #include "X86GenFastISel.inc"
 75   bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT,
 79                        unsigned &ResultReg, unsigned Alignment = 1);
120   bool X86SelectFPExtOrFPTrunc(const Instruction *I, unsigned Opc,
127   bool X86SelectIntToFP(const Instruction *I, bool IsSigned);
130 return Subtarget->getInstrInfo();
149   bool isScalarFPTypeInSSEReg(EVT VT) const {
150     return (VT == MVT::f64 && Subtarget->hasSSE2()) ||
154   bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false);
167   unsigned fastEmitInst_rrrr(unsigned MachineInstOpcode,
169                              unsigned Op1, unsigned Op2, unsigned Op3);
174 static std::pair<unsigned, bool>
177   bool NeedSwap = false;
206   return std::make_pair(CC, NeedSwap);
220 return ::addFullAddress(MIB, AM);
227   if (!isa<ExtractValueInst>(Cond))
230   const auto *EV = cast<ExtractValueInst>(Cond);
231 if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
234 const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
238       cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
239   if (!isTypeLegal(RetTy, RetVT))
246 switch (II->getIntrinsicID()) {
247   default: return false;
248   case Intrinsic::sadd_with_overflow:
249   case Intrinsic::ssub_with_overflow:
250   case Intrinsic::smul_with_overflow:
251   case Intrinsic::umul_with_overflow: TmpCC = X86::COND_O; break;
252   case Intrinsic::uadd_with_overflow:
253   case Intrinsic::usub_with_overflow: TmpCC = X86::COND_B; break;
257   if (II->getParent() != I->getParent())
263   for (auto Itr = std::prev(Start); Itr != End; --Itr) {
266 if (!isa<ExtractValueInst>(Itr))
270 const auto *EVI = cast<ExtractValueInst>(Itr);
271 if (EVI->getAggregateOperand() != II)
277   auto HasPhis = [](const BasicBlock *Succ) { return !Succ->phis().empty(); };
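// A hedged reading of the foldX86XALUIntrinsic fragment above: an overflow
// intrinsic whose results are consumed only by extractvalue instructions in
// the same basic block (and whose successors carry no PHIs) can feed a
// branch or select straight from EFLAGS -- COND_O for the signed and
// multiply variants, COND_B for unsigned add/sub -- so the i1 overflow bit
// never has to be materialized. Illustrative IR only:
//
//   %s  = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
//   %ov = extractvalue { i32, i1 } %s, 1
//   br i1 %ov, label %overflow, label %cont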
290 bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
291   EVT evt = TLI.getValueType(DL, Ty, true);
299   if (VT == MVT::f64 && !Subtarget->hasSSE2())
301   if (VT == MVT::f32 && !Subtarget->hasSSE1())
310   return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
318 unsigned Alignment) {
319 bool HasSSE1 = Subtarget->hasSSE1();
320 bool HasSSE2 = Subtarget->hasSSE2();
321 bool HasSSE41 = Subtarget->hasSSE41();
322 bool HasAVX = Subtarget->hasAVX();
323 bool HasAVX2 = Subtarget->hasAVX2();
324 bool HasAVX512 = Subtarget->hasAVX512();
325 bool HasVLX = Subtarget->hasVLX();
335   default: return false;
350 Opc = HasAVX512 ? X86::VMOVSSZrm_alt
351 : HasAVX ? X86::VMOVSSrm_alt
352 : HasSSE1 ? X86::MOVSSrm_alt
356 Opc = HasAVX512 ? X86::VMOVSDZrm_alt
357 : HasAVX ? X86::VMOVSDrm_alt
358 : HasSSE2 ? X86::MOVSDrm_alt
365 if (IsNonTemporal && Alignment >= 16 && HasSSE41)
366 Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
367 HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
368 else if (Alignment >= 16)
369 Opc = HasVLX ? X86::VMOVAPSZ128rm :
370 HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm;
372 Opc = HasVLX ? X86::VMOVUPSZ128rm :
373 HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm;
376 if (IsNonTemporal && Alignment >= 16 && HasSSE41)
377 Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
378 HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
379 else if (Alignment >= 16)
380 Opc = HasVLX ? X86::VMOVAPDZ128rm :
381 HasAVX ? X86::VMOVAPDrm : X86::MOVAPDrm;
383 Opc = HasVLX ? X86::VMOVUPDZ128rm :
384 HasAVX ? X86::VMOVUPDrm : X86::MOVUPDrm;
390 if (IsNonTemporal && Alignment >= 16 && HasSSE41)
391 Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
392 HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
393 else if (Alignment >= 16)
394 Opc = HasVLX ? X86::VMOVDQA64Z128rm :
395 HasAVX ? X86::VMOVDQArm : X86::MOVDQArm;
397 Opc = HasVLX ? X86::VMOVDQU64Z128rm :
398 HasAVX ? X86::VMOVDQUrm : X86::MOVDQUrm;
402 if (IsNonTemporal && Alignment >= 32 && HasAVX2)
403 Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
404 else if (IsNonTemporal && Alignment >= 16)
406 else if (Alignment >= 32)
407 Opc = HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm;
409 Opc = HasVLX ? X86::VMOVUPSZ256rm : X86::VMOVUPSYrm;
413 if (IsNonTemporal && Alignment >= 32 && HasAVX2)
414 Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
415 else if (IsNonTemporal && Alignment >= 16)
417 else if (Alignment >= 32)
418 Opc = HasVLX ? X86::VMOVAPDZ256rm : X86::VMOVAPDYrm;
420 Opc = HasVLX ? X86::VMOVUPDZ256rm : X86::VMOVUPDYrm;
427 if (IsNonTemporal && Alignment >= 32 && HasAVX2)
428 Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
429 else if (IsNonTemporal && Alignment >= 16)
431 else if (Alignment >= 32)
432 Opc = HasVLX ? X86::VMOVDQA64Z256rm : X86::VMOVDQAYrm;
434 Opc = HasVLX ? X86::VMOVDQU64Z256rm : X86::VMOVDQUYrm;
438 if (IsNonTemporal && Alignment >= 64)
439 Opc = X86::VMOVNTDQAZrm;
441 Opc = (Alignment >= 64) ? X86::VMOVAPSZrm : X86::VMOVUPSZrm;
445 if (IsNonTemporal && Alignment >= 64)
446 Opc = X86::VMOVNTDQAZrm;
448 Opc = (Alignment >= 64) ? X86::VMOVAPDZrm : X86::VMOVUPDZrm;
457 if (IsNonTemporal && Alignment >= 64)
458 Opc = X86::VMOVNTDQAZrm;
460 Opc = (Alignment >= 64) ? X86::VMOVDQA64Zrm : X86::VMOVDQU64Zrm;
466 ResultReg = createResultReg(RC);
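// Summary of the load-opcode choice above (derived from the visible cases):
// for each legal type the selection proceeds in tiers -- a non-temporal
// MOVNTDQA form when the load is non-temporal and sufficiently aligned
// (SSE4.1 for 128-bit, AVX2 for 256-bit, AVX-512 for 512-bit), otherwise an
// aligned MOVAPS/MOVAPD/MOVDQA form when the alignment reaches the vector
// width, and an unaligned MOVUPS/MOVUPD/MOVDQU form as the fallback; with
// VLX the EVEX Z128/Z256 encodings of the same operations are used.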
481 bool HasSSE1 = Subtarget->hasSSE1();
482 bool HasSSE2 = Subtarget->hasSSE2();
483 bool HasSSE4A = Subtarget->hasSSE4A();
484 bool HasAVX = Subtarget->hasAVX();
485 bool HasAVX512 = Subtarget->hasAVX512();
486 bool HasVLX = Subtarget->hasVLX();
493   default: return false;
496   Register AndResult = createResultReg(&X86::GR8RegClass);
498           TII.get(X86::AND8ri), AndResult)
503   case MVT::i8:  Opc = X86::MOV8mr;  break;
504   case MVT::i16: Opc = X86::MOV16mr; break;
506 Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTImr : X86::MOV32mr;
510 Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTI_64mr : X86::MOV64mr;
514 if (IsNonTemporal && HasSSE4A)
517 Opc = HasAVX512 ? X86::VMOVSSZmr :
518 HasAVX ? X86::VMOVSSmr : X86::MOVSSmr;
524 if (IsNonTemporal && HasSSE4A)
527 Opc = HasAVX512 ? X86::VMOVSDZmr :
528 HasAVX ? X86::VMOVSDmr : X86::MOVSDmr;
533 Opc = (IsNonTemporal && HasSSE1) ? X86::MMX_MOVNTQmr : X86::MMX_MOVQ64mr;
538 Opc = HasVLX ? X86::VMOVNTPSZ128mr :
539 HasAVX ? X86::VMOVNTPSmr : X86::MOVNTPSmr;
541 Opc = HasVLX ? X86::VMOVAPSZ128mr :
542 HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr;
544 Opc = HasVLX ? X86::VMOVUPSZ128mr :
545 HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr;
550 Opc = HasVLX ? X86::VMOVNTPDZ128mr :
551 HasAVX ? X86::VMOVNTPDmr : X86::MOVNTPDmr;
553 Opc = HasVLX ? X86::VMOVAPDZ128mr :
554 HasAVX ? X86::VMOVAPDmr : X86::MOVAPDmr;
556 Opc = HasVLX ? X86::VMOVUPDZ128mr :
557 HasAVX ? X86::VMOVUPDmr : X86::MOVUPDmr;
565 Opc = HasVLX ? X86::VMOVNTDQZ128mr :
566 HasAVX ? X86::VMOVNTDQmr : X86::MOVNTDQmr;
568 Opc = HasVLX ? X86::VMOVDQA64Z128mr :
569 HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr;
571 Opc = HasVLX ? X86::VMOVDQU64Z128mr :
572 HasAVX ? X86::VMOVDQUmr : X86::MOVDQUmr;
578 Opc = HasVLX ? X86::VMOVNTPSZ256mr : X86::VMOVNTPSYmr;
580 Opc = HasVLX ? X86::VMOVAPSZ256mr : X86::VMOVAPSYmr;
582 Opc = HasVLX ? X86::VMOVUPSZ256mr : X86::VMOVUPSYmr;
588 Opc = HasVLX ? X86::VMOVNTPDZ256mr : X86::VMOVNTPDYmr;
590 Opc = HasVLX ? X86::VMOVAPDZ256mr : X86::VMOVAPDYmr;
592 Opc = HasVLX ? X86::VMOVUPDZ256mr : X86::VMOVUPDYmr;
601 Opc = HasVLX ? X86::VMOVNTDQZ256mr : X86::VMOVNTDQYmr;
603 Opc = HasVLX ? X86::VMOVDQA64Z256mr : X86::VMOVDQAYmr;
605 Opc = HasVLX ? X86::VMOVDQU64Z256mr : X86::VMOVDQUYmr;
610 Opc = IsNonTemporal ? X86::VMOVNTPSZmr : X86::VMOVAPSZmr;
612 Opc = X86::VMOVUPSZmr;
617 Opc = IsNonTemporal ? X86::VMOVNTPDZmr : X86::VMOVAPDZmr;
619 Opc = X86::VMOVUPDZmr;
629 Opc = IsNonTemporal ? X86::VMOVNTDQZmr : X86::VMOVDQA64Zmr;
631 Opc = X86::VMOVDQU64Zmr;
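// Summary of the store-opcode choice above: scalar integer stores switch to
// MOVNTImr/MOVNTI_64mr for non-temporal accesses, scalar FP stores take an
// SSE4A non-temporal path when available, and vector stores mirror the load
// logic -- MOVNTPS/MOVNTPD/MOVNTDQ when non-temporal and aligned, the aligned
// MOVAPS/MOVAPD/MOVDQA forms when the alignment suffices, and the unaligned
// MOVUPS/MOVUPD/MOVDQU forms otherwise, with VLX selecting the EVEX
// encodings.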
652 bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
656   if (isa<ConstantPointerNull>(Val))
660   if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
668     case MVT::i8:  Opc = X86::MOV8mi;  break;
669     case MVT::i16: Opc = X86::MOV16mi; break;
670     case MVT::i32: Opc = X86::MOV32mi; break;
673 if (isInt<32>(CI->getSExtValue()))
674 Opc = X86::MOV64mi32;
682 : CI->getZExtValue());
689 Register ValReg = getRegForValue(Val);
693   return X86FastEmitStore(VT, ValReg, AM, MMO, Aligned);
700                                  unsigned Src, EVT SrcVT,
701                                  unsigned &ResultReg) {
712   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
718 if (GV->isThreadLocal())
722 if (GV->isAbsoluteSymbolRef())
728 if (!Subtarget->isPICStyleRIPRel() ||
734 unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);
745 if (Subtarget->isPICStyleRIPRel()) {
759   if (I != LocalValueMap.end() && I->second) {
771 SavePoint SaveInsertPt = enterLocalValueArea();
775 RC = &X86::GR64RegClass;
778 RC = &X86::GR32RegClass;
785 LoadReg = createResultReg(RC);
791 leaveLocalValueArea(SaveInsertPt);
794   LocalValueMap[V] = LoadReg;
806   if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
808     AM.Base.Reg = getRegForValue(V);
826   const User *U = nullptr;
827   unsigned Opcode = Instruction::UserOp1;
834     Opcode = I->getOpcode();
837   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
838     Opcode = C->getOpcode();
842   if (PointerType *Ty = dyn_cast<PointerType>(V->getType()))
843 if (Ty->getAddressSpace() > 255)
850 case Instruction::BitCast:
854 case Instruction::IntToPtr:
856     if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
857         TLI.getPointerTy(DL))
861 case Instruction::PtrToInt:
863     if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
867 case Instruction::Alloca: {
872     if (SI != FuncInfo.StaticAllocaMap.end()) {
880 case Instruction::Add: {
882     if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
885 if (isInt<32>(Disp)) {
893 case Instruction::GetElementPtr: {
899     unsigned Scale = AM.Scale;
904          i != e; ++i, ++GTI) {
906       if (StructType *STy = GTI.getStructTypeOrNull()) {
914         uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
916           if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
918 Disp += CI->getSExtValue() * S;
921 if (canFoldAddIntoGEP(U, Op)) {
924 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
927 Op = cast<AddOperator>(Op)->getOperand(0);
931               (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
932 (S == 1 || S == 2 || S == 4 || S == 8)) {
935 IndexReg = getRegForGEPIndex(Op);
941 goto unsupported_gep;
946 if (!isInt<32>(Disp))
955           dyn_cast<GetElementPtrInst>(U->getOperand(0))) {
969     if (handleConstantAddresses(I, AM))
979 return handleConstantAddresses(V, AM);
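// Note on X86SelectAddress: the operand tree is folded into an
// X86AddressMode of the form Base + Scale*Index + Disp (+GV). The checks
// visible above enforce its limits -- pointers in address spaces above 255
// are rejected, GEP scales must be 1, 2, 4 or 8, the accumulated
// displacement has to fit a signed 32-bit immediate, and an index or base
// register is never combined with a RIP-relative global reference.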
985   const User *U = nullptr;
986   unsigned Opcode = Instruction::UserOp1;
1013     Opcode = I->getOpcode();
1015     InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
1016   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
1017     Opcode = C->getOpcode();
1023 case Instruction::BitCast:
1026       return X86SelectCallAddress(U->getOperand(0), AM);
1029 case Instruction::IntToPtr:
1032         TLI.getValueType(DL, U->getOperand(0)->getType()) ==
1033             TLI.getPointerTy(DL))
1034       return X86SelectCallAddress(U->getOperand(0), AM);
1037 case Instruction::PtrToInt:
1039     if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
1040       return X86SelectCallAddress(U->getOperand(0), AM);
1045   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
1051 if (Subtarget->isPICStyleRIPRel() &&
1057 if (GVar->isThreadLocal())
1066 if (Subtarget->isPICStyleRIPRel()) {
1072   AM.GVOpFlags = Subtarget->classifyLocalReference(nullptr);
1079   if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
1080     auto GetCallRegForValue = [this](const Value *V) {
1084 if (Reg && Subtarget->isTarget64BitILP32()) {
1085 Register CopyReg = createResultReg(&X86::GR32RegClass);
1090 Register ExtReg = createResultReg(&X86::GR64RegClass);
1092 TII.get(TargetOpcode::SUBREG_TO_REG), ExtReg)
1103       AM.Base.Reg = GetCallRegForValue(V);
1108       AM.IndexReg = GetCallRegForValue(V);
1118 bool X86FastISel::X86SelectStore(const Instruction *I) {
1125   const Value *PtrV = I->getOperand(1);
1126   if (TLI.supportSwiftError()) {
1129     if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
1130       if (Arg->hasSwiftErrorAttr())
1134     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
1135 if (Alloca->isSwiftError())
1144   if (!isTypeLegal(Val->getType(), VT, true))
1149 bool Aligned = Alignment >= ABIAlignment;
1155   return X86FastEmitStore(VT, Val, AM, createMachineMemOperandFor(I), Aligned);
1161   const Function &F = *I->getParent()->getParent();
1168 if (TLI.supportSwiftError() &&
1169 F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
1172   if (TLI.supportSplitCSR(FuncInfo.MF))
1204   if (Ret->getNumOperands() > 0) {
1213     const Value *RV = Ret->getOperand(0);
1219     if (ValLocs.size() != 1)
1240 if (SrcVT != DstVT) {
1244       if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
1250       if (Outs[0].Flags.isSExt())
1253         SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg);
1269             TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg);
1286 "SRetReturnReg should have been set in LowerFormalArguments()!");
1287 unsigned RetReg = Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX;
1289             TII.get(TargetOpcode::COPY), RetReg).addReg(Reg);
1297 TII.get(Subtarget->is64Bit() ? X86::RETI64 : X86::RETI32))
1301 TII.get(Subtarget->is64Bit() ? X86::RET64 : X86::RET32));
1303   for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
1317   const Value *SV = I->getOperand(0);
1318 if (TLI.supportSwiftError()) {
1321     if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1322       if (Arg->hasSwiftErrorAttr())
1326     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1327       if (Alloca->isSwiftError())
1333   if (!isTypeLegal(LI->getType(), VT, true))
1342 unsigned ResultReg = 0;
1343 if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg,
1347   updateValueMap(I, ResultReg);
1352   bool HasAVX512 = Subtarget->hasAVX512();
1353   bool HasAVX = Subtarget->hasAVX();
1354   bool HasSSE1 = Subtarget->hasSSE1();
1355   bool HasSSE2 = Subtarget->hasSSE2();
1359   case MVT::i8:  return X86::CMP8rr;
1360   case MVT::i16: return X86::CMP16rr;
1361   case MVT::i32: return X86::CMP32rr;
1362   case MVT::i64: return X86::CMP64rr;
1364 return HasAVX512 ? X86::VUCOMISSZrr
1365 : HasAVX ? X86::VUCOMISSrr
1366 : HasSSE1 ? X86::UCOMISSrr
1369 return HasAVX512 ? X86::VUCOMISDZrr
1370 : HasAVX ? X86::VUCOMISDrr
1371 : HasSSE2 ? X86::UCOMISDrr
1388 return X86::CMP16ri8;
1389 return X86::CMP16ri;
1392 return X86::CMP32ri8;
1393 return X86::CMP32ri;
1396 return X86::CMP64ri8;
1400 return X86::CMP64ri32;
1405 bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1, EVT VT,
1407   Register Op0Reg = getRegForValue(Op0);
1408   if (Op0Reg == 0) return false;
1411 if (isa<ConstantPointerNull>(Op1))
1417   if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
1421         .addImm(Op1C->getSExtValue());
1427   if (CompareOpc == 0) return false;
1429   Register Op1Reg = getRegForValue(Op1);
1430   if (Op1Reg == 0) return false;
1439   const CmpInst *CI = cast<CmpInst>(I);
1442   if (!isTypeLegal(I->getOperand(0)->getType(), VT))
1451 unsigned ResultReg = 0;
1452 switch (Predicate) {
1455 ResultReg = createResultReg(&X86::GR32RegClass);
1458     ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg, X86::sub_8bit);
1464     ResultReg = createResultReg(&X86::GR8RegClass);
1472     updateValueMap(I, ResultReg);
1483 const auto *RHSC = dyn_cast<ConstantFP>(RHS);
1484 if (RHSC && RHSC->isNullValue())
1489 static const uint16_t SETFOpcTable[2][3] = {
1494 switch (Predicate) {
1500 ResultReg = createResultReg(&X86::GR8RegClass);
1502     if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
1505 Register FlagReg1 = createResultReg(&X86::GR8RegClass);
1506 Register FlagReg2 = createResultReg(&X86::GR8RegClass);
1508             FlagReg1).addImm(SETFOpc[0]);
1510             FlagReg2).addImm(SETFOpc[1]);
1513     updateValueMap(I, ResultReg);
1526   if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
1531   updateValueMap(I, ResultReg);
1536   EVT DstVT = TLI.getValueType(DL, I->getType());
1537   if (!TLI.isTypeLegal(DstVT))
1540   Register ResultReg = getRegForValue(I->getOperand(0));
1545   MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
1548     ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg);
1560     case MVT::i8:  MovInst = X86::MOVZX32rr8;  break;
1561     case MVT::i16: MovInst = X86::MOVZX32rr16; break;
1562     case MVT::i32: MovInst = X86::MOV32rr;     break;
1566 Register Result32 = createResultReg(&X86::GR32RegClass);
1570 ResultReg = createResultReg(&X86::GR64RegClass);
1577 Register Result32 = createResultReg(&X86::GR32RegClass);
1579             Result32).addReg(ResultReg);
1581     ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, X86::sub_16bit);
1582   } else if (DstVT != MVT::i8) {
1589   updateValueMap(I, ResultReg);
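// Note on the zext lowering above: i1 sources are first widened with
// fastEmitZExtFromI1, and everything else is built around a 32-bit
// zero-extend (MOVZX32rr8/MOVZX32rr16, or a plain MOV32rr for i32 inputs);
// i64 results wrap the 32-bit value with SUBREG_TO_REG, while i16 results
// are taken back out of the 32-bit register with a sub_16bit extract.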
1594   EVT DstVT = TLI.getValueType(DL, I->getType());
1595   if (!TLI.isTypeLegal(DstVT))
1598   Register ResultReg = getRegForValue(I->getOperand(0));
1603   MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
1611     ResultReg = createResultReg(&X86::GR8RegClass);
1613             ResultReg).addReg(ZExtReg);
1621 Register Result32 = createResultReg(&X86::GR32RegClass);
1623             Result32).addReg(ResultReg);
1625     ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, X86::sub_16bit);
1626   } else if (DstVT != MVT::i8) {
1633   updateValueMap(I, ResultReg);
1637 bool X86FastISel::X86SelectBranch(const Instruction *I) {
1654 switch (Predicate) {
1668 const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
1669 if (CmpRHSC && CmpRHSC->isNullValue())
1674     if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
1684     bool NeedExtraBranch = false;
1685 switch (Predicate) {
1691       NeedExtraBranch = true;
1704     if (!X86FastEmitCompare(CmpLHS, CmpRHS, VT, CI->getDebugLoc()))
1712     if (NeedExtraBranch) {
1717     finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1724     if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
1725 isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) {
1726 unsigned TestOpc = 0;
1729       case MVT::i8:  TestOpc = X86::TEST8ri;    break;
1730       case MVT::i16: TestOpc = X86::TEST16ri;   break;
1731       case MVT::i32: TestOpc = X86::TEST32ri;   break;
1732       case MVT::i64: TestOpc = X86::TEST64ri32; break;
1735 Register OpReg = getRegForValue(TI->getOperand(0));
1736       if (OpReg == 0) return false;
1742       if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
1750       finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1763     finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1771   if (OpReg == 0) return false;
1774   if (MRI.getRegClass(OpReg) == &X86::VK1RegClass) {
1775 unsigned KOpReg = OpReg;
1776 OpReg = createResultReg(&X86::GR32RegClass);
1778 TII.get(TargetOpcode::COPY), OpReg)
1780     OpReg = fastEmitInst_extractsubreg(MVT::i8, OpReg, X86::sub_8bit);
1787   finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1791 bool X86FastISel::X86SelectShift(const Instruction *I) {
1792   unsigned CReg = 0, OpReg = 0;
1794   if (I->getType()->isIntegerTy(8)) {
1796 RC = &X86::GR8RegClass;
1797     switch (I->getOpcode()) {
1798     case Instruction::LShr: OpReg = X86::SHR8rCL; break;
1799     case Instruction::AShr: OpReg = X86::SAR8rCL; break;
1800     case Instruction::Shl:  OpReg = X86::SHL8rCL; break;
1801     default: return false;
1803   } else if (I->getType()->isIntegerTy(16)) {
1805 RC = &X86::GR16RegClass;
1806     switch (I->getOpcode()) {
1808     case Instruction::LShr: OpReg = X86::SHR16rCL; break;
1809     case Instruction::AShr: OpReg = X86::SAR16rCL; break;
1810     case Instruction::Shl:  OpReg = X86::SHL16rCL; break;
1812   } else if (I->getType()->isIntegerTy(32)) {
1814 RC = &X86::GR32RegClass;
1815     switch (I->getOpcode()) {
1817     case Instruction::LShr: OpReg = X86::SHR32rCL; break;
1818     case Instruction::AShr: OpReg = X86::SAR32rCL; break;
1819     case Instruction::Shl:  OpReg = X86::SHL32rCL; break;
1821   } else if (I->getType()->isIntegerTy(64)) {
1823 RC = &X86::GR64RegClass;
1824     switch (I->getOpcode()) {
1826     case Instruction::LShr: OpReg = X86::SHR64rCL; break;
1827     case Instruction::AShr: OpReg = X86::SAR64rCL; break;
1828     case Instruction::Shl:  OpReg = X86::SHL64rCL; break;
1835   if (!isTypeLegal(I->getType(), VT))
1838   Register Op0Reg = getRegForValue(I->getOperand(0));
1839   if (Op0Reg == 0) return false;
1841   Register Op1Reg = getRegForValue(I->getOperand(1));
1842   if (Op1Reg == 0) return false;
1848 if (CReg != X86::CL)
1850 TII.get(TargetOpcode::KILL), X86::CL)
1853 Register ResultReg = createResultReg(RC);
1856   updateValueMap(I, ResultReg);
1860 bool X86FastISel::X86SelectDivRem(const Instruction *I) {
1861   const static unsigned NumTypes = 4;
1862   const static unsigned NumOps = 4;
1863   const static bool S = true;
1864   const static bool U = false;
1865   const static unsigned Copy = TargetOpcode::COPY;
1875 const static struct DivRemEntry {
1881 struct DivRemResult {
1883 unsigned OpSignExtend;
1887 unsigned DivRemResultReg;
1889 } ResultTable[NumOps];
1890 } OpTable[NumTypes] = {
1891 { &X86::GR8RegClass, X86::AX, 0, {
1892 { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S },
1893 { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S },
1894       { X86::DIV8r,  0, X86::MOVZX16rr8, X86::AL, U },
1895       { X86::DIV8r,  0, X86::MOVZX16rr8, X86::AH, U },
1898 { &X86::GR16RegClass, X86::AX, X86::DX, {
1899       { X86::IDIV16r, X86::CWD,     Copy, X86::AX, S },
1900       { X86::IDIV16r, X86::CWD,     Copy, X86::DX, S },
1901       { X86::DIV16r,  X86::MOV32r0, Copy, X86::AX, U },
1902       { X86::DIV16r,  X86::MOV32r0, Copy, X86::DX, U },
1905 { &X86::GR32RegClass, X86::EAX, X86::EDX, {
1906       { X86::IDIV32r, X86::CDQ,     Copy, X86::EAX, S },
1907       { X86::IDIV32r, X86::CDQ,     Copy, X86::EDX, S },
1908       { X86::DIV32r,  X86::MOV32r0, Copy, X86::EAX, U },
1909       { X86::DIV32r,  X86::MOV32r0, Copy, X86::EDX, U },
1912 { &X86::GR64RegClass, X86::RAX, X86::RDX, {
1913       { X86::IDIV64r, X86::CQO,     Copy, X86::RAX, S },
1914       { X86::IDIV64r, X86::CQO,     Copy, X86::RDX, S },
1915       { X86::DIV64r,  X86::MOV32r0, Copy, X86::RAX, U },
1916       { X86::DIV64r,  X86::MOV32r0, Copy, X86::RDX, U },
1922   if (!isTypeLegal(I->getType(), VT))
1927   default: return false;
1928   case MVT::i8:  TypeIndex = 0; break;
1929   case MVT::i16: TypeIndex = 1; break;
1930   case MVT::i32: TypeIndex = 2; break;
1932 if (!Subtarget->is64Bit())
1937   switch (I->getOpcode()) {
1939   case Instruction::SDiv: OpIndex = 0; break;
1940   case Instruction::SRem: OpIndex = 1; break;
1941   case Instruction::UDiv: OpIndex = 2; break;
1942   case Instruction::URem: OpIndex = 3; break;
1945 const DivRemEntry &TypeEntry = OpTable[TypeIndex];
1946   const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
1947   Register Op0Reg = getRegForValue(I->getOperand(0));
1950   Register Op1Reg = getRegForValue(I->getOperand(1));
1956           TII.get(OpEntry.OpCopy), TypeEntry.LowInReg).addReg(Op0Reg);
1958 if (OpEntry.OpSignExtend) {
1959 if (OpEntry.IsOpSigned)
1961 TII.get(OpEntry.OpSignExtend));
1963 Register Zero32 = createResultReg(&X86::GR32RegClass);
1965 TII.get(X86::MOV32r0), Zero32);
1972 TII.get(Copy), TypeEntry.HighInReg)
1973             .addReg(Zero32, 0, X86::sub_16bit);
1976 TII.get(Copy), TypeEntry.HighInReg)
1980 TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
1987           TII.get(OpEntry.OpDivRem)).addReg(Op1Reg);
1996 unsigned ResultReg = 0;
1997   if ((I->getOpcode() == Instruction::SRem ||
1998 I->getOpcode() == Instruction::URem) &&
1999 OpEntry.DivRemResultReg == X86::AH && Subtarget->is64Bit()) {
2000 Register SourceSuperReg = createResultReg(&X86::GR16RegClass);
2001 Register ResultSuperReg = createResultReg(&X86::GR16RegClass);
2003             TII.get(Copy), SourceSuperReg).addReg(X86::AX);
2010     ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultSuperReg,
2015 ResultReg = createResultReg(TypeEntry.RC);
2017         .addReg(OpEntry.DivRemResultReg);
2019   updateValueMap(I, ResultReg);
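// Note on OpTable above: one DivRemEntry per legal integer type
// (i8/i16/i32/i64), each with four DivRemResult rows indexed as
// sdiv/srem/udiv/urem. Signed rows sign-extend the dividend into the high
// half (MOVSX16rr8 for i8, otherwise CWD/CDQ/CQO), unsigned rows zero it via
// MOV32r0. The quotient is read back from AL/AX/EAX/RAX and the remainder
// from AH/DX/EDX/RDX; on 64-bit targets an i8 remainder sitting in AH is
// copied out through a GR16 super-register first, since AH cannot be encoded
// together with a REX prefix.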
2026 bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
2028 if (!Subtarget->canUseCMOV())
2032   if (RetVT < MVT::i16 || RetVT > MVT::i64)
2037   bool NeedTest = true;
2043   const auto *CI = dyn_cast<CmpInst>(Cond);
2044   if (CI && (CI->getParent() == I->getParent())) {
2048 static const uint16_t SETFOpcTable[2][3] = {
2053 switch (Predicate) {
2056 SETFOpc = &SETFOpcTable[0][0];
2060 SETFOpc = &SETFOpcTable[1][0];
2076     if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
2080 Register FlagReg1 = createResultReg(&X86::GR8RegClass);
2081 Register FlagReg2 = createResultReg(&X86::GR8RegClass);
2083             FlagReg1).addImm(SETFOpc[0]);
2085             FlagReg2).addImm(SETFOpc[1]);
2086     auto const &II = TII.get(SETFOpc[2]);
2087 if (II.getNumDefs()) {
2088 Register TmpReg = createResultReg(&X86::GR8RegClass);
2097   } else if (foldX86XALUIntrinsic(CC, I, Cond)) {
2118   if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
2119 unsigned KCondReg = CondReg;
2120 CondReg = createResultReg(&X86::GR32RegClass);
2122 TII.get(TargetOpcode::COPY), CondReg)
2124     CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, X86::sub_8bit);
2134 Register RHSReg = getRegForValue(RHS);
2135 Register LHSReg = getRegForValue(LHS);
2136 if (!LHSReg || !RHSReg)
2141   Register ResultReg = fastEmitInst_rri(Opc, RC, RHSReg, LHSReg, CC);
2142   updateValueMap(I, ResultReg);
2151 bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
2155   const auto *CI = dyn_cast<FCmpInst>(I->getOperand(0));
2156   if (!CI || (CI->getParent() != I->getParent()))
2160       !((Subtarget->hasSSE1() && RetVT == MVT::f32) ||
2161         (Subtarget->hasSSE2() && RetVT == MVT::f64)))
2172 const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
2173 if (CmpRHSC && CmpRHSC->isNullValue())
2180   if (CC > 7 && !Subtarget->hasAVX())
2189 Register LHSReg = getRegForValue(LHS);
2190 Register RHSReg = getRegForValue(RHS);
2191 Register CmpLHSReg = getRegForValue(CmpLHS);
2192 Register CmpRHSReg = getRegForValue(CmpRHS);
2193 if (!LHSReg || !RHSReg || !CmpLHSReg || !CmpRHSReg)
2199 if (Subtarget->hasAVX512()) {
2204 unsigned CmpOpcode =
2205         (RetVT == MVT::f32) ? X86::VCMPSSZrr : X86::VCMPSDZrr;
2206 Register CmpReg = fastEmitInst_rri(CmpOpcode, VK1, CmpLHSReg, CmpRHSReg,
2211 Register ImplicitDefReg = createResultReg(VR128X);
2213 TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2217 unsigned MovOpcode =
2218         (RetVT == MVT::f32) ? X86::VMOVSSZrrk : X86::VMOVSDZrrk;
2219 unsigned MovReg = fastEmitInst_rrrr(MovOpcode, VR128X, RHSReg, CmpReg,
2220 ImplicitDefReg, LHSReg);
2222 ResultReg = createResultReg(RC);
2224             TII.get(TargetOpcode::COPY), ResultReg).addReg(MovReg);
2226   } else if (Subtarget->hasAVX()) {
2234 unsigned CmpOpcode =
2235         (RetVT == MVT::f32) ? X86::VCMPSSrr : X86::VCMPSDrr;
2236     unsigned BlendOpcode =
2237         (RetVT == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr;
2239 Register CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpRHSReg,
2241 Register VBlendReg = fastEmitInst_rrr(BlendOpcode, VR128, RHSReg, LHSReg,
2243 ResultReg = createResultReg(RC);
2245             TII.get(TargetOpcode::COPY), ResultReg).addReg(VBlendReg);
2248 static const uint16_t OpcTable[2][4] = {
2249 { X86::CMPSSrr, X86::ANDPSrr, X86::ANDNPSrr, X86::ORPSrr },
2250 { X86::CMPSDrr, X86::ANDPDrr, X86::ANDNPDrr, X86::ORPDrr }
2255     default: return false;
2256     case MVT::f32: Opc = &OpcTable[0][0]; break;
2257     case MVT::f64: Opc = &OpcTable[1][0]; break;
2261     Register CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpRHSReg, CC);
2262 Register AndReg = fastEmitInst_rr(Opc[1], VR128, CmpReg, LHSReg);
2263 Register AndNReg = fastEmitInst_rr(Opc[2], VR128, CmpReg, RHSReg);
2264 Register OrReg = fastEmitInst_rr(Opc[3], VR128, AndNReg, AndReg);
2265 ResultReg = createResultReg(RC);
2267             TII.get(TargetOpcode::COPY), ResultReg).addReg(OrReg);
2269   updateValueMap(I, ResultReg);
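// Note: three strategies appear above for a scalar FP select whose condition
// is an fcmp in the same block. With AVX-512 the compare produces a VK1 mask
// (VCMPSSZrr/VCMPSDZrr) and a masked move (VMOVSSZrrk/VMOVSDZrrk) picks the
// operand; with AVX the compare result drives VBLENDVPS/VBLENDVPD; without
// AVX the classic CMPSS/ANDPS/ANDNPS/ORPS bit-select sequence is emitted,
// and only the first eight SSE condition codes are usable (hence the
// "CC > 7" bail-out).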
2273 bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
2278   default: return false;
2279   case MVT::i8:  Opc = X86::CMOV_GR8;  break;
2280   case MVT::i16: Opc = X86::CMOV_GR16; break;
2281   case MVT::i32: Opc = X86::CMOV_GR32; break;
2283     Opc = Subtarget->hasAVX512() ? X86::CMOV_FR16X : X86::CMOV_FR16; break;
2285     Opc = Subtarget->hasAVX512() ? X86::CMOV_FR32X : X86::CMOV_FR32; break;
2287     Opc = Subtarget->hasAVX512() ? X86::CMOV_FR64X : X86::CMOV_FR64; break;
2296   const auto *CI = dyn_cast<CmpInst>(Cond);
2297   if (CI && (CI->getParent() == I->getParent())) {
2310     if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
2318     if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
2319 unsigned KCondReg = CondReg;
2320 CondReg = createResultReg(&X86::GR32RegClass);
2322 TII.get(TargetOpcode::COPY), CondReg)
2324       CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, X86::sub_8bit);
2334 Register LHSReg = getRegForValue(LHS);
2335 Register RHSReg = getRegForValue(RHS);
2336 if (!LHSReg || !RHSReg)
2342       fastEmitInst_rri(Opc, RC, RHSReg, LHSReg, CC);
2343   updateValueMap(I, ResultReg);
2347 bool X86FastISel::X86SelectSelect(const Instruction *I) {
2349   if (!isTypeLegal(I->getType(), RetVT))
2353   if (const auto *CI = dyn_cast<CmpInst>(I->getOperand(0))) {
2355     const Value *Opnd = nullptr;
2356 switch (Predicate) {
2363 Register OpReg = getRegForValue(Opnd);
2367 Register ResultReg = createResultReg(RC);
2369 TII.get(TargetOpcode::COPY), ResultReg)
2371       updateValueMap(I, ResultReg);
2377   if (X86FastEmitCMoveSelect(RetVT, I))
2381   if (X86FastEmitSSESelect(RetVT, I))
2386   if (X86FastEmitPseudoSelect(RetVT, I))
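// Note: X86SelectSelect tries the lowerings in order of preference -- integer
// CMOV (X86FastEmitCMoveSelect), then the SSE compare/blend path
// (X86FastEmitSSESelect), and finally the CMOV_* pseudo instructions
// (X86FastEmitPseudoSelect), which are expanded into explicit control flow
// later when no suitable CMOV or blend instruction exists for the type.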
2393 bool X86FastISel::X86SelectIntToFP(const Instruction *I, bool IsSigned) {
2398   bool HasAVX512 = Subtarget->hasAVX512();
2399   if (!Subtarget->hasAVX() || (!IsSigned && !HasAVX512))
2403   MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
2408   Register OpReg = getRegForValue(I->getOperand(0));
2414 static const uint16_t SCvtOpc[2][2][2] = {
2415 { { X86::VCVTSI2SSrr, X86::VCVTSI642SSrr },
2416 { X86::VCVTSI2SDrr, X86::VCVTSI642SDrr } },
2417 { { X86::VCVTSI2SSZrr, X86::VCVTSI642SSZrr },
2418 { X86::VCVTSI2SDZrr, X86::VCVTSI642SDZrr } },
2420 static const uint16_t UCvtOpc[2][2] = {
2421 { X86::VCVTUSI2SSZrr, X86::VCVTUSI642SSZrr },
2422 { X86::VCVTUSI2SDZrr, X86::VCVTUSI642SDZrr },
2426   if (I->getType()->isDoubleTy()) {
2428     Opcode = IsSigned ? SCvtOpc[HasAVX512][1][Is64Bit] : UCvtOpc[1][Is64Bit];
2429   } else if (I->getType()->isFloatTy()) {
2431 Opcode = IsSigned ? SCvtOpc[HasAVX512][0][Is64Bit] : UCvtOpc[0][Is64Bit];
2437 Register ImplicitDefReg = createResultReg(RC);
2439 TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2440 Register ResultReg = fastEmitInst_rr(Opcode, RC, ImplicitDefReg, OpReg);
2441   updateValueMap(I, ResultReg);
2445 bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
2446   return X86SelectIntToFP(I, true);
2449 bool X86FastISel::X86SelectUIToFP(const Instruction *I) {
2450   return X86SelectIntToFP(I, false);
2454 bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I,
2457   assert((I->getOpcode() == Instruction::FPExt ||
2458           I->getOpcode() == Instruction::FPTrunc) &&
2459          "Instruction must be an FPExt or FPTrunc!");
2460   bool HasAVX = Subtarget->hasAVX();
2462   Register OpReg = getRegForValue(I->getOperand(0));
2466 unsigned ImplicitDefReg;
2468 ImplicitDefReg = createResultReg(RC);
2470 TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2474 Register ResultReg = createResultReg(RC);
2480     MIB.addReg(ImplicitDefReg);
2483   updateValueMap(I, ResultReg);
2487 bool X86FastISel::X86SelectFPExt(const Instruction *I) {
2488   if (Subtarget->hasSSE2() && I->getType()->isDoubleTy() &&
2489 I->getOperand(0)->getType()->isFloatTy()) {
2490 bool HasAVX512 = Subtarget->hasAVX512();
2493 HasAVX512 ? X86::VCVTSS2SDZrr
2494 : Subtarget->hasAVX() ? X86::VCVTSS2SDrr : X86::CVTSS2SDrr;
2495     return X86SelectFPExtOrFPTrunc(I, Opc, TLI.getRegClassFor(MVT::f64));
2501 bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
2502   if (Subtarget->hasSSE2() && I->getType()->isFloatTy() &&
2503 I->getOperand(0)->getType()->isDoubleTy()) {
2504 bool HasAVX512 = Subtarget->hasAVX512();
2507 HasAVX512 ? X86::VCVTSD2SSZrr
2508 : Subtarget->hasAVX() ? X86::VCVTSD2SSrr : X86::CVTSD2SSrr;
2509     return X86SelectFPExtOrFPTrunc(I, Opc, TLI.getRegClassFor(MVT::f32));
2515 bool X86FastISel::X86SelectTrunc(const Instruction *I) {
2516   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
2517   EVT DstVT = TLI.getValueType(DL, I->getType());
2522   if (!TLI.isTypeLegal(SrcVT))
2525   Register InputReg = getRegForValue(I->getOperand(0));
2532     updateValueMap(I, InputReg);
2542   updateValueMap(I, ResultReg);
2546 bool X86FastISel::IsMemcpySmall(uint64_t Len) {
2547 return Len <= (Subtarget->is64Bit() ? 32 : 16);
2554 if (!IsMemcpySmall(Len))
2557 bool i64Legal = Subtarget->is64Bit();
2562 if (Len >= 8 && i64Legal)
2572     bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg);
2573     RV &= X86FastEmitStore(VT, Reg, DestAM);
2574     assert(RV && "Failed to emit load or store??");
2586 bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
2589   default: return false;
2590 case Intrinsic::convert_from_fp16:
2591 case Intrinsic::convert_to_fp16: {
2592 if (Subtarget->useSoftFloat() || !Subtarget->hasF16C())
2596 Register InputReg = getRegForValue(Op);
2601   bool IsFloatToHalf = II->getIntrinsicID() == Intrinsic::convert_to_fp16;
2602   if (IsFloatToHalf) {
2603     if (!Op->getType()->isFloatTy())
2610 unsigned ResultReg = 0;
2612 if (IsFloatToHalf) {
2620 unsigned Opc = Subtarget->hasVLX() ? X86::VCVTPS2PHZ128rr
2622 InputReg = fastEmitInst_ri(Opc, RC, InputReg, 4);
2625 Opc = Subtarget->hasAVX512() ? X86::VMOVPDI2DIZrr
2626 : X86::VMOVPDI2DIrr;
2627 ResultReg = createResultReg(&X86::GR32RegClass);
2632 unsigned RegIdx = X86::sub_16bit;
2633     ResultReg = fastEmitInst_extractsubreg(MVT::i16, ResultReg, RegIdx);
2635     assert(Op->getType()->isIntegerTy(16) && "Expected a 16-bit integer!");
2643 unsigned Opc = Subtarget->hasVLX() ? X86::VCVTPH2PSZ128rr
2645 InputReg = fastEmitInst_r(Opc, RC, InputReg);
2649     ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
2651 TII.get(TargetOpcode::COPY), ResultReg)
2655 updateValueMap(II, ResultReg);
2658 case Intrinsic::frameaddress: {
2666   if (!isTypeLegal(RetTy, VT))
2674   case MVT::i32: Opc = X86::MOV32rm; RC = &X86::GR32RegClass; break;
2675   case MVT::i64: Opc = X86::MOV64rm; RC = &X86::GR64RegClass; break;
2684   unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(*MF);
2686           (FrameReg == X86::EBP && VT == MVT::i32)) &&
2687 "Invalid Frame Register!");
2692 Register SrcReg = createResultReg(RC);
2694           TII.get(TargetOpcode::COPY), SrcReg).addReg(FrameReg);
2701   unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
2703 Register DestReg = createResultReg(RC);
2705 TII.get(Opc), DestReg), SrcReg);
2709 updateValueMap(II, SrcReg);
2712 case Intrinsic::memcpy: {
2713 const MemCpyInst *MCI = cast<MemCpyInst>(II);
2718   if (isa<ConstantInt>(MCI->getLength())) {
2722 if (IsMemcpySmall(Len)) {
2727 TryEmitSmallMemcpy(DestAM, SrcAM, Len);
2732 unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
2739   return lowerCallTo(II, "memcpy", II->arg_size() - 1);
2741 case Intrinsic::memset: {
2742 const MemSetInst *MSI = cast<MemSetInst>(II);
2747 unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
2754   return lowerCallTo(II, "memset", II->arg_size() - 1);
2756 case Intrinsic::stackprotector: {
2758   EVT PtrTy = TLI.getPointerTy(DL);
2768   if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
2771 case Intrinsic::dbg_declare: {
2779 "Expected inlined-at fields to agree");
2786 case Intrinsic::trap: {
2790 case Intrinsic::sqrt: {
2791 if (!Subtarget->hasSSE1())
2794   Type *RetTy = II->getCalledFunction()->getReturnType();
2797   if (!isTypeLegal(RetTy, VT))
2803 static const uint16_t SqrtOpc[3][2] = {
2804 { X86::SQRTSSr, X86::SQRTSDr },
2805 { X86::VSQRTSSr, X86::VSQRTSDr },
2806 { X86::VSQRTSSZr, X86::VSQRTSDZr },
2808 unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
2809 Subtarget->hasAVX() ? 1 :
2813   default: return false;
2814   case MVT::f32: Opc = SqrtOpc[AVXLevel][0]; break;
2815   case MVT::f64: Opc = SqrtOpc[AVXLevel][1]; break;
2818 const Value *SrcVal = II->getArgOperand(0);
2819 Register SrcReg = getRegForValue(SrcVal);
2825 unsigned ImplicitDefReg = 0;
2827 ImplicitDefReg = createResultReg(RC);
2829 TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2832 Register ResultReg = createResultReg(RC);
2838     MIB.addReg(ImplicitDefReg);
2842 updateValueMap(II, ResultReg);
2845 case Intrinsic::sadd_with_overflow:
2846 case Intrinsic::uadd_with_overflow:
2847 case Intrinsic::ssub_with_overflow:
2848 case Intrinsic::usub_with_overflow:
2849 case Intrinsic::smul_with_overflow:
2850 case Intrinsic::umul_with_overflow: {
2854   auto *Ty = cast<StructType>(Callee->getReturnType());
2858          "Overflow value expected to be an i1");
2861   if (!isTypeLegal(RetTy, VT))
2867   const Value *LHS = II->getArgOperand(0);
2868   const Value *RHS = II->getArgOperand(1);
2871 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
2875 switch (II->getIntrinsicID()) {
2877 case Intrinsic::sadd_with_overflow:
2879 case Intrinsic::uadd_with_overflow:
2881 case Intrinsic::ssub_with_overflow:
2883 case Intrinsic::usub_with_overflow:
2885 case Intrinsic::smul_with_overflow:
2887 case Intrinsic::umul_with_overflow:
2891 Register LHSReg = getRegForValue(LHS);
2895 unsigned ResultReg = 0;
2897   if (const auto *CI = dyn_cast<ConstantInt>(RHS)) {
2898 static const uint16_t Opc[2][4] = {
2899 { X86::INC8r, X86::INC16r, X86::INC32r, X86::INC64r },
2900 { X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r }
2906 ResultReg = createResultReg(TLI.getRegClassFor(VT));
2912 ResultReg = fastEmit_ri(VT, VT, BaseOpc, LHSReg, CI->getZExtValue());
2917 RHSReg = getRegForValue(RHS);
2920 ResultReg = fastEmit_rr(VT, VT, BaseOpc, LHSReg, RHSReg);
2927 { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
2928 static const MCPhysReg Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX };
2935 TLI.getRegClassFor(VT), RHSReg);
2938 { X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr };
2943 TII.get(TargetOpcode::COPY), X86::AL)
2945 ResultReg = fastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg);
2948 TLI.getRegClassFor(VT), LHSReg, RHSReg);
2955 Register ResultReg2 = createResultReg(&X86::GR8RegClass);
2956   assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers.");
2958           ResultReg2).addImm(CondCode);
2960 updateValueMap(II, ResultReg, 2);
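// Note (illustrative): for the *.with.overflow intrinsics the arithmetic
// result and the i1 overflow flag occupy two consecutive result registers
// (updateValueMap(II, ResultReg, 2)), with the flag produced by a setcc on
// the condition chosen above. Roughly, IR such as
//
//   %r  = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
//   %ov = extractvalue { i32, i1 } %r, 1
//
// becomes a 32-bit add followed by a setcc on the carry flag (X86::COND_B).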
2963 case Intrinsic::x86_sse_cvttss2si:
2964 case Intrinsic::x86_sse_cvttss2si64:
2965 case Intrinsic::x86_sse2_cvttsd2si:
2966 case Intrinsic::x86_sse2_cvttsd2si64: {
2968 switch (II->getIntrinsicID()) {
2970 case Intrinsic::x86_sse_cvttss2si:
2971 case Intrinsic::x86_sse_cvttss2si64:
2972 if (!Subtarget->hasSSE1())
2974     IsInputDouble = false;
2976   case Intrinsic::x86_sse2_cvttsd2si:
2977   case Intrinsic::x86_sse2_cvttsd2si64:
2978     if (!Subtarget->hasSSE2())
2980     IsInputDouble = true;
2984   Type *RetTy = II->getCalledFunction()->getReturnType();
2986   if (!isTypeLegal(RetTy, VT))
2989 static const uint16_t CvtOpc[3][2][2] = {
2990 { { X86::CVTTSS2SIrr, X86::CVTTSS2SI64rr },
2991 { X86::CVTTSD2SIrr, X86::CVTTSD2SI64rr } },
2992 { { X86::VCVTTSS2SIrr, X86::VCVTTSS2SI64rr },
2993 { X86::VCVTTSD2SIrr, X86::VCVTTSD2SI64rr } },
2994 { { X86::VCVTTSS2SIZrr, X86::VCVTTSS2SI64Zrr },
2995 { X86::VCVTTSD2SIZrr, X86::VCVTTSD2SI64Zrr } },
2997 unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
2998 Subtarget->hasAVX() ? 1 :
3003   case MVT::i32: Opc = CvtOpc[AVXLevel][IsInputDouble][0]; break;
3004   case MVT::i64: Opc = CvtOpc[AVXLevel][IsInputDouble][1]; break;
3008   const Value *Op = II->getArgOperand(0);
3009   while (auto *IE = dyn_cast<InsertElementInst>(Op)) {
3011     if (!isa<ConstantInt>(Index))
3013     unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
3016       Op = IE->getOperand(1);
3019       Op = IE->getOperand(0);
3026 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3030 updateValueMap(II, ResultReg);
3036 bool X86FastISel::fastLowerArguments() {
3048   if (Subtarget->isCallingConvWin64(CC))
3051 if (!Subtarget->is64Bit())
3054 if (Subtarget->useSoftFloat())
3058 unsigned GPRCnt = 0;
3059 unsigned FPRCnt = 0;
3060   for (auto const &Arg : F->args()) {
3061     if (Arg.hasAttribute(Attribute::ByVal) ||
3062 Arg.hasAttribute(Attribute::InReg) ||
3063 Arg.hasAttribute(Attribute::StructRet) ||
3064 Arg.hasAttribute(Attribute::SwiftSelf) ||
3065 Arg.hasAttribute(Attribute::SwiftAsync) ||
3066 Arg.hasAttribute(Attribute::SwiftError) ||
3067 Arg.hasAttribute(Attribute::Nest))
3074     EVT ArgVT = TLI.getValueType(DL, ArgTy);
3075     if (!ArgVT.isSimple()) return false;
3077     default: return false;
3084 if (!Subtarget->hasSSE1())
3097 static const MCPhysReg GPR32ArgRegs[] = {
3098 X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
3100 static const MCPhysReg GPR64ArgRegs[] = {
3101 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9
3104 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3105 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3109 unsigned FPRIdx = 0;
3110   for (auto const &Arg : F->args()) {
3111     MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
3119     case MVT::f64: SrcReg = XMMArgRegs[FPRIdx++]; break;
3125 Register ResultReg = createResultReg(RC);
3127 TII.get(TargetOpcode::COPY), ResultReg)
3129     updateValueMap(&Arg, ResultReg);
3137 if (Subtarget->is64Bit())
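// Note: as the checks above suggest, the fast argument-lowering path only
// fires for the plain 64-bit SysV convention (Win64, 32-bit and soft-float
// targets bail out) and only for unannotated scalar arguments; it simply
// copies from the fixed argument registers (EDI/ESI/EDX/ECX/R8D/R9D or their
// 64-bit counterparts, and XMM0-XMM7 for FP) into fresh virtual registers.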
3154 bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3155 auto &OutVals = CLI.OutVals;
3156 auto &OutFlags = CLI.OutFlags;
3157 auto &OutRegs = CLI.OutRegs;
3158   auto &Ins = CLI.Ins;
3159 auto &InRegs = CLI.InRegs;
3161 bool &IsTailCall = CLI.IsTailCall;
3162 bool IsVarArg = CLI.IsVarArg;
3165 const auto *CB = CLI.CB;
3167 bool Is64Bit = Subtarget->is64Bit();
3168   bool IsWin64 = Subtarget->isCallingConvWin64(CC);
3172 if (CB && CB->doesNoCfCheck())
3176   if ((CB && isa<CallInst>(CB) && CB->hasFnAttr("no_caller_saved_registers")))
3180   if ((CB && CB->hasFnAttr("no_callee_saved_registers")))
3188 if (Subtarget->useIndirectThunkCalls())
3193   default: return false;
3221 if (IsVarArg && IsWin64)
3225 if (CLI.CB && CLI.CB->hasInAllocaArgument())
3228   for (auto Flag : CLI.OutFlags)
3229     if (Flag.isSwiftError() || Flag.isPreallocated())
3238   for (int i = 0, e = OutVals.size(); i != e; ++i) {
3239 Value *&Val = OutVals[i];
3241     if (auto *CI = dyn_cast<ConstantInt>(Val)) {
3242 if (CI->getBitWidth() < 32) {
3253 auto *TI = dyn_cast<TruncInst>(Val);
3255 if (TI && TI->getType()->isIntegerTy(1) && CLI.CB &&
3256 (TI->getParent() == CLI.CB->getParent()) && TI->hasOneUse()) {
3257 Value *PrevVal = TI->getOperand(0);
3258 ResultReg = getRegForValue(PrevVal);
3263       if (!isTypeLegal(PrevVal->getType(), VT))
3266       ResultReg = fastEmit_ri(VT, VT, ISD::AND, ResultReg, 1);
3268       if (!isTypeLegal(Val->getType(), VT) ||
3271 ResultReg = getRegForValue(Val);
3287     CCInfo.AllocateStack(32, Align(8));
3289   CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86);
3292 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3295   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3301   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3309     unsigned ArgReg = ArgRegs[VA.getValNo()];
3316 "Unexpected extend");
3323       assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
3329 "Unexpected extend");
3334         ArgReg = fastEmitZExtFromI1(MVT::i8, ArgReg);
3343       assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
3349 "Unexpected extend");
3359       assert(Emitted && "Failed to emit a aext!"); (void)Emitted;
3365       assert(ArgReg && "Failed to emit a bitcast!");
3393 if (isa<UndefValue>(ArgVal))
3399       AM.Disp = LocMemOffset;
3405       if (Flags.isByVal()) {
3408         if (!TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize()))
3410       } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) {
3414 if (!X86FastEmitStore(ArgVT, ArgVal, AM, MMO))
3417 if (!X86FastEmitStore(ArgVT, ArgReg, AM, MMO))
3425 if (Subtarget->isPICStyleGOT()) {
3426     unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3431 if (Is64Bit && IsVarArg && !IsWin64) {
3442 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3443 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3445 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
3446 assert((Subtarget->hasSSE1() || !NumXMMRegs)
3447            && "SSE registers cannot be used when SSE is disabled");
3449             X86::AL).addImm(NumXMMRegs);
3455   if (!X86SelectCallAddress(Callee, CalleeAM))
3458 unsigned CalleeOp = 0;
3460   if (CalleeAM.GV != nullptr) {
3462   } else if (CalleeAM.Base.Reg != 0) {
3471 unsigned CallOpc = Is64Bit ? X86::CALL64r : X86::CALL32r;
3476     assert(GV && "Not a direct call");
3478 unsigned char OpFlags = Subtarget->classifyGlobalFunctionReference(GV);
3486 unsigned CallOpc = NeedLoad
3487 ? (Is64Bit ? X86::CALL64m : X86::CALL32m)
3488 : (Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32);
3494       MIB.addSym(Symbol, OpFlags);
3506 if (Subtarget->isPICStyleGOT())
3509 if (Is64Bit && IsVarArg && !IsWin64)
3513   for (auto Reg : OutRegs)
3517 unsigned NumBytesForCalleeToPop =
3519 TM.Options.GuaranteedTailCallOpt)
3522   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3529 CLI.RetTy->getContext());
3530   CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86);
3534   for (unsigned i = 0; i != RVLocs.size(); ++i) {
3537 unsigned CopyReg = ResultReg + i;
3542         ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
3548 if ((SrcReg == X86::FP0 || SrcReg == X86::FP1) &&
3549         isScalarFPTypeInSSEReg(VA.getValVT())) {
3551 CopyReg = createResultReg(&X86::RFP80RegClass);
3556             TII.get(TargetOpcode::COPY), CopyReg).addReg(SrcReg);
3564       unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
3570       Opc = ResVT == MVT::f32 ? X86::MOVSSrm_alt : X86::MOVSDrm_alt;
3572 TII.get(Opc), ResultReg + i), FI);
3576 CLI.ResultReg = ResultReg;
3577   CLI.NumResultRegs = RVLocs.size();
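// Note on the return-value loop above: an FP result that arrives on the x87
// stack (FP0/FP1) but is expected in an SSE register is bounced through
// memory -- it is copied into an RFP80 virtual register, spilled with
// ST_Fp80m32/ST_Fp80m64 to a stack slot, and reloaded into the final result
// register with MOVSSrm_alt/MOVSDrm_alt.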
3584 X86FastISel::fastSelectInstruction(const Instruction *I) {
3585   switch (I->getOpcode()) {
3587   case Instruction::Load:
3588     return X86SelectLoad(I);
3589   case Instruction::Store:
3590     return X86SelectStore(I);
3591   case Instruction::Ret:
3592     return X86SelectRet(I);
3593   case Instruction::ICmp:
3594   case Instruction::FCmp:
3595     return X86SelectCmp(I);
3596   case Instruction::ZExt:
3597     return X86SelectZExt(I);
3598   case Instruction::SExt:
3599     return X86SelectSExt(I);
3600   case Instruction::Br:
3601     return X86SelectBranch(I);
3602   case Instruction::LShr:
3603   case Instruction::AShr:
3604   case Instruction::Shl:
3605     return X86SelectShift(I);
3606   case Instruction::SDiv:
3607   case Instruction::UDiv:
3608   case Instruction::SRem:
3609   case Instruction::URem:
3610     return X86SelectDivRem(I);
3611   case Instruction::Select:
3612     return X86SelectSelect(I);
3613   case Instruction::Trunc:
3614     return X86SelectTrunc(I);
3615   case Instruction::FPExt:
3616     return X86SelectFPExt(I);
3617   case Instruction::FPTrunc:
3618     return X86SelectFPTrunc(I);
3619   case Instruction::SIToFP:
3620     return X86SelectSIToFP(I);
3621   case Instruction::UIToFP:
3622     return X86SelectUIToFP(I);
3623   case Instruction::IntToPtr:
3624   case Instruction::PtrToInt: {
3625     EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
3626     EVT DstVT = TLI.getValueType(DL, I->getType());
3628       return X86SelectZExt(I);
3630       return X86SelectTrunc(I);
3632     if (Reg == 0) return false;
3633     updateValueMap(I, Reg);
3636 case Instruction::BitCast: {
3638 if (!Subtarget->hasSSE2())
3642     if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT) ||
3643         !isTypeLegal(I->getType(), DstVT))
3659 Register ResultReg = createResultReg(DstClass);
3661             TII.get(TargetOpcode::COPY), ResultReg).addReg(Reg);
3663     updateValueMap(I, ResultReg);
3671 unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
3677     Register SrcReg = fastEmitInst_(X86::MOV32r0, &X86::GR32RegClass);
3682       return fastEmitInst_extractsubreg(MVT::i8, SrcReg, X86::sub_8bit);
3684       return fastEmitInst_extractsubreg(MVT::i16, SrcReg, X86::sub_16bit);
3688 Register ResultReg = createResultReg(&X86::GR64RegClass);
3690 TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
3703   case MVT::i8:  Opc = X86::MOV8ri;  break;
3704   case MVT::i16: Opc = X86::MOV16ri; break;
3705   case MVT::i32: Opc = X86::MOV32ri; break;
3707 if (isUInt<32>(Imm))
3708 Opc = X86::MOV32ri64;
3709 else if (isInt<32>(Imm))
3710 Opc = X86::MOV64ri32;
3716 return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
3719 unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
3721 return fastMaterializeFloatZero(CFP);
3730 bool HasSSE1 = Subtarget->hasSSE1();
3731 bool HasSSE2 = Subtarget->hasSSE2();
3732 bool HasAVX = Subtarget->hasAVX();
3733 bool HasAVX512 = Subtarget->hasAVX512();
3737 Opc = HasAVX512 ? X86::VMOVSSZrm_alt
3738 : HasAVX ? X86::VMOVSSrm_alt
3739 : HasSSE1 ? X86::MOVSSrm_alt
3743 Opc = HasAVX512 ? X86::VMOVSDZrm_alt
3744 : HasAVX ? X86::VMOVSDrm_alt
3745 : HasSSE2 ? X86::MOVSDrm_alt
3757 unsigned PICBase = 0;
3758   unsigned char OpFlag = Subtarget->classifyLocalReference(nullptr);
3760     PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3762     PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3767 unsigned CPI = MCP.getConstantPoolIndex(CFP, Alignment);
3772 Register AddrReg = createResultReg(&X86::GR64RegClass);
3777 TII.get(Opc), ResultReg);
3778     addRegReg(MIB, AddrReg, false, PICBase, false);
3787 TII.get(Opc), ResultReg),
3788 CPI, PICBase, OpFlag);
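// Note: ordinary FP constants are materialized as constant-pool loads with
// the same MOVSS/MOVSD (or VEX/EVEX) "_alt" opcodes selected above; the pool
// entry is addressed either off a PIC base register / RIP, or, in the large
// code model, through an address first materialized into a 64-bit register
// and combined with the PIC base via addRegReg.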
3792 unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) {
3806 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3817 ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
3820 TII.get(Opc), ResultReg), AM);
3827 unsigned X86FastISel::fastMaterializeConstant(const Constant *C) {
3828   EVT CEVT = TLI.getValueType(DL, C->getType(), true);
3835   if (const auto *CI = dyn_cast<ConstantInt>(C))
3836     return X86MaterializeInt(CI, VT);
3837   if (const auto *CFP = dyn_cast<ConstantFP>(C))
3838     return X86MaterializeFP(CFP, VT);
3839   if (const auto *GV = dyn_cast<GlobalValue>(C))
3840     return X86MaterializeGV(GV, VT);
3841   if (isa<UndefValue>(C)) {
3847 if (!Subtarget->hasSSE1())
3848 Opc = X86::LD_Fp032;
3851 if (!Subtarget->hasSSE2())
3852 Opc = X86::LD_Fp064;
3855 Opc = X86::LD_Fp080;
3860 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3870 unsigned X86FastISel::fastMaterializeAlloca(const AllocaInst *C) {
3880   assert(C->isStaticAlloca() && "dynamic alloca in the static alloca map?");
3887 ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
3890 Register ResultReg = createResultReg(RC);
3892 TII.get(Opc), ResultReg), AM);
3896 unsigned X86FastISel::fastMaterializeFloatZero(const ConstantFP *CF) {
3898   if (!isTypeLegal(CF->getType(), VT))
3902 bool HasSSE1 = Subtarget->hasSSE1();
3903 bool HasSSE2 = Subtarget->hasSSE2();
3904 bool HasAVX512 = Subtarget->hasAVX512();
3909 Opc = HasAVX512 ? X86::AVX512_FsFLD0SH : X86::FsFLD0SH;
3912 Opc = HasAVX512 ? X86::AVX512_FsFLD0SS
3913 : HasSSE1 ? X86::FsFLD0SS
3917 Opc = HasAVX512 ? X86::AVX512_FsFLD0SD
3918 : HasSSE2 ? X86::FsFLD0SD
3926 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3932 bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
3957 unsigned OperandNo = 0;
3959        E = Result->operands_end(); I != E; ++I, ++OperandNo) {
3966     if (IndexReg == MO.getReg())
3971   Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
3974   removeDeadCode(I, std::next(I));
3978 unsigned X86FastISel::fastEmitInst_rrrr(unsigned MachineInstOpcode,
3980                                         unsigned Op0, unsigned Op1,
3981                                         unsigned Op2, unsigned Op3) {
3984 Register ResultReg = createResultReg(RC);
4013 return new X86FastISel(funcInfo, libInfo);
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu Simplify well known AMD library false FunctionCallee Callee
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
SmallVector< MachineOperand, 4 > Cond
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the FastISel class.
const HexagonInstrInfo * TII
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
unsigned const TargetRegisterInfo * TRI
typename CallsiteContextGraph< DerivedCCG, FuncTy, CallTy >::FuncInfo FuncInfo
const char LLVMTargetMachineRef TM
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC)
If we have a comparison with RHS as the RHS of the comparison, return an opcode that works for the co...
static std::pair< unsigned, bool > getX86SSEConditionCode(CmpInst::Predicate Predicate)
static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget, CallingConv::ID CC, const CallBase *CB)
static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget)
static void X86SelectAddress(const MachineInstr &I, const MachineRegisterInfo &MRI, X86AddressMode &AM)
an instruction to allocate memory on the stack
This class represents an incoming formal argument to a Function.
LLVM Basic Block Representation.
InstListType::const_iterator const_iterator
Conditional or Unconditional Branch instruction.
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
CCState - This class holds information needed while lowering arguments and return values.
CCValAssign - Represent assignment of one arg/retval to a location.
unsigned getLocMemOffset() const
Register getLocReg() const
LocInfo getLocInfo() const
unsigned getValNo() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
This class is the base class for the comparison instructions.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Predicate getPredicate() const
Return the predicate for this instruction.
A constant value that is initialized with an expression using other constant values.
static Constant * getZExt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static Constant * getSExt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
ConstantFP - Floating Point Values [float, double].
This is the shared class of boolean and integer constants.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
This is an important base class in LLVM.
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
bool isValidLocationForIntrinsic(const DILocation *DL) const
Check that a location is valid for this variable.
This represents the llvm.dbg.declare instruction.
Value * getAddress() const
DILocalVariable * getVariable() const
DIExpression * getExpression() const
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
virtual unsigned fastMaterializeFloatZero(const ConstantFP *CF)
Emit the floating-point constant +0.0 in a register using target- specific logic.
virtual bool fastLowerIntrinsicCall(const IntrinsicInst *II)
This method is called by target-independent code to do target- specific intrinsic lowering.
virtual bool tryToFoldLoadIntoMI(MachineInstr *, unsigned, const LoadInst *)
The specified machine instr operand is a vreg, and that vreg is being provided by the specified load ...
virtual unsigned fastMaterializeConstant(const Constant *C)
Emit a constant in a register using target-specific logic, such as constant pool loads.
virtual bool fastLowerCall(CallLoweringInfo &CLI)
This method is called by target-independent code to do target- specific call lowering.
virtual bool fastLowerArguments()
This method is called by target-independent code to do target- specific argument lowering.
virtual bool fastSelectInstruction(const Instruction *I)=0
This method is called by target-independent code when the normal FastISel process fails to select an ...
virtual unsigned fastMaterializeAlloca(const AllocaInst *C)
Emit an alloca address in a register using target-specific logic.
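A sketch of how a target plugs into these hooks; MyTargetFastISel is a hypothetical subclass written for illustration, not the actual X86 implementation:

#include "llvm/CodeGen/FastISel.h"
using namespace llvm;

// Sketch only; MyTargetFastISel is a hypothetical subclass.
class MyTargetFastISel final : public FastISel {
public:
  MyTargetFastISel(FunctionLoweringInfo &FuncInfo,
                   const TargetLibraryInfo *LibInfo)
      : FastISel(FuncInfo, LibInfo) {}

  bool fastSelectInstruction(const Instruction *I) override {
    // Returning false makes the caller fall back to SelectionDAG.
    return false;
  }
  unsigned fastMaterializeConstant(const Constant *C) override {
    return 0; // 0 signals that no register was produced.
  }
};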
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
Type * getReturnType() const
Returns the type of the ret val.
An instruction for type-safe pointer arithmetic to access elements of arrays and structs.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
const BasicBlock * getParent() const
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
An instruction for reading from memory.
Value * getPointerOperand()
Align getAlign() const
Return the alignment of the access that is being performed.
bool usesWindowsCFI() const
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
ArrayRef< MCPhysReg > implicit_defs() const
Return a list of registers that are potentially written by any instance of this machine instruction.
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
MVT getVectorElementType() const
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setStackProtectorIndex(int I)
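A brief sketch, assuming MF is a MachineFunction reference, of creating a fixed-size stack object through these calls:

// Sketch only; MF is an assumed MachineFunction reference.
MachineFrameInfo &MFI = MF.getFrameInfo();
int FI = MFI.CreateStackObject(/*Size=*/16, Align(16), /*isSpillSlot=*/false);
MFI.setFrameAddressIsTaken(true);
(void)FI;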
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addMetadata(const MDNode *MD) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
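These builder methods chain off BuildMI (listed further below). A sketch, where MBB, InsertPt, MIMD, TII, DstReg and SrcReg are assumed to be in scope and X86::ADD32ri is only an illustrative opcode:

// Sketch only; all names outside the builder calls are assumed to exist.
BuildMI(MBB, InsertPt, MIMD, TII.get(X86::ADD32ri), DstReg)
    .addReg(SrcReg)
    .addImm(42);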
Representation of each machine instruction.
unsigned getNumOperands() const
Returns the total number of operands.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
void addMemOperand(MachineFunction &MF, MachineMemOperand *MO)
Add a MachineMemOperand to the machine instruction.
A description of a memory reference used in the backend.
bool isNonTemporal() const
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
Register getReg() const
getReg - Returns the register number.
This class wraps the llvm.memcpy intrinsic.
Value * getLength() const
Value * getRawDest() const
unsigned getDestAddressSpace() const
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
Value * getRawSource() const
Return the arguments to the instruction.
unsigned getSourceAddressSpace() const
Wrapper class representing virtual and physical registers.
Return a value (possibly void), from a function.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Value * getValueOperand()
Value * getPointerOperand()
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
uint64_t getElementOffset(unsigned Idx) const
Class to represent struct types.
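A sketch, assuming DL is a DataLayout and STy a StructType pointer, of querying a field offset through StructLayout:

// Sketch only; DL and STy are assumed to be in scope.
const StructLayout *SL = DL.getStructLayout(STy);
uint64_t FieldOffset = SL->getElementOffset(/*Idx=*/1); // byte offset of field 1
(void)FieldOffset;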
Provides information about what library functions are available for the current target.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
bool isOSMSVCRT() const
Is this a "Windows" OS targeting a "MSVCRT.dll" environment.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
bool isArrayTy() const
True if this is an instance of ArrayType.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isStructTy() const
True if this is an instance of StructType.
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVMContext & getContext() const
All values hold a context through their type.
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
foldMemoryOperand - If this target supports it, fold a load or store of the specified stack slot into...
X86MachineFunctionInfo - This class is derived from MachineFunction and contains private X86 target-s...
Register getSRetReturnReg() const
unsigned getBytesToPopOnReturn() const
const Triple & getTargetTriple() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ X86_64_SysV
The C convention as specified in the x86-64 supplement to the System V ABI, used on most non-Windows ...
@ HiPE
Used by the High-Performance Erlang Compiler (HiPE).
@ Swift
Calling convention for Swift.
@ CFGuard_Check
Special calling convention on Windows for calling the Control Guard Check ICall function.
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
@ X86_ThisCall
Similar to X86_StdCall.
@ X86_StdCall
stdcall is mostly used by the Win32 API.
@ WebKit_JS
Used for stack based JavaScript calls.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
@ Win64
The C convention as implemented on Windows/x86-64 and AArch64.
@ SwiftTail
This follows the Swift calling convention in how arguments are passed but guarantees tail calls will ...
@ C
The default llvm calling convention, compatible with C.
@ X86_FastCall
'fast' analog of X86_StdCall.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
@ ADD
Simple integer binary arithmetic operators.
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ SIGN_EXTEND
Conversion operators.
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ AND
Bitwise operators - logical and, logical or, logical xor.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
Reg
All possible values of the reg field in the ModR/M byte.
@ MO_GOTPCREL_NORELAX
MO_GOTPCREL_NORELAX - Same as MO_GOTPCREL except that R_X86_64_GOTPCREL relocations are guaranteed to...
@ MO_GOTOFF
MO_GOTOFF - On a symbol operand this indicates that the immediate is the offset to the location of th...
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand "FOO", this indicates that the reference is actually to the "__imp...
@ MO_PIC_BASE_OFFSET
MO_PIC_BASE_OFFSET - On a symbol operand this indicates that the immediate should get the value of th...
@ MO_GOTPCREL
MO_GOTPCREL - On a symbol operand this indicates that the immediate is offset to the GOT entry for th...
std::pair< CondCode, bool > getX86ConditionCode(CmpInst::Predicate Predicate)
Return a pair of condition code for the given predicate and whether the instruction operands should b...
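A sketch of the usual call pattern, assuming Pred holds a CmpInst::Predicate value:

// Sketch only; Pred is an assumed CmpInst::Predicate.
auto [CC, NeedSwap] = X86::getX86ConditionCode(Pred);
// NeedSwap indicates the compare operands should be swapped before emitting CC.
(void)CC; (void)NeedSwap;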
bool isCalleePop(CallingConv::ID CallingConv, bool is64Bit, bool IsVarArg, bool GuaranteeTCO)
Determines whether the callee is required to pop its own arguments.
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo)
unsigned getCMovOpcode(unsigned RegBytes, bool HasMemoryOperand=false)
Return a cmov opcode for the given register size in bytes, and operand type.
@ Emitted
Assigned address, still materializing.
This is an optimization pass for GlobalISel generic memory operations.
static bool isGlobalStubReference(unsigned char TargetFlag)
isGlobalStubReference - Return true if the specified TargetFlag operand is a reference to a stub for ...
static bool isGlobalRelativeToPICBase(unsigned char TargetFlag)
isGlobalRelativeToPICBase - Return true if the specified global value reference is relative to a 32-b...
Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed...
void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
auto successors(const MachineBasicBlock *BB)
bool RetCC_X86(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the...
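A sketch combining addFrameReference with BuildMI to load from a stack slot; the opcode and the surrounding names (MBB, InsertPt, MIMD, TII, Reg, FI) are assumed for illustration:

// Sketch only; loads a 32-bit value from frame index FI into Reg.
addFrameReference(BuildMI(MBB, InsertPt, MIMD, TII.get(X86::MOV32rm), Reg), FI);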
static const MachineInstrBuilder & addFullAddress(const MachineInstrBuilder &MIB, const X86AddressMode &AM)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
static const MachineInstrBuilder & addConstantPoolReference(const MachineInstrBuilder &MIB, unsigned CPI, unsigned GlobalBaseReg, unsigned char OpFlags)
addConstantPoolReference - This function is used to add a reference to the base of a constant value s...
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
static const MachineInstrBuilder & addRegReg(const MachineInstrBuilder &MIB, unsigned Reg1, bool isKill1, unsigned Reg2, bool isKill2)
addRegReg - This function is used to add a memory reference of the form: [Reg + Reg].
unsigned getKillRegState(bool B)
gep_type_iterator gep_type_begin(const User *GEP)
bool CC_X86(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
static const MachineInstrBuilder & addDirectMem(const MachineInstrBuilder &MIB, unsigned Reg)
addDirectMem - This function is used to add a direct memory reference to the current instruction – th...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
bool bitsLT(EVT VT) const
Return true if this has fewer bits than VT.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
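A sketch of the usual way to narrow an EVT to an MVT; TLI, DL and Ty are assumed to be in scope:

// Sketch only; TLI, DL and Ty are assumed to be in scope.
EVT VT = TLI.getValueType(DL, Ty, /*AllowUnknown=*/true);
if (VT.isSimple()) {
  MVT SimpleVT = VT.getSimpleVT(); // safe only when isSimple() is true
  (void)SimpleVT;
}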
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
X86AddressMode - This struct holds a generalized full x86 address mode.
void getFullAddress(SmallVectorImpl< MachineOperand > &MO)
union llvm::X86AddressMode::@599 Base
enum llvm::X86AddressMode::@598 BaseType
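A closing sketch of filling in an X86AddressMode and feeding it to addFullAddress; the register names and the LEA opcode are illustrative only:

// Sketch only; MBB, InsertPt, MIMD, TII, DstReg, BaseReg and IndexReg are assumed.
X86AddressMode AM;                 // defaults to a register base
AM.Base.Reg = BaseReg;
AM.Scale = 4;
AM.IndexReg = IndexReg;
AM.Disp = 8;                       // addresses [BaseReg + 4*IndexReg + 8]
addFullAddress(BuildMI(MBB, InsertPt, MIMD, TII.get(X86::LEA64r), DstReg), AM);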