40#include "llvm/Config/llvm-config.h"
51#define DEBUG_TYPE "x86-codegen"
// Statistic counters declared for this pass. The string passed to each
// STATISTIC invocation is the description of what that counter reports:
// NumFXCH for inserted fxch instructions, NumFP for floating point
// instructions.
53STATISTIC(NumFXCH,
"Number of fxch instructions inserted");
54STATISTIC(NumFP ,
"Number of floating point instructions");
// Register index used as a temporary name. Elsewhere in this file it is passed
// as the AsReg argument of duplicateToTop() when a value has to be copied to
// the top of the stack before being consumed. FP register operands are checked
// against the range FP0..FP6 (indices 0..6), so index 7 lies just past that
// range.
// NOTE(review): presumably chosen so it never collides with a register index
// that can appear as an operand -- confirm against the declaration of NumFPRegs.
57 const unsigned ScratchFPReg = 7;
64 memset(Stack, 0,
sizeof(Stack));
65 memset(RegMap, 0,
sizeof(RegMap));
80 MachineFunctionProperties::Property::NoVRegs);
105 unsigned FixCount = 0;
109 unsigned char FixStack[8];
// Defaulted constructor: members keep whatever in-class initializers they
// declare (FixCount, for example, is declared with "= 0").
111 LiveBundle() =
default;
// Returns true when Mask is zero or FixCount is nonzero; returns false only
// when Mask has bits set and FixCount is still 0.
// NOTE(review): presumably a zero Mask means no registers are live in the
// bundle, so there is no layout left to decide -- the Mask declaration is not
// visible here, confirm.
114 bool isFixed()
const {
return !
Mask || FixCount; }
130 static_assert(X86::FP6 - X86::FP0 == 6,
"sequential regnums");
131 if (Reg >= X86::FP0 && Reg <= X86::FP6) {
153 unsigned StackTop = 0;
163 unsigned RegMap[NumFPRegs];
166 void setupBlockStack();
169 void finishBlockStack();
171#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
172 void dumpStack()
const {
173 dbgs() <<
"Stack contents:";
174 for (
unsigned i = 0; i != StackTop; ++i) {
176 assert(RegMap[Stack[i]] == i &&
"Stack[] doesn't match RegMap[]!");
183 unsigned getSlot(
unsigned RegNo)
const {
184 assert(RegNo < NumFPRegs &&
"Regno out of range!");
185 return RegMap[RegNo];
189 bool isLive(
unsigned RegNo)
const {
190 unsigned Slot = getSlot(RegNo);
195 unsigned getStackEntry(
unsigned STi)
const {
198 return Stack[StackTop-1-STi];
203 unsigned getSTReg(
unsigned RegNo)
const {
204 return StackTop - 1 - getSlot(RegNo) + X86::ST0;
208 void pushReg(
unsigned Reg) {
209 assert(Reg < NumFPRegs &&
"Register number out of range!");
213 RegMap[
Reg] = StackTop++;
220 RegMap[
Stack[--StackTop]] = ~0;
// Returns true when the slot recorded for RegNo (via getSlot) is StackTop-1.
// pushReg() assigns slots with "RegMap[Reg] = StackTop++", so StackTop-1 is
// the slot of the most recently pushed register.
// NOTE(review): StackTop is unsigned, so StackTop-1 wraps around when the
// stack is empty; callers appear to assume RegNo is live -- confirm.
223 bool isAtTop(
unsigned RegNo)
const {
return getSlot(RegNo) == StackTop-1; }
226 if (isAtTop(RegNo))
return;
228 unsigned STReg = getSTReg(RegNo);
229 unsigned RegOnTop = getStackEntry(0);
232 std::swap(RegMap[RegNo], RegMap[RegOnTop]);
235 if (RegMap[RegOnTop] >= StackTop)
237 std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
244 void duplicateToTop(
unsigned RegNo,
unsigned AsReg,
247 unsigned STReg = getSTReg(RegNo);
274 void shuffleStackTop(
const unsigned char *FixStack,
unsigned FixCount,
294 return X86::RFP80RegClass.contains(DstReg) ||
295 X86::RFP80RegClass.contains(SrcReg);
317 assert(Reg >= X86::FP0 && Reg <= X86::FP6 &&
"Expected FP register!");
318 return Reg - X86::FP0;
327 bool FPIsUsed =
false;
329 static_assert(X86::FP6 == X86::FP0+6,
"Register enums aren't sorted right!");
331 for (
unsigned i = 0; i <= 6; ++i)
332 if (!
MRI.reg_nodbg_empty(X86::FP0 + i)) {
338 if (!FPIsUsed)
return false;
340 Bundles = &getAnalysis<EdgeBundles>();
344 bundleCFGRecomputeKillFlags(MF);
354 LiveBundles[Bundles->
getBundle(Entry->getNumber(),
false)];
358 if ((Entry->getParent()->getFunction().getCallingConv() ==
366 assert((Bundle.Mask & 0xFE) == 0 &&
367 "Only FP0 could be passed as an argument");
369 Bundle.FixStack[0] = 0;
372 bool Changed =
false;
374 Changed |= processBasicBlock(MF, *BB);
379 if (Processed.
insert(&BB).second)
380 Changed |= processBasicBlock(MF, BB);
393 assert(LiveBundles.
empty() &&
"Stale data in LiveBundles");
400 const unsigned Mask = calcLiveInMask(&
MBB,
false);
412 bool Changed =
false;
422 if (
MI.isInlineAsm())
425 if (
MI.isCopy() && isFPCopy(
MI))
428 if (
MI.isImplicitDef() &&
429 X86::RFP80RegClass.contains(
MI.getOperand(0).getReg()))
440 PrevMI = &*std::prev(
I);
449 if (MO.isReg() && MO.isDead())
452 switch (FPInstClass) {
465 for (
unsigned i = 0, e = DeadRegs.
size(); i != e; ++i) {
466 unsigned Reg = DeadRegs[i];
469 static_assert(X86::FP7 - X86::FP0 == 7,
"sequential FP regnumbers");
470 if (Reg >= X86::FP0 && Reg <= X86::FP6 && isLive(Reg-X86::FP0)) {
471 LLVM_DEBUG(
dbgs() <<
"Register FP#" << Reg - X86::FP0 <<
" is dead!\n");
472 freeStackSlotAfter(
I, Reg-X86::FP0);
480 dbgs() <<
"Just deleted pseudo instruction\n";
484 while (Start != BB.
begin() && std::prev(Start) != PrevI)
486 dbgs() <<
"Inserted instructions:\n\t";
487 Start->print(
dbgs());
488 while (++Start != std::next(
I)) {
505void FPS::setupBlockStack() {
507 <<
" derived from " <<
MBB->
getName() <<
".\n");
510 const LiveBundle &Bundle =
519 assert(Bundle.isFixed() &&
"Reached block before any predecessors");
522 for (
unsigned i = Bundle.FixCount; i > 0; --i) {
524 <<
unsigned(Bundle.FixStack[i - 1]) <<
'\n');
525 pushReg(Bundle.FixStack[i-1]);
531 unsigned Mask = calcLiveInMask(
MBB,
true);
540void FPS::finishBlockStack() {
546 <<
" derived from " <<
MBB->
getName() <<
".\n");
550 LiveBundle &Bundle = LiveBundles[BundleIdx];
555 adjustLiveRegs(Bundle.Mask, Term);
564 if (Bundle.isFixed()) {
566 shuffleStackTop(Bundle.FixStack, Bundle.FixCount, Term);
570 Bundle.FixCount = StackTop;
571 for (
unsigned i = 0; i < StackTop; ++i)
572 Bundle.FixStack[i] = getStackEntry(i);
604#define ASSERT_SORTED(TABLE)
606#define ASSERT_SORTED(TABLE) \
608 static std::atomic<bool> TABLE##Checked(false); \
609 if (!TABLE##Checked.load(std::memory_order_relaxed)) { \
610 assert(is_sorted(TABLE) && \
611 "All lookup tables must be sorted for efficient access!"); \
612 TABLE##Checked.store(true, std::memory_order_relaxed); \
626 { X86::ABS_Fp32 , X86::ABS_F },
627 { X86::ABS_Fp64 , X86::ABS_F },
628 { X86::ABS_Fp80 , X86::ABS_F },
629 { X86::ADD_Fp32m , X86::ADD_F32m },
630 { X86::ADD_Fp64m , X86::ADD_F64m },
631 { X86::ADD_Fp64m32 , X86::ADD_F32m },
632 { X86::ADD_Fp80m32 , X86::ADD_F32m },
633 { X86::ADD_Fp80m64 , X86::ADD_F64m },
634 { X86::ADD_FpI16m32 , X86::ADD_FI16m },
635 { X86::ADD_FpI16m64 , X86::ADD_FI16m },
636 { X86::ADD_FpI16m80 , X86::ADD_FI16m },
637 { X86::ADD_FpI32m32 , X86::ADD_FI32m },
638 { X86::ADD_FpI32m64 , X86::ADD_FI32m },
639 { X86::ADD_FpI32m80 , X86::ADD_FI32m },
640 { X86::CHS_Fp32 , X86::CHS_F },
641 { X86::CHS_Fp64 , X86::CHS_F },
642 { X86::CHS_Fp80 , X86::CHS_F },
643 { X86::CMOVBE_Fp32 , X86::CMOVBE_F },
644 { X86::CMOVBE_Fp64 , X86::CMOVBE_F },
645 { X86::CMOVBE_Fp80 , X86::CMOVBE_F },
646 { X86::CMOVB_Fp32 , X86::CMOVB_F },
647 { X86::CMOVB_Fp64 , X86::CMOVB_F },
648 { X86::CMOVB_Fp80 , X86::CMOVB_F },
649 { X86::CMOVE_Fp32 , X86::CMOVE_F },
650 { X86::CMOVE_Fp64 , X86::CMOVE_F },
651 { X86::CMOVE_Fp80 , X86::CMOVE_F },
652 { X86::CMOVNBE_Fp32 , X86::CMOVNBE_F },
653 { X86::CMOVNBE_Fp64 , X86::CMOVNBE_F },
654 { X86::CMOVNBE_Fp80 , X86::CMOVNBE_F },
655 { X86::CMOVNB_Fp32 , X86::CMOVNB_F },
656 { X86::CMOVNB_Fp64 , X86::CMOVNB_F },
657 { X86::CMOVNB_Fp80 , X86::CMOVNB_F },
658 { X86::CMOVNE_Fp32 , X86::CMOVNE_F },
659 { X86::CMOVNE_Fp64 , X86::CMOVNE_F },
660 { X86::CMOVNE_Fp80 , X86::CMOVNE_F },
661 { X86::CMOVNP_Fp32 , X86::CMOVNP_F },
662 { X86::CMOVNP_Fp64 , X86::CMOVNP_F },
663 { X86::CMOVNP_Fp80 , X86::CMOVNP_F },
664 { X86::CMOVP_Fp32 , X86::CMOVP_F },
665 { X86::CMOVP_Fp64 , X86::CMOVP_F },
666 { X86::CMOVP_Fp80 , X86::CMOVP_F },
667 { X86::COM_FpIr32 , X86::COM_FIr },
668 { X86::COM_FpIr64 , X86::COM_FIr },
669 { X86::COM_FpIr80 , X86::COM_FIr },
670 { X86::COM_Fpr32 , X86::COM_FST0r },
671 { X86::COM_Fpr64 , X86::COM_FST0r },
672 { X86::COM_Fpr80 , X86::COM_FST0r },
673 { X86::DIVR_Fp32m , X86::DIVR_F32m },
674 { X86::DIVR_Fp64m , X86::DIVR_F64m },
675 { X86::DIVR_Fp64m32 , X86::DIVR_F32m },
676 { X86::DIVR_Fp80m32 , X86::DIVR_F32m },
677 { X86::DIVR_Fp80m64 , X86::DIVR_F64m },
678 { X86::DIVR_FpI16m32, X86::DIVR_FI16m},
679 { X86::DIVR_FpI16m64, X86::DIVR_FI16m},
680 { X86::DIVR_FpI16m80, X86::DIVR_FI16m},
681 { X86::DIVR_FpI32m32, X86::DIVR_FI32m},
682 { X86::DIVR_FpI32m64, X86::DIVR_FI32m},
683 { X86::DIVR_FpI32m80, X86::DIVR_FI32m},
684 { X86::DIV_Fp32m , X86::DIV_F32m },
685 { X86::DIV_Fp64m , X86::DIV_F64m },
686 { X86::DIV_Fp64m32 , X86::DIV_F32m },
687 { X86::DIV_Fp80m32 , X86::DIV_F32m },
688 { X86::DIV_Fp80m64 , X86::DIV_F64m },
689 { X86::DIV_FpI16m32 , X86::DIV_FI16m },
690 { X86::DIV_FpI16m64 , X86::DIV_FI16m },
691 { X86::DIV_FpI16m80 , X86::DIV_FI16m },
692 { X86::DIV_FpI32m32 , X86::DIV_FI32m },
693 { X86::DIV_FpI32m64 , X86::DIV_FI32m },
694 { X86::DIV_FpI32m80 , X86::DIV_FI32m },
695 { X86::ILD_Fp16m32 , X86::ILD_F16m },
696 { X86::ILD_Fp16m64 , X86::ILD_F16m },
697 { X86::ILD_Fp16m80 , X86::ILD_F16m },
698 { X86::ILD_Fp32m32 , X86::ILD_F32m },
699 { X86::ILD_Fp32m64 , X86::ILD_F32m },
700 { X86::ILD_Fp32m80 , X86::ILD_F32m },
701 { X86::ILD_Fp64m32 , X86::ILD_F64m },
702 { X86::ILD_Fp64m64 , X86::ILD_F64m },
703 { X86::ILD_Fp64m80 , X86::ILD_F64m },
704 { X86::ISTT_Fp16m32 , X86::ISTT_FP16m},
705 { X86::ISTT_Fp16m64 , X86::ISTT_FP16m},
706 { X86::ISTT_Fp16m80 , X86::ISTT_FP16m},
707 { X86::ISTT_Fp32m32 , X86::ISTT_FP32m},
708 { X86::ISTT_Fp32m64 , X86::ISTT_FP32m},
709 { X86::ISTT_Fp32m80 , X86::ISTT_FP32m},
710 { X86::ISTT_Fp64m32 , X86::ISTT_FP64m},
711 { X86::ISTT_Fp64m64 , X86::ISTT_FP64m},
712 { X86::ISTT_Fp64m80 , X86::ISTT_FP64m},
713 { X86::IST_Fp16m32 , X86::IST_F16m },
714 { X86::IST_Fp16m64 , X86::IST_F16m },
715 { X86::IST_Fp16m80 , X86::IST_F16m },
716 { X86::IST_Fp32m32 , X86::IST_F32m },
717 { X86::IST_Fp32m64 , X86::IST_F32m },
718 { X86::IST_Fp32m80 , X86::IST_F32m },
719 { X86::IST_Fp64m32 , X86::IST_FP64m },
720 { X86::IST_Fp64m64 , X86::IST_FP64m },
721 { X86::IST_Fp64m80 , X86::IST_FP64m },
722 { X86::LD_Fp032 , X86::LD_F0 },
723 { X86::LD_Fp064 , X86::LD_F0 },
724 { X86::LD_Fp080 , X86::LD_F0 },
725 { X86::LD_Fp132 , X86::LD_F1 },
726 { X86::LD_Fp164 , X86::LD_F1 },
727 { X86::LD_Fp180 , X86::LD_F1 },
728 { X86::LD_Fp32m , X86::LD_F32m },
729 { X86::LD_Fp32m64 , X86::LD_F32m },
730 { X86::LD_Fp32m80 , X86::LD_F32m },
731 { X86::LD_Fp64m , X86::LD_F64m },
732 { X86::LD_Fp64m80 , X86::LD_F64m },
733 { X86::LD_Fp80m , X86::LD_F80m },
734 { X86::MUL_Fp32m , X86::MUL_F32m },
735 { X86::MUL_Fp64m , X86::MUL_F64m },
736 { X86::MUL_Fp64m32 , X86::MUL_F32m },
737 { X86::MUL_Fp80m32 , X86::MUL_F32m },
738 { X86::MUL_Fp80m64 , X86::MUL_F64m },
739 { X86::MUL_FpI16m32 , X86::MUL_FI16m },
740 { X86::MUL_FpI16m64 , X86::MUL_FI16m },
741 { X86::MUL_FpI16m80 , X86::MUL_FI16m },
742 { X86::MUL_FpI32m32 , X86::MUL_FI32m },
743 { X86::MUL_FpI32m64 , X86::MUL_FI32m },
744 { X86::MUL_FpI32m80 , X86::MUL_FI32m },
745 { X86::SQRT_Fp32 , X86::SQRT_F },
746 { X86::SQRT_Fp64 , X86::SQRT_F },
747 { X86::SQRT_Fp80 , X86::SQRT_F },
748 { X86::ST_Fp32m , X86::ST_F32m },
749 { X86::ST_Fp64m , X86::ST_F64m },
750 { X86::ST_Fp64m32 , X86::ST_F32m },
751 { X86::ST_Fp80m32 , X86::ST_F32m },
752 { X86::ST_Fp80m64 , X86::ST_F64m },
753 { X86::ST_FpP80m , X86::ST_FP80m },
754 { X86::SUBR_Fp32m , X86::SUBR_F32m },
755 { X86::SUBR_Fp64m , X86::SUBR_F64m },
756 { X86::SUBR_Fp64m32 , X86::SUBR_F32m },
757 { X86::SUBR_Fp80m32 , X86::SUBR_F32m },
758 { X86::SUBR_Fp80m64 , X86::SUBR_F64m },
759 { X86::SUBR_FpI16m32, X86::SUBR_FI16m},
760 { X86::SUBR_FpI16m64, X86::SUBR_FI16m},
761 { X86::SUBR_FpI16m80, X86::SUBR_FI16m},
762 { X86::SUBR_FpI32m32, X86::SUBR_FI32m},
763 { X86::SUBR_FpI32m64, X86::SUBR_FI32m},
764 { X86::SUBR_FpI32m80, X86::SUBR_FI32m},
765 { X86::SUB_Fp32m , X86::SUB_F32m },
766 { X86::SUB_Fp64m , X86::SUB_F64m },
767 { X86::SUB_Fp64m32 , X86::SUB_F32m },
768 { X86::SUB_Fp80m32 , X86::SUB_F32m },
769 { X86::SUB_Fp80m64 , X86::SUB_F64m },
770 { X86::SUB_FpI16m32 , X86::SUB_FI16m },
771 { X86::SUB_FpI16m64 , X86::SUB_FI16m },
772 { X86::SUB_FpI16m80 , X86::SUB_FI16m },
773 { X86::SUB_FpI32m32 , X86::SUB_FI32m },
774 { X86::SUB_FpI32m64 , X86::SUB_FI32m },
775 { X86::SUB_FpI32m80 , X86::SUB_FI32m },
776 { X86::TST_Fp32 , X86::TST_F },
777 { X86::TST_Fp64 , X86::TST_F },
778 { X86::TST_Fp80 , X86::TST_F },
779 { X86::UCOM_FpIr32 , X86::UCOM_FIr },
780 { X86::UCOM_FpIr64 , X86::UCOM_FIr },
781 { X86::UCOM_FpIr80 , X86::UCOM_FIr },
782 { X86::UCOM_Fpr32 , X86::UCOM_Fr },
783 { X86::UCOM_Fpr64 , X86::UCOM_Fr },
784 { X86::UCOM_Fpr80 , X86::UCOM_Fr },
785 { X86::XAM_Fp32 , X86::XAM_F },
786 { X86::XAM_Fp64 , X86::XAM_F },
787 { X86::XAM_Fp80 , X86::XAM_F },
793 assert(Opc != -1 &&
"FP Stack instruction not in OpcodeTable!");
805 { X86::ADD_FrST0 , X86::ADD_FPrST0 },
807 { X86::COMP_FST0r, X86::FCOMPP },
808 { X86::COM_FIr , X86::COM_FIPr },
809 { X86::COM_FST0r , X86::COMP_FST0r },
811 { X86::DIVR_FrST0, X86::DIVR_FPrST0 },
812 { X86::DIV_FrST0 , X86::DIV_FPrST0 },
814 { X86::IST_F16m , X86::IST_FP16m },
815 { X86::IST_F32m , X86::IST_FP32m },
817 { X86::MUL_FrST0 , X86::MUL_FPrST0 },
819 { X86::ST_F32m , X86::ST_FP32m },
820 { X86::ST_F64m , X86::ST_FP64m },
821 { X86::ST_Frr , X86::ST_FPrr },
823 { X86::SUBR_FrST0, X86::SUBR_FPrST0 },
824 { X86::SUB_FrST0 , X86::SUB_FPrST0 },
826 { X86::UCOM_FIr , X86::UCOM_FIPr },
828 { X86::UCOM_FPr , X86::UCOM_FPPr },
829 { X86::UCOM_Fr , X86::UCOM_FPr },
869 MI.dropDebugNumber();
876 if (Next !=
MBB.
end() && Next->readsRegister(X86::FPSW))
888 if (getStackEntry(0) == FPRegNo) {
896 I = freeStackSlotBefore(++
I, FPRegNo);
903 unsigned STReg = getSTReg(FPRegNo);
904 unsigned OldSlot = getSlot(FPRegNo);
905 unsigned TopReg =
Stack[StackTop-1];
906 Stack[OldSlot] = TopReg;
907 RegMap[TopReg] = OldSlot;
908 RegMap[FPRegNo] = ~0;
909 Stack[--StackTop] = ~0;
918 unsigned Defs =
Mask;
920 for (
unsigned i = 0; i < StackTop; ++i) {
921 unsigned RegNo =
Stack[i];
922 if (!(Defs & (1 << RegNo)))
924 Kills |= (1 << RegNo);
927 Defs &= ~(1 << RegNo);
929 assert((Kills & Defs) == 0 &&
"Register needs killing and def'ing?");
932 while (Kills && Defs) {
935 LLVM_DEBUG(
dbgs() <<
"Renaming %fp" << KReg <<
" as imp %fp" << DReg
937 std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]);
939 Kills &= ~(1 << KReg);
940 Defs &= ~(1 <<
DReg);
947 unsigned KReg = getStackEntry(0);
948 if (!(Kills & (1 << KReg)))
952 Kills &= ~(1 << KReg);
960 freeStackSlotBefore(
I, KReg);
961 Kills &= ~(1 << KReg);
970 Defs &= ~(1 <<
DReg);
981void FPS::shuffleStackTop(
const unsigned char *FixStack,
987 unsigned OldReg = getStackEntry(FixCount);
989 unsigned Reg = FixStack[FixCount];
995 moveToTop(OldReg,
I);
1007 unsigned STReturns = 0;
1009 bool ClobbersFPStack =
false;
1010 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
1014 if (
Op.isRegMask()) {
1015 bool ClobbersFP0 =
Op.clobbersPhysReg(X86::FP0);
1017 static_assert(X86::FP7 - X86::FP0 == 7,
"sequential FP regnumbers");
1018 for (
unsigned i = 1; i != 8; ++i)
1019 assert(
Op.clobbersPhysReg(X86::FP0 + i) == ClobbersFP0 &&
1020 "Inconsistent FP register clobber");
1024 ClobbersFPStack =
true;
1027 if (!
Op.isReg() ||
Op.getReg() < X86::FP0 ||
Op.getReg() > X86::FP6)
1030 assert(
Op.isImplicit() &&
"Expected implicit def/use");
1036 MI.removeOperand(i);
1044 assert((ClobbersFPStack || STReturns == 0) &&
1045 "ST returns without FP stack clobber");
1046 if (!ClobbersFPStack)
1058 while (StackTop > 0)
1061 for (
unsigned I = 0;
I <
N; ++
I)
1067 I->dropDebugNumber();
1076 unsigned FirstFPRegOp = ~0
U, SecondFPRegOp = ~0
U;
1077 unsigned LiveMask = 0;
1079 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
1081 if (!
Op.isReg() ||
Op.getReg() < X86::FP0 ||
Op.getReg() > X86::FP6)
1088 MI.killsRegister(
Op.getReg())) &&
1089 "Ret only defs operands, and values aren't live beyond it");
1091 if (FirstFPRegOp == ~0U)
1094 assert(SecondFPRegOp == ~0U &&
"More than two fp operands!");
1100 MI.removeOperand(i);
1107 adjustLiveRegs(LiveMask,
MI);
1108 if (!LiveMask)
return;
1114 if (SecondFPRegOp == ~0U) {
1116 assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) &&
1117 "Top of stack not the right register for RET!");
1129 if (StackTop == 1) {
1130 assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&&
1131 "Stack misconfiguration for RET!");
1135 unsigned NewReg = ScratchFPReg;
1136 duplicateToTop(FirstFPRegOp, NewReg,
MI);
1137 FirstFPRegOp = NewReg;
1141 assert(StackTop == 2 &&
"Must have two values live!");
1145 if (getStackEntry(0) == SecondFPRegOp) {
1146 assert(getStackEntry(1) == FirstFPRegOp &&
"Unknown regs live");
1147 moveToTop(FirstFPRegOp,
MI);
1152 assert(getStackEntry(0) == FirstFPRegOp &&
"Unknown regs live");
1153 assert(getStackEntry(1) == SecondFPRegOp &&
"Unknown regs live");
1161 unsigned DestReg =
getFPReg(
MI.getOperand(0));
1164 MI.removeOperand(0);
1172 MI.dropDebugNumber();
1179 unsigned NumOps =
MI.getDesc().getNumOperands();
1181 "Can only handle fst* & ftst instructions!");
1185 bool KillsSrc =
MI.killsRegister(X86::FP0 + Reg);
1193 if (!KillsSrc && (
MI.getOpcode() == X86::IST_Fp64m32 ||
1194 MI.getOpcode() == X86::ISTT_Fp16m32 ||
1195 MI.getOpcode() == X86::ISTT_Fp32m32 ||
1196 MI.getOpcode() == X86::ISTT_Fp64m32 ||
1197 MI.getOpcode() == X86::IST_Fp64m64 ||
1198 MI.getOpcode() == X86::ISTT_Fp16m64 ||
1199 MI.getOpcode() == X86::ISTT_Fp32m64 ||
1200 MI.getOpcode() == X86::ISTT_Fp64m64 ||
1201 MI.getOpcode() == X86::IST_Fp64m80 ||
1202 MI.getOpcode() == X86::ISTT_Fp16m80 ||
1203 MI.getOpcode() == X86::ISTT_Fp32m80 ||
1204 MI.getOpcode() == X86::ISTT_Fp64m80 ||
1205 MI.getOpcode() == X86::ST_FpP80m)) {
1206 duplicateToTop(Reg, ScratchFPReg,
I);
1212 MI.removeOperand(NumOps - 1);
1217 if (
MI.getOpcode() == X86::IST_FP64m ||
MI.getOpcode() == X86::ISTT_FP16m ||
1218 MI.getOpcode() == X86::ISTT_FP32m ||
MI.getOpcode() == X86::ISTT_FP64m ||
1219 MI.getOpcode() == X86::ST_FP80m) {
1223 }
else if (KillsSrc) {
1227 MI.dropDebugNumber();
1242 unsigned NumOps =
MI.getDesc().getNumOperands();
1243 assert(NumOps >= 2 &&
"FPRW instructions must have 2 ops!!");
1248 bool KillsSrc =
MI.killsRegister(X86::FP0 + Reg);
1261 duplicateToTop(Reg,
getFPReg(
MI.getOperand(0)),
I);
1265 MI.removeOperand(1);
1266 MI.removeOperand(0);
1268 MI.dropDebugNumber();
1278 { X86::ADD_Fp32 , X86::ADD_FST0r },
1279 { X86::ADD_Fp64 , X86::ADD_FST0r },
1280 { X86::ADD_Fp80 , X86::ADD_FST0r },
1281 { X86::DIV_Fp32 , X86::DIV_FST0r },
1282 { X86::DIV_Fp64 , X86::DIV_FST0r },
1283 { X86::DIV_Fp80 , X86::DIV_FST0r },
1284 { X86::MUL_Fp32 , X86::MUL_FST0r },
1285 { X86::MUL_Fp64 , X86::MUL_FST0r },
1286 { X86::MUL_Fp80 , X86::MUL_FST0r },
1287 { X86::SUB_Fp32 , X86::SUB_FST0r },
1288 { X86::SUB_Fp64 , X86::SUB_FST0r },
1289 { X86::SUB_Fp80 , X86::SUB_FST0r },
1294 { X86::ADD_Fp32 , X86::ADD_FST0r },
1295 { X86::ADD_Fp64 , X86::ADD_FST0r },
1296 { X86::ADD_Fp80 , X86::ADD_FST0r },
1297 { X86::DIV_Fp32 , X86::DIVR_FST0r },
1298 { X86::DIV_Fp64 , X86::DIVR_FST0r },
1299 { X86::DIV_Fp80 , X86::DIVR_FST0r },
1300 { X86::MUL_Fp32 , X86::MUL_FST0r },
1301 { X86::MUL_Fp64 , X86::MUL_FST0r },
1302 { X86::MUL_Fp80 , X86::MUL_FST0r },
1303 { X86::SUB_Fp32 , X86::SUBR_FST0r },
1304 { X86::SUB_Fp64 , X86::SUBR_FST0r },
1305 { X86::SUB_Fp80 , X86::SUBR_FST0r },
1310 { X86::ADD_Fp32 , X86::ADD_FrST0 },
1311 { X86::ADD_Fp64 , X86::ADD_FrST0 },
1312 { X86::ADD_Fp80 , X86::ADD_FrST0 },
1313 { X86::DIV_Fp32 , X86::DIVR_FrST0 },
1314 { X86::DIV_Fp64 , X86::DIVR_FrST0 },
1315 { X86::DIV_Fp80 , X86::DIVR_FrST0 },
1316 { X86::MUL_Fp32 , X86::MUL_FrST0 },
1317 { X86::MUL_Fp64 , X86::MUL_FrST0 },
1318 { X86::MUL_Fp80 , X86::MUL_FrST0 },
1319 { X86::SUB_Fp32 , X86::SUBR_FrST0 },
1320 { X86::SUB_Fp64 , X86::SUBR_FrST0 },
1321 { X86::SUB_Fp80 , X86::SUBR_FrST0 },
1326 { X86::ADD_Fp32 , X86::ADD_FrST0 },
1327 { X86::ADD_Fp64 , X86::ADD_FrST0 },
1328 { X86::ADD_Fp80 , X86::ADD_FrST0 },
1329 { X86::DIV_Fp32 , X86::DIV_FrST0 },
1330 { X86::DIV_Fp64 , X86::DIV_FrST0 },
1331 { X86::DIV_Fp80 , X86::DIV_FrST0 },
1332 { X86::MUL_Fp32 , X86::MUL_FrST0 },
1333 { X86::MUL_Fp64 , X86::MUL_FrST0 },
1334 { X86::MUL_Fp80 , X86::MUL_FrST0 },
1335 { X86::SUB_Fp32 , X86::SUB_FrST0 },
1336 { X86::SUB_Fp64 , X86::SUB_FrST0 },
1337 { X86::SUB_Fp80 , X86::SUB_FrST0 },
1354 unsigned NumOperands =
MI.getDesc().getNumOperands();
1355 assert(NumOperands == 3 &&
"Illegal TwoArgFP instruction!");
1357 unsigned Op0 =
getFPReg(
MI.getOperand(NumOperands - 2));
1358 unsigned Op1 =
getFPReg(
MI.getOperand(NumOperands - 1));
1359 bool KillsOp0 =
MI.killsRegister(X86::FP0 + Op0);
1360 bool KillsOp1 =
MI.killsRegister(X86::FP0 + Op1);
1363 unsigned TOS = getStackEntry(0);
1367 if (Op0 != TOS && Op1 != TOS) {
1374 }
else if (KillsOp1) {
1383 duplicateToTop(Op0, Dest,
I);
1387 }
else if (!KillsOp0 && !KillsOp1) {
1391 duplicateToTop(Op0, Dest,
I);
1398 assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) &&
1399 "Stack conditions not set up right!");
1404 bool isForward = TOS == Op0;
1405 bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0);
1419 assert(
Opcode != -1 &&
"Unknown TwoArgFP pseudo instruction!");
1422 unsigned NotTOS = (TOS == Op0) ? Op1 : Op0;
1428 if (!
MI.mayRaiseFPException())
1429 I->setFlag(MachineInstr::MIFlag::NoFPExcept);
1433 if (KillsOp0 && KillsOp1 && Op0 != Op1) {
1434 assert(!updateST0 &&
"Should have updated other operand!");
1440 unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS);
1441 assert(UpdatedSlot < StackTop && Dest < 7);
1442 Stack[UpdatedSlot] = Dest;
1443 RegMap[Dest] = UpdatedSlot;
1453 unsigned NumOperands =
MI.getDesc().getNumOperands();
1454 assert(NumOperands == 2 &&
"Illegal FUCOM* instruction!");
1455 unsigned Op0 =
getFPReg(
MI.getOperand(NumOperands - 2));
1456 unsigned Op1 =
getFPReg(
MI.getOperand(NumOperands - 1));
1457 bool KillsOp0 =
MI.killsRegister(X86::FP0 + Op0);
1458 bool KillsOp1 =
MI.killsRegister(X86::FP0 + Op1);
1465 MI.getOperand(0).setReg(getSTReg(Op1));
1466 MI.removeOperand(1);
1468 MI.dropDebugNumber();
1471 if (KillsOp0) freeStackSlotAfter(
I, Op0);
1472 if (KillsOp1 && Op0 != Op1) freeStackSlotAfter(
I, Op1);
1484 bool KillsOp1 =
MI.killsRegister(X86::FP0 + Op1);
1491 MI.removeOperand(0);
1492 MI.removeOperand(1);
1493 MI.getOperand(0).setReg(getSTReg(Op1));
1495 MI.dropDebugNumber();
1498 if (Op0 != Op1 && KillsOp1) {
1500 freeStackSlotAfter(
I, Op1);
1517 if (
MI.isReturn()) {
1522 switch (
MI.getOpcode()) {
1524 case TargetOpcode::COPY: {
1528 bool KillsSrc =
MI.killsRegister(MO1.
getReg());
1533 assert(isLive(SrcFP) &&
"Cannot copy dead register");
1537 unsigned Slot = getSlot(SrcFP);
1539 RegMap[DstFP] =
Slot;
1543 duplicateToTop(SrcFP, DstFP, Inst);
1548 case TargetOpcode::IMPLICIT_DEF: {
1550 unsigned Reg =
MI.getOperand(0).getReg() - X86::FP0;
1551 LLVM_DEBUG(
dbgs() <<
"Emitting LD_F0 for implicit FP" << Reg <<
'\n');
1557 case TargetOpcode::INLINEASM:
1558 case TargetOpcode::INLINEASM_BR: {
1592 unsigned STUses = 0, STDefs = 0, STClobbers = 0;
1593 unsigned NumOps = 0;
1598 i != e &&
MI.getOperand(i).isImm(); i += 1 + NumOps) {
1599 unsigned Flags =
MI.getOperand(i).getImm();
1602 NumOps =
F.getNumOperandRegisters();
1608 unsigned STReg = MO.
getReg() - X86::FP0;
1614 if (
F.hasRegClassConstraint(RCID)) {
1619 switch (
F.getKind()) {
1620 case InlineAsm::Kind::RegUse:
1621 STUses |= (1u << STReg);
1623 case InlineAsm::Kind::RegDef:
1624 case InlineAsm::Kind::RegDefEarlyClobber:
1625 STDefs |= (1u << STReg);
1627 case InlineAsm::Kind::Clobber:
1628 STClobbers |= (1u << STReg);
1636 MI.emitError(
"fixed input regs must be last on the x87 stack");
1641 MI.emitError(
"output regs must be last on the x87 stack");
1647 if (STClobbers && !
isMask_32(STDefs | STClobbers))
1648 MI.emitError(
"clobbers must be last on the x87 stack");
1651 unsigned STPopped = STUses & (STDefs | STClobbers);
1653 MI.emitError(
"implicitly popped regs must be last on the x87 stack");
1656 LLVM_DEBUG(
dbgs() <<
"Asm uses " << NumSTUses <<
" fixed regs, pops "
1657 << NumSTPopped <<
", and defines " << NumSTDefs
1663 for (
unsigned I = 0,
E =
MI.getNumOperands();
I <
E; ++
I)
1666 "Operands with constraint \"f\" cannot overlap with defs");
1672 unsigned FPKills = ((1u << NumFPRegs) - 1) & ~0xff;
1674 if (!
Op.isReg() ||
Op.getReg() < X86::FP0 ||
Op.getReg() > X86::FP6)
1681 if (
Op.isUse() &&
Op.isKill())
1682 FPKills |= 1U << FPReg;
1686 FPKills &= ~(STDefs | STClobbers);
1689 unsigned char STUsesArray[8];
1691 for (
unsigned I = 0;
I < NumSTUses; ++
I)
1694 shuffleStackTop(STUsesArray, NumSTUses, Inst);
1696 dbgs() <<
"Before asm: ";
1701 for (
unsigned i = 0, e =
MI.getNumOperands(); i !=
e; ++i) {
1703 if (!
Op.isReg() ||
Op.getReg() < X86::FP0 ||
Op.getReg() > X86::FP6)
1708 if (FRegIdx.
count(i))
1710 Op.setReg(getSTReg(FPReg));
1713 Op.setReg(X86::ST0 + FPReg);
1717 StackTop -= NumSTPopped;
1719 for (
unsigned i = 0; i < NumSTDefs; ++i)
1720 pushReg(NumSTDefs - i - 1);
1732 freeStackSlotAfter(Inst, FPReg);
1733 FPKills &= ~(1U << FPReg);
1757 LPR.addLiveOuts(
MBB);
1760 if (
MI.isDebugInstr())
1763 std::bitset<8> Defs;
1766 for (
auto &MO :
MI.operands()) {
1777 if (!LPR.contains(MO.
getReg()))
1780 Uses.push_back(&MO);
1783 for (
auto *MO :
Uses)
1787 LPR.stepBackward(
MI);
unsigned const MachineRegisterInfo * MRI
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_ATTRIBUTE_UNUSED
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
Rewrite Partial Register Uses
const HexagonInstrInfo * TII
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static const TableEntry ReverseST0Table[]
#define ASSERT_SORTED(TABLE)
static const TableEntry ForwardST0Table[]
static bool doesInstructionSetFPSW(MachineInstr &MI)
static unsigned getFPReg(const MachineOperand &MO)
getFPReg - Return the X86::FPx register number for the specified operand.
static const TableEntry ForwardSTiTable[]
static const TableEntry OpcodeTable[]
static const TableEntry ReverseSTiTable[]
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
static const TableEntry PopTable[]
static unsigned getConcreteOpcode(unsigned Opcode)
static MachineBasicBlock::iterator getNextFPInstruction(MachineBasicBlock::iterator I)
static constexpr uint32_t Opcode
Represent the analysis usage information of a pass.
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
This class represents an Operation in the Expression.
unsigned getBundle(unsigned N, bool Out) const
getBundle - Return the ingoing (Out = false) or outgoing (Out = true) bundle number for basic block N
unsigned getNumBundles() const
getNumBundles - Return the total number of bundles in the CFG.
FunctionPass class - This class is used to implement most global optimizations.
A set of physical registers with utility functions to track liveness when walking backward/forward th...
livein_iterator livein_end() const
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
MachineInstr * remove(MachineInstr *I)
Remove the unbundled instruction from the instruction list without deleting it.
LiveInVector::const_iterator livein_iterator
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
livein_iterator livein_begin() const
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
void removeLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Remove the specified register from the live in set.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void deleteMachineInstr(MachineInstr *MI)
DeleteMachineInstr - Delete the given MachineInstr.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineBasicBlock & front() const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Wrapper class representing virtual and physical registers.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ X86_RegCall
Register calling convention used for parameters transfer optimization.
Reg
All possible values of the reg field in the ModR/M byte.
@ SpecialFP
SpecialFP - Special instruction forms. Dispatch by opcode explicitly.
@ NotFP
NotFP - The default, set for instructions that do not use FP registers.
@ OneArgFPRW
OneArgFPRW - 1 arg FP instruction which implicitly read ST(0) and write a result back to ST(0).
@ ZeroArgFP
ZeroArgFP - 0 arg FP instruction which implicitly pushes ST(0), f.e. fld0.
@ OneArgFP
OneArgFP - 1 arg FP instructions which implicitly read ST(0), such as fst.
@ CompareFP
CompareFP - 2 arg FP instructions which implicitly read ST(0) and an explicit argument,...
@ CondMovFP
CondMovFP - "2 operand" floating point conditional move instructions.
@ TwoArgFP
TwoArgFP - 2 arg FP instructions which implicitly read ST(0), and an explicit argument,...
bool isX87Instruction(MachineInstr &MI)
Check if the instruction is X87 instruction.
This is an optimization pass for GlobalISel generic memory operations.
FunctionPass * createX86FloatingPointStackifierPass()
This function returns a pass which converts floating-point register references and pseudo instruction...
iterator_range< df_ext_iterator< T, SetTy > > depth_first_ext(const T &G, SetTy &S)
bool operator<(int64_t V1, const APSInt &V2)
int popcount(T Value) noexcept
Count the number of set bits in a value.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
char & MachineDominatorsID
MachineDominators - This pass is a machine dominators analysis pass.
char & MachineLoopInfoID
MachineLoopInfo - This pass is a loop analysis pass.
auto reverse(ContainerTy &&C)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
std::pair< iterator, bool > insert(NodeRef N)