81 #include "llvm/Config/llvm-config.h"
129 using namespace llvm;
131 #define DEBUG_TYPE "loop-reduce"
145 cl::desc(
"Enable LSR phi elimination"));
150 cl::desc(
"Add instruction count to a LSR cost model"));
155 cl::desc(
"Narrow LSR complex solution using"
156 " expectation of registers number"));
162 cl::desc(
"Narrow LSR search space by filtering non-optimal formulae"
163 " with the same ScaledReg and Scale"));
167 cl::desc(
"A flag that overrides the target's preferred addressing mode."),
170 "Don't prefer any addressing mode"),
173 "Prefer pre-indexed addressing mode"),
176 "Prefer post-indexed addressing mode")));
181 cl::desc(
"LSR search space complexity limit"));
185 cl::desc(
"The limit on recursion depth for LSRs setup cost"));
191 cl::desc(
"Stress test LSR IV chains"));
203 Type *MemTy =
nullptr;
206 MemAccessTy() =
default;
207 MemAccessTy(
Type *Ty,
unsigned AS) : MemTy(Ty), AddrSpace(AS) {}
210 return MemTy ==
Other.MemTy && AddrSpace ==
Other.AddrSpace;
236 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
238 OS <<
"[NumUses=" << UsedByIndices.count() <<
']';
249 class RegUseTracker {
252 RegUsesTy RegUsesMap;
256 void countRegister(
const SCEV *
Reg,
size_t LUIdx);
257 void dropRegister(
const SCEV *
Reg,
size_t LUIdx);
258 void swapAndDropUse(
size_t LUIdx,
size_t LastLUIdx);
260 bool isRegUsedByUsesOtherThan(
const SCEV *
Reg,
size_t LUIdx)
const;
278 RegUseTracker::countRegister(
const SCEV *
Reg,
size_t LUIdx) {
279 std::pair<RegUsesTy::iterator, bool> Pair =
280 RegUsesMap.insert(std::make_pair(
Reg, RegSortData()));
281 RegSortData &RSD = Pair.first->second;
284 RSD.UsedByIndices.resize(
std::max(RSD.UsedByIndices.size(), LUIdx + 1));
285 RSD.UsedByIndices.set(LUIdx);
289 RegUseTracker::dropRegister(
const SCEV *
Reg,
size_t LUIdx) {
290 RegUsesTy::iterator It = RegUsesMap.find(
Reg);
291 assert(It != RegUsesMap.end());
292 RegSortData &RSD = It->second;
293 assert(RSD.UsedByIndices.size() > LUIdx);
294 RSD.UsedByIndices.reset(LUIdx);
298 RegUseTracker::swapAndDropUse(
size_t LUIdx,
size_t LastLUIdx) {
299 assert(LUIdx <= LastLUIdx);
303 for (
auto &Pair : RegUsesMap) {
305 if (LUIdx < UsedByIndices.
size())
306 UsedByIndices[LUIdx] =
307 LastLUIdx < UsedByIndices.
size() ? UsedByIndices[LastLUIdx] :
false;
313 RegUseTracker::isRegUsedByUsesOtherThan(
const SCEV *
Reg,
size_t LUIdx)
const {
314 RegUsesTy::const_iterator
I = RegUsesMap.find(
Reg);
315 if (
I == RegUsesMap.end())
319 if (
i == -1)
return false;
320 if ((
size_t)
i != LUIdx)
return true;
325 RegUsesTy::const_iterator
I = RegUsesMap.find(
Reg);
326 assert(
I != RegUsesMap.end() &&
"Unknown register!");
327 return I->second.UsedByIndices;
344 int64_t BaseOffset = 0;
347 bool HasBaseReg =
false;
370 const SCEV *ScaledReg =
nullptr;
375 int64_t UnfoldedOffset = 0;
383 void canonicalize(
const Loop &L);
387 bool hasZeroEnd()
const;
389 size_t getNumRegs()
const;
392 void deleteBaseReg(
const SCEV *&
S);
394 bool referencesReg(
const SCEV *
S)
const;
395 bool hasRegsUsedByUsesOtherThan(
size_t LUIdx,
396 const RegUseTracker &RegUses)
const;
416 if (
const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(
S)) {
417 for (
const SCEV *
S : Add->operands())
424 if (!AR->getStart()->isZero() && AR->isAffine()) {
427 AR->getStepRecurrence(SE),
436 if (
Mul->getOperand(0)->isAllOnesValue()) {
445 for (
const SCEV *
S : MyGood)
447 for (
const SCEV *
S : MyBad)
466 BaseRegs.push_back(Sum);
472 BaseRegs.push_back(Sum);
483 return BaseRegs.size() <= 1;
488 if (Scale == 1 && BaseRegs.empty())
491 const SCEVAddRecExpr *SAR = dyn_cast<const SCEVAddRecExpr>(ScaledReg);
492 if (SAR && SAR->
getLoop() == &L)
499 return isa<const SCEVAddRecExpr>(
S) &&
500 (cast<SCEVAddRecExpr>(
S)->getLoop() == &L);
502 return I == BaseRegs.end();
511 void Formula::canonicalize(
const Loop &L) {
516 assert(!BaseRegs.empty() &&
"1*reg => reg, should not be needed.");
520 ScaledReg = BaseRegs.pop_back_val();
527 const SCEVAddRecExpr *SAR = dyn_cast<const SCEVAddRecExpr>(ScaledReg);
528 if (!SAR || SAR->
getLoop() != &L) {
530 return isa<const SCEVAddRecExpr>(
S) &&
531 (cast<SCEVAddRecExpr>(
S)->getLoop() == &L);
533 if (
I != BaseRegs.end())
542 bool Formula::unscale() {
546 BaseRegs.push_back(ScaledReg);
551 bool Formula::hasZeroEnd()
const {
552 if (UnfoldedOffset || BaseOffset)
554 if (BaseRegs.size() != 1 || ScaledReg)
561 size_t Formula::getNumRegs()
const {
562 return !!ScaledReg + BaseRegs.size();
568 return !BaseRegs.empty() ? BaseRegs.front()->getType() :
569 ScaledReg ? ScaledReg->getType() :
570 BaseGV ? BaseGV->getType() :
575 void Formula::deleteBaseReg(
const SCEV *&
S) {
576 if (&
S != &BaseRegs.back())
582 bool Formula::referencesReg(
const SCEV *
S)
const {
588 bool Formula::hasRegsUsedByUsesOtherThan(
size_t LUIdx,
589 const RegUseTracker &RegUses)
const {
591 if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
593 for (
const SCEV *BaseReg : BaseRegs)
594 if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))
599 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
603 if (!First) OS <<
" + ";
else First =
false;
604 BaseGV->printAsOperand(OS,
false);
606 if (BaseOffset != 0) {
607 if (!First) OS <<
" + ";
else First =
false;
610 for (
const SCEV *BaseReg : BaseRegs) {
611 if (!First) OS <<
" + ";
else First =
false;
612 OS <<
"reg(" << *BaseReg <<
')';
614 if (HasBaseReg && BaseRegs.empty()) {
615 if (!First) OS <<
" + ";
else First =
false;
616 OS <<
"**error: HasBaseReg**";
617 }
else if (!HasBaseReg && !BaseRegs.empty()) {
618 if (!First) OS <<
" + ";
else First =
false;
619 OS <<
"**error: !HasBaseReg**";
622 if (!First) OS <<
" + ";
else First =
false;
623 OS << Scale <<
"*reg(";
630 if (UnfoldedOffset != 0) {
631 if (!First) OS <<
" + ";
632 OS <<
"imm(" << UnfoldedOffset <<
')';
673 bool IgnoreSignificantBits =
false) {
684 if (
RA.isAllOnesValue())
695 const APInt &LA =
C->getAPInt();
704 if ((IgnoreSignificantBits ||
isAddRecSExtable(AR, SE)) && AR->isAffine()) {
706 IgnoreSignificantBits);
707 if (!Step)
return nullptr;
709 IgnoreSignificantBits);
710 if (!Start)
return nullptr;
720 if (
const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(LHS)) {
723 for (
const SCEV *
S : Add->operands()) {
725 if (!
Op)
return nullptr;
738 for (
const SCEV *
S :
Mul->operands()) {
741 IgnoreSignificantBits)) {
760 if (
C->getAPInt().getMinSignedBits() <= 64) {
762 return C->getValue()->getSExtValue();
764 }
else if (
const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(
S)) {
786 if (
GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
790 }
else if (
const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(
S)) {
812 bool isAddress = isa<LoadInst>(Inst);
814 if (
SI->getPointerOperand() == OperandVal)
816 }
else if (
IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
819 switch (II->getIntrinsicID()) {
820 case Intrinsic::memset:
822 case Intrinsic::masked_load:
823 if (II->getArgOperand(0) == OperandVal)
826 case Intrinsic::masked_store:
827 if (II->getArgOperand(1) == OperandVal)
830 case Intrinsic::memmove:
832 if (II->getArgOperand(0) == OperandVal ||
833 II->getArgOperand(1) == OperandVal)
839 if (IntrInfo.
PtrVal == OperandVal)
844 }
else if (
AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
845 if (RMW->getPointerOperand() == OperandVal)
848 if (CmpX->getPointerOperand() == OperandVal)
858 if (
const StoreInst *
SI = dyn_cast<StoreInst>(Inst)) {
859 AccessTy.MemTy =
SI->getOperand(0)->getType();
860 AccessTy.AddrSpace =
SI->getPointerAddressSpace();
861 }
else if (
const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
862 AccessTy.AddrSpace = LI->getPointerAddressSpace();
863 }
else if (
const AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
864 AccessTy.AddrSpace = RMW->getPointerAddressSpace();
866 AccessTy.AddrSpace = CmpX->getPointerAddressSpace();
867 }
else if (
IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
868 switch (II->getIntrinsicID()) {
870 case Intrinsic::memset:
871 AccessTy.AddrSpace = II->getArgOperand(0)->getType()->getPointerAddressSpace();
872 AccessTy.MemTy = OperandVal->
getType();
874 case Intrinsic::memmove:
877 AccessTy.MemTy = OperandVal->
getType();
879 case Intrinsic::masked_load:
881 II->getArgOperand(0)->getType()->getPointerAddressSpace();
883 case Intrinsic::masked_store:
884 AccessTy.MemTy = II->getOperand(0)->getType();
886 II->getArgOperand(1)->getType()->getPointerAddressSpace();
902 if (
PointerType *PTy = dyn_cast<PointerType>(AccessTy.MemTy))
904 PTy->getAddressSpace());
934 switch (
S->getSCEVType()) {
951 if (!Processed.
insert(
S).second)
954 if (
const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(
S)) {
955 for (
const SCEV *
S : Add->operands()) {
963 if (
Mul->getNumOperands() == 2) {
965 if (isa<SCEVConstant>(
Mul->getOperand(0)))
970 if (
const SCEVUnknown *U = dyn_cast<SCEVUnknown>(
Mul->getOperand(1))) {
971 Value *UVal = U->getValue();
975 if (UI && UI->
getOpcode() == Instruction::Mul &&
1009 const LSRUse &LU,
const Formula &
F);
1013 const LSRUse &LU,
const Formula &
F,
1020 const Loop *L =
nullptr;
1030 L(L), SE(&SE),
TTI(&
TTI), AMK(AMK) {
1041 bool isLess(Cost &Other);
1048 return ((
C.Insns |
C.NumRegs |
C.AddRecCost |
C.NumIVMuls |
C.NumBaseAdds
1049 |
C.ImmCost |
C.SetupCost |
C.ScaleCost) != ~0u)
1050 || ((
C.Insns &
C.NumRegs &
C.AddRecCost &
C.NumIVMuls &
C.NumBaseAdds
1051 &
C.ImmCost &
C.SetupCost &
C.ScaleCost) == ~0u);
1056 assert(isValid() &&
"invalid cost");
1057 return C.NumRegs == ~0u;
1060 void RateFormula(
const Formula &
F,
1070 void RateRegister(
const Formula &
F,
const SCEV *
Reg,
1072 void RatePrimaryRegister(
const Formula &
F,
const SCEV *
Reg,
1085 Value *OperandValToReplace =
nullptr;
1097 LSRFixup() =
default;
1099 bool isUseFullyOutsideLoop(
const Loop *L)
const;
1107 struct UniquifierDenseMapInfo {
1110 V.push_back(
reinterpret_cast<const SCEV *
>(-1));
1116 V.push_back(
reinterpret_cast<const SCEV *
>(-2));
1152 MemAccessTy AccessTy;
1163 bool AllFixupsOutsideLoop =
true;
1170 bool RigidFormula =
false;
1176 Type *WidestFixupType =
nullptr;
1186 LSRUse(KindType K, MemAccessTy AT) :
Kind(K), AccessTy(AT) {}
1188 LSRFixup &getNewFixup() {
1189 Fixups.push_back(LSRFixup());
1193 void pushFixup(LSRFixup &
f) {
1195 if (
f.Offset > MaxOffset)
1196 MaxOffset =
f.Offset;
1197 if (
f.Offset < MinOffset)
1198 MinOffset =
f.Offset;
1201 bool HasFormulaWithSameRegs(
const Formula &
F)
const;
1202 float getNotSelectedProbability(
const SCEV *
Reg)
const;
1203 bool InsertFormula(
const Formula &
F,
const Loop &L);
1204 void DeleteFormula(Formula &
F);
1205 void RecomputeRegs(
size_t LUIdx, RegUseTracker &Reguses);
1214 LSRUse::KindType
Kind, MemAccessTy AccessTy,
1216 bool HasBaseReg, int64_t Scale,
1220 if (isa<SCEVUnknown>(
Reg) || isa<SCEVConstant>(
Reg))
1224 if (
const auto *
S = dyn_cast<SCEVAddRecExpr>(
Reg))
1226 if (
auto S = dyn_cast<SCEVIntegralCastExpr>(
Reg))
1228 if (
auto S = dyn_cast<SCEVNAryExpr>(
Reg))
1229 return std::accumulate(
S->op_begin(),
S->op_end(), 0,
1231 return i + getSetupCost(Reg, Depth - 1);
1233 if (
auto S = dyn_cast<SCEVUDivExpr>(
Reg))
1240 void Cost::RateRegister(
const Formula &
F,
const SCEV *
Reg,
1246 if (AR->getLoop() != L) {
1253 if (!AR->getLoop()->contains(L)) {
1263 unsigned LoopCost = 1;
1270 if (
auto *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)))
1271 if (Step->getAPInt() ==
F.BaseOffset)
1274 const SCEV *LoopStep = AR->getStepRecurrence(*SE);
1275 if (isa<SCEVConstant>(LoopStep)) {
1276 const SCEV *LoopStart = AR->getStart();
1277 if (!isa<SCEVConstant>(LoopStart) &&
1283 C.AddRecCost += LoopCost;
1287 if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
1288 if (!Regs.
count(AR->getOperand(1))) {
1289 RateRegister(
F, AR->getOperand(1), Regs);
1301 C.SetupCost = std::min<unsigned>(
C.SetupCost, 1 << 16);
1303 C.NumIVMuls += isa<SCEVMulExpr>(
Reg) &&
1310 void Cost::RatePrimaryRegister(
const Formula &
F,
const SCEV *
Reg,
1313 if (LoserRegs && LoserRegs->
count(
Reg)) {
1318 RateRegister(
F,
Reg, Regs);
1319 if (LoserRegs && isLoser())
1324 void Cost::RateFormula(
const Formula &
F,
1329 assert(
F.isCanonical(*L) &&
"Cost is accurate only for canonical formula");
1331 unsigned PrevAddRecCost =
C.AddRecCost;
1332 unsigned PrevNumRegs =
C.NumRegs;
1333 unsigned PrevNumBaseAdds =
C.NumBaseAdds;
1334 if (
const SCEV *ScaledReg =
F.ScaledReg) {
1335 if (VisitedRegs.
count(ScaledReg)) {
1339 RatePrimaryRegister(
F, ScaledReg, Regs, LoserRegs);
1343 for (
const SCEV *BaseReg :
F.BaseRegs) {
1344 if (VisitedRegs.
count(BaseReg)) {
1348 RatePrimaryRegister(
F, BaseReg, Regs, LoserRegs);
1354 size_t NumBaseParts =
F.getNumRegs();
1355 if (NumBaseParts > 1)
1360 C.NumBaseAdds += (
F.UnfoldedOffset != 0);
1366 for (
const LSRFixup &Fixup : LU.Fixups) {
1367 int64_t
O =
Fixup.Offset;
1368 int64_t
Offset = (uint64_t)
O +
F.BaseOffset;
1377 if (LU.Kind == LSRUse::Address &&
Offset != 0 &&
1385 assert(isValid() &&
"invalid cost");
1394 if (
C.NumRegs > TTIRegNum) {
1397 if (PrevNumRegs > TTIRegNum)
1398 C.Insns += (
C.NumRegs - PrevNumRegs);
1400 C.Insns += (
C.NumRegs - TTIRegNum);
1413 if (LU.Kind == LSRUse::ICmpZero && !
F.hasZeroEnd() &&
1417 C.Insns += (
C.AddRecCost - PrevAddRecCost);
1420 if (LU.Kind != LSRUse::ICmpZero)
1421 C.Insns +=
C.NumBaseAdds - PrevNumBaseAdds;
1422 assert(isValid() &&
"invalid cost");
1438 bool Cost::isLess(Cost &Other) {
1440 C.Insns !=
Other.C.Insns)
1441 return C.Insns <
Other.C.Insns;
1445 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1448 OS <<
C.Insns <<
" instruction" << (
C.Insns == 1 ?
" " :
"s ");
1449 OS <<
C.NumRegs <<
" reg" << (
C.NumRegs == 1 ?
"" :
"s");
1450 if (
C.AddRecCost != 0)
1451 OS <<
", with addrec cost " <<
C.AddRecCost;
1452 if (
C.NumIVMuls != 0)
1453 OS <<
", plus " <<
C.NumIVMuls <<
" IV mul"
1454 << (
C.NumIVMuls == 1 ?
"" :
"s");
1455 if (
C.NumBaseAdds != 0)
1456 OS <<
", plus " <<
C.NumBaseAdds <<
" base add"
1457 << (
C.NumBaseAdds == 1 ?
"" :
"s");
1458 if (
C.ScaleCost != 0)
1459 OS <<
", plus " <<
C.ScaleCost <<
" scale cost";
1461 OS <<
", plus " <<
C.ImmCost <<
" imm cost";
1462 if (
C.SetupCost != 0)
1463 OS <<
", plus " <<
C.SetupCost <<
" setup cost";
1472 bool LSRFixup::isUseFullyOutsideLoop(
const Loop *L)
const {
1474 if (
const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
1475 for (
unsigned i = 0,
e = PN->getNumIncomingValues();
i !=
e; ++
i)
1476 if (PN->getIncomingValue(
i) == OperandValToReplace &&
1485 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1491 Store->getOperand(0)->printAsOperand(OS,
false);
1492 }
else if (UserInst->getType()->isVoidTy())
1493 OS << UserInst->getOpcodeName();
1495 UserInst->printAsOperand(OS,
false);
1497 OS <<
", OperandValToReplace=";
1498 OperandValToReplace->printAsOperand(OS,
false);
1500 for (
const Loop *PIL : PostIncLoops) {
1501 OS <<
", PostIncLoop=";
1502 PIL->getHeader()->printAsOperand(OS,
false);
1506 OS <<
", Offset=" <<
Offset;
1516 bool LSRUse::HasFormulaWithSameRegs(
const Formula &
F)
const {
1518 if (
F.ScaledReg)
Key.push_back(
F.ScaledReg);
1525 float LSRUse::getNotSelectedProbability(
const SCEV *
Reg)
const {
1527 for (
const Formula &
F : Formulae)
1528 if (
F.referencesReg(
Reg))
1530 return ((
float)(Formulae.size() - FNum)) / Formulae.size();
1535 bool LSRUse::InsertFormula(
const Formula &
F,
const Loop &L) {
1536 assert(
F.isCanonical(L) &&
"Invalid canonical representation");
1538 if (!Formulae.empty() && RigidFormula)
1542 if (
F.ScaledReg)
Key.push_back(
F.ScaledReg);
1550 assert((!
F.ScaledReg || !
F.ScaledReg->isZero()) &&
1551 "Zero allocated in a scaled register!");
1553 for (
const SCEV *BaseReg :
F.BaseRegs)
1554 assert(!BaseReg->
isZero() &&
"Zero allocated in a base register!");
1558 Formulae.push_back(
F);
1561 Regs.
insert(
F.BaseRegs.begin(),
F.BaseRegs.end());
1569 void LSRUse::DeleteFormula(Formula &
F) {
1570 if (&
F != &Formulae.back())
1572 Formulae.pop_back();
1576 void LSRUse::RecomputeRegs(
size_t LUIdx, RegUseTracker &RegUses) {
1580 for (
const Formula &
F : Formulae) {
1581 if (
F.ScaledReg) Regs.
insert(
F.ScaledReg);
1582 Regs.
insert(
F.BaseRegs.begin(),
F.BaseRegs.end());
1586 for (
const SCEV *
S : OldRegs)
1588 RegUses.dropRegister(
S, LUIdx);
1591 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1593 OS <<
"LSR Use: Kind=";
1595 case Basic: OS <<
"Basic";
break;
1596 case Special: OS <<
"Special";
break;
1597 case ICmpZero: OS <<
"ICmpZero";
break;
1599 OS <<
"Address of ";
1600 if (AccessTy.MemTy->isPointerTy())
1603 OS << *AccessTy.MemTy;
1606 OS <<
" in addrspace(" << AccessTy.AddrSpace <<
')';
1609 OS <<
", Offsets={";
1610 bool NeedComma =
false;
1611 for (
const LSRFixup &Fixup :
Fixups) {
1612 if (NeedComma) OS <<
',';
1618 if (AllFixupsOutsideLoop)
1619 OS <<
", all-fixups-outside-loop";
1621 if (WidestFixupType)
1622 OS <<
", widest fixup type: " << *WidestFixupType;
1631 LSRUse::KindType
Kind, MemAccessTy AccessTy,
1633 bool HasBaseReg, int64_t Scale,
1636 case LSRUse::Address:
1638 HasBaseReg, Scale, AccessTy.AddrSpace,
Fixup);
1640 case LSRUse::ICmpZero:
1647 if (Scale != 0 && HasBaseReg && BaseOffset != 0)
1652 if (Scale != 0 && Scale != -1)
1657 if (BaseOffset != 0) {
1665 BaseOffset = -(uint64_t)BaseOffset;
1674 return !BaseGV && Scale == 0 && BaseOffset == 0;
1676 case LSRUse::Special:
1678 return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset == 0;
1685 int64_t MinOffset, int64_t MaxOffset,
1686 LSRUse::KindType
Kind, MemAccessTy AccessTy,
1688 bool HasBaseReg, int64_t Scale) {
1690 if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) !=
1693 MinOffset = (uint64_t)BaseOffset + MinOffset;
1694 if (((int64_t)((uint64_t)BaseOffset + MaxOffset) > BaseOffset) !=
1697 MaxOffset = (uint64_t)BaseOffset + MaxOffset;
1700 HasBaseReg, Scale) &&
1706 int64_t MinOffset, int64_t MaxOffset,
1707 LSRUse::KindType
Kind, MemAccessTy AccessTy,
1708 const Formula &
F,
const Loop &L) {
1716 assert((
F.isCanonical(L) ||
F.Scale != 0));
1718 F.BaseGV,
F.BaseOffset,
F.HasBaseReg,
F.Scale);
1723 int64_t MaxOffset, LSRUse::KindType
Kind,
1725 int64_t BaseOffset,
bool HasBaseReg, int64_t Scale) {
1728 BaseOffset, HasBaseReg, Scale) ||
1733 BaseGV, BaseOffset,
true, 0));
1737 int64_t MaxOffset, LSRUse::KindType
Kind,
1738 MemAccessTy AccessTy,
const Formula &
F) {
1740 F.BaseOffset,
F.HasBaseReg,
F.Scale);
1744 const LSRUse &LU,
const Formula &
F) {
1747 for (
const LSRFixup &
Fixup : LU.Fixups)
1749 (
F.BaseOffset +
Fixup.Offset),
F.HasBaseReg,
1750 F.Scale,
Fixup.UserInst))
1756 LU.AccessTy,
F.BaseGV,
F.BaseOffset,
F.HasBaseReg,
1761 const LSRUse &LU,
const Formula &
F,
1770 return F.Scale != 1;
1773 case LSRUse::Address: {
1776 LU.AccessTy.MemTy,
F.BaseGV,
F.BaseOffset + LU.MinOffset,
F.HasBaseReg,
1777 F.Scale, LU.AccessTy.AddrSpace);
1779 LU.AccessTy.MemTy,
F.BaseGV,
F.BaseOffset + LU.MaxOffset,
F.HasBaseReg,
1780 F.Scale, LU.AccessTy.AddrSpace);
1782 assert(ScaleCostMinOffset >= 0 && ScaleCostMaxOffset >= 0 &&
1783 "Legal addressing mode has an illegal cost!");
1784 return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
1786 case LSRUse::ICmpZero:
1788 case LSRUse::Special:
1798 LSRUse::KindType
Kind, MemAccessTy AccessTy,
1802 if (BaseOffset == 0 && !BaseGV)
return true;
1806 int64_t Scale =
Kind == LSRUse::ICmpZero ? -1 : 1;
1810 if (!HasBaseReg && Scale == 1) {
1821 int64_t MaxOffset, LSRUse::KindType
Kind,
1822 MemAccessTy AccessTy,
const SCEV *
S,
1825 if (
S->isZero())
return true;
1833 if (!
S->isZero())
return false;
1836 if (BaseOffset == 0 && !BaseGV)
return true;
1840 int64_t Scale =
Kind == LSRUse::ICmpZero ? -1 : 1;
1843 BaseOffset, HasBaseReg, Scale);
1860 const SCEV *IncExpr;
1863 : UserInst(U), IVOperand(
O), IncExpr(
E) {}
1870 const SCEV *ExprBase =
nullptr;
1872 IVChain() =
default;
1873 IVChain(
const IVInc &Head,
const SCEV *Base)
1874 : Incs(1, Head), ExprBase(Base) {}
1879 const_iterator
begin()
const {
1881 return std::next(Incs.begin());
1883 const_iterator
end()
const {
1888 bool hasIncs()
const {
return Incs.size() >= 2; }
1891 void add(
const IVInc &
X) { Incs.push_back(
X); }
1894 Instruction *tailUserInst()
const {
return Incs.back().UserInst; }
1897 bool isProfitableIncrement(
const SCEV *OperExpr,
1898 const SCEV *IncExpr,
1922 bool Changed =
false;
1945 RegUseTracker RegUses;
1950 static const unsigned MaxChains = 8;
1958 void OptimizeShadowIV();
1961 void OptimizeLoopTermCond();
1965 void FinalizeChain(IVChain &Chain);
1966 void CollectChains();
1970 void CollectInterestingTypesAndFactors();
1971 void CollectFixupsAndInitialFormulae();
1977 bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
bool HasBaseReg,
1978 LSRUse::KindType
Kind, MemAccessTy AccessTy);
1980 std::pair<size_t, int64_t> getUse(
const SCEV *&Expr, LSRUse::KindType
Kind,
1981 MemAccessTy AccessTy);
1983 void DeleteUse(LSRUse &LU,
size_t LUIdx);
1985 LSRUse *FindUseWithSimilarFormula(
const Formula &
F,
const LSRUse &OrigLU);
1987 void InsertInitialFormula(
const SCEV *
S, LSRUse &LU,
size_t LUIdx);
1988 void InsertSupplementalFormula(
const SCEV *
S, LSRUse &LU,
size_t LUIdx);
1989 void CountRegisters(
const Formula &
F,
size_t LUIdx);
1990 bool InsertFormula(LSRUse &LU,
unsigned LUIdx,
const Formula &
F);
1992 void CollectLoopInvariantFixupsAndFormulae();
1994 void GenerateReassociations(LSRUse &LU,
unsigned LUIdx, Formula Base,
1995 unsigned Depth = 0);
1997 void GenerateReassociationsImpl(LSRUse &LU,
unsigned LUIdx,
1998 const Formula &Base,
unsigned Depth,
1999 size_t Idx,
bool IsScaledReg =
false);
2000 void GenerateCombinations(LSRUse &LU,
unsigned LUIdx, Formula Base);
2001 void GenerateSymbolicOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
2002 const Formula &Base,
size_t Idx,
2003 bool IsScaledReg =
false);
2004 void GenerateSymbolicOffsets(LSRUse &LU,
unsigned LUIdx, Formula Base);
2005 void GenerateConstantOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
2006 const Formula &Base,
2008 size_t Idx,
bool IsScaledReg =
false);
2009 void GenerateConstantOffsets(LSRUse &LU,
unsigned LUIdx, Formula Base);
2010 void GenerateICmpZeroScales(LSRUse &LU,
unsigned LUIdx, Formula Base);
2011 void GenerateScales(LSRUse &LU,
unsigned LUIdx, Formula Base);
2012 void GenerateTruncates(LSRUse &LU,
unsigned LUIdx, Formula Base);
2013 void GenerateCrossUseConstantOffsets();
2014 void GenerateAllReuseFormulae();
2016 void FilterOutUndesirableDedicatedRegisters();
2018 size_t EstimateSearchSpaceComplexity()
const;
2019 void NarrowSearchSpaceByDetectingSupersets();
2020 void NarrowSearchSpaceByCollapsingUnrolledCode();
2021 void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
2022 void NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
2023 void NarrowSearchSpaceByFilterPostInc();
2024 void NarrowSearchSpaceByDeletingCostlyFormulas();
2025 void NarrowSearchSpaceByPickingWinnerRegs();
2026 void NarrowSearchSpaceUsingHeuristics();
2031 const Cost &CurCost,
2045 Value *Expand(
const LSRUse &LU,
const LSRFixup &LF,
const Formula &
F,
2048 void RewriteForPHI(
PHINode *PN,
const LSRUse &LU,
const LSRFixup &LF,
2051 void Rewrite(
const LSRUse &LU,
const LSRFixup &LF,
const Formula &
F,
2061 bool getChanged()
const {
return Changed; }
2063 void print_factors_and_types(
raw_ostream &OS)
const;
2074 void LSRInstance::OptimizeShadowIV() {
2076 if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
2084 Type *DestTy =
nullptr;
2085 bool IsSigned =
false;
2099 if (
UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {
2101 DestTy = UCast->getDestTy();
2103 else if (
SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) {
2105 DestTy = SCast->getDestTy();
2107 if (!DestTy)
continue;
2127 if (Mantissa == -1)
continue;
2131 unsigned Entry, Latch;
2141 if (!
Init)
continue;
2143 (
double)
Init->getSExtValue() :
2144 (
double)
Init->getZExtValue());
2148 if (!Incr)
continue;
2150 && Incr->
getOpcode() != Instruction::Sub)
2166 if (!
C->getValue().isStrictlyPositive())
continue;
2175 Instruction::FAdd : Instruction::FSub,
2176 NewPH, CFP,
"IV.S.next.", Incr);
2193 if (U.getUser() ==
Cond) {
2261 if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
2266 const SCEV *IterationCount = SE.
getAddExpr(One, BackedgeTakenCount);
2267 if (IterationCount != SE.
getSCEV(Sel))
return Cond;
2274 if (
const SCEVSMaxExpr *
S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
2277 }
else if (
const SCEVSMaxExpr *
S = dyn_cast<SCEVSMaxExpr>(IterationCount)) {
2280 }
else if (
const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) {
2290 if (
Max->getNumOperands() != 2)
2293 const SCEV *MaxLHS =
Max->getOperand(0);
2294 const SCEV *MaxRHS =
Max->getOperand(1);
2312 "Loop condition operand is an addrec in a different loop!");
2316 Value *NewRHS =
nullptr;
2320 if (
ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
2321 if (BO1->isOne() && SE.
getSCEV(BO->getOperand(0)) == MaxRHS)
2322 NewRHS = BO->getOperand(0);
2324 if (
ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
2325 if (BO1->isOne() && SE.
getSCEV(BO->getOperand(0)) == MaxRHS)
2326 NewRHS = BO->getOperand(0);
2333 else if (
const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
2334 NewRHS = SU->getValue();
2350 Cond->replaceAllUsesWith(NewCond);
2353 Cond->eraseFromParent();
2355 if (
Cmp->use_empty())
2356 Cmp->eraseFromParent();
2362 LSRInstance::OptimizeLoopTermCond() {
2381 return LatchBlock !=
BB;
2389 for (
BasicBlock *ExitingBlock : ExitingBlocks) {
2395 BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
2405 if (!FindIVUserForCond(
Cond, CondUse))
2419 if (!DT.dominates(ExitingBlock, LatchBlock))
2424 if (LatchBlock != ExitingBlock)
2428 if (&*UI != CondUse &&
2429 !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) {
2432 const SCEV *
A = IU.getStride(*CondUse, L);
2433 const SCEV *
B = IU.getStride(*UI, L);
2434 if (!A || !
B)
continue;
2447 if (
C->isOne() ||
C->isMinusOne())
2448 goto decline_post_inc;
2450 if (
C->getValue().getMinSignedBits() >= 64 ||
2451 C->getValue().isMinSignedValue())
2452 goto decline_post_inc;
2456 TTI, UI->getUser(), UI->getOperandValToReplace());
2457 int64_t Scale =
C->getSExtValue();
2461 AccessTy.AddrSpace))
2462 goto decline_post_inc;
2467 AccessTy.AddrSpace))
2468 goto decline_post_inc;
2473 LLVM_DEBUG(
dbgs() <<
" Change loop exiting icmp to use postinc iv: "
2480 if (
Cond->hasOneUse()) {
2481 Cond->moveBefore(TermBr);
2485 Cond = cast<ICmpInst>(
Cond->clone());
2511 DT.findNearestCommonDominator(IVIncInsertPos->getParent(),
2513 if (
BB == Inst->getParent())
2514 IVIncInsertPos = Inst;
2515 else if (
BB != IVIncInsertPos->getParent())
2516 IVIncInsertPos =
BB->getTerminator();
2522 bool LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
2523 bool HasBaseReg, LSRUse::KindType
Kind,
2524 MemAccessTy AccessTy) {
2525 int64_t NewMinOffset = LU.MinOffset;
2526 int64_t NewMaxOffset = LU.MaxOffset;
2527 MemAccessTy NewAccessTy = AccessTy;
2532 if (LU.Kind !=
Kind)
2538 if (
Kind == LSRUse::Address) {
2539 if (AccessTy.MemTy != LU.AccessTy.MemTy) {
2540 NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext(),
2541 AccessTy.AddrSpace);
2546 if (NewOffset < LU.MinOffset) {
2548 LU.MaxOffset - NewOffset, HasBaseReg))
2550 NewMinOffset = NewOffset;
2551 }
else if (NewOffset > LU.MaxOffset) {
2553 NewOffset - LU.MinOffset, HasBaseReg))
2555 NewMaxOffset = NewOffset;
2559 LU.MinOffset = NewMinOffset;
2560 LU.MaxOffset = NewMaxOffset;
2561 LU.AccessTy = NewAccessTy;
2568 std::pair<size_t, int64_t> LSRInstance::getUse(
const SCEV *&Expr,
2569 LSRUse::KindType
Kind,
2570 MemAccessTy AccessTy) {
2581 std::pair<UseMapTy::iterator, bool>
P =
2585 size_t LUIdx =
P.first->second;
2586 LSRUse &LU =
Uses[LUIdx];
2587 if (reconcileNewOffset(LU,
Offset,
true,
Kind, AccessTy))
2589 return std::make_pair(LUIdx,
Offset);
2593 size_t LUIdx =
Uses.size();
2594 P.first->second = LUIdx;
2595 Uses.push_back(LSRUse(
Kind, AccessTy));
2596 LSRUse &LU =
Uses[LUIdx];
2600 return std::make_pair(LUIdx,
Offset);
2604 void LSRInstance::DeleteUse(LSRUse &LU,
size_t LUIdx) {
2605 if (&LU != &
Uses.back())
2610 RegUses.swapAndDropUse(LUIdx,
Uses.size());
2616 LSRInstance::FindUseWithSimilarFormula(
const Formula &OrigF,
2617 const LSRUse &OrigLU) {
2619 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
2620 LSRUse &LU =
Uses[LUIdx];
2626 if (&LU != &OrigLU &&
2627 LU.Kind != LSRUse::ICmpZero &&
2628 LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
2629 LU.WidestFixupType == OrigLU.WidestFixupType &&
2630 LU.HasFormulaWithSameRegs(OrigF)) {
2632 for (
const Formula &
F : LU.Formulae) {
2635 if (
F.BaseRegs == OrigF.BaseRegs &&
2636 F.ScaledReg == OrigF.ScaledReg &&
2637 F.BaseGV == OrigF.BaseGV &&
2638 F.Scale == OrigF.Scale &&
2639 F.UnfoldedOffset == OrigF.UnfoldedOffset) {
2640 if (
F.BaseOffset == 0)
2655 void LSRInstance::CollectInterestingTypesAndFactors() {
2661 const SCEV *Expr = IU.getExpr(U);
2667 Worklist.push_back(Expr);
2673 Worklist.push_back(AR->
getStart());
2674 }
else if (
const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(
S)) {
2677 }
while (!Worklist.empty());
2684 std::next(
I); NewStrideIter !=
E; ++NewStrideIter) {
2685 const SCEV *OldStride = *
I;
2686 const SCEV *NewStride = *NewStrideIter;
2697 dyn_cast_or_null<SCEVConstant>(
getExactSDiv(NewStride, OldStride,
2699 if (Factor->getAPInt().getMinSignedBits() <= 64)
2700 Factors.insert(Factor->getAPInt().getSExtValue());
2705 if (Factor->getAPInt().getMinSignedBits() <= 64)
2706 Factors.insert(Factor->getAPInt().getSExtValue());
2712 if (Types.size() == 1)
2724 for(; OI != OE; ++OI) {
2725 if (
Instruction *Oper = dyn_cast<Instruction>(*OI)) {
2730 dyn_cast<SCEVAddRecExpr>(SE.
getSCEV(Oper))) {
2742 if (
TruncInst *Trunc = dyn_cast<TruncInst>(Oper))
2743 return Trunc->getOperand(0);
2770 switch (
S->getSCEVType()) {
2776 return getExprBase(cast<SCEVTruncateExpr>(
S)->getOperand());
2778 return getExprBase(cast<SCEVZeroExtendExpr>(
S)->getOperand());
2780 return getExprBase(cast<SCEVSignExtendExpr>(
S)->getOperand());
2786 for (std::reverse_iterator<SCEVAddExpr::op_iterator>
I(Add->op_end()),
2787 E(Add->op_begin());
I !=
E; ++
I) {
2788 const SCEV *SubExpr = *
I;
2798 return getExprBase(cast<SCEVAddRecExpr>(
S)->getStart());
2808 bool IVChain::isProfitableIncrement(
const SCEV *OperExpr,
2809 const SCEV *IncExpr,
2817 if (!isa<SCEVConstant>(IncExpr)) {
2819 if (isa<SCEVConstant>(SE.
getMinusSCEV(OperExpr, HeadExpr)))
2844 if (!Chain.hasIncs())
2847 if (!
Users.empty()) {
2848 LLVM_DEBUG(
dbgs() <<
"Chain: " << *Chain.Incs[0].UserInst <<
" users:\n";
2850 :
Users) {
dbgs() <<
" " << *Inst <<
"\n"; });
2853 assert(!Chain.Incs.empty() &&
"empty IV chains are not allowed");
2861 if (isa<PHINode>(Chain.tailUserInst())
2862 && SE.
getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
2865 const SCEV *LastIncExpr =
nullptr;
2866 unsigned NumConstIncrements = 0;
2867 unsigned NumVarIncrements = 0;
2868 unsigned NumReusedIncrements = 0;
2873 for (
const IVInc &Inc : Chain) {
2876 if (Inc.IncExpr->isZero())
2881 if (isa<SCEVConstant>(Inc.IncExpr)) {
2882 ++NumConstIncrements;
2886 if (Inc.IncExpr == LastIncExpr)
2887 ++NumReusedIncrements;
2891 LastIncExpr = Inc.IncExpr;
2896 if (NumConstIncrements > 1)
2903 cost += NumVarIncrements;
2907 cost -= NumReusedIncrements;
2909 LLVM_DEBUG(
dbgs() <<
"Chain: " << *Chain.Incs[0].UserInst <<
" Cost: " << cost
2926 unsigned ChainIdx = 0, NChains = IVChainVec.size();
2927 const SCEV *LastIncExpr =
nullptr;
2928 for (; ChainIdx < NChains; ++ChainIdx) {
2929 IVChain &Chain = IVChainVec[ChainIdx];
2943 if (isa<PHINode>(UserInst) && isa<PHINode>(Chain.tailUserInst()))
2952 if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
2953 LastIncExpr = IncExpr;
2959 if (ChainIdx == NChains) {
2960 if (isa<PHINode>(UserInst))
2966 LastIncExpr = OperExpr;
2970 if (!isa<SCEVAddRecExpr>(LastIncExpr))
2973 IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
2975 ChainUsersVec.
resize(NChains);
2976 LLVM_DEBUG(
dbgs() <<
"IV Chain#" << ChainIdx <<
" Head: (" << *UserInst
2977 <<
") IV=" << *LastIncExpr <<
"\n");
2979 LLVM_DEBUG(
dbgs() <<
"IV Chain#" << ChainIdx <<
" Inc: (" << *UserInst
2980 <<
") IV+" << *LastIncExpr <<
"\n");
2982 IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
2984 IVChain &Chain = IVChainVec[ChainIdx];
2988 if (!LastIncExpr->
isZero()) {
2989 ChainUsersVec[ChainIdx].FarUsers.
insert(NearUsers.
begin(),
3005 IVChain::const_iterator IncIter = Chain.Incs.begin();
3006 IVChain::const_iterator IncEnd = Chain.Incs.end();
3007 for( ; IncIter != IncEnd; ++IncIter) {
3008 if (IncIter->UserInst == OtherUse)
3011 if (IncIter != IncEnd)
3015 && !isa<SCEVUnknown>(SE.
getSCEV(OtherUse))
3016 && IU.isIVUserOrOperand(OtherUse)) {
3019 NearUsers.
insert(OtherUse);
3024 ChainUsersVec[ChainIdx].FarUsers.
erase(UserInst);
3049 void LSRInstance::CollectChains() {
3056 Rung->
getBlock() != LoopHeader; Rung = Rung->getIDom()) {
3057 LatchPath.push_back(Rung->getBlock());
3059 LatchPath.push_back(LoopHeader);
3065 if (isa<PHINode>(
I) || !IU.isIVUserOrOperand(&
I))
3075 for (
unsigned ChainIdx = 0, NChains = IVChainVec.size();
3076 ChainIdx < NChains; ++ChainIdx) {
3077 ChainUsersVec[ChainIdx].NearUsers.
erase(&
I);
3083 while (IVOpIter != IVOpEnd) {
3084 Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
3085 if (UniqueOperands.
insert(IVOpInst).second)
3086 ChainInstruction(&
I, IVOpInst, ChainUsersVec);
3087 IVOpIter =
findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
3097 dyn_cast<Instruction>(PN.getIncomingValueForBlock(L->
getLoopLatch()));
3099 ChainInstruction(&PN, IncV, ChainUsersVec);
3102 unsigned ChainIdx = 0;
3103 for (
unsigned UsersIdx = 0, NChains = IVChainVec.size();
3104 UsersIdx < NChains; ++UsersIdx) {
3106 ChainUsersVec[UsersIdx].FarUsers, SE,
TTI))
3109 if (ChainIdx != UsersIdx)
3110 IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
3111 FinalizeChain(IVChainVec[ChainIdx]);
3114 IVChainVec.resize(ChainIdx);
3117 void LSRInstance::FinalizeChain(IVChain &Chain) {
3118 assert(!Chain.Incs.empty() &&
"empty IV chains are not allowed");
3119 LLVM_DEBUG(
dbgs() <<
"Final Chain: " << *Chain.Incs[0].UserInst <<
"\n");
3121 for (
const IVInc &Inc : Chain) {
3123 auto UseI =
find(Inc.UserInst->operands(), Inc.IVOperand);
3124 assert(UseI != Inc.UserInst->op_end() &&
"cannot find IV operand");
3125 IVIncSet.insert(UseI);
3132 const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
3154 const IVInc &Head = Chain.Incs[0];
3159 Value *IVSrc =
nullptr;
3160 while (IVOpIter != IVOpEnd) {
3171 if (SE.
getSCEV(*IVOpIter) == Head.IncExpr
3172 || SE.
getSCEV(IVSrc) == Head.IncExpr) {
3175 IVOpIter =
findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
3177 if (IVOpIter == IVOpEnd) {
3179 LLVM_DEBUG(
dbgs() <<
"Concealed chain head: " << *Head.UserInst <<
"\n");
3182 assert(IVSrc &&
"Failed to find IV chain source");
3187 const SCEV *LeftOverExpr =
nullptr;
3188 for (
const IVInc &Inc : Chain) {
3190 if (isa<PHINode>(InsertPt))
3195 Value *IVOper = IVSrc;
3196 if (!Inc.IncExpr->isZero()) {
3200 LeftOverExpr = LeftOverExpr ?
3201 SE.
getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
3203 if (LeftOverExpr && !LeftOverExpr->
isZero()) {
3206 Value *IncV =
Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
3209 IVOper =
Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
3213 assert(IVTy == IVOper->
getType() &&
"inconsistent IV increment type");
3215 LeftOverExpr =
nullptr;
3218 Type *OperTy = Inc.IVOperand->getType();
3219 if (IVTy != OperTy) {
3221 "cannot extend a chained IV");
3223 IVOper =
Builder.CreateTruncOrBitCast(IVOper, OperTy,
"lsr.chain");
3225 Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);
3226 if (
auto *OperandIsInstr = dyn_cast<Instruction>(Inc.IVOperand))
3231 if (isa<PHINode>(Chain.tailUserInst())) {
3239 Value *IVOper = IVSrc;
3241 if (IVTy != PostIncTy) {
3245 IVOper =
Builder.CreatePointerCast(IVSrc, PostIncTy,
"lsr.chain");
3247 Phi.replaceUsesOfWith(PostIncV, IVOper);
3253 void LSRInstance::CollectFixupsAndInitialFormulae() {
3255 bool SaveCmp =
TTI.
canSaveCmp(L, &ExitBranch, &SE, &LI, &DT, &AC, &TLI);
3261 find(UserInst->
operands(), U.getOperandValToReplace());
3262 assert(UseI != UserInst->
op_end() &&
"cannot find IV operand");
3263 if (IVIncSet.count(UseI)) {
3264 LLVM_DEBUG(
dbgs() <<
"Use is in profitable chain: " << **UseI <<
'\n');
3269 MemAccessTy AccessTy;
3271 Kind = LSRUse::Address;
3275 const SCEV *
S = IU.getExpr(U);
3284 if (
ICmpInst *CI = dyn_cast<ICmpInst>(UserInst)) {
3287 if (SaveCmp && CI == dyn_cast<ICmpInst>(ExitBranch->
getCondition()))
3289 if (CI->isEquality()) {
3292 Value *
NV = CI->getOperand(1);
3293 if (
NV == U.getOperandValToReplace()) {
3294 CI->setOperand(1, CI->getOperand(0));
3295 CI->setOperand(0,
NV);
3296 NV = CI->getOperand(1);
3306 Kind = LSRUse::ICmpZero;
3312 for (
size_t i = 0,
e = Factors.size();
i !=
e; ++
i)
3313 if (Factors[
i] != -1)
3314 Factors.insert(-(uint64_t)Factors[
i]);
3320 std::pair<size_t, int64_t>
P = getUse(
S,
Kind, AccessTy);
3321 size_t LUIdx =
P.first;
3323 LSRUse &LU =
Uses[LUIdx];
3326 LSRFixup &LF = LU.getNewFixup();
3327 LF.UserInst = UserInst;
3328 LF.OperandValToReplace = U.getOperandValToReplace();
3329 LF.PostIncLoops = TmpPostIncLoops;
3331 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3333 if (!LU.WidestFixupType ||
3336 LU.WidestFixupType = LF.OperandValToReplace->getType();
3339 if (LU.Formulae.empty()) {
3340 InsertInitialFormula(
S, LU, LUIdx);
3341 CountRegisters(LU.Formulae.back(), LUIdx);
3351 LSRInstance::InsertInitialFormula(
const SCEV *
S, LSRUse &LU,
size_t LUIdx) {
3354 LU.RigidFormula =
true;
3357 F.initialMatch(
S, L, SE);
3358 bool Inserted = InsertFormula(LU, LUIdx,
F);
3359 assert(Inserted &&
"Initial formula already exists!"); (void)Inserted;
3365 LSRInstance::InsertSupplementalFormula(
const SCEV *
S,
3366 LSRUse &LU,
size_t LUIdx) {
3368 F.BaseRegs.push_back(
S);
3369 F.HasBaseReg =
true;
3370 bool Inserted = InsertFormula(LU, LUIdx,
F);
3371 assert(Inserted &&
"Supplemental formula already exists!"); (void)Inserted;
3375 void LSRInstance::CountRegisters(
const Formula &
F,
size_t LUIdx) {
3377 RegUses.countRegister(
F.ScaledReg, LUIdx);
3378 for (
const SCEV *BaseReg :
F.BaseRegs)
3379 RegUses.countRegister(BaseReg, LUIdx);
3384 bool LSRInstance::InsertFormula(LSRUse &LU,
unsigned LUIdx,
const Formula &
F) {
3387 "Formula is illegal");
3389 if (!LU.InsertFormula(
F, *L))
3392 CountRegisters(
F, LUIdx);
3402 LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
3406 while (!Worklist.empty()) {
3410 if (!Visited.
insert(
S).second)
3414 Worklist.
append(
N->op_begin(),
N->op_end());
3416 Worklist.push_back(
C->getOperand());
3418 Worklist.push_back(
D->getLHS());
3419 Worklist.push_back(
D->getRHS());
3420 }
else if (
const SCEVUnknown *US = dyn_cast<SCEVUnknown>(
S)) {
3421 const Value *V = US->getValue();
3422 if (
const Instruction *Inst = dyn_cast<Instruction>(V)) {
3425 }
else if (isa<UndefValue>(V))
3428 for (
const Use &U : V->
uses()) {
3429 const Instruction *UserInst = dyn_cast<Instruction>(U.getUser());
3441 const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
3443 cast<PHINode>(UserInst)->getIncomingBlock(
3445 if (!DT.dominates(L->
getHeader(), UseBB))
3458 if (!isa<SCEVUnknown>(UserS))
3467 if (
const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
3468 unsigned OtherIdx = !U.getOperandNo();
3469 Value *OtherOp =
const_cast<Value *
>(ICI->getOperand(OtherIdx));
3474 std::pair<size_t, int64_t>
P = getUse(
3476 size_t LUIdx =
P.first;
3478 LSRUse &LU =
Uses[LUIdx];
3479 LSRFixup &LF = LU.getNewFixup();
3480 LF.UserInst =
const_cast<Instruction *
>(UserInst);
3481 LF.OperandValToReplace = U;
3483 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3484 if (!LU.WidestFixupType ||
3487 LU.WidestFixupType = LF.OperandValToReplace->getType();
3488 InsertSupplementalFormula(US, LU, LUIdx);
3489 CountRegisters(LU.Formulae.back(),
Uses.size() - 1);
3505 unsigned Depth = 0) {
3510 if (
const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(
S)) {
3512 for (
const SCEV *
S : Add->operands()) {
3515 Ops.push_back(
C ? SE.
getMulExpr(
C, Remainder) : Remainder);
3527 if (Remainder && (AR->
getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
3528 Ops.push_back(
C ? SE.
getMulExpr(
C, Remainder) : Remainder);
3529 Remainder =
nullptr;
3542 if (
Mul->getNumOperands() != 2)
3545 dyn_cast<SCEVConstant>(
Mul->getOperand(0))) {
3547 const SCEV *Remainder =
3560 LSRUse &LU,
const SCEV *
S,
const Loop *L,
3562 if (LU.Kind != LSRUse::Address ||
3563 !LU.AccessTy.getType()->isIntOrIntVectorTy())
3569 if (!isa<SCEVConstant>(LoopStep))
3575 if (!isa<SCEVConstant>(LoopStart) && SE.
isLoopInvariant(LoopStart, L))
3582 void LSRInstance::GenerateReassociationsImpl(LSRUse &LU,
unsigned LUIdx,
3583 const Formula &Base,
3584 unsigned Depth,
size_t Idx,
3586 const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
3596 AddOps.push_back(Remainder);
3598 if (AddOps.size() == 1)
3612 LU.AccessTy, *J, Base.getNumRegs() > 1))
3618 InnerAddOps.append(std::next(J),
3623 if (InnerAddOps.size() == 1 &&
3625 LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))
3634 const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
3641 F.ScaledReg =
nullptr;
3643 F.BaseRegs.erase(
F.BaseRegs.begin() + Idx);
3644 }
else if (IsScaledReg)
3645 F.ScaledReg = InnerSum;
3647 F.BaseRegs[Idx] = InnerSum;
3653 SC->getValue()->getZExtValue()))
3655 (uint64_t)
F.UnfoldedOffset +
SC->getValue()->getZExtValue();
3657 F.BaseRegs.push_back(*J);
3662 if (InsertFormula(LU, LUIdx,
F))
3669 GenerateReassociations(LU, LUIdx, LU.Formulae.back(),
3675 void LSRInstance::GenerateReassociations(LSRUse &LU,
unsigned LUIdx,
3676 Formula Base,
unsigned Depth) {
3677 assert(Base.isCanonical(*L) &&
"Input must be in the canonical form");
3682 for (
size_t i = 0,
e = Base.BaseRegs.size();
i !=
e; ++
i)
3683 GenerateReassociationsImpl(LU, LUIdx, Base,
Depth,
i);
3685 if (Base.Scale == 1)
3686 GenerateReassociationsImpl(LU, LUIdx, Base,
Depth,
3692 void LSRInstance::GenerateCombinations(LSRUse &LU,
unsigned LUIdx,
3695 if (Base.BaseRegs.size() + (Base.Scale == 1) +
3696 (Base.UnfoldedOffset != 0) <= 1)
3703 Formula NewBase = Base;
3704 NewBase.BaseRegs.clear();
3705 Type *CombinedIntegerType =
nullptr;
3706 for (
const SCEV *BaseReg : Base.BaseRegs) {
3709 if (!CombinedIntegerType)
3711 Ops.push_back(BaseReg);
3714 NewBase.BaseRegs.push_back(BaseReg);
3718 if (Ops.size() == 0)
3723 auto GenerateFormula = [&](
const SCEV *Sum) {
3724 Formula
F = NewBase;
3732 F.BaseRegs.push_back(Sum);
3734 (void)InsertFormula(LU, LUIdx,
F);
3738 if (Ops.size() > 1) {
3745 if (NewBase.UnfoldedOffset) {
3746 assert(CombinedIntegerType &&
"Missing a type for the unfolded offset");
3747 Ops.push_back(SE.
getConstant(CombinedIntegerType, NewBase.UnfoldedOffset,
3749 NewBase.UnfoldedOffset = 0;
3755 void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
3756 const Formula &Base,
size_t Idx,
3758 const SCEV *
G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
3760 if (
G->isZero() || !GV)
3764 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F))
3769 F.BaseRegs[Idx] =
G;
3770 (void)InsertFormula(LU, LUIdx,
F);
3774 void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU,
unsigned LUIdx,
3777 if (Base.BaseGV)
return;
3779 for (
size_t i = 0,
e = Base.BaseRegs.size();
i !=
e; ++
i)
3780 GenerateSymbolicOffsetsImpl(LU, LUIdx, Base,
i);
3781 if (Base.Scale == 1)
3782 GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, -1,
3787 void LSRInstance::GenerateConstantOffsetsImpl(
3788 LSRUse &LU,
unsigned LUIdx,
const Formula &Base,
3791 auto GenerateOffset = [&](
const SCEV *
G, int64_t
Offset) {
3793 F.BaseOffset = (uint64_t)Base.BaseOffset -
Offset;
3795 if (
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F)) {
3802 F.ScaledReg =
nullptr;
3804 F.deleteBaseReg(
F.BaseRegs[Idx]);
3806 }
else if (IsScaledReg)
3809 F.BaseRegs[Idx] = NewG;
3811 (void)InsertFormula(LU, LUIdx,
F);
3815 const SCEV *
G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
3826 if (
auto *GAR = dyn_cast<SCEVAddRecExpr>(
G)) {
3828 dyn_cast<SCEVConstant>(GAR->getStepRecurrence(SE))) {
3829 const APInt &StepInt = StepRec->getAPInt();
3833 for (int64_t
Offset : Worklist) {
3840 for (int64_t
Offset : Worklist)
3844 if (
G->isZero() || Imm == 0)
3847 F.BaseOffset = (uint64_t)
F.BaseOffset + Imm;
3853 F.BaseRegs[Idx] =
G;
3858 (void)InsertFormula(LU, LUIdx,
F);
3862 void LSRInstance::GenerateConstantOffsets(LSRUse &LU,
unsigned LUIdx,
3867 Worklist.push_back(LU.MinOffset);
3868 if (LU.MaxOffset != LU.MinOffset)
3869 Worklist.push_back(LU.MaxOffset);
3871 for (
size_t i = 0,
e = Base.BaseRegs.size();
i !=
e; ++
i)
3872 GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist,
i);
3873 if (Base.Scale == 1)
3874 GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, -1,
3880 void LSRInstance::GenerateICmpZeroScales(LSRUse &LU,
unsigned LUIdx,
3882 if (LU.Kind != LSRUse::ICmpZero)
return;
3885 Type *IntTy = Base.getType();
3890 if (LU.MinOffset != LU.MaxOffset)
return;
3893 if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
3895 for (
const SCEV *BaseReg : Base.BaseRegs)
3898 assert(!Base.BaseGV &&
"ICmpZero use is not legal!");
3901 for (int64_t Factor : Factors) {
3905 int64_t NewBaseOffset = (uint64_t)Base.BaseOffset * Factor;
3906 if (NewBaseOffset / Factor != Base.BaseOffset)
3914 int64_t
Offset = LU.MinOffset;
3918 if (
Offset / Factor != LU.MinOffset)
3926 F.BaseOffset = NewBaseOffset;
3933 F.BaseOffset = (uint64_t)
F.BaseOffset +
Offset - LU.MinOffset;
3938 for (
size_t i = 0,
e =
F.BaseRegs.size();
i !=
e; ++
i) {
3947 if (
getExactSDiv(
F.ScaledReg, FactorS, SE) != Base.ScaledReg)
3952 if (
F.UnfoldedOffset != 0) {
3956 F.UnfoldedOffset = (uint64_t)
F.UnfoldedOffset * Factor;
3957 if (
F.UnfoldedOffset / Factor != Base.UnfoldedOffset)
3966 (void)InsertFormula(LU, LUIdx,
F);
3973 void LSRInstance::GenerateScales(LSRUse &LU,
unsigned LUIdx, Formula Base) {
3975 Type *IntTy = Base.getType();
3980 if (Base.Scale != 0 && !Base.unscale())
3983 assert(Base.Scale == 0 &&
"unscale did not did its job!");
3986 for (int64_t Factor : Factors) {
3987 Base.Scale = Factor;
3988 Base.HasBaseReg = Base.BaseRegs.size() > 1;
3990 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
3995 isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
3996 LU.AccessTy, Base) &&
3997 LU.AllFixupsOutsideLoop)
3998 LU.Kind = LSRUse::Special;
4004 if (LU.Kind == LSRUse::ICmpZero &&
4005 !Base.HasBaseReg && Base.BaseOffset == 0 && !Base.BaseGV)
4008 for (
size_t i = 0,
e = Base.BaseRegs.size();
i !=
e; ++
i) {
4009 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[
i]);
4010 if (AR && (AR->
getLoop() == L || LU.AllFixupsOutsideLoop)) {
4019 F.ScaledReg = Quotient;
4020 F.deleteBaseReg(
F.BaseRegs[
i]);
4024 if (
F.Scale == 1 && (
F.BaseRegs.empty() ||
4025 (AR->
getLoop() != L && LU.AllFixupsOutsideLoop)))
4029 if (
F.Scale == 1 && LU.AllFixupsOutsideLoop)
4031 (void)InsertFormula(LU, LUIdx,
F);
4039 void LSRInstance::GenerateTruncates(LSRUse &LU,
unsigned LUIdx, Formula Base) {
4041 if (Base.BaseGV)
return;
4044 Type *DstTy = Base.getType();
4048 for (
Type *SrcTy : Types) {
4058 if (NewScaledReg->
isZero())
4060 F.ScaledReg = NewScaledReg;
4062 bool HasZeroBaseReg =
false;
4063 for (
const SCEV *&BaseReg :
F.BaseRegs) {
4065 if (NewBaseReg->
isZero()) {
4066 HasZeroBaseReg =
true;
4069 BaseReg = NewBaseReg;
4076 if (!
F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
4080 (void)InsertFormula(LU, LUIdx,
F);
4093 const SCEV *OrigReg;
4095 WorkItem(
size_t LI, int64_t
I,
const SCEV *R)
4096 : LUIdx(LI), Imm(
I), OrigReg(
R) {}
4104 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4106 OS <<
"in formulae referencing " << *OrigReg <<
" in use " << LUIdx
4107 <<
" , add offset " << Imm;
4117 void LSRInstance::GenerateCrossUseConstantOffsets() {
4119 using ImmMapTy = std::map<int64_t, const SCEV *>;
4124 for (
const SCEV *
Use : RegUses) {
4127 auto Pair =
Map.insert(std::make_pair(
Reg, ImmMapTy()));
4130 Pair.first->second.insert(std::make_pair(Imm,
Use));
4131 UsedByIndicesMap[
Reg] |= RegUses.getUsedByIndices(
Use);
4140 const ImmMapTy &Imms =
Map.find(
Reg)->second;
4143 if (Imms.size() == 1)
4147 for (
const auto &Entry
4149 <<
' ' << Entry.first;
4153 for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
4155 const SCEV *OrigReg = J->second;
4157 int64_t JImm = J->first;
4158 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
4160 if (!isa<SCEVConstant>(OrigReg) &&
4161 UsedByIndicesMap[
Reg].
count() == 1) {
4169 int64_t
First = Imms.begin()->first;
4170 int64_t
Last = std::prev(Imms.end())->first;
4173 int64_t Avg = (
First &
Last) + ((First ^ Last) >> 1);
4176 Avg = Avg + ((
First ^
Last) & ((uint64_t)Avg >> 63));
4177 ImmMapTy::const_iterator OtherImms[] = {
4178 Imms.begin(), std::prev(Imms.end()),
4179 Imms.lower_bound(Avg)};
4181 ImmMapTy::const_iterator
M = OtherImms[
i];
4182 if (M == J || M == JE)
continue;
4185 int64_t Imm = (uint64_t)JImm -
M->first;
4188 if (UniqueItems.
insert(std::make_pair(LUIdx, Imm)).second)
4189 WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));
4196 UsedByIndicesMap.
clear();
4197 UniqueItems.
clear();
4200 for (
const WorkItem &WI : WorkItems) {
4201 size_t LUIdx = WI.LUIdx;
4202 LSRUse &LU =
Uses[LUIdx];
4203 int64_t Imm = WI.Imm;
4204 const SCEV *OrigReg = WI.OrigReg;
4211 for (
size_t L = 0,
LE = LU.Formulae.size(); L !=
LE; ++L) {
4212 Formula
F = LU.Formulae[L];
4219 if (
F.ScaledReg == OrigReg) {
4220 int64_t
Offset = (uint64_t)
F.BaseOffset + Imm * (uint64_t)
F.Scale;
4226 NewF.BaseOffset =
Offset;
4227 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
4230 NewF.ScaledReg = SE.
getAddExpr(NegImmS, NewF.ScaledReg);
4235 if (
const SCEVConstant *
C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
4236 if (
C->getValue()->isNegative() != (NewF.BaseOffset < 0) &&
4242 NewF.canonicalize(*this->L);
4243 (void)InsertFormula(LU, LUIdx, NewF);
4246 for (
size_t N = 0,
NE =
F.BaseRegs.size();
N !=
NE; ++
N) {
4247 const SCEV *BaseReg =
F.BaseRegs[
N];
4248 if (BaseReg != OrigReg)
4251 NewF.BaseOffset = (uint64_t)NewF.BaseOffset + Imm;
4253 LU.Kind, LU.AccessTy, NewF)) {
4260 NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm;
4262 NewF.BaseRegs[
N] = SE.
getAddExpr(NegImmS, BaseReg);
4267 for (
const SCEV *NewReg : NewF.BaseRegs)
4269 if ((
C->getAPInt() + NewF.BaseOffset)
4273 countTrailingZeros<uint64_t>(NewF.BaseOffset))
4277 NewF.canonicalize(*this->L);
4278 (void)InsertFormula(LU, LUIdx, NewF);
4289 LSRInstance::GenerateAllReuseFormulae() {
4292 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4293 LSRUse &LU =
Uses[LUIdx];
4294 for (
size_t i = 0,
f = LU.Formulae.size();
i !=
f; ++
i)
4295 GenerateReassociations(LU, LUIdx, LU.Formulae[
i]);
4296 for (
size_t i = 0,
f = LU.Formulae.size();
i !=
f; ++
i)
4297 GenerateCombinations(LU, LUIdx, LU.Formulae[
i]);
4299 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4300 LSRUse &LU =
Uses[LUIdx];
4301 for (
size_t i = 0,
f = LU.Formulae.size();
i !=
f; ++
i)
4302 GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[
i]);
4303 for (
size_t i = 0,
f = LU.Formulae.size();
i !=
f; ++
i)
4304 GenerateConstantOffsets(LU, LUIdx, LU.Formulae[
i]);
4305 for (
size_t i = 0,
f = LU.Formulae.size();
i !=
f; ++
i)
4306 GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[
i]);
4307 for (
size_t i = 0,
f = LU.Formulae.size();
i !=
f; ++
i)
4308 GenerateScales(LU, LUIdx, LU.Formulae[
i]);
4310 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4311 LSRUse &LU =
Uses[LUIdx];
4312 for (
size_t i = 0,
f = LU.Formulae.size();
i !=
f; ++
i)
4313 GenerateTruncates(LU, LUIdx, LU.Formulae[
i]);
4316 GenerateCrossUseConstantOffsets();
4319 "After generating reuse formulae:\n";
4320 print_uses(
dbgs()));
4325 void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
4330 bool ChangedFormulae =
false;
4335 using BestFormulaeTy =
4338 BestFormulaeTy BestFormulae;
4340 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4341 LSRUse &LU =
Uses[LUIdx];
4346 for (
size_t FIdx = 0, NumForms = LU.Formulae.size();
4347 FIdx != NumForms; ++FIdx) {
4348 Formula &
F = LU.Formulae[FIdx];
4357 Cost CostF(L, SE,
TTI, AMK);
4359 CostF.RateFormula(
F, Regs, VisitedRegs, LU, &LoserRegs);
4360 if (CostF.isLoser()) {
4372 for (
const SCEV *
Reg :
F.BaseRegs) {
4373 if (RegUses.isRegUsedByUsesOtherThan(
Reg, LUIdx))
4377 RegUses.isRegUsedByUsesOtherThan(
F.ScaledReg, LUIdx))
4378 Key.push_back(
F.ScaledReg);
4383 std::pair<BestFormulaeTy::const_iterator, bool>
P =
4384 BestFormulae.insert(std::make_pair(
Key, FIdx));
4388 Formula &Best = LU.Formulae[
P.first->second];
4390 Cost CostBest(L, SE,
TTI, AMK);
4392 CostBest.RateFormula(Best, Regs, VisitedRegs, LU);
4393 if (CostF.isLess(CostBest))
4397 " in favor of formula ";
4398 Best.print(
dbgs());
dbgs() <<
'\n');
4401 ChangedFormulae =
true;
4403 LU.DeleteFormula(
F);
4411 LU.RecomputeRegs(LUIdx, RegUses);
4414 BestFormulae.clear();
4419 "After filtering out undesirable candidates:\n";
4427 size_t LSRInstance::EstimateSearchSpaceComplexity()
const {
4429 for (
const LSRUse &LU :
Uses) {
4430 size_t FSize = LU.Formulae.size();
4445 void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
4449 LLVM_DEBUG(
dbgs() <<
"Narrowing the search space by eliminating formulae "
4450 "which use a superset of registers used by other "
4453 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4454 LSRUse &LU =
Uses[LUIdx];
4456 for (
size_t i = 0,
e = LU.Formulae.size();
i !=
e; ++
i) {
4457 Formula &
F = LU.Formulae[
i];
4462 I =
F.BaseRegs.begin(),
E =
F.BaseRegs.end();
I !=
E; ++
I) {
4467 NewF.BaseOffset += (uint64_t)
C->getValue()->getSExtValue();
4468 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4469 (
I -
F.BaseRegs.begin()));
4470 if (LU.HasFormulaWithSameRegs(NewF)) {
4473 LU.DeleteFormula(
F);
4479 }
else if (
const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*
I)) {
4480 if (
GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue()))
4484 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4485 (
I -
F.BaseRegs.begin()));
4486 if (LU.HasFormulaWithSameRegs(NewF)) {
4489 LU.DeleteFormula(
F);
4500 LU.RecomputeRegs(LUIdx, RegUses);
4509 void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
4514 dbgs() <<
"The search space is too complex.\n"
4515 "Narrowing the search space by assuming that uses separated "
4516 "by a constant offset will use the same registers.\n");
4520 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx) {
4521 LSRUse &LU =
Uses[LUIdx];
4522 for (
const Formula &
F : LU.Formulae) {
4523 if (
F.BaseOffset == 0 || (
F.Scale != 0 &&
F.Scale != 1))
4526 LSRUse *LUThatHas = FindUseWithSimilarFormula(
F, LU);
4530 if (!reconcileNewOffset(*LUThatHas,
F.BaseOffset,
false,
4531 LU.Kind, LU.AccessTy))
4536 LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
4539 for (LSRFixup &Fixup : LU.Fixups) {
4540 Fixup.Offset +=
F.BaseOffset;
4541 LUThatHas->pushFixup(Fixup);
4547 for (
size_t i = 0,
e = LUThatHas->Formulae.size();
i !=
e; ++
i) {
4548 Formula &
F = LUThatHas->Formulae[
i];
4549 if (!
isLegalUse(
TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
4550 LUThatHas->Kind, LUThatHas->AccessTy,
F)) {
4552 LUThatHas->DeleteFormula(
F);
4560 LUThatHas->RecomputeRegs(LUThatHas - &
Uses.front(), RegUses);
4563 DeleteUse(LU, LUIdx);
4576 void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
4580 LLVM_DEBUG(
dbgs() <<
"Narrowing the search space by re-filtering out "
4581 "undesirable dedicated registers.\n");
4583 FilterOutUndesirableDedicatedRegisters();
void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
  if (EstimateSearchSpaceComplexity() < ComplexityLimit)
    return;

  LLVM_DEBUG(
      dbgs() << "The search space is too complex.\n"
                "Narrowing the search space by choosing the best Formula "
                "from the Formulae with the same Scale and ScaledReg.\n");

  // Map the pair (ScaledReg, Scale) to the best formula seen so far.
  using BestFormulaeTy = DenseMap<std::pair<const SCEV *, int64_t>, size_t>;
  BestFormulaeTy BestFormulae;
  bool ChangedFormulae = false;

  DenseSet<const SCEV *> VisitedRegs;
  SmallPtrSet<const SCEV *, 16> Regs;

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    LLVM_DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs());
               dbgs() << '\n');

    // Return true if Formula FA is better than Formula FB.
    auto IsBetterThan = [&](Formula &FA, Formula &FB) {
      // First we will try to choose the formula with fewer new registers.
      // For a register used by the current formula, the more the register is
      // shared among LSRUses, the less we increase the register number
      // counter of the formula.
      size_t FARegNum = 0;
      for (const SCEV *Reg : FA.BaseRegs) {
        const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
        FARegNum += (NumUses - UsedByIndices.count() + 1);
      }
      size_t FBRegNum = 0;
      for (const SCEV *Reg : FB.BaseRegs) {
        const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
        FBRegNum += (NumUses - UsedByIndices.count() + 1);
      }
      if (FARegNum != FBRegNum)
        return FARegNum < FBRegNum;

      // If the new register numbers are the same, choose the Formula with
      // the lower Cost.
      Cost CostFA(L, SE, TTI, AMK);
      Cost CostFB(L, SE, TTI, AMK);
      Regs.clear();
      CostFA.RateFormula(FA, Regs, VisitedRegs, LU);
      Regs.clear();
      CostFB.RateFormula(FB, Regs, VisitedRegs, LU);
      return CostFA.isLess(CostFB);
    };

    bool Any = false;
    for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
         ++FIdx) {
      Formula &F = LU.Formulae[FIdx];
      if (!F.ScaledReg)
        continue;
      auto P = BestFormulae.insert({{F.ScaledReg, F.Scale}, FIdx});
      if (P.second)
        continue;

      Formula &Best = LU.Formulae[P.first->second];
      if (IsBetterThan(F, Best))
        std::swap(F, Best);
      LLVM_DEBUG(dbgs() << "  Filtering out formula "; F.print(dbgs());
                 dbgs() << "\n"
                           "    in favor of formula ";
                 Best.print(dbgs()); dbgs() << '\n');
      ChangedFormulae = true;
      LU.DeleteFormula(F);
      --FIdx;
      --NumForms;
      Any = true;
    }
    if (Any)
      LU.RecomputeRegs(LUIdx, RegUses);

    // Reset this to prepare for the next use.
    BestFormulae.clear();
  }

  LLVM_DEBUG(if (ChangedFormulae) {
    dbgs() << "\n"
              "After filtering out undesirable candidates:\n";
    print_uses(dbgs());
  });
}
void LSRInstance::NarrowSearchSpaceByFilterPostInc() {
  if (AMK != TTI::AMK_PostIndexed)
    return;
  if (EstimateSearchSpaceComplexity() < ComplexityLimit)
    return;

  LLVM_DEBUG(dbgs() << "The search space is too complex.\n"
                       "Narrowing the search space by choosing the lowest "
                       "register Formula for PostInc Uses.\n");

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];

    if (LU.Kind != LSRUse::Address)
      continue;
    if (!TTI.isIndexedLoadLegal(TTI.MIM_PostInc, LU.AccessTy.getType()) &&
        !TTI.isIndexedStoreLegal(TTI.MIM_PostInc, LU.AccessTy.getType()))
      continue;

    size_t MinRegs = std::numeric_limits<size_t>::max();
    for (const Formula &F : LU.Formulae)
      MinRegs = std::min(F.getNumRegs(), MinRegs);

    bool Any = false;
    for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
         ++FIdx) {
      Formula &F = LU.Formulae[FIdx];
      if (F.getNumRegs() > MinRegs) {
        LLVM_DEBUG(dbgs() << "  Filtering out formula "; F.print(dbgs());
                   dbgs() << "\n");
        LU.DeleteFormula(F);
        --FIdx;
        --NumForms;
        Any = true;
      }
    }
    if (Any)
      LU.RecomputeRegs(LUIdx, RegUses);

    if (EstimateSearchSpaceComplexity() < ComplexityLimit)
      break;
  }
}
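/// Delete formulae with a high expected register count. Roughly speaking,
/// assume each use picks among its formulae uniformly at random: for a use U,
/// getNotSelectedProbability(R) = 1 - n/|Formulae(U)|, where n is the number
/// of U's formulae that reference R. The product of these factors over all
/// uses (cached in RegNumMap below) estimates the probability that R is not
/// materialized by anyone, so a register shared by many uses is almost free
/// for any single one of them. For example, if R appears in one of two
/// formulae for each of three uses, the chance that no use selects R is
/// (1/2)^3 = 1/8. Each use then keeps only its formula with the smallest
/// expected register count.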
void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
  if (EstimateSearchSpaceComplexity() < ComplexityLimit)
    return;
  // Ok, we have too many formulae on our hands to conveniently handle.
  // Use a rough heuristic to thin out the list.

  // Set of Regs which will definitely be used in the final solution; they can
  // be skipped in the expectation calculations below.
  SmallPtrSet<const SCEV *, 4> UniqRegs;
  LLVM_DEBUG(dbgs() << "The search space is too complex.\n");

  // Map each register to the probability of it not being selected by any use.
  DenseMap<const SCEV *, float> RegNumMap;
  for (const SCEV *Reg : RegUses) {
    if (UniqRegs.count(Reg))
      continue;
    float PNotSel = 1;
    for (const LSRUse &LU : Uses) {
      if (!LU.Regs.count(Reg))
        continue;
      float P = LU.getNotSelectedProbability(Reg);
      if (P != 0.0)
        PNotSel *= P;
      else
        UniqRegs.insert(Reg);
    }
    RegNumMap.insert(std::make_pair(Reg, PNotSel));
  }

  LLVM_DEBUG(
      dbgs() << "Narrowing the search space by deleting costly formulas\n");

  // Delete formulae where the register number expectation is high.
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    // If there is nothing to delete - continue.
    if (LU.Formulae.size() < 2)
      continue;
    float FMinRegNum = LU.Formulae[0].getNumRegs();
    float FMinARegNum = LU.Formulae[0].getNumRegs();
    size_t MinIdx = 0;
    for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
      Formula &F = LU.Formulae[i];
      float FRegNum = 0;
      float FARegNum = 0;
      for (const SCEV *BaseReg : F.BaseRegs) {
        if (UniqRegs.count(BaseReg))
          continue;
        FRegNum += RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
        if (isa<SCEVAddRecExpr>(BaseReg))
          FARegNum +=
              RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
      }
      if (const SCEV *ScaledReg = F.ScaledReg) {
        if (!UniqRegs.count(ScaledReg)) {
          FRegNum +=
              RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
          if (isa<SCEVAddRecExpr>(ScaledReg))
            FARegNum +=
                RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
        }
      }
      if (FMinRegNum > FRegNum ||
          (FMinRegNum == FRegNum && FMinARegNum > FARegNum)) {
        FMinRegNum = FRegNum;
        FMinARegNum = FARegNum;
        MinIdx = i;
      }
    }
    LLVM_DEBUG(dbgs() << "  The formula "; LU.Formulae[MinIdx].print(dbgs());
               dbgs() << " with min reg num " << FMinRegNum << '\n');
    if (MinIdx != 0)
      std::swap(LU.Formulae[MinIdx], LU.Formulae[0]);
    while (LU.Formulae.size() != 1) {
      // Delete all the formulae except the chosen one.
      LU.Formulae.pop_back();
    }
    LU.RecomputeRegs(LUIdx, RegUses);
    assert(LU.Formulae.size() == 1 && "Should be exactly 1 min regs formula");
    Formula &F = LU.Formulae[0];
    LLVM_DEBUG(dbgs() << "  Leaving only "; F.print(dbgs()); dbgs() << '\n');
    // When we choose the formula, the regs become unique.
    UniqRegs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
    if (F.ScaledReg)
      UniqRegs.insert(F.ScaledReg);
  }
  LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
}
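/// Pick a register which seems likely to be profitable, and then in any use
/// in which it can be used, delete the formulae which do not reference it.
/// Repeat, until the search space is sufficiently small.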
void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
  // With all other options exhausted, loop until the system is simple
  // enough to handle.
  SmallPtrSet<const SCEV *, 4> Taken;
  while (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
    // Ok, we have too many formulae on our hands to conveniently handle.
    // Use a rough heuristic to thin out the list.
    LLVM_DEBUG(dbgs() << "The search space is too complex.\n");

    // Pick the register which is used by the most LSRUses, which is likely
    // to be a good reuse register candidate.
    const SCEV *Best = nullptr;
    unsigned BestNum = 0;
    for (const SCEV *Reg : RegUses) {
      if (Taken.count(Reg))
        continue;
      if (!Best) {
        Best = Reg;
        BestNum = RegUses.getUsedByIndices(Reg).count();
      } else {
        unsigned Count = RegUses.getUsedByIndices(Reg).count();
        if (Count > BestNum) {
          Best = Reg;
          BestNum = Count;
        }
      }
    }
    assert(Best && "Failed to find best LSRUse candidate");

    LLVM_DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
                      << " will yield profitable reuse.\n");
    Taken.insert(Best);

    // In any use which references this register, delete the formulae which
    // don't reference it.
    for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
      LSRUse &LU = Uses[LUIdx];
      if (!LU.Regs.count(Best))
        continue;

      bool Any = false;
      for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
        Formula &F = LU.Formulae[i];
        if (!F.referencesReg(Best)) {
          LLVM_DEBUG(dbgs() << "  Deleting "; F.print(dbgs());
                     dbgs() << '\n');
          LU.DeleteFormula(F);
          --e;
          --i;
          Any = true;
          assert(e != 0 && "Use has no formulae left! Is Regs inconsistent?");
          continue;
        }
      }

      if (Any)
        LU.RecomputeRegs(LUIdx, RegUses);
    }
  }
}
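/// Run the search-space pruning heuristics in sequence, from the least to the
/// most aggressive, until the problem is small enough for the solver. The
/// last two heuristics are alternatives: the expectation-based pruning is
/// used when the LSRExpNarrow flag is set, winner-register picking otherwise.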
void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
  NarrowSearchSpaceByDetectingSupersets();
  NarrowSearchSpaceByCollapsingUnrolledCode();
  NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
  if (FilterSameScaledReg)
    NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
  NarrowSearchSpaceByFilterPostInc();
  if (LSRExpNarrow)
    NarrowSearchSpaceByDeletingCostlyFormulas();
  else
    NarrowSearchSpaceByPickingWinnerRegs();
}
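/// This is the recursive solver: choose one formula for the current use,
/// rate the partial solution, and recurse over the remaining uses, pruning
/// any branch whose cost already exceeds the best complete solution found.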
void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
                               Cost &SolutionCost,
                               SmallVectorImpl<const Formula *> &Workspace,
                               const Cost &CurCost,
                               const SmallPtrSet<const SCEV *, 16> &CurRegs,
                               DenseSet<const SCEV *> &VisitedRegs) const {
  const LSRUse &LU = Uses[Workspace.size()];

  // If this use references any register that's already a part of the
  // in-progress solution, consider it a requirement that a formula must
  // reference that register in order to be considered. This prunes out
  // unprofitable searching.
  SmallSetVector<const SCEV *, 4> ReqRegs;
  for (const SCEV *S : CurRegs)
    if (LU.Regs.count(S))
      ReqRegs.insert(S);

  SmallPtrSet<const SCEV *, 16> NewRegs;
  Cost NewCost(L, SE, TTI, AMK);
  for (const Formula &F : LU.Formulae) {
    // Ignore formulae which may not be ideal in terms of register reuse of
    // ReqRegs. The formula should use all required registers before
    // introducing new ones.
    int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size());
    for (const SCEV *Reg : ReqRegs) {
      if ((F.ScaledReg && F.ScaledReg == Reg) ||
          is_contained(F.BaseRegs, Reg)) {
        --NumReqRegsToFind;
        if (NumReqRegsToFind == 0)
          break;
      }
    }
    if (NumReqRegsToFind != 0) {
      // If none of the formulae satisfied the required registers, we could
      // clear ReqRegs and try again. Currently, we simply give up in this
      // case.
      continue;
    }

    // Evaluate the cost of the current formula. If it's already worse than
    // the current best, prune the search at that point.
    NewCost = CurCost;
    NewRegs = CurRegs;
    NewCost.RateFormula(F, NewRegs, VisitedRegs, LU);
    if (NewCost.isLess(SolutionCost)) {
      Workspace.push_back(&F);
      if (Workspace.size() != Uses.size()) {
        SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
                     NewRegs, VisitedRegs);
        if (F.getNumRegs() == 1 && Workspace.size() == 1)
          VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
      } else {
        LLVM_DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());
                   dbgs() << ".\nRegs:\n";
                   for (const SCEV *S : NewRegs) dbgs()
                      << "- " << *S << "\n";
                   dbgs() << '\n');

        SolutionCost = NewCost;
        Solution = Workspace;
      }
      Workspace.pop_back();
    }
  }
}
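/// Choose one formula from each use. Returns the results in the given
/// Solution vector.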
void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
  SmallVector<const Formula *, 8> Workspace;
  Cost SolutionCost(L, SE, TTI, AMK);
  SolutionCost.Lose();
  Cost CurCost(L, SE, TTI, AMK);
  SmallPtrSet<const SCEV *, 16> CurRegs;
  DenseSet<const SCEV *> VisitedRegs;
  Workspace.reserve(Uses.size());

  // SolveRecurse does all the work.
  SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
               CurRegs, VisitedRegs);
  if (Solution.empty()) {
    LLVM_DEBUG(dbgs() << "\nNo Satisfactory Solution\n");
    return;
  }

  // Ok, we've now made all our decisions.
  LLVM_DEBUG(dbgs() << "\n"
                       "The chosen solution requires ";
             SolutionCost.print(dbgs()); dbgs() << ":\n";
             for (size_t i = 0, e = Uses.size(); i != e; ++i) {
               dbgs() << "  ";
               Uses[i].print(dbgs());
               dbgs() << "\n"
                         "    ";
               Solution[i]->print(dbgs());
               dbgs() << '\n';
             });

  assert(Solution.size() == Uses.size() && "Malformed solution!");
}
BasicBlock::iterator
LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
                                 const SmallVectorImpl<Instruction *> &Inputs)
    const {
  Instruction *Tentative = &*IP;
  while (true) {
    bool AllDominate = true;
    Instruction *BetterPos = nullptr;
    // Don't bother attempting to insert before a catchswitch, their basic
    // block cannot have other non-PHI instructions.
    if (isa<CatchSwitchInst>(Tentative))
      return IP;
    for (Instruction *Inst : Inputs) {
      if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
        AllDominate = false;
        break;
      }
      // Attempt to find an insert position in the middle of the block,
      // instead of at the end, so that it can be used for other expansions.
      if (Tentative->getParent() == Inst->getParent() &&
          (!BetterPos || !DT.dominates(Inst, BetterPos)))
        BetterPos = &*std::next(BasicBlock::iterator(Inst));
    }
    if (!AllDominate)
      break;
    if (BetterPos)
      IP = BetterPos->getIterator();
    else
      IP = Tentative->getIterator();

    // Walk up the immediate dominator tree, but don't climb into a loop.
    const Loop *IPLoop = LI.getLoopFor(IP->getParent());
    unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;
    BasicBlock *IDom;
    for (DomTreeNode *Rung = DT.getNode(IP->getParent());;) {
      if (!Rung)
        return IP;
      Rung = Rung->getIDom();
      if (!Rung)
        return IP;
      IDom = Rung->getBlock();
      const Loop *IDomLoop = LI.getLoopFor(IDom);
      unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;
      if (IDomDepth <= IPLoopDepth &&
          (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
        break;
    }
    Tentative = IDom->getTerminator();
  }

  return IP;
}
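/// Determine an input position which will be dominated by the operands and
/// which will dominate the result.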
BasicBlock::iterator LSRInstance::AdjustInsertPositionForExpand(
    BasicBlock::iterator LowestIP, const LSRFixup &LF, const LSRUse &LU,
    SCEVExpander &Rewriter) const {
  // Collect some instructions which must be dominated by the
  // expanding replacement. These must be dominated by any operands that
  // will be required in the expansion.
  SmallVector<Instruction *, 4> Inputs;
  if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace))
    Inputs.push_back(I);
  if (LU.Kind == LSRUse::ICmpZero)
    if (Instruction *I =
            dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
      Inputs.push_back(I);
  if (LF.PostIncLoops.count(L)) {
    if (LF.isUseFullyOutsideLoop(L))
      Inputs.push_back(L->getLoopLatch()->getTerminator());
    else
      Inputs.push_back(IVIncInsertPos);
  }
  // The expansion must also be dominated by the increment positions of any
  // loops for which it is using post-inc mode.
  for (const Loop *PIL : LF.PostIncLoops) {
    if (PIL == L)
      continue;

    // Be dominated by the loop exit.
    SmallVector<BasicBlock *, 4> ExitingBlocks;
    PIL->getExitingBlocks(ExitingBlocks);
    if (!ExitingBlocks.empty()) {
      BasicBlock *BB = ExitingBlocks[0];
      for (unsigned i = 1, e = ExitingBlocks.size(); i != e; ++i)
        BB = DT.findNearestCommonDominator(BB, ExitingBlocks[i]);
      Inputs.push_back(BB->getTerminator());
    }
  }

  assert(!isa<PHINode>(LowestIP) && !LowestIP->isEHPad() &&
         !isa<DbgInfoIntrinsic>(LowestIP) &&
         "Insertion point must be a normal instruction");

  // Then, climb up the immediate dominator tree as far as we can go while
  // still being dominated by the input positions.
  BasicBlock::iterator IP = HoistInsertPosition(LowestIP, Inputs);

  // Don't insert instructions before PHI nodes.
  while (isa<PHINode>(IP)) ++IP;

  // Ignore landingpad instructions.
  while (IP->isEHPad()) ++IP;

  // Ignore debug intrinsics.
  while (isa<DbgInfoIntrinsic>(IP)) ++IP;

  // Set IP below instructions recently inserted by SCEVExpander. This keeps
  // the expander's "InsertedInstructions" set consistent with instructions
  // re-inserted by LFTR.
  while (Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP)
    ++IP;

  return IP;
}
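/// Emit instructions for the leading candidate expression for this LSRUse
/// (this is called "expanding").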
Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
                           const Formula &F, BasicBlock::iterator IP,
                           SCEVExpander &Rewriter,
                           SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
  if (LU.RigidFormula)
    return LF.OperandValToReplace;

  // Determine an input position which will be dominated by the operands and
  // which will dominate the result.
  IP = AdjustInsertPositionForExpand(IP, LF, LU, Rewriter);
  Rewriter.setInsertPoint(&*IP);

  // Inform the Rewriter if we have a post-increment use, so that it can
  // perform an efficient expansion.
  Rewriter.setPostInc(LF.PostIncLoops);

  // This is the type that the user actually needs.
  Type *OpTy = LF.OperandValToReplace->getType();
  // This will be the type that we'll initially expand to.
  Type *Ty = F.getType();
  if (!Ty || SE.getEffectiveSCEVType(Ty) == SE.getEffectiveSCEVType(OpTy))
    // Expand directly to the ultimate type if it's the right size.
    Ty = OpTy;

  // Build up a list of operands to add together to form the full base.
  SmallVector<const SCEV *, 8> Ops;

  // Expand the BaseRegs portion.
  for (const SCEV *Reg : F.BaseRegs) {
    assert(!Reg->isZero() && "Zero allocated in a base register!");
    Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr)));
  }

  // Expand the ScaledReg portion.
  Value *ICmpScaledV = nullptr;
  if (F.Scale != 0) {
    const SCEV *ScaledS = F.ScaledReg;
    if (LU.Kind == LSRUse::ICmpZero) {
      // An interesting way of "folding" with an icmp is to use a negated
      // scale, which we'll implement by inserting it into the other operand
      // of the icmp.
      assert(F.Scale == -1 &&
             "The only scale supported by ICmpZero uses is -1!");
      ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr);
    } else {
      // Otherwise just expand the scaled register and an explicit scale,
      // which is expected to be matched as part of the address.

      // Flush the operand list to suppress SCEVExpander hoisting address
      // modes, unless the addressing mode will not be folded.
      if (!Ops.empty() && LU.Kind == LSRUse::Address &&
          isAMCompletelyFolded(TTI, LU, F)) {
        Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), nullptr);
        Ops.clear();
        Ops.push_back(SE.getUnknown(FullV));
      }
      ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr));
      ScaledS = SE.getMulExpr(
          ScaledS, SE.getConstant(ScaledS->getType(), F.Scale));
      Ops.push_back(ScaledS);
    }
  }

  // Expand the immediate portion.
  int64_t Offset = (uint64_t)F.BaseOffset + LF.Offset;