#define DEBUG_TYPE "loop-reduce"

    cl::desc("Enable LSR phi elimination"));

    cl::desc("Add instruction count to a LSR cost model"));

    cl::desc("Narrow LSR complex solution using"
             " expectation of registers number"));

    cl::desc("Narrow LSR search space by filtering non-optimal formulae"
             " with the same ScaledReg and Scale"));

    cl::desc("A flag that overrides the target's preferred addressing mode."),
               "Prefer pre-indexed addressing mode"),
               "Prefer post-indexed addressing mode"),

    cl::init(std::numeric_limits<uint16_t>::max()),
    cl::desc("LSR search space complexity limit"));

    cl::desc("The limit on recursion depth for LSRs setup cost"));

    cl::desc("Attempt to drop solution if it is less profitable"));

    cl::desc("Enable analysis of vscale-relative immediates in LSR"));

    cl::desc("Avoid using scaled registers with vscale-relative addressing"));

    cl::desc("Stress test LSR IV chains"));
    std::numeric_limits<unsigned>::max();

  Type *MemTy = nullptr;

  MemAccessTy() = default;
  MemAccessTy(Type *Ty, unsigned AS) : MemTy(Ty), AddrSpace(AS) {}

    return MemTy == Other.MemTy && AddrSpace == Other.AddrSpace;

  static MemAccessTy getUnknown(LLVMContext &Ctx,
                                unsigned AS = UnknownAddressSpace) {
    return MemAccessTy(Type::getVoidTy(Ctx), AS);

  SmallBitVector UsedByIndices;

  void print(raw_ostream &OS) const;
  constexpr Immediate(ScalarTy MinVal, bool Scalable)
      : FixedOrScalableQuantity(MinVal, Scalable) {}

  constexpr Immediate(const FixedOrScalableQuantity<Immediate, int64_t> &V)
      : FixedOrScalableQuantity(V) {}

  constexpr Immediate() = delete;

  static constexpr Immediate getFixed(ScalarTy MinVal) {
    return {MinVal, false};

  static constexpr Immediate getScalable(ScalarTy MinVal) {
    return {MinVal, true};

  static constexpr Immediate get(ScalarTy MinVal, bool Scalable) {
    return {MinVal, Scalable};

  static constexpr Immediate getZero() { return {0, false}; }

  static constexpr Immediate getFixedMin() {
    return {std::numeric_limits<int64_t>::min(), false};

  static constexpr Immediate getFixedMax() {
    return {std::numeric_limits<int64_t>::max(), false};

  static constexpr Immediate getScalableMin() {
    return {std::numeric_limits<int64_t>::min(), true};

  static constexpr Immediate getScalableMax() {
    return {std::numeric_limits<int64_t>::max(), true};

  constexpr bool isLessThanZero() const { return Quantity < 0; }

  constexpr bool isGreaterThanZero() const { return Quantity > 0; }

  constexpr bool isCompatibleImmediate(const Immediate &Imm) const {
    return isZero() || Imm.isZero() || Imm.Scalable == Scalable;

  constexpr bool isMin() const {
    return Quantity == std::numeric_limits<ScalarTy>::min();

  constexpr bool isMax() const {
    return Quantity == std::numeric_limits<ScalarTy>::max();

  constexpr Immediate addUnsigned(const Immediate &RHS) const {
    assert(isCompatibleImmediate(RHS) && "Incompatible Immediates");
    ScalarTy Value = (uint64_t)Quantity + RHS.getKnownMinValue();
    return {Value, Scalable || RHS.isScalable()};

  constexpr Immediate subUnsigned(const Immediate &RHS) const {
    assert(isCompatibleImmediate(RHS) && "Incompatible Immediates");
    ScalarTy Value = (uint64_t)Quantity - RHS.getKnownMinValue();
    return {Value, Scalable || RHS.isScalable()};

  constexpr Immediate mulUnsigned(const ScalarTy RHS) const {
    ScalarTy Value = (uint64_t)Quantity * RHS;
    return {Value, Scalable};

  const SCEV *getSCEV(ScalarEvolution &SE, Type *Ty) const {

  const SCEV *getNegativeSCEV(ScalarEvolution &SE, Type *Ty) const {
    const SCEV *NegS = SE.getConstant(Ty, -(uint64_t)Quantity);

  const SCEV *getUnknownSCEV(ScalarEvolution &SE, Type *Ty) const {
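
/// Comparator used to give Immediate offsets a deterministic ordering (fixed
/// offsets before scalable ones, then by known minimum value) so they can be
/// used as keys in sorted containers.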
struct KeyOrderTargetImmediate {
  bool operator()(const Immediate &LHS, const Immediate &RHS) const {
    if (LHS.isScalable() && !RHS.isScalable())
      return false;
    if (!LHS.isScalable() && RHS.isScalable())
      return true;
    return LHS.getKnownMinValue() < RHS.getKnownMinValue();
  }
};

struct KeyOrderSizeTAndImmediate {
  bool operator()(const std::pair<size_t, Immediate> &LHS,
                  const std::pair<size_t, Immediate> &RHS) const {
    size_t LSize = LHS.first;
    size_t RSize = RHS.first;
    if (LSize != RSize)
      return LSize < RSize;
    return KeyOrderTargetImmediate()(LHS.second, RHS.second);
  }
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  OS << "[NumUses=" << UsedByIndices.count() << ']';
  using RegUsesTy = DenseMap<const SCEV *, RegSortData>;

  RegUsesTy RegUsesMap;

  void countRegister(const SCEV *Reg, size_t LUIdx);
  void dropRegister(const SCEV *Reg, size_t LUIdx);
  void swapAndDropUse(size_t LUIdx, size_t LastLUIdx);

  bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;

  const SmallBitVector &getUsedByIndices(const SCEV *Reg) const;
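
/// The following RegUseTracker methods maintain, for each interesting
/// register, a bit vector of the LSR use indices that reference it.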
RegUseTracker::countRegister(const SCEV *Reg, size_t LUIdx) {
  std::pair<RegUsesTy::iterator, bool> Pair = RegUsesMap.try_emplace(Reg);
  RegSortData &RSD = Pair.first->second;

  RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
  RSD.UsedByIndices.set(LUIdx);

RegUseTracker::dropRegister(const SCEV *Reg, size_t LUIdx) {
  RegUsesTy::iterator It = RegUsesMap.find(Reg);
  assert(It != RegUsesMap.end());
  RegSortData &RSD = It->second;

  RSD.UsedByIndices.reset(LUIdx);

RegUseTracker::swapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
  assert(LUIdx <= LastLUIdx);

  for (auto &Pair : RegUsesMap) {
    SmallBitVector &UsedByIndices = Pair.second.UsedByIndices;
    if (LUIdx < UsedByIndices.size())
      UsedByIndices[LUIdx] =
          LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : false;
    UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));

RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
  if (I == RegUsesMap.end())
    return false;
  const SmallBitVector &UsedByIndices = I->second.UsedByIndices;
  int i = UsedByIndices.find_first();
  if (i == -1)
    return false;
  if ((size_t)i != LUIdx)
    return true;

const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const {
  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
  assert(I != RegUsesMap.end() && "Unknown register!");
  return I->second.UsedByIndices;

void RegUseTracker::clear() {
  GlobalValue *BaseGV = nullptr;

  Immediate BaseOffset = Immediate::getZero();

  bool HasBaseReg = false;

  const SCEV *ScaledReg = nullptr;

  Immediate UnfoldedOffset = Immediate::getZero();

  void initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);

  void canonicalize(const Loop &L);

  bool hasZeroEnd() const;

  bool countsDownToZero() const;

  size_t getNumRegs() const;

  void deleteBaseReg(const SCEV *&S);

  bool referencesReg(const SCEV *S) const;
  bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
                                  const RegUseTracker &RegUses) const;

  void print(raw_ostream &OS) const;

    for (const SCEV *S : Add->operands())

  const SCEV *Start, *Step;

    if (Mul->getOperand(0)->isAllOnesValue()) {

  for (const SCEV *S : MyGood)
  for (const SCEV *S : MyBad)
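
/// initialMatch splits the given SCEV into a sum of subexpressions and forms
/// the initial formula for this use from the resulting base registers.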
void Formula::initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
    BaseRegs.push_back(Sum);

    BaseRegs.push_back(Sum);

bool Formula::isCanonical(const Loop &L) const {
  assert((Scale == 0 || ScaledReg) &&
         "ScaledReg must be non-null if Scale is non-zero");

    return BaseRegs.size() <= 1;

  if (Scale == 1 && BaseRegs.empty())

  return none_of(BaseRegs, [&L](const SCEV *S) {

void Formula::canonicalize(const Loop &L) {
  if (BaseRegs.empty()) {
    assert(ScaledReg && "Expected 1*reg => reg");
    assert(Scale == 1 && "Expected 1*reg => reg");
    BaseRegs.push_back(ScaledReg);

    ScaledReg = BaseRegs.pop_back_val();

  auto I = find_if(BaseRegs, [&L](const SCEV *S) {
  if (I != BaseRegs.end())

bool Formula::unscale() {
  BaseRegs.push_back(ScaledReg);

bool Formula::hasZeroEnd() const {
  if (UnfoldedOffset || BaseOffset)
  if (BaseRegs.size() != 1 || ScaledReg)

bool Formula::countsDownToZero() const {
  assert(BaseRegs.size() == 1 && "hasZeroEnd should mean one BaseReg");
  const APInt *StepInt;

size_t Formula::getNumRegs() const {
  return !!ScaledReg + BaseRegs.size();

Type *Formula::getType() const {
  return !BaseRegs.empty() ? BaseRegs.front()->getType() :
         ScaledReg ? ScaledReg->getType() :

void Formula::deleteBaseReg(const SCEV *&S) {
  if (&S != &BaseRegs.back())

bool Formula::referencesReg(const SCEV *S) const {

bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
                                         const RegUseTracker &RegUses) const {
  if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
  for (const SCEV *BaseReg : BaseRegs)
    if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void Formula::print(raw_ostream &OS) const {
  if (BaseOffset.isNonZero()) {
  for (const SCEV *BaseReg : BaseRegs) {
    OS << "reg(" << *BaseReg << ')';
  if (HasBaseReg && BaseRegs.empty()) {
    OS << "**error: HasBaseReg**";
  } else if (!HasBaseReg && !BaseRegs.empty()) {
    OS << "**error: !HasBaseReg**";
    OS << Scale << "*reg(";
  if (UnfoldedOffset.isNonZero()) {
    if (!First) OS << " + ";
    OS << "imm(" << UnfoldedOffset << ')';
                            bool IgnoreSignificantBits = false) {
    if (RA.isAllOnes()) {
      if (LHS->getType()->isPointerTy())

    const APInt &LA = C->getAPInt();

    if ((IgnoreSignificantBits || isAddRecSExtable(AR, SE)) && AR->isAffine()) {
                                      IgnoreSignificantBits);
      if (!Step)
        return nullptr;
                                       IgnoreSignificantBits);
      if (!Start)
        return nullptr;

      for (const SCEV *S : Add->operands()) {
        if (!Op)
          return nullptr;

      for (const SCEV *S : Mul->operands()) {
                              IgnoreSignificantBits)) {

    if (C->getSignificantBits() <= 64) {
      return Immediate::getFixed(C->getSExtValue());

    if (Result.isNonZero())

    if (Result.isNonZero())

        return Immediate::getScalable(C->getSExtValue());

  return Immediate::getZero();
    if (SI->getPointerOperand() == OperandVal)

    switch (II->getIntrinsicID()) {
    case Intrinsic::memset:
    case Intrinsic::prefetch:
    case Intrinsic::masked_load:
      if (II->getArgOperand(0) == OperandVal)
    case Intrinsic::masked_store:
      if (II->getArgOperand(1) == OperandVal)
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
      if (II->getArgOperand(0) == OperandVal ||
          II->getArgOperand(1) == OperandVal)

      if (TTI.getTgtMemIntrinsic(II, IntrInfo)) {
        if (IntrInfo.PtrVal == OperandVal)

    if (RMW->getPointerOperand() == OperandVal)
    if (CmpX->getPointerOperand() == OperandVal)

  MemAccessTy AccessTy = MemAccessTy::getUnknown(Inst->getContext());
    AccessTy.MemTy = Ty;

    AccessTy.AddrSpace = SI->getPointerAddressSpace();
    AccessTy.AddrSpace = LI->getPointerAddressSpace();
    AccessTy.AddrSpace = RMW->getPointerAddressSpace();
    AccessTy.AddrSpace = CmpX->getPointerAddressSpace();
    switch (II->getIntrinsicID()) {
    case Intrinsic::prefetch:
    case Intrinsic::memset:
      AccessTy.AddrSpace =
          II->getArgOperand(0)->getType()->getPointerAddressSpace();
      AccessTy.MemTy = OperandVal->getType();
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
      AccessTy.MemTy = OperandVal->getType();
    case Intrinsic::masked_load:
      AccessTy.AddrSpace =
          II->getArgOperand(0)->getType()->getPointerAddressSpace();
    case Intrinsic::masked_store:
      AccessTy.AddrSpace =
          II->getArgOperand(1)->getType()->getPointerAddressSpace();

      if (TTI.getTgtMemIntrinsic(II, IntrInfo) && IntrInfo.PtrVal) {

  if (!Processed.insert(S).second)

    for (const SCEV *S : Add->operands()) {

    const SCEV *Op0, *Op1;

    Value *UVal = U->getValue();

    if (UI && UI->getOpcode() == Instruction::Mul &&
                                 const LSRUse &LU, const Formula &F);
                                  const LSRUse &LU, const Formula &F,

  const Loop *L = nullptr;
  ScalarEvolution *SE = nullptr;
  const TargetTransformInfo *TTI = nullptr;
  TargetTransformInfo::LSRCost C;

  Cost(const Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,
      : L(L), SE(&SE), TTI(&TTI), AMK(AMK) {

    return ((C.Insns | C.NumRegs | C.AddRecCost | C.NumIVMuls | C.NumBaseAdds
             | C.ImmCost | C.SetupCost | C.ScaleCost) != ~0u)
        || ((C.Insns & C.NumRegs & C.AddRecCost & C.NumIVMuls & C.NumBaseAdds
             & C.ImmCost & C.SetupCost & C.ScaleCost) == ~0u);

    return C.NumRegs == ~0u;

  void RateFormula(const Formula &F, SmallPtrSetImpl<const SCEV *> &Regs,
                   const DenseSet<const SCEV *> &VisitedRegs, const LSRUse &LU,
                   bool HardwareLoopProfitable,
                   SmallPtrSetImpl<const SCEV *> *LoserRegs = nullptr);

  void print(raw_ostream &OS) const;

  void RateRegister(const Formula &F, const SCEV *Reg,
                    SmallPtrSetImpl<const SCEV *> &Regs, const LSRUse &LU,
                    bool HardwareLoopProfitable);
  void RatePrimaryRegister(const Formula &F, const SCEV *Reg,
                           SmallPtrSetImpl<const SCEV *> &Regs,
                           const LSRUse &LU, bool HardwareLoopProfitable,
                           SmallPtrSetImpl<const SCEV *> *LoserRegs);
  Value *OperandValToReplace = nullptr;

  Immediate Offset = Immediate::getZero();

  LSRFixup() = default;

  bool isUseFullyOutsideLoop(const Loop *L) const;

  void print(raw_ostream &OS) const;

  DenseSet<SmallVector<const SCEV *, 4>> Uniquifier;

  using SCEVUseKindPair = PointerIntPair<const SCEV *, 2, KindType>;

  MemAccessTy AccessTy;

  Immediate MinOffset = Immediate::getFixedMax();
  Immediate MaxOffset = Immediate::getFixedMin();

  bool AllFixupsOutsideLoop = true;

  bool AllFixupsUnconditional = true;

  bool RigidFormula = false;

  Type *WidestFixupType = nullptr;

  SmallPtrSet<const SCEV *, 4> Regs;

  LSRUse(KindType K, MemAccessTy AT) : Kind(K), AccessTy(AT) {}

  LSRFixup &getNewFixup() {
    Fixups.push_back(LSRFixup());

  void pushFixup(LSRFixup &f) {
    if (Immediate::isKnownGT(f.Offset, MaxOffset))
      MaxOffset = f.Offset;
    if (Immediate::isKnownLT(f.Offset, MinOffset))
      MinOffset = f.Offset;

  bool HasFormulaWithSameRegs(const Formula &F) const;
  float getNotSelectedProbability(const SCEV *Reg) const;
  bool InsertFormula(const Formula &F, const Loop &L);
  void DeleteFormula(Formula &F);
  void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);

  void print(raw_ostream &OS) const;

                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 GlobalValue *BaseGV, Immediate BaseOffset,
                                 bool HasBaseReg, int64_t Scale,
                                 Instruction *Fixup = nullptr);

                        [&](unsigned i, const SCEV *Reg) {
                          return i + getSetupCost(Reg, Depth - 1);
void Cost::RateRegister(const Formula &F, const SCEV *Reg,
                        SmallPtrSetImpl<const SCEV *> &Regs, const LSRUse &LU,
                        bool HardwareLoopProfitable) {
    if (AR->getLoop() != L) {
      if (!AR->getLoop()->contains(L)) {

    unsigned LoopCost = 1;
                        F.BaseOffset.isFixed() &&
                        *Step == F.BaseOffset.getFixedValue();

      if ((CanPreIndex || CanPostIndex) && LU.AllFixupsUnconditional)

      if (LU.Kind == LSRUse::ICmpZero && F.countsDownToZero() &&
          HardwareLoopProfitable)
    C.AddRecCost += LoopCost;

      if (!Regs.count(AR->getOperand(1))) {
        RateRegister(F, AR->getOperand(1), Regs, LU, HardwareLoopProfitable);

  C.SetupCost = std::min<unsigned>(C.SetupCost, 1 << 16);

void Cost::RatePrimaryRegister(const Formula &F, const SCEV *Reg,
                               SmallPtrSetImpl<const SCEV *> &Regs,
                               const LSRUse &LU, bool HardwareLoopProfitable,
                               SmallPtrSetImpl<const SCEV *> *LoserRegs) {
  if (LoserRegs && LoserRegs->count(Reg)) {
    RateRegister(F, Reg, Regs, LU, HardwareLoopProfitable);
  if (LoserRegs && isLoser())
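
/// Rate the given formula for this use: tally the registers it needs along
/// with the add-rec, immediate, scale and setup costs and the estimated
/// instruction count, marking the cost as a loser when the formula is
/// clearly unprofitable.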
void Cost::RateFormula(const Formula &F, SmallPtrSetImpl<const SCEV *> &Regs,
                       const DenseSet<const SCEV *> &VisitedRegs,
                       const LSRUse &LU, bool HardwareLoopProfitable,
                       SmallPtrSetImpl<const SCEV *> *LoserRegs) {
  assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula");

  unsigned PrevAddRecCost = C.AddRecCost;
  unsigned PrevNumRegs = C.NumRegs;
  unsigned PrevNumBaseAdds = C.NumBaseAdds;
  if (const SCEV *ScaledReg = F.ScaledReg) {
    if (VisitedRegs.count(ScaledReg)) {
    RatePrimaryRegister(F, ScaledReg, Regs, LU, HardwareLoopProfitable,

  for (const SCEV *BaseReg : F.BaseRegs) {
    if (VisitedRegs.count(BaseReg)) {
    RatePrimaryRegister(F, BaseReg, Regs, LU, HardwareLoopProfitable,

  size_t NumBaseParts = F.getNumRegs();
  if (NumBaseParts > 1)

  C.NumBaseAdds += (F.UnfoldedOffset.isNonZero());

  for (const LSRFixup &Fixup : LU.Fixups) {
    if (Fixup.Offset.isCompatibleImmediate(F.BaseOffset)) {
      Immediate Offset = Fixup.Offset.addUnsigned(F.BaseOffset);
      else if (Offset.isNonZero())
            APInt(64, Offset.getKnownMinValue(), true).getSignificantBits();

      if (LU.Kind == LSRUse::Address && Offset.isNonZero() &&

  if (C.NumRegs > TTIRegNum) {
    if (PrevNumRegs > TTIRegNum)
      C.Insns += (C.NumRegs - PrevNumRegs);
      C.Insns += (C.NumRegs - TTIRegNum);

  if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd() &&

  C.Insns += (C.AddRecCost - PrevAddRecCost);

  if (LU.Kind != LSRUse::ICmpZero)
    C.Insns += C.NumBaseAdds - PrevNumBaseAdds;

  C.Insns = std::numeric_limits<unsigned>::max();
  C.NumRegs = std::numeric_limits<unsigned>::max();
  C.AddRecCost = std::numeric_limits<unsigned>::max();
  C.NumIVMuls = std::numeric_limits<unsigned>::max();
  C.NumBaseAdds = std::numeric_limits<unsigned>::max();
  C.ImmCost = std::numeric_limits<unsigned>::max();
  C.SetupCost = std::numeric_limits<unsigned>::max();
  C.ScaleCost = std::numeric_limits<unsigned>::max();
bool Cost::isLess(const Cost &Other) const {
      C.Insns != Other.C.Insns)
    return C.Insns < Other.C.Insns;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  OS << C.Insns << " instruction" << (C.Insns == 1 ? " " : "s ");
  OS << C.NumRegs << " reg" << (C.NumRegs == 1 ? "" : "s");
  if (C.AddRecCost != 0)
    OS << ", with addrec cost " << C.AddRecCost;
  if (C.NumIVMuls != 0)
    OS << ", plus " << C.NumIVMuls << " IV mul"
       << (C.NumIVMuls == 1 ? "" : "s");
  if (C.NumBaseAdds != 0)
    OS << ", plus " << C.NumBaseAdds << " base add"
       << (C.NumBaseAdds == 1 ? "" : "s");
  if (C.ScaleCost != 0)
    OS << ", plus " << C.ScaleCost << " scale cost";
    OS << ", plus " << C.ImmCost << " imm cost";
  if (C.SetupCost != 0)
    OS << ", plus " << C.SetupCost << " setup cost";
bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
      if (PN->getIncomingValue(i) == OperandValToReplace &&
          L->contains(PN->getIncomingBlock(i)))

  return !L->contains(UserInst);

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LSRFixup::print(raw_ostream &OS) const {
    Store->getOperand(0)->printAsOperand(OS, false);

  OS << ", OperandValToReplace=";

  for (const Loop *PIL : PostIncLoops) {
    OS << ", PostIncLoop=";
    PIL->getHeader()->printAsOperand(OS, false);

    OS << ", Offset=" << Offset;
bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
  if (F.ScaledReg) Key.push_back(F.ScaledReg);

float LSRUse::getNotSelectedProbability(const SCEV *Reg) const {
  for (const Formula &F : Formulae)
    if (F.referencesReg(Reg))
  return ((float)(Formulae.size() - FNum)) / Formulae.size();

bool LSRUse::InsertFormula(const Formula &F, const Loop &L) {
  assert(F.isCanonical(L) && "Invalid canonical representation");

  if (!Formulae.empty() && RigidFormula)

  if (F.ScaledReg) Key.push_back(F.ScaledReg);

  assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
         "Zero allocated in a scaled register!");

  for (const SCEV *BaseReg : F.BaseRegs)
    assert(!BaseReg->isZero() && "Zero allocated in a base register!");

  Formulae.push_back(F);

void LSRUse::DeleteFormula(Formula &F) {
  if (&F != &Formulae.back())
  Formulae.pop_back();

void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
  SmallPtrSet<const SCEV *, 4> OldRegs = std::move(Regs);

  for (const Formula &F : Formulae) {
    if (F.ScaledReg) Regs.insert(F.ScaledReg);

  for (const SCEV *S : OldRegs)
      RegUses.dropRegister(S, LUIdx);

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LSRUse::print(raw_ostream &OS) const {
  OS << "LSR Use: Kind=";
  case Basic:    OS << "Basic"; break;
  case Special:  OS << "Special"; break;
  case ICmpZero: OS << "ICmpZero"; break;
    OS << "Address of ";
      OS << *AccessTy.MemTy;
    OS << " in addrspace(" << AccessTy.AddrSpace << ')';

  OS << ", Offsets={";
  bool NeedComma = false;
  for (const LSRFixup &Fixup : Fixups) {
    if (NeedComma) OS << ',';

  if (AllFixupsOutsideLoop)
    OS << ", all-fixups-outside-loop";

  if (AllFixupsUnconditional)
    OS << ", all-fixups-unconditional";

  if (WidestFixupType)
    OS << ", widest fixup type: " << *WidestFixupType;
                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 bool HasBaseReg, int64_t Scale,
  case LSRUse::Address: {
    int64_t FixedOffset =
        BaseOffset.isScalable() ? 0 : BaseOffset.getFixedValue();
    int64_t ScalableOffset =
        BaseOffset.isScalable() ? BaseOffset.getKnownMinValue() : 0;
    return TTI.isLegalAddressingMode(AccessTy.MemTy, BaseGV, FixedOffset,
                                     HasBaseReg, Scale, AccessTy.AddrSpace,
                                     Fixup, ScalableOffset);

  case LSRUse::ICmpZero:
    if (Scale != 0 && HasBaseReg && BaseOffset.isNonZero())

    if (Scale != 0 && Scale != -1)

    if (BaseOffset.isNonZero()) {
      if (BaseOffset.isScalable())

      BaseOffset = BaseOffset.getFixed(-(uint64_t)BaseOffset.getFixedValue());
      return TTI.isLegalICmpImmediate(BaseOffset.getFixedValue());

    return !BaseGV && Scale == 0 && BaseOffset.isZero();

  case LSRUse::Special:
    return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset.isZero();

                                 Immediate MinOffset, Immediate MaxOffset,
                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 bool HasBaseReg, int64_t Scale) {
  if (BaseOffset.isNonZero() &&
      (BaseOffset.isScalable() != MinOffset.isScalable() ||
       BaseOffset.isScalable() != MaxOffset.isScalable()))

  int64_t Base = BaseOffset.getKnownMinValue();
  int64_t Min = MinOffset.getKnownMinValue();
  int64_t Max = MaxOffset.getKnownMinValue();
  MinOffset = Immediate::get((uint64_t)Base + Min, MinOffset.isScalable());
  MaxOffset = Immediate::get((uint64_t)Base + Max, MaxOffset.isScalable());

                              HasBaseReg, Scale) &&

                       Immediate MinOffset, Immediate MaxOffset,
                       LSRUse::KindType Kind, MemAccessTy AccessTy,
                       const Formula &F, const Loop &L) {
  assert((F.isCanonical(L) || F.Scale != 0));
                              F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale);

                             Immediate MaxOffset, LSRUse::KindType Kind,
                             Immediate BaseOffset, bool HasBaseReg,
                             int64_t Scale) {
                              BaseOffset, HasBaseReg, Scale) ||
                               BaseGV, BaseOffset, true, 0));

                             Immediate MaxOffset, LSRUse::KindType Kind,
                             MemAccessTy AccessTy, const Formula &F) {
  return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV,
                    F.BaseOffset, F.HasBaseReg, F.Scale);

    return TTI.isLegalAddScalableImmediate(Offset.getKnownMinValue());

  return TTI.isLegalAddImmediate(Offset.getFixedValue());
                                 const LSRUse &LU, const Formula &F) {
  if (LU.Kind == LSRUse::Address && TTI.LSRWithInstrQueries()) {
    for (const LSRFixup &Fixup : LU.Fixups)
                                (F.BaseOffset + Fixup.Offset), F.HasBaseReg,
                                F.Scale, Fixup.UserInst))

                              LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg,

                                     const LSRUse &LU, const Formula &F,
    return F.Scale != 1;

  case LSRUse::Address: {
    int64_t ScalableMin = 0, ScalableMax = 0, FixedMin = 0, FixedMax = 0;
    if (F.BaseOffset.isScalable()) {
      ScalableMin = (F.BaseOffset + LU.MinOffset).getKnownMinValue();
      ScalableMax = (F.BaseOffset + LU.MaxOffset).getKnownMinValue();
      FixedMin = (F.BaseOffset + LU.MinOffset).getFixedValue();
      FixedMax = (F.BaseOffset + LU.MaxOffset).getFixedValue();
                                  F.HasBaseReg, F.Scale, LU.AccessTy.AddrSpace);
                                  F.HasBaseReg, F.Scale, LU.AccessTy.AddrSpace);
           "Legal addressing mode has an illegal cost!");
    return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);

  case LSRUse::ICmpZero:
  case LSRUse::Special:

                             LSRUse::KindType Kind, MemAccessTy AccessTy,
  if (BaseOffset.isZero() && !BaseGV)

  int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;

  if (!HasBaseReg && Scale == 1) {

  if (HasBaseReg && BaseOffset.isNonZero() && Kind != LSRUse::ICmpZero &&

                             Immediate MaxOffset, LSRUse::KindType Kind,
                             MemAccessTy AccessTy, const SCEV *S,
  if (S->isZero())
    return true;

  if (!S->isZero())
    return false;

  if (BaseOffset.isZero() && !BaseGV)

  if (BaseOffset.isScalable())

  int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
                              BaseOffset, HasBaseReg, Scale);
  const SCEV *IncExpr;

  IVInc(Instruction *U, Value *O, const SCEV *E)
      : UserInst(U), IVOperand(O), IncExpr(E) {}

  const SCEV *ExprBase = nullptr;

  IVChain() = default;
  IVChain(const IVInc &Head, const SCEV *Base)
      : Incs(1, Head), ExprBase(Base) {}

  const_iterator begin() const {
    return std::next(Incs.begin());
  const_iterator end() const {

  bool hasIncs() const { return Incs.size() >= 2; }

  bool isProfitableIncrement(const SCEV *OperExpr,
                             const SCEV *IncExpr,

  SmallPtrSet<Instruction*, 4> FarUsers;
  SmallPtrSet<Instruction*, 4> NearUsers;

  ScalarEvolution &SE;
  AssumptionCache &AC;
  TargetLibraryInfo &TLI;
  const TargetTransformInfo &TTI;
  MemorySSAUpdater *MSSAU;

  bool HardwareLoopProfitable = false;

  SetVector<int64_t, SmallVector<int64_t, 8>, SmallSet<int64_t, 8>> Factors;

  SmallSetVector<Type *, 4> Types;

  RegUseTracker RegUses;

  static const unsigned MaxChains = 8;

  SmallPtrSet<Use*, MaxChains> IVIncSet;

  SmallVector<llvm::WeakVH, 2> ScalarEvolutionIVs;

  SmallSetVector<Instruction *, 4> InsertedNonLCSSAInsts;

  void OptimizeShadowIV();
  bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse);
  ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
  void OptimizeLoopTermCond();

  void ChainInstruction(Instruction *UserInst, Instruction *IVOper,
                        SmallVectorImpl<ChainUsers> &ChainUsersVec);
  void FinalizeChain(IVChain &Chain);
  void CollectChains();
  void GenerateIVChain(const IVChain &Chain,
                       SmallVectorImpl<WeakTrackingVH> &DeadInsts);
  void CollectInterestingTypesAndFactors();
  void CollectFixupsAndInitialFormulae();

  using UseMapTy = DenseMap<LSRUse::SCEVUseKindPair, size_t>;

  bool reconcileNewOffset(LSRUse &LU, Immediate NewOffset, bool HasBaseReg,
                          LSRUse::KindType Kind, MemAccessTy AccessTy);

  std::pair<size_t, Immediate> getUse(const SCEV *&Expr, LSRUse::KindType Kind,
                                      MemAccessTy AccessTy);

  void DeleteUse(LSRUse &LU, size_t LUIdx);

  LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);

  void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
  void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
  void CountRegisters(const Formula &F, size_t LUIdx);
  bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);
  bool IsFixupExecutedEachIncrement(const LSRFixup &LF) const;

  void CollectLoopInvariantFixupsAndFormulae();

  void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
                              unsigned Depth = 0);
  void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
                                  size_t Idx, bool IsScaledReg = false);
  void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                   const Formula &Base, size_t Idx,
                                   bool IsScaledReg = false);
  void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                   const Formula &Base,
                                   const SmallVectorImpl<Immediate> &Worklist,
                                   size_t Idx, bool IsScaledReg = false);
  void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateCrossUseConstantOffsets();
  void GenerateAllReuseFormulae();

  void FilterOutUndesirableDedicatedRegisters();

  size_t EstimateSearchSpaceComplexity() const;
  void NarrowSearchSpaceByDetectingSupersets();
  void NarrowSearchSpaceByCollapsingUnrolledCode();
  void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
  void NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
  void NarrowSearchSpaceByFilterPostInc();
  void NarrowSearchSpaceByDeletingCostlyFormulas();
  void NarrowSearchSpaceByPickingWinnerRegs();
  void NarrowSearchSpaceUsingHeuristics();

  void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
                    SmallVectorImpl<const Formula *> &Workspace,
                    const Cost &CurCost,
                    const SmallPtrSet<const SCEV *, 16> &CurRegs,
                    DenseSet<const SCEV *> &VisitedRegs) const;
  void Solve(SmallVectorImpl<const Formula *> &Solution) const;

                     const SmallVectorImpl<Instruction *> &Inputs) const;
                    const LSRUse &LU) const;

  Value *Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
                SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
  void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF,
                     SmallVectorImpl<WeakTrackingVH> &DeadInsts);
  void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
               SmallVectorImpl<WeakTrackingVH> &DeadInsts);
  void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution);

  LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT,
              LoopInfo &LI, const TargetTransformInfo &TTI,
              AssumptionCache &AC, TargetLibraryInfo &TLI,
              MemorySSAUpdater *MSSAU);

  bool getChanged() const { return Changed; }
  const SmallVectorImpl<WeakVH> &getScalarEvolutionIVs() const {
    return ScalarEvolutionIVs;

  void print_factors_and_types(raw_ostream &OS) const;
  void print_fixups(raw_ostream &OS) const;
  void print_uses(raw_ostream &OS) const;
  void print(raw_ostream &OS) const;
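
/// If an IV is only used in an int-to-float cast inside the loop, create a
/// parallel floating-point "shadow" induction variable so the cast can be
/// eliminated.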
void LSRInstance::OptimizeShadowIV() {
    Type *DestTy = nullptr;
    bool IsSigned = false;

      DestTy = UCast->getDestTy();
      DestTy = SCast->getDestTy();
    if (!DestTy) continue;

    if (Mantissa == -1) continue;

    unsigned Entry, Latch;

    if (!Init) continue;
    Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?

    BinaryOperator *Incr =
    if (!Incr) continue;
    if (Incr->getOpcode() != Instruction::Add
        && Incr->getOpcode() != Instruction::Sub)

    ConstantInt *C = nullptr;

    if (!C->getValue().isStrictlyPositive())

    Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
        Incr->getOpcode() == Instruction::Add ? Instruction::FAdd
                                              : Instruction::FSub,

bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) {
  for (IVStrideUse &U : IU)
    if (U.getUser() == Cond) {
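
/// Rewrite the loop's terminating condition if it uses a max computation,
/// so the trip test can be expressed against the post-incremented induction
/// variable instead.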
ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
  const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);
  if (IterationCount != SE.getSCEV(Sel)) return Cond;

  const SCEVNAryExpr *Max = nullptr;
    Pred = ICmpInst::ICMP_SLE;
    Pred = ICmpInst::ICMP_SLT;
    Pred = ICmpInst::ICMP_ULT;

  if (Max->getNumOperands() != 2)

  const SCEV *MaxLHS = Max->getOperand(0);
  const SCEV *MaxRHS = Max->getOperand(1);
      (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One)))

         "Loop condition operand is an addrec in a different loop!");

  Value *NewRHS = nullptr;
  if (ICmpInst::isTrueWhenEqual(Pred)) {
      if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
        NewRHS = BO->getOperand(0);
      if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
        NewRHS = BO->getOperand(0);
    NewRHS = SU->getValue();

  ICmpInst *NewCond = new ICmpInst(Cond->getIterator(), Pred,
                                   Cond->getOperand(0), NewRHS, "scmp");

  Cond->replaceAllUsesWith(NewCond);

  Cond->eraseFromParent();
  if (Cmp->use_empty()) {
    Cmp->eraseFromParent();
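
/// Change loop terminating condition to use the postinc iv when possible.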
LSRInstance::OptimizeLoopTermCond() {
  SmallPtrSet<Instruction *, 4> PostIncs;

  SmallVector<BasicBlock*, 8> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  for (BasicBlock *ExitingBlock : ExitingBlocks) {
    IVStrideUse *CondUse = nullptr;
    if (!FindIVUserForCond(Cond, CondUse))

    if (!DT.dominates(ExitingBlock, LatchBlock))

    if (LatchBlock != ExitingBlock)
      for (const IVStrideUse &UI : IU)
        if (&UI != CondUse &&
          const SCEV *A = IU.getStride(*CondUse, L);
          const SCEV *B = IU.getStride(UI, L);
          if (!A || !B) continue;

          if (const SCEVConstant *D =
            const ConstantInt *C = D->getValue();
            if (C->isOne() || C->isMinusOne())
              goto decline_post_inc;
            if (C->getValue().getSignificantBits() >= 64 ||
                C->getValue().isMinSignedValue())
              goto decline_post_inc;
            MemAccessTy AccessTy =
            int64_t Scale = C->getSExtValue();
                                        AccessTy.AddrSpace))
              goto decline_post_inc;
                                        AccessTy.AddrSpace))
              goto decline_post_inc;

    LLVM_DEBUG(dbgs() << "  Change loop exiting icmp to use postinc iv: "

    if (Cond->getNextNode() != TermBr) {
      if (Cond->hasOneUse()) {

        ICmpInst *OldCond = Cond;

        Cond->setName(L->getHeader()->getName() + ".termcond");

  IVIncInsertPos = L->getLoopLatch()->getTerminator();
  for (Instruction *Inst : PostIncs)
bool LSRInstance::reconcileNewOffset(LSRUse &LU, Immediate NewOffset,
                                     bool HasBaseReg, LSRUse::KindType Kind,
                                     MemAccessTy AccessTy) {
  Immediate NewMinOffset = LU.MinOffset;
  Immediate NewMaxOffset = LU.MaxOffset;
  MemAccessTy NewAccessTy = AccessTy;

  if (LU.Kind != Kind)

  if (Kind == LSRUse::Address) {
    if (AccessTy.MemTy != LU.AccessTy.MemTy) {
      NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext(),
                                            AccessTy.AddrSpace);

  if (Immediate::isKnownLT(NewOffset, LU.MinOffset)) {
                    LU.MaxOffset - NewOffset, HasBaseReg))
    NewMinOffset = NewOffset;
  } else if (Immediate::isKnownGT(NewOffset, LU.MaxOffset)) {
                    NewOffset - LU.MinOffset, HasBaseReg))
    NewMaxOffset = NewOffset;

  if (NewAccessTy.MemTy && NewAccessTy.MemTy->isVoidTy() &&
      (NewMinOffset.isScalable() || NewMaxOffset.isScalable()))

  LU.MinOffset = NewMinOffset;
  LU.MaxOffset = NewMaxOffset;
  LU.AccessTy = NewAccessTy;
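
/// Return an LSRUse index and an offset value for a fixup which needs the
/// given expression, with the given kind and optional access type, reusing
/// an existing use when its offset range can be reconciled.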
std::pair<size_t, Immediate> LSRInstance::getUse(const SCEV *&Expr,
                                                 LSRUse::KindType Kind,
                                                 MemAccessTy AccessTy) {
  const SCEV *Copy = Expr;

    Offset = Immediate::getFixed(0);

  std::pair<UseMapTy::iterator, bool> P =
      UseMap.try_emplace(LSRUse::SCEVUseKindPair(Expr, Kind));
    size_t LUIdx = P.first->second;
    LSRUse &LU = Uses[LUIdx];
    if (reconcileNewOffset(LU, Offset, true, Kind, AccessTy))
      return std::make_pair(LUIdx, Offset);

  size_t LUIdx = Uses.size();
  P.first->second = LUIdx;
  Uses.push_back(LSRUse(Kind, AccessTy));
  LSRUse &LU = Uses[LUIdx];

  return std::make_pair(LUIdx, Offset);

void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
  if (&LU != &Uses.back())

  RegUses.swapAndDropUse(LUIdx, Uses.size());

LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
                                       const LSRUse &OrigLU) {
  for (LSRUse &LU : Uses) {
    if (&LU != &OrigLU &&
        LU.Kind != LSRUse::ICmpZero &&
        LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
        LU.WidestFixupType == OrigLU.WidestFixupType &&
        LU.HasFormulaWithSameRegs(OrigF)) {
      for (const Formula &F : LU.Formulae) {
        if (F.BaseRegs == OrigF.BaseRegs &&
            F.ScaledReg == OrigF.ScaledReg &&
            F.BaseGV == OrigF.BaseGV &&
            F.Scale == OrigF.Scale &&
            F.UnfoldedOffset == OrigF.UnfoldedOffset) {
          if (F.BaseOffset.isZero())
2897 SmallSetVector<const SCEV *, 4> Strides;
2901 for (
const IVStrideUse &U : IU) {
2902 const SCEV *Expr = IU.getExpr(U);
2920 }
while (!Worklist.
empty());
2924 for (SmallSetVector<const SCEV *, 4>::const_iterator
2926 for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
2927 std::next(
I); NewStrideIter !=
E; ++NewStrideIter) {
2928 const SCEV *OldStride = *
I;
2929 const SCEV *NewStride = *NewStrideIter;
2939 if (
const SCEVConstant *Factor =
2942 if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
2943 Factors.insert(Factor->getAPInt().getSExtValue());
2944 }
else if (
const SCEVConstant *Factor =
2948 if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
2949 Factors.insert(Factor->getAPInt().getSExtValue());
2955 if (Types.size() == 1)
2967 for(; OI != OE; ++OI) {
2986 return Trunc->getOperand(0);
3019 if (SubExpr->getSCEVType() ==
scAddExpr)
3022 if (SubExpr->getSCEVType() !=
scMulExpr)
bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
                                    const SCEV *IncExpr,
                                    ScalarEvolution &SE) {

  SmallPtrSet<const SCEV*, 8> Processed;

  if (!Chain.hasIncs())

  if (!Users.empty()) {
    LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";
               for (Instruction *Inst : Users) {
                 dbgs() << " " << *Inst << "\n";
               });

  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");

      && SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {

  const SCEV *LastIncExpr = nullptr;
  unsigned NumConstIncrements = 0;
  unsigned NumVarIncrements = 0;
  unsigned NumReusedIncrements = 0;

  if (TTI.isProfitableLSRChainElement(Chain.Incs[0].UserInst))

  for (const IVInc &Inc : Chain) {
    if (TTI.isProfitableLSRChainElement(Inc.UserInst))
    if (Inc.IncExpr->isZero())

      ++NumConstIncrements;

    if (Inc.IncExpr == LastIncExpr)
      ++NumReusedIncrements;

    LastIncExpr = Inc.IncExpr;

  if (NumConstIncrements > 1)

  cost += NumVarIncrements;

  cost -= NumReusedIncrements;

  LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost
void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
                                   SmallVectorImpl<ChainUsers> &ChainUsersVec) {
  const SCEV *const OperExpr = SE.getSCEV(NextIV);
  const SCEV *const OperExprBase = getExprBase(OperExpr);

  unsigned ChainIdx = 0, NChains = IVChainVec.size();
  const SCEV *LastIncExpr = nullptr;
  for (; ChainIdx < NChains; ++ChainIdx) {
    IVChain &Chain = IVChainVec[ChainIdx];

    const SCEV *PrevExpr = SE.getSCEV(PrevIV);
    const SCEV *IncExpr = SE.getMinusSCEV(OperExpr, PrevExpr);

    if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
      LastIncExpr = IncExpr;

  if (ChainIdx == NChains) {
    LastIncExpr = OperExpr;

    IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
    ChainUsersVec.resize(NChains);
    LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst
                      << ") IV=" << *LastIncExpr << "\n");
    LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Inc: (" << *UserInst
                      << ") IV+" << *LastIncExpr << "\n");
    IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
  IVChain &Chain = IVChainVec[ChainIdx];

  SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers;
  if (!LastIncExpr->isZero()) {
    ChainUsersVec[ChainIdx].FarUsers.insert_range(NearUsers);

  for (User *U : IVOper->users()) {
    IVChain::const_iterator IncIter = Chain.Incs.begin();
    IVChain::const_iterator IncEnd = Chain.Incs.end();
    for( ; IncIter != IncEnd; ++IncIter) {
      if (IncIter->UserInst == OtherUse)
    if (IncIter != IncEnd)
        && IU.isIVUserOrOperand(OtherUse)) {
      NearUsers.insert(OtherUse);

  ChainUsersVec[ChainIdx].FarUsers.erase(UserInst);
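
/// Populate the vector of abstract IV increment chains found in this loop,
/// visiting blocks on the path from the loop header down to the latch.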
void LSRInstance::CollectChains() {
  SmallVector<BasicBlock *,8> LatchPath;
       Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) {

  for (BasicBlock *BB : reverse(LatchPath)) {
    for (Instruction &I : *BB) {
      for (unsigned ChainIdx = 0, NChains = IVChainVec.size();
           ChainIdx < NChains; ++ChainIdx) {
        ChainUsersVec[ChainIdx].NearUsers.erase(&I);

      SmallPtrSet<Instruction*, 4> UniqueOperands;
      while (IVOpIter != IVOpEnd) {
        if (UniqueOperands.insert(IVOpInst).second)
          ChainInstruction(&I, IVOpInst, ChainUsersVec);
        IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);

  for (PHINode &PN : L->getHeader()->phis()) {
      ChainInstruction(&PN, IncV, ChainUsersVec);

  unsigned ChainIdx = 0;
  for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
       UsersIdx < NChains; ++UsersIdx) {
                           ChainUsersVec[UsersIdx].FarUsers, SE, TTI))
    if (ChainIdx != UsersIdx)
      IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
    FinalizeChain(IVChainVec[ChainIdx]);

  IVChainVec.resize(ChainIdx);

void LSRInstance::FinalizeChain(IVChain &Chain) {
  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
  LLVM_DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");

  for (const IVInc &Inc : Chain) {
    auto UseI = find(Inc.UserInst->operands(), Inc.IVOperand);
    assert(UseI != Inc.UserInst->op_end() && "cannot find IV operand");
    IVIncSet.insert(UseI);

  Immediate IncOffset = Immediate::getZero();
      C->getSignificantBits() > 64)
    IncOffset = Immediate::getScalable(C->getSExtValue());
void LSRInstance::GenerateIVChain(const IVChain &Chain,
                                  SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  const IVInc &Head = Chain.Incs[0];

  Value *IVSrc = nullptr;
  while (IVOpIter != IVOpEnd) {
    if (SE.getSCEV(*IVOpIter) == Head.IncExpr
        || SE.getSCEV(IVSrc) == Head.IncExpr) {
    IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
  if (IVOpIter == IVOpEnd) {
    LLVM_DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");
  assert(IVSrc && "Failed to find IV chain source");

  const SCEV *LeftOverExpr = nullptr;
  const SCEV *Accum = SE.getZero(IntTy);

  for (const IVInc &Inc : Chain) {
      InsertPt = L->getLoopLatch()->getTerminator();

    Value *IVOper = IVSrc;
    if (!Inc.IncExpr->isZero()) {
      LeftOverExpr = LeftOverExpr ?
        SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr;

      bool FoundBase = false;
      for (auto [MapScev, MapIVOper] : reverse(Bases)) {
        const SCEV *Remainder = SE.getMinusSCEV(Accum, MapScev);
          if (!Remainder->isZero()) {
            Value *IncV = Rewriter.expandCodeFor(Remainder, IntTy, InsertPt);
            const SCEV *IVOperExpr =
            IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);

      if (!FoundBase && LeftOverExpr && !LeftOverExpr->isZero()) {
        Value *IncV = Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
        IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);

        assert(IVTy == IVOper->getType() && "inconsistent IV increment type");

      LeftOverExpr = nullptr;

    if (IVTy != OperTy) {
             "cannot extend a chained IV");
      IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain");
    Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);

  for (PHINode &Phi : L->getHeader()->phis()) {
        Phi.getIncomingValueForBlock(L->getLoopLatch()));
    Value *IVOper = IVSrc;
    if (IVTy != PostIncTy) {
      IRBuilder<> Builder(L->getLoopLatch()->getTerminator());
      Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc());
      IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain");
    Phi.replaceUsesOfWith(PostIncV, IVOper);
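
/// For every IV user recorded by IVUsers, create an LSRUse (or reuse a
/// compatible one), record the fixup, and seed the use with its initial
/// formula.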
void LSRInstance::CollectFixupsAndInitialFormulae() {
  BranchInst *ExitBranch = nullptr;
  bool SaveCmp = TTI.canSaveCmp(L, &ExitBranch, &SE, &LI, &DT, &AC, &TLI);

  SmallPtrSet<const SCEV *, 16> Regs;
  DenseSet<const SCEV *> VisitedRegs;
  DenseSet<size_t> VisitedLSRUse;

  for (const IVStrideUse &U : IU) {
    assert(UseI != UserInst->op_end() && "cannot find IV operand");
    if (IVIncSet.count(UseI)) {
      LLVM_DEBUG(dbgs() << "Use is in profitable chain: " << **UseI << '\n');

    LSRUse::KindType Kind = LSRUse::Basic;
    MemAccessTy AccessTy;
      Kind = LSRUse::Address;

    const SCEV *S = IU.getExpr(U);

      if (CI->isEquality()) {
        Value *NV = CI->getOperand(1);
        if (NV == U.getOperandValToReplace()) {
          CI->setOperand(1, CI->getOperand(0));
          CI->setOperand(0, NV);
          NV = CI->getOperand(1);
            (!NV->getType()->isPointerTy() ||
          Kind = LSRUse::ICmpZero;
        } else if (L->isLoopInvariant(NV) &&
                   !NV->getType()->isPointerTy()) {
          Kind = LSRUse::ICmpZero;

        for (size_t i = 0, e = Factors.size(); i != e; ++i)
          if (Factors[i] != -1)
            Factors.insert(-(uint64_t)Factors[i]);

    std::pair<size_t, Immediate> P = getUse(S, Kind, AccessTy);
    size_t LUIdx = P.first;
    LSRUse &LU = Uses[LUIdx];

    LSRFixup &LF = LU.getNewFixup();
    LF.UserInst = UserInst;
    LF.OperandValToReplace = U.getOperandValToReplace();
    LF.PostIncLoops = TmpPostIncLoops;
    LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
    LU.AllFixupsUnconditional &= IsFixupExecutedEachIncrement(LF);

    if (!VisitedLSRUse.count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) {
      F.initialMatch(S, L, SE);
      BaselineCost.RateFormula(F, Regs, VisitedRegs, LU,
                               HardwareLoopProfitable);
      VisitedLSRUse.insert(LUIdx);

    if (!LU.WidestFixupType ||
      LU.WidestFixupType = LF.OperandValToReplace->getType();

    if (LU.Formulae.empty()) {
      InsertInitialFormula(S, LU, LUIdx);
      CountRegisters(LU.Formulae.back(), LUIdx);
void LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU,
    LU.RigidFormula = true;

  F.initialMatch(S, L, SE);
  bool Inserted = InsertFormula(LU, LUIdx, F);
  assert(Inserted && "Initial formula already exists!"); (void)Inserted;

LSRInstance::InsertSupplementalFormula(const SCEV *S,
                                       LSRUse &LU, size_t LUIdx) {
  F.BaseRegs.push_back(S);
  F.HasBaseReg = true;
  bool Inserted = InsertFormula(LU, LUIdx, F);
  assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;

void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
    RegUses.countRegister(F.ScaledReg, LUIdx);
  for (const SCEV *BaseReg : F.BaseRegs)
    RegUses.countRegister(BaseReg, LUIdx);

bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
         "Formula is illegal");

  if (!LU.InsertFormula(F, *L))

  CountRegisters(F, LUIdx);

bool LSRInstance::IsFixupExecutedEachIncrement(const LSRFixup &LF) const {
LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
  SmallPtrSet<const SCEV *, 32> Visited;

  while (!Worklist.empty()) {
    if (!Visited.insert(S).second)

      const Value *V = US->getValue();
        if (L->contains(Inst)) continue;
      for (const Use &U : V->uses()) {
        if (UserInst->getParent()->getParent() != L->getHeader()->getParent())

          bool HasIncompatibleEHPTerminatedBlock = false;
          for (unsigned int I = 0; I < PhiNode->getNumIncomingValues(); I++) {
            if (PhiNode->getIncomingValue(I) == ExpectedValue) {
              if (PhiNode->getIncomingBlock(I)->getTerminator()->isEHPad()) {
                HasIncompatibleEHPTerminatedBlock = true;
          if (HasIncompatibleEHPTerminatedBlock) {

          unsigned OtherIdx = !U.getOperandNo();
          Value *OtherOp = ICI->getOperand(OtherIdx);

        std::pair<size_t, Immediate> P =
            getUse(S, LSRUse::Basic, MemAccessTy());
        size_t LUIdx = P.first;
        LSRUse &LU = Uses[LUIdx];
        LSRFixup &LF = LU.getNewFixup();
        LF.UserInst = const_cast<Instruction *>(UserInst);
        LF.OperandValToReplace = U;
        LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
        LU.AllFixupsUnconditional &= IsFixupExecutedEachIncrement(LF);
        if (!LU.WidestFixupType ||
          LU.WidestFixupType = LF.OperandValToReplace->getType();
        InsertSupplementalFormula(US, LU, LUIdx);
        CountRegisters(LU.Formulae.back(), Uses.size() - 1);
                                 unsigned Depth = 0) {
    for (const SCEV *S : Add->operands()) {

  const SCEV *Start, *Step;
    if (Start->isZero())

      Remainder = nullptr;
    if (Remainder != Start) {

                             LSRUse &LU, const SCEV *S, const Loop *L,
  if (LU.Kind != LSRUse::Address ||
      !LU.AccessTy.getType()->isIntOrIntVectorTy())

  if (TTI.isIndexedLoadLegal(TTI.MIM_PostInc, S->getType()) ||
void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
                                             const Formula &Base,
                                             unsigned Depth, size_t Idx,
  const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];

  const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE);

  if (AddOps.size() == 1)

                               LU.AccessTy, *J, Base.getNumRegs() > 1))

    InnerAddOps.append(std::next(J), std::as_const(AddOps).end());
    if (InnerAddOps.size() == 1 &&
                           LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))

    const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);

      if (F.UnfoldedOffset.isNonZero() && F.UnfoldedOffset.isScalable())
            Immediate::getFixed((uint64_t)F.UnfoldedOffset.getFixedValue() +
        F.ScaledReg = nullptr;

        F.BaseRegs.erase(F.BaseRegs.begin() + Idx);
    } else if (IsScaledReg)
      F.ScaledReg = InnerSum;
      F.BaseRegs[Idx] = InnerSum;

            Immediate::getFixed((uint64_t)F.UnfoldedOffset.getFixedValue() +
      F.BaseRegs.push_back(*J);

    if (InsertFormula(LU, LUIdx, F))
      GenerateReassociations(LU, LUIdx, LU.Formulae.back(),

void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
  assert(Base.isCanonical(*L) && "Input must be in the canonical form");

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
    GenerateReassociationsImpl(LU, LUIdx, Base, Depth, i);

  if (Base.Scale == 1)
    GenerateReassociationsImpl(LU, LUIdx, Base, Depth,
void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
  if (Base.BaseRegs.size() + (Base.Scale == 1) +
          (Base.UnfoldedOffset.isNonZero()) <=

  Formula NewBase = Base;
  NewBase.BaseRegs.clear();
  Type *CombinedIntegerType = nullptr;
  for (const SCEV *BaseReg : Base.BaseRegs) {
      if (!CombinedIntegerType)
      Ops.push_back(BaseReg);
      NewBase.BaseRegs.push_back(BaseReg);

  if (Ops.size() == 0)

  auto GenerateFormula = [&](const SCEV *Sum) {
    Formula F = NewBase;
    F.BaseRegs.push_back(Sum);
    (void)InsertFormula(LU, LUIdx, F);

  if (Ops.size() > 1) {

  if (NewBase.UnfoldedOffset.isNonZero() && NewBase.UnfoldedOffset.isFixed()) {
    assert(CombinedIntegerType && "Missing a type for the unfolded offset");
                                 NewBase.UnfoldedOffset.getFixedValue(), true));
    NewBase.UnfoldedOffset = Immediate::getFixed(0);
void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                              const Formula &Base, size_t Idx,
  const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
  if (G->isZero() || !GV)

  if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))

    F.BaseRegs[Idx] = G;
  (void)InsertFormula(LU, LUIdx, F);

void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
  if (Base.BaseGV) return;

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
    GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, i);
  if (Base.Scale == 1)
    GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, -1,
void LSRInstance::GenerateConstantOffsetsImpl(
    LSRUse &LU, unsigned LUIdx, const Formula &Base,
    const SmallVectorImpl<Immediate> &Worklist, size_t Idx, bool IsScaledReg) {

  auto GenerateOffset = [&](const SCEV *G, Immediate Offset) {
    if (!Base.BaseOffset.isCompatibleImmediate(Offset))
    F.BaseOffset = Base.BaseOffset.subUnsigned(Offset);

    if (isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) {
      const SCEV *NewOffset = Offset.getSCEV(SE, G->getType());

        F.ScaledReg = nullptr;
        F.deleteBaseReg(F.BaseRegs[Idx]);
      } else if (IsScaledReg)
        F.BaseRegs[Idx] = NewG;

      (void)InsertFormula(LU, LUIdx, F);

  const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];

    const APInt *StepInt;
      for (Immediate Offset : Worklist) {
        Offset = Immediate::getFixed(Offset.getFixedValue() - Step);

  for (Immediate Offset : Worklist)

  if (G->isZero() || Imm.isZero() ||
      !Base.BaseOffset.isCompatibleImmediate(Imm))

  F.BaseOffset = F.BaseOffset.addUnsigned(Imm);
  if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))

    F.BaseRegs[Idx] = G;

  (void)InsertFormula(LU, LUIdx, F);

void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
  if (LU.MaxOffset != LU.MinOffset)

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
    GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, i);
  if (Base.Scale == 1)
    GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, -1,
void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
  if (LU.Kind != LSRUse::ICmpZero) return;

  if (LU.MinOffset != LU.MaxOffset) return;

  if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
  for (const SCEV *BaseReg : Base.BaseRegs)
    if (BaseReg->getType()->isPointerTy())
  assert(!Base.BaseGV && "ICmpZero use is not legal!");

  for (int64_t Factor : Factors) {
    if (Base.BaseOffset.isMin() && Factor == -1)
    if (Base.BaseOffset.isNonZero() && Base.BaseOffset.isScalable())
    Immediate NewBaseOffset = Base.BaseOffset.mulUnsigned(Factor);
    assert(Factor != 0 && "Zero factor not expected!");
    if (NewBaseOffset.getFixedValue() / Factor !=
        Base.BaseOffset.getFixedValue())

    Immediate Offset = LU.MinOffset;
    if (Offset.isMin() && Factor == -1)
    if (Offset.getFixedValue() / Factor != LU.MinOffset.getFixedValue())

    F.BaseOffset = NewBaseOffset;

    F.BaseOffset = F.BaseOffset.addUnsigned(Offset).subUnsigned(LU.MinOffset);

    const SCEV *FactorS = SE.getConstant(IntTy, Factor);

    for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {

    if (F.UnfoldedOffset.isNonZero()) {
      if (F.UnfoldedOffset.isMin() && Factor == -1)
      F.UnfoldedOffset = F.UnfoldedOffset.mulUnsigned(Factor);
      if (F.UnfoldedOffset.getFixedValue() / Factor !=
          Base.UnfoldedOffset.getFixedValue())
                                 IntTy, F.UnfoldedOffset.getFixedValue()))

    (void)InsertFormula(LU, LUIdx, F);
void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
  if (Base.Scale != 0 && !Base.unscale())

  assert(Base.Scale == 0 && "unscale did not did its job!");

  for (int64_t Factor : Factors) {
    Base.Scale = Factor;
    Base.HasBaseReg = Base.BaseRegs.size() > 1;

    if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
      if (LU.Kind == LSRUse::Basic &&
          isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
                     LU.AccessTy, Base) &&
          LU.AllFixupsOutsideLoop)
        LU.Kind = LSRUse::Special;

    if (LU.Kind == LSRUse::ICmpZero && !Base.HasBaseReg &&
        Base.BaseOffset.isZero() && !Base.BaseGV)

    for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
      if (AR && (AR->getLoop() == L || LU.AllFixupsOutsideLoop)) {
        const SCEV *FactorS = SE.getConstant(IntTy, Factor);

        if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true))
          if (!Quotient->isZero()) {
            F.ScaledReg = Quotient;
            F.deleteBaseReg(F.BaseRegs[i]);

            if (F.Scale == 1 && (F.BaseRegs.empty() ||
                                 (AR->getLoop() != L && LU.AllFixupsOutsideLoop)))

            if (F.Scale == 1 && LU.AllFixupsOutsideLoop)

            (void)InsertFormula(LU, LUIdx, F);
  const SCEV *Result = nullptr;
  for (auto &L : Loops) {
    if (!New || (Result && New != Result))
  assert(Result && "failed to create expression");

void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
  if (Base.BaseGV) return;

  if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
      [](const SCEV *S) { return S->getType()->isPointerTy(); }))

  for (auto &LF : LU.Fixups)
    Loops.push_back(LF.PostIncLoops);

  for (Type *SrcTy : Types) {
      const SCEV *NewScaledReg =
      if (!NewScaledReg || NewScaledReg->isZero())
      F.ScaledReg = NewScaledReg;

    bool HasZeroBaseReg = false;
    for (const SCEV *&BaseReg : F.BaseRegs) {
      const SCEV *NewBaseReg =
      if (!NewBaseReg || NewBaseReg->isZero()) {
        HasZeroBaseReg = true;

    if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))

    (void)InsertFormula(LU, LUIdx, F);
  const SCEV *OrigReg;

  WorkItem(size_t LI, Immediate I, const SCEV *R)
      : LUIdx(LI), Imm(I), OrigReg(R) {}

  void print(raw_ostream &OS) const;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void WorkItem::print(raw_ostream &OS) const {
  OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
     << " , add offset " << Imm;
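/// Editorial note (not part of the original source): the next function,
/// GenerateCrossUseConstantOffsets, looks for registers that several uses
/// reference at addresses differing only by a constant. For each such register
/// it records the offsets seen (Map), picks a few representative immediates
/// (the smallest, the largest, and one near their average), and queues
/// WorkItems that rewrite other formulae as "reg + offset" so that one
/// register can be shared across uses instead of materializing a new one.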
void LSRInstance::GenerateCrossUseConstantOffsets() {
  using ImmMapTy = std::map<Immediate, const SCEV *, KeyOrderTargetImmediate>;

  DenseMap<const SCEV *, ImmMapTy> Map;
  DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
  for (const SCEV *Use : RegUses) {
    auto Pair = Map.try_emplace(Reg);
    Pair.first->second.insert(std::make_pair(Imm, Use));
    UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(Use);

  SmallSet<std::pair<size_t, Immediate>, 32, KeyOrderSizeTAndImmediate>
  for (const SCEV *Reg : Sequence) {
    const ImmMapTy &Imms = Map.find(Reg)->second;

    if (Imms.size() == 1)
    for (const auto &Entry
                      << ' ' << Entry.first;

    for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
      const SCEV *OrigReg = J->second;

      Immediate JImm = J->first;
      const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
          UsedByIndicesMap[Reg].count() == 1) {

      Immediate First = Imms.begin()->first;
      Immediate Last = std::prev(Imms.end())->first;
      if (!First.isCompatibleImmediate(Last)) {
      bool Scalable = First.isScalable() || Last.isScalable();
      int64_t FI = First.getKnownMinValue();
      int64_t LI = Last.getKnownMinValue();
      int64_t Avg = (FI & LI) + ((FI ^ LI) >> 1);
      Avg = Avg + ((FI ^ LI) & ((uint64_t)Avg >> 63));
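      // Editorial note (not part of the original source): the two lines above
      // are an overflow-safe midpoint. (FI & LI) + ((FI ^ LI) >> 1) is the
      // average of FI and LI rounded toward negative infinity without ever
      // forming FI + LI, and the second line adds 1 back when the average is
      // negative and FI ^ LI is odd, so the result rounds toward zero.
      // Worked example: FI = 3, LI = 6 gives (3 & 6) + ((3 ^ 6) >> 1) = 2 + 2 = 4.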
      ImmMapTy::const_iterator OtherImms[] = {
          Imms.begin(), std::prev(Imms.end()),
          Imms.lower_bound(Immediate::get(Avg, Scalable))};
      for (const auto &M : OtherImms) {
        if (M == J || M == JE)
          continue;
        if (!JImm.isCompatibleImmediate(M->first))
        Immediate Imm = JImm.subUnsigned(M->first);
        for (unsigned LUIdx : UsedByIndices.set_bits())
          if (UniqueItems.insert(std::make_pair(LUIdx, Imm)).second)
            WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));

  UsedByIndicesMap.clear();
  UniqueItems.clear();

  for (const WorkItem &WI : WorkItems) {
    size_t LUIdx = WI.LUIdx;
    LSRUse &LU = Uses[LUIdx];
    Immediate Imm = WI.Imm;
    const SCEV *OrigReg = WI.OrigReg;
    const SCEV *NegImmS = Imm.getNegativeSCEV(SE, IntTy);

    for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
      Formula F = LU.Formulae[L];
      if (F.ScaledReg == OrigReg) {
        if (!F.BaseOffset.isCompatibleImmediate(Imm))
        Immediate Offset = F.BaseOffset.addUnsigned(Imm.mulUnsigned(F.Scale));
        const SCEV *S = Offset.getNegativeSCEV(SE, IntTy);
        if (F.referencesReg(S))
        NewF.BaseOffset = Offset;
        if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
        NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);
        if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
          if (C->getValue()->isNegative() !=
                  (NewF.BaseOffset.isLessThanZero()) &&
              (C->getAPInt().abs() * APInt(BitWidth, F.Scale))
                  .ule(std::abs(NewF.BaseOffset.getFixedValue())))
        NewF.canonicalize(*this->L);
        (void)InsertFormula(LU, LUIdx, NewF);
        for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
          if (BaseReg != OrigReg)
          if (!NewF.BaseOffset.isCompatibleImmediate(Imm) ||
              !NewF.UnfoldedOffset.isCompatibleImmediate(Imm) ||
              !NewF.BaseOffset.isCompatibleImmediate(NewF.UnfoldedOffset))
          NewF.BaseOffset = NewF.BaseOffset.addUnsigned(Imm);
                           LU.Kind, LU.AccessTy, NewF)) {
            Immediate NewUnfoldedOffset = NewF.UnfoldedOffset.addUnsigned(Imm);
            NewF.UnfoldedOffset = NewUnfoldedOffset;
          NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);
          for (const SCEV *NewReg : NewF.BaseRegs)
            if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
            if ((C->getAPInt() + NewF.BaseOffset.getFixedValue())
                    .slt(std::abs(NewF.BaseOffset.getFixedValue())) &&
                (C->getAPInt() + NewF.BaseOffset.getFixedValue())
                NewF.BaseOffset.getFixedValue()))
          NewF.canonicalize(*this->L);
          (void)InsertFormula(LU, LUIdx, NewF);
LSRInstance::GenerateAllReuseFormulae() {
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateScales(LU, LUIdx, LU.Formulae[i]);
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateTruncates(LU, LUIdx, LU.Formulae[i]);

  GenerateCrossUseConstantOffsets();
             "After generating reuse formulae:\n";
             print_uses(dbgs()));
void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
  DenseSet<const SCEV *> VisitedRegs;
  SmallPtrSet<const SCEV *, 16> Regs;
  SmallPtrSet<const SCEV *, 16> LoserRegs;
  bool ChangedFormulae = false;

  using BestFormulaeTy = DenseMap<SmallVector<const SCEV *, 4>, size_t>;
  BestFormulaeTy BestFormulae;

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t FIdx = 0, NumForms = LU.Formulae.size();
         FIdx != NumForms; ++FIdx) {
      Formula &F = LU.Formulae[FIdx];
      CostF.RateFormula(F, Regs, VisitedRegs, LU, HardwareLoopProfitable,
      if (CostF.isLoser()) {
      for (const SCEV *Reg : F.BaseRegs) {
        if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
          RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
        Key.push_back(F.ScaledReg);
      std::pair<BestFormulaeTy::const_iterator, bool> P =
          BestFormulae.insert(std::make_pair(Key, FIdx));
        Formula &Best = LU.Formulae[P.first->second];
        Cost CostBest(L, SE, TTI, AMK);
        CostBest.RateFormula(Best, Regs, VisitedRegs, LU,
                             HardwareLoopProfitable);
        if (CostF.isLess(CostBest))
                   " in favor of formula "; Best.print(dbgs()); dbgs() << '\n');
        ChangedFormulae = true;
        LU.DeleteFormula(F);
      LU.RecomputeRegs(LUIdx, RegUses);
  BestFormulae.clear();
             "After filtering out undesirable candidates:\n";
size_t LSRInstance::EstimateSearchSpaceComplexity() const {
  for (const LSRUse &LU : Uses) {
    size_t FSize = LU.Formulae.size();
void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
  LLVM_DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
                       "which use a superset of registers used by other "
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
      Formula &F = LU.Formulae[i];
      if (F.BaseOffset.isNonZero() && F.BaseOffset.isScalable())
           I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
              Immediate::getFixed(NewF.BaseOffset.getFixedValue() +
                                  (uint64_t)C->getValue()->getSExtValue());
          NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
                              (I - F.BaseRegs.begin()));
          if (LU.HasFormulaWithSameRegs(NewF)) {
            LU.DeleteFormula(F);
          NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
                              (I - F.BaseRegs.begin()));
          if (LU.HasFormulaWithSameRegs(NewF)) {
            LU.DeleteFormula(F);
      LU.RecomputeRegs(LUIdx, RegUses);
void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
      dbgs() << "The search space is too complex.\n"
                 "Narrowing the search space by assuming that uses separated "
                 "by a constant offset will use the same registers.\n");
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (const Formula &F : LU.Formulae) {
      if (F.BaseOffset.isZero() || (F.Scale != 0 && F.Scale != 1))
      LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);
      if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, false,
                              LU.Kind, LU.AccessTy))
      LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
      LUThatHas->AllFixupsUnconditional &= LU.AllFixupsUnconditional;
      for (LSRFixup &Fixup : LU.Fixups) {
        Fixup.Offset += F.BaseOffset;
        LUThatHas->pushFixup(Fixup);
      for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
        Formula &F = LUThatHas->Formulae[i];
        if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
                        LUThatHas->Kind, LUThatHas->AccessTy, F)) {
          LUThatHas->DeleteFormula(F);
      LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);
      DeleteUse(LU, LUIdx);
void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters() {
  LLVM_DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
                       "undesirable dedicated registers.\n");
  FilterOutUndesirableDedicatedRegisters();
void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
      dbgs() << "The search space is too complex.\n"
                 "Narrowing the search space by choosing the best Formula "
                 "from the Formulae with the same Scale and ScaledReg.\n");
  using BestFormulaeTy = DenseMap<std::pair<const SCEV *, int64_t>, size_t>;
  BestFormulaeTy BestFormulae;
  bool ChangedFormulae = false;
  DenseSet<const SCEV *> VisitedRegs;
  SmallPtrSet<const SCEV *, 16> Regs;

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    auto IsBetterThan = [&](Formula &FA, Formula &FB) {
      size_t FARegNum = 0;
      for (const SCEV *Reg : FA.BaseRegs) {
        const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
        FARegNum += (NumUses - UsedByIndices.count() + 1);
      size_t FBRegNum = 0;
      for (const SCEV *Reg : FB.BaseRegs) {
        const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
        FBRegNum += (NumUses - UsedByIndices.count() + 1);
      if (FARegNum != FBRegNum)
        return FARegNum < FBRegNum;
      CostFA.RateFormula(FA, Regs, VisitedRegs, LU, HardwareLoopProfitable);
      CostFB.RateFormula(FB, Regs, VisitedRegs, LU, HardwareLoopProfitable);
      return CostFA.isLess(CostFB);

    for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
      Formula &F = LU.Formulae[FIdx];
      auto P = BestFormulae.insert({{F.ScaledReg, F.Scale}, FIdx});
        Formula &Best = LU.Formulae[P.first->second];
        if (IsBetterThan(F, Best))
                   " in favor of formula "; Best.print(dbgs()); dbgs() << '\n');
        ChangedFormulae = true;
        LU.DeleteFormula(F);
      LU.RecomputeRegs(LUIdx, RegUses);
    BestFormulae.clear();
             "After filtering out undesirable candidates:\n";
void LSRInstance::NarrowSearchSpaceByFilterPostInc() {
             "Narrowing the search space by choosing the lowest "
             "register Formula for PostInc Uses.\n");
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    if (LU.Kind != LSRUse::Address)
    size_t MinRegs = std::numeric_limits<size_t>::max();
    for (const Formula &F : LU.Formulae)
      MinRegs = std::min(F.getNumRegs(), MinRegs);
    for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
      Formula &F = LU.Formulae[FIdx];
      if (F.getNumRegs() > MinRegs) {
        LU.DeleteFormula(F);
      LU.RecomputeRegs(LUIdx, RegUses);
void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
  SmallPtrSet<const SCEV *, 4> UniqRegs;
  DenseMap<const SCEV *, float> RegNumMap;
  for (const SCEV *Reg : RegUses) {
    for (const LSRUse &LU : Uses) {
      if (!LU.Regs.count(Reg))
      float P = LU.getNotSelectedProbability(Reg);
    RegNumMap.insert(std::make_pair(Reg, PNotSel));

      dbgs() << "Narrowing the search space by deleting costly formulas\n");
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    if (LU.Formulae.size() < 2)
    float FMinRegNum = LU.Formulae[0].getNumRegs();
    float FMinARegNum = LU.Formulae[0].getNumRegs();
    for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
      Formula &F = LU.Formulae[i];
      for (const SCEV *BaseReg : F.BaseRegs) {
        if (UniqRegs.count(BaseReg))
        FRegNum += RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
            RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
      if (const SCEV *ScaledReg = F.ScaledReg) {
        if (!UniqRegs.count(ScaledReg)) {
            RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
            RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
      if (FMinRegNum > FRegNum ||
          (FMinRegNum == FRegNum && FMinARegNum > FARegNum)) {
        FMinRegNum = FRegNum;
        FMinARegNum = FARegNum;
               dbgs() << " with min reg num " << FMinRegNum << '\n');
    std::swap(LU.Formulae[MinIdx], LU.Formulae[0]);
    while (LU.Formulae.size() != 1) {
      LU.Formulae.pop_back();
    LU.RecomputeRegs(LUIdx, RegUses);
    assert(LU.Formulae.size() == 1 && "Should be exactly 1 min regs formula");
    Formula &F = LU.Formulae[0];
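    // Editorial note (not part of the original source): from the code that
    // survives above, RegNumMap caches, per register, how likely that register
    // is to be left unselected by the uses referencing it; each formula of a
    // use is then scored by summing RegNumMap[Reg] / getNotSelectedProbability(Reg)
    // over its base and scaled registers, and only the lowest-scoring formula
    // (index MinIdx, moved to slot 0) is kept while the rest are popped and the
    // use's register set is recomputed.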
                                       MemAccessTy AccessType) {
  return TTI.isLegalAddressingMode(
             AccessType.MemTy, nullptr, Diff->getSExtValue(),
             true, 0, AccessType.AddrSpace) &&
         !TTI.isLegalAddressingMode(
             AccessType.MemTy, nullptr, -Diff->getSExtValue(),
             true, 0, AccessType.AddrSpace);
void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
  SmallPtrSet<const SCEV *, 4> Taken;
    const SCEV *Best = nullptr;
    unsigned BestNum = 0;
    for (const SCEV *Reg : RegUses) {
        BestNum = RegUses.getUsedByIndices(Reg).count();
        unsigned Count = RegUses.getUsedByIndices(Reg).count();
        if (Count > BestNum) {
        if (Count == BestNum) {
          int LUIdx = RegUses.getUsedByIndices(Reg).find_first();
          if (LUIdx >= 0 && Uses[LUIdx].Kind == LSRUse::Address &&
                                          Uses[LUIdx].AccessTy)) {
    assert(Best && "Failed to find best LSRUse candidate");
    LLVM_DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
                      << " will yield profitable reuse.\n");
    for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
      LSRUse &LU = Uses[LUIdx];
      if (!LU.Regs.count(Best))
        continue;
      for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
        Formula &F = LU.Formulae[i];
        if (!F.referencesReg(Best)) {
          LU.DeleteFormula(F);
          assert(e != 0 && "Use has no formulae left! Is Regs inconsistent?");
      LU.RecomputeRegs(LUIdx, RegUses);
void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
  NarrowSearchSpaceByDetectingSupersets();
  NarrowSearchSpaceByCollapsingUnrolledCode();
  NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
    NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
  NarrowSearchSpaceByFilterPostInc();
    NarrowSearchSpaceByDeletingCostlyFormulas();
    NarrowSearchSpaceByPickingWinnerRegs();
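// Editorial note (not part of the original source): the calls above run from
// the most conservative pruning (dropping register-superset formulae, merging
// uses that differ only by a constant offset, re-filtering dominated formulae)
// to the most aggressive last-resort cuts (one formula per ScaledReg/Scale
// pair, post-increment filtering, costly-formula deletion, and finally keeping
// only formulae that reference a single "winner" register). The guards elided
// from this excerpt presumably re-check the lsr-complexity-limit threshold
// between steps.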
void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
                               SmallVectorImpl<const Formula *> &Workspace,
                               const Cost &CurCost,
                               const SmallPtrSet<const SCEV *, 16> &CurRegs,
                               DenseSet<const SCEV *> &VisitedRegs) const {
  const LSRUse &LU = Uses[Workspace.size()];

  SmallSetVector<const SCEV *, 4> ReqRegs;
  for (const SCEV *S : CurRegs)
    if (LU.Regs.count(S))

  SmallPtrSet<const SCEV *, 16> NewRegs;
  Cost NewCost(L, SE, TTI, AMK);
  for (const Formula &F : LU.Formulae) {
    int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size());
    for (const SCEV *Reg : ReqRegs) {
      if ((F.ScaledReg && F.ScaledReg == Reg) ||
        if (NumReqRegsToFind == 0)
    if (NumReqRegsToFind != 0) {

    NewCost.RateFormula(F, NewRegs, VisitedRegs, LU, HardwareLoopProfitable);
    if (NewCost.isLess(SolutionCost)) {
      if (Workspace.size() != Uses.size()) {
        SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
                     NewRegs, VisitedRegs);
        if (F.getNumRegs() == 1 && Workspace.size() == 1)
          VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
                   dbgs() << ".\nRegs:\n";
                   for (const SCEV *S : NewRegs) dbgs() << "- " << *S << "\n";
        SolutionCost = NewCost;
        Solution = Workspace;
void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
  Cost SolutionCost(L, SE, TTI, AMK);
  SolutionCost.Lose();
  Cost CurCost(L, SE, TTI, AMK);
  SmallPtrSet<const SCEV *, 16> CurRegs;
  DenseSet<const SCEV *> VisitedRegs;

  SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
               CurRegs, VisitedRegs);
  if (Solution.empty()) {

             "The chosen solution requires ";
             SolutionCost.print(dbgs()); dbgs() << ":\n";
             for (size_t i = 0, e = Uses.size(); i != e; ++i) {
               Solution[i]->print(dbgs());

  const bool EnableDropUnprofitableSolution = [&] {

  if (BaselineCost.isLess(SolutionCost)) {
    if (!EnableDropUnprofitableSolution)
      dbgs() << "Baseline is more profitable than chosen solution, "
                "add option 'lsr-drop-solution' to drop LSR solution.\n");
                "solution, dropping LSR solution.\n";);
5572 const SmallVectorImpl<Instruction *> &Inputs)
5576 bool AllDominate =
true;
5583 for (Instruction *Inst : Inputs) {
5584 if (Inst == Tentative || !DT.
dominates(Inst, Tentative)) {
5585 AllDominate =
false;
5590 if (Tentative->
getParent() == Inst->getParent() &&
5591 (!BetterPos || !DT.
dominates(Inst, BetterPos)))
5601 const Loop *IPLoop = LI.getLoopFor(IP->getParent());
5602 unsigned IPLoopDepth = IPLoop ? IPLoop->
getLoopDepth() : 0;
5606 if (!Rung)
return IP;
5607 Rung = Rung->getIDom();
5608 if (!Rung)
return IP;
5609 IDom = Rung->getBlock();
5612 const Loop *IDomLoop = LI.getLoopFor(IDom);
5613 unsigned IDomDepth = IDomLoop ? IDomLoop->
getLoopDepth() : 0;
5614 if (IDomDepth <= IPLoopDepth &&
5615 (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
5632 SmallVector<Instruction *, 4> Inputs;
5635 if (LU.Kind == LSRUse::ICmpZero)
5636 if (Instruction *
I =
5639 if (LF.PostIncLoops.
count(L)) {
5640 if (LF.isUseFullyOutsideLoop(L))
5641 Inputs.
push_back(
L->getLoopLatch()->getTerminator());
5647 for (
const Loop *PIL : LF.PostIncLoops) {
5648 if (PIL == L)
continue;
5653 if (!ExitingBlocks.
empty()) {
5655 for (
unsigned i = 1, e = ExitingBlocks.
size(); i != e; ++i)
5662 "Insertion point must be a normal instruction");
5672 while (IP->isEHPad()) ++IP;
5677 while (
Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP)
5685Value *LSRInstance::Expand(
const LSRUse &LU,
const LSRFixup &LF,
5687 SmallVectorImpl<WeakTrackingVH> &DeadInsts)
const {
5688 if (LU.RigidFormula)
5689 return LF.OperandValToReplace;
5693 IP = AdjustInsertPositionForExpand(IP, LF, LU);
5698 Rewriter.setPostInc(LF.PostIncLoops);
5703 Type *Ty =
F.getType();
5717 for (
const SCEV *
Reg :
F.BaseRegs) {
5718 assert(!
Reg->isZero() &&
"Zero allocated in a base register!");
5726 Value *ICmpScaledV =
nullptr;
5728 const SCEV *ScaledS =
F.ScaledReg;
5734 if (LU.Kind == LSRUse::ICmpZero) {
5744 "The only scale supported by ICmpZero uses is -1!");
5745 ICmpScaledV =
Rewriter.expandCodeFor(ScaledS,
nullptr);
5753 if (!
Ops.empty() && LU.Kind == LSRUse::Address &&
5763 Ops.push_back(ScaledS);
5789 assert(
F.BaseOffset.isCompatibleImmediate(LF.Offset) &&
5790 "Expanding mismatched offsets\n");
5792 Immediate
Offset =
F.BaseOffset.addUnsigned(LF.Offset);
5793 if (
Offset.isNonZero()) {
5794 if (LU.Kind == LSRUse::ICmpZero) {
5799 ConstantInt::get(IntTy, -(uint64_t)
Offset.getFixedValue());
5802 ICmpScaledV = ConstantInt::get(IntTy,
Offset.getFixedValue());
5807 Ops.push_back(
Offset.getUnknownSCEV(SE, IntTy));
5812 Immediate UnfoldedOffset =
F.UnfoldedOffset;
5813 if (UnfoldedOffset.isNonZero()) {
5815 Ops.push_back(UnfoldedOffset.getUnknownSCEV(SE, IntTy));
5819 const SCEV *FullS =
Ops.empty() ?
5830 if (LU.Kind == LSRUse::ICmpZero) {
5834 assert(!
F.BaseGV &&
"ICmp does not support folding a global value and "
5835 "a scale at the same time!");
5836 if (
F.Scale == -1) {
5837 if (ICmpScaledV->
getType() != OpTy) {
5847 assert((
F.Scale == 0 ||
F.Scale == 1) &&
5848 "ICmp does not support folding a global value and "
5849 "a scale at the same time!");
5851 -(uint64_t)
Offset.getFixedValue());
5852 if (
C->getType() != OpTy) {
5856 assert(
C &&
"Cast of ConstantInt should have folded");
5869void LSRInstance::RewriteForPHI(PHINode *PN,
const LSRUse &LU,
5870 const LSRFixup &LF,
const Formula &
F,
5871 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
5872 DenseMap<BasicBlock *, Value *>
Inserted;
5876 bool needUpdateFixups =
false;
5887 Loop *PNLoop = LI.getLoopFor(Parent);
5888 if (!PNLoop || Parent != PNLoop->
getHeader()) {
5894 CriticalEdgeSplittingOptions(&DT, &LI, MSSAU)
5895 .setMergeIdenticalEdges()
5896 .setKeepOneInputPHIs());
5899 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
5910 if (
L->contains(BB) && !
L->contains(PN))
5918 needUpdateFixups =
true;
5923 std::pair<DenseMap<BasicBlock *, Value *>::iterator,
bool> Pair =
5936 LF.OperandValToReplace->
getType(),
"tmp",
5943 if (
L->contains(
I) && !
L->contains(BB))
5944 InsertedNonLCSSAInsts.insert(
I);
5947 Pair.first->second = FullV;
5954 if (needUpdateFixups) {
5955 for (LSRUse &LU :
Uses)
5956 for (LSRFixup &
Fixup : LU.Fixups)
5960 if (
Fixup.UserInst == PN) {
5963 bool foundInOriginalPHI =
false;
5965 if (val ==
Fixup.OperandValToReplace) {
5966 foundInOriginalPHI =
true;
5971 if (foundInOriginalPHI)
5982 if (val ==
Fixup.OperandValToReplace)
5983 Fixup.UserInst = NewPN;
5993void LSRInstance::Rewrite(
const LSRUse &LU,
const LSRFixup &LF,
5995 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
5999 RewriteForPHI(PN, LU, LF,
F, DeadInsts);
6005 if (FullV->
getType() != OpTy) {
6017 if (LU.Kind == LSRUse::ICmpZero)
6033 if (LU.Kind != LSRUse::Address)
6040 if (IVIncInsertPos->
getParent() == LHeader)
6043 if (!
Fixup.OperandValToReplace ||
6045 Instruction *UI = cast<Instruction>(U);
6046 return UI->getParent() != LHeader;
6051 Type *Ty =
I->getType();
6058void LSRInstance::ImplementSolution(
6059 const SmallVectorImpl<const Formula *> &Solution) {
6065 for (
const IVChain &Chain : IVChainVec) {
6071 for (
size_t LUIdx = 0, NumUses =
Uses.size(); LUIdx != NumUses; ++LUIdx)
6072 for (
const LSRFixup &
Fixup :
Uses[LUIdx].Fixups) {
6075 ?
L->getHeader()->getTerminator()
6077 Rewriter.setIVIncInsertPos(L, InsertPos);
6078 Rewrite(
Uses[LUIdx],
Fixup, *Solution[LUIdx], DeadInsts);
6082 auto InsertedInsts = InsertedNonLCSSAInsts.takeVector();
6085 for (
const IVChain &Chain : IVChainVec) {
6086 GenerateIVChain(Chain, DeadInsts);
6090 for (
const WeakVH &
IV :
Rewriter.getInsertedIVs())
6108 for (PHINode &PN :
L->getHeader()->phis()) {
6109 BinaryOperator *BO =
nullptr;
6115 case Instruction::Sub:
6120 case Instruction::Add:
6137 [&](Use &U) {return DT.dominates(IVIncInsertPos, U);}))
6146LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
6147 DominatorTree &DT, LoopInfo &LI,
6148 const TargetTransformInfo &
TTI, AssumptionCache &AC,
6149 TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU)
6150 : IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), TLI(TLI),
TTI(
TTI),
L(
L),
6153 :
TTI.getPreferredAddressingMode(
L, &SE)),
6155 BaselineCost(
L, SE,
TTI, AMK) {
6157 if (!
L->isLoopSimplifyForm())
6165 unsigned NumUsers = 0;
6169 LLVM_DEBUG(
dbgs() <<
"LSR skipping loop, too many IV Users in " << U
6177 auto FirstNonPHI = PN->
getParent()->getFirstNonPHIIt();
6187 L->getHeader()->printAsOperand(
dbgs(),
false);
6193 HardwareLoopProfitable =
6194 TTI.isHardwareLoopProfitable(L, SE, AC, &TLI, HWLoopInfo);
6198#if LLVM_ENABLE_ABI_BREAKING_CHECKS
6201 Rewriter.disableCanonicalMode();
6202 Rewriter.enableLSRMode();
6206 OptimizeLoopTermCond();
6209 if (IU.empty())
return;
6212 if (!
L->isInnermost()) {
6225 CollectInterestingTypesAndFactors();
6226 CollectFixupsAndInitialFormulae();
6227 CollectLoopInvariantFixupsAndFormulae();
6233 print_uses(
dbgs()));
6235 BaselineCost.print(
dbgs());
dbgs() <<
"\n");
6239 GenerateAllReuseFormulae();
6241 FilterOutUndesirableDedicatedRegisters();
6242 NarrowSearchSpaceUsingHeuristics();
6252 if (Solution.
empty())
6257 for (
const LSRUse &LU :
Uses) {
6258 for (
const Formula &
F : LU.Formulae)
6260 F) &&
"Illegal formula generated!");
6265 ImplementSolution(Solution);
6268#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
6269void LSRInstance::print_factors_and_types(
raw_ostream &OS)
const {
6270 if (Factors.empty() &&
Types.empty())
return;
6272 OS <<
"LSR has identified the following interesting factors and types: ";
6275 for (int64_t Factor : Factors) {
6276 if (!
First) OS <<
", ";
6278 OS <<
'*' << Factor;
6281 for (
Type *Ty : Types) {
6282 if (!
First) OS <<
", ";
6284 OS <<
'(' << *Ty <<
')';
6289void LSRInstance::print_fixups(raw_ostream &OS)
const {
6290 OS <<
"LSR is examining the following fixup sites:\n";
6291 for (
const LSRUse &LU :
Uses)
6292 for (
const LSRFixup &LF : LU.Fixups) {
6299void LSRInstance::print_uses(raw_ostream &OS)
const {
6300 OS <<
"LSR is examining the following uses:\n";
6301 for (
const LSRUse &LU :
Uses) {
6305 for (
const Formula &
F : LU.Formulae) {
6313void LSRInstance::print(raw_ostream &OS)
const {
6314 print_factors_and_types(OS);
6326class LoopStrengthReduce :
public LoopPass {
6330 LoopStrengthReduce();
6333 bool runOnLoop(Loop *L, LPPassManager &LPM)
override;
6334 void getAnalysisUsage(AnalysisUsage &AU)
const override;
6339LoopStrengthReduce::LoopStrengthReduce() : LoopPass(
ID) {
6343void LoopStrengthReduce::getAnalysisUsage(
AnalysisUsage &AU)
const {
6370ToDwarfOpIter(SmallVectorImpl<uint64_t> &Expr) {
6371 llvm::DIExpression::expr_op_iterator Begin =
6372 llvm::DIExpression::expr_op_iterator(Expr.
begin());
6373 llvm::DIExpression::expr_op_iterator End =
6374 llvm::DIExpression::expr_op_iterator(Expr.
end());
6375 return {Begin, End};
6378struct SCEVDbgValueBuilder {
6379 SCEVDbgValueBuilder() =
default;
6380 SCEVDbgValueBuilder(
const SCEVDbgValueBuilder &
Base) { clone(
Base); }
6382 void clone(
const SCEVDbgValueBuilder &
Base) {
6383 LocationOps =
Base.LocationOps;
6388 LocationOps.
clear();
6395 SmallVector<Value *, 2> LocationOps;
6398 void pushUInt(uint64_t Operand) { Expr.
push_back(Operand); }
6405 unsigned ArgIndex = 0;
6406 if (It != LocationOps.
end()) {
6407 ArgIndex = std::distance(LocationOps.
begin(), It);
6409 ArgIndex = LocationOps.
size();
6415 void pushValue(
const SCEVUnknown *U) {
6420 bool pushConst(
const SCEVConstant *
C) {
6421 if (
C->getAPInt().getSignificantBits() > 64)
6423 Expr.
push_back(llvm::dwarf::DW_OP_consts);
6424 Expr.
push_back(
C->getAPInt().getSExtValue());
6431 return ToDwarfOpIter(Expr);
6436 bool pushArithmeticExpr(
const llvm::SCEVCommutativeExpr *CommExpr,
6439 "Expected arithmetic SCEV type");
6441 unsigned EmitOperator = 0;
6442 for (
const auto &
Op : CommExpr->
operands()) {
6445 if (EmitOperator >= 1)
6446 pushOperator(DwarfOp);
6453 bool pushCast(
const llvm::SCEVCastExpr *
C,
bool IsSigned) {
6454 const llvm::SCEV *Inner =
C->getOperand(0);
6455 const llvm::Type *
Type =
C->getType();
6456 uint64_t ToWidth =
Type->getIntegerBitWidth();
6457 bool Success = pushSCEV(Inner);
6459 IsSigned ? llvm::dwarf::DW_ATE_signed
6460 : llvm::dwarf::DW_ATE_unsigned};
6461 for (
const auto &
Op : CastOps)
6467 bool pushSCEV(
const llvm::SCEV *S) {
6470 Success &= pushConst(StartInt);
6475 pushLocation(
U->getValue());
6478 Success &= pushArithmeticExpr(MulRec, llvm::dwarf::DW_OP_mul);
6481 Success &= pushSCEV(UDiv->getLHS());
6482 Success &= pushSCEV(UDiv->getRHS());
6483 pushOperator(llvm::dwarf::DW_OP_div);
6489 "Unexpected cast type in SCEV.");
6493 Success &= pushArithmeticExpr(AddExpr, llvm::dwarf::DW_OP_plus);
6508 bool isIdentityFunction(uint64_t
Op,
const SCEV *S) {
6510 if (
C->getAPInt().getSignificantBits() > 64)
6512 int64_t
I =
C->getAPInt().getSExtValue();
6514 case llvm::dwarf::DW_OP_plus:
6515 case llvm::dwarf::DW_OP_minus:
6517 case llvm::dwarf::DW_OP_mul:
6518 case llvm::dwarf::DW_OP_div:
6531 bool SCEVToValueExpr(
const llvm::SCEVAddRecExpr &SAR, ScalarEvolution &SE) {
6537 if (!isIdentityFunction(llvm::dwarf::DW_OP_mul, Stride)) {
6538 if (!pushSCEV(Stride))
6540 pushOperator(llvm::dwarf::DW_OP_mul);
6542 if (!isIdentityFunction(llvm::dwarf::DW_OP_plus, Start)) {
6543 if (!pushSCEV(Start))
6545 pushOperator(llvm::dwarf::DW_OP_plus);
6551 void createOffsetExpr(int64_t
Offset,
Value *OffsetValue) {
6552 pushLocation(OffsetValue);
6555 dbgs() <<
"scev-salvage: Generated IV offset expression. Offset: "
6556 << std::to_string(
Offset) <<
"\n");
6562 bool createIterCountExpr(
const SCEV *S,
6563 const SCEVDbgValueBuilder &IterationCount,
6564 ScalarEvolution &SE) {
6573 LLVM_DEBUG(
dbgs() <<
"scev-salvage: Location to salvage SCEV: " << *S
6577 if (!Rec->isAffine())
6585 clone(IterationCount);
6586 if (!SCEVToValueExpr(*Rec, SE))
6597 bool SCEVToIterCountExpr(
const llvm::SCEVAddRecExpr &SAR,
6598 ScalarEvolution &SE) {
6604 if (!isIdentityFunction(llvm::dwarf::DW_OP_minus, Start)) {
6605 if (!pushSCEV(Start))
6607 pushOperator(llvm::dwarf::DW_OP_minus);
6609 if (!isIdentityFunction(llvm::dwarf::DW_OP_div, Stride)) {
6610 if (!pushSCEV(Stride))
6612 pushOperator(llvm::dwarf::DW_OP_div);
6620 void appendToVectors(SmallVectorImpl<uint64_t> &DestExpr,
6621 SmallVectorImpl<Value *> &DestLocations) {
6623 "Expected the locations vector to contain the IV");
6628 "Expected the location ops to contain the IV.");
6632 for (
const auto &
Op : LocationOps) {
6633 auto It =
find(DestLocations,
Op);
6634 if (It != DestLocations.
end()) {
6636 DestIndexMap.
push_back(std::distance(DestLocations.
begin(), It));
6644 for (
const auto &
Op : expr_ops()) {
6646 Op.appendToVector(DestExpr);
6653 uint64_t NewIndex = DestIndexMap[
Op.getArg(0)];
6661struct DVIRecoveryRec {
6662 DVIRecoveryRec(DbgVariableRecord *DVR)
6663 : DbgRef(DVR), Expr(DVR->getExpression()), HadLocationArgList(
false) {}
6665 DbgVariableRecord *DbgRef;
6667 bool HadLocationArgList;
6673 for (
auto &RE : RecoveryExprs)
6675 RecoveryExprs.clear();
6678 ~DVIRecoveryRec() { clear(); }
6686 auto expr_ops = ToDwarfOpIter(Expr);
6688 for (
auto Op : expr_ops)
6697template <
typename T>
6701 "contain any DW_OP_llvm_arg operands.");
6708template <
typename T>
6713 "Expected expression that references DIArglist locations using "
6714 "DW_OP_llvm_arg operands.");
6716 for (
Value *V : Locations)
6733 if (NumLLVMArgs == 0) {
6740 "Lone LLVM_arg in a DIExpression should refer to location-op 0.");
6770 LLVM_DEBUG(
dbgs() <<
"scev-salvage: restore dbg.value to pre-LSR state\n"
6771 <<
"scev-salvage: post-LSR: " << *DbgVal <<
'\n');
6772 assert(DVIRec.Expr &&
"Expected an expression");
6777 if (!DVIRec.HadLocationArgList) {
6778 assert(DVIRec.LocationOps.size() == 1 &&
6779 "Unexpected number of location ops.");
6783 Value *CachedValue =
6788 for (
WeakVH VH : DVIRec.LocationOps) {
6796 LLVM_DEBUG(
dbgs() <<
"scev-salvage: pre-LSR: " << *DbgVal <<
'\n');
6801 const SCEV *SCEVInductionVar,
6802 SCEVDbgValueBuilder IterCountExpr) {
6816 LocationOpIndexMap.
assign(DVIRec.LocationOps.size(), -1);
6818 NewLocationOps.
push_back(LSRInductionVar);
6820 for (
unsigned i = 0; i < DVIRec.LocationOps.size(); i++) {
6821 WeakVH VH = DVIRec.LocationOps[i];
6827 LocationOpIndexMap[i] = NewLocationOps.
size() - 1;
6829 <<
" now at index " << LocationOpIndexMap[i] <<
"\n");
6837 LLVM_DEBUG(
dbgs() <<
"scev-salvage: SCEV for location at index: " << i
6838 <<
" refers to a location that is now undef or erased. "
6839 "Salvage abandoned.\n");
6843 LLVM_DEBUG(
dbgs() <<
"scev-salvage: salvaging location at index " << i
6844 <<
" with SCEV: " << *DVIRec.SCEVs[i] <<
"\n");
6846 DVIRec.RecoveryExprs[i] = std::make_unique<SCEVDbgValueBuilder>();
6847 SCEVDbgValueBuilder *SalvageExpr = DVIRec.RecoveryExprs[i].get();
6851 if (std::optional<APInt>
Offset =
6853 if (
Offset->getSignificantBits() <= 64)
6854 SalvageExpr->createOffsetExpr(
Offset->getSExtValue(), LSRInductionVar);
6857 }
else if (!SalvageExpr->createIterCountExpr(DVIRec.SCEVs[i], IterCountExpr,
6866 assert(DVIRec.RecoveryExprs.size() == 1 &&
6867 "Expected only a single recovery expression for an empty "
6869 assert(DVIRec.RecoveryExprs[0] &&
6870 "Expected a SCEVDbgSalvageBuilder for location 0");
6871 SCEVDbgValueBuilder *
B = DVIRec.RecoveryExprs[0].get();
6872 B->appendToVectors(
NewExpr, NewLocationOps);
6874 for (
const auto &
Op : DVIRec.Expr->
expr_ops()) {
6882 SCEVDbgValueBuilder *DbgBuilder =
6883 DVIRec.RecoveryExprs[LocationArgIndex].get();
6889 assert(LocationOpIndexMap[
Op.getArg(0)] != -1 &&
6890 "Expected a positive index for the location-op position.");
6891 NewExpr.push_back(LocationOpIndexMap[
Op.getArg(0)]);
6895 DbgBuilder->appendToVectors(
NewExpr, NewLocationOps);
6899 LLVM_DEBUG(
dbgs() <<
"scev-salvage: Updated DVI: " << *DVIRec.DbgRef <<
"\n");
6907 SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &DVIToUpdate) {
6908 if (DVIToUpdate.empty())
6912 assert(SCEVInductionVar &&
6913 "Anticipated a SCEV for the post-LSR induction variable");
6917 if (!IVAddRec->isAffine())
6925 SCEVDbgValueBuilder IterCountExpr;
6926 IterCountExpr.pushLocation(LSRInductionVar);
6927 if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE))
6930 LLVM_DEBUG(
dbgs() <<
"scev-salvage: IV SCEV: " << *SCEVInductionVar
6933 for (
auto &DVIRec : DVIToUpdate) {
6934 SalvageDVI(L, SE, LSRInductionVar, *DVIRec, SCEVInductionVar,
6945 SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &SalvageableDVISCEVs) {
6946 for (
const auto &
B : L->getBlocks()) {
6947 for (
auto &
I : *
B) {
6949 if (!DbgVal.isDbgValue() && !DbgVal.isDbgAssign())
6954 if (DbgVal.isKillLocation())
6959 const auto &HasTranslatableLocationOps =
6961 for (
const auto LocOp : DbgValToTranslate.location_ops()) {
6975 if (!HasTranslatableLocationOps(DbgVal))
6978 std::unique_ptr<DVIRecoveryRec> NewRec =
6979 std::make_unique<DVIRecoveryRec>(&DbgVal);
6983 NewRec->RecoveryExprs.resize(DbgVal.getNumVariableLocationOps());
6984 for (
const auto LocOp : DbgVal.location_ops()) {
6985 NewRec->SCEVs.push_back(SE.
getSCEV(LocOp));
6986 NewRec->LocationOps.push_back(LocOp);
6987 NewRec->HadLocationArgList = DbgVal.hasArgList();
6989 SalvageableDVISCEVs.push_back(std::move(NewRec));
6999 const LSRInstance &LSR) {
7001 auto IsSuitableIV = [&](
PHINode *
P) {
7012 for (
const WeakVH &
IV : LSR.getScalarEvolutionIVs()) {
7019 if (IsSuitableIV(
P))
7023 for (
PHINode &
P : L.getHeader()->phis()) {
7024 if (IsSuitableIV(&
P))
7042 std::unique_ptr<MemorySSAUpdater> MSSAU;
7044 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
7047 const LSRInstance &Reducer =
7048 LSRInstance(L, IU, SE, DT, LI,
TTI, AC, TLI, MSSAU.get());
7049 Changed |= Reducer.getChanged();
7055 const DataLayout &
DL = L->getHeader()->getDataLayout();
7057#if LLVM_ENABLE_ABI_BREAKING_CHECKS
7060 unsigned numFolded = Rewriter.replaceCongruentIVs(L, &DT, DeadInsts, &
TTI);
7074 if (L->isRecursivelyLCSSAForm(DT, LI) && L->getExitBlock()) {
7076 const DataLayout &
DL = L->getHeader()->getDataLayout();
7089 if (SalvageableDVIRecords.
empty())
7095 for (
const auto &L : LI) {
7099 LLVM_DEBUG(
dbgs() <<
"scev-salvage: SCEV salvaging not possible. An IV "
7100 "could not be identified.\n");
7104 for (
auto &Rec : SalvageableDVIRecords)
7106 SalvageableDVIRecords.
clear();
7110bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & ) {
7114 auto &IU = getAnalysis<IVUsersWrapperPass>().getIU();
7115 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
7116 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
7117 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
7118 const auto &
TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
7119 *
L->getHeader()->getParent());
7120 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
7121 *
L->getHeader()->getParent());
7122 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
7123 *
L->getHeader()->getParent());
7124 auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
7127 MSSA = &MSSAAnalysis->getMSSA();
7144char LoopStrengthReduce::ID = 0;
7147 "Loop Strength Reduction",
false,
false)
static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec, const SCEV *SCEVInductionVar, SCEVDbgValueBuilder IterCountExpr)
static cl::opt< bool > DropScaledForVScale("lsr-drop-scaled-reg-for-vscale", cl::Hidden, cl::init(true), cl::desc("Avoid using scaled registers with vscale-relative addressing"))
static Value * getWideOperand(Value *Oper)
IVChain logic must consistently peek base TruncInst operands, so wrap it in a convenient helper.
static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE)
Return true if the given add can be sign-extended without changing its value.
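For illustration only, such a check can be phrased entirely in terms of ScalarEvolution: widen the expression by one bit and test whether the sign extension still distributes over the add. This is a sketch under that assumption (the helper name is illustrative, not necessarily the in-tree body):
static bool addIsSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
  // Widen by a single bit; if sext(A) is still an add, SCEV has proven that
  // sign-extending the operands first yields the same value (no signed wrap).
  Type *WideTy =
      IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1);
  return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));
}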
static bool mayUsePostIncMode(const TargetTransformInfo &TTI, LSRUse &LU, const SCEV *S, const Loop *L, ScalarEvolution &SE)
Return true if the SCEV represents a value that may end up as a post-increment operation.
static void restorePreTransformState(DVIRecoveryRec &DVIRec)
Restore the DVI's pre-LSR arguments. Substitute undef for any erased values.
static Immediate ExtractImmediate(const SCEV *&S, ScalarEvolution &SE)
If S involves the addition of a constant integer value, return that integer value,...
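A minimal sketch of the plain constant-addend case (the helper name and shape are illustrative; the in-tree routine also handles further forms such as recurrences):
static int64_t extractConstAddend(const SCEV *&S, ScalarEvolution &SE) {
  // If S is (C + X + ...), peel off the leading constant, point S at the
  // remainder, and report the constant; otherwise leave S alone and return 0.
  if (const auto *Add = dyn_cast<SCEVAddExpr>(S))
    if (const auto *C = dyn_cast<SCEVConstant>(Add->getOperand(0))) {
      SmallVector<const SCEV *, 4> Rest(Add->operands().begin() + 1,
                                        Add->operands().end());
      S = SE.getAddExpr(Rest);
      return C->getAPInt().getSExtValue();
    }
  return 0;
}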
static bool containsAddRecDependentOnLoop(const SCEV *S, const Loop &L)
static User::op_iterator findIVOperand(User::op_iterator OI, User::op_iterator OE, Loop *L, ScalarEvolution &SE)
Helper for CollectChains that finds an IV operand (computed by an AddRec in this loop) within [OI,...
static cl::opt< TTI::AddressingModeKind > PreferredAddresingMode("lsr-preferred-addressing-mode", cl::Hidden, cl::init(TTI::AMK_None), cl::desc("A flag that overrides the target's preferred addressing mode."), cl::values(clEnumValN(TTI::AMK_None, "none", "Don't prefer any addressing mode"), clEnumValN(TTI::AMK_PreIndexed, "preindexed", "Prefer pre-indexed addressing mode"), clEnumValN(TTI::AMK_PostIndexed, "postindexed", "Prefer post-indexed addressing mode"), clEnumValN(TTI::AMK_All, "all", "Consider all addressing modes")))
static bool isLegalUse(const TargetTransformInfo &TTI, Immediate MinOffset, Immediate MaxOffset, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, Immediate BaseOffset, bool HasBaseReg, int64_t Scale)
Test whether we know how to expand the current formula.
static void DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &SalvageableDVISCEVs)
Identify and cache salvageable DVI locations and expressions along with the corresponding SCEV(s).
static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE)
Return true if the given mul can be sign-extended without changing its value.
static const unsigned MaxSCEVSalvageExpressionSize
Limit the size of expression that SCEV-based salvaging will attempt to translate into a DIExpression.
static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Return true if this AddRec is already a phi in its loop.
static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F, const Loop &L)
static cl::opt< bool > InsnsCost("lsr-insns-cost", cl::Hidden, cl::init(true), cl::desc("Add instruction count to a LSR cost model"))
static cl::opt< bool > StressIVChain("stress-ivchain", cl::Hidden, cl::init(false), cl::desc("Stress test LSR IV chains"))
static bool isAddressUse(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)
Returns true if the specified instruction is using the specified value as an address.
static GlobalValue * ExtractSymbol(const SCEV *&S, ScalarEvolution &SE)
If S involves the addition of a GlobalValue address, return that symbol, and mutate S to point to a n...
static void updateDVIWithLocation(T &DbgVal, Value *Location, SmallVectorImpl< uint64_t > &Ops)
Overwrites DVI with the location and Ops as the DIExpression.
static bool isLegalAddImmediate(const TargetTransformInfo &TTI, Immediate Offset)
static cl::opt< cl::boolOrDefault > AllowDropSolutionIfLessProfitable("lsr-drop-solution", cl::Hidden, cl::desc("Attempt to drop solution if it is less profitable"))
static cl::opt< bool > EnableVScaleImmediates("lsr-enable-vscale-immediates", cl::Hidden, cl::init(true), cl::desc("Enable analysis of vscale-relative immediates in LSR"))
static const SCEV * getExprBase(const SCEV *S)
Return an approximation of this SCEV expression's "base", or NULL for any constant.
static bool isAlwaysFoldable(const TargetTransformInfo &TTI, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, Immediate BaseOffset, bool HasBaseReg)
static llvm::PHINode * GetInductionVariable(const Loop &L, ScalarEvolution &SE, const LSRInstance &LSR)
Ideally pick the PHI IV inserted by ScalarEvolutionExpander.
static bool IsSimplerBaseSCEVForTarget(const TargetTransformInfo &TTI, ScalarEvolution &SE, const SCEV *Best, const SCEV *Reg, MemAccessTy AccessType)
static const unsigned MaxIVUsers
MaxIVUsers is an arbitrary threshold that provides an early opportunity for bail out.
static bool isHighCostExpansion(const SCEV *S, SmallPtrSetImpl< const SCEV * > &Processed, ScalarEvolution &SE)
Check if expanding this expression is likely to incur significant cost.
static Value * getValueOrPoison(WeakVH &VH, LLVMContext &C)
Cached location ops may be erased during LSR, in which case a poison is required when restoring from ...
static MemAccessTy getAccessType(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)
Return the type of the memory being accessed.
static unsigned numLLVMArgOps(SmallVectorImpl< uint64_t > &Expr)
Returns the total number of DW_OP_llvm_arg operands in the expression.
static void DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &DVIToUpdate)
Obtain an expression for the iteration count, then attempt to salvage the dbg.value intrinsics.
static cl::opt< bool > EnablePhiElim("enable-lsr-phielim", cl::Hidden, cl::init(true), cl::desc("Enable LSR phi elimination"))
static void UpdateDbgValue(DVIRecoveryRec &DVIRec, SmallVectorImpl< Value * > &NewLocationOps, SmallVectorImpl< uint64_t > &NewExpr)
Write the new expression and new location ops for the dbg.value.
static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Return true if the given addrec can be sign-extended without changing its value.
static bool canHoistIVInc(const TargetTransformInfo &TTI, const LSRFixup &Fixup, const LSRUse &LU, Instruction *IVIncInsertPos, Loop *L)
static void DoInitialMatch(const SCEV *S, Loop *L, SmallVectorImpl< const SCEV * > &Good, SmallVectorImpl< const SCEV * > &Bad, ScalarEvolution &SE)
Recursion helper for initialMatch.
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F)
Check if the addressing mode defined by F is completely folded in LU at isel time.
static cl::opt< bool > LSRExpNarrow("lsr-exp-narrow", cl::Hidden, cl::init(false), cl::desc("Narrow LSR complex solution using" " expectation of registers number"))
static cl::opt< bool > FilterSameScaledReg("lsr-filter-same-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Narrow LSR search space by filtering non-optimal formulae" " with the same ScaledReg and Scale"))
static void updateDVIWithLocations(T &DbgVal, SmallVectorImpl< Value * > &Locations, SmallVectorImpl< uint64_t > &Ops)
Overwrite DVI with locations placed into a DIArglist.
static cl::opt< unsigned > ComplexityLimit("lsr-complexity-limit", cl::Hidden, cl::init(std::numeric_limits< uint16_t >::max()), cl::desc("LSR search space complexity limit"))
static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT, LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC, TargetLibraryInfo &TLI, MemorySSA *MSSA)
static bool isProfitableChain(IVChain &Chain, SmallPtrSetImpl< Instruction * > &Users, ScalarEvolution &SE, const TargetTransformInfo &TTI)
Return true if the number of registers needed for the chain is estimated to be less than the number r...
static const SCEV * CollectSubexprs(const SCEV *S, const SCEVConstant *C, SmallVectorImpl< const SCEV * > &Ops, const Loop *L, ScalarEvolution &SE, unsigned Depth=0)
Split S into subexpressions which can be pulled out into separate registers.
static const SCEV * getExactSDiv(const SCEV *LHS, const SCEV *RHS, ScalarEvolution &SE, bool IgnoreSignificantBits=false)
Return an expression for LHS /s RHS, if it can be determined and if the remainder is known to be zero...
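A hedged illustration of just the constant-over-constant case (the name below is made up; the in-tree helper also decomposes larger expressions):
static const SCEV *exactSDivConstants(const SCEVConstant *LHS,
                                      const SCEVConstant *RHS,
                                      ScalarEvolution &SE) {
  const APInt &L = LHS->getAPInt();
  const APInt &R = RHS->getAPInt();
  // Refuse division by zero and any division that would leave a remainder.
  if (R.isZero() || !L.srem(R).isZero())
    return nullptr;
  return SE.getConstant(L.sdiv(R));
}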
static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, Value *Operand, const TargetTransformInfo &TTI)
Return true if the IVInc can be folded into an addressing mode.
static const SCEV * getAnyExtendConsideringPostIncUses(ArrayRef< PostIncLoopSet > Loops, const SCEV *Expr, Type *ToTy, ScalarEvolution &SE)
Extend/Truncate Expr to ToTy considering post-inc uses in Loops.
static unsigned getSetupCost(const SCEV *Reg, unsigned Depth)
static cl::opt< unsigned > SetupCostDepthLimit("lsr-setupcost-depth-limit", cl::Hidden, cl::init(7), cl::desc("The limit on recursion depth for LSRs setup cost"))
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
uint64_t IntrinsicInst * II
PowerPC TLS Dynamic Call Fixup
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PointerIntPair class.
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
SI optimize exec mask operations pre RA
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
static const unsigned UnknownAddressSpace
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
Virtual Register Rewriter
static const uint32_t IV[8]
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
bool isNegative() const
Determine sign of this APInt.
LLVM_ABI APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
LLVM_ABI APInt srem(const APInt &RHS) const
Function for signed remainder operation.
int64_t getSExtValue() const
Get sign extended value.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
LLVM_ABI AnalysisUsage & addRequiredID(const void *ID)
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
LLVM Basic Block Representation.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
InstListType::iterator iterator
Instruction iterators...
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
LLVM_ABI bool isLandingPad() const
Return true if this basic block is a landing pad.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
BinaryOps getOpcode() const
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
bool isUnconditional() const
Value * getCondition() const
static LLVM_ABI Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
static LLVM_ABI bool isValueValidForType(Type *Ty, uint64_t V)
This static method returns true if the type Ty is big enough to represent the value V.
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI DIArgList * get(LLVMContext &Context, ArrayRef< ValueAsMetadata * > Args)
iterator_range< expr_op_iterator > expr_ops() const
static LLVM_ABI DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
unsigned getNumElements() const
static LLVM_ABI void appendOffset(SmallVectorImpl< uint64_t > &Ops, int64_t Offset)
Append Ops with operations to apply the Offset.
LLVM_ABI bool isComplex() const
Return whether the location is computed on the expression stack, meaning it cannot be a simple regist...
A parsed version of the target data layout string in and methods for querying it.
LLVM_ABI LLVMContext & getContext()
Record of a variable value-assignment, aka a non instruction representation of the dbg....
LLVM_ABI bool isKillLocation() const
void setRawLocation(Metadata *NewLocation)
Use of this should generally be avoided; instead, replaceVariableLocationOp and addVariableLocationOp...
void setExpression(DIExpression *NewExpr)
DIExpression * getExpression() const
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
PointerType * getType() const
Global values are always pointers.
IVStrideUse - Keep track of one use of a strided induction variable.
void transformToPostInc(const Loop *L)
transformToPostInc - Transform the expression to post-inc form for the given loop.
Value * getOperandValToReplace() const
getOperandValToReplace - Return the Value of the operand in the user instruction that this IVStrideUs...
void setUser(Instruction *NewUser)
setUser - Assign a new user instruction for this use.
Analysis pass that exposes the IVUsers for a loop.
ilist< IVStrideUse >::const_iterator const_iterator
LLVM_ABI void print(raw_ostream &OS) const
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isLifetimeStartOrEnd() const LLVM_READONLY
Return true if the instruction is a llvm.lifetime.start or llvm.lifetime.end marker.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI Type * getAccessType() const LLVM_READONLY
Return the type this instruction accesses in memory, if any.
const char * getOpcodeName() const
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
An instruction for reading from memory.
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
BlockT * getHeader() const
unsigned getLoopDepth() const
Return the nesting level of this loop.
The legacy pass manager's analysis pass to compute loop information.
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Represents a single loop in the control flow graph.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
An analysis that produces MemorySSA for a function.
Encapsulates MemorySSA, including all data associated with memory accesses.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
void setIncomingValue(unsigned i, Value *V)
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
static unsigned getIncomingValueNumForOperand(unsigned i)
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
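
A minimal sketch, assuming a loop header reached from a preheader and a single latch, of building an induction-variable PHI with the PHINode accessors above; all names are placeholders.

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

static PHINode *createIVPhi(BasicBlock *Header, BasicBlock *Preheader,
                            BasicBlock *Latch, Value *Start, Value *Next,
                            Type *Ty) {
  // Reserve space for the two expected incoming edges up front.
  PHINode *IV = PHINode::Create(Ty, /*NumReservedValues=*/2, "iv",
                                Header->begin());
  IV->addIncoming(Start, Preheader); // value on entry to the loop
  IV->addIncoming(Next, Latch);      // value from the previous iteration
  return IV;
}
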
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all analyses.
This node represents an addition of some number of SCEVs.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStart() const
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const Loop * getLoop() const
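
A minimal sketch of decomposing an affine add-recurrence {Start,+,Step}<L> with the SCEVAddRecExpr accessors above; S is assumed to come from ScalarEvolution.

#include "llvm/Analysis/ScalarEvolutionExpressions.h"

using namespace llvm;

static bool splitAffineAddRec(const SCEV *S, ScalarEvolution &SE,
                              const SCEV *&Start, const SCEV *&Step,
                              const Loop *&L) {
  const auto *AR = dyn_cast<SCEVAddRecExpr>(S);
  if (!AR || !AR->isAffine())
    return false; // not of the form A + B*x
  Start = AR->getStart();
  Step = AR->getStepRecurrence(SE);
  L = AR->getLoop();
  return true;
}
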
This class represents a constant integer value.
ConstantInt * getValue() const
const APInt & getAPInt() const
This class uses information about analyzed scalars to rewrite expressions in canonical form.
This node represents multiplication of some number of SCEVs.
bool hasNoUnsignedWrap() const
bool hasNoSignedWrap() const
ArrayRef< const SCEV * > operands() const
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
This class represents an analyzed expression in the program.
LLVM_ABI ArrayRef< const SCEV * > operands() const
Return operands of this SCEV expression.
unsigned short getExpressionSize() const
LLVM_ABI bool isZero() const
Return true if the expression is a constant zero.
SCEVTypes getSCEVType() const
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
LLVM_ABI uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI const SCEV * getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
LLVM_ABI const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
LLVM_ABI const SCEV * getAnyExtendExpr(const SCEV *Op, Type *Ty)
getAnyExtendExpr - Return a SCEV for the given operand extended with unspecified bits out to the give...
LLVM_ABI bool containsUndefs(const SCEV *S) const
Return true if the SCEV expression contains an undef value.
LLVM_ABI const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
LLVM_ABI const SCEV * getVScale(Type *Ty)
LLVM_ABI bool hasComputableLoopEvolution(const SCEV *S, const Loop *L)
Return true if the given SCEV changes value in a known way in the specified loop.
LLVM_ABI const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a sin...
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI const SCEV * getUnknown(Value *V)
LLVM_ABI std::optional< APInt > computeConstantDifference(const SCEV *LHS, const SCEV *RHS)
Compute LHS - RHS and return the result as an APInt if it is a constant, and std::nullopt if it isn'...
LLVM_ABI bool properlyDominates(const SCEV *S, const BasicBlock *BB)
Return true if elements that makes up the given SCEV properly dominate the specified basic block.
LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
LLVM_ABI bool containsErasedValue(const SCEV *S) const
Return true if the SCEV expression contains a Value that has been optimised out and is now a nullptr.
LLVMContext & getContext() const
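
A minimal sketch of how a few of the ScalarEvolution queries above fit together; the values A and B and the loop L are assumptions supplied by the caller, not anything taken from this pass.

#include "llvm/Analysis/ScalarEvolution.h"
#include <optional>

using namespace llvm;

static std::optional<APInt> constantGap(ScalarEvolution &SE, Value *A,
                                        Value *B, const Loop *L) {
  // Only types SCEV can analyze are worth asking about.
  if (!SE.isSCEVable(A->getType()) || !SE.isSCEVable(B->getType()))
    return std::nullopt;
  const SCEV *SA = SE.getSCEV(A);
  const SCEV *SB = SE.getSCEV(B);
  // Restrict the query to expressions that do not vary inside L.
  if (!SE.isLoopInvariant(SA, L) || !SE.isLoopInvariant(SB, L))
    return std::nullopt;
  // Folds SA - SB to an APInt when the difference is a constant.
  return SE.computeConstantDifference(SA, SB);
}
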
size_type size() const
Determine the number of elements in the SetVector.
iterator end()
Get an iterator to the end of the SetVector.
iterator begin()
Get an iterator to the beginning of the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
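
A minimal sketch of why a SetVector is a natural container here: it deduplicates like a set but iterates in insertion order, so downstream consumers see a deterministic order; the helper is illustrative only.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

static void collectParentBlocks(ArrayRef<Instruction *> Insts,
                                SetVector<BasicBlock *> &Blocks) {
  for (Instruction *I : Insts)
    Blocks.insert(I->getParent()); // duplicate inserts are ignored
}
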
int find_first() const
Returns the index of the first set bit, -1 if none of the bits are set.
iterator_range< const_set_bits_iterator > set_bits() const
int find_next(unsigned Prev) const
Returns the index of the next set bit following the "Prev" bit.
size_type size() const
Returns the number of bits in this bitvector.
void resize(unsigned N, bool t=false)
Grow or shrink the bitvector.
size_type count() const
Returns the number of bits which are set.
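
A minimal sketch of the classic find_first/find_next walk over the set bits of a bit vector; the set_bits() range above is the more idiomatic spelling of the same loop.

#include "llvm/ADT/SmallBitVector.h"

using namespace llvm;

static unsigned sumOfSetIndices(const SmallBitVector &BV) {
  unsigned Sum = 0;
  // find_first/find_next return -1 once no set bit remains.
  for (int I = BV.find_first(); I != -1; I = BV.find_next(I))
    Sum += unsigned(I);
  return Sum;
}
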
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
typename SuperClass::const_iterator const_iterator
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
static StackOffset get(int64_t Fixed, int64_t Scalable)
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
bool isPointerTy() const
True if this is an instance of PointerType.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI int getFPMantissaWidth() const
Return the width of the mantissa of this type.
bool isVoidTy() const
Return true if this is 'void'.
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< use_iterator > uses()
A Value handle that is nullable.
int getNumOccurrences() const
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
const ParentTy * getParent() const
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
class_match< const SCEVVScale > m_SCEVVScale()
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
class_match< const SCEVConstant > m_SCEVConstant()
SCEVAffineAddRec_match< Op0_t, Op1_t, class_match< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
bind_ty< const SCEVMulExpr > m_scev_Mul(const SCEVMulExpr *&V)
bool match(const SCEV *S, const Pattern &P)
class_match< const Loop > m_Loop()
cst_pred_ty< is_specific_cst > m_scev_SpecificInt(uint64_t V)
Match an SCEV constant with a plain unsigned integer.
class_match< const SCEV > m_SCEV()
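
A hedged sketch of the SCEV pattern-matching helpers above; the matcher spellings are taken from the declarations listed here and should be treated as assumptions rather than a stable API.

#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"

using namespace llvm;
using namespace llvm::SCEVPatternMatch;

// Match {Start,+,Step} where the step folds to a constant integer, binding
// the step to Step on success.
static bool isAddRecWithConstantStep(const SCEV *S, const APInt *&Step) {
  return match(S, m_scev_AffineAddRec(m_SCEV(), m_scev_APInt(Step)));
}
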
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
@ DW_OP_LLVM_arg
Only used in LLVM metadata.
@ DW_OP_LLVM_convert
Only used in LLVM metadata.
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< PhiNode * > Phi
NodeAddr< UseNode * > Use
friend class Instruction
Iterator for Instructions in a BasicBlock.
LLVM_ABI iterator begin() const
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
unsigned KindType
For isa, dyn_cast, etc. operations on TelemetryInfo.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
bool operator!=(uint64_t V1, const APInt &V2)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
LLVM_ABI char & LoopSimplifyID
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
DomTreeNodeBase< BasicBlock > DomTreeNode
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
LLVM_ABI bool matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO, Value *&Start, Value *&Step)
Attempt to match a simple first order recurrence cycle of the form: iv = phi Ty [Start,...
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
LLVM_ABI bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
LLVM_ABI void initializeLoopStrengthReducePass(PassRegistry &)
auto reverse(ContainerTy &&C)
LLVM_ABI const SCEV * denormalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE)
Denormalize S to be post-increment for all loops present in Loops.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI void SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef< BasicBlock * > Preds, const char *Suffix, const char *Suffix2, SmallVectorImpl< BasicBlock * > &NewBBs, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method transforms the landing pad, OrigBB, by introducing two new basic blocks into the function...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI const SCEV * normalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE, bool CheckInvertible=true)
Normalize S to be post-increment for all loops present in Loops.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
LLVM_ABI Pass * createLoopStrengthReducePass()
LLVM_ABI BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructionsPermissive(SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
Same functionality as RecursivelyDeleteTriviallyDeadInstructions, but allow instructions that are not...
constexpr unsigned BitWidth
LLVM_ABI bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
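
A minimal sketch, not the actual LoopStrengthReduce entry point, of the usual shape of a new-pass-manager loop pass run() and how the two PreservedAnalyses helpers above are typically chosen between.

#include "llvm/Transforms/Scalar/LoopPassManager.h"

using namespace llvm;

namespace {
struct ExampleLoopPass : PassInfoMixin<ExampleLoopPass> {
  PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
                        LoopStandardAnalysisResults &AR, LPMUpdater &U) {
    bool Changed = false;
    // ... transform L using AR.SE, AR.DT, AR.LI, AR.TTI ...
    return Changed ? getLoopPassPreservedAnalyses() : PreservedAnalyses::all();
  }
};
} // end anonymous namespace
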
SmallPtrSet< const Loop *, 2 > PostIncLoopSet
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, SmallVector< WeakTrackingVH, 16 > &DeadInsts)
If the final value of any expressions that are recurrent in the loop can be computed,...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
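
A minimal sketch contrasting the STLExtras range helpers above with their std:: equivalents; the Ints container is a stand-in for any range.

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"

using namespace llvm;

static bool allStrictlyPositive(const SmallVectorImpl<int> &Ints) {
  // Equivalent to std::all_of(Ints.begin(), Ints.end(), ...).
  return all_of(Ints, [](int V) { return V > 0; });
}

static bool containsZero(const SmallVectorImpl<int> &Ints) {
  return is_contained(Ints, 0);
}
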
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Attributes of a target dependent hardware loop.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
TargetTransformInfo & TTI
Information about a load/store intrinsic defined by the target.
Value * PtrVal
This is the pointer that the intrinsic is loading from or storing to.