41#include "llvm/Config/llvm-config.h"
62#include "llvm/IR/IntrinsicsAArch64.h"
108#define DEBUG_TYPE "codegenprepare"
111STATISTIC(NumPHIsElim,
"Number of trivial PHIs eliminated");
112STATISTIC(NumGEPsElim,
"Number of GEPs converted to casts");
113STATISTIC(NumCmpUses,
"Number of uses of Cmp expressions replaced with uses of "
115STATISTIC(NumCastUses,
"Number of uses of Cast expressions replaced with uses "
117STATISTIC(NumMemoryInsts,
"Number of memory instructions whose address "
118 "computations were sunk");
120 "Number of phis created when address "
121 "computations were sunk to memory instructions");
123 "Number of select created when address "
124 "computations were sunk to memory instructions");
125STATISTIC(NumExtsMoved,
"Number of [s|z]ext instructions combined with loads");
126STATISTIC(NumExtUses,
"Number of uses of [s|z]ext instructions optimized");
128 "Number of and mask instructions added to form ext loads");
129STATISTIC(NumAndUses,
"Number of uses of and mask instructions optimized");
130STATISTIC(NumRetsDup,
"Number of return instructions duplicated");
131STATISTIC(NumDbgValueMoved,
"Number of debug value instructions moved");
132STATISTIC(NumSelectsExpanded,
"Number of selects turned into branches");
133STATISTIC(NumStoreExtractExposed,
"Number of store(extractelement) exposed");
137 cl::desc(
"Disable branch optimizations in CodeGenPrepare"));
141 cl::desc(
"Disable GC optimizations in CodeGenPrepare"));
146 cl::desc(
"Disable select to branch conversion."));
150 cl::desc(
"Address sinking in CGP using GEPs."));
154 cl::desc(
"Enable sinkinig and/cmp into branches."));
158 cl::desc(
"Disable store(extract) optimizations in CodeGenPrepare"));
162 cl::desc(
"Stress test store(extract) optimizations in CodeGenPrepare"));
166 cl::desc(
"Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
171 cl::desc(
"Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
172 "optimization in CodeGenPrepare"));
176 cl::desc(
"Disable protection against removing loop preheaders"));
180 cl::desc(
"Use profile info to add section prefix for hot/cold functions"));
183 "profile-unknown-in-special-section",
cl::Hidden,
184 cl::desc(
"In profiling mode like sampleFDO, if a function doesn't have "
185 "profile, we cannot tell the function is cold for sure because "
186 "it may be a function newly added without ever being sampled. "
187 "With the flag enabled, compiler can put such profile unknown "
188 "functions into a special section, so runtime system can choose "
189 "to handle it in a different way than .text section, to save "
190 "RAM for example. "));
194 cl::desc(
"Use the basic-block-sections profile to determine the text "
195 "section prefix for hot functions. Functions with "
196 "basic-block-sections profile will be placed in `.text.hot` "
197 "regardless of their FDO profile info. Other functions won't be "
198 "impacted, i.e., their prefixes will be decided by FDO/sampleFDO "
203 cl::desc(
"Skip merging empty blocks if (frequency of empty block) / "
204 "(frequency of destination block) is greater than this ratio"));
208 cl::desc(
"Force store splitting no matter what the target query says."));
212 cl::desc(
"Enable merging of redundant sexts when one is dominating"
218 cl::desc(
"Disables combining addressing modes with different parts "
219 "in optimizeMemoryInst."));
223 cl::desc(
"Allow creation of Phis in Address sinking."));
227 cl::desc(
"Allow creation of selects in Address sinking."));
231 cl::desc(
"Allow combining of BaseReg field in Address sinking."));
235 cl::desc(
"Allow combining of BaseGV field in Address sinking."));
239 cl::desc(
"Allow combining of BaseOffs field in Address sinking."));
243 cl::desc(
"Allow combining of ScaledReg field in Address sinking."));
248 cl::desc(
"Enable splitting large offset of GEP."));
252 cl::desc(
"Enable ICMP_EQ to ICMP_S(L|G)T conversion."));
256 cl::desc(
"Enable BFI update verification for "
261 cl::desc(
"Enable converting phi types in CodeGenPrepare"));
265 cl::desc(
"Least BB number of huge function."));
293class TypePromotionTransaction;
304 std::unique_ptr<BlockFrequencyInfo>
BFI;
305 std::unique_ptr<BranchProbabilityInfo> BPI;
320 SetOfInstrs InsertedInsts;
324 InstrToOrigTy PromotedInsts;
327 SetOfInstrs RemovedInsts;
346 ValueToSExts ValToSExtendedUses;
356 std::unique_ptr<DominatorTree> DT;
360 bool IsHugeFunc =
false;
389 template <
typename F>
390 void resetIteratorIfInvalidatedWhileCalling(
BasicBlock *BB,
F f) {
394 Value *CurValue = &*CurInstIterator;
401 if (IterHandle != CurValue) {
402 CurInstIterator = BB->
begin();
410 DT = std::make_unique<DominatorTree>(
F);
414 void removeAllAssertingVHReferences(
Value *V);
417 bool eliminateMostlyEmptyBlocks(
Function &
F);
420 void eliminateMostlyEmptyBlock(
BasicBlock *BB);
425 bool optimizeInst(
Instruction *
I, ModifyDT &ModifiedDT);
429 bool optimizeInlineAsmInst(
CallInst *CS);
433 bool optimizeLoadExt(
LoadInst *Load);
439 bool optimizeSwitchPhiConstants(
SwitchInst *SI);
441 bool optimizeExtractElementInst(
Instruction *Inst);
442 bool dupRetToEnableTailCallOpts(
BasicBlock *BB, ModifyDT &ModifiedDT);
448 bool tryToPromoteExts(TypePromotionTransaction &TPT,
451 unsigned CreatedInstsCost = 0);
453 bool splitLargeGEPOffsets();
457 bool performAddressTypePromotion(
458 Instruction *&Inst,
bool AllowPromotionWithoutCommonHeader,
459 bool HasPromoted, TypePromotionTransaction &TPT,
461 bool splitBranchCondition(
Function &
F, ModifyDT &ModifiedDT);
467 bool optimizeCmp(
CmpInst *Cmp, ModifyDT &ModifiedDT);
468 bool combineToUSubWithOverflow(
CmpInst *Cmp, ModifyDT &ModifiedDT);
469 bool combineToUAddWithOverflow(
CmpInst *Cmp, ModifyDT &ModifiedDT);
475char CodeGenPrepare::ID = 0;
478 "Optimize for code generation",
false,
false)
490bool CodeGenPrepare::runOnFunction(
Function &
F) {
494 DL = &
F.getParent()->getDataLayout();
496 bool EverMadeChange =
false;
498 InsertedInsts.clear();
499 PromotedInsts.clear();
503 SubtargetInfo =
TM->getSubtargetImpl(
F);
504 TLI = SubtargetInfo->getTargetLowering();
505 TRI = SubtargetInfo->getRegisterInfo();
506 TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
F);
507 TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
F);
508 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
511 PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
512 BBSectionsProfileReader =
513 getAnalysisIfAvailable<BasicBlockSectionsProfileReader>();
514 OptSize =
F.hasOptSize();
519 F.setSectionPrefix(
"hot");
524 if (
F.hasFnAttribute(Attribute::Hot) ||
526 F.setSectionPrefix(
"hot");
531 F.hasFnAttribute(Attribute::Cold))
532 F.setSectionPrefix(
"unlikely");
535 F.setSectionPrefix(
"unknown");
544 while (BB !=
nullptr) {
558 EverMadeChange |= eliminateAssumptions(
F);
562 EverMadeChange |= eliminateMostlyEmptyBlocks(
F);
564 ModifyDT ModifiedDT = ModifyDT::NotModifyDT;
566 EverMadeChange |= splitBranchCondition(
F, ModifiedDT);
577 bool MadeChange =
true;
578 bool FuncIterated =
false;
584 if (FuncIterated && !FreshBBs.
contains(&BB))
587 ModifyDT ModifiedDTOnIteration = ModifyDT::NotModifyDT;
590 MadeChange |= Changed;
603 else if (FuncIterated)
606 if (ModifiedDTOnIteration == ModifyDT::ModifyBBDT)
611 if (ModifiedDTOnIteration != ModifyDT::NotModifyDT)
616 FuncIterated = IsHugeFunc;
619 MadeChange |= mergeSExts(
F);
620 if (!LargeOffsetGEPMap.
empty())
621 MadeChange |= splitLargeGEPOffsets();
622 MadeChange |= optimizePhiTypes(
F);
625 eliminateFallThrough(
F);
631 EverMadeChange |= MadeChange;
632 SeenChainsForSExt.
clear();
633 ValToSExtendedUses.clear();
634 RemovedInsts.clear();
635 LargeOffsetGEPMap.
clear();
636 LargeOffsetGEPID.
clear();
660 MadeChange |= !WorkList.
empty();
661 while (!WorkList.
empty()) {
674 if (EverMadeChange || MadeChange)
675 MadeChange |= eliminateFallThrough(
F);
677 EverMadeChange |= MadeChange;
684 if (
auto *SP = dyn_cast<GCStatepointInst>(&
I))
686 for (
auto &
I : Statepoints)
687 EverMadeChange |= simplifyOffsetableRelocate(*
I);
692 EverMadeChange |= placeDbgValues(
F);
693 EverMadeChange |= placePseudoProbes(
F);
700 return EverMadeChange;
703bool CodeGenPrepare::eliminateAssumptions(
Function &
F) {
704 bool MadeChange =
false;
706 CurInstIterator = BB.begin();
707 while (CurInstIterator != BB.end()) {
709 if (
auto *Assume = dyn_cast<AssumeInst>(
I)) {
711 Value *Operand = Assume->getOperand(0);
712 Assume->eraseFromParent();
714 resetIteratorIfInvalidatedWhileCalling(&BB, [&]() {
725void CodeGenPrepare::removeAllAssertingVHReferences(
Value *V) {
726 LargeOffsetGEPMap.
erase(V);
727 NewGEPBases.
erase(V);
729 auto GEP = dyn_cast<GetElementPtrInst>(V);
735 auto VecI = LargeOffsetGEPMap.
find(
GEP->getPointerOperand());
736 if (VecI == LargeOffsetGEPMap.
end())
739 auto &GEPVector = VecI->second;
742 if (GEPVector.empty())
743 LargeOffsetGEPMap.
erase(VecI);
752 NewBFI.verifyMatch(*BFI);
758bool CodeGenPrepare::eliminateFallThrough(
Function &
F) {
759 bool Changed =
false;
768 for (
auto &Block : Blocks) {
769 auto *BB = cast_or_null<BasicBlock>(Block);
777 if (!SinglePred || SinglePred == BB || BB->hasAddressTaken())
781 if (Term && !
Term->isConditional()) {
791 FreshBBs.
insert(SinglePred);
799 for (
const auto &Pred : Preds)
800 if (
auto *BB = cast_or_null<BasicBlock>(Pred))
816 if (BBI != BB->
begin()) {
818 while (isa<DbgInfoIntrinsic>(BBI)) {
819 if (BBI == BB->
begin())
823 if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
832 if (!canMergeBlocks(BB, DestBB))
842bool CodeGenPrepare::eliminateMostlyEmptyBlocks(
Function &
F) {
845 while (!LoopList.empty()) {
846 Loop *
L = LoopList.pop_back_val();
849 Preheaders.
insert(Preheader);
852 bool MadeChange =
false;
860 for (
auto &Block : Blocks) {
861 BasicBlock *BB = cast_or_null<BasicBlock>(Block);
864 BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB);
866 !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.
count(BB)))
869 eliminateMostlyEmptyBlock(BB);
875bool CodeGenPrepare::isMergingEmptyBlockProfitable(
BasicBlock *BB,
891 if (
auto *CBI = dyn_cast<CallBrInst>((Pred)->getTerminator()))
892 for (
unsigned i = 0, e = CBI->getNumSuccessors(); i != e; ++i)
893 if (DestBB == CBI->getSuccessor(i))
924 if (!isa<PHINode>(DestBB->
begin()))
932 if (DestBBPred == BB)
936 return DestPN.getIncomingValueForBlock(BB) ==
937 DestPN.getIncomingValueForBlock(DestBBPred);
939 SameIncomingValueBBs.
insert(DestBBPred);
945 if (SameIncomingValueBBs.
count(Pred))
951 for (
auto *SameValueBB : SameIncomingValueBBs)
952 if (SameValueBB->getUniquePredecessor() == Pred &&
953 DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB))
954 BBFreq +=
BFI->getBlockFreq(SameValueBB);
963bool CodeGenPrepare::canMergeBlocks(
const BasicBlock *BB,
971 if (UI->
getParent() != DestBB || !isa<PHINode>(UI))
977 if (
const PHINode *UPN = dyn_cast<PHINode>(UI))
978 for (
unsigned I = 0,
E = UPN->getNumIncomingValues();
I !=
E; ++
I) {
980 if (
Insn &&
Insn->getParent() == BB &&
981 Insn->getParent() != UPN->getIncomingBlock(
I))
991 const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->
begin());
997 if (
const PHINode *BBPN = dyn_cast<PHINode>(BB->
begin())) {
999 for (
unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
1000 BBPreds.
insert(BBPN->getIncomingBlock(i));
1008 if (BBPreds.
count(Pred)) {
1010 const Value *V1 = PN.getIncomingValueForBlock(Pred);
1011 const Value *
V2 = PN.getIncomingValueForBlock(BB);
1014 if (
const PHINode *V2PN = dyn_cast<PHINode>(V2))
1015 if (V2PN->getParent() == BB)
1016 V2 = V2PN->getIncomingValueForBlock(Pred);
1032 auto *OldI = dyn_cast<Instruction>(Old);
1046void CodeGenPrepare::eliminateMostlyEmptyBlock(
BasicBlock *BB) {
1056 if (SinglePred != DestBB) {
1057 assert(SinglePred == BB &&
1058 "Single predecessor not the same as predecessor");
1067 FreshBBs.
insert(SinglePred);
1068 FreshBBs.
erase(DestBB);
1078 Value *InVal = PN.removeIncomingValue(BB,
false);
1082 PHINode *InValPhi = dyn_cast<PHINode>(InVal);
1083 if (InValPhi && InValPhi->
getParent() == BB) {
1092 for (
unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
1093 PN.addIncoming(InVal, BBPN->getIncomingBlock(i));
1096 PN.addIncoming(InVal, Pred);
1120 for (
auto *ThisRelocate : AllRelocateCalls) {
1121 auto K = std::make_pair(ThisRelocate->getBasePtrIndex(),
1122 ThisRelocate->getDerivedPtrIndex());
1123 RelocateIdxMap.
insert(std::make_pair(K, ThisRelocate));
1125 for (
auto &Item : RelocateIdxMap) {
1126 std::pair<unsigned, unsigned> Key = Item.first;
1127 if (Key.first == Key.second)
1132 auto BaseKey = std::make_pair(Key.first, Key.first);
1135 auto MaybeBase = RelocateIdxMap.
find(BaseKey);
1136 if (MaybeBase == RelocateIdxMap.
end())
1141 RelocateInstMap[MaybeBase->second].push_back(
I);
1149 for (
unsigned i = 1; i <
GEP->getNumOperands(); i++) {
1151 auto *Op = dyn_cast<ConstantInt>(
GEP->getOperand(i));
1152 if (!Op || Op->getZExtValue() > 20)
1156 for (
unsigned i = 1; i <
GEP->getNumOperands(); i++)
1166 bool MadeChange =
false;
1174 &*R != RelocatedBase; ++R)
1175 if (
auto *RI = dyn_cast<GCRelocateInst>(R))
1184 "Not relocating a derived object of the original base object");
1185 if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) {
1199 auto *Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr());
1200 if (!Derived || Derived->getPointerOperand() !=
Base)
1209 "Should always have one since it's not a terminator");
1213 Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
1237 Value *ActualRelocatedBase = RelocatedBase;
1238 if (RelocatedBase->
getType() !=
Base->getType()) {
1239 ActualRelocatedBase =
1240 Builder.CreateBitCast(RelocatedBase,
Base->getType());
1242 Value *Replacement =
1243 Builder.CreateGEP(Derived->getSourceElementType(), ActualRelocatedBase,
1249 Value *ActualReplacement = Replacement;
1250 if (Replacement->
getType() != ToReplace->getType()) {
1254 ToReplace->replaceAllUsesWith(ActualReplacement);
1255 ToReplace->eraseFromParent();
1280 bool MadeChange =
false;
1282 for (
auto *U :
I.users())
1289 if (AllRelocateCalls.
size() < 2)
1296 if (RelocateInstMap.
empty())
1299 for (
auto &Item : RelocateInstMap)
1313 bool MadeChange =
false;
1316 Use &TheUse = UI.getUse();
1323 UserBB = PN->getIncomingBlock(TheUse);
1331 if (
User->isEHPad())
1341 if (UserBB == DefBB)
1345 CastInst *&InsertedCast = InsertedCasts[UserBB];
1347 if (!InsertedCast) {
1351 CI->
getType(),
"", &*InsertPt);
1356 TheUse = InsertedCast;
1380 if (
auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) {
1382 ASC->getDestAddressSpace()))
1403 TargetLowering::TypePromoteInteger)
1406 TargetLowering::TypePromoteInteger)
1422 match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::uadd_with_overflow>(
1426 match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::usub_with_overflow>(
1437static std::optional<std::pair<Instruction *, Constant *>>
1440 if (!L || L->getHeader() != PN->
getParent() || !L->getLoopLatch())
1441 return std::nullopt;
1444 if (!IVInc || LI->
getLoopFor(IVInc->getParent()) != L)
1445 return std::nullopt;
1449 return std::make_pair(IVInc, Step);
1450 return std::nullopt;
1454 auto *
I = dyn_cast<Instruction>(V);
1461 if (
auto *PN = dyn_cast<PHINode>(
LHS))
1463 return IVInc->first ==
I;
1467bool CodeGenPrepare::replaceMathCmpWithIntrinsic(
BinaryOperator *BO,
1475 assert(L &&
"L should not be null after isIVIncrement()");
1490 return BO->
hasOneUse() && DT.dominates(
Cmp->getParent(),
L->getLoopLatch());
1492 if (BO->
getParent() !=
Cmp->getParent() && !IsReplacableIVIncrement(BO)) {
1515 if (BO->
getOpcode() == Instruction::Add &&
1516 IID == Intrinsic::usub_with_overflow) {
1517 assert(isa<Constant>(Arg1) &&
"Unexpected input for usubo");
1526 if ((BO->
getOpcode() != Instruction::Xor && &Iter == BO) || &Iter == Cmp) {
1531 assert(InsertPt !=
nullptr &&
"Parent block did not contain cmp or binop");
1534 Value *MathOV =
Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
1535 if (BO->
getOpcode() != Instruction::Xor) {
1536 Value *Math =
Builder.CreateExtractValue(MathOV, 0,
"math");
1540 "Patterns with XOr should use the BO only in the compare");
1541 Value *OV =
Builder.CreateExtractValue(MathOV, 1,
"ov");
1543 Cmp->eraseFromParent();
1553 Value *
A = Cmp->getOperand(0), *
B = Cmp->getOperand(1);
1556 if (isa<Constant>(
A))
1569 for (
User *U :
A->users()) {
1571 Add = cast<BinaryOperator>(U);
1580bool CodeGenPrepare::combineToUAddWithOverflow(
CmpInst *Cmp,
1581 ModifyDT &ModifiedDT) {
1582 bool EdgeCase =
false;
1589 A =
Add->getOperand(0);
1590 B =
Add->getOperand(1);
1596 Add->hasNUsesOrMore(EdgeCase ? 1 : 2)))
1602 if (
Add->getParent() !=
Cmp->getParent() && !
Add->hasOneUse())
1605 if (!replaceMathCmpWithIntrinsic(
Add,
A,
B, Cmp,
1606 Intrinsic::uadd_with_overflow))
1610 ModifiedDT = ModifyDT::ModifyInstDT;
1614bool CodeGenPrepare::combineToUSubWithOverflow(
CmpInst *Cmp,
1615 ModifyDT &ModifiedDT) {
1618 if (isa<Constant>(
A) && isa<Constant>(
B))
1623 if (Pred == ICmpInst::ICMP_UGT) {
1625 Pred = ICmpInst::ICMP_ULT;
1630 Pred = ICmpInst::ICMP_ULT;
1635 Pred = ICmpInst::ICMP_ULT;
1637 if (Pred != ICmpInst::ICMP_ULT)
1643 Value *CmpVariableOperand = isa<Constant>(
A) ?
B :
A;
1645 for (
User *U : CmpVariableOperand->
users()) {
1648 Sub = cast<BinaryOperator>(U);
1653 const APInt *CmpC, *AddC;
1656 Sub = cast<BinaryOperator>(U);
1669 Cmp, Intrinsic::usub_with_overflow))
1673 ModifiedDT = ModifyDT::ModifyInstDT;
1694 bool MadeChange =
false;
1697 Use &TheUse = UI.getUse();
1704 if (isa<PHINode>(
User))
1712 if (UserBB == DefBB)
1716 CmpInst *&InsertedCmp = InsertedCmps[UserBB];
1722 Cmp->getOperand(0), Cmp->getOperand(1),
"",
1729 TheUse = InsertedCmp;
1735 if (Cmp->use_empty()) {
1736 Cmp->eraseFromParent();
1768 if (Pred != ICmpInst::ICMP_EQ)
1773 for (
User *U : Cmp->users()) {
1774 if (isa<BranchInst>(U))
1776 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == Cmp)
1795 if (CmpBB != FalseBB)
1798 Value *CmpOp0 = Cmp->getOperand(0), *CmpOp1 = Cmp->getOperand(1);
1802 if (DomPred != ICmpInst::ICMP_SGT && DomPred != ICmpInst::ICMP_SLT)
1812 for (
User *U : Cmp->users()) {
1813 if (
auto *BI = dyn_cast<BranchInst>(U)) {
1818 if (
auto *
SI = dyn_cast<SelectInst>(U)) {
1821 SI->swapProfMetadata();
1830bool CodeGenPrepare::optimizeCmp(
CmpInst *Cmp, ModifyDT &ModifiedDT) {
1834 if (combineToUAddWithOverflow(Cmp, ModifiedDT))
1837 if (combineToUSubWithOverflow(Cmp, ModifiedDT))
1852 SetOfInstrs &InsertedInsts) {
1855 assert(!InsertedInsts.count(AndI) &&
1856 "Attempting to optimize already optimized and instruction");
1857 (void)InsertedInsts;
1866 if (!isa<ConstantInt>(AndI->
getOperand(0)) &&
1871 for (
auto *U : AndI->
users()) {
1875 if (!isa<ICmpInst>(
User))
1879 if (!CmpC || !CmpC->
isZero())
1894 Use &TheUse = UI.getUse();
1912 TheUse = InsertedAnd;
1928 if (!isa<TruncInst>(
User)) {
1929 if (
User->getOpcode() != Instruction::And ||
1935 if ((Cimm & (Cimm + 1)).getBoolValue())
1948 auto *TruncI = cast<TruncInst>(
User);
1949 bool MadeChange =
false;
1952 TruncE = TruncI->user_end();
1953 TruncUI != TruncE;) {
1955 Use &TruncTheUse = TruncUI.getUse();
1956 Instruction *TruncUser = cast<Instruction>(*TruncUI);
1975 if (isa<PHINode>(TruncUser))
1980 if (UserBB == TruncUserBB)
1984 CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
1986 if (!InsertedShift && !InsertedTrunc) {
1990 if (ShiftI->
getOpcode() == Instruction::AShr)
1991 InsertedShift = BinaryOperator::CreateAShr(ShiftI->
getOperand(0), CI,
1994 InsertedShift = BinaryOperator::CreateLShr(ShiftI->
getOperand(0), CI,
2001 assert(TruncInsertPt != TruncUserBB->
end());
2004 TruncI->
getType(),
"", &*TruncInsertPt);
2005 InsertedTrunc->
setDebugLoc(TruncI->getDebugLoc());
2009 TruncTheUse = InsertedTrunc;
2042 bool MadeChange =
false;
2045 Use &TheUse = UI.getUse();
2051 if (isa<PHINode>(
User))
2059 if (UserBB == DefBB) {
2074 if (isa<TruncInst>(
User) &&
2087 if (!InsertedShift) {
2091 if (ShiftI->
getOpcode() == Instruction::AShr)
2092 InsertedShift = BinaryOperator::CreateAShr(ShiftI->
getOperand(0), CI,
2095 InsertedShift = BinaryOperator::CreateLShr(ShiftI->
getOperand(0), CI,
2103 TheUse = InsertedShift;
2151 if (Ty->
isVectorTy() || SizeInBits >
DL->getLargestLegalIntTypeSizeInBits())
2163 FreshBBs.
insert(CallBlock);
2171 FreshBBs.
insert(EndBlock);
2183 Op =
Builder.CreateFreeze(Op, Op->getName() +
".fr");
2185 Builder.CreateCondBr(Cmp, EndBlock, CallBlock);
2201 ModifiedDT = ModifyDT::ModifyBBDT;
2205bool CodeGenPrepare::optimizeCallInst(
CallInst *CI, ModifyDT &ModifiedDT) {
2214 CurInstIterator = BB->
begin();
2221 if (optimizeInlineAsmInst(CI))
2230 for (
auto &
Arg : CI->
args()) {
2235 if (!
Arg->getType()->isPointerTy())
2238 cast<PointerType>(
Arg->getType())->getAddressSpace()),
2245 if ((AI = dyn_cast<AllocaInst>(Val)) && AI->
getAlign() < PrefAlign &&
2264 if (!MIDestAlign || DestAlign > *MIDestAlign)
2265 MI->setDestAlignment(DestAlign);
2267 MaybeAlign MTISrcAlign = MTI->getSourceAlign();
2269 if (!MTISrcAlign || SrcAlign > *MTISrcAlign)
2270 MTI->setSourceAlignment(SrcAlign);
2278 if (CI->
hasFnAttr(Attribute::Cold) && !OptSize &&
2280 for (
auto &
Arg : CI->
args()) {
2281 if (!
Arg->getType()->isPointerTy())
2283 unsigned AS =
Arg->getType()->getPointerAddressSpace();
2284 if (optimizeMemoryInst(CI,
Arg,
Arg->getType(), AS))
2293 case Intrinsic::assume:
2295 case Intrinsic::experimental_widenable_condition: {
2304 resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
2309 case Intrinsic::objectsize:
2311 case Intrinsic::is_constant:
2313 case Intrinsic::aarch64_stlxr:
2314 case Intrinsic::aarch64_stxr: {
2323 InsertedInsts.insert(ExtVal);
2327 case Intrinsic::launder_invariant_group:
2328 case Intrinsic::strip_invariant_group: {
2330 auto it = LargeOffsetGEPMap.
find(II);
2331 if (it != LargeOffsetGEPMap.
end()) {
2335 auto GEPs = std::move(it->second);
2336 LargeOffsetGEPMap[ArgVal].append(GEPs.begin(), GEPs.end());
2337 LargeOffsetGEPMap.
erase(II);
2344 case Intrinsic::cttz:
2345 case Intrinsic::ctlz:
2349 case Intrinsic::fshl:
2350 case Intrinsic::fshr:
2351 return optimizeFunnelShift(II);
2352 case Intrinsic::dbg_assign:
2353 case Intrinsic::dbg_value:
2354 return fixupDbgValue(II);
2355 case Intrinsic::masked_gather:
2357 case Intrinsic::masked_scatter:
2364 while (!PtrOps.
empty()) {
2367 if (optimizeMemoryInst(II, PtrVal, AccessTy, AS))
2382 if (
Value *V = Simplifier.optimizeCall(CI, Builder)) {
2421bool CodeGenPrepare::dupRetToEnableTailCallOpts(
BasicBlock *BB,
2422 ModifyDT &ModifiedDT) {
2435 BCI = dyn_cast<BitCastInst>(V);
2439 EVI = dyn_cast<ExtractValueInst>(V);
2446 PN = dyn_cast<PHINode>(V);
2454 auto isLifetimeEndOrBitCastFor = [](
const Instruction *Inst) {
2455 const BitCastInst *BC = dyn_cast<BitCastInst>(Inst);
2459 if (
const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
2468 while (isa<DbgInfoIntrinsic>(BI) || BI == BCI || BI == EVI ||
2469 isa<PseudoProbeInst>(BI) || isLifetimeEndOrBitCastFor(BI))
2482 CallInst *CI = dyn_cast<CallInst>(IncomingVal);
2493 if (!VisitedBBs.
insert(Pred).second)
2495 if (
Instruction *
I = Pred->rbegin()->getPrevNonDebugInstruction(
true)) {
2504 bool Changed =
false;
2505 for (
auto const &TailCallBB : TailCallBBs) {
2508 BranchInst *BI = dyn_cast<BranchInst>(TailCallBB->getTerminator());
2515 BFI->getBlockFreq(BB) >=
BFI->getBlockFreq(TailCallBB));
2518 (
BFI->getBlockFreq(BB) -
BFI->getBlockFreq(TailCallBB)).getFrequency());
2519 ModifiedDT = ModifyDT::ModifyBBDT;
2542 Value *OriginalValue =
nullptr;
2543 bool InBounds =
true;
2547 BaseRegField = 0x01,
2549 BaseOffsField = 0x04,
2550 ScaledRegField = 0x08,
2552 MultipleFields = 0xff
2565 return MultipleFields;
2566 if (BaseGV && other.BaseGV && BaseGV->getType() != other.BaseGV->getType())
2567 return MultipleFields;
2570 return MultipleFields;
2573 if (InBounds != other.InBounds)
2574 return MultipleFields;
2577 unsigned Result = NoField;
2580 if (BaseGV != other.BaseGV)
2582 if (BaseOffs != other.BaseOffs)
2585 Result |= ScaledRegField;
2592 return MultipleFields;
2594 return static_cast<FieldName
>(
Result);
2615 case ScaledRegField:
2622 void SetCombinedField(FieldName
Field,
Value *V,
2628 case ExtAddrMode::BaseRegField:
2631 case ExtAddrMode::BaseGVField:
2638 case ExtAddrMode::ScaledRegField:
2649 case ExtAddrMode::BaseOffsField:
2668#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2670 bool NeedPlus =
false;
2676 BaseGV->printAsOperand(
OS,
false);
2681 OS << (NeedPlus ?
" + " :
"") << BaseOffs;
2686 OS << (NeedPlus ?
" + " :
"") <<
"Base:";
2691 OS << (NeedPlus ?
" + " :
"") <<
Scale <<
"*";
2714class TypePromotionTransaction {
2718 class TypePromotionAction {
2726 TypePromotionAction(
Instruction *Inst) : Inst(Inst) {}
2728 virtual ~TypePromotionAction() =
default;
2735 virtual void undo() = 0;
2740 virtual void commit() {
2746 class InsertionHandler {
2757 bool HasPrevInstruction;
2764 if (HasPrevInstruction)
2765 Point.PrevInst = &*--It;
2772 if (HasPrevInstruction) {
2777 Instruction *Position = &*Point.BB->getFirstInsertionPt();
2787 class InstructionMoveBefore :
public TypePromotionAction {
2789 InsertionHandler Position;
2794 : TypePromotionAction(Inst), Position(Inst) {
2795 LLVM_DEBUG(
dbgs() <<
"Do: move: " << *Inst <<
"\nbefore: " << *Before
2801 void undo()
override {
2803 Position.insert(Inst);
2808 class OperandSetter :
public TypePromotionAction {
2818 : TypePromotionAction(Inst),
Idx(
Idx) {
2820 <<
"for:" << *Inst <<
"\n"
2821 <<
"with:" << *NewVal <<
"\n");
2827 void undo()
override {
2829 <<
"for: " << *Inst <<
"\n"
2830 <<
"with: " << *Origin <<
"\n");
2837 class OperandsHider :
public TypePromotionAction {
2843 OperandsHider(
Instruction *Inst) : TypePromotionAction(Inst) {
2846 OriginalValues.
reserve(NumOpnds);
2847 for (
unsigned It = 0; It < NumOpnds; ++It) {
2859 void undo()
override {
2861 for (
unsigned It = 0, EndIt = OriginalValues.
size(); It != EndIt; ++It)
2867 class TruncBuilder :
public TypePromotionAction {
2874 TruncBuilder(
Instruction *Opnd,
Type *Ty) : TypePromotionAction(Opnd) {
2877 Val =
Builder.CreateTrunc(Opnd, Ty,
"promoted");
2882 Value *getBuiltValue() {
return Val; }
2885 void undo()
override {
2887 if (
Instruction *IVal = dyn_cast<Instruction>(Val))
2888 IVal->eraseFromParent();
2893 class SExtBuilder :
public TypePromotionAction {
2901 : TypePromotionAction(InsertPt) {
2903 Val =
Builder.CreateSExt(Opnd, Ty,
"promoted");
2908 Value *getBuiltValue() {
return Val; }
2911 void undo()
override {
2913 if (
Instruction *IVal = dyn_cast<Instruction>(Val))
2914 IVal->eraseFromParent();
2919 class ZExtBuilder :
public TypePromotionAction {
2927 : TypePromotionAction(InsertPt) {
2930 Val =
Builder.CreateZExt(Opnd, Ty,
"promoted");
2935 Value *getBuiltValue() {
return Val; }
2938 void undo()
override {
2940 if (
Instruction *IVal = dyn_cast<Instruction>(Val))
2941 IVal->eraseFromParent();
2946 class TypeMutator :
public TypePromotionAction {
2953 : TypePromotionAction(Inst), OrigTy(Inst->
getType()) {
2954 LLVM_DEBUG(
dbgs() <<
"Do: MutateType: " << *Inst <<
" with " << *NewTy
2960 void undo()
override {
2961 LLVM_DEBUG(
dbgs() <<
"Undo: MutateType: " << *Inst <<
" with " << *OrigTy
2968 class UsesReplacer :
public TypePromotionAction {
2970 struct InstructionAndIdx {
2978 : Inst(Inst),
Idx(
Idx) {}
2995 : TypePromotionAction(Inst),
New(
New) {
2996 LLVM_DEBUG(
dbgs() <<
"Do: UsersReplacer: " << *Inst <<
" with " << *New
3001 OriginalUses.
push_back(InstructionAndIdx(UserI,
U.getOperandNo()));
3012 void undo()
override {
3014 for (InstructionAndIdx &
Use : OriginalUses)
3015 Use.Inst->setOperand(
Use.Idx, Inst);
3020 for (
auto *DVI : DbgValues)
3021 DVI->replaceVariableLocationOp(New, Inst);
3026 class InstructionRemover :
public TypePromotionAction {
3028 InsertionHandler Inserter;
3032 OperandsHider Hider;
3035 UsesReplacer *Replacer =
nullptr;
3038 SetOfInstrs &RemovedInsts;
3045 InstructionRemover(
Instruction *Inst, SetOfInstrs &RemovedInsts,
3046 Value *New =
nullptr)
3047 : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
3048 RemovedInsts(RemovedInsts) {
3050 Replacer =
new UsesReplacer(Inst, New);
3051 LLVM_DEBUG(
dbgs() <<
"Do: InstructionRemover: " << *Inst <<
"\n");
3052 RemovedInsts.insert(Inst);
3059 ~InstructionRemover()
override {
delete Replacer; }
3063 void undo()
override {
3064 LLVM_DEBUG(
dbgs() <<
"Undo: InstructionRemover: " << *Inst <<
"\n");
3065 Inserter.insert(Inst);
3069 RemovedInsts.erase(Inst);
3077 using ConstRestorationPt =
const TypePromotionAction *;
3079 TypePromotionTransaction(SetOfInstrs &RemovedInsts)
3080 : RemovedInsts(RemovedInsts) {}
3087 void rollback(ConstRestorationPt Point);
3090 ConstRestorationPt getRestorationPoint()
const;
3126 SetOfInstrs &RemovedInsts;
3131void TypePromotionTransaction::setOperand(
Instruction *Inst,
unsigned Idx,
3133 Actions.
push_back(std::make_unique<TypePromotionTransaction::OperandSetter>(
3134 Inst,
Idx, NewVal));
3137void TypePromotionTransaction::eraseInstruction(
Instruction *Inst,
3140 std::make_unique<TypePromotionTransaction::InstructionRemover>(
3141 Inst, RemovedInsts, NewVal));
3144void TypePromotionTransaction::replaceAllUsesWith(
Instruction *Inst,
3147 std::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
3150void TypePromotionTransaction::mutateType(
Instruction *Inst,
Type *NewTy) {
3152 std::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
3156 std::unique_ptr<TruncBuilder>
Ptr(
new TruncBuilder(Opnd, Ty));
3158 Actions.push_back(std::move(
Ptr));
3164 std::unique_ptr<SExtBuilder>
Ptr(
new SExtBuilder(Inst, Opnd, Ty));
3166 Actions.push_back(std::move(
Ptr));
3172 std::unique_ptr<ZExtBuilder>
Ptr(
new ZExtBuilder(Inst, Opnd, Ty));
3174 Actions.push_back(std::move(
Ptr));
3178void TypePromotionTransaction::moveBefore(
Instruction *Inst,
3181 std::make_unique<TypePromotionTransaction::InstructionMoveBefore>(
3185TypePromotionTransaction::ConstRestorationPt
3186TypePromotionTransaction::getRestorationPoint()
const {
3187 return !Actions.empty() ? Actions.back().get() :
nullptr;
3190bool TypePromotionTransaction::commit() {
3191 for (std::unique_ptr<TypePromotionAction> &Action : Actions)
3198void TypePromotionTransaction::rollback(
3199 TypePromotionTransaction::ConstRestorationPt Point) {
3200 while (!Actions.empty() && Point != Actions.back().get()) {
3201 std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
3211class AddressingModeMatcher {
3230 const SetOfInstrs &InsertedInsts;
3233 InstrToOrigTy &PromotedInsts;
3236 TypePromotionTransaction &TPT;
3239 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP;
3243 bool IgnoreProfitability;
3251 AddressingModeMatcher(
3256 const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
3257 TypePromotionTransaction &TPT,
3260 : AddrModeInsts(AMI), TLI(TLI),
TRI(
TRI),
3261 DL(
MI->getModule()->getDataLayout()), LI(LI), getDTFn(getDTFn),
3262 AccessTy(AT), AddrSpace(AS), MemoryInst(
MI),
AddrMode(AM),
3263 InsertedInsts(InsertedInsts), PromotedInsts(PromotedInsts), TPT(TPT),
3264 LargeOffsetGEP(LargeOffsetGEP), OptSize(OptSize), PSI(PSI),
BFI(
BFI) {
3265 IgnoreProfitability =
false;
3282 InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
3287 bool Success = AddressingModeMatcher(AddrModeInsts, TLI,
TRI, LI, getDTFn,
3288 AccessTy, AS, MemoryInst, Result,
3289 InsertedInsts, PromotedInsts, TPT,
3290 LargeOffsetGEP, OptSize, PSI, BFI)
3298 bool matchScaledValue(
Value *ScaleReg, int64_t Scale,
unsigned Depth);
3300 bool matchOperationAddr(
User *AddrInst,
unsigned Opcode,
unsigned Depth,
3301 bool *MovedAway =
nullptr);
3302 bool isProfitableToFoldIntoAddressingMode(
Instruction *
I,
3305 bool valueAlreadyLiveAtInst(
Value *Val,
Value *KnownLive1,
Value *KnownLive2);
3306 bool isPromotionProfitable(
unsigned NewCost,
unsigned OldCost,
3307 Value *PromotedOperand)
const;
3313class PhiNodeSetIterator {
3314 PhiNodeSet *
const Set;
3315 size_t CurrentIndex = 0;
3320 PhiNodeSetIterator(PhiNodeSet *
const Set,
size_t Start);
3322 PhiNodeSetIterator &operator++();
3338 friend class PhiNodeSetIterator;
3341 using iterator = PhiNodeSetIterator;
3356 size_t FirstValidElement = 0;
3363 if (NodeMap.insert(std::make_pair(
Ptr,
NodeList.size())).second) {
3374 if (NodeMap.erase(
Ptr)) {
3375 SkipRemovedElements(FirstValidElement);
3385 FirstValidElement = 0;
3391 if (FirstValidElement == 0)
3392 SkipRemovedElements(FirstValidElement);
3393 return PhiNodeSetIterator(
this, FirstValidElement);
3397 iterator
end() {
return PhiNodeSetIterator(
this,
NodeList.size()); }
3400 size_t size()
const {
return NodeMap.size(); }
3411 void SkipRemovedElements(
size_t &CurrentIndex) {
3412 while (CurrentIndex <
NodeList.size()) {
3413 auto it = NodeMap.find(
NodeList[CurrentIndex]);
3416 if (it != NodeMap.end() && it->second == CurrentIndex)
3423PhiNodeSetIterator::PhiNodeSetIterator(PhiNodeSet *
const Set,
size_t Start)
3424 : Set(Set), CurrentIndex(Start) {}
3426PHINode *PhiNodeSetIterator::operator*()
const {
3428 "PhiNodeSet access out of range");
3429 return Set->NodeList[CurrentIndex];
3432PhiNodeSetIterator &PhiNodeSetIterator::operator++() {
3434 "PhiNodeSet access out of range");
3436 Set->SkipRemovedElements(CurrentIndex);
3440bool PhiNodeSetIterator::operator==(
const PhiNodeSetIterator &RHS)
const {
3441 return CurrentIndex ==
RHS.CurrentIndex;
3444bool PhiNodeSetIterator::operator!=(
const PhiNodeSetIterator &RHS)
const {
3445 return !((*this) ==
RHS);
3451class SimplificationTracker {
3456 PhiNodeSet AllPhiNodes;
3465 auto SV = Storage.
find(V);
3466 if (SV == Storage.
end())
3476 while (!WorkList.
empty()) {
3478 if (!Visited.
insert(
P).second)
3480 if (
auto *PI = dyn_cast<Instruction>(
P))
3482 for (
auto *U : PI->users())
3485 PI->replaceAllUsesWith(V);
3486 if (
auto *
PHI = dyn_cast<PHINode>(PI))
3487 AllPhiNodes.erase(
PHI);
3488 if (
auto *
Select = dyn_cast<SelectInst>(PI))
3490 PI->eraseFromParent();
3500 while (OldReplacement !=
From) {
3502 To = dyn_cast<PHINode>(OldReplacement);
3503 OldReplacement = Get(
From);
3505 assert(To && Get(To) == To &&
"Replacement PHI node is already replaced.");
3507 From->replaceAllUsesWith(To);
3508 AllPhiNodes.erase(
From);
3509 From->eraseFromParent();
3512 PhiNodeSet &newPhiNodes() {
return AllPhiNodes; }
3514 void insertNewPhi(
PHINode *PN) { AllPhiNodes.insert(PN); }
3518 unsigned countNewPhiNodes()
const {
return AllPhiNodes.size(); }
3520 unsigned countNewSelectNodes()
const {
return AllSelectNodes.
size(); }
3522 void destroyNewNodes(
Type *CommonType) {
3525 for (
auto *
I : AllPhiNodes) {
3526 I->replaceAllUsesWith(Dummy);
3527 I->eraseFromParent();
3529 AllPhiNodes.clear();
3530 for (
auto *
I : AllSelectNodes) {
3531 I->replaceAllUsesWith(Dummy);
3532 I->eraseFromParent();
3534 AllSelectNodes.clear();
3539class AddressingModeCombiner {
3541 typedef std::pair<PHINode *, PHINode *> PHIPair;
3548 ExtAddrMode::FieldName DifferentField = ExtAddrMode::NoField;
3551 bool AllAddrModesTrivial =
true;
3554 Type *CommonType =
nullptr;
3564 : SQ(_SQ), Original(OriginalValue) {}
3576 AllAddrModesTrivial = AllAddrModesTrivial && NewAddrMode.isTrivial();
3579 if (AddrModes.
empty()) {
3587 ExtAddrMode::FieldName ThisDifferentField =
3588 AddrModes[0].compare(NewAddrMode);
3589 if (DifferentField == ExtAddrMode::NoField)
3590 DifferentField = ThisDifferentField;
3591 else if (DifferentField != ThisDifferentField)
3592 DifferentField = ExtAddrMode::MultipleFields;
3595 bool CanHandle = DifferentField != ExtAddrMode::MultipleFields;
3598 CanHandle = CanHandle && DifferentField != ExtAddrMode::ScaleField;
3603 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseOffsField ||
3608 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseGVField ||
3609 !NewAddrMode.HasBaseReg);
3626 bool combineAddrModes() {
3628 if (AddrModes.
size() == 0)
3632 if (AddrModes.
size() == 1 || DifferentField == ExtAddrMode::NoField)
3637 if (AllAddrModesTrivial)
3640 if (!addrModeCombiningAllowed())
3646 FoldAddrToValueMapping
Map;
3647 if (!initializeMap(Map))
3650 Value *CommonValue = findCommon(Map);
3652 AddrModes[0].SetCombinedField(DifferentField, CommonValue, AddrModes);
3653 return CommonValue !=
nullptr;
3662 bool initializeMap(FoldAddrToValueMapping &Map) {
3667 for (
auto &AM : AddrModes) {
3668 Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy);
3671 if (CommonType && CommonType !=
Type)
3674 Map[AM.OriginalValue] = DV;
3679 assert(CommonType &&
"At least one non-null value must be!");
3680 for (
auto *V : NullValue)
3708 Value *findCommon(FoldAddrToValueMapping &Map) {
3716 SimplificationTracker
ST(SQ);
3721 InsertPlaceholders(Map, TraverseOrder, ST);
3724 FillPlaceholders(Map, TraverseOrder, ST);
3727 ST.destroyNewNodes(CommonType);
3732 unsigned PhiNotMatchedCount = 0;
3734 ST.destroyNewNodes(CommonType);
3738 auto *
Result =
ST.Get(
Map.find(Original)->second);
3740 NumMemoryInstsPhiCreated +=
ST.countNewPhiNodes() + PhiNotMatchedCount;
3741 NumMemoryInstsSelectCreated +=
ST.countNewSelectNodes();
3750 PhiNodeSet &PhiNodesToMatch) {
3757 while (!WorkList.
empty()) {
3759 if (!Visited.
insert(Item).second)
3766 for (
auto *
B : Item.first->blocks()) {
3767 Value *FirstValue = Item.first->getIncomingValueForBlock(
B);
3768 Value *SecondValue = Item.second->getIncomingValueForBlock(
B);
3769 if (FirstValue == SecondValue)
3772 PHINode *FirstPhi = dyn_cast<PHINode>(FirstValue);
3773 PHINode *SecondPhi = dyn_cast<PHINode>(SecondValue);
3779 if (!FirstPhi || !SecondPhi || !PhiNodesToMatch.count(FirstPhi) ||
3784 if (Matcher.
count({FirstPhi, SecondPhi}))
3789 if (MatchedPHIs.
insert(FirstPhi).second)
3790 Matcher.
insert({FirstPhi, SecondPhi});
3792 WorkList.
push_back({FirstPhi, SecondPhi});
3801 bool MatchPhiSet(SimplificationTracker &ST,
bool AllowNewPhiNodes,
3802 unsigned &PhiNotMatchedCount) {
3808 PhiNodeSet &PhiNodesToMatch =
ST.newPhiNodes();
3809 while (PhiNodesToMatch.size()) {
3813 WillNotMatch.
clear();
3817 bool IsMatched =
false;
3818 for (
auto &
P :
PHI->getParent()->phis()) {
3820 if (PhiNodesToMatch.count(&
P))
3822 if ((IsMatched = MatchPhiNode(
PHI, &
P, Matched, PhiNodesToMatch)))
3827 for (
auto M : Matched)
3833 for (
auto MV : Matched)
3834 ST.ReplacePhi(MV.first, MV.second);
3839 if (!AllowNewPhiNodes)
3842 PhiNotMatchedCount += WillNotMatch.
size();
3843 for (
auto *
P : WillNotMatch)
3844 PhiNodesToMatch.erase(
P);
3849 void FillPlaceholders(FoldAddrToValueMapping &Map,
3851 SimplificationTracker &ST) {
3852 while (!TraverseOrder.
empty()) {
3854 assert(
Map.contains(Current) &&
"No node to fill!!!");
3859 auto *CurrentSelect = cast<SelectInst>(Current);
3860 auto *TrueValue = CurrentSelect->getTrueValue();
3861 assert(
Map.contains(TrueValue) &&
"No True Value!");
3862 Select->setTrueValue(
ST.Get(Map[TrueValue]));
3863 auto *FalseValue = CurrentSelect->getFalseValue();
3864 assert(
Map.contains(FalseValue) &&
"No False Value!");
3865 Select->setFalseValue(
ST.Get(Map[FalseValue]));
3868 auto *
PHI = cast<PHINode>(V);
3871 Value *PV = cast<PHINode>(Current)->getIncomingValueForBlock(
B);
3872 assert(
Map.contains(PV) &&
"No predecessor Value!");
3873 PHI->addIncoming(
ST.Get(Map[PV]),
B);
3876 Map[Current] =
ST.Simplify(V);
3885 void InsertPlaceholders(FoldAddrToValueMapping &Map,
3887 SimplificationTracker &ST) {
3889 assert((isa<PHINode>(Original) || isa<SelectInst>(Original)) &&
3890 "Address must be a Phi or Select node");
3893 while (!Worklist.
empty()) {
3896 if (
Map.contains(Current))
3902 if (
SelectInst *CurrentSelect = dyn_cast<SelectInst>(Current)) {
3906 CurrentSelect->getCondition(), Dummy, Dummy,
3907 CurrentSelect->getName(), CurrentSelect, CurrentSelect);
3911 Worklist.
push_back(CurrentSelect->getTrueValue());
3912 Worklist.
push_back(CurrentSelect->getFalseValue());
3915 PHINode *CurrentPhi = cast<PHINode>(Current);
3920 ST.insertNewPhi(
PHI);
3926 bool addrModeCombiningAllowed() {
3929 switch (DifferentField) {
3932 case ExtAddrMode::BaseRegField:
3934 case ExtAddrMode::BaseGVField:
3936 case ExtAddrMode::BaseOffsField:
3938 case ExtAddrMode::ScaledRegField:
3948bool AddressingModeMatcher::matchScaledValue(
Value *ScaleReg, int64_t Scale,
3953 return matchAddr(ScaleReg,
Depth);
3968 TestAddrMode.
Scale += Scale;
3983 Value *AddLHS =
nullptr;
3984 if (isa<Instruction>(ScaleReg) &&
3987 TestAddrMode.InBounds =
false;
3994 AddrModeInsts.
push_back(cast<Instruction>(ScaleReg));
4004 auto GetConstantStep =
4005 [
this](
const Value *
V) -> std::optional<std::pair<Instruction *, APInt>> {
4006 auto *PN = dyn_cast<PHINode>(V);
4008 return std::nullopt;
4011 return std::nullopt;
4018 if (
auto *OIVInc = dyn_cast<OverflowingBinaryOperator>(IVInc->first))
4019 if (OIVInc->hasNoSignedWrap() || OIVInc->hasNoUnsignedWrap())
4020 return std::nullopt;
4021 if (
auto *ConstantStep = dyn_cast<ConstantInt>(IVInc->second))
4022 return std::make_pair(IVInc->first, ConstantStep->getValue());
4023 return std::nullopt;
4038 if (
auto IVStep = GetConstantStep(ScaleReg)) {
4045 APInt Step = IVStep->second;
4047 if (
Offset.isSignedIntN(64)) {
4048 TestAddrMode.InBounds =
false;
4050 TestAddrMode.BaseOffs -=
Offset.getLimitedValue();
4055 getDTFn().
dominates(IVInc, MemoryInst)) {
4056 AddrModeInsts.
push_back(cast<Instruction>(IVInc));
4075 switch (
I->getOpcode()) {
4076 case Instruction::BitCast:
4077 case Instruction::AddrSpaceCast:
4079 if (
I->getType() ==
I->getOperand(0)->getType())
4081 return I->getType()->isIntOrPtrTy();
4082 case Instruction::PtrToInt:
4085 case Instruction::IntToPtr:
4088 case Instruction::Add:
4090 case Instruction::Mul:
4091 case Instruction::Shl:
4093 return isa<ConstantInt>(
I->getOperand(1));
4094 case Instruction::GetElementPtr:
4107 Instruction *PromotedInst = dyn_cast<Instruction>(Val);
4122class TypePromotionHelper {
4125 static void addPromotedInst(InstrToOrigTy &PromotedInsts,
4127 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4128 InstrToOrigTy::iterator It = PromotedInsts.find(ExtOpnd);
4129 if (It != PromotedInsts.end()) {
4132 if (It->second.getInt() == ExtTy)
4138 ExtTy = BothExtension;
4140 PromotedInsts[ExtOpnd] = TypeIsSExt(ExtOpnd->
getType(), ExtTy);
4147 static const Type *getOrigType(
const InstrToOrigTy &PromotedInsts,
4149 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4150 InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
4151 if (It != PromotedInsts.end() && It->second.getInt() == ExtTy)
4152 return It->second.getPointer();
4167 static bool canGetThrough(
const Instruction *Inst,
Type *ConsideredExtType,
4168 const InstrToOrigTy &PromotedInsts,
bool IsSExt);
4172 static bool shouldExtOperand(
const Instruction *Inst,
int OpIdx) {
4173 return !(isa<SelectInst>(Inst) && OpIdx == 0);
4185 static Value *promoteOperandForTruncAndAnyExt(
4187 InstrToOrigTy &PromotedInsts,
unsigned &CreatedInstsCost,
4201 TypePromotionTransaction &TPT,
4202 InstrToOrigTy &PromotedInsts,
4203 unsigned &CreatedInstsCost,
4209 static Value *signExtendOperandForOther(
4211 InstrToOrigTy &PromotedInsts,
unsigned &CreatedInstsCost,
4214 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4215 Exts, Truncs, TLI,
true);
4219 static Value *zeroExtendOperandForOther(
4221 InstrToOrigTy &PromotedInsts,
unsigned &CreatedInstsCost,
4224 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4225 Exts, Truncs, TLI,
false);
4231 InstrToOrigTy &PromotedInsts,
4232 unsigned &CreatedInstsCost,
4248 const InstrToOrigTy &PromotedInsts);
4253bool TypePromotionHelper::canGetThrough(
const Instruction *Inst,
4254 Type *ConsideredExtType,
4255 const InstrToOrigTy &PromotedInsts,
4264 if (isa<ZExtInst>(Inst))
4268 if (IsSExt && isa<SExtInst>(Inst))
4273 if (
const auto *BinOp = dyn_cast<BinaryOperator>(Inst))
4274 if (isa<OverflowingBinaryOperator>(BinOp) &&
4275 ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
4276 (IsSExt && BinOp->hasNoSignedWrap())))
4280 if ((Inst->
getOpcode() == Instruction::And ||
4285 if (Inst->
getOpcode() == Instruction::Xor) {
4287 if (
const auto *Cst = dyn_cast<ConstantInt>(Inst->
getOperand(1)))
4288 if (!Cst->getValue().isAllOnes())
4297 if (Inst->
getOpcode() == Instruction::LShr && !IsSExt)
4306 const auto *ExtInst = cast<const Instruction>(*Inst->
user_begin());
4307 if (ExtInst->hasOneUse()) {
4308 const auto *AndInst = dyn_cast<const Instruction>(*ExtInst->user_begin());
4309 if (AndInst && AndInst->getOpcode() == Instruction::And) {
4310 const auto *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1));
4320 if (!isa<TruncInst>(Inst))
4334 Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
4342 const Type *OpndType = getOrigType(PromotedInsts, Opnd, IsSExt);
4345 else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
4355TypePromotionHelper::Action TypePromotionHelper::getAction(
4356 Instruction *Ext,
const SetOfInstrs &InsertedInsts,
4358 assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
4359 "Unexpected instruction type");
4360 Instruction *ExtOpnd = dyn_cast<Instruction>(
Ext->getOperand(0));
4362 bool IsSExt = isa<SExtInst>(Ext);
4366 if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
4372 if (isa<TruncInst>(ExtOpnd) && InsertedInsts.count(ExtOpnd))
4377 if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
4378 isa<ZExtInst>(ExtOpnd))
4379 return promoteOperandForTruncAndAnyExt;
4385 return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
4388Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
4390 InstrToOrigTy &PromotedInsts,
unsigned &CreatedInstsCost,
4396 Value *ExtVal = SExt;
4397 bool HasMergedNonFreeExt =
false;
4398 if (isa<ZExtInst>(SExtOpnd)) {
4401 HasMergedNonFreeExt = !TLI.
isExtFree(SExtOpnd);
4405 TPT.eraseInstruction(SExt);
4410 TPT.setOperand(SExt, 0, SExtOpnd->
getOperand(0));
4412 CreatedInstsCost = 0;
4416 TPT.eraseInstruction(SExtOpnd);
4419 Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
4424 CreatedInstsCost = !TLI.
isExtFree(ExtInst) && !HasMergedNonFreeExt;
4432 TPT.eraseInstruction(ExtInst, NextVal);
4436Value *TypePromotionHelper::promoteOperandForOther(
4438 InstrToOrigTy &PromotedInsts,
unsigned &CreatedInstsCost,
4445 CreatedInstsCost = 0;
4451 Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->
getType());
4452 if (
Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
4454 ITrunc->moveAfter(ExtOpnd);
4459 TPT.replaceAllUsesWith(ExtOpnd, Trunc);
4462 TPT.setOperand(Ext, 0, ExtOpnd);
4472 addPromotedInst(PromotedInsts, ExtOpnd, IsSExt);
4474 TPT.mutateType(ExtOpnd,
Ext->getType());
4476 TPT.replaceAllUsesWith(Ext, ExtOpnd);
4481 for (
int OpIdx = 0, EndOpIdx = ExtOpnd->
getNumOperands(); OpIdx != EndOpIdx;
4485 !shouldExtOperand(ExtOpnd, OpIdx)) {
4491 if (
const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
4493 unsigned BitWidth =
Ext->getType()->getIntegerBitWidth();
4500 if (isa<UndefValue>(Opnd)) {
4511 Value *ValForExtOpnd = IsSExt ? TPT.createSExt(Ext, Opnd,
Ext->getType())
4512 : TPT.createZExt(Ext, Opnd,
Ext->getType());
4513 if (!isa<Instruction>(ValForExtOpnd)) {
4514 TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
4517 ExtForOpnd = cast<Instruction>(ValForExtOpnd);
4521 TPT.setOperand(ExtForOpnd, 0, Opnd);
4524 TPT.moveBefore(ExtForOpnd, ExtOpnd);
4525 TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd);
4526 CreatedInstsCost += !TLI.
isExtFree(ExtForOpnd);
4528 ExtForOpnd =
nullptr;
4530 if (ExtForOpnd == Ext) {
4532 TPT.eraseInstruction(Ext);
4545bool AddressingModeMatcher::isPromotionProfitable(
4546 unsigned NewCost,
unsigned OldCost,
Value *PromotedOperand)
const {
4547 LLVM_DEBUG(
dbgs() <<
"OldCost: " << OldCost <<
"\tNewCost: " << NewCost
4552 if (NewCost > OldCost)
4554 if (NewCost < OldCost)
4573bool AddressingModeMatcher::matchOperationAddr(
User *AddrInst,
unsigned Opcode,
4585 case Instruction::PtrToInt:
4588 case Instruction::IntToPtr: {
4596 case Instruction::BitCast:
4606 case Instruction::AddrSpaceCast: {
4614 case Instruction::Add: {
4617 unsigned OldSize = AddrModeInsts.
size();
4622 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4623 TPT.getRestorationPoint();
4632 AddrModeInsts.
resize(OldSize);
4633 TPT.rollback(LastKnownGood);
4642 AddrModeInsts.
resize(OldSize);
4643 TPT.rollback(LastKnownGood);
4649 case Instruction::Mul:
4650 case Instruction::Shl: {
4654 if (!RHS ||
RHS->getBitWidth() > 64)
4656 int64_t Scale = Opcode == Instruction::Shl
4657 ? 1LL <<
RHS->getLimitedValue(
RHS->getBitWidth() - 1)
4658 :
RHS->getSExtValue();
4662 case Instruction::GetElementPtr: {
4665 int VariableOperand = -1;
4666 unsigned VariableScale = 0;
4668 int64_t ConstantOffset = 0;
4670 for (
unsigned i = 1, e = AddrInst->
getNumOperands(); i != e; ++i, ++GTI) {
4674 cast<ConstantInt>(AddrInst->
getOperand(i))->getZExtValue();
4684 dyn_cast<ConstantInt>(AddrInst->
getOperand(i))) {
4692 if (VariableOperand != -1)
4696 VariableOperand = i;
4704 if (VariableOperand == -1) {
4705 AddrMode.BaseOffs += ConstantOffset;
4706 if (ConstantOffset == 0 ||
4710 if (!cast<GEPOperator>(AddrInst)->isInBounds())
4716 ConstantOffset > 0) {
4722 auto *BaseI = dyn_cast<Instruction>(
Base);
4723 auto *
GEP = cast<GetElementPtrInst>(AddrInst);
4724 if (isa<Argument>(
Base) || isa<GlobalValue>(
Base) ||
4725 (BaseI && !isa<CastInst>(BaseI) &&
4726 !isa<GetElementPtrInst>(BaseI))) {
4730 BaseI ? BaseI->
getParent() : &
GEP->getFunction()->getEntryBlock();
4732 LargeOffsetGEP = std::make_pair(
GEP, ConstantOffset);
4735 AddrMode.BaseOffs -= ConstantOffset;
4741 unsigned OldSize = AddrModeInsts.
size();
4744 AddrMode.BaseOffs += ConstantOffset;
4745 if (!cast<GEPOperator>(AddrInst)->isInBounds())
4753 AddrModeInsts.
resize(OldSize);
4761 if (!matchScaledValue(AddrInst->
getOperand(VariableOperand), VariableScale,
4766 AddrModeInsts.
resize(OldSize);
4771 AddrMode.BaseOffs += ConstantOffset;
4772 if (!matchScaledValue(AddrInst->
getOperand(VariableOperand),
4773 VariableScale,
Depth)) {
4776 AddrModeInsts.
resize(OldSize);
4783 case Instruction::SExt:
4784 case Instruction::ZExt: {
4791 TypePromotionHelper::Action TPH =
4792 TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts);
4796 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4797 TPT.getRestorationPoint();
4798 unsigned CreatedInstsCost = 0;
4800 Value *PromotedOperand =
4801 TPH(Ext, TPT, PromotedInsts, CreatedInstsCost,
nullptr,
nullptr, TLI);
4816 assert(PromotedOperand &&
4817 "TypePromotionHelper should have filtered out those cases");
4820 unsigned OldSize = AddrModeInsts.
size();
4822 if (!matchAddr(PromotedOperand,
Depth) ||
4827 !isPromotionProfitable(CreatedInstsCost,
4828 ExtCost + (AddrModeInsts.
size() - OldSize),
4831 AddrModeInsts.
resize(OldSize);
4832 LLVM_DEBUG(
dbgs() <<
"Sign extension does not pay off: rollback\n");
4833 TPT.rollback(LastKnownGood);
4847bool AddressingModeMatcher::matchAddr(
Value *
Addr,
unsigned Depth) {
4850 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4851 TPT.getRestorationPoint();
4870 unsigned OldSize = AddrModeInsts.
size();
4873 bool MovedAway =
false;
4874 if (matchOperationAddr(
I,
I->getOpcode(),
Depth, &MovedAway)) {
4882 if (
I->hasOneUse() ||
4883 isProfitableToFoldIntoAddressingMode(
I, BackupAddrMode,
AddrMode)) {
4890 AddrModeInsts.
resize(OldSize);
4891 TPT.rollback(LastKnownGood);
4894 if (matchOperationAddr(CE,
CE->getOpcode(),
Depth))
4896 TPT.rollback(LastKnownGood);
4897 }
else if (isa<ConstantPointerNull>(
Addr)) {
4923 TPT.rollback(LastKnownGood);
4942 if (OpInfo.CallOperandVal == OpVal &&
4944 !OpInfo.isIndirect))
4964 if (!ConsideredInsts.
insert(
I).second)
4972 for (
Use &U :
I->uses()) {
4978 Instruction *UserI = cast<Instruction>(U.getUser());
4979 if (
LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
4980 MemoryUses.push_back({U.get(), LI->getType()});
4987 MemoryUses.push_back({U.get(),
SI->getValueOperand()->getType()});
4994 MemoryUses.push_back({U.get(), RMW->getValOperand()->getType()});
5001 MemoryUses.push_back({U.get(), CmpX->getCompareOperand()->getType()});
5005 if (
CallInst *CI = dyn_cast<CallInst>(UserI)) {
5006 if (CI->hasFnAttr(Attribute::Cold)) {
5015 InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledOperand());
5026 PSI, BFI, SeenInsts))
5037bool AddressingModeMatcher::valueAlreadyLiveAtInst(
Value *Val,
5039 Value *KnownLive2) {
5041 if (Val ==
nullptr || Val == KnownLive1 || Val == KnownLive2)
5045 if (!isa<Instruction>(Val) && !isa<Argument>(Val))
5051 if (
AllocaInst *AI = dyn_cast<AllocaInst>(Val))
5082bool AddressingModeMatcher::isProfitableToFoldIntoAddressingMode(
5084 if (IgnoreProfitability)
5102 if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.
BaseReg, AMBefore.
ScaledReg))
5103 ScaledReg =
nullptr;
5107 if (!BaseReg && !ScaledReg)
5130 for (
const std::pair<Value *, Type *> &Pair : MemoryUses) {
5132 Type *AddressAccessTy = Pair.second;
5133 unsigned AS =
Address->getType()->getPointerAddressSpace();
5139 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(
nullptr,
5141 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5142 TPT.getRestorationPoint();
5143 AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI,
TRI, LI, getDTFn,
5144 AddressAccessTy, AS, MemoryInst, Result,
5145 InsertedInsts, PromotedInsts, TPT,
5146 LargeOffsetGEP, OptSize, PSI, BFI);
5147 Matcher.IgnoreProfitability =
true;
5148 bool Success = Matcher.matchAddr(Address, 0);
5155 TPT.rollback(LastKnownGood);
5161 MatchedAddrModeInsts.
clear();