82#ifdef EXPENSIVE_CHECKS
88#define DEBUG_TYPE "dfa-jump-threading"
90STATISTIC(NumTransforms,
"Number of transformations done");
92STATISTIC(NumPaths,
"Number of individual paths threaded");
97 cl::desc(
"View the CFG before DFA Jump Threading"),
101 "dfa-early-exit-heuristic",
102 cl::desc(
"Exit early if an unpredictable value come from the same loop"),
106 "dfa-max-path-length",
107 cl::desc(
"Max number of blocks searched to find a threading path"),
111 "dfa-max-num-visited-paths",
113 "Max number of blocks visited while enumerating paths around a switch"),
118 cl::desc(
"Max number of paths enumerated around a switch"),
123 cl::desc(
"Maximum cost accepted for the transformation"),
127 "dfa-max-cloned-rate",
129 "Maximum cloned instructions rate accepted for the transformation"),
134 cl::desc(
"Maximum unduplicated blocks with outer uses "
135 "accepted for the transformation"),
143class SelectInstToUnfold {
150 SelectInst *getInst() {
return SI; }
151 PHINode *getUse() {
return SIUse; }
153 explicit operator bool()
const {
return SI && SIUse; }
156class DFAJumpThreading {
158 DFAJumpThreading(AssumptionCache *AC, DomTreeUpdater *DTU, LoopInfo *LI,
159 TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE)
160 : AC(AC), DTU(DTU), LI(LI), TTI(TTI), ORE(ORE) {}
162 bool run(Function &
F);
170 while (!
Stack.empty()) {
171 SelectInstToUnfold SIToUnfold =
Stack.pop_back_val();
173 std::vector<SelectInstToUnfold> NewSIsToUnfold;
174 std::vector<BasicBlock *> NewBBs;
175 unfold(DTU, LI, SIToUnfold, &NewSIsToUnfold, &NewBBs);
182 static void unfold(DomTreeUpdater *DTU, LoopInfo *LI,
183 SelectInstToUnfold SIToUnfold,
184 std::vector<SelectInstToUnfold> *NewSIsToUnfold,
185 std::vector<BasicBlock *> *NewBBs);
190 TargetTransformInfo *TTI;
191 OptimizationRemarkEmitter *ORE;
203 SelectInstToUnfold SIToUnfold,
204 std::vector<SelectInstToUnfold> *NewSIsToUnfold,
205 std::vector<BasicBlock *> *NewBBs) {
206 SelectInst *
SI = SIToUnfold.getInst();
207 PHINode *SIUse = SIToUnfold.getUse();
212 if (UncondBrInst *StartBlockTerm =
217 SI->getContext(), Twine(
SI->getName(),
".si.unfold.false"),
219 NewBBs->push_back(NewBlock);
221 DTU->
applyUpdates({{DominatorTree::Insert, NewBlock, EndBlock}});
228 Value *SIOp1 =
SI->getTrueValue();
229 Value *SIOp2 =
SI->getFalseValue();
232 Twine(SIOp2->
getName(),
".si.unfold.phi"),
237 for (PHINode &Phi : EndBlock->
phis()) {
240 Phi.addIncoming(
Phi.getIncomingValueForBlock(StartBlock), NewBlock);
249 Twine(
SI->getName(),
".si.unfold.phi"),
252 if (Pred != StartBlock && Pred != NewBlock)
263 NewSIsToUnfold->push_back(SelectInstToUnfold(OpSi, SIUse));
265 NewSIsToUnfold->push_back(SelectInstToUnfold(OpSi, NewPhi));
268 StartBlockTerm->eraseFromParent();
272 BI->setMetadata(LLVMContext::MD_prof,
273 SI->getMetadata(LLVMContext::MD_prof));
274 DTU->
applyUpdates({{DominatorTree::Insert, StartBlock, NewBlock}});
278 SI->getContext(), Twine(
SI->getName(),
".si.unfold.true"),
281 SI->getContext(), Twine(
SI->getName(),
".si.unfold.false"),
284 NewBBs->push_back(NewBlockT);
285 NewBBs->push_back(NewBlockF);
310 BI->setMetadata(LLVMContext::MD_prof,
311 SI->getMetadata(LLVMContext::MD_prof));
312 DTU->
applyUpdates({{DominatorTree::Insert, NewBlockT, NewBlockF},
313 {DominatorTree::Insert, NewBlockT, EndBlock},
314 {DominatorTree::Insert, NewBlockF, EndBlock}});
329 NewSIsToUnfold->push_back(SelectInstToUnfold(TrueSI, NewPhiT));
331 NewSIsToUnfold->push_back(SelectInstToUnfold(FalseSi, NewPhiF));
338 for (PHINode &Phi : EndBlock->
phis()) {
341 Phi.addIncoming(
Phi.getIncomingValueForBlock(StartBlock), NewBlockT);
342 Phi.addIncoming(
Phi.getIncomingValueForBlock(StartBlock), NewBlockF);
343 Phi.removeIncomingValue(StartBlock);
349 unsigned SuccNum = CondBr->
getSuccessor(1) == EndBlock ? 1 : 0;
351 DTU->
applyUpdates({{DominatorTree::Delete, StartBlock, EndBlock},
352 {DominatorTree::Insert, StartBlock, NewBlockT}});
357 for (BasicBlock *NewBB : *NewBBs)
358 L->addBasicBlockToLoop(NewBB, *LI);
362 assert(
SI->use_empty() &&
"Select must be dead now");
363 SI->eraseFromParent();
390 OS <<
"< " <<
llvm::join(BBNames,
", ") <<
" >";
399struct ThreadingPath {
401 APInt getExitValue()
const {
return ExitVal; }
402 void setExitValue(
const ConstantInt *V) {
403 ExitVal =
V->getValue();
406 void setExitValue(
const APInt &V) {
410 bool isExitValueSet()
const {
return IsExitValSet; }
413 const BasicBlock *getDeterminatorBB()
const {
return DBB; }
414 void setDeterminator(
const BasicBlock *BB) { DBB = BB; }
417 const PathType &getPath()
const {
return Path; }
418 void setPath(
const PathType &NewPath) { Path = NewPath; }
419 void push_back(BasicBlock *BB) { Path.push_back(BB); }
420 void push_front(BasicBlock *BB) { Path.push_front(BB); }
421 void appendExcludingFirst(
const PathType &OtherPath) {
425 void print(raw_ostream &OS)
const {
433 bool IsExitValSet =
false;
437inline raw_ostream &
operator<<(raw_ostream &OS,
const ThreadingPath &TPath) {
444 MainSwitch(SwitchInst *SI, LoopInfo *LI, OptimizationRemarkEmitter *ORE)
450 return OptimizationRemarkMissed(
DEBUG_TYPE,
"SwitchNotPredictable", SI)
451 <<
"Switch instruction is not predictable.";
456 virtual ~MainSwitch() =
default;
458 SwitchInst *getInstr()
const {
return Instr; }
469 std::deque<std::pair<Value *, BasicBlock *>> Q;
470 SmallPtrSet<Value *, 16> SeenValues;
473 Value *SICond =
SI->getCondition();
483 addToQueue(SICond,
nullptr, Q, SeenValues);
486 Value *Current = Q.front().first;
487 BasicBlock *CurrentIncomingBB = Q.front().second;
491 for (BasicBlock *IncomingBB :
Phi->blocks()) {
492 Value *Incoming =
Phi->getIncomingValueForBlock(IncomingBB);
493 addToQueue(Incoming, IncomingBB, Q, SeenValues);
497 if (!isValidSelectInst(SelI))
499 addToQueue(SelI->getTrueValue(), CurrentIncomingBB, Q, SeenValues);
500 addToQueue(SelI->getFalseValue(), CurrentIncomingBB, Q, SeenValues);
503 SelectInsts.push_back(SelectInstToUnfold(SelI, SelIUse));
521 <<
"\tExiting early due to unpredictability heuristic.\n");
532 void addToQueue(
Value *Val, BasicBlock *BB,
533 std::deque<std::pair<Value *, BasicBlock *>> &Q,
534 SmallPtrSet<Value *, 16> &SeenValues) {
535 if (SeenValues.
insert(Val).second)
536 Q.push_back({Val, BB});
539 bool isValidSelectInst(SelectInst *SI) {
540 if (!
SI->hasOneUse())
565 for (SelectInstToUnfold SIToUnfold : SelectInsts) {
566 SelectInst *PrevSI = SIToUnfold.getInst();
576 SwitchInst *Instr =
nullptr;
580struct AllSwitchPaths {
581 AllSwitchPaths(
const MainSwitch *MSwitch, OptimizationRemarkEmitter *ORE,
582 LoopInfo *LI, Loop *L)
583 : Switch(MSwitch->getInstr()), SwitchBlock(Switch->
getParent()), ORE(ORE),
584 LI(LI), SwitchOuterLoop(
L) {}
586 std::vector<ThreadingPath> &getThreadingPaths() {
return TPaths; }
587 unsigned getNumThreadingPaths() {
return TPaths.size(); }
588 SwitchInst *getSwitchInst() {
return Switch; }
589 BasicBlock *getSwitchBlock() {
return SwitchBlock; }
599 typedef DenseMap<const BasicBlock *, const PHINode *> StateDefMap;
600 std::vector<ThreadingPath> getPathsFromStateDefMap(StateDefMap &StateDef,
603 unsigned PathsLimit) {
604 std::vector<ThreadingPath> Res;
605 auto *PhiBB =
Phi->getParent();
609 for (
auto *IncomingBB :
Phi->blocks()) {
610 if (Res.size() >= PathsLimit)
612 if (!UniqueBlocks.
insert(IncomingBB).second)
614 if (!SwitchOuterLoop->
contains(IncomingBB))
617 Value *IncomingValue =
Phi->getIncomingValueForBlock(IncomingBB);
621 if (PhiBB == SwitchBlock &&
624 ThreadingPath NewPath;
625 NewPath.setDeterminator(PhiBB);
626 NewPath.setExitValue(
C);
628 if (IncomingBB != SwitchBlock) {
632 NewPath.push_back(IncomingBB);
634 NewPath.push_back(PhiBB);
635 Res.push_back(NewPath);
639 if (VB.
contains(IncomingBB) || IncomingBB == SwitchBlock)
645 auto *IncomingPhiDefBB = IncomingPhi->getParent();
646 if (!StateDef.contains(IncomingPhiDefBB))
650 if (IncomingPhiDefBB == IncomingBB) {
651 assert(PathsLimit > Res.size());
652 std::vector<ThreadingPath> PredPaths = getPathsFromStateDefMap(
653 StateDef, IncomingPhi, VB, PathsLimit - Res.size());
654 for (ThreadingPath &Path : PredPaths) {
655 Path.push_back(PhiBB);
656 Res.push_back(std::move(Path));
666 assert(PathsLimit > Res.size());
667 auto InterPathLimit = PathsLimit - Res.size();
668 IntermediatePaths = paths(IncomingPhiDefBB, IncomingBB, VB,
670 if (IntermediatePaths.empty())
673 assert(InterPathLimit >= IntermediatePaths.size());
674 auto PredPathLimit = InterPathLimit / IntermediatePaths.size();
675 std::vector<ThreadingPath> PredPaths =
676 getPathsFromStateDefMap(StateDef, IncomingPhi, VB, PredPathLimit);
677 for (
const ThreadingPath &Path : PredPaths) {
678 for (
const PathType &IPath : IntermediatePaths) {
679 ThreadingPath NewPath(Path);
680 NewPath.appendExcludingFirst(IPath);
681 NewPath.push_back(PhiBB);
682 Res.push_back(NewPath);
691 unsigned PathDepth,
unsigned PathsLimit) {
697 return OptimizationRemarkAnalysis(
DEBUG_TYPE,
"MaxPathLengthReached",
699 <<
"Exploration stopped after visiting MaxPathLength="
716 SmallPtrSet<BasicBlock *, 4> Successors;
718 if (Res.size() >= PathsLimit)
720 if (!Successors.
insert(Succ).second)
725 Res.push_back({BB, ToBB});
735 if (Succ == CurrLoop->getHeader())
741 assert(PathsLimit > Res.size());
743 paths(Succ, ToBB, Visited, PathDepth + 1, PathsLimit - Res.size());
758 StateDefMap getStateDefMap()
const {
760 DenseSet<const BasicBlock *> MultipleDefBBs;
762 assert(FirstDef &&
"The first definition must be a phi.");
765 Stack.push_back(FirstDef);
766 SmallPtrSet<Value *, 16> SeenValues;
768 while (!
Stack.empty()) {
769 PHINode *CurPhi =
Stack.pop_back_val();
772 auto [
_,
Inserted] = Res.try_emplace(CurDefBlock, CurPhi);
774 MultipleDefBBs.
insert(CurDefBlock);
776 SeenValues.
insert(CurPhi);
778 for (BasicBlock *IncomingBB : CurPhi->
blocks()) {
779 PHINode *IncomingPhi =
783 bool IsOutsideLoops = !SwitchOuterLoop->
contains(IncomingBB);
784 if (SeenValues.
contains(IncomingPhi) || IsOutsideLoops)
787 Stack.push_back(IncomingPhi);
797 for (
auto *BB : MultipleDefBBs) {
798 LLVM_DEBUG(
dbgs() <<
"Not a state-defining block: Multiple defs in "
807 StateDefMap StateDef = getStateDefMap();
808 if (StateDef.empty()) {
810 return OptimizationRemarkMissed(
DEBUG_TYPE,
"SwitchNotPredictable",
812 <<
"Switch instruction is not predictable.";
818 auto *SwitchPhiDefBB = SwitchPhi->getParent();
821 std::vector<ThreadingPath> PathsToPhiDef =
822 getPathsFromStateDefMap(StateDef, SwitchPhi, VB,
MaxNumPaths);
823 if (SwitchPhiDefBB == SwitchBlock || PathsToPhiDef.empty()) {
824 TPaths = std::move(PathsToPhiDef);
829 auto PathsLimit =
MaxNumPaths / PathsToPhiDef.size();
832 paths(SwitchPhiDefBB, SwitchBlock, VB, 1, PathsLimit);
833 if (PathsToSwitchBB.empty())
836 std::vector<ThreadingPath> TempList;
837 for (
const ThreadingPath &Path : PathsToPhiDef) {
838 SmallPtrSet<BasicBlock *, 32> PathSet(
Path.getPath().begin(),
839 Path.getPath().end());
840 for (
const PathType &PathToSw : PathsToSwitchBB) {
842 [&](
const BasicBlock *BB) {
return PathSet.contains(BB); }))
844 ThreadingPath PathCopy(Path);
845 PathCopy.appendExcludingFirst(PathToSw);
846 TempList.push_back(PathCopy);
849 TPaths = std::move(TempList);
854 BasicBlock *getNextCaseSuccessor(
const APInt &NextState) {
856 if (CaseValToDest.empty()) {
857 for (
auto Case : Switch->
cases()) {
858 APInt CaseVal = Case.getCaseValue()->getValue();
859 CaseValToDest[CaseVal] = Case.getCaseSuccessor();
863 auto SuccIt = CaseValToDest.find(NextState);
871 SmallDenseMap<BasicBlock *, APInt> DestToState;
872 for (ThreadingPath &Path : TPaths) {
873 APInt NextState =
Path.getExitValue();
874 BasicBlock *Dest = getNextCaseSuccessor(NextState);
878 if (NextState != StateIt->second) {
879 LLVM_DEBUG(
dbgs() <<
"Next state in " << Path <<
" is equivalent to "
880 << StateIt->second <<
"\n");
881 Path.setExitValue(StateIt->second);
886 unsigned NumVisited = 0;
889 OptimizationRemarkEmitter *ORE;
890 std::vector<ThreadingPath> TPaths;
891 DenseMap<APInt, BasicBlock *> CaseValToDest;
893 Loop *SwitchOuterLoop;
897 TransformDFA(AllSwitchPaths *SwitchPaths, DomTreeUpdater *DTU,
898 AssumptionCache *AC, TargetTransformInfo *
TTI,
899 OptimizationRemarkEmitter *ORE,
900 SmallPtrSet<const Value *, 32> EphValues)
901 : SwitchPaths(SwitchPaths), DTU(DTU), AC(AC),
TTI(
TTI), ORE(ORE),
902 EphValues(EphValues) {}
905 if (isLegalAndProfitableToTransform()) {
906 createAllExitPaths();
918 bool isLegalAndProfitableToTransform() {
920 uint64_t NumClonedInst = 0;
921 SwitchInst *
Switch = SwitchPaths->getSwitchInst();
924 if (
Switch->getNumSuccessors() <= 1)
930 for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
932 APInt NextState = TPath.getExitValue();
933 const BasicBlock *Determinator = TPath.getDeterminatorBB();
936 BasicBlock *BB = SwitchPaths->getSwitchBlock();
937 BasicBlock *VisitedBB = getClonedBB(BB, NextState, DuplicateMap);
939 Metrics.analyzeBasicBlock(BB, *
TTI, EphValues);
940 NumClonedInst += BB->
size();
941 DuplicateMap[BB].push_back({BB, NextState});
946 if (PathBBs.front() == Determinator)
951 auto DetIt =
llvm::find(PathBBs, Determinator);
952 for (
auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) {
954 VisitedBB = getClonedBB(BB, NextState, DuplicateMap);
957 Metrics.analyzeBasicBlock(BB, *
TTI, EphValues);
958 NumClonedInst += BB->
size();
959 DuplicateMap[BB].push_back({BB, NextState});
963 LLVM_DEBUG(
dbgs() <<
"DFA Jump Threading: Not jump threading, contains "
964 <<
"non-duplicatable instructions.\n");
966 return OptimizationRemarkMissed(
DEBUG_TYPE,
"NonDuplicatableInst",
968 <<
"Contains non-duplicatable instructions.";
974 if (
Metrics.Convergence != ConvergenceKind::None) {
975 LLVM_DEBUG(
dbgs() <<
"DFA Jump Threading: Not jump threading, contains "
976 <<
"convergent instructions.\n");
978 return OptimizationRemarkMissed(
DEBUG_TYPE,
"ConvergentInst", Switch)
979 <<
"Contains convergent instructions.";
984 if (!
Metrics.NumInsts.isValid()) {
985 LLVM_DEBUG(
dbgs() <<
"DFA Jump Threading: Not jump threading, contains "
986 <<
"instructions with invalid cost.\n");
988 return OptimizationRemarkMissed(
DEBUG_TYPE,
"ConvergentInst", Switch)
989 <<
"Contains instructions with invalid cost.";
998 uint64_t NumOrigInst = 0;
999 uint64_t NumOuterUseBlock = 0;
1000 for (
auto *BB : DuplicateMap.
keys()) {
1001 NumOrigInst += BB->
size();
1005 if (!DuplicateMap.
count(Succ) && Succ->getSinglePredecessor())
1009 if (
double(NumClonedInst) /
double(NumOrigInst) >
MaxClonedRate) {
1010 LLVM_DEBUG(
dbgs() <<
"DFA Jump Threading: Not jump threading, too much "
1011 "instructions wll be cloned\n");
1013 return OptimizationRemarkMissed(
DEBUG_TYPE,
"NotProfitable", Switch)
1014 <<
"Too much instructions will be cloned.";
1024 LLVM_DEBUG(
dbgs() <<
"DFA Jump Threading: Not jump threading, too much "
1025 "blocks with outer uses\n");
1027 return OptimizationRemarkMissed(
DEBUG_TYPE,
"NotProfitable", Switch)
1028 <<
"Too much blocks with outer uses.";
1035 unsigned JumpTableSize = 0;
1038 if (JumpTableSize == 0) {
1042 unsigned CondBranches =
1043 APInt(32,
Switch->getNumSuccessors()).ceilLogBase2();
1044 assert(CondBranches > 0 &&
1045 "The threaded switch must have multiple branches");
1046 DuplicationCost =
Metrics.NumInsts / CondBranches;
1054 DuplicationCost =
Metrics.NumInsts / JumpTableSize;
1057 LLVM_DEBUG(
dbgs() <<
"\nDFA Jump Threading: Cost to jump thread block "
1058 << SwitchPaths->getSwitchBlock()->getName()
1059 <<
" is: " << DuplicationCost <<
"\n\n");
1062 LLVM_DEBUG(
dbgs() <<
"Not jump threading, duplication cost exceeds the "
1063 <<
"cost threshold.\n");
1065 return OptimizationRemarkMissed(
DEBUG_TYPE,
"NotProfitable", Switch)
1066 <<
"Duplication cost exceeds the cost threshold (cost="
1067 <<
ore::NV(
"Cost", DuplicationCost)
1074 return OptimizationRemark(
DEBUG_TYPE,
"JumpThreaded", Switch)
1075 <<
"Switch statement jump-threaded.";
1082 void createAllExitPaths() {
1084 BasicBlock *SwitchBlock = SwitchPaths->getSwitchBlock();
1085 for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
1089 TPath.push_front(SwitchBlock);
1096 SmallPtrSet<BasicBlock *, 16> BlocksToClean;
1099 for (
const ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
1100 createExitPath(NewDefs, TPath, DuplicateMap, BlocksToClean, DTU);
1106 for (
const ThreadingPath &TPath : SwitchPaths->getThreadingPaths())
1107 updateLastSuccessor(TPath, DuplicateMap, DTU);
1113 for (BasicBlock *BB : BlocksToClean)
1123 void createExitPath(
DefMap &NewDefs,
const ThreadingPath &Path,
1125 SmallPtrSet<BasicBlock *, 16> &BlocksToClean,
1126 DomTreeUpdater *DTU) {
1127 APInt NextState =
Path.getExitValue();
1132 if (PathBBs.front() == Determinator)
1133 PathBBs.pop_front();
1135 auto DetIt =
llvm::find(PathBBs, Determinator);
1138 BasicBlock *PrevBB = PathBBs.size() == 1 ? *DetIt : *std::prev(DetIt);
1139 for (
auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) {
1141 BlocksToClean.
insert(BB);
1145 BasicBlock *NextBB = getClonedBB(BB, NextState, DuplicateMap);
1147 updatePredecessor(PrevBB, BB, NextBB, DTU);
1153 BasicBlock *NewBB = cloneBlockAndUpdatePredecessor(
1154 BB, PrevBB, NextState, DuplicateMap, NewDefs, DTU);
1155 DuplicateMap[BB].push_back({NewBB, NextState});
1156 BlocksToClean.
insert(NewBB);
1168 SSAUpdaterBulk SSAUpdate;
1169 SmallVector<Use *, 16> UsesToRename;
1171 for (
const auto &KV : NewDefs) {
1174 std::vector<Instruction *> Cloned = KV.second;
1178 for (Use &U :
I->uses()) {
1181 if (UserPN->getIncomingBlock(U) == BB)
1183 }
else if (
User->getParent() == BB) {
1192 if (UsesToRename.
empty())
1200 unsigned VarNum = SSAUpdate.
AddVariable(
I->getName(),
I->getType());
1202 for (Instruction *New : Cloned)
1205 while (!UsesToRename.
empty())
1219 static BasicBlock *getNextCaseSuccessor(SwitchInst *Switch,
1220 const APInt &NextState) {
1222 for (
auto Case :
Switch->cases()) {
1223 if (Case.getCaseValue()->getValue() == NextState) {
1224 NextCase = Case.getCaseSuccessor();
1229 NextCase =
Switch->getDefaultDest();
1237 BasicBlock *cloneBlockAndUpdatePredecessor(BasicBlock *BB, BasicBlock *PrevBB,
1238 const APInt &NextState,
1241 DomTreeUpdater *DTU) {
1255 for (Instruction &
I : *NewBB) {
1267 updateSuccessorPhis(BB, NewBB, NextState, VMap, DuplicateMap);
1268 updatePredecessor(PrevBB, BB, NewBB, DTU);
1269 updateDefMap(NewDefs, VMap);
1272 SmallPtrSet<BasicBlock *, 4> SuccSet;
1274 if (SuccSet.
insert(SuccBB).second)
1275 DTU->
applyUpdates({{DominatorTree::Insert, NewBB, SuccBB}});
1285 void updateSuccessorPhis(BasicBlock *BB, BasicBlock *ClonedBB,
1288 std::vector<BasicBlock *> BlocksToUpdate;
1292 if (BB == SwitchPaths->getSwitchBlock()) {
1293 SwitchInst *
Switch = SwitchPaths->getSwitchInst();
1294 BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState);
1295 BlocksToUpdate.push_back(NextCase);
1296 BasicBlock *ClonedSucc = getClonedBB(NextCase, NextState, DuplicateMap);
1298 BlocksToUpdate.push_back(ClonedSucc);
1303 BlocksToUpdate.push_back(Succ);
1308 BasicBlock *ClonedSucc = getClonedBB(Succ, NextState, DuplicateMap);
1310 BlocksToUpdate.push_back(ClonedSucc);
1317 for (BasicBlock *Succ : BlocksToUpdate) {
1318 for (PHINode &Phi : Succ->phis()) {
1319 Value *Incoming =
Phi.getIncomingValueForBlock(BB);
1322 Phi.addIncoming(Incoming, ClonedBB);
1325 Value *ClonedVal = VMap[Incoming];
1327 Phi.addIncoming(ClonedVal, ClonedBB);
1329 Phi.addIncoming(Incoming, ClonedBB);
1337 void updatePredecessor(BasicBlock *PrevBB, BasicBlock *OldBB,
1338 BasicBlock *NewBB, DomTreeUpdater *DTU) {
1341 if (!isPredecessor(OldBB, PrevBB))
1351 DTU->
applyUpdates({{DominatorTree::Delete, PrevBB, OldBB},
1352 {DominatorTree::Insert, PrevBB, NewBB}});
1361 for (
auto Entry : VMap) {
1364 if (!Inst || !
Entry.second ||
1372 NewDefsVector.
push_back({Inst, Cloned});
1376 sort(NewDefsVector, [](
const auto &
LHS,
const auto &
RHS) {
1377 if (
LHS.first ==
RHS.first)
1378 return LHS.second->comesBefore(
RHS.second);
1379 return LHS.first->comesBefore(
RHS.first);
1382 for (
const auto &KV : NewDefsVector)
1383 NewDefs[KV.first].push_back(KV.second);
1391 void updateLastSuccessor(
const ThreadingPath &TPath,
1393 DomTreeUpdater *DTU) {
1394 APInt NextState = TPath.getExitValue();
1396 BasicBlock *LastBlock = getClonedBB(BB, NextState, DuplicateMap);
1403 BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState);
1405 std::vector<DominatorTree::UpdateType> DTUpdates;
1406 SmallPtrSet<BasicBlock *, 4> SuccSet;
1407 for (BasicBlock *Succ :
successors(LastBlock)) {
1408 if (Succ != NextCase && SuccSet.
insert(Succ).second)
1409 DTUpdates.push_back({DominatorTree::Delete, LastBlock, Succ});
1412 Switch->eraseFromParent();
1420 void cleanPhiNodes(BasicBlock *BB) {
1425 PN.eraseFromParent();
1431 for (PHINode &Phi : BB->
phis())
1432 Phi.removeIncomingValueIf([&](
unsigned Index) {
1434 return !isPredecessor(BB, IncomingBB);
1440 BasicBlock *getClonedBB(BasicBlock *BB,
const APInt &NextState,
1446 auto It =
llvm::find_if(ClonedBBs, [NextState](
const ClonedBlock &
C) {
1447 return C.State == NextState;
1449 return It != ClonedBBs.end() ? (*It).BB :
nullptr;
1453 bool isPredecessor(BasicBlock *BB, BasicBlock *IncomingBB) {
1457 AllSwitchPaths *SwitchPaths;
1458 DomTreeUpdater *DTU;
1459 AssumptionCache *AC;
1460 TargetTransformInfo *
TTI;
1461 OptimizationRemarkEmitter *ORE;
1462 SmallPtrSet<const Value *, 32> EphValues;
1463 std::vector<ThreadingPath> TPaths;
1467bool DFAJumpThreading::run(Function &
F) {
1468 LLVM_DEBUG(
dbgs() <<
"\nDFA Jump threading: " <<
F.getName() <<
"\n");
1470 if (
F.hasOptSize()) {
1471 LLVM_DEBUG(
dbgs() <<
"Skipping due to the 'minsize' attribute\n");
1479 bool MadeChanges =
false;
1480 LoopInfoBroken =
false;
1482 for (BasicBlock &BB :
F) {
1488 <<
" is a candidate\n");
1489 MainSwitch
Switch(SI, LI, ORE);
1491 if (!
Switch.getInstr()) {
1493 <<
"candidate for jump threading\n");
1498 <<
"candidate for jump threading\n");
1501 unfoldSelectInstrs(
Switch.getSelectInsts());
1502 if (!
Switch.getSelectInsts().empty())
1505 AllSwitchPaths SwitchPaths(&Switch, ORE, LI,
1509 if (SwitchPaths.getNumThreadingPaths() > 0) {
1526 SmallPtrSet<const Value *, 32> EphValues;
1527 if (ThreadableLoops.
size() > 0)
1530 for (AllSwitchPaths SwitchPaths : ThreadableLoops) {
1531 TransformDFA Transform(&SwitchPaths, DTU, AC,
TTI, ORE, EphValues);
1532 if (Transform.run())
1533 MadeChanges = LoopInfoBroken =
true;
1538#ifdef EXPENSIVE_CHECKS
1544 "Failed to maintain validity of domtree!");
1559 DFAJumpThreading ThreadImpl(&AC, &DTU, &LI, &
TTI, &ORE);
1560 if (!ThreadImpl.run(
F))
1565 if (!ThreadImpl.LoopInfoBroken)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static const Function * getParent(const Value *V)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
SmallPtrSet< const BasicBlock *, 8 > VisitedBlocks
std::deque< BasicBlock * > PathType
std::vector< PathType > PathsType
MapVector< Instruction *, std::vector< Instruction * > > DefMap
std::vector< ClonedBlock > CloneList
DenseMap< BasicBlock *, CloneList > DuplicateBlockMap
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
static void updateSSA(DominatorTree &DT, CallBrInst *CBR, CallInst *Intrinsic, SSAUpdater &SSAUpdate)
static bool isCandidate(const MachineInstr *MI, Register &DefedReg, Register FrameReg)
uint64_t IntrinsicInst * II
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM_ABI void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
LLVM Basic Block Representation.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
LLVM_ABI void moveAfter(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it right after MovePos in the function M...
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
static CondBrInst * Create(Value *Cond, BasicBlock *IfTrue, BasicBlock *IfFalse, InsertPosition InsertBefore=nullptr)
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
BasicBlock * getSuccessor(unsigned i) const
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Analysis pass which computes a DominatorTree.
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
DomTreeT & getDomTree()
Flush DomTree updates and return DomTree.
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
void flush()
Apply all pending updates to available trees and flush all BasicBlocks awaiting deletion.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
Analysis pass that exposes the LoopInfo for a function.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
const LoopT * getOutermostLoop() const
Get the outermost loop in which this loop is contained.
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class implements a map that also provides access to all stored values in a deterministic order.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
LLVM_ABI Value * removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty=true)
Remove an incoming value.
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserve()
Mark an analysis as preserved.
LLVM_ABI unsigned AddVariable(StringRef Name, Type *Ty)
Add a new variable to the SSA rewriter.
LLVM_ABI void AddAvailableValue(unsigned Var, BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value.
LLVM_ABI void RewriteAllUses(DominatorTree *DT, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Perform all the necessary updates, including new PHI-nodes insertion and the requested uses update.
LLVM_ABI void AddUse(unsigned Var, Use *U)
Record a use of the symbolic value.
This class represents the LLVM 'select' instruction.
const Value * getFalseValue() const
const Value * getTrueValue() const
bool erase(PtrType Ptr)
Remove pointer from the set.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void reserve(size_type N)
void push_back(const T &Elt)
BasicBlock * getDefaultDest() const
iterator_range< CaseIt > cases()
Iteration adapter for range-for loops.
Analysis pass providing the TargetTransformInfo.
static UncondBrInst * Create(BasicBlock *Target, InsertPosition InsertBefore=nullptr)
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Value * getOperand(unsigned i) const
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI std::string getNameOrAsOperand() const
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
const ParentTy * getParent() const
This class implements an extremely fast bulk output stream that can only output to a stream.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
initializer< Ty > init(const Ty &Val)
@ Switch
The "resume-switch" lowering, where there are separate resume and destroy functions that are shared b...
DXILDebugInfoMap run(Module &M)
@ User
could "use" a pointer
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< PhiNode * > Phi
friend class Instruction
Iterator for Instructions in a `BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
FunctionAddr VTableAddr Value
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI cl::opt< bool > ProfcheckDisableMetadataFixes
static cl::opt< unsigned > MaxNumPaths("dfa-max-num-paths", cl::desc("Max number of paths enumerated around a switch"), cl::Hidden, cl::init(200))
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI bool verifyFunction(const Function &F, raw_ostream *OS=nullptr)
Check a function for errors, useful for use when debugging a pass.
auto successors(const MachineBasicBlock *BB)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto pred_size(const MachineBasicBlock *BB)
static cl::opt< bool > ClViewCfgBefore("dfa-jump-view-cfg-before", cl::desc("View the CFG before DFA Jump Threading"), cl::Hidden, cl::init(false))
auto map_range(ContainerTy &&C, FuncTy F)
Return a range that applies F to the elements of C.
static cl::opt< double > MaxClonedRate("dfa-max-cloned-rate", cl::desc("Maximum cloned instructions rate accepted for the transformation"), cl::Hidden, cl::init(7.5))
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
void sort(IteratorTy Start, IteratorTy End)
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
static cl::opt< unsigned > MaxNumVisitiedPaths("dfa-max-num-visited-paths", cl::desc("Max number of blocks visited while enumerating paths around a switch"), cl::Hidden, cl::init(2500))
std::string join(IteratorT Begin, IteratorT End, StringRef Separator)
Joins the strings in the range [Begin, End), adding Separator between the elements.
static cl::opt< bool > EarlyExitHeuristic("dfa-early-exit-heuristic", cl::desc("Exit early if an unpredictable value come from the same loop"), cl::Hidden, cl::init(true))
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
LLVM_ABI void cloneAndAdaptNoAliasScopes(ArrayRef< MDNode * > NoAliasDeclScopes, ArrayRef< BasicBlock * > NewBlocks, LLVMContext &Context, StringRef Ext)
Clone the specified noalias decl scopes.
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
LLVM_ABI bool VerifyDomInfo
Enables verification of dominator trees.
static cl::opt< unsigned > MaxOuterUseBlocks("dfa-max-out-use-blocks", cl::desc("Maximum unduplicated blocks with outer uses " "accepted for the transformation"), cl::Hidden, cl::init(40))
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
static cl::opt< unsigned > MaxPathLength("dfa-max-path-length", cl::desc("Max number of blocks searched to find a threading path"), cl::Hidden, cl::init(20))
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
static cl::opt< unsigned > CostThreshold("dfa-cost-threshold", cl::desc("Maximum cost accepted for the transformation"), cl::Hidden, cl::init(50))
bool pred_empty(const BasicBlock *BB)
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI void identifyNoAliasScopesToClone(ArrayRef< BasicBlock * > BBs, SmallVectorImpl< MDNode * > &NoAliasDeclScopes)
Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified basic blocks and extract ...
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Integrate with the new Pass Manager.