46 #define DEBUG_TYPE "select-optimize"
49 "Number of select groups considered for conversion to branch");
50 STATISTIC(NumSelectConvertedExpColdOperand,
51 "Number of select groups converted due to expensive cold operand");
53 "Number of select groups converted due to high-predictability");
55 "Number of select groups not converted due to unpredictability");
57 "Number of select groups not converted due to cold basic block");
59 "Number of select groups converted due to loop-level analysis");
60 STATISTIC(NumSelectsConverted,
"Number of selects converted");
63 "cold-operand-threshold",
64 cl::desc(
"Maximum frequency of path for an operand to be considered cold."),
68 "cold-operand-max-cost-multiplier",
69 cl::desc(
"Maximum cost multiplier of TCC_expensive for the dependence "
70 "slice of a cold operand to be considered inexpensive."),
75 cl::desc(
"Gradient gain threshold (%)."),
80 cl::desc(
"Minimum gain per loop (in cycles) threshold."),
84 "select-opti-loop-relative-gain-threshold",
86 "Minimum relative gain per loop threshold (1/X). Defaults to 12.5%"),
91 cl::desc(
"Default mispredict rate (initialized to 25%)."));
96 cl::desc(
"Disable loop-level heuristics."));
107 std::unique_ptr<BlockFrequencyInfo>
BFI;
108 std::unique_ptr<BranchProbabilityInfo> BPI;
154 void optimizeSelectsBase(
Function &
F, SelectGroups &ProfSIGroups);
155 void optimizeSelectsInnerLoops(
Function &
F, SelectGroups &ProfSIGroups);
159 void convertProfitableSIGroups(SelectGroups &ProfSIGroups);
162 void collectSelectGroups(
BasicBlock &
BB, SelectGroups &SIGroups);
166 void findProfitableSIGroupsBase(SelectGroups &SIGroups,
167 SelectGroups &ProfSIGroups);
168 void findProfitableSIGroupsInnerLoops(
const Loop *L, SelectGroups &SIGroups,
169 SelectGroups &ProfSIGroups);
183 void getExclBackwardsSlice(
Instruction *
I, std::stack<Instruction *> &Slice,
184 bool ForSinking =
false);
191 bool checkLoopHeuristics(
const Loop *L,
const CostInfo LoopDepth[2]);
195 bool computeLoopCosts(
const Loop *L,
const SelectGroups &SIGroups,
234 TSI =
TM->getSubtargetImpl(
F);
235 TLI = TSI->getTargetLowering();
245 TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
F);
246 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
247 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
250 PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
251 ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
252 TSchedModel.init(TSI);
258 return optimizeSelects(
F);
261 bool SelectOptimize::optimizeSelects(
Function &
F) {
263 SelectGroups ProfSIGroups;
265 optimizeSelectsBase(
F, ProfSIGroups);
267 optimizeSelectsInnerLoops(
F, ProfSIGroups);
271 convertProfitableSIGroups(ProfSIGroups);
274 return !ProfSIGroups.empty();
277 void SelectOptimize::optimizeSelectsBase(
Function &
F,
278 SelectGroups &ProfSIGroups) {
280 SelectGroups SIGroups;
283 Loop *L = LI->getLoopFor(&
BB);
286 collectSelectGroups(
BB, SIGroups);
290 findProfitableSIGroupsBase(SIGroups, ProfSIGroups);
293 void SelectOptimize::optimizeSelectsInnerLoops(
Function &
F,
294 SelectGroups &ProfSIGroups) {
297 for (
unsigned long i = 0;
i <
Loops.size(); ++
i)
299 Loops.push_back(ChildL);
305 SelectGroups SIGroups;
307 collectSelectGroups(*
BB, SIGroups);
309 findProfitableSIGroupsInnerLoops(L, SIGroups, ProfSIGroups);
322 DefSI = dyn_cast<SelectInst>(V)) {
323 assert(DefSI->getCondition() ==
SI->getCondition() &&
324 "The condition of DefSI does not match with SI");
325 V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
327 assert(V &&
"Failed to get select true/false value");
331 void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
332 for (SelectGroup &ASI : ProfSIGroups) {
372 typedef std::stack<Instruction *>::size_type StackSizeType;
373 StackSizeType maxTrueSliceLen = 0, maxFalseSliceLen = 0;
377 if (
auto *TI = dyn_cast<Instruction>(
SI->getTrueValue())) {
378 std::stack<Instruction *> TrueSlice;
379 getExclBackwardsSlice(TI, TrueSlice,
true);
380 maxTrueSliceLen =
std::max(maxTrueSliceLen, TrueSlice.size());
381 TrueSlices.push_back(TrueSlice);
383 if (
auto *FI = dyn_cast<Instruction>(
SI->getFalseValue())) {
384 std::stack<Instruction *> FalseSlice;
385 getExclBackwardsSlice(FI, FalseSlice,
true);
386 maxFalseSliceLen =
std::max(maxFalseSliceLen, FalseSlice.size());
387 FalseSlices.push_back(FalseSlice);
401 for (StackSizeType IS = 0; IS < maxTrueSliceLen; ++IS) {
402 for (
auto &
S : TrueSlices) {
404 TrueSlicesInterleaved.push_back(
S.top());
409 for (StackSizeType IS = 0; IS < maxFalseSliceLen; ++IS) {
410 for (
auto &
S : FalseSlices) {
412 FalseSlicesInterleaved.push_back(
S.top());
424 BFI->setBlockFreq(EndBlock,
BFI->getBlockFreq(StartBlock).getFrequency());
431 auto DIt =
SI->getIterator();
432 while (&*DIt != LastSI) {
433 if (DIt->isDebugOrPseudoInst())
434 DebugPseudoINS.push_back(&*DIt);
437 for (
auto *DI : DebugPseudoINS) {
443 BasicBlock *TrueBlock =
nullptr, *FalseBlock =
nullptr;
444 BranchInst *TrueBranch =
nullptr, *FalseBranch =
nullptr;
445 if (!TrueSlicesInterleaved.empty()) {
450 for (
Instruction *TrueInst : TrueSlicesInterleaved)
451 TrueInst->moveBefore(TrueBranch);
453 if (!FalseSlicesInterleaved.empty()) {
458 for (
Instruction *FalseInst : FalseSlicesInterleaved)
459 FalseInst->moveBefore(FalseBranch);
463 if (TrueBlock == FalseBlock) {
464 assert(TrueBlock ==
nullptr &&
465 "Unexpected basic block transform while optimizing select");
470 FalseBranch->setDebugLoc(
SI->getDebugLoc());
479 if (TrueBlock ==
nullptr) {
482 TrueBlock = StartBlock;
483 }
else if (FalseBlock ==
nullptr) {
486 FalseBlock = StartBlock;
493 IB.CreateFreeze(
SI->getCondition(),
SI->getName() +
".frozen");
494 IB.CreateCondBr(CondFr, TT, FT,
SI);
497 INS.
insert(ASI.begin(), ASI.end());
501 for (
auto It = ASI.rbegin(); It != ASI.rend(); ++It) {
510 SI->replaceAllUsesWith(PN);
511 SI->eraseFromParent();
513 ++NumSelectsConverted;
518 void SelectOptimize::collectSelectGroups(
BasicBlock &
BB,
519 SelectGroups &SIGroups) {
521 while (BBIt !=
BB.end()) {
525 SIGroup.push_back(
SI);
526 while (BBIt !=
BB.end()) {
530 SIGroup.push_back(NSI);
541 if (!isSelectKindSupported(
SI))
544 SIGroups.push_back(SIGroup);
549 void SelectOptimize::findProfitableSIGroupsBase(SelectGroups &SIGroups,
550 SelectGroups &ProfSIGroups) {
551 for (SelectGroup &ASI : SIGroups) {
552 ++NumSelectOptAnalyzed;
553 if (isConvertToBranchProfitableBase(ASI))
554 ProfSIGroups.push_back(ASI);
558 void SelectOptimize::findProfitableSIGroupsInnerLoops(
559 const Loop *L, SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
560 NumSelectOptAnalyzed += SIGroups.size();
572 CostInfo LoopCost[2] = {{Scaled64::getZero(), Scaled64::getZero()},
573 {Scaled64::getZero(), Scaled64::getZero()}};
574 if (!computeLoopCosts(L, SIGroups, InstCostMap, LoopCost) ||
575 !checkLoopHeuristics(L, LoopCost)) {
579 for (SelectGroup &ASI : SIGroups) {
582 Scaled64 SelectCost = Scaled64::getZero(), BranchCost = Scaled64::getZero();
584 SelectCost =
std::max(SelectCost, InstCostMap[
SI].PredCost);
585 BranchCost =
std::max(BranchCost, InstCostMap[
SI].NonPredCost);
587 if (BranchCost < SelectCost) {
589 OR <<
"Profitable to convert to branch (loop analysis). BranchCost="
590 << BranchCost.toString() <<
", SelectCost=" << SelectCost.
toString()
593 ++NumSelectConvertedLoop;
594 ProfSIGroups.push_back(ASI);
597 ORmiss <<
"Select is more profitable (loop analysis). BranchCost="
598 << BranchCost.toString()
599 <<
", SelectCost=" << SelectCost.
toString() <<
". ";
605 bool SelectOptimize::isConvertToBranchProfitableBase(
612 if (PSI->isColdBlock(
SI->getParent(),
BFI.get())) {
614 ORmiss <<
"Not converted to branch because of cold basic block. ";
620 if (
SI->getMetadata(LLVMContext::MD_unpredictable)) {
622 ORmiss <<
"Not converted to branch because of unpredictable branch. ";
629 if (isSelectHighlyPredictable(
SI) && TLI->isPredictableSelectExpensive()) {
630 ++NumSelectConvertedHighPred;
631 OR <<
"Converted to branch because of highly predictable branch. ";
638 if (hasExpensiveColdOperand(ASI)) {
639 ++NumSelectConvertedExpColdOperand;
640 OR <<
"Converted to branch because of expensive cold operand.";
645 ORmiss <<
"Not profitable to convert to branch (base heuristic).";
652 return (Numerator + (Denominator / 2)) / Denominator;
655 bool SelectOptimize::hasExpensiveColdOperand(
657 bool ColdOperand =
false;
658 uint64_t TrueWeight, FalseWeight, TotalWeight;
661 TotalWeight = TrueWeight + FalseWeight;
664 }
else if (PSI->hasProfileSummary()) {
666 ORmiss <<
"Profile data available but missing branch-weights metadata for "
667 "select instruction. ";
677 if (TrueWeight < FalseWeight) {
678 ColdI = dyn_cast<Instruction>(
SI->getTrueValue());
679 HotWeight = FalseWeight;
681 ColdI = dyn_cast<Instruction>(
SI->getFalseValue());
682 HotWeight = TrueWeight;
685 std::stack<Instruction *> ColdSlice;
686 getExclBackwardsSlice(ColdI, ColdSlice);
688 while (!ColdSlice.empty()) {
713 void SelectOptimize::getExclBackwardsSlice(
Instruction *
I,
714 std::stack<Instruction *> &Slice,
717 std::queue<Instruction *> Worklist;
719 while (!Worklist.empty()) {
724 if (!Visited.
insert(II).second)
734 isa<SelectInst>(II) || isa<PHINode>(II)))
747 if (
auto *OpI = dyn_cast<Instruction>(II->
getOperand(k)))
752 bool SelectOptimize::isSelectHighlyPredictable(
const SelectInst *
SI) {
756 uint64_t Sum = TrueWeight + FalseWeight;
766 bool SelectOptimize::checkLoopHeuristics(
const Loop *L,
767 const CostInfo LoopCost[2]) {
777 if (LoopCost[0].NonPredCost > LoopCost[0].PredCost ||
778 LoopCost[1].NonPredCost >= LoopCost[1].PredCost) {
779 ORmissL <<
"No select conversion in the loop due to no reduction of loop's "
785 Scaled64 Gain[2] = {LoopCost[0].PredCost - LoopCost[0].NonPredCost,
786 LoopCost[1].PredCost - LoopCost[1].NonPredCost};
794 ORmissL <<
"No select conversion in the loop due to small reduction of "
795 "loop's critical path. Gain="
797 <<
", RelativeGain=" << RelativeGain.
toString() <<
"%. ";
807 if (Gain[1] > Gain[0]) {
809 (LoopCost[1].PredCost - LoopCost[0].PredCost);
811 ORmissL <<
"No select conversion in the loop due to small gradient gain. "
813 << GradientGain.
toString() <<
"%. ";
819 else if (Gain[1] < Gain[0]) {
821 <<
"No select conversion in the loop due to negative gradient gain. ";
835 bool SelectOptimize::computeLoopCosts(
836 const Loop *L,
const SelectGroups &SIGroups,
838 const auto &SIset = getSIset(SIGroups);
841 const unsigned Iterations = 2;
842 for (
unsigned Iter = 0; Iter < Iterations; ++Iter) {
844 CostInfo &MaxCost = LoopCost[Iter];
847 if (
I.isDebugOrPseudoInst())
850 Scaled64 IPredCost = Scaled64::getZero(),
851 INonPredCost = Scaled64::getZero();
856 for (
const Use &U :
I.operands()) {
857 auto UI = dyn_cast<Instruction>(U.get());
860 if (InstCostMap.
count(UI)) {
861 IPredCost =
std::max(IPredCost, InstCostMap[UI].PredCost);
862 INonPredCost =
std::max(INonPredCost, InstCostMap[UI].NonPredCost);
865 auto ILatency = computeInstCost(&
I);
868 ORmissL <<
"Invalid instruction cost preventing analysis and "
869 "optimization of the inner-most loop containing this "
883 if (SIset.contains(&
I)) {
884 auto SI = dyn_cast<SelectInst>(&
I);
886 Scaled64 TrueOpCost = Scaled64::getZero(),
887 FalseOpCost = Scaled64::getZero();
888 if (
auto *TI = dyn_cast<Instruction>(
SI->getTrueValue()))
889 if (InstCostMap.
count(TI))
890 TrueOpCost = InstCostMap[TI].NonPredCost;
891 if (
auto *FI = dyn_cast<Instruction>(
SI->getFalseValue()))
892 if (InstCostMap.
count(FI))
893 FalseOpCost = InstCostMap[FI].NonPredCost;
895 getPredictedPathCost(TrueOpCost, FalseOpCost,
SI);
897 Scaled64 CondCost = Scaled64::getZero();
898 if (
auto *CI = dyn_cast<Instruction>(
SI->getCondition()))
899 if (InstCostMap.
count(CI))
900 CondCost = InstCostMap[CI].NonPredCost;
901 Scaled64 MispredictCost = getMispredictionCost(
SI, CondCost);
903 INonPredCost = PredictedPathCost + MispredictCost;
906 InstCostMap[&
I] = {IPredCost, INonPredCost};
907 MaxCost.PredCost =
std::max(MaxCost.PredCost, IPredCost);
908 MaxCost.NonPredCost =
std::max(MaxCost.NonPredCost, INonPredCost);
916 SelectOptimize::getSIset(
const SelectGroups &SIGroups) {
918 for (
const SelectGroup &ASI : SIGroups)
933 SelectOptimize::getMispredictionCost(
const SelectInst *
SI,
935 uint64_t MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty;
942 if (isSelectHighlyPredictable(
SI))
953 return MispredictCost;
964 uint64_t SumWeight = TrueWeight + FalseWeight;
965 if (SumWeight != 0) {
980 bool SelectOptimize::isSelectKindSupported(
SelectInst *
SI) {
981 bool VectorCond = !
SI->getCondition()->getType()->isIntegerTy(1);
985 if (
SI->getType()->isVectorTy())
989 return TLI->isSelectSupported(SelectKind);