47#define DEBUG_TYPE "select-optimize"
50 "Number of select groups considered for conversion to branch");
52 "Number of select groups converted due to expensive cold operand");
54 "Number of select groups converted due to high-predictability");
56 "Number of select groups not converted due to unpredictability");
58 "Number of select groups not converted due to cold basic block");
60 "Number of select groups converted due to loop-level analysis");
61STATISTIC(NumSelectsConverted,
"Number of selects converted");
64 "cold-operand-threshold",
65 cl::desc(
"Maximum frequency of path for an operand to be considered cold."),
69 "cold-operand-max-cost-multiplier",
70 cl::desc(
"Maximum cost multiplier of TCC_expensive for the dependence "
71 "slice of a cold operand to be considered inexpensive."),
76 cl::desc(
"Gradient gain threshold (%)."),
81 cl::desc(
"Minimum gain per loop (in cycles) threshold."),
85 "select-opti-loop-relative-gain-threshold",
87 "Minimum relative gain per loop threshold (1/X). Defaults to 12.5%"),
92 cl::desc(
"Default mispredict rate (initialized to 25%)."));
97 cl::desc(
"Disable loop-level heuristics."));
101class SelectOptimizeImpl {
113 SelectOptimizeImpl() =
default;
118 using Scaled64 = ScaledNumber<uint64_t>;
124 Scaled64 NonPredCost;
135 bool Inverted =
false;
142 SelectLike(Instruction *I,
bool Inverted =
false,
unsigned CondIdx = 0)
143 : I(I), Inverted(Inverted), CondIdx(CondIdx) {}
150 unsigned getConditionOpIndex() {
return CondIdx; };
156 Value *getTrueValue(
bool HonorInverts =
true)
const {
157 if (Inverted && HonorInverts)
158 return getFalseValue(
false);
160 return Sel->getTrueValue();
172 Value *getFalseValue(
bool HonorInverts =
true)
const {
173 if (Inverted && HonorInverts)
174 return getTrueValue(
false);
176 return Sel->getFalseValue();
181 return BO->getOperand(1 - CondIdx);
189 Scaled64 getOpCostOnBranch(
190 bool IsTrue,
const DenseMap<const Instruction *, CostInfo> &InstCostMap,
191 const TargetTransformInfo *
TTI) {
192 auto *
V = IsTrue ? getTrueValue() : getFalseValue();
195 auto It = InstCostMap.
find(
IV);
196 return It != InstCostMap.
end() ? It->second.NonPredCost
207 {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
208 {TTI::OK_UniformConstantValue, TTI::OP_PowerOf2});
211 auto It = InstCostMap.find(OpI);
212 if (It != InstCostMap.end())
213 TotalCost += It->second.NonPredCost;
227 using SelectGroups = SmallVector<SelectGroup, 2>;
231 bool optimizeSelects(Function &
F);
237 void optimizeSelectsBase(Function &
F, SelectGroups &ProfSIGroups);
238 void optimizeSelectsInnerLoops(Function &
F, SelectGroups &ProfSIGroups);
242 void convertProfitableSIGroups(SelectGroups &ProfSIGroups);
245 void collectSelectGroups(BasicBlock &BB, SelectGroups &SIGroups);
249 void findProfitableSIGroupsBase(SelectGroups &SIGroups,
250 SelectGroups &ProfSIGroups);
251 void findProfitableSIGroupsInnerLoops(
const Loop *L, SelectGroups &SIGroups,
252 SelectGroups &ProfSIGroups);
256 bool isConvertToBranchProfitableBase(
const SelectGroup &ASI);
261 bool hasExpensiveColdOperand(
const SelectGroup &ASI);
266 void getExclBackwardsSlice(Instruction *
I, std::stack<Instruction *> &Slice,
267 Instruction *SI,
bool ForSinking =
false);
270 bool isSelectHighlyPredictable(
const SelectLike SI);
274 bool checkLoopHeuristics(
const Loop *L,
const CostInfo LoopDepth[2]);
278 bool computeLoopCosts(
const Loop *L,
const SelectGroups &SIGroups,
279 DenseMap<const Instruction *, CostInfo> &InstCostMap,
283 SmallDenseMap<const Instruction *, SelectLike, 2>
284 getSImap(
const SelectGroups &SIGroups);
288 SmallDenseMap<const Instruction *, const SelectGroup *, 2>
289 getSGmap(
const SelectGroups &SIGroups);
292 std::optional<uint64_t> computeInstCost(
const Instruction *
I);
295 Scaled64 getMispredictionCost(
const SelectLike SI,
const Scaled64 CondCost);
298 Scaled64 getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
299 const SelectLike SI);
302 bool isSelectKindSupported(
const SelectLike SI);
306 SelectOptimizeImpl Impl;
311 SelectOptimize() : FunctionPass(ID) {
319 return Impl.runOnFunction(
F, *
this);
322 void getAnalysisUsage(AnalysisUsage &AU)
const override {
328 AU.
addRequired<OptimizationRemarkEmitterWrapperPass>();
336 SelectOptimizeImpl Impl(TM);
337 return Impl.run(
F,
FAM);
340char SelectOptimize::ID = 0;
369 if (!
TTI->enableSelectOptimize())
373 .getCachedResult<ProfileSummaryAnalysis>(*
F.getParent());
374 assert(PSI &&
"This pass requires module analysis pass `profile-summary`!");
383 TSchedModel.
init(TSI);
404 if (!
TTI->enableSelectOptimize())
411 TSchedModel.
init(TSI);
417 return optimizeSelects(
F);
420bool SelectOptimizeImpl::optimizeSelects(
Function &
F) {
422 SelectGroups ProfSIGroups;
424 optimizeSelectsBase(
F, ProfSIGroups);
426 optimizeSelectsInnerLoops(
F, ProfSIGroups);
430 convertProfitableSIGroups(ProfSIGroups);
433 return !ProfSIGroups.empty();
436void SelectOptimizeImpl::optimizeSelectsBase(
Function &
F,
437 SelectGroups &ProfSIGroups) {
439 SelectGroups SIGroups;
443 if (L &&
L->isInnermost())
445 collectSelectGroups(BB, SIGroups);
449 findProfitableSIGroupsBase(SIGroups, ProfSIGroups);
452void SelectOptimizeImpl::optimizeSelectsInnerLoops(
Function &
F,
453 SelectGroups &ProfSIGroups) {
456 for (
unsigned long i = 0; i <
Loops.size(); ++i)
460 if (!
L->isInnermost())
463 SelectGroups SIGroups;
465 collectSelectGroups(*BB, SIGroups);
467 findProfitableSIGroupsInnerLoops(L, SIGroups, ProfSIGroups);
483 SelectOptimizeImpl::SelectLike &
SI,
bool isTrue,
486 Value *V = isTrue ?
SI.getTrueValue() :
SI.getFalseValue();
489 if (
auto It = OptSelects.find(
IV); It != OptSelects.end())
490 return isTrue ? It->second.first : It->second.second;
495 assert((BO->getOpcode() == Instruction::Add ||
496 BO->getOpcode() == Instruction::Or ||
497 BO->getOpcode() == Instruction::Sub) &&
498 "Only currently handling Add, Or and Sub binary operators.");
500 auto *CBO = BO->clone();
501 auto CondIdx =
SI.getConditionOpIndex();
504 CBO->setOperand(CondIdx, ConstantInt::get(CBO->getType(), 1));
507 "Unexpected opcode");
511 unsigned OtherIdx = 1 - CondIdx;
513 if (
auto It = OptSelects.find(
IV); It != OptSelects.end())
514 CBO->setOperand(OtherIdx, isTrue ? It->second.first : It->second.second);
516 CBO->insertBefore(
B->getTerminator()->getIterator());
520void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
521 for (SelectGroup &ASI : ProfSIGroups) {
561 typedef std::stack<Instruction *>::size_type StackSizeType;
562 StackSizeType maxTrueSliceLen = 0, maxFalseSliceLen = 0;
563 for (SelectLike &
SI : ASI.Selects) {
569 std::stack<Instruction *> TrueSlice;
570 getExclBackwardsSlice(TI, TrueSlice,
SI.getI(),
true);
571 maxTrueSliceLen = std::max(maxTrueSliceLen, TrueSlice.size());
576 std::stack<Instruction *> FalseSlice;
577 getExclBackwardsSlice(FI, FalseSlice,
SI.getI(),
true);
578 maxFalseSliceLen = std::max(maxFalseSliceLen, FalseSlice.size());
594 for (StackSizeType IS = 0; IS < maxTrueSliceLen; ++IS) {
595 for (
auto &S : TrueSlices) {
597 TrueSlicesInterleaved.
push_back(S.top());
602 for (StackSizeType IS = 0; IS < maxFalseSliceLen; ++IS) {
603 for (
auto &S : FalseSlices) {
605 FalseSlicesInterleaved.
push_back(S.top());
612 SelectLike &
SI = ASI.Selects.front();
613 SelectLike &LastSI = ASI.Selects.back();
621 SplitPt.setHeadBit(
true);
630 auto DIt =
SI.getI()->getIterator();
631 auto NIt = ASI.Selects.begin();
632 while (&*DIt != LastSI.getI()) {
633 if (NIt != ASI.Selects.end() && &*DIt == NIt->getI())
640 for (
auto *DI : SinkInstrs)
641 DI->moveBeforePreserving(InsertionPoint);
658 std::next(LastSI.getI()->getIterator()));
663 BasicBlock *TrueBlock =
nullptr, *FalseBlock =
nullptr;
664 BranchInst *TrueBranch =
nullptr, *FalseBranch =
nullptr;
666 auto HasSelectLike = [](SelectGroup &SG,
bool IsTrue) {
667 for (
auto &SL : SG.Selects) {
668 if ((IsTrue ? SL.getTrueValue() : SL.getFalseValue()) ==
nullptr)
673 if (!TrueSlicesInterleaved.
empty() || HasSelectLike(ASI,
true)) {
677 TrueBranch->
setDebugLoc(LastSI.getI()->getDebugLoc());
678 for (
Instruction *TrueInst : TrueSlicesInterleaved)
681 if (!FalseSlicesInterleaved.
empty() || HasSelectLike(ASI,
false)) {
686 FalseBranch->setDebugLoc(LastSI.getI()->getDebugLoc());
687 for (
Instruction *FalseInst : FalseSlicesInterleaved)
688 FalseInst->moveBefore(FalseBranch->getIterator());
692 if (TrueBlock == FalseBlock) {
693 assert(TrueBlock ==
nullptr &&
694 "Unexpected basic block transform while optimizing select");
699 FalseBranch->setDebugLoc(
SI.getI()->getDebugLoc());
708 if (TrueBlock ==
nullptr) {
711 TrueBlock = StartBlock;
712 }
else if (FalseBlock ==
nullptr) {
715 FalseBlock = StartBlock;
722 IB.CreateFreeze(ASI.Condition, ASI.Condition->getName() +
".frozen");
729 InsertionPoint = EndBlock->
begin();
730 for (SelectLike &
SI : ASI.Selects) {
738 for (
auto &SG : ProfSIGroups) {
739 if (SG.Condition ==
SI.getI())
743 SI.getI()->replaceAllUsesWith(PN);
750 ++NumSelectsConverted;
752 IB.CreateCondBr(CondFr, TT, FT,
SI.getI());
755 for (SelectLike &
SI : ASI.Selects)
756 SI.getI()->eraseFromParent();
760void SelectOptimizeImpl::collectSelectGroups(
BasicBlock &BB,
761 SelectGroups &SIGroups) {
771 struct SelectLikeInfo {
775 unsigned ConditionIdx;
786 auto ProcessSelectInfo = [&SelectInfo, &SeenCmp](
Instruction *
I) {
789 return SelectInfo.
end();
794 Cond->getType()->isIntegerTy(1)) {
796 return SelectInfo.
insert({
I, {
Cond,
true, Inverted, 0}}).first;
800 return SelectInfo.
insert({
I, {
Cond,
true,
true, 0}}).first;
806 return SelectInfo.
insert({
I, {
Cond,
false, Inverted, 0}}).first;
811 I->getType()->getIntegerBitWidth() == Shift->
getZExtValue() + 1) {
812 for (
auto *CmpI : SeenCmp) {
813 auto Pred = CmpI->getPredicate();
814 if (Val != CmpI->getOperand(0))
826 return SelectInfo.
insert({
I, {CmpI,
true, Inverted, 0}}).first;
829 return SelectInfo.
end();
837 auto MatchZExtOrSExtPattern =
839 auto MatchShiftPattern =
844 if ((
match(
I, MatchZExtOrSExtPattern) &&
X->getType()->isIntegerTy(1)) ||
845 (
match(
I, MatchShiftPattern) &&
846 X->getType()->getIntegerBitWidth() == Shift->
getZExtValue() + 1)) {
847 if (
I->getOpcode() != Instruction::Add &&
848 I->getOpcode() != Instruction::Sub &&
849 I->getOpcode() != Instruction::Or)
850 return SelectInfo.
end();
852 if (
I->getOpcode() == Instruction::Or &&
I->getType()->isIntegerTy(1))
853 return SelectInfo.
end();
859 unsigned Idx =
I->getOpcode() == Instruction::Sub ? 1 : 0;
860 for (; Idx < 2; Idx++) {
861 auto *
Op =
I->getOperand(Idx);
862 auto It = SelectInfo.
find(
Op);
863 if (It != SelectInfo.
end() && It->second.IsAuxiliary) {
864 Cond = It->second.Cond;
865 bool Inverted = It->second.IsInverted;
866 return SelectInfo.
insert({
I, {
Cond,
false, Inverted, Idx}}).first;
870 return SelectInfo.
end();
873 bool AlreadyProcessed =
false;
876 while (BBIt != BB.
end()) {
878 if (
I->isDebugOrPseudoInst())
881 if (!AlreadyProcessed)
882 It = ProcessSelectInfo(
I);
884 AlreadyProcessed =
false;
886 if (It == SelectInfo.
end() || It->second.IsAuxiliary)
889 if (!
TTI->shouldTreatInstructionLikeSelect(
I))
894 if (!
Cond->getType()->isIntegerTy(1))
897 SelectGroup SIGroup = {
Cond, {}};
898 SIGroup.Selects.emplace_back(
I, It->second.IsInverted,
899 It->second.ConditionIdx);
903 if (!isSelectKindSupported(SIGroup.Selects.front()))
906 while (BBIt != BB.
end()) {
915 It = ProcessSelectInfo(NI);
916 if (It == SelectInfo.
end()) {
917 AlreadyProcessed =
true;
922 auto [CurrCond, IsAux, IsRev, CondIdx] = It->second;
923 if (
Cond != CurrCond) {
924 AlreadyProcessed =
true;
929 SIGroup.Selects.emplace_back(NI, IsRev, CondIdx);
933 dbgs() <<
"New Select group (" << SIGroup.Selects.size() <<
") with\n";
934 for (
auto &
SI : SIGroup.Selects)
935 dbgs() <<
" " << *
SI.getI() <<
"\n";
938 SIGroups.push_back(SIGroup);
942void SelectOptimizeImpl::findProfitableSIGroupsBase(
943 SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
944 for (SelectGroup &ASI : SIGroups) {
945 ++NumSelectOptAnalyzed;
946 if (isConvertToBranchProfitableBase(ASI))
947 ProfSIGroups.push_back(ASI);
957void SelectOptimizeImpl::findProfitableSIGroupsInnerLoops(
958 const Loop *L, SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
959 NumSelectOptAnalyzed += SIGroups.size();
971 CostInfo LoopCost[2] = {{Scaled64::getZero(), Scaled64::getZero()},
972 {Scaled64::getZero(), Scaled64::getZero()}};
973 if (!computeLoopCosts(L, SIGroups, InstCostMap, LoopCost) ||
974 !checkLoopHeuristics(L, LoopCost)) {
978 for (SelectGroup &ASI : SIGroups) {
981 Scaled64 SelectCost = Scaled64::getZero(), BranchCost = Scaled64::getZero();
982 for (SelectLike &
SI : ASI.Selects) {
983 const auto &ICM = InstCostMap[
SI.getI()];
984 SelectCost = std::max(SelectCost, ICM.PredCost);
985 BranchCost = std::max(BranchCost, ICM.NonPredCost);
987 if (BranchCost < SelectCost) {
989 ASI.Selects.front().getI());
990 OR <<
"Profitable to convert to branch (loop analysis). BranchCost="
991 << BranchCost.toString() <<
", SelectCost=" << SelectCost.toString()
994 ++NumSelectConvertedLoop;
995 ProfSIGroups.push_back(ASI);
998 ASI.Selects.front().getI());
999 ORmiss <<
"Select is more profitable (loop analysis). BranchCost="
1000 << BranchCost.toString()
1001 <<
", SelectCost=" << SelectCost.toString() <<
". ";
1007bool SelectOptimizeImpl::isConvertToBranchProfitableBase(
1008 const SelectGroup &ASI) {
1009 const SelectLike &
SI = ASI.Selects.front();
1018 ORmiss <<
"Not converted to branch because of cold basic block. ";
1024 if (
SI.getI()->getMetadata(LLVMContext::MD_unpredictable)) {
1026 ORmiss <<
"Not converted to branch because of unpredictable branch. ";
1034 ++NumSelectConvertedHighPred;
1035 OR <<
"Converted to branch because of highly predictable branch. ";
1042 if (hasExpensiveColdOperand(ASI)) {
1043 ++NumSelectConvertedExpColdOperand;
1044 OR <<
"Converted to branch because of expensive cold operand.";
1052 auto *BB =
SI.getI()->getParent();
1054 if (L && !
L->isInnermost() &&
L->getLoopLatch() == BB &&
1055 ASI.Selects.size() >= 3) {
1056 OR <<
"Converted to branch because select group in the latch block is big.";
1061 ORmiss <<
"Not profitable to convert to branch (base heuristic).";
1068 return (Numerator + (Denominator / 2)) / Denominator;
1078bool SelectOptimizeImpl::hasExpensiveColdOperand(
const SelectGroup &ASI) {
1079 bool ColdOperand =
false;
1080 uint64_t TrueWeight, FalseWeight, TotalWeight;
1082 uint64_t MinWeight = std::min(TrueWeight, FalseWeight);
1083 TotalWeight = TrueWeight + FalseWeight;
1088 ASI.Selects.front().getI());
1089 ORmiss <<
"Profile data available but missing branch-weights metadata for "
1090 "select instruction. ";
1097 for (SelectLike
SI : ASI.Selects) {
1100 if (TrueWeight < FalseWeight) {
1102 HotWeight = FalseWeight;
1105 HotWeight = TrueWeight;
1108 std::stack<Instruction *> ColdSlice;
1109 getExclBackwardsSlice(ColdI, ColdSlice,
SI.getI());
1111 while (!ColdSlice.empty()) {
1112 SliceCost +=
TTI->getInstructionCost(ColdSlice.top(),
1138 while (&*It !=
SI) {
1139 if (It->mayWriteToMemory())
1152void SelectOptimizeImpl::getExclBackwardsSlice(
Instruction *
I,
1153 std::stack<Instruction *> &Slice,
1157 std::queue<Instruction *> Worklist;
1159 while (!Worklist.empty()) {
1167 if (!
II->hasOneUse())
1173 if (ForSinking && (
II->isTerminator() ||
II->mayHaveSideEffects() ||
1199bool SelectOptimizeImpl::isSelectHighlyPredictable(
const SelectLike
SI) {
1200 uint64_t TrueWeight, FalseWeight;
1202 uint64_t
Max = std::max(TrueWeight, FalseWeight);
1203 uint64_t Sum = TrueWeight + FalseWeight;
1206 if (Probability >
TTI->getPredictableBranchThreshold())
1213bool SelectOptimizeImpl::checkLoopHeuristics(
const Loop *L,
1214 const CostInfo LoopCost[2]) {
1222 &*
L->getHeader()->getFirstNonPHIIt());
1224 if (LoopCost[0].NonPredCost > LoopCost[0].PredCost ||
1225 LoopCost[1].NonPredCost >= LoopCost[1].PredCost) {
1226 ORmissL <<
"No select conversion in the loop due to no reduction of loop's "
1232 Scaled64 Gain[2] = {LoopCost[0].PredCost - LoopCost[0].NonPredCost,
1233 LoopCost[1].PredCost - LoopCost[1].NonPredCost};
1240 Scaled64 RelativeGain = Scaled64::get(100) * Gain[1] / LoopCost[1].PredCost;
1241 ORmissL <<
"No select conversion in the loop due to small reduction of "
1242 "loop's critical path. Gain="
1243 << Gain[1].toString()
1244 <<
", RelativeGain=" << RelativeGain.toString() <<
"%. ";
1254 if (Gain[1] > Gain[0]) {
1255 Scaled64 GradientGain = Scaled64::get(100) * (Gain[1] - Gain[0]) /
1256 (LoopCost[1].PredCost - LoopCost[0].PredCost);
1258 ORmissL <<
"No select conversion in the loop due to small gradient gain. "
1260 << GradientGain.toString() <<
"%. ";
1266 else if (Gain[1] < Gain[0]) {
1268 <<
"No select conversion in the loop due to negative gradient gain. ";
1282bool SelectOptimizeImpl::computeLoopCosts(
1283 const Loop *L,
const SelectGroups &SIGroups,
1285 LLVM_DEBUG(
dbgs() <<
"Calculating Latency / IPredCost / INonPredCost of loop "
1286 <<
L->getHeader()->getName() <<
"\n");
1287 const auto SImap = getSImap(SIGroups);
1288 const auto SGmap = getSGmap(SIGroups);
1291 const unsigned Iterations = 2;
1292 for (
unsigned Iter = 0; Iter < Iterations; ++Iter) {
1294 CostInfo &MaxCost = LoopCost[Iter];
1297 if (
I.isDebugOrPseudoInst())
1300 Scaled64 IPredCost = Scaled64::getZero(),
1301 INonPredCost = Scaled64::getZero();
1306 for (
const Use &U :
I.operands()) {
1310 if (
auto It = InstCostMap.
find(UI); It != InstCostMap.
end()) {
1311 IPredCost = std::max(IPredCost, It->second.PredCost);
1312 INonPredCost = std::max(INonPredCost, It->second.NonPredCost);
1315 auto ILatency = computeInstCost(&
I);
1318 ORmissL <<
"Invalid instruction cost preventing analysis and "
1319 "optimization of the inner-most loop containing this "
1324 IPredCost += Scaled64::get(*ILatency);
1325 INonPredCost += Scaled64::get(*ILatency);
1333 if (
auto It = SImap.find(&
I); It != SImap.end()) {
1334 auto SI = It->second;
1335 const auto *SG = SGmap.at(&
I);
1336 Scaled64 TrueOpCost =
SI.getOpCostOnBranch(
true, InstCostMap,
TTI);
1337 Scaled64 FalseOpCost =
SI.getOpCostOnBranch(
false, InstCostMap,
TTI);
1338 Scaled64 PredictedPathCost =
1339 getPredictedPathCost(TrueOpCost, FalseOpCost,
SI);
1341 Scaled64 CondCost = Scaled64::getZero();
1343 if (
auto It = InstCostMap.
find(CI); It != InstCostMap.
end())
1344 CondCost = It->second.NonPredCost;
1345 Scaled64 MispredictCost = getMispredictionCost(
SI, CondCost);
1347 INonPredCost = PredictedPathCost + MispredictCost;
1350 << INonPredCost <<
" for " <<
I <<
"\n");
1352 InstCostMap[&
I] = {IPredCost, INonPredCost};
1353 MaxCost.PredCost = std::max(MaxCost.PredCost, IPredCost);
1354 MaxCost.NonPredCost = std::max(MaxCost.NonPredCost, INonPredCost);
1358 <<
" MaxCost = " << MaxCost.PredCost <<
" "
1359 << MaxCost.NonPredCost <<
"\n");
1365SelectOptimizeImpl::getSImap(
const SelectGroups &SIGroups) {
1367 for (
const SelectGroup &ASI : SIGroups)
1368 for (
const SelectLike &
SI : ASI.Selects)
1374SelectOptimizeImpl::getSGmap(
const SelectGroups &SIGroups) {
1376 for (
const SelectGroup &ASI : SIGroups)
1377 for (
const SelectLike &
SI : ASI.Selects)
1382std::optional<uint64_t>
1383SelectOptimizeImpl::computeInstCost(
const Instruction *
I) {
1387 return std::optional<uint64_t>(ICost.
getValue());
1388 return std::nullopt;
1392SelectOptimizeImpl::getMispredictionCost(
const SelectLike
SI,
1393 const Scaled64 CondCost) {
1401 if (isSelectHighlyPredictable(
SI))
1407 Scaled64 MispredictCost =
1408 std::max(Scaled64::get(MispredictPenalty), CondCost) *
1409 Scaled64::get(MispredictRate);
1410 MispredictCost /= Scaled64::get(100);
1412 return MispredictCost;
1418SelectOptimizeImpl::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
1419 const SelectLike
SI) {
1420 Scaled64 PredPathCost;
1421 uint64_t TrueWeight, FalseWeight;
1423 uint64_t SumWeight = TrueWeight + FalseWeight;
1424 if (SumWeight != 0) {
1425 PredPathCost = TrueCost * Scaled64::get(TrueWeight) +
1426 FalseCost * Scaled64::get(FalseWeight);
1427 PredPathCost /= Scaled64::get(SumWeight);
1428 return PredPathCost;
1433 PredPathCost = std::max(TrueCost * Scaled64::get(3) + FalseCost,
1434 FalseCost * Scaled64::get(3) + TrueCost);
1435 PredPathCost /= Scaled64::get(4);
1436 return PredPathCost;
1439bool SelectOptimizeImpl::isSelectKindSupported(
const SelectLike
SI) {
1441 if (
SI.getType()->isVectorTy())
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static Value * getTrueOrFalseValue(SelectInst *SI, bool isTrue, const SmallPtrSet< const Instruction *, 2 > &Selects)
If isTrue is true, return the true value of SI, otherwise return false value of SI.
static bool runOnFunction(Function &F, bool PostInlining)
print mir2vec MIR2Vec Vocabulary Printer Pass
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
const GCNTargetMachine & getTM(const GCNSubtarget *STI)
static bool isSafeToSinkLoad(Instruction *LoadI, Instruction *SI)
static cl::opt< unsigned > ColdOperandMaxCostMultiplier("cold-operand-max-cost-multiplier", cl::desc("Maximum cost multiplier of TCC_expensive for the dependence " "slice of a cold operand to be considered inexpensive."), cl::init(1), cl::Hidden)
static cl::opt< unsigned > ColdOperandThreshold("cold-operand-threshold", cl::desc("Maximum frequency of path for an operand to be considered cold."), cl::init(20), cl::Hidden)
static cl::opt< bool > DisableLoopLevelHeuristics("disable-loop-level-heuristics", cl::Hidden, cl::init(false), cl::desc("Disable loop-level heuristics."))
static cl::opt< unsigned > GainCycleThreshold("select-opti-loop-cycle-gain-threshold", cl::desc("Minimum gain per loop (in cycles) threshold."), cl::init(4), cl::Hidden)
static cl::opt< unsigned > MispredictDefaultRate("mispredict-default-rate", cl::Hidden, cl::init(25), cl::desc("Default mispredict rate (initialized to 25%)."))
static void EmitAndPrintRemark(OptimizationRemarkEmitter *ORE, DiagnosticInfoOptimizationBase &Rem)
static cl::opt< unsigned > GainGradientThreshold("select-opti-loop-gradient-gain-threshold", cl::desc("Gradient gain threshold (%)."), cl::init(25), cl::Hidden)
static cl::opt< unsigned > GainRelativeThreshold("select-opti-loop-relative-gain-threshold", cl::desc("Minimum relative gain per loop threshold (1/X). Defaults to 12.5%"), cl::init(8), cl::Hidden)
This file contains the declaration of the SelectOptimizePass class, its corresponding pass name is se...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
static const uint32_t IV[8]
AnalysisUsage & addRequired()
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
LLVM_ABI void insertDbgRecordBefore(DbgRecord *DR, InstListType::iterator Here)
Insert a DbgRecord into a block at the position given by Here.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
InstListType::iterator iterator
Instruction iterators...
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Analysis pass which computes BlockFrequencyInfo.
Legacy analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
LLVM_ABI void setBlockFreq(const BasicBlock *BB, BlockFrequency Freq)
LLVM_ABI BlockFrequency getBlockFreq(const BasicBlock *BB) const
getblockFreq - Return block frequency.
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ ICMP_SGT
signed greater than
@ ICMP_SGE
signed greater or equal
This is the shared class of boolean and integer constants.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
Base class for non-instruction debug metadata records that have positions within IR.
LLVM_ABI void removeFromParent()
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Common features for diagnostics dealing with optimization remarks that are used by both IR and MIR pa...
std::string getMsg() const
FunctionPass class - This class is used to implement most global optimizations.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isDebugOrPseudoInst() const LLVM_READONLY
Return true if the instruction is a DbgInfoIntrinsic or PseudoProbeInst.
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Analysis pass that exposes the LoopInfo for a function.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
The legacy pass manager's analysis pass to compute loop information.
Represents a single loop in the control flow graph.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
bool isColdBlock(const BBType *BB, BFIT *BFI) const
Returns true if BasicBlock BB is considered cold.
Simple representation of a scaled number.
static ScaledNumber get(uint64_t N)
static ScaledNumber getZero()
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
bool insert(const value_type &X)
Insert a new element into the SetVector.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Analysis pass providing the TargetTransformInfo.
virtual bool isSelectSupported(SelectSupportKind) const
SelectSupportKind
Enum that describes what type of support for selects the target has.
bool isPredictableSelectExpensive() const
Return true if selects are only cheaper than branches if the branch is unlikely to be predicted right...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Target-Independent Code Generator Pass Configuration Options.
Provide an instruction scheduling machine model to CodeGen passes.
const MCSchedModel * getMCSchedModel() const
LLVM_ABI void init(const TargetSubtargetInfo *TSInfo, bool EnableSModel=true, bool EnableSItins=true)
Initialize the machine model for instruction scheduling.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetLowering * getTargetLowering() const
bool isIntegerTy() const
True if this is an instance of IntegerType.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
bool match(Val *V, const Pattern &P)
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
friend class Instruction
Iterator for Instructions in a `BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
LLVM_ABI FunctionPass * createSelectOptimizePass()
This pass converts conditional moves to conditional jumps when profitable.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
LLVM_ABI bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
constexpr T divideNearest(U Numerator, V Denominator)
Returns (Numerator / Denominator) rounded by round-half-up.
auto dyn_cast_or_null(const Y &Val)
LLVM_ABI void initializeSelectOptimizePass(PassRegistry &)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
DWARFExpression::Operation Op
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
unsigned MispredictPenalty