SelectOptimize.cpp (LLVM 16.0.0git)
1 //===--- SelectOptimize.cpp - Convert select to branches if profitable ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass converts selects to conditional jumps when profitable.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/ADT/Optional.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/Statistic.h"
16 #include "llvm/Analysis/BlockFrequencyInfo.h"
17 #include "llvm/Analysis/BranchProbabilityInfo.h"
18 #include "llvm/Analysis/LoopInfo.h"
19 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
20 #include "llvm/Analysis/ProfileSummaryInfo.h"
21 #include "llvm/Analysis/TargetTransformInfo.h"
22 #include "llvm/CodeGen/Passes.h"
23 #include "llvm/CodeGen/TargetLowering.h"
24 #include "llvm/CodeGen/TargetPassConfig.h"
25 #include "llvm/CodeGen/TargetSchedule.h"
26 #include "llvm/CodeGen/TargetSubtargetInfo.h"
27 #include "llvm/IR/BasicBlock.h"
28 #include "llvm/IR/Dominators.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/IR/IRBuilder.h"
31 #include "llvm/IR/Instruction.h"
32 #include "llvm/IR/ProfDataUtils.h"
33 #include "llvm/InitializePasses.h"
34 #include "llvm/Pass.h"
35 #include "llvm/Support/ScaledNumber.h"
36 #include "llvm/Target/TargetMachine.h"
37 #include "llvm/Transforms/Utils/SizeOpts.h"
38 #include <algorithm>
39 #include <memory>
40 #include <queue>
41 #include <stack>
42 #include <string>
43 
44 using namespace llvm;
45 
46 #define DEBUG_TYPE "select-optimize"
47 
48 STATISTIC(NumSelectOptAnalyzed,
49  "Number of select groups considered for conversion to branch");
50 STATISTIC(NumSelectConvertedExpColdOperand,
51  "Number of select groups converted due to expensive cold operand");
52 STATISTIC(NumSelectConvertedHighPred,
53  "Number of select groups converted due to high-predictability");
54 STATISTIC(NumSelectUnPred,
55  "Number of select groups not converted due to unpredictability");
56 STATISTIC(NumSelectColdBB,
57  "Number of select groups not converted due to cold basic block");
58 STATISTIC(NumSelectConvertedLoop,
59  "Number of select groups converted due to loop-level analysis");
60 STATISTIC(NumSelectsConverted, "Number of selects converted");
61 
62 static cl::opt<unsigned> ColdOperandThreshold(
63  "cold-operand-threshold",
64  cl::desc("Maximum frequency of path for an operand to be considered cold."),
65  cl::init(20), cl::Hidden);
66 
67 static cl::opt<unsigned> ColdOperandMaxCostMultiplier(
68  "cold-operand-max-cost-multiplier",
69  cl::desc("Maximum cost multiplier of TCC_expensive for the dependence "
70  "slice of a cold operand to be considered inexpensive."),
71  cl::init(1), cl::Hidden);
72 
73 static cl::opt<unsigned>
74  GainGradientThreshold("select-opti-loop-gradient-gain-threshold",
75  cl::desc("Gradient gain threshold (%)."),
76  cl::init(25), cl::Hidden);
77 
78 static cl::opt<unsigned>
79  GainCycleThreshold("select-opti-loop-cycle-gain-threshold",
80  cl::desc("Minimum gain per loop (in cycles) threshold."),
81  cl::init(4), cl::Hidden);
82 
83 static cl::opt<unsigned> GainRelativeThreshold(
84  "select-opti-loop-relative-gain-threshold",
85  cl::desc(
86  "Minimum relative gain per loop threshold (1/X). Defaults to 12.5%"),
87  cl::init(8), cl::Hidden);
88 
89 static cl::opt<unsigned> MispredictDefaultRate(
90  "mispredict-default-rate", cl::Hidden, cl::init(25),
91  cl::desc("Default mispredict rate (initialized to 25%)."));
92 
93 static cl::opt<bool>
94  DisableLoopLevelHeuristics("disable-loop-level-heuristics", cl::Hidden,
95  cl::init(false),
96  cl::desc("Disable loop-level heuristics."));
97 
98 namespace {
99 
100 class SelectOptimize : public FunctionPass {
101  const TargetMachine *TM = nullptr;
102  const TargetSubtargetInfo *TSI;
103  const TargetLowering *TLI = nullptr;
104  const TargetTransformInfo *TTI = nullptr;
105  const LoopInfo *LI;
106  DominatorTree *DT;
107  std::unique_ptr<BlockFrequencyInfo> BFI;
108  std::unique_ptr<BranchProbabilityInfo> BPI;
109  ProfileSummaryInfo *PSI;
110  OptimizationRemarkEmitter *ORE;
111  TargetSchedModel TSchedModel;
112 
113 public:
114  static char ID;
115 
116  SelectOptimize() : FunctionPass(ID) {
117  initializeSelectOptimizePass(*PassRegistry::getPassRegistry());
118  }
119 
120  bool runOnFunction(Function &F) override;
121 
122  void getAnalysisUsage(AnalysisUsage &AU) const override {
123  AU.addRequired<ProfileSummaryInfoWrapperPass>();
124  AU.addRequired<TargetPassConfig>();
125  AU.addRequired<TargetTransformInfoWrapperPass>();
126  AU.addRequired<DominatorTreeWrapperPass>();
127  AU.addRequired<LoopInfoWrapperPass>();
128  AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
129  }
130 
131 private:
132  // Select groups consist of consecutive select instructions with the same
133  // condition.
134  using SelectGroup = SmallVector<SelectInst *, 2>;
135  using SelectGroups = SmallVector<SelectGroup, 2>;
136 
137  using Scaled64 = ScaledNumber<uint64_t>;
138 
139  struct CostInfo {
140  /// Predicated cost (with selects as conditional moves).
141  Scaled64 PredCost;
142  /// Non-predicated cost (with selects converted to branches).
143  Scaled64 NonPredCost;
144  };
145 
146  // Converts select instructions of a function to conditional jumps when deemed
147  // profitable. Returns true if at least one select was converted.
148  bool optimizeSelects(Function &F);
149 
150  // Heuristics for determining which select instructions can be profitably
151  // converted to branches. Separate heuristics for selects in inner-most loops
152  // and the rest of code regions (base heuristics for non-inner-most loop
153  // regions).
154  void optimizeSelectsBase(Function &F, SelectGroups &ProfSIGroups);
155  void optimizeSelectsInnerLoops(Function &F, SelectGroups &ProfSIGroups);
156 
157  // Converts to branches the select groups that were deemed
158  // profitable-to-convert.
159  void convertProfitableSIGroups(SelectGroups &ProfSIGroups);
160 
161  // Splits selects of a given basic block into select groups.
162  void collectSelectGroups(BasicBlock &BB, SelectGroups &SIGroups);
163 
164  // Determines for which select groups it is profitable converting to branches
165  // (base and inner-most-loop heuristics).
166  void findProfitableSIGroupsBase(SelectGroups &SIGroups,
167  SelectGroups &ProfSIGroups);
168  void findProfitableSIGroupsInnerLoops(const Loop *L, SelectGroups &SIGroups,
169  SelectGroups &ProfSIGroups);
170 
171  // Determines if a select group should be converted to a branch (base
172  // heuristics).
173  bool isConvertToBranchProfitableBase(const SmallVector<SelectInst *, 2> &ASI);
174 
175  // Returns true if there are expensive instructions in the cold value
176  // operand's (if any) dependence slice of any of the selects of the given
177  // group.
178  bool hasExpensiveColdOperand(const SmallVector<SelectInst *, 2> &ASI);
179 
180  // For a given source instruction, collect its backwards dependence slice
181  // consisting of instructions exclusively computed for producing the operands
182  // of the source instruction.
183  void getExclBackwardsSlice(Instruction *I, std::stack<Instruction *> &Slice,
184  bool ForSinking = false);
185 
186  // Returns true if the condition of the select is highly predictable.
187  bool isSelectHighlyPredictable(const SelectInst *SI);
188 
189  // Loop-level checks to determine if a non-predicated version (with branches)
190  // of the given loop is more profitable than its predicated version.
191  bool checkLoopHeuristics(const Loop *L, const CostInfo LoopDepth[2]);
192 
193  // Computes instruction and loop-critical-path costs for both the predicated
194  // and non-predicated version of the given loop.
195  bool computeLoopCosts(const Loop *L, const SelectGroups &SIGroups,
196  DenseMap<const Instruction *, CostInfo> &InstCostMap,
197  CostInfo *LoopCost);
198 
199  // Returns a set of all the select instructions in the given select groups.
200  SmallPtrSet<const Instruction *, 2> getSIset(const SelectGroups &SIGroups);
201 
202  // Returns the latency cost of a given instruction.
203  Optional<uint64_t> computeInstCost(const Instruction *I);
204 
205  // Returns the misprediction cost of a given select when converted to branch.
206  Scaled64 getMispredictionCost(const SelectInst *SI, const Scaled64 CondCost);
207 
208  // Returns the cost of a branch when the prediction is correct.
209  Scaled64 getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
210  const SelectInst *SI);
211 
212  // Returns true if the target architecture supports lowering a given select.
213  bool isSelectKindSupported(SelectInst *SI);
214 };
215 } // namespace
216 
217 char SelectOptimize::ID = 0;
218 
219 INITIALIZE_PASS_BEGIN(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
220  false)
221 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
222 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
223 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
224 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
225 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
226 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
227 INITIALIZE_PASS_END(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
228  false)
229 
230 FunctionPass *llvm::createSelectOptimizePass() { return new SelectOptimize(); }
231 
232 bool SelectOptimize::runOnFunction(Function &F) {
233  TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
234  TSI = TM->getSubtargetImpl(F);
235  TLI = TSI->getTargetLowering();
236 
237  // If none of the select types is supported then skip this pass.
238  // This is an optimization pass. Legality issues will be handled by
239  // instruction selection.
240  if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) &&
241  !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) &&
242  !TLI->isSelectSupported(TargetLowering::VectorMaskSelect))
243  return false;
244 
245  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
246  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
247  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
248  BPI.reset(new BranchProbabilityInfo(F, *LI));
249  BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
250  PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
251  ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
252  TSchedModel.init(TSI);
253 
254  // When optimizing for size, selects are preferable over branches.
255  if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI.get()))
256  return false;
257 
258  return optimizeSelects(F);
259 }
260 
261 bool SelectOptimize::optimizeSelects(Function &F) {
262  // Determine for which select groups it is profitable converting to branches.
263  SelectGroups ProfSIGroups;
264  // Base heuristics apply only to non-loops and outer loops.
265  optimizeSelectsBase(F, ProfSIGroups);
266  // Separate heuristics for inner-most loops.
267  optimizeSelectsInnerLoops(F, ProfSIGroups);
268 
269  // Convert to branches the select groups that were deemed
270  // profitable-to-convert.
271  convertProfitableSIGroups(ProfSIGroups);
272 
273  // Code modified if at least one select group was converted.
274  return !ProfSIGroups.empty();
275 }
276 
277 void SelectOptimize::optimizeSelectsBase(Function &F,
278  SelectGroups &ProfSIGroups) {
279  // Collect all the select groups.
280  SelectGroups SIGroups;
281  for (BasicBlock &BB : F) {
282  // Base heuristics apply only to non-loops and outer loops.
283  Loop *L = LI->getLoopFor(&BB);
284  if (L && L->isInnermost())
285  continue;
286  collectSelectGroups(BB, SIGroups);
287  }
288 
289  // Determine for which select groups it is profitable converting to branches.
290  findProfitableSIGroupsBase(SIGroups, ProfSIGroups);
291 }
292 
293 void SelectOptimize::optimizeSelectsInnerLoops(Function &F,
294  SelectGroups &ProfSIGroups) {
295  SmallVector<Loop *, 4> Loops(LI->begin(), LI->end());
296  // Need to check size on each iteration as we accumulate child loops.
297  for (unsigned long i = 0; i < Loops.size(); ++i)
298  for (Loop *ChildL : Loops[i]->getSubLoops())
299  Loops.push_back(ChildL);
300 
301  for (Loop *L : Loops) {
302  if (!L->isInnermost())
303  continue;
304 
305  SelectGroups SIGroups;
306  for (BasicBlock *BB : L->getBlocks())
307  collectSelectGroups(*BB, SIGroups);
308 
309  findProfitableSIGroupsInnerLoops(L, SIGroups, ProfSIGroups);
310  }
311 }
312 
313 /// If \p isTrue is true, return the true value of \p SI, otherwise return
314 /// the false value of \p SI. If the true/false value of \p SI is defined by any
315 /// select instructions in \p Selects, look through the defining select
316 /// instruction until the true/false value is not defined in \p Selects.
317 static Value *
318 getTrueOrFalseValue(SelectInst *SI, bool isTrue,
319  const SmallPtrSet<const Instruction *, 2> &Selects) {
320  Value *V = nullptr;
321  for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
322  DefSI = dyn_cast<SelectInst>(V)) {
323  assert(DefSI->getCondition() == SI->getCondition() &&
324  "The condition of DefSI does not match with SI");
325  V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
326  }
327  assert(V && "Failed to get select true/false value");
328  return V;
329 }
330 
331 void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
332  for (SelectGroup &ASI : ProfSIGroups) {
333  // The code transformation here is a modified version of the sinking
334  // transformation in CodeGenPrepare::optimizeSelectInst with a more
335  // aggressive strategy of which instructions to sink.
336  //
337  // TODO: eliminate the redundancy of logic transforming selects to branches
338  // by removing CodeGenPrepare::optimizeSelectInst and optimizing here
339  // selects for all cases (with and without profile information).
340 
341  // Transform a sequence like this:
342  // start:
343  // %cmp = cmp uge i32 %a, %b
344  // %sel = select i1 %cmp, i32 %c, i32 %d
345  //
346  // Into:
347  // start:
348  // %cmp = cmp uge i32 %a, %b
349  // %cmp.frozen = freeze %cmp
350  // br i1 %cmp.frozen, label %select.true, label %select.false
351  // select.true:
352  // br label %select.end
353  // select.false:
354  // br label %select.end
355  // select.end:
356  // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
357  //
358  // %cmp should be frozen, otherwise it may introduce undefined behavior.
359  // In addition, we may sink instructions that produce %c or %d into the
360  // destination(s) of the new branch.
361  // If the true or false blocks do not contain a sunken instruction, that
362  // block and its branch may be optimized away. In that case, one side of the
363  // first branch will point directly to select.end, and the corresponding PHI
364  // predecessor block will be the start block.
365 
366  // Find all the instructions that can be soundly sunk to the true/false
367  // blocks. These are instructions that are computed solely for producing the
368  // operands of the select instructions in the group and can be sunk without
369  // breaking the semantics of the LLVM IR (e.g., cannot sink instructions
370  // with side effects).
371  SmallVector<std::stack<Instruction *>, 2> TrueSlices, FalseSlices;
372  typedef std::stack<Instruction *>::size_type StackSizeType;
373  StackSizeType maxTrueSliceLen = 0, maxFalseSliceLen = 0;
374  for (SelectInst *SI : ASI) {
375  // For each select, compute the sinkable dependence chains of the true and
376  // false operands.
377  if (auto *TI = dyn_cast<Instruction>(SI->getTrueValue())) {
378  std::stack<Instruction *> TrueSlice;
379  getExclBackwardsSlice(TI, TrueSlice, true);
380  maxTrueSliceLen = std::max(maxTrueSliceLen, TrueSlice.size());
381  TrueSlices.push_back(TrueSlice);
382  }
383  if (auto *FI = dyn_cast<Instruction>(SI->getFalseValue())) {
384  std::stack<Instruction *> FalseSlice;
385  getExclBackwardsSlice(FI, FalseSlice, true);
386  maxFalseSliceLen = std::max(maxFalseSliceLen, FalseSlice.size());
387  FalseSlices.push_back(FalseSlice);
388  }
389  }
390  // In the case of multiple select instructions in the same group, the order
391  // of non-dependent instructions (instructions of different dependence
392  // slices) in the true/false blocks appears to affect performance.
393  // Interleaving the slices seems to experimentally be the optimal approach.
394  // This interleaving scheduling allows for more ILP (with a natural downside
395  // of increasing register pressure a bit) compared to a simple ordering of
396  // one whole chain after another. One would expect that this ordering would
397  // not matter since the scheduling in the backend of the compiler would
398  // take care of it, but apparently the scheduler fails to deliver optimal
399  // ILP with a naive ordering here.
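 // For example, two sinkable true-value slices that must execute in the orders
 // a1,a2 and b1,b2 are emitted interleaved as a1, b1, a2, b2 rather than
 // a1, a2, b1, b2.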
400  SmallVector<Instruction *, 2> TrueSlicesInterleaved, FalseSlicesInterleaved;
401  for (StackSizeType IS = 0; IS < maxTrueSliceLen; ++IS) {
402  for (auto &S : TrueSlices) {
403  if (!S.empty()) {
404  TrueSlicesInterleaved.push_back(S.top());
405  S.pop();
406  }
407  }
408  }
409  for (StackSizeType IS = 0; IS < maxFalseSliceLen; ++IS) {
410  for (auto &S : FalseSlices) {
411  if (!S.empty()) {
412  FalseSlicesInterleaved.push_back(S.top());
413  S.pop();
414  }
415  }
416  }
417 
418  // We split the block containing the select(s) into two blocks.
419  SelectInst *SI = ASI.front();
420  SelectInst *LastSI = ASI.back();
421  BasicBlock *StartBlock = SI->getParent();
422  BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI));
423  BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
424  BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock).getFrequency());
425  // Delete the unconditional branch that was just created by the split.
426  StartBlock->getTerminator()->eraseFromParent();
427 
428  // Move any debug/pseudo instructions that were in-between the select
429  // group to the newly-created end block.
430  SmallVector<Instruction *, 2> DebugPseudoINS;
431  auto DIt = SI->getIterator();
432  while (&*DIt != LastSI) {
433  if (DIt->isDebugOrPseudoInst())
434  DebugPseudoINS.push_back(&*DIt);
435  DIt++;
436  }
437  for (auto *DI : DebugPseudoINS) {
438  DI->moveBefore(&*EndBlock->getFirstInsertionPt());
439  }
440 
441  // These are the new basic blocks for the conditional branch.
442  // At least one will become an actual new basic block.
443  BasicBlock *TrueBlock = nullptr, *FalseBlock = nullptr;
444  BranchInst *TrueBranch = nullptr, *FalseBranch = nullptr;
445  if (!TrueSlicesInterleaved.empty()) {
446  TrueBlock = BasicBlock::Create(LastSI->getContext(), "select.true.sink",
447  EndBlock->getParent(), EndBlock);
448  TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
449  TrueBranch->setDebugLoc(LastSI->getDebugLoc());
450  for (Instruction *TrueInst : TrueSlicesInterleaved)
451  TrueInst->moveBefore(TrueBranch);
452  }
453  if (!FalseSlicesInterleaved.empty()) {
454  FalseBlock = BasicBlock::Create(LastSI->getContext(), "select.false.sink",
455  EndBlock->getParent(), EndBlock);
456  FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
457  FalseBranch->setDebugLoc(LastSI->getDebugLoc());
458  for (Instruction *FalseInst : FalseSlicesInterleaved)
459  FalseInst->moveBefore(FalseBranch);
460  }
461  // If there was nothing to sink, then arbitrarily choose the 'false' side
462  // for a new input value to the PHI.
463  if (TrueBlock == FalseBlock) {
464  assert(TrueBlock == nullptr &&
465  "Unexpected basic block transform while optimizing select");
466 
467  FalseBlock = BasicBlock::Create(SI->getContext(), "select.false",
468  EndBlock->getParent(), EndBlock);
469  auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
470  FalseBranch->setDebugLoc(SI->getDebugLoc());
471  }
472 
473  // Insert the real conditional branch based on the original condition.
474  // If we did not create a new block for one of the 'true' or 'false' paths
475  // of the condition, it means that side of the branch goes to the end block
476  // directly and the path originates from the start block from the point of
477  // view of the new PHI.
478  BasicBlock *TT, *FT;
479  if (TrueBlock == nullptr) {
480  TT = EndBlock;
481  FT = FalseBlock;
482  TrueBlock = StartBlock;
483  } else if (FalseBlock == nullptr) {
484  TT = TrueBlock;
485  FT = EndBlock;
486  FalseBlock = StartBlock;
487  } else {
488  TT = TrueBlock;
489  FT = FalseBlock;
490  }
491  IRBuilder<> IB(SI);
492  auto *CondFr =
493  IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen");
494  IB.CreateCondBr(CondFr, TT, FT, SI);
495 
496  SmallPtrSet<const Instruction *, 2> INS;
497  INS.insert(ASI.begin(), ASI.end());
498  // Use reverse iterator because later select may use the value of the
499  // earlier select, and we need to propagate value through earlier select
500  // to get the PHI operand.
501  for (auto It = ASI.rbegin(); It != ASI.rend(); ++It) {
502  SelectInst *SI = *It;
503  // The select itself is replaced with a PHI Node.
504  PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front());
505  PN->takeName(SI);
506  PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
507  PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
508  PN->setDebugLoc(SI->getDebugLoc());
509 
510  SI->replaceAllUsesWith(PN);
511  SI->eraseFromParent();
512  INS.erase(SI);
513  ++NumSelectsConverted;
514  }
515  }
516 }
517 
518 void SelectOptimize::collectSelectGroups(BasicBlock &BB,
519  SelectGroups &SIGroups) {
520  BasicBlock::iterator BBIt = BB.begin();
521  while (BBIt != BB.end()) {
522  Instruction *I = &*BBIt++;
523  if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
524  SelectGroup SIGroup;
525  SIGroup.push_back(SI);
526  while (BBIt != BB.end()) {
527  Instruction *NI = &*BBIt;
528  SelectInst *NSI = dyn_cast<SelectInst>(NI);
529  if (NSI && SI->getCondition() == NSI->getCondition()) {
530  SIGroup.push_back(NSI);
531  } else if (!NI->isDebugOrPseudoInst()) {
532  // Debug/pseudo instructions should be skipped and not prevent the
533  // formation of a select group.
534  break;
535  }
536  ++BBIt;
537  }
538 
539  // If the select type is not supported, no point optimizing it.
540  // Instruction selection will take care of it.
541  if (!isSelectKindSupported(SI))
542  continue;
543 
544  SIGroups.push_back(SIGroup);
545  }
546  }
547 }
548 
549 void SelectOptimize::findProfitableSIGroupsBase(SelectGroups &SIGroups,
550  SelectGroups &ProfSIGroups) {
551  for (SelectGroup &ASI : SIGroups) {
552  ++NumSelectOptAnalyzed;
553  if (isConvertToBranchProfitableBase(ASI))
554  ProfSIGroups.push_back(ASI);
555  }
556 }
557 
558 void SelectOptimize::findProfitableSIGroupsInnerLoops(
559  const Loop *L, SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
560  NumSelectOptAnalyzed += SIGroups.size();
561  // For each select group in an inner-most loop,
562  // a branch is more preferable than a select/conditional-move if:
563  // i) conversion to branches for all the select groups of the loop satisfies
564  // loop-level heuristics including reducing the loop's critical path by
565  // some threshold (see SelectOptimize::checkLoopHeuristics); and
566  // ii) the total cost of the select group is cheaper with a branch compared
567  // to its predicated version. The cost is in terms of latency and the cost
568  // of a select group is the cost of its most expensive select instruction
569  // (assuming infinite resources and thus fully leveraging available ILP).
570 
571  DenseMap<const Instruction *, CostInfo> InstCostMap;
572  CostInfo LoopCost[2] = {{Scaled64::getZero(), Scaled64::getZero()},
573  {Scaled64::getZero(), Scaled64::getZero()}};
574  if (!computeLoopCosts(L, SIGroups, InstCostMap, LoopCost) ||
575  !checkLoopHeuristics(L, LoopCost)) {
576  return;
577  }
578 
579  for (SelectGroup &ASI : SIGroups) {
580  // Assuming infinite resources, the cost of a group of instructions is the
581  // cost of the most expensive instruction of the group.
582  Scaled64 SelectCost = Scaled64::getZero(), BranchCost = Scaled64::getZero();
583  for (SelectInst *SI : ASI) {
584  SelectCost = std::max(SelectCost, InstCostMap[SI].PredCost);
585  BranchCost = std::max(BranchCost, InstCostMap[SI].NonPredCost);
586  }
587  if (BranchCost < SelectCost) {
588  OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", ASI.front());
589  OR << "Profitable to convert to branch (loop analysis). BranchCost="
590  << BranchCost.toString() << ", SelectCost=" << SelectCost.toString()
591  << ". ";
592  ORE->emit(OR);
593  ++NumSelectConvertedLoop;
594  ProfSIGroups.push_back(ASI);
595  } else {
596  OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", ASI.front());
597  ORmiss << "Select is more profitable (loop analysis). BranchCost="
598  << BranchCost.toString()
599  << ", SelectCost=" << SelectCost.toString() << ". ";
600  ORE->emit(ORmiss);
601  }
602  }
603 }
604 
605 bool SelectOptimize::isConvertToBranchProfitableBase(
606  const SmallVector<SelectInst *, 2> &ASI) {
607  SelectInst *SI = ASI.front();
608  OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", SI);
609  OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", SI);
610 
611  // Skip cold basic blocks. Better to optimize for size for cold blocks.
612  if (PSI->isColdBlock(SI->getParent(), BFI.get())) {
613  ++NumSelectColdBB;
614  ORmiss << "Not converted to branch because of cold basic block. ";
615  ORE->emit(ORmiss);
616  return false;
617  }
618 
619  // If unpredictable, branch form is less profitable.
620  if (SI->getMetadata(LLVMContext::MD_unpredictable)) {
621  ++NumSelectUnPred;
622  ORmiss << "Not converted to branch because of unpredictable branch. ";
623  ORE->emit(ORmiss);
624  return false;
625  }
626 
627  // If highly predictable, branch form is more profitable, unless a
628  // predictable select is inexpensive in the target architecture.
629  if (isSelectHighlyPredictable(SI) && TLI->isPredictableSelectExpensive()) {
630  ++NumSelectConvertedHighPred;
631  OR << "Converted to branch because of highly predictable branch. ";
632  ORE->emit(OR);
633  return true;
634  }
635 
636  // Look for expensive instructions in the cold operand's (if any) dependence
637  // slice of any of the selects in the group.
638  if (hasExpensiveColdOperand(ASI)) {
639  ++NumSelectConvertedExpColdOperand;
640  OR << "Converted to branch because of expensive cold operand.";
641  ORE->emit(OR);
642  return true;
643  }
644 
645  ORmiss << "Not profitable to convert to branch (base heuristic).";
646  ORE->emit(ORmiss);
647  return false;
648 }
649 
650 static InstructionCost divideNearest(const InstructionCost &Numerator,
651  uint64_t Denominator) {
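 // Rounds to the nearest integer, e.g., divideNearest(7, 4) == (7 + 2) / 4 == 2.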
652  return (Numerator + (Denominator / 2)) / Denominator;
653 }
654 
655 bool SelectOptimize::hasExpensiveColdOperand(
656  const SmallVector<SelectInst *, 2> &ASI) {
657  bool ColdOperand = false;
658  uint64_t TrueWeight, FalseWeight, TotalWeight;
659  if (extractBranchWeights(*ASI.front(), TrueWeight, FalseWeight)) {
660  uint64_t MinWeight = std::min(TrueWeight, FalseWeight);
661  TotalWeight = TrueWeight + FalseWeight;
662  // Is there a path with frequency <ColdOperandThreshold% (default:20%) ?
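 // Example: branch weights {90, 10} give MinWeight = 10 and TotalWeight = 100,
 // so 100 * 20 = 2000 > 100 * 10 = 1000 and the 10% path is considered cold.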
663  ColdOperand = TotalWeight * ColdOperandThreshold > 100 * MinWeight;
664  } else if (PSI->hasProfileSummary()) {
665  OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", ASI.front());
666  ORmiss << "Profile data available but missing branch-weights metadata for "
667  "select instruction. ";
668  ORE->emit(ORmiss);
669  }
670  if (!ColdOperand)
671  return false;
672  // Check if the cold path's dependence slice is expensive for any of the
673  // selects of the group.
674  for (SelectInst *SI : ASI) {
675  Instruction *ColdI = nullptr;
676  uint64_t HotWeight;
677  if (TrueWeight < FalseWeight) {
678  ColdI = dyn_cast<Instruction>(SI->getTrueValue());
679  HotWeight = FalseWeight;
680  } else {
681  ColdI = dyn_cast<Instruction>(SI->getFalseValue());
682  HotWeight = TrueWeight;
683  }
684  if (ColdI) {
685  std::stack<Instruction *> ColdSlice;
686  getExclBackwardsSlice(ColdI, ColdSlice);
687  InstructionCost SliceCost = 0;
688  while (!ColdSlice.empty()) {
689  SliceCost += TTI->getInstructionCost(ColdSlice.top(),
690  TargetTransformInfo::TCK_Latency);
691  ColdSlice.pop();
692  }
693  // The colder the cold value operand of the select is, the more expensive
694  // the cmov becomes for computing the cold value operand every time. Thus,
695  // the colder the cold operand is, the more its cost counts.
696  // Get nearest integer cost adjusted for coldness.
697  InstructionCost AdjSliceCost =
698  divideNearest(SliceCost * HotWeight, TotalWeight);
699  if (AdjSliceCost >=
700  ColdOperandMaxCostMultiplier * TargetTransformInfo::TCC_Expensive)
701  return true;
702  }
703  }
704  return false;
705 }
706 
707 // For a given source instruction, collect its backwards dependence slice
708 // consisting of instructions exclusively computed for the purpose of producing
709 // the operands of the source instruction. As an approximation
710 // (sufficiently-accurate in practice), we populate this set with the
711 // instructions of the backwards dependence slice that only have one use and
712 // form a one-use chain that leads to the source instruction.
713 void SelectOptimize::getExclBackwardsSlice(Instruction *I,
714  std::stack<Instruction *> &Slice,
715  bool ForSinking) {
716  SmallPtrSet<Instruction *, 2> Visited;
717  std::queue<Instruction *> Worklist;
718  Worklist.push(I);
719  while (!Worklist.empty()) {
720  Instruction *II = Worklist.front();
721  Worklist.pop();
722 
723  // Avoid cycles.
724  if (!Visited.insert(II).second)
725  continue;
726 
727  if (!II->hasOneUse())
728  continue;
729 
730  // Cannot soundly sink instructions with side-effects.
731  // Terminator or phi instructions cannot be sunk.
732  // Avoid sinking other select instructions (should be handled separately).
733  if (ForSinking && (II->isTerminator() || II->mayHaveSideEffects() ||
734  isa<SelectInst>(II) || isa<PHINode>(II)))
735  continue;
736 
737  // Avoid considering instructions with less frequency than the source
738  // instruction (i.e., avoid colder code regions of the dependence slice).
739  if (BFI->getBlockFreq(II->getParent()) < BFI->getBlockFreq(I->getParent()))
740  continue;
741 
742  // Eligible one-use instruction added to the dependence slice.
743  Slice.push(II);
744 
745  // Explore all the operands of the current instruction to expand the slice.
746  for (unsigned k = 0; k < II->getNumOperands(); ++k)
747  if (auto *OpI = dyn_cast<Instruction>(II->getOperand(k)))
748  Worklist.push(OpI);
749  }
750 }
751 
752 bool SelectOptimize::isSelectHighlyPredictable(const SelectInst *SI) {
753  uint64_t TrueWeight, FalseWeight;
754  if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
755  uint64_t Max = std::max(TrueWeight, FalseWeight);
756  uint64_t Sum = TrueWeight + FalseWeight;
757  if (Sum != 0) {
758  auto Probability = BranchProbability::getBranchProbability(Max, Sum);
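 // e.g., branch weights {1000, 1} give a probability of about 99.9% for the
 // dominant value.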
759  if (Probability > TTI->getPredictableBranchThreshold())
760  return true;
761  }
762  }
763  return false;
764 }
765 
766 bool SelectOptimize::checkLoopHeuristics(const Loop *L,
767  const CostInfo LoopCost[2]) {
768  // Loop-level checks to determine if a non-predicated version (with branches)
769  // of the loop is more profitable than its predicated version.
770 
771  if (DisableLoopLevelHeuristics)
772  return true;
773 
774  OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti",
775  L->getHeader()->getFirstNonPHI());
776 
777  if (LoopCost[0].NonPredCost > LoopCost[0].PredCost ||
778  LoopCost[1].NonPredCost >= LoopCost[1].PredCost) {
779  ORmissL << "No select conversion in the loop due to no reduction of loop's "
780  "critical path. ";
781  ORE->emit(ORmissL);
782  return false;
783  }
784 
785  Scaled64 Gain[2] = {LoopCost[0].PredCost - LoopCost[0].NonPredCost,
786  LoopCost[1].PredCost - LoopCost[1].NonPredCost};
787 
788  // Profitably converting to branches needs to reduce the loop's critical path
789  // by at least some threshold (absolute gain of GainCycleThreshold cycles and
790  // relative gain of 12.5%).
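 // e.g., with the defaults GainCycleThreshold=4 and GainRelativeThreshold=8, a
 // loop whose predicated critical path costs 40 cycles is converted only if
 // branches reduce it by at least max(4, 40/8) = 5 cycles.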
791  if (Gain[1] < Scaled64::get(GainCycleThreshold) ||
792  Gain[1] * Scaled64::get(GainRelativeThreshold) < LoopCost[1].PredCost) {
793  Scaled64 RelativeGain = Scaled64::get(100) * Gain[1] / LoopCost[1].PredCost;
794  ORmissL << "No select conversion in the loop due to small reduction of "
795  "loop's critical path. Gain="
796  << Gain[1].toString()
797  << ", RelativeGain=" << RelativeGain.toString() << "%. ";
798  ORE->emit(ORmissL);
799  return false;
800  }
801 
802  // If the loop's critical path involves loop-carried dependences, the gradient
803  // of the gain needs to be at least GainGradientThreshold% (defaults to 25%).
804  // This check ensures that the latency reduction for the loop's critical path
805  // keeps decreasing with sufficient rate beyond the two analyzed loop
806  // iterations.
807  if (Gain[1] > Gain[0]) {
808  Scaled64 GradientGain = Scaled64::get(100) * (Gain[1] - Gain[0]) /
809  (LoopCost[1].PredCost - LoopCost[0].PredCost);
810  if (GradientGain < Scaled64::get(GainGradientThreshold)) {
811  ORmissL << "No select conversion in the loop due to small gradient gain. "
812  "GradientGain="
813  << GradientGain.toString() << "%. ";
814  ORE->emit(ORmissL);
815  return false;
816  }
817  }
818  // If the gain decreases it is not profitable to convert.
819  else if (Gain[1] < Gain[0]) {
820  ORmissL
821  << "No select conversion in the loop due to negative gradient gain. ";
822  ORE->emit(ORmissL);
823  return false;
824  }
825 
826  // Non-predicated version of the loop is more profitable than its
827  // predicated version.
828  return true;
829 }
830 
831 // Computes instruction and loop-critical-path costs for both the predicated
832 // and non-predicated version of the given loop.
833 // Returns false if unable to compute these costs due to invalid cost of loop
834 // instruction(s).
835 bool SelectOptimize::computeLoopCosts(
836  const Loop *L, const SelectGroups &SIGroups,
837  DenseMap<const Instruction *, CostInfo> &InstCostMap, CostInfo *LoopCost) {
838  const auto &SIset = getSIset(SIGroups);
839  // Compute instruction and loop-critical-path costs across two iterations for
840  // both predicated and non-predicated version.
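 // Note: InstCostMap persists across the two iterations, so in the second pass
 // values defined late in the loop body (reaching the header PHIs) already have
 // costs, which models loop-carried dependences.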
841  const unsigned Iterations = 2;
842  for (unsigned Iter = 0; Iter < Iterations; ++Iter) {
843  // Cost of the loop's critical path.
844  CostInfo &MaxCost = LoopCost[Iter];
845  for (BasicBlock *BB : L->getBlocks()) {
846  for (const Instruction &I : *BB) {
847  if (I.isDebugOrPseudoInst())
848  continue;
849  // Compute the predicated and non-predicated cost of the instruction.
850  Scaled64 IPredCost = Scaled64::getZero(),
851  INonPredCost = Scaled64::getZero();
852 
853  // Assume infinite resources that allow to fully exploit the available
854  // instruction-level parallelism.
855  // InstCost = InstLatency + max(Op1Cost, Op2Cost, … OpNCost)
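 // e.g., a 3-cycle instruction whose operands become available at costs 2 and 5
 // gets cost max(2, 5) + 3 = 8.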
856  for (const Use &U : I.operands()) {
857  auto UI = dyn_cast<Instruction>(U.get());
858  if (!UI)
859  continue;
860  if (InstCostMap.count(UI)) {
861  IPredCost = std::max(IPredCost, InstCostMap[UI].PredCost);
862  INonPredCost = std::max(INonPredCost, InstCostMap[UI].NonPredCost);
863  }
864  }
865  auto ILatency = computeInstCost(&I);
866  if (!ILatency) {
867  OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti", &I);
868  ORmissL << "Invalid instruction cost preventing analysis and "
869  "optimization of the inner-most loop containing this "
870  "instruction. ";
871  ORE->emit(ORmissL);
872  return false;
873  }
874  IPredCost += Scaled64::get(ILatency.value());
875  INonPredCost += Scaled64::get(ILatency.value());
876 
877  // For a select that can be converted to branch,
878  // compute its cost as a branch (non-predicated cost).
879  //
880  // BranchCost = PredictedPathCost + MispredictCost
881  // PredictedPathCost = TrueOpCost * TrueProb + FalseOpCost * FalseProb
882  // MispredictCost = max(MispredictPenalty, CondCost) * MispredictRate
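 // (MispredictRate is expressed as a percentage; getMispredictionCost divides
 // the product by 100.)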
883  if (SIset.contains(&I)) {
884  auto SI = dyn_cast<SelectInst>(&I);
885 
886  Scaled64 TrueOpCost = Scaled64::getZero(),
887  FalseOpCost = Scaled64::getZero();
888  if (auto *TI = dyn_cast<Instruction>(SI->getTrueValue()))
889  if (InstCostMap.count(TI))
890  TrueOpCost = InstCostMap[TI].NonPredCost;
891  if (auto *FI = dyn_cast<Instruction>(SI->getFalseValue()))
892  if (InstCostMap.count(FI))
893  FalseOpCost = InstCostMap[FI].NonPredCost;
894  Scaled64 PredictedPathCost =
895  getPredictedPathCost(TrueOpCost, FalseOpCost, SI);
896 
897  Scaled64 CondCost = Scaled64::getZero();
898  if (auto *CI = dyn_cast<Instruction>(SI->getCondition()))
899  if (InstCostMap.count(CI))
900  CondCost = InstCostMap[CI].NonPredCost;
901  Scaled64 MispredictCost = getMispredictionCost(SI, CondCost);
902 
903  INonPredCost = PredictedPathCost + MispredictCost;
904  }
905 
906  InstCostMap[&I] = {IPredCost, INonPredCost};
907  MaxCost.PredCost = std::max(MaxCost.PredCost, IPredCost);
908  MaxCost.NonPredCost = std::max(MaxCost.NonPredCost, INonPredCost);
909  }
910  }
911  }
912  return true;
913 }
914 
915 SmallPtrSet<const Instruction *, 2>
916 SelectOptimize::getSIset(const SelectGroups &SIGroups) {
917  SmallPtrSet<const Instruction *, 2> SIset;
918  for (const SelectGroup &ASI : SIGroups)
919  for (const SelectInst *SI : ASI)
920  SIset.insert(SI);
921  return SIset;
922 }
923 
924 Optional<uint64_t> SelectOptimize::computeInstCost(const Instruction *I) {
925  InstructionCost ICost =
926  TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency);
927  if (auto OC = ICost.getValue())
928  return Optional<uint64_t>(*OC);
929  return Optional<uint64_t>(None);
930 }
931 
932 SelectOptimize::Scaled64
933 SelectOptimize::getMispredictionCost(const SelectInst *SI,
934  const Scaled64 CondCost) {
935  uint64_t MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty;
936 
937  // Account for the default misprediction rate when using a branch
938  // (conservatively set to 25% by default).
939  uint64_t MispredictRate = MispredictDefaultRate;
940  // If the select condition is obviously predictable, then the misprediction
941  // rate is zero.
942  if (isSelectHighlyPredictable(SI))
943  MispredictRate = 0;
944 
945  // CondCost is included to account for cases where the computation of the
946  // condition is part of a long dependence chain (potentially loop-carried)
947  // that would delay detection of a misprediction and increase its cost.
948  Scaled64 MispredictCost =
949  std::max(Scaled64::get(MispredictPenalty), CondCost) *
950  Scaled64::get(MispredictRate);
951  MispredictCost /= Scaled64::get(100);
952 
953  return MispredictCost;
954 }
955 
956 // Returns the cost of a branch when the prediction is correct.
957 // TrueCost * TrueProbability + FalseCost * FalseProbability.
958 SelectOptimize::Scaled64
959 SelectOptimize::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
960  const SelectInst *SI) {
961  Scaled64 PredPathCost;
962  uint64_t TrueWeight, FalseWeight;
963  if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
964  uint64_t SumWeight = TrueWeight + FalseWeight;
965  if (SumWeight != 0) {
966  PredPathCost = TrueCost * Scaled64::get(TrueWeight) +
967  FalseCost * Scaled64::get(FalseWeight);
968  PredPathCost /= Scaled64::get(SumWeight);
969  return PredPathCost;
970  }
971  }
972  // Without branch weight metadata, we assume 75% for the one path and 25% for
973  // the other, and pick the result with the biggest cost.
974  PredPathCost = std::max(TrueCost * Scaled64::get(3) + FalseCost,
975  FalseCost * Scaled64::get(3) + TrueCost);
976  PredPathCost /= Scaled64::get(4);
977  return PredPathCost;
978 }
979 
980 bool SelectOptimize::isSelectKindSupported(SelectInst *SI) {
981  bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
982  if (VectorCond)
983  return false;
984  TargetLowering::SelectSupportKind SelectKind;
985  if (SI->getType()->isVectorTy())
986  SelectKind = TargetLowering::ScalarCondVectorVal;
987  else
988  SelectKind = TargetLowering::ScalarValSelect;
989  return TLI->isSelectSupported(SelectKind);
990 }
Definition: TargetTransformInfo.h:38