SelectOptimize.cpp
1//===--- SelectOptimize.cpp - Convert select to branches if profitable ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass converts selects to conditional jumps when profitable.
10//
11//===----------------------------------------------------------------------===//
12
13 #include "llvm/CodeGen/SelectOptimize.h"
14 #include "llvm/ADT/SetVector.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/Analysis/BlockFrequencyInfo.h"
18 #include "llvm/Analysis/BranchProbabilityInfo.h"
19 #include "llvm/Analysis/LoopInfo.h"
20 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
21 #include "llvm/Analysis/ProfileSummaryInfo.h"
22 #include "llvm/Analysis/TargetTransformInfo.h"
23 #include "llvm/CodeGen/Passes.h"
24 #include "llvm/CodeGen/TargetLowering.h"
25 #include "llvm/CodeGen/TargetPassConfig.h"
26 #include "llvm/CodeGen/TargetSchedule.h"
27 #include "llvm/CodeGen/TargetSubtargetInfo.h"
28 #include "llvm/IR/BasicBlock.h"
29 #include "llvm/IR/Dominators.h"
30 #include "llvm/IR/Function.h"
31 #include "llvm/IR/IRBuilder.h"
32 #include "llvm/IR/Instruction.h"
33 #include "llvm/IR/PatternMatch.h"
34 #include "llvm/IR/ProfDataUtils.h"
35 #include "llvm/InitializePasses.h"
36 #include "llvm/Pass.h"
37 #include "llvm/Support/ScaledNumber.h"
38 #include "llvm/Target/TargetMachine.h"
39 #include "llvm/Transforms/Utils/SizeOpts.h"
40 #include <algorithm>
41 #include <queue>
42 #include <stack>
43
44using namespace llvm;
45using namespace llvm::PatternMatch;
46
47#define DEBUG_TYPE "select-optimize"
48
49STATISTIC(NumSelectOptAnalyzed,
50 "Number of select groups considered for conversion to branch");
51STATISTIC(NumSelectConvertedExpColdOperand,
52 "Number of select groups converted due to expensive cold operand");
53STATISTIC(NumSelectConvertedHighPred,
54 "Number of select groups converted due to high-predictability");
55STATISTIC(NumSelectUnPred,
56 "Number of select groups not converted due to unpredictability");
57STATISTIC(NumSelectColdBB,
58 "Number of select groups not converted due to cold basic block");
59STATISTIC(NumSelectConvertedLoop,
60 "Number of select groups converted due to loop-level analysis");
61STATISTIC(NumSelectsConverted, "Number of selects converted");
62
64 "cold-operand-threshold",
65 cl::desc("Maximum frequency of path for an operand to be considered cold."),
66 cl::init(20), cl::Hidden);
67
69 "cold-operand-max-cost-multiplier",
70 cl::desc("Maximum cost multiplier of TCC_expensive for the dependence "
71 "slice of a cold operand to be considered inexpensive."),
73
74 static cl::opt<unsigned>
75 GainGradientThreshold("select-opti-loop-gradient-gain-threshold",
76 cl::desc("Gradient gain threshold (%)."),
77 cl::init(25), cl::Hidden);
78
79 static cl::opt<unsigned>
80 GainCycleThreshold("select-opti-loop-cycle-gain-threshold",
81 cl::desc("Minimum gain per loop (in cycles) threshold."),
82 cl::init(4), cl::Hidden);
83
85 "select-opti-loop-relative-gain-threshold",
87 "Minimum relative gain per loop threshold (1/X). Defaults to 12.5%"),
89
91 "mispredict-default-rate", cl::Hidden, cl::init(25),
92 cl::desc("Default mispredict rate (initialized to 25%)."));
93
94static cl::opt<bool>
95 DisableLoopLevelHeuristics("disable-loop-level-heuristics", cl::Hidden,
96 cl::init(false),
97 cl::desc("Disable loop-level heuristics."));
98
99namespace {
100
101class SelectOptimizeImpl {
102 const TargetMachine *TM = nullptr;
103 const TargetSubtargetInfo *TSI = nullptr;
104 const TargetLowering *TLI = nullptr;
105 const TargetTransformInfo *TTI = nullptr;
106 const LoopInfo *LI = nullptr;
107 BlockFrequencyInfo *BFI = nullptr;
108 ProfileSummaryInfo *PSI = nullptr;
109 OptimizationRemarkEmitter *ORE = nullptr;
110 TargetSchedModel TSchedModel;
111
112public:
113 SelectOptimizeImpl() = default;
114 SelectOptimizeImpl(const TargetMachine *TM) : TM(TM){};
115 PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
116 bool runOnFunction(Function &F, Pass &P);
117
118 using Scaled64 = ScaledNumber<uint64_t>;
119
120 struct CostInfo {
121 /// Predicated cost (with selects as conditional moves).
122 Scaled64 PredCost;
123 /// Non-predicated cost (with selects converted to branches).
124 Scaled64 NonPredCost;
125 };
126
127 /// SelectLike is an abstraction over SelectInst and other operations that can
128 /// act like selects. For example Or(Zext(icmp), X) can be treated like
129 /// select(icmp, X|1, X).
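/// Illustrative example (not from the original source): given IR such as
///   %c = icmp eq i32 %a, 0
///   %z = zext i1 %c to i32
///   %r = or i32 %x, %z
/// the `or` behaves like `select i1 %c, i32 (%x | 1), i32 %x`, which is the
/// kind of operation this abstraction models.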
130 class SelectLike {
131 /// The select (/or) instruction.
132 Instruction *I;
133 /// Whether this select is inverted, "not(cond), FalseVal, TrueVal", as
134 /// opposed to the original condition.
135 bool Inverted = false;
136
137 /// The index of the operand that depends on the condition. Only used for
138 /// select-like instructions such as Or/Add.
139 unsigned CondIdx;
140
141 public:
142 SelectLike(Instruction *I, bool Inverted = false, unsigned CondIdx = 0)
143 : I(I), Inverted(Inverted), CondIdx(CondIdx) {}
144
145 Instruction *getI() { return I; }
146 const Instruction *getI() const { return I; }
147
148 Type *getType() const { return I->getType(); }
149
150 unsigned getConditionOpIndex() { return CondIdx; };
151
152 /// Return the true value for the SelectLike instruction. Note this may not
153 /// exist for all SelectLike instructions. For example, for `or(zext(c), x)`
154 /// the true value would be `or(x,1)`. As this value does not exist, nullptr
155 /// is returned.
156 Value *getTrueValue(bool HonorInverts = true) const {
157 if (Inverted && HonorInverts)
158 return getFalseValue(/*HonorInverts=*/false);
159 if (auto *Sel = dyn_cast<SelectInst>(I))
160 return Sel->getTrueValue();
161 // Or(zext) case - The true value is Or(X), so return nullptr as the value
162 // does not yet exist.
163 if (isa<BinaryOperator>(I))
164 return nullptr;
165
166 llvm_unreachable("Unhandled case in getTrueValue");
167 }
168
169 /// Return the false value for the SelectLike instruction. For example the
170 /// getFalseValue of a select or `x` in `or(zext(c), x)` (which is
171 /// `select(c, x|1, x)`)
172 Value *getFalseValue(bool HonorInverts = true) const {
173 if (Inverted && HonorInverts)
174 return getTrueValue(/*HonorInverts=*/false);
175 if (auto *Sel = dyn_cast<SelectInst>(I))
176 return Sel->getFalseValue();
177 // We are on the branch where the condition is zero, which means BinOp
178 // does not perform any computation, and we can simply return the operand
179 // that is not related to the condition
180 if (auto *BO = dyn_cast<BinaryOperator>(I))
181 return BO->getOperand(1 - CondIdx);
182
183 llvm_unreachable("Unhandled case in getFalseValue");
184 }
185
186 /// Return the NonPredCost cost of the op on \p isTrue branch, given the
187 /// costs in \p InstCostMap. This may need to be generated for select-like
188 /// instructions.
189 Scaled64 getOpCostOnBranch(
190 bool IsTrue, const DenseMap<const Instruction *, CostInfo> &InstCostMap,
191 const TargetTransformInfo *TTI) {
192 auto *V = IsTrue ? getTrueValue() : getFalseValue();
193 if (V) {
194 if (auto *IV = dyn_cast<Instruction>(V)) {
195 auto It = InstCostMap.find(IV);
196 return It != InstCostMap.end() ? It->second.NonPredCost
197 : Scaled64::getZero();
198 }
199 return Scaled64::getZero();
200 }
201 // If getTrueValue()/getFalseValue() returns nullptr, it means we are dealing
202 // with select-like instructions on the branch where the actual computation is
203 // happening. In that case the cost is equal to the cost of computation +
204 // cost of the operand that does not depend on the condition.
205 InstructionCost Cost = TTI->getArithmeticInstrCost(
206 getI()->getOpcode(), I->getType(), TargetTransformInfo::TCK_Latency,
207 {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
208 {TTI::OK_UniformConstantValue, TTI::OP_PowerOf2});
209 auto TotalCost = Scaled64::get(Cost.getValue());
210 if (auto *OpI = dyn_cast<Instruction>(I->getOperand(1 - CondIdx))) {
211 auto It = InstCostMap.find(OpI);
212 if (It != InstCostMap.end())
213 TotalCost += It->second.NonPredCost;
214 }
215 return TotalCost;
216 }
217 };
218
219private:
220 // Select groups consist of consecutive select-like instructions with the same
221 // condition. Between the select-likes there can be any number of auxiliary
222 // instructions related to the condition, such as not, zext, ashr/lshr.
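// Illustrative example of one group: the two selects below share %cond (the
// second one through an auxiliary `not`) and would be collected together:
//   %s1 = select i1 %cond, i32 %a, i32 %b
//   %ncond = xor i1 %cond, true
//   %s2 = select i1 %ncond, i32 %c, i32 %d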
223 struct SelectGroup {
224 Value *Condition;
225 SmallVector<SelectLike, 2> Selects;
226 };
227 using SelectGroups = SmallVector<SelectGroup, 2>;
228
229 // Converts select instructions of a function to conditional jumps when deemed
230 // profitable. Returns true if at least one select was converted.
231 bool optimizeSelects(Function &F);
232
233 // Heuristics for determining which select instructions can be profitably
234 // converted to branches. Separate heuristics for selects in inner-most loops
235 // and the rest of the code regions (base heuristics for non-inner-most loop
236 // regions).
237 void optimizeSelectsBase(Function &F, SelectGroups &ProfSIGroups);
238 void optimizeSelectsInnerLoops(Function &F, SelectGroups &ProfSIGroups);
239
240 // Converts to branches the select groups that were deemed
241 // profitable-to-convert.
242 void convertProfitableSIGroups(SelectGroups &ProfSIGroups);
243
244 // Splits selects of a given basic block into select groups.
245 void collectSelectGroups(BasicBlock &BB, SelectGroups &SIGroups);
246
247 // Determines for which select groups it is profitable converting to branches
248 // (base and inner-most-loop heuristics).
249 void findProfitableSIGroupsBase(SelectGroups &SIGroups,
250 SelectGroups &ProfSIGroups);
251 void findProfitableSIGroupsInnerLoops(const Loop *L, SelectGroups &SIGroups,
252 SelectGroups &ProfSIGroups);
253
254 // Determines if a select group should be converted to a branch (base
255 // heuristics).
256 bool isConvertToBranchProfitableBase(const SelectGroup &ASI);
257
258 // Returns true if there are expensive instructions in the cold value
259 // operand's (if any) dependence slice of any of the selects of the given
260 // group.
261 bool hasExpensiveColdOperand(const SelectGroup &ASI);
262
263 // For a given source instruction, collect its backwards dependence slice
264 // consisting of instructions exclusively computed for producing the operands
265 // of the source instruction.
266 void getExclBackwardsSlice(Instruction *I, std::stack<Instruction *> &Slice,
267 Instruction *SI, bool ForSinking = false);
268
269 // Returns true if the condition of the select is highly predictable.
270 bool isSelectHighlyPredictable(const SelectLike SI);
271
272 // Loop-level checks to determine if a non-predicated version (with branches)
273 // of the given loop is more profitable than its predicated version.
274 bool checkLoopHeuristics(const Loop *L, const CostInfo LoopDepth[2]);
275
276 // Computes instruction and loop-critical-path costs for both the predicated
277 // and non-predicated version of the given loop.
278 bool computeLoopCosts(const Loop *L, const SelectGroups &SIGroups,
279 DenseMap<const Instruction *, CostInfo> &InstCostMap,
280 CostInfo *LoopCost);
281
282 // Returns a set of all the select instructions in the given select groups.
283 SmallDenseMap<const Instruction *, SelectLike, 2>
284 getSImap(const SelectGroups &SIGroups);
285
286 // Returns a map from select-like instructions to the corresponding select
287 // group.
288 SmallDenseMap<const Instruction *, const SelectGroup *, 2>
289 getSGmap(const SelectGroups &SIGroups);
290
291 // Returns the latency cost of a given instruction.
292 std::optional<uint64_t> computeInstCost(const Instruction *I);
293
294 // Returns the misprediction cost of a given select when converted to branch.
295 Scaled64 getMispredictionCost(const SelectLike SI, const Scaled64 CondCost);
296
297 // Returns the cost of a branch when the prediction is correct.
298 Scaled64 getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
299 const SelectLike SI);
300
301 // Returns true if the target architecture supports lowering a given select.
302 bool isSelectKindSupported(const SelectLike SI);
303};
304
305class SelectOptimize : public FunctionPass {
306 SelectOptimizeImpl Impl;
307
308public:
309 static char ID;
310
311 SelectOptimize() : FunctionPass(ID) {
312 initializeSelectOptimizePass(*PassRegistry::getPassRegistry());
313 }
314
315 bool runOnFunction(Function &F) override {
316 if (skipFunction(F))
317 return false;
318
319 return Impl.runOnFunction(F, *this);
320 }
321
322 void getAnalysisUsage(AnalysisUsage &AU) const override {
323 AU.addRequired<ProfileSummaryInfoWrapperPass>();
324 AU.addRequired<TargetPassConfig>();
325 AU.addRequired<TargetTransformInfoWrapperPass>();
326 AU.addRequired<LoopInfoWrapperPass>();
327 AU.addRequired<BlockFrequencyInfoWrapperPass>();
328 AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
329 }
330};
331
332} // namespace
333
334 PreservedAnalyses SelectOptimizePass::run(Function &F,
335 FunctionAnalysisManager &FAM) {
336 SelectOptimizeImpl Impl(TM);
337 return Impl.run(F, FAM);
338}
339
340char SelectOptimize::ID = 0;
341
342INITIALIZE_PASS_BEGIN(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
343 false)
344 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
345 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
346 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
347 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
348 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
349 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
350INITIALIZE_PASS_END(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
351 false)
352
353FunctionPass *llvm::createSelectOptimizePass() { return new SelectOptimize(); }
354
355PreservedAnalyses SelectOptimizeImpl::run(Function &F,
356 FunctionAnalysisManager &FAM) {
357 TSI = TM->getSubtargetImpl(F);
358 TLI = TSI->getTargetLowering();
359
360 // If none of the select types are supported then skip this pass.
361 // This is an optimization pass. Legality issues will be handled by
362 // instruction selection.
363 if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) &&
364 !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) &&
365 !TLI->isSelectSupported(TargetLowering::VectorMaskSelect))
366 return PreservedAnalyses::all();
367
368 TTI = &FAM.getResult<TargetIRAnalysis>(F);
369 if (!TTI->enableSelectOptimize())
370 return PreservedAnalyses::all();
371
372 PSI = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F)
373 .getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
374 assert(PSI && "This pass requires module analysis pass `profile-summary`!");
375 BFI = &FAM.getResult<BlockFrequencyAnalysis>(F);
376
377 // When optimizing for size, selects are preferable over branches.
378 if (llvm::shouldOptimizeForSize(&F, PSI, BFI))
379 return PreservedAnalyses::all();
380
381 LI = &FAM.getResult<LoopAnalysis>(F);
382 ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
383 TSchedModel.init(TSI);
384
385 bool Changed = optimizeSelects(F);
386 return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
387 }
388
389bool SelectOptimizeImpl::runOnFunction(Function &F, Pass &P) {
390 TM = &P.getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
391 TSI = TM->getSubtargetImpl(F);
392 TLI = TSI->getTargetLowering();
393
394 // If none of the select types are supported then skip this pass.
395 // This is an optimization pass. Legality issues will be handled by
396 // instruction selection.
397 if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) &&
398 !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) &&
399 !TLI->isSelectSupported(TargetLowering::VectorMaskSelect))
400 return false;
401
402 TTI = &P.getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
403
404 if (!TTI->enableSelectOptimize())
405 return false;
406
407 LI = &P.getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
408 BFI = &P.getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
409 PSI = &P.getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
410 ORE = &P.getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
411 TSchedModel.init(TSI);
412
413 // When optimizing for size, selects are preferable over branches.
414 if (llvm::shouldOptimizeForSize(&F, PSI, BFI))
415 return false;
416
417 return optimizeSelects(F);
418}
419
420bool SelectOptimizeImpl::optimizeSelects(Function &F) {
421 // Determine for which select groups it is profitable converting to branches.
422 SelectGroups ProfSIGroups;
423 // Base heuristics apply only to non-loops and outer loops.
424 optimizeSelectsBase(F, ProfSIGroups);
425 // Separate heuristics for inner-most loops.
426 optimizeSelectsInnerLoops(F, ProfSIGroups);
427
428 // Convert to branches the select groups that were deemed
429 // profitable-to-convert.
430 convertProfitableSIGroups(ProfSIGroups);
431
432 // Code modified if at least one select group was converted.
433 return !ProfSIGroups.empty();
434}
435
436void SelectOptimizeImpl::optimizeSelectsBase(Function &F,
437 SelectGroups &ProfSIGroups) {
438 // Collect all the select groups.
439 SelectGroups SIGroups;
440 for (BasicBlock &BB : F) {
441 // Base heuristics apply only to non-loops and outer loops.
442 Loop *L = LI->getLoopFor(&BB);
443 if (L && L->isInnermost())
444 continue;
445 collectSelectGroups(BB, SIGroups);
446 }
447
448 // Determine for which select groups it is profitable converting to branches.
449 findProfitableSIGroupsBase(SIGroups, ProfSIGroups);
450}
451
452void SelectOptimizeImpl::optimizeSelectsInnerLoops(Function &F,
453 SelectGroups &ProfSIGroups) {
454 SmallVector<Loop *, 2> Loops(LI->begin(), LI->end());
455 // Need to check size on each iteration as we accumulate child loops.
456 for (unsigned long i = 0; i < Loops.size(); ++i)
457 llvm::append_range(Loops, Loops[i]->getSubLoops());
458
459 for (Loop *L : Loops) {
460 if (!L->isInnermost())
461 continue;
462
463 SelectGroups SIGroups;
464 for (BasicBlock *BB : L->getBlocks())
465 collectSelectGroups(*BB, SIGroups);
466
467 findProfitableSIGroupsInnerLoops(L, SIGroups, ProfSIGroups);
468 }
469}
470
471/// Returns optimised value on \p IsTrue branch. For SelectInst that would be
472/// either True or False value. For (BinaryOperator) instructions, where the
473/// condition may be skipped, the operation will use a non-conditional operand.
474/// For example, for `or(V,zext(cond))` this function would return V.
475/// However, if the conditional operand on \p IsTrue branch matters, we create a
476/// clone of instruction at the end of that branch \p B and replace the
477/// condition operand with a constant.
478///
479 /// Also \p OptSelects contains previously optimised select-like instructions.
480 /// If the current value uses one of the optimised values, we can optimise it
481 /// further by replacing it with the corresponding value on the given branch.
482 static Value *getTrueOrFalseValue(
483 SelectOptimizeImpl::SelectLike &SI, bool isTrue,
484 SmallDenseMap<Instruction *, std::pair<Value *, Value *>, 2> &OptSelects,
485 BasicBlock *B) {
486 Value *V = isTrue ? SI.getTrueValue() : SI.getFalseValue();
487 if (V) {
488 if (auto *IV = dyn_cast<Instruction>(V))
489 if (auto It = OptSelects.find(IV); It != OptSelects.end())
490 return isTrue ? It->second.first : It->second.second;
491 return V;
492 }
493
494 auto *BO = cast<BinaryOperator>(SI.getI());
495 assert((BO->getOpcode() == Instruction::Add ||
496 BO->getOpcode() == Instruction::Or ||
497 BO->getOpcode() == Instruction::Sub) &&
498 "Only currently handling Add, Or and Sub binary operators.");
499
500 auto *CBO = BO->clone();
501 auto CondIdx = SI.getConditionOpIndex();
502 auto *AuxI = cast<Instruction>(CBO->getOperand(CondIdx));
503 if (isa<ZExtInst>(AuxI) || isa<LShrOperator>(AuxI)) {
504 CBO->setOperand(CondIdx, ConstantInt::get(CBO->getType(), 1));
505 } else {
506 assert((isa<AShrOperator>(AuxI) || isa<SExtInst>(AuxI)) &&
507 "Unexpected opcode");
508 CBO->setOperand(CondIdx, ConstantInt::getAllOnesValue(CBO->getType()));
509 }
510
511 unsigned OtherIdx = 1 - CondIdx;
512 if (auto *IV = dyn_cast<Instruction>(CBO->getOperand(OtherIdx))) {
513 if (auto It = OptSelects.find(IV); It != OptSelects.end())
514 CBO->setOperand(OtherIdx, isTrue ? It->second.first : It->second.second);
515 }
516 CBO->insertBefore(B->getTerminator()->getIterator());
517 return CBO;
518}
519
520void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
521 for (SelectGroup &ASI : ProfSIGroups) {
522 // The code transformation here is a modified version of the sinking
523 // transformation in CodeGenPrepare::optimizeSelectInst with a more
524 // aggressive strategy of which instructions to sink.
525 //
526 // TODO: eliminate the redundancy of logic transforming selects to branches
527 // by removing CodeGenPrepare::optimizeSelectInst and optimizing here
528 // selects for all cases (with and without profile information).
529
530 // Transform a sequence like this:
531 // start:
532 // %cmp = cmp uge i32 %a, %b
533 // %sel = select i1 %cmp, i32 %c, i32 %d
534 //
535 // Into:
536 // start:
537 // %cmp = cmp uge i32 %a, %b
538 // %cmp.frozen = freeze %cmp
539 // br i1 %cmp.frozen, label %select.true, label %select.false
540 // select.true:
541 // br label %select.end
542 // select.false:
543 // br label %select.end
544 // select.end:
545 // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
546 //
547 // %cmp should be frozen, otherwise it may introduce undefined behavior.
548 // In addition, we may sink instructions that produce %c or %d into the
549 // destination(s) of the new branch.
550 // If the true or false blocks do not contain a sunken instruction, that
551 // block and its branch may be optimized away. In that case, one side of the
552 // first branch will point directly to select.end, and the corresponding PHI
553 // predecessor block will be the start block.
554
555 // Find all the instructions that can be soundly sunk to the true/false
556 // blocks. These are instructions that are computed solely for producing the
557 // operands of the select instructions in the group and can be sunk without
558 // breaking the semantics of the LLVM IR (e.g., cannot sink instructions
559 // with side effects).
560 SmallVector<std::stack<Instruction *>, 2> TrueSlices, FalseSlices;
561 typedef std::stack<Instruction *>::size_type StackSizeType;
562 StackSizeType maxTrueSliceLen = 0, maxFalseSliceLen = 0;
563 for (SelectLike &SI : ASI.Selects) {
564 if (!isa<SelectInst>(SI.getI()))
565 continue;
566 // For each select, compute the sinkable dependence chains of the true and
567 // false operands.
568 if (auto *TI = dyn_cast_or_null<Instruction>(SI.getTrueValue())) {
569 std::stack<Instruction *> TrueSlice;
570 getExclBackwardsSlice(TI, TrueSlice, SI.getI(), true);
571 maxTrueSliceLen = std::max(maxTrueSliceLen, TrueSlice.size());
572 TrueSlices.push_back(TrueSlice);
573 }
574 if (auto *FI = dyn_cast_or_null<Instruction>(SI.getFalseValue())) {
575 if (isa<SelectInst>(SI.getI()) || !FI->hasOneUse()) {
576 std::stack<Instruction *> FalseSlice;
577 getExclBackwardsSlice(FI, FalseSlice, SI.getI(), true);
578 maxFalseSliceLen = std::max(maxFalseSliceLen, FalseSlice.size());
579 FalseSlices.push_back(FalseSlice);
580 }
581 }
582 }
583 // In the case of multiple select instructions in the same group, the order
584 // of non-dependent instructions (instructions of different dependence
585 // slices) in the true/false blocks appears to affect performance.
586 // Interleaving the slices seems to experimentally be the optimal approach.
587 // This interleaving scheduling allows for more ILP (with a natural downside
588 // of increasing a bit register pressure) compared to a simple ordering of
589 // one whole chain after another. One would expect that this ordering would
590 // not matter since the scheduling in the backend of the compiler would
591 // take care of it, but apparently the scheduler fails to deliver optimal
592 // ILP with a naive ordering here.
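// Illustrative example: with two true-side slices whose emission orders are
// (a1, a2) and (b1), the interleaving below yields a1, b1, a2 rather than
// a1, a2, b1.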
593 SmallVector<Instruction *, 2> TrueSlicesInterleaved, FalseSlicesInterleaved;
594 for (StackSizeType IS = 0; IS < maxTrueSliceLen; ++IS) {
595 for (auto &S : TrueSlices) {
596 if (!S.empty()) {
597 TrueSlicesInterleaved.push_back(S.top());
598 S.pop();
599 }
600 }
601 }
602 for (StackSizeType IS = 0; IS < maxFalseSliceLen; ++IS) {
603 for (auto &S : FalseSlices) {
604 if (!S.empty()) {
605 FalseSlicesInterleaved.push_back(S.top());
606 S.pop();
607 }
608 }
609 }
610
611 // We split the block containing the select(s) into two blocks.
612 SelectLike &SI = ASI.Selects.front();
613 SelectLike &LastSI = ASI.Selects.back();
614 BasicBlock *StartBlock = SI.getI()->getParent();
615 BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI.getI()));
616 // With RemoveDIs turned off, SplitPt can be a dbg.* intrinsic. With
617 // RemoveDIs turned on, SplitPt would instead point to the next
618 // instruction. To match existing dbg.* intrinsic behaviour with RemoveDIs,
619 // tell splitBasicBlock that we want to include any DbgVariableRecords
620 // attached to SplitPt in the splice.
621 SplitPt.setHeadBit(true);
622 BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
623 BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock));
624 // Delete the unconditional branch that was just created by the split.
625 StartBlock->getTerminator()->eraseFromParent();
626
627 // Move any debug/pseudo and auxiliary instructions that were in-between the
628 // select group to the newly-created end block.
629 SmallVector<Instruction *, 2> SinkInstrs;
630 auto DIt = SI.getI()->getIterator();
631 auto NIt = ASI.Selects.begin();
632 while (&*DIt != LastSI.getI()) {
633 if (NIt != ASI.Selects.end() && &*DIt == NIt->getI())
634 ++NIt;
635 else
636 SinkInstrs.push_back(&*DIt);
637 DIt++;
638 }
639 auto InsertionPoint = EndBlock->getFirstInsertionPt();
640 for (auto *DI : SinkInstrs)
641 DI->moveBeforePreserving(InsertionPoint);
642
643 // Duplicate implementation for DbgRecords, the non-instruction debug-info
644 // format. Helper lambda for moving DbgRecords to the end block.
645 auto TransferDbgRecords = [&](Instruction &I) {
646 for (auto &DbgRecord :
647 llvm::make_early_inc_range(I.getDbgRecordRange())) {
648 DbgRecord.removeFromParent();
649 EndBlock->insertDbgRecordBefore(&DbgRecord,
650 EndBlock->getFirstInsertionPt());
651 }
652 };
653
654 // Iterate over all instructions in between SI and LastSI, not including
655 // SI itself. These are all the variable assignments that happen "in the
656 // middle" of the select group.
657 auto R = make_range(std::next(SI.getI()->getIterator()),
658 std::next(LastSI.getI()->getIterator()));
659 llvm::for_each(R, TransferDbgRecords);
660
661 // These are the new basic blocks for the conditional branch.
662 // At least one will become an actual new basic block.
663 BasicBlock *TrueBlock = nullptr, *FalseBlock = nullptr;
664 BranchInst *TrueBranch = nullptr, *FalseBranch = nullptr;
665 // Checks if a select-like instruction would materialise on the given branch.
666 auto HasSelectLike = [](SelectGroup &SG, bool IsTrue) {
667 for (auto &SL : SG.Selects) {
668 if ((IsTrue ? SL.getTrueValue() : SL.getFalseValue()) == nullptr)
669 return true;
670 }
671 return false;
672 };
673 if (!TrueSlicesInterleaved.empty() || HasSelectLike(ASI, true)) {
674 TrueBlock = BasicBlock::Create(EndBlock->getContext(), "select.true.sink",
675 EndBlock->getParent(), EndBlock);
676 TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
677 TrueBranch->setDebugLoc(LastSI.getI()->getDebugLoc());
678 for (Instruction *TrueInst : TrueSlicesInterleaved)
679 TrueInst->moveBefore(TrueBranch->getIterator());
680 }
681 if (!FalseSlicesInterleaved.empty() || HasSelectLike(ASI, false)) {
682 FalseBlock =
683 BasicBlock::Create(EndBlock->getContext(), "select.false.sink",
684 EndBlock->getParent(), EndBlock);
685 FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
686 FalseBranch->setDebugLoc(LastSI.getI()->getDebugLoc());
687 for (Instruction *FalseInst : FalseSlicesInterleaved)
688 FalseInst->moveBefore(FalseBranch->getIterator());
689 }
690 // If there was nothing to sink, then arbitrarily choose the 'false' side
691 // for a new input value to the PHI.
692 if (TrueBlock == FalseBlock) {
693 assert(TrueBlock == nullptr &&
694 "Unexpected basic block transform while optimizing select");
695
696 FalseBlock = BasicBlock::Create(StartBlock->getContext(), "select.false",
697 EndBlock->getParent(), EndBlock);
698 auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
699 FalseBranch->setDebugLoc(SI.getI()->getDebugLoc());
700 }
701
702 // Insert the real conditional branch based on the original condition.
703 // If we did not create a new block for one of the 'true' or 'false' paths
704 // of the condition, it means that side of the branch goes to the end block
705 // directly and the path originates from the start block from the point of
706 // view of the new PHI.
707 BasicBlock *TT, *FT;
708 if (TrueBlock == nullptr) {
709 TT = EndBlock;
710 FT = FalseBlock;
711 TrueBlock = StartBlock;
712 } else if (FalseBlock == nullptr) {
713 TT = TrueBlock;
714 FT = EndBlock;
715 FalseBlock = StartBlock;
716 } else {
717 TT = TrueBlock;
718 FT = FalseBlock;
719 }
720 IRBuilder<> IB(SI.getI());
721 auto *CondFr =
722 IB.CreateFreeze(ASI.Condition, ASI.Condition->getName() + ".frozen");
723
724 SmallDenseMap<Instruction *, std::pair<Value *, Value *>, 2> INS;
725
726 // Use reverse iterator because later select may use the value of the
727 // earlier select, and we need to propagate value through earlier select
728 // to get the PHI operand.
729 InsertionPoint = EndBlock->begin();
730 for (SelectLike &SI : ASI.Selects) {
731 // The select itself is replaced with a PHI Node.
732 PHINode *PN = PHINode::Create(SI.getType(), 2, "");
733 PN->insertBefore(InsertionPoint);
734 PN->takeName(SI.getI());
735 // Current instruction might be a condition of some other group, so we
736 // need to replace it there to avoid dangling pointer
737 if (PN->getType()->isIntegerTy(1)) {
738 for (auto &SG : ProfSIGroups) {
739 if (SG.Condition == SI.getI())
740 SG.Condition = PN;
741 }
742 }
743 SI.getI()->replaceAllUsesWith(PN);
744 auto *TV = getTrueOrFalseValue(SI, true, INS, TrueBlock);
745 auto *FV = getTrueOrFalseValue(SI, false, INS, FalseBlock);
746 INS[PN] = {TV, FV};
747 PN->addIncoming(TV, TrueBlock);
748 PN->addIncoming(FV, FalseBlock);
749 PN->setDebugLoc(SI.getI()->getDebugLoc());
750 ++NumSelectsConverted;
751 }
752 IB.CreateCondBr(CondFr, TT, FT, SI.getI());
753
754 // Remove the old select instructions, now that they are no longer used.
755 for (SelectLike &SI : ASI.Selects)
756 SI.getI()->eraseFromParent();
757 }
758}
759
760void SelectOptimizeImpl::collectSelectGroups(BasicBlock &BB,
761 SelectGroups &SIGroups) {
762 // Represents something that can be considered as select instruction.
763 // Auxiliary instructions are instructions that depend on a condition and have
764 // zero or some constant value on True/False branch, such as:
765 // * ZExt(1bit)
766 // * SExt(1bit)
767 // * Not(1bit)
768 // * A(L)Shr(Val), ValBitSize - 1, where there is a condition like `Val <= 0`
769 // earlier in the BB. For conditions that check the sign of the Val compiler
770 // may generate shifts instead of ZExt/SExt.
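// Illustrative example of the shift variant: given
//   %cmp = icmp slt i32 %v, 0
//   %sgn = ashr i32 %v, 31
//   %r = add i32 %x, %sgn
// the `ashr` acts as SExt(%cmp), so %r behaves like
// `select i1 %cmp, i32 (%x - 1), i32 %x`.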
771 struct SelectLikeInfo {
772 Value *Cond;
773 bool IsAuxiliary;
774 bool IsInverted;
775 unsigned ConditionIdx;
776 };
777
778 DenseMap<Value *, SelectLikeInfo> SelectInfo;
779 // Keeps visited comparisons to help identify AShr/LShr variants of auxiliary
780 // instructions.
781 SmallSetVector<CmpInst *, 2> SeenCmp;
782
783 // Check if the instruction is SelectLike or might be part of SelectLike
784 // expression, put information into SelectInfo and return the iterator to the
785 // inserted position.
786 auto ProcessSelectInfo = [&SelectInfo, &SeenCmp](Instruction *I) {
787 if (auto *Cmp = dyn_cast<CmpInst>(I)) {
788 SeenCmp.insert(Cmp);
789 return SelectInfo.end();
790 }
791
792 Value *Cond;
793 if (match(I, m_OneUse(m_ZExtOrSExt(m_Value(Cond)))) &&
794 Cond->getType()->isIntegerTy(1)) {
795 bool Inverted = match(Cond, m_Not(m_Value(Cond)));
796 return SelectInfo.insert({I, {Cond, true, Inverted, 0}}).first;
797 }
798
799 if (match(I, m_Not(m_Value(Cond)))) {
800 return SelectInfo.insert({I, {Cond, true, true, 0}}).first;
801 }
802
803 // Select instruction are what we are usually looking for.
804 if (match(I, m_Select(m_Value(Cond), m_Value(), m_Value()))) {
805 bool Inverted = match(Cond, m_Not(m_Value(Cond)));
806 return SelectInfo.insert({I, {Cond, false, Inverted, 0}}).first;
807 }
808 Value *Val;
809 ConstantInt *Shift;
810 if (match(I, m_Shr(m_Value(Val), m_ConstantInt(Shift))) &&
811 I->getType()->getIntegerBitWidth() == Shift->getZExtValue() + 1) {
812 for (auto *CmpI : SeenCmp) {
813 auto Pred = CmpI->getPredicate();
814 if (Val != CmpI->getOperand(0))
815 continue;
816 if ((Pred == CmpInst::ICMP_SGT &&
817 match(CmpI->getOperand(1), m_ConstantInt<-1>())) ||
818 (Pred == CmpInst::ICMP_SGE &&
819 match(CmpI->getOperand(1), m_Zero())) ||
820 (Pred == CmpInst::ICMP_SLT &&
821 match(CmpI->getOperand(1), m_Zero())) ||
822 (Pred == CmpInst::ICMP_SLE &&
823 match(CmpI->getOperand(1), m_ConstantInt<-1>()))) {
824 bool Inverted =
825 Pred == CmpInst::ICMP_SGT || Pred == CmpInst::ICMP_SGE;
826 return SelectInfo.insert({I, {CmpI, true, Inverted, 0}}).first;
827 }
828 }
829 return SelectInfo.end();
830 }
831
832 // A BinOp(Aux(X), Y) can also be treated like a select, with condition X
833 // and values Y|1 and Y.
834 // `Aux` can be either `ZExt(1bit)`, `SExt(1bit)` or `XShr(Val), ValBitSize
835 // - 1`. `BinOp` can be Add, Sub or Or.
836 Value *X;
837 auto MatchZExtOrSExtPattern =
838 m_c_BinOp(m_ZExtOrSExt(m_Value(X)), m_Value());
839 auto MatchShiftPattern =
840 m_c_BinOp(m_AShr(m_Value(X), m_ConstantInt(Shift)), m_Value());
841
842 // This check is unnecessary, but it prevents costly access to the
843 // SelectInfo map.
844 if ((match(I, MatchZExtOrSExtPattern) && X->getType()->isIntegerTy(1)) ||
845 (match(I, MatchShiftPattern) &&
846 X->getType()->getIntegerBitWidth() == Shift->getZExtValue() + 1)) {
847 if (I->getOpcode() != Instruction::Add &&
848 I->getOpcode() != Instruction::Sub &&
849 I->getOpcode() != Instruction::Or)
850 return SelectInfo.end();
851
852 if (I->getOpcode() == Instruction::Or && I->getType()->isIntegerTy(1))
853 return SelectInfo.end();
854
855 // Iterate through the operands and find one that depends on a recognised
856 // sign-extending auxiliary select-like instruction. The operand index does
857 // not matter for Add and Or. However, for Sub, we can only safely
858 // transform when the operand is second.
859 unsigned Idx = I->getOpcode() == Instruction::Sub ? 1 : 0;
860 for (; Idx < 2; Idx++) {
861 auto *Op = I->getOperand(Idx);
862 auto It = SelectInfo.find(Op);
863 if (It != SelectInfo.end() && It->second.IsAuxiliary) {
864 Cond = It->second.Cond;
865 bool Inverted = It->second.IsInverted;
866 return SelectInfo.insert({I, {Cond, false, Inverted, Idx}}).first;
867 }
868 }
869 }
870 return SelectInfo.end();
871 };
872
873 bool AlreadyProcessed = false;
874 BasicBlock::iterator BBIt = BB.begin();
875 DenseMap<Value *, SelectLikeInfo>::iterator It;
876 while (BBIt != BB.end()) {
877 Instruction *I = &*BBIt++;
878 if (I->isDebugOrPseudoInst())
879 continue;
880
881 if (!AlreadyProcessed)
882 It = ProcessSelectInfo(I);
883 else
884 AlreadyProcessed = false;
885
886 if (It == SelectInfo.end() || It->second.IsAuxiliary)
887 continue;
888
889 if (!TTI->shouldTreatInstructionLikeSelect(I))
890 continue;
891
892 Value *Cond = It->second.Cond;
893 // Vector conditions are not supported.
894 if (!Cond->getType()->isIntegerTy(1))
895 continue;
896
897 SelectGroup SIGroup = {Cond, {}};
898 SIGroup.Selects.emplace_back(I, It->second.IsInverted,
899 It->second.ConditionIdx);
900
901 // If the select type is not supported, no point optimizing it.
902 // Instruction selection will take care of it.
903 if (!isSelectKindSupported(SIGroup.Selects.front()))
904 continue;
905
906 while (BBIt != BB.end()) {
907 Instruction *NI = &*BBIt;
908 // Debug/pseudo instructions should be skipped and not prevent the
909 // formation of a select group.
910 if (NI->isDebugOrPseudoInst()) {
911 ++BBIt;
912 continue;
913 }
914
915 It = ProcessSelectInfo(NI);
916 if (It == SelectInfo.end()) {
917 AlreadyProcessed = true;
918 break;
919 }
920
921 // Auxiliary with same condition
922 auto [CurrCond, IsAux, IsRev, CondIdx] = It->second;
923 if (Cond != CurrCond) {
924 AlreadyProcessed = true;
925 break;
926 }
927
928 if (!IsAux)
929 SIGroup.Selects.emplace_back(NI, IsRev, CondIdx);
930 ++BBIt;
931 }
932 LLVM_DEBUG({
933 dbgs() << "New Select group (" << SIGroup.Selects.size() << ") with\n";
934 for (auto &SI : SIGroup.Selects)
935 dbgs() << " " << *SI.getI() << "\n";
936 });
937
938 SIGroups.push_back(SIGroup);
939 }
940}
941
942void SelectOptimizeImpl::findProfitableSIGroupsBase(
943 SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
944 for (SelectGroup &ASI : SIGroups) {
945 ++NumSelectOptAnalyzed;
946 if (isConvertToBranchProfitableBase(ASI))
947 ProfSIGroups.push_back(ASI);
948 }
949}
950
951 static void EmitAndPrintRemark(OptimizationRemarkEmitter *ORE,
952 DiagnosticInfoOptimizationBase &Rem) {
953 LLVM_DEBUG(dbgs() << Rem.getMsg() << "\n");
954 ORE->emit(Rem);
955}
956
957void SelectOptimizeImpl::findProfitableSIGroupsInnerLoops(
958 const Loop *L, SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
959 NumSelectOptAnalyzed += SIGroups.size();
960 // For each select group in an inner-most loop,
961 // a branch is more preferable than a select/conditional-move if:
962 // i) conversion to branches for all the select groups of the loop satisfies
963 // loop-level heuristics including reducing the loop's critical path by
964 // some threshold (see SelectOptimizeImpl::checkLoopHeuristics); and
965 // ii) the total cost of the select group is cheaper with a branch compared
966 // to its predicated version. The cost is in terms of latency and the cost
967 // of a select group is the cost of its most expensive select instruction
968 // (assuming infinite resources and thus fully leveraging available ILP).
969
970 DenseMap<const Instruction *, CostInfo> InstCostMap;
971 CostInfo LoopCost[2] = {{Scaled64::getZero(), Scaled64::getZero()},
972 {Scaled64::getZero(), Scaled64::getZero()}};
973 if (!computeLoopCosts(L, SIGroups, InstCostMap, LoopCost) ||
974 !checkLoopHeuristics(L, LoopCost)) {
975 return;
976 }
977
978 for (SelectGroup &ASI : SIGroups) {
979 // Assuming infinite resources, the cost of a group of instructions is the
980 // cost of the most expensive instruction of the group.
981 Scaled64 SelectCost = Scaled64::getZero(), BranchCost = Scaled64::getZero();
982 for (SelectLike &SI : ASI.Selects) {
983 const auto &ICM = InstCostMap[SI.getI()];
984 SelectCost = std::max(SelectCost, ICM.PredCost);
985 BranchCost = std::max(BranchCost, ICM.NonPredCost);
986 }
987 if (BranchCost < SelectCost) {
988 OptimizationRemark OR(DEBUG_TYPE, "SelectOpti",
989 ASI.Selects.front().getI());
990 OR << "Profitable to convert to branch (loop analysis). BranchCost="
991 << BranchCost.toString() << ", SelectCost=" << SelectCost.toString()
992 << ". ";
993 EmitAndPrintRemark(ORE, OR);
994 ++NumSelectConvertedLoop;
995 ProfSIGroups.push_back(ASI);
996 } else {
997 OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti",
998 ASI.Selects.front().getI());
999 ORmiss << "Select is more profitable (loop analysis). BranchCost="
1000 << BranchCost.toString()
1001 << ", SelectCost=" << SelectCost.toString() << ". ";
1002 EmitAndPrintRemark(ORE, ORmiss);
1003 }
1004 }
1005}
1006
1007bool SelectOptimizeImpl::isConvertToBranchProfitableBase(
1008 const SelectGroup &ASI) {
1009 const SelectLike &SI = ASI.Selects.front();
1010 LLVM_DEBUG(dbgs() << "Analyzing select group containing " << *SI.getI()
1011 << "\n");
1012 OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", SI.getI());
1013 OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", SI.getI());
1014
1015 // Skip cold basic blocks. Better to optimize for size for cold blocks.
1016 if (PSI->isColdBlock(SI.getI()->getParent(), BFI)) {
1017 ++NumSelectColdBB;
1018 ORmiss << "Not converted to branch because of cold basic block. ";
1019 EmitAndPrintRemark(ORE, ORmiss);
1020 return false;
1021 }
1022
1023 // If unpredictable, branch form is less profitable.
1024 if (SI.getI()->getMetadata(LLVMContext::MD_unpredictable)) {
1025 ++NumSelectUnPred;
1026 ORmiss << "Not converted to branch because of unpredictable branch. ";
1027 EmitAndPrintRemark(ORE, ORmiss);
1028 return false;
1029 }
1030
1031 // If highly predictable, branch form is more profitable, unless a
1032 // predictable select is inexpensive in the target architecture.
1033 if (isSelectHighlyPredictable(SI) && TLI->isPredictableSelectExpensive()) {
1034 ++NumSelectConvertedHighPred;
1035 OR << "Converted to branch because of highly predictable branch. ";
1036 EmitAndPrintRemark(ORE, OR);
1037 return true;
1038 }
1039
1040 // Look for expensive instructions in the cold operand's (if any) dependence
1041 // slice of any of the selects in the group.
1042 if (hasExpensiveColdOperand(ASI)) {
1043 ++NumSelectConvertedExpColdOperand;
1044 OR << "Converted to branch because of expensive cold operand.";
1045 EmitAndPrintRemark(ORE, OR);
1046 return true;
1047 }
1048
1049 // If the latch has a select group with several elements, it is usually profitable
1050 // to convert it to branches. We let `optimizeSelectsInnerLoops` decide if
1051 // conversion is profitable for innermost loops.
1052 auto *BB = SI.getI()->getParent();
1053 auto *L = LI->getLoopFor(BB);
1054 if (L && !L->isInnermost() && L->getLoopLatch() == BB &&
1055 ASI.Selects.size() >= 3) {
1056 OR << "Converted to branch because select group in the latch block is big.";
1057 EmitAndPrintRemark(ORE, OR);
1058 return true;
1059 }
1060
1061 ORmiss << "Not profitable to convert to branch (base heuristic).";
1062 EmitAndPrintRemark(ORE, ORmiss);
1063 return false;
1064}
1065
1066 static InstructionCost divideNearest(const InstructionCost &Numerator,
1067 uint64_t Denominator) {
1068 return (Numerator + (Denominator / 2)) / Denominator;
1069}
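// For example, divideNearest(7, 2) == 4 and divideNearest(5, 3) == 2, i.e. the
// quotient is rounded to the nearest integer.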
1070
1071static bool extractBranchWeights(const SelectOptimizeImpl::SelectLike SI,
1072 uint64_t &TrueVal, uint64_t &FalseVal) {
1073 if (isa<SelectInst>(SI.getI()))
1074 return extractBranchWeights(*SI.getI(), TrueVal, FalseVal);
1075 return false;
1076}
1077
1078bool SelectOptimizeImpl::hasExpensiveColdOperand(const SelectGroup &ASI) {
1079 bool ColdOperand = false;
1080 uint64_t TrueWeight, FalseWeight, TotalWeight;
1081 if (extractBranchWeights(ASI.Selects.front(), TrueWeight, FalseWeight)) {
1082 uint64_t MinWeight = std::min(TrueWeight, FalseWeight);
1083 TotalWeight = TrueWeight + FalseWeight;
1084 // Is there a path with frequency <ColdOperandThreshold% (default:20%) ?
1085 ColdOperand = TotalWeight * ColdOperandThreshold > 100 * MinWeight;
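// E.g. (illustrative numbers) with branch weights 90/10 and the default
// threshold of 20: TotalWeight * 20 = 2000 > 100 * MinWeight = 1000, so the
// path taken only 10% of the time is considered cold.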
1086 } else if (PSI->hasProfileSummary()) {
1087 OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti",
1088 ASI.Selects.front().getI());
1089 ORmiss << "Profile data available but missing branch-weights metadata for "
1090 "select instruction. ";
1091 EmitAndPrintRemark(ORE, ORmiss);
1092 }
1093 if (!ColdOperand)
1094 return false;
1095 // Check if the cold path's dependence slice is expensive for any of the
1096 // selects of the group.
1097 for (SelectLike SI : ASI.Selects) {
1098 Instruction *ColdI = nullptr;
1099 uint64_t HotWeight;
1100 if (TrueWeight < FalseWeight) {
1101 ColdI = dyn_cast_or_null<Instruction>(SI.getTrueValue());
1102 HotWeight = FalseWeight;
1103 } else {
1104 ColdI = dyn_cast_or_null<Instruction>(SI.getFalseValue());
1105 HotWeight = TrueWeight;
1106 }
1107 if (ColdI) {
1108 std::stack<Instruction *> ColdSlice;
1109 getExclBackwardsSlice(ColdI, ColdSlice, SI.getI());
1110 InstructionCost SliceCost = 0;
1111 while (!ColdSlice.empty()) {
1112 SliceCost += TTI->getInstructionCost(ColdSlice.top(),
1113 TargetTransformInfo::TCK_Latency);
1114 ColdSlice.pop();
1115 }
1116 // The colder the cold value operand of the select is, the more expensive the
1117 // cmov becomes, since it must compute the cold value operand every time. Thus,
1118 // the colder the cold operand is, the more its cost counts.
1119 // Get nearest integer cost adjusted for coldness.
1120 InstructionCost AdjSliceCost =
1121 divideNearest(SliceCost * HotWeight, TotalWeight);
1122 if (AdjSliceCost >=
1123 ColdOperandMaxCostMultiplier * TargetTransformInfo::TCC_Expensive)
1124 return true;
1125 }
1126 }
1127 return false;
1128}
1129
1130// Check if it is safe to move LoadI next to the SI.
1131// Conservatively assume it is safe only if there is no instruction
1132// modifying memory in-between the load and the select instruction.
1133 static bool isSafeToSinkLoad(Instruction *LoadI, Instruction *SI) {
1134 // Assume loads from different basic blocks are unsafe to move.
1135 if (LoadI->getParent() != SI->getParent())
1136 return false;
1137 auto It = LoadI->getIterator();
1138 while (&*It != SI) {
1139 if (It->mayWriteToMemory())
1140 return false;
1141 It++;
1142 }
1143 return true;
1144}
1145
1146// For a given source instruction, collect its backwards dependence slice
1147// consisting of instructions exclusively computed for the purpose of producing
1148// the operands of the source instruction. As an approximation
1149// (sufficiently-accurate in practice), we populate this set with the
1150// instructions of the backwards dependence slice that only have one-use and
1151 // form a one-use chain that leads to the source instruction.
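// Illustrative example: for `%m = mul i32 %p, %q` feeding `%a = add i32 %m, 1`,
// which in turn is a select operand, both %a and %m are added to the slice if
// each has a single use; a %m with additional users is excluded because it is
// not computed exclusively for the select.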
1152void SelectOptimizeImpl::getExclBackwardsSlice(Instruction *I,
1153 std::stack<Instruction *> &Slice,
1154 Instruction *SI,
1155 bool ForSinking) {
1156 SmallPtrSet<Instruction *, 2> Visited;
1157 std::queue<Instruction *> Worklist;
1158 Worklist.push(I);
1159 while (!Worklist.empty()) {
1160 Instruction *II = Worklist.front();
1161 Worklist.pop();
1162
1163 // Avoid cycles.
1164 if (!Visited.insert(II).second)
1165 continue;
1166
1167 if (!II->hasOneUse())
1168 continue;
1169
1170 // Cannot soundly sink instructions with side-effects.
1171 // Terminator or phi instructions cannot be sunk.
1172 // Avoid sinking other select instructions (should be handled separately).
1173 if (ForSinking && (II->isTerminator() || II->mayHaveSideEffects() ||
1174 isa<SelectInst>(II) || isa<PHINode>(II)))
1175 continue;
1176
1177 // Avoid sinking loads in order not to skip state-modifying instructions,
1178 // that may alias with the loaded address.
1179 // Only allow sinking of loads within the same basic block that are
1180 // conservatively proven to be safe.
1181 if (ForSinking && II->mayReadFromMemory() && !isSafeToSinkLoad(II, SI))
1182 continue;
1183
1184 // Avoid considering instructions with less frequency than the source
1185 // instruction (i.e., avoid colder code regions of the dependence slice).
1186 if (BFI->getBlockFreq(II->getParent()) < BFI->getBlockFreq(I->getParent()))
1187 continue;
1188
1189 // Eligible one-use instruction added to the dependence slice.
1190 Slice.push(II);
1191
1192 // Explore all the operands of the current instruction to expand the slice.
1193 for (Value *Op : II->operand_values())
1194 if (auto *OpI = dyn_cast<Instruction>(Op))
1195 Worklist.push(OpI);
1196 }
1197}
1198
1199bool SelectOptimizeImpl::isSelectHighlyPredictable(const SelectLike SI) {
1200 uint64_t TrueWeight, FalseWeight;
1201 if (extractBranchWeights(SI, TrueWeight, FalseWeight)) {
1202 uint64_t Max = std::max(TrueWeight, FalseWeight);
1203 uint64_t Sum = TrueWeight + FalseWeight;
1204 if (Sum != 0) {
1205 auto Probability = BranchProbability::getBranchProbability(Max, Sum);
1206 if (Probability > TTI->getPredictableBranchThreshold())
1207 return true;
1208 }
1209 }
1210 return false;
1211}
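// Illustrative example: branch weights of 999 and 1 give a probability of
// 999/1000 = 99.9%, which exceeds the commonly used (target-dependent)
// predictable-branch threshold of 99%, so such a select would be treated as
// highly predictable.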
1212
1213bool SelectOptimizeImpl::checkLoopHeuristics(const Loop *L,
1214 const CostInfo LoopCost[2]) {
1215 // Loop-level checks to determine if a non-predicated version (with branches)
1216 // of the loop is more profitable than its predicated version.
1217
1218 if (DisableLoopLevelHeuristics)
1219 return true;
1220
1221 OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti",
1222 &*L->getHeader()->getFirstNonPHIIt());
1223
1224 if (LoopCost[0].NonPredCost > LoopCost[0].PredCost ||
1225 LoopCost[1].NonPredCost >= LoopCost[1].PredCost) {
1226 ORmissL << "No select conversion in the loop due to no reduction of loop's "
1227 "critical path. ";
1228 EmitAndPrintRemark(ORE, ORmissL);
1229 return false;
1230 }
1231
1232 Scaled64 Gain[2] = {LoopCost[0].PredCost - LoopCost[0].NonPredCost,
1233 LoopCost[1].PredCost - LoopCost[1].NonPredCost};
1234
1235 // Profitably converting to branches needs to reduce the loop's critical path
1236 // by at least some threshold (absolute gain of GainCycleThreshold cycles and
1237 // relative gain of 12.5%).
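// Illustrative example with the defaults (GainCycleThreshold = 4,
// GainRelativeThreshold = 8): a loop whose predicated critical path is 40
// cycles must shrink by at least max(4, 40/8) = 5 cycles to pass this check.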
1238 if (Gain[1] < Scaled64::get(GainCycleThreshold) ||
1239 Gain[1] * Scaled64::get(GainRelativeThreshold) < LoopCost[1].PredCost) {
1240 Scaled64 RelativeGain = Scaled64::get(100) * Gain[1] / LoopCost[1].PredCost;
1241 ORmissL << "No select conversion in the loop due to small reduction of "
1242 "loop's critical path. Gain="
1243 << Gain[1].toString()
1244 << ", RelativeGain=" << RelativeGain.toString() << "%. ";
1245 EmitAndPrintRemark(ORE, ORmissL);
1246 return false;
1247 }
1248
1249 // If the loop's critical path involves loop-carried dependences, the gradient
1250 // of the gain needs to be at least GainGradientThreshold% (defaults to 25%).
1251 // This check ensures that the latency reduction for the loop's critical path
1252 // keeps decreasing with sufficient rate beyond the two analyzed loop
1253 // iterations.
1254 if (Gain[1] > Gain[0]) {
1255 Scaled64 GradientGain = Scaled64::get(100) * (Gain[1] - Gain[0]) /
1256 (LoopCost[1].PredCost - LoopCost[0].PredCost);
1257 if (GradientGain < Scaled64::get(GainGradientThreshold)) {
1258 ORmissL << "No select conversion in the loop due to small gradient gain. "
1259 "GradientGain="
1260 << GradientGain.toString() << "%. ";
1261 EmitAndPrintRemark(ORE, ORmissL);
1262 return false;
1263 }
1264 }
1265 // If the gain decreases it is not profitable to convert.
1266 else if (Gain[1] < Gain[0]) {
1267 ORmissL
1268 << "No select conversion in the loop due to negative gradient gain. ";
1269 EmitAndPrintRemark(ORE, ORmissL);
1270 return false;
1271 }
1272
1273 // Non-predicated version of the loop is more profitable than its
1274 // predicated version.
1275 return true;
1276}
1277
1278// Computes instruction and loop-critical-path costs for both the predicated
1279// and non-predicated version of the given loop.
1280// Returns false if unable to compute these costs due to invalid cost of loop
1281// instruction(s).
1282bool SelectOptimizeImpl::computeLoopCosts(
1283 const Loop *L, const SelectGroups &SIGroups,
1284 DenseMap<const Instruction *, CostInfo> &InstCostMap, CostInfo *LoopCost) {
1285 LLVM_DEBUG(dbgs() << "Calculating Latency / IPredCost / INonPredCost of loop "
1286 << L->getHeader()->getName() << "\n");
1287 const auto SImap = getSImap(SIGroups);
1288 const auto SGmap = getSGmap(SIGroups);
1289 // Compute instruction and loop-critical-path costs across two iterations for
1290 // both predicated and non-predicated version.
1291 const unsigned Iterations = 2;
1292 for (unsigned Iter = 0; Iter < Iterations; ++Iter) {
1293 // Cost of the loop's critical path.
1294 CostInfo &MaxCost = LoopCost[Iter];
1295 for (BasicBlock *BB : L->getBlocks()) {
1296 for (const Instruction &I : *BB) {
1297 if (I.isDebugOrPseudoInst())
1298 continue;
1299 // Compute the predicated and non-predicated cost of the instruction.
1300 Scaled64 IPredCost = Scaled64::getZero(),
1301 INonPredCost = Scaled64::getZero();
1302
1303 // Assume infinite resources that allow to fully exploit the available
1304 // instruction-level parallelism.
1305 // InstCost = InstLatency + max(Op1Cost, Op2Cost, … OpNCost)
1306 for (const Use &U : I.operands()) {
1307 auto UI = dyn_cast<Instruction>(U.get());
1308 if (!UI)
1309 continue;
1310 if (auto It = InstCostMap.find(UI); It != InstCostMap.end()) {
1311 IPredCost = std::max(IPredCost, It->second.PredCost);
1312 INonPredCost = std::max(INonPredCost, It->second.NonPredCost);
1313 }
1314 }
1315 auto ILatency = computeInstCost(&I);
1316 if (!ILatency) {
1317 OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti", &I);
1318 ORmissL << "Invalid instruction cost preventing analysis and "
1319 "optimization of the inner-most loop containing this "
1320 "instruction. ";
1321 EmitAndPrintRemark(ORE, ORmissL);
1322 return false;
1323 }
1324 IPredCost += Scaled64::get(*ILatency);
1325 INonPredCost += Scaled64::get(*ILatency);
1326
1327 // For a select that can be converted to branch,
1328 // compute its cost as a branch (non-predicated cost).
1329 //
1330 // BranchCost = PredictedPathCost + MispredictCost
1331 // PredictedPathCost = TrueOpCost * TrueProb + FalseOpCost * FalseProb
1332 // MispredictCost = max(MispredictPenalty, CondCost) * MispredictRate
1333 if (auto It = SImap.find(&I); It != SImap.end()) {
1334 auto SI = It->second;
1335 const auto *SG = SGmap.at(&I);
1336 Scaled64 TrueOpCost = SI.getOpCostOnBranch(true, InstCostMap, TTI);
1337 Scaled64 FalseOpCost = SI.getOpCostOnBranch(false, InstCostMap, TTI);
1338 Scaled64 PredictedPathCost =
1339 getPredictedPathCost(TrueOpCost, FalseOpCost, SI);
1340
1341 Scaled64 CondCost = Scaled64::getZero();
1342 if (auto *CI = dyn_cast<Instruction>(SG->Condition))
1343 if (auto It = InstCostMap.find(CI); It != InstCostMap.end())
1344 CondCost = It->second.NonPredCost;
1345 Scaled64 MispredictCost = getMispredictionCost(SI, CondCost);
1346
1347 INonPredCost = PredictedPathCost + MispredictCost;
1348 }
1349 LLVM_DEBUG(dbgs() << " " << ILatency << "/" << IPredCost << "/"
1350 << INonPredCost << " for " << I << "\n");
1351
1352 InstCostMap[&I] = {IPredCost, INonPredCost};
1353 MaxCost.PredCost = std::max(MaxCost.PredCost, IPredCost);
1354 MaxCost.NonPredCost = std::max(MaxCost.NonPredCost, INonPredCost);
1355 }
1356 }
1357 LLVM_DEBUG(dbgs() << "Iteration " << Iter + 1
1358 << " MaxCost = " << MaxCost.PredCost << " "
1359 << MaxCost.NonPredCost << "\n");
1360 }
1361 return true;
1362}
1363
1364 SmallDenseMap<const Instruction *, SelectOptimizeImpl::SelectLike, 2>
1365 SelectOptimizeImpl::getSImap(const SelectGroups &SIGroups) {
1366 SmallDenseMap<const Instruction *, SelectLike, 2> SImap;
1367 for (const SelectGroup &ASI : SIGroups)
1368 for (const SelectLike &SI : ASI.Selects)
1369 SImap.try_emplace(SI.getI(), SI);
1370 return SImap;
1371}
1372
1373 SmallDenseMap<const Instruction *, const SelectOptimizeImpl::SelectGroup *, 2>
1374 SelectOptimizeImpl::getSGmap(const SelectGroups &SIGroups) {
1375 SmallDenseMap<const Instruction *, const SelectGroup *, 2> SImap;
1376 for (const SelectGroup &ASI : SIGroups)
1377 for (const SelectLike &SI : ASI.Selects)
1378 SImap.try_emplace(SI.getI(), &ASI);
1379 return SImap;
1380}
1381
1382std::optional<uint64_t>
1383SelectOptimizeImpl::computeInstCost(const Instruction *I) {
1384 InstructionCost ICost =
1385 TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency);
1386 if (ICost.isValid())
1387 return std::optional<uint64_t>(ICost.getValue());
1388 return std::nullopt;
1389}
1390
1391 SelectOptimizeImpl::Scaled64
1392 SelectOptimizeImpl::getMispredictionCost(const SelectLike SI,
1393 const Scaled64 CondCost) {
1394 uint64_t MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty;
1395
1396 // Account for the default misprediction rate when using a branch
1397 // (conservatively set to 25% by default).
1398 uint64_t MispredictRate = MispredictDefaultRate;
1399 // If the select condition is obviously predictable, then the misprediction
1400 // rate is zero.
1401 if (isSelectHighlyPredictable(SI))
1402 MispredictRate = 0;
1403
1404 // CondCost is included to account for cases where the computation of the
1405 // condition is part of a long dependence chain (potentially loop-carried)
1406 // that would delay detection of a misprediction and increase its cost.
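// Illustrative numbers: with MispredictPenalty = 14 cycles, CondCost = 20 and
// the default 25% rate, this yields max(14, 20) * 25 / 100 = 5 cycles.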
1407 Scaled64 MispredictCost =
1408 std::max(Scaled64::get(MispredictPenalty), CondCost) *
1409 Scaled64::get(MispredictRate);
1410 MispredictCost /= Scaled64::get(100);
1411
1412 return MispredictCost;
1413}
1414
1415// Returns the cost of a branch when the prediction is correct.
1416// TrueCost * TrueProbability + FalseCost * FalseProbability.
1417 SelectOptimizeImpl::Scaled64
1418 SelectOptimizeImpl::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
1419 const SelectLike SI) {
1420 Scaled64 PredPathCost;
1421 uint64_t TrueWeight, FalseWeight;
1422 if (extractBranchWeights(SI, TrueWeight, FalseWeight)) {
1423 uint64_t SumWeight = TrueWeight + FalseWeight;
1424 if (SumWeight != 0) {
1425 PredPathCost = TrueCost * Scaled64::get(TrueWeight) +
1426 FalseCost * Scaled64::get(FalseWeight);
1427 PredPathCost /= Scaled64::get(SumWeight);
1428 return PredPathCost;
1429 }
1430 }
1431 // Without branch weight metadata, we assume 75% for the one path and 25% for
1432 // the other, and pick the result with the biggest cost.
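// E.g. (illustrative numbers) TrueCost = 8 and FalseCost = 2 give
// max(8 * 3 + 2, 2 * 3 + 8) / 4 = 26 / 4 = 6.5.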
1433 PredPathCost = std::max(TrueCost * Scaled64::get(3) + FalseCost,
1434 FalseCost * Scaled64::get(3) + TrueCost);
1435 PredPathCost /= Scaled64::get(4);
1436 return PredPathCost;
1437}
1438
1439 bool SelectOptimizeImpl::isSelectKindSupported(const SelectLike SI) {
1440 TargetLowering::SelectSupportKind SelectKind;
1441 if (SI.getType()->isVectorTy())
1442 SelectKind = TargetLowering::ScalarCondVectorVal;
1443 else
1444 SelectKind = TargetLowering::ScalarValSelect;
1445 return TLI->isSelectSupported(SelectKind);
1446}
Definition DenseMap.h:178
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:256
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator
Definition DenseMap.h:74
iterator end()
Definition DenseMap.h:81
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
Common features for diagnostics dealing with optimization remarks that are used by both IR and MIR pa...
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2788
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isDebugOrPseudoInst() const LLVM_READONLY
Return true if the instruction is a DbgInfoIntrinsic or PseudoProbeInst.
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Analysis pass that exposes the LoopInfo for a function.
Definition LoopInfo.h:569
iterator end() const
iterator begin() const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
The legacy pass manager's analysis pass to compute loop information.
Definition LoopInfo.h:596
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
OptimizationRemarkEmitter legacy analysis pass.
The optimization diagnostic interface.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
Definition Pass.h:99
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
bool isColdBlock(const BBType *BB, BFIT *BFI) const
Returns true if BasicBlock BB is considered cold.
Simple representation of a scaled number.
static ScaledNumber get(uint64_t N)
static ScaledNumber getZero()
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Analysis pass providing the TargetTransformInfo.
virtual bool isSelectSupported(SelectSupportKind) const
SelectSupportKind
Enum that describes what type of support for selects the target has.
bool isPredictableSelectExpensive() const
Return true if selects are only cheaper than branches if the branch is unlikely to be predicted right...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Target-Independent Code Generator Pass Configuration Options.
Provide an instruction scheduling machine model to CodeGen passes.
const MCSchedModel * getMCSchedModel() const
LLVM_ABI void init(const TargetSubtargetInfo *TSInfo, bool EnableSModel=true, bool EnableSItins=true)
Initialize the machine model for instruction scheduling.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetLowering * getTargetLowering() const
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
@ TCK_Latency
The latency of instruction.
LLVM_ABI InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
@ TCC_Expensive
The cost of a 'div' instruction on x86.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:396
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
bool match(Val *V, const Pattern &P)
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1730
InstructionCost Cost
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
LLVM_ABI FunctionPass * createSelectOptimizePass()
This pass converts conditional moves to conditional jumps when profitable.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2148
LLVM_ABI bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
constexpr T divideNearest(U Numerator, V Denominator)
Returns (Numerator / Denominator) rounded by round-half-up.
Definition MathExtras.h:458
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
LLVM_ABI void initializeSelectOptimizePass(PassRegistry &)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
TargetTransformInfo TTI
DWARFExpression::Operation Op
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
unsigned MispredictPenalty
Definition MCSchedule.h:311