SelectOptimize.cpp
1//===--- SelectOptimize.cpp - Convert select to branches if profitable ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass converts selects to conditional jumps when profitable.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/CodeGen/SelectOptimize.h"
14#include "llvm/ADT/SetVector.h"
15#include "llvm/ADT/SmallVector.h"
16#include "llvm/ADT/Statistic.h"
17#include "llvm/Analysis/BlockFrequencyInfo.h"
18#include "llvm/Analysis/BranchProbabilityInfo.h"
19#include "llvm/Analysis/LoopInfo.h"
20#include "llvm/Analysis/OptimizationRemarkEmitter.h"
21#include "llvm/Analysis/ProfileSummaryInfo.h"
22#include "llvm/Analysis/TargetTransformInfo.h"
23#include "llvm/CodeGen/Passes.h"
24#include "llvm/CodeGen/TargetLowering.h"
25#include "llvm/CodeGen/TargetPassConfig.h"
26#include "llvm/CodeGen/TargetSchedule.h"
27#include "llvm/CodeGen/TargetSubtargetInfo.h"
28#include "llvm/IR/BasicBlock.h"
29#include "llvm/IR/Dominators.h"
30#include "llvm/IR/Function.h"
31#include "llvm/IR/IRBuilder.h"
32#include "llvm/IR/Instruction.h"
33#include "llvm/IR/PatternMatch.h"
34#include "llvm/IR/ProfDataUtils.h"
35#include "llvm/InitializePasses.h"
36#include "llvm/Pass.h"
37#include "llvm/Support/ScaledNumber.h"
38#include "llvm/Target/TargetMachine.h"
39#include "llvm/Transforms/Utils/SizeOpts.h"
40#include <algorithm>
41#include <queue>
42#include <stack>
43
44using namespace llvm;
45using namespace llvm::PatternMatch;
46
47#define DEBUG_TYPE "select-optimize"
48
49STATISTIC(NumSelectOptAnalyzed,
50 "Number of select groups considered for conversion to branch");
51STATISTIC(NumSelectConvertedExpColdOperand,
52 "Number of select groups converted due to expensive cold operand");
53STATISTIC(NumSelectConvertedHighPred,
54 "Number of select groups converted due to high-predictability");
55STATISTIC(NumSelectUnPred,
56 "Number of select groups not converted due to unpredictability");
57STATISTIC(NumSelectColdBB,
58 "Number of select groups not converted due to cold basic block");
59STATISTIC(NumSelectConvertedLoop,
60 "Number of select groups converted due to loop-level analysis");
61STATISTIC(NumSelectsConverted, "Number of selects converted");
62
64 "cold-operand-threshold",
65 cl::desc("Maximum frequency of path for an operand to be considered cold."),
66 cl::init(20), cl::Hidden);
67
69 "cold-operand-max-cost-multiplier",
70 cl::desc("Maximum cost multiplier of TCC_expensive for the dependence "
71 "slice of a cold operand to be considered inexpensive."),
73
75 GainGradientThreshold("select-opti-loop-gradient-gain-threshold",
76 cl::desc("Gradient gain threshold (%)."),
77 cl::init(25), cl::Hidden);
78
80 GainCycleThreshold("select-opti-loop-cycle-gain-threshold",
81 cl::desc("Minimum gain per loop (in cycles) threshold."),
83
85 "select-opti-loop-relative-gain-threshold",
87 "Minimum relative gain per loop threshold (1/X). Defaults to 12.5%"),
89
91 "mispredict-default-rate", cl::Hidden, cl::init(25),
92 cl::desc("Default mispredict rate (initialized to 25%)."));
93
94static cl::opt<bool>
95 DisableLoopLevelHeuristics("disable-loop-level-heuristics", cl::Hidden,
96 cl::init(false),
97 cl::desc("Disable loop-level heuristics."));
98
99namespace {
100
101class SelectOptimizeImpl {
102 const TargetMachine *TM = nullptr;
103 const TargetSubtargetInfo *TSI = nullptr;
104 const TargetLowering *TLI = nullptr;
105 const TargetTransformInfo *TTI = nullptr;
106 const LoopInfo *LI = nullptr;
107 BlockFrequencyInfo *BFI;
108 ProfileSummaryInfo *PSI = nullptr;
109 OptimizationRemarkEmitter *ORE = nullptr;
110 TargetSchedModel TSchedModel;
111
112public:
113 SelectOptimizeImpl() = default;
114 SelectOptimizeImpl(const TargetMachine *TM) : TM(TM){};
115 PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
116 bool runOnFunction(Function &F, Pass &P);
117
118 using Scaled64 = ScaledNumber<uint64_t>;
119
120 struct CostInfo {
121 /// Predicated cost (with selects as conditional moves).
122 Scaled64 PredCost;
123 /// Non-predicated cost (with selects converted to branches).
124 Scaled64 NonPredCost;
125 };
126
127 /// SelectLike is an abstraction over SelectInst and other operations that can
128 /// act like selects. For example Or(Zext(icmp), X) can be treated like
129 /// select(icmp, X|1, X).
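/// For example (illustrative IR), the sequence
///   %c = icmp eq i32 %a, 0
///   %z = zext i1 %c to i32
///   %r = or i32 %z, %x
/// yields %x | 1 when %c is true and %x otherwise, i.e. select(%c, %x|1, %x).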
130 class SelectLike {
131 /// The select (/or) instruction.
132 Instruction *I;
133 /// Whether this select is inverted, "not(cond), FalseVal, TrueVal", as
134 /// opposed to the original condition.
135 bool Inverted = false;
136
137 /// The index of the operand that depends on condition. Only for select-like
138 /// instruction such as Or/Add.
139 unsigned CondIdx;
140
141 public:
142 SelectLike(Instruction *I, bool Inverted = false, unsigned CondIdx = 0)
143 : I(I), Inverted(Inverted), CondIdx(CondIdx) {}
144
145 Instruction *getI() { return I; }
146 const Instruction *getI() const { return I; }
147
148 Type *getType() const { return I->getType(); }
149
150 unsigned getConditionOpIndex() { return CondIdx; };
151
152 /// Return the true value for the SelectLike instruction. Note this may not
153 /// exist for all SelectLike instructions. For example, for `or(zext(c), x)`
154 /// the true value would be `or(x,1)`. As this value does not exist, nullptr
155 /// is returned.
156 Value *getTrueValue(bool HonorInverts = true) const {
157 if (Inverted && HonorInverts)
158 return getFalseValue(/*HonorInverts=*/false);
159 if (auto *Sel = dyn_cast<SelectInst>(I))
160 return Sel->getTrueValue();
161 // Or(zext) case - The true value is Or(X), so return nullptr as the value
162 // does not yet exist.
163 if (isa<BinaryOperator>(I))
164 return nullptr;
165
166 llvm_unreachable("Unhandled case in getTrueValue");
167 }
168
169 /// Return the false value for the SelectLike instruction. For example the
170 /// getFalseValue of a select or `x` in `or(zext(c), x)` (which is
171 /// `select(c, x|1, x)`)
172 Value *getFalseValue(bool HonorInverts = true) const {
173 if (Inverted && HonorInverts)
174 return getTrueValue(/*HonorInverts=*/false);
175 if (auto *Sel = dyn_cast<SelectInst>(I))
176 return Sel->getFalseValue();
177 // We are on the branch where the condition is zero, which means BinOp
178 // does not perform any computation, and we can simply return the operand
179 // that is not related to the condition
180 if (auto *BO = dyn_cast<BinaryOperator>(I))
181 return BO->getOperand(1 - CondIdx);
182
183 llvm_unreachable("Unhandled case in getFalseValue");
184 }
185
186 /// Return the NonPredCost cost of the op on \p isTrue branch, given the
187 /// costs in \p InstCostMap. This may need to be generated for select-like
188 /// instructions.
189 Scaled64 getOpCostOnBranch(
190 bool IsTrue, const DenseMap<const Instruction *, CostInfo> &InstCostMap,
191 const TargetTransformInfo *TTI) {
192 auto *V = IsTrue ? getTrueValue() : getFalseValue();
193 if (V) {
194 if (auto *IV = dyn_cast<Instruction>(V)) {
195 auto It = InstCostMap.find(IV);
196 return It != InstCostMap.end() ? It->second.NonPredCost
197 : Scaled64::getZero();
198 }
199 return Scaled64::getZero();
200 }
201 // If getTrue(False)Value() returns nullptr, it means we are dealing with
202 // select-like instructions on the branch where the actual computation is
203 // happening. In that case the cost is equal to the cost of computation +
204 // cost of the operand that does not depend on the condition
205 InstructionCost Cost = TTI->getArithmeticInstrCost(
206 getI()->getOpcode(), I->getType(), TargetTransformInfo::TCK_Latency,
207 {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
208 {TTI::OK_UniformConstantValue, TTI::OP_PowerOf2});
209 auto TotalCost = Scaled64::get(Cost.getValue());
210 if (auto *OpI = dyn_cast<Instruction>(I->getOperand(1 - CondIdx))) {
211 auto It = InstCostMap.find(OpI);
212 if (It != InstCostMap.end())
213 TotalCost += It->second.NonPredCost;
214 }
215 return TotalCost;
216 }
217 };
218
219private:
220 // Select groups consist of consecutive select-like instructions with the same
221 // condition. Between the select-likes there can be any number of auxiliary
222 // instructions related to the condition, such as not, zext, ashr/lshr.
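// For example (illustrative IR), the two selects below share %cond and form a
// single group; the zext in-between is treated as an auxiliary instruction:
//   %s1 = select i1 %cond, i32 %a, i32 %b
//   %z  = zext i1 %cond to i32
//   %s2 = select i1 %cond, i32 %c, i32 %d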
223 struct SelectGroup {
224 Value *Condition;
225 SmallVector<SelectLike, 2> Selects;
226 };
227 using SelectGroups = SmallVector<SelectGroup, 2>;
228
229 // Converts select instructions of a function to conditional jumps when deemed
230 // profitable. Returns true if at least one select was converted.
231 bool optimizeSelects(Function &F);
232
233 // Heuristics for determining which select instructions can be profitably
234 // converted to branches. Separate heuristics for selects in inner-most loops
235 // and the rest of code regions (base heuristics for non-inner-most loop
236 // regions).
237 void optimizeSelectsBase(Function &F, SelectGroups &ProfSIGroups);
238 void optimizeSelectsInnerLoops(Function &F, SelectGroups &ProfSIGroups);
239
240 // Converts to branches the select groups that were deemed
241 // profitable-to-convert.
242 void convertProfitableSIGroups(SelectGroups &ProfSIGroups);
243
244 // Splits selects of a given basic block into select groups.
245 void collectSelectGroups(BasicBlock &BB, SelectGroups &SIGroups);
246
247 // Determines for which select groups it is profitable converting to branches
248 // (base and inner-most-loop heuristics).
249 void findProfitableSIGroupsBase(SelectGroups &SIGroups,
250 SelectGroups &ProfSIGroups);
251 void findProfitableSIGroupsInnerLoops(const Loop *L, SelectGroups &SIGroups,
252 SelectGroups &ProfSIGroups);
253
254 // Determines if a select group should be converted to a branch (base
255 // heuristics).
256 bool isConvertToBranchProfitableBase(const SelectGroup &ASI);
257
258 // Returns true if there are expensive instructions in the cold value
259 // operand's (if any) dependence slice of any of the selects of the given
260 // group.
261 bool hasExpensiveColdOperand(const SelectGroup &ASI);
262
263 // For a given source instruction, collect its backwards dependence slice
264 // consisting of instructions exclusively computed for producing the operands
265 // of the source instruction.
266 void getExclBackwardsSlice(Instruction *I, std::stack<Instruction *> &Slice,
267 Instruction *SI, bool ForSinking = false);
268
269 // Returns true if the condition of the select is highly predictable.
270 bool isSelectHighlyPredictable(const SelectLike SI);
271
272 // Loop-level checks to determine if a non-predicated version (with branches)
273 // of the given loop is more profitable than its predicated version.
274 bool checkLoopHeuristics(const Loop *L, const CostInfo LoopDepth[2]);
275
276 // Computes instruction and loop-critical-path costs for both the predicated
277 // and non-predicated version of the given loop.
278 bool computeLoopCosts(const Loop *L, const SelectGroups &SIGroups,
279 DenseMap<const Instruction *, CostInfo> &InstCostMap,
280 CostInfo *LoopCost);
281
282 // Returns a set of all the select instructions in the given select groups.
283 SmallDenseMap<const Instruction *, SelectLike, 2>
284 getSImap(const SelectGroups &SIGroups);
285
286 // Returns a map from select-like instructions to the corresponding select
287 // group.
288 SmallDenseMap<const Instruction *, const SelectGroup *, 2>
289 getSGmap(const SelectGroups &SIGroups);
290
291 // Returns the latency cost of a given instruction.
292 std::optional<uint64_t> computeInstCost(const Instruction *I);
293
294 // Returns the misprediction cost of a given select when converted to branch.
295 Scaled64 getMispredictionCost(const SelectLike SI, const Scaled64 CondCost);
296
297 // Returns the cost of a branch when the prediction is correct.
298 Scaled64 getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
299 const SelectLike SI);
300
301 // Returns true if the target architecture supports lowering a given select.
302 bool isSelectKindSupported(const SelectLike SI);
303};
304
305class SelectOptimize : public FunctionPass {
306 SelectOptimizeImpl Impl;
307
308public:
309 static char ID;
310
311 SelectOptimize() : FunctionPass(ID) {}
312
313 bool runOnFunction(Function &F) override {
314 if (skipFunction(F))
315 return false;
316
317 return Impl.runOnFunction(F, *this);
318 }
319
320 void getAnalysisUsage(AnalysisUsage &AU) const override {
321 AU.addRequired<ProfileSummaryInfoWrapperPass>();
322 AU.addRequired<TargetPassConfig>();
323 AU.addRequired<TargetTransformInfoWrapperPass>();
324 AU.addRequired<LoopInfoWrapperPass>();
325 AU.addRequired<BlockFrequencyInfoWrapperPass>();
326 AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
327 }
328};
329
330} // namespace
331
332PreservedAnalyses SelectOptimizePass::run(Function &F,
333 FunctionAnalysisManager &FAM) {
334 SelectOptimizeImpl Impl(TM);
335 return Impl.run(F, FAM);
336}
337
338char SelectOptimize::ID = 0;
339
340INITIALIZE_PASS_BEGIN(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
341 false)
342INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
343INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
344INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
345INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
346INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
347INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
348INITIALIZE_PASS_END(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
349 false)
350
351FunctionPass *llvm::createSelectOptimizePass() { return new SelectOptimize(); }
352
353PreservedAnalyses SelectOptimizeImpl::run(Function &F,
354 FunctionAnalysisManager &FAM) {
355 TSI = TM->getSubtargetImpl(F);
356 TLI = TSI->getTargetLowering();
357
358 // If none of the select types are supported then skip this pass.
359 // This is an optimization pass. Legality issues will be handled by
360 // instruction selection.
361 if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) &&
362 !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) &&
363 !TLI->isSelectSupported(TargetLowering::VectorMaskSelect))
364 return PreservedAnalyses::all();
365
366 TTI = &FAM.getResult<TargetIRAnalysis>(F);
367 if (!TTI->enableSelectOptimize())
368 return PreservedAnalyses::all();
369
370 PSI = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F)
371 .getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
372 assert(PSI && "This pass requires module analysis pass `profile-summary`!");
373 BFI = &FAM.getResult<BlockFrequencyAnalysis>(F);
374
375 // When optimizing for size, selects are preferable over branches.
376 if (llvm::shouldOptimizeForSize(&F, PSI, BFI))
377 return PreservedAnalyses::all();
378
379 LI = &FAM.getResult<LoopAnalysis>(F);
380 ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
381 TSchedModel.init(TSI);
382
383 bool Changed = optimizeSelects(F);
384 return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
385}
386
387bool SelectOptimizeImpl::runOnFunction(Function &F, Pass &P) {
388 TM = &P.getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
389 TSI = TM->getSubtargetImpl(F);
390 TLI = TSI->getTargetLowering();
391
392 // If none of the select types are supported then skip this pass.
393 // This is an optimization pass. Legality issues will be handled by
394 // instruction selection.
395 if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) &&
396 !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) &&
397 !TLI->isSelectSupported(TargetLowering::VectorMaskSelect))
398 return false;
399
400 TTI = &P.getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
401
402 if (!TTI->enableSelectOptimize())
403 return false;
404
405 LI = &P.getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
406 BFI = &P.getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
407 PSI = &P.getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
408 ORE = &P.getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
409 TSchedModel.init(TSI);
410
411 // When optimizing for size, selects are preferable over branches.
412 if (llvm::shouldOptimizeForSize(&F, PSI, BFI))
413 return false;
414
415 return optimizeSelects(F);
416}
417
418bool SelectOptimizeImpl::optimizeSelects(Function &F) {
419 // Determine for which select groups it is profitable converting to branches.
420 SelectGroups ProfSIGroups;
421 // Base heuristics apply only to non-loops and outer loops.
422 optimizeSelectsBase(F, ProfSIGroups);
423 // Separate heuristics for inner-most loops.
424 optimizeSelectsInnerLoops(F, ProfSIGroups);
425
426 // Convert to branches the select groups that were deemed
427 // profitable-to-convert.
428 convertProfitableSIGroups(ProfSIGroups);
429
430 // Code modified if at least one select group was converted.
431 return !ProfSIGroups.empty();
432}
433
434void SelectOptimizeImpl::optimizeSelectsBase(Function &F,
435 SelectGroups &ProfSIGroups) {
436 // Collect all the select groups.
437 SelectGroups SIGroups;
438 for (BasicBlock &BB : F) {
439 // Base heuristics apply only to non-loops and outer loops.
440 Loop *L = LI->getLoopFor(&BB);
441 if (L && L->isInnermost())
442 continue;
443 collectSelectGroups(BB, SIGroups);
444 }
445
446 // Determine for which select groups it is profitable converting to branches.
447 findProfitableSIGroupsBase(SIGroups, ProfSIGroups);
448}
449
450void SelectOptimizeImpl::optimizeSelectsInnerLoops(Function &F,
451 SelectGroups &ProfSIGroups) {
452 SmallVector<Loop *, 4> Loops(LI->begin(), LI->end());
453 // Need to check size on each iteration as we accumulate child loops.
454 for (unsigned long i = 0; i < Loops.size(); ++i)
455 llvm::append_range(Loops, Loops[i]->getSubLoops());
456
457 for (Loop *L : Loops) {
458 if (!L->isInnermost())
459 continue;
460
461 SelectGroups SIGroups;
462 for (BasicBlock *BB : L->getBlocks())
463 collectSelectGroups(*BB, SIGroups);
464
465 findProfitableSIGroupsInnerLoops(L, SIGroups, ProfSIGroups);
466 }
467}
468
469/// Returns optimised value on \p IsTrue branch. For SelectInst that would be
470/// either True or False value. For (BinaryOperator) instructions, where the
471/// condition may be skipped, the operation will use a non-conditional operand.
472/// For example, for `or(V,zext(cond))` this function would return V.
473/// However, if the conditional operand on \p IsTrue branch matters, we create a
474/// clone of instruction at the end of that branch \p B and replace the
475/// condition operand with a constant.
476///
477/// Also \p OptSelects contains previously optimised select-like instructions.
478/// If the current value uses one of the optimised values, we can optimise it
479/// further by replacing it with the corresponding value on the given branch
480static Value *getTrueOrFalseValue(
481 SelectOptimizeImpl::SelectLike &SI, bool isTrue,
482 SmallDenseMap<Instruction *, std::pair<Value *, Value *>, 2> &OptSelects,
483 BasicBlock *B) {
484 Value *V = isTrue ? SI.getTrueValue() : SI.getFalseValue();
485 if (V) {
486 if (auto *IV = dyn_cast<Instruction>(V))
487 if (auto It = OptSelects.find(IV); It != OptSelects.end())
488 return isTrue ? It->second.first : It->second.second;
489 return V;
490 }
491
492 auto *BO = cast<BinaryOperator>(SI.getI());
493 assert((BO->getOpcode() == Instruction::Add ||
494 BO->getOpcode() == Instruction::Or ||
495 BO->getOpcode() == Instruction::Sub) &&
496 "Only currently handling Add, Or and Sub binary operators.");
497
498 auto *CBO = BO->clone();
499 auto CondIdx = SI.getConditionOpIndex();
500 auto *AuxI = cast<Instruction>(CBO->getOperand(CondIdx));
501 if (isa<ZExtInst>(AuxI) || isa<LShrOperator>(AuxI)) {
502 CBO->setOperand(CondIdx, ConstantInt::get(CBO->getType(), 1));
503 } else {
504 assert((isa<AShrOperator>(AuxI) || isa<SExtInst>(AuxI)) &&
505 "Unexpected opcode");
506 CBO->setOperand(CondIdx, ConstantInt::getAllOnesValue(CBO->getType()));
507 }
508
509 unsigned OtherIdx = 1 - CondIdx;
510 if (auto *IV = dyn_cast<Instruction>(CBO->getOperand(OtherIdx))) {
511 if (auto It = OptSelects.find(IV); It != OptSelects.end())
512 CBO->setOperand(OtherIdx, isTrue ? It->second.first : It->second.second);
513 }
514 CBO->insertBefore(B->getTerminator()->getIterator());
515 return CBO;
516}
517
518void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
519 for (SelectGroup &ASI : ProfSIGroups) {
520 // The code transformation here is a modified version of the sinking
521 // transformation in CodeGenPrepare::optimizeSelectInst with a more
522 // aggressive strategy of which instructions to sink.
523 //
524 // TODO: eliminate the redundancy of logic transforming selects to branches
525 // by removing CodeGenPrepare::optimizeSelectInst and optimizing here
526 // selects for all cases (with and without profile information).
527
528 // Transform a sequence like this:
529 // start:
530 // %cmp = cmp uge i32 %a, %b
531 // %sel = select i1 %cmp, i32 %c, i32 %d
532 //
533 // Into:
534 // start:
535 // %cmp = cmp uge i32 %a, %b
536 // %cmp.frozen = freeze %cmp
537 // br i1 %cmp.frozen, label %select.true, label %select.false
538 // select.true:
539 // br label %select.end
540 // select.false:
541 // br label %select.end
542 // select.end:
543 // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
544 //
545 // %cmp should be frozen, otherwise it may introduce undefined behavior.
546 // In addition, we may sink instructions that produce %c or %d into the
547 // destination(s) of the new branch.
548 // If the true or false blocks do not contain a sunken instruction, that
549 // block and its branch may be optimized away. In that case, one side of the
550 // first branch will point directly to select.end, and the corresponding PHI
551 // predecessor block will be the start block.
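// For example (illustrative), if %c above has its only use in the select and
// is defined as `%c = mul i32 %x, %y`, the multiply can be sunk into
// select.true so it executes only when the condition holds.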
552
553 // Find all the instructions that can be soundly sunk to the true/false
554 // blocks. These are instructions that are computed solely for producing the
555 // operands of the select instructions in the group and can be sunk without
556 // breaking the semantics of the LLVM IR (e.g., cannot sink instructions
557 // with side effects).
558 SmallVector<std::stack<Instruction *>, 2> TrueSlices, FalseSlices;
559 typedef std::stack<Instruction *>::size_type StackSizeType;
560 StackSizeType maxTrueSliceLen = 0, maxFalseSliceLen = 0;
561 for (SelectLike &SI : ASI.Selects) {
562 if (!isa<SelectInst>(SI.getI()))
563 continue;
564 // For each select, compute the sinkable dependence chains of the true and
565 // false operands.
566 if (auto *TI = dyn_cast_or_null<Instruction>(SI.getTrueValue())) {
567 std::stack<Instruction *> TrueSlice;
568 getExclBackwardsSlice(TI, TrueSlice, SI.getI(), true);
569 maxTrueSliceLen = std::max(maxTrueSliceLen, TrueSlice.size());
570 TrueSlices.push_back(TrueSlice);
571 }
572 if (auto *FI = dyn_cast_or_null<Instruction>(SI.getFalseValue())) {
573 if (isa<SelectInst>(SI.getI()) || !FI->hasOneUse()) {
574 std::stack<Instruction *> FalseSlice;
575 getExclBackwardsSlice(FI, FalseSlice, SI.getI(), true);
576 maxFalseSliceLen = std::max(maxFalseSliceLen, FalseSlice.size());
577 FalseSlices.push_back(FalseSlice);
578 }
579 }
580 }
581 // In the case of multiple select instructions in the same group, the order
582 // of non-dependent instructions (instructions of different dependence
583 // slices) in the true/false blocks appears to affect performance.
584 // Interleaving the slices seems to experimentally be the optimal approach.
585 // This interleaving scheduling allows for more ILP (with the natural downside
586 // of slightly increasing register pressure) compared to a simple ordering of
587 // one whole chain after another. One would expect that this ordering would
588 // not matter since the scheduling in the backend of the compiler would
589 // take care of it, but apparently the scheduler fails to deliver optimal
590 // ILP with a naive ordering here.
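// For example (illustrative), with two sinkable true-slices {a1, a2} and
// {b1, b2} (dependencies popped first), the sunk order is a1, b1, a2, b2
// rather than a1, a2, b1, b2.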
591 SmallVector<Instruction *, 2> TrueSlicesInterleaved, FalseSlicesInterleaved;
592 for (StackSizeType IS = 0; IS < maxTrueSliceLen; ++IS) {
593 for (auto &S : TrueSlices) {
594 if (!S.empty()) {
595 TrueSlicesInterleaved.push_back(S.top());
596 S.pop();
597 }
598 }
599 }
600 for (StackSizeType IS = 0; IS < maxFalseSliceLen; ++IS) {
601 for (auto &S : FalseSlices) {
602 if (!S.empty()) {
603 FalseSlicesInterleaved.push_back(S.top());
604 S.pop();
605 }
606 }
607 }
608
609 // We split the block containing the select(s) into two blocks.
610 SelectLike &SI = ASI.Selects.front();
611 SelectLike &LastSI = ASI.Selects.back();
612 BasicBlock *StartBlock = SI.getI()->getParent();
613 BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI.getI()));
614 // With RemoveDIs turned off, SplitPt can be a dbg.* intrinsic. With
615 // RemoveDIs turned on, SplitPt would instead point to the next
616 // instruction. To match existing dbg.* intrinsic behaviour with RemoveDIs,
617 // tell splitBasicBlock that we want to include any DbgVariableRecords
618 // attached to SplitPt in the splice.
619 SplitPt.setHeadBit(true);
620 BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
621 BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock));
622 // Delete the unconditional branch that was just created by the split.
623 StartBlock->getTerminator()->eraseFromParent();
624
625 // Move any debug/pseudo and auxiliary instructions that were in-between the
626 // select group to the newly-created end block.
627 SmallVector<Instruction *, 2> SinkInstrs;
628 auto DIt = SI.getI()->getIterator();
629 auto NIt = ASI.Selects.begin();
630 while (&*DIt != LastSI.getI()) {
631 if (NIt != ASI.Selects.end() && &*DIt == NIt->getI())
632 ++NIt;
633 else
634 SinkInstrs.push_back(&*DIt);
635 DIt++;
636 }
637 auto InsertionPoint = EndBlock->getFirstInsertionPt();
638 for (auto *DI : SinkInstrs)
639 DI->moveBeforePreserving(InsertionPoint);
640
641 // Duplicate implementation for DbgRecords, the non-instruction debug-info
642 // format. Helper lambda for moving DbgRecords to the end block.
643 auto TransferDbgRecords = [&](Instruction &I) {
644 for (auto &DbgRecord :
645 llvm::make_early_inc_range(I.getDbgRecordRange())) {
646 DbgRecord.removeFromParent();
647 EndBlock->insertDbgRecordBefore(&DbgRecord,
648 EndBlock->getFirstInsertionPt());
649 }
650 };
651
652 // Iterate over all instructions in between SI and LastSI, not including
653 // SI itself. These are all the variable assignments that happen "in the
654 // middle" of the select group.
655 auto R = make_range(std::next(SI.getI()->getIterator()),
656 std::next(LastSI.getI()->getIterator()));
657 llvm::for_each(R, TransferDbgRecords);
658
659 // These are the new basic blocks for the conditional branch.
660 // At least one will become an actual new basic block.
661 BasicBlock *TrueBlock = nullptr, *FalseBlock = nullptr;
662 BranchInst *TrueBranch = nullptr, *FalseBranch = nullptr;
663 // Checks if select-like instruction would materialise on the given branch
664 auto HasSelectLike = [](SelectGroup &SG, bool IsTrue) {
665 for (auto &SL : SG.Selects) {
666 if ((IsTrue ? SL.getTrueValue() : SL.getFalseValue()) == nullptr)
667 return true;
668 }
669 return false;
670 };
671 if (!TrueSlicesInterleaved.empty() || HasSelectLike(ASI, true)) {
672 TrueBlock = BasicBlock::Create(EndBlock->getContext(), "select.true.sink",
673 EndBlock->getParent(), EndBlock);
674 TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
675 TrueBranch->setDebugLoc(LastSI.getI()->getDebugLoc());
676 for (Instruction *TrueInst : TrueSlicesInterleaved)
677 TrueInst->moveBefore(TrueBranch->getIterator());
678 }
679 if (!FalseSlicesInterleaved.empty() || HasSelectLike(ASI, false)) {
680 FalseBlock =
681 BasicBlock::Create(EndBlock->getContext(), "select.false.sink",
682 EndBlock->getParent(), EndBlock);
683 FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
684 FalseBranch->setDebugLoc(LastSI.getI()->getDebugLoc());
685 for (Instruction *FalseInst : FalseSlicesInterleaved)
686 FalseInst->moveBefore(FalseBranch->getIterator());
687 }
688 // If there was nothing to sink, then arbitrarily choose the 'false' side
689 // for a new input value to the PHI.
690 if (TrueBlock == FalseBlock) {
691 assert(TrueBlock == nullptr &&
692 "Unexpected basic block transform while optimizing select");
693
694 FalseBlock = BasicBlock::Create(StartBlock->getContext(), "select.false",
695 EndBlock->getParent(), EndBlock);
696 auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
697 FalseBranch->setDebugLoc(SI.getI()->getDebugLoc());
698 }
699
700 // Insert the real conditional branch based on the original condition.
701 // If we did not create a new block for one of the 'true' or 'false' paths
702 // of the condition, it means that side of the branch goes to the end block
703 // directly and the path originates from the start block from the point of
704 // view of the new PHI.
705 BasicBlock *TT, *FT;
706 if (TrueBlock == nullptr) {
707 TT = EndBlock;
708 FT = FalseBlock;
709 TrueBlock = StartBlock;
710 } else if (FalseBlock == nullptr) {
711 TT = TrueBlock;
712 FT = EndBlock;
713 FalseBlock = StartBlock;
714 } else {
715 TT = TrueBlock;
716 FT = FalseBlock;
717 }
718 IRBuilder<> IB(SI.getI());
719 auto *CondFr =
720 IB.CreateFreeze(ASI.Condition, ASI.Condition->getName() + ".frozen");
721
722 SmallDenseMap<Instruction *, std::pair<Value *, Value *>, 2> INS;
723
724 // Use reverse iterator because later select may use the value of the
725 // earlier select, and we need to propagate value through earlier select
726 // to get the PHI operand.
727 InsertionPoint = EndBlock->begin();
728 for (SelectLike &SI : ASI.Selects) {
729 // The select itself is replaced with a PHI Node.
730 PHINode *PN = PHINode::Create(SI.getType(), 2, "");
731 PN->insertBefore(InsertionPoint);
732 PN->takeName(SI.getI());
733 // Current instruction might be a condition of some other group, so we
734 // need to replace it there to avoid dangling pointer
735 if (PN->getType()->isIntegerTy(1)) {
736 for (auto &SG : ProfSIGroups) {
737 if (SG.Condition == SI.getI())
738 SG.Condition = PN;
739 }
740 }
741 SI.getI()->replaceAllUsesWith(PN);
742 auto *TV = getTrueOrFalseValue(SI, true, INS, TrueBlock);
743 auto *FV = getTrueOrFalseValue(SI, false, INS, FalseBlock);
744 INS[PN] = {TV, FV};
745 PN->addIncoming(TV, TrueBlock);
746 PN->addIncoming(FV, FalseBlock);
747 PN->setDebugLoc(SI.getI()->getDebugLoc());
748 ++NumSelectsConverted;
749 }
750 IB.CreateCondBr(CondFr, TT, FT, SI.getI());
751
752 // Remove the old select instructions, now that they are no longer used.
753 for (SelectLike &SI : ASI.Selects)
754 SI.getI()->eraseFromParent();
755 }
756}
757
758void SelectOptimizeImpl::collectSelectGroups(BasicBlock &BB,
759 SelectGroups &SIGroups) {
760 // Represents something that can be considered as select instruction.
761 // Auxiliary instructions are instructions that depend on a condition and have
762 // zero or some constant value on the True/False branch, such as:
763 // * ZExt(1bit)
764 // * SExt(1bit)
765 // * Not(1bit)
766 // * A(L)Shr(Val), ValBitSize - 1, where there is a condition like `Val <= 0`
767 // earlier in the BB. For conditions that check the sign of Val, the compiler
768 // may generate shifts instead of ZExt/SExt.
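// For example (illustrative IR), with an earlier `%cmp = icmp slt i32 %v, 0`,
//   %sgn = ashr i32 %v, 31
//   %res = add i32 %x, %sgn
// acts like select(%cmp, %x - 1, %x), since %sgn is -1 when %v is negative and
// 0 otherwise.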
769 struct SelectLikeInfo {
770 Value *Cond;
771 bool IsAuxiliary;
772 bool IsInverted;
773 unsigned ConditionIdx;
774 };
775
776 DenseMap<Value *, SelectLikeInfo> SelectInfo;
777 // Keeps visited comparisons to help identify AShr/LShr variants of auxiliary
778 // instructions.
779 SmallSetVector<CmpInst *, 2> SeenCmp;
780
781 // Check if the instruction is SelectLike or might be part of SelectLike
782 // expression, put information into SelectInfo and return the iterator to the
783 // inserted position.
784 auto ProcessSelectInfo = [&SelectInfo, &SeenCmp](Instruction *I) {
785 if (auto *Cmp = dyn_cast<CmpInst>(I)) {
786 SeenCmp.insert(Cmp);
787 return SelectInfo.end();
788 }
789
790 Value *Cond;
791 if (match(I, m_OneUse(m_ZExtOrSExt(m_Value(Cond)))) &&
792 Cond->getType()->isIntegerTy(1)) {
793 bool Inverted = match(Cond, m_Not(m_Value(Cond)));
794 return SelectInfo.insert({I, {Cond, true, Inverted, 0}}).first;
795 }
796
797 if (match(I, m_Not(m_Value(Cond)))) {
798 return SelectInfo.insert({I, {Cond, true, true, 0}}).first;
799 }
800
801 // Select instruction are what we are usually looking for.
802 if (match(I, m_Select(m_Value(Cond), m_Value(), m_Value()))) {
803 bool Inverted = match(Cond, m_Not(m_Value(Cond)));
804 return SelectInfo.insert({I, {Cond, false, Inverted, 0}}).first;
805 }
806 Value *Val;
807 ConstantInt *Shift;
808 if (match(I, m_Shr(m_Value(Val), m_ConstantInt(Shift))) &&
809 I->getType()->getIntegerBitWidth() == Shift->getZExtValue() + 1) {
810 for (auto *CmpI : SeenCmp) {
811 auto Pred = CmpI->getPredicate();
812 if (Val != CmpI->getOperand(0))
813 continue;
814 if ((Pred == CmpInst::ICMP_SGT &&
815 match(CmpI->getOperand(1), m_ConstantInt<-1>())) ||
816 (Pred == CmpInst::ICMP_SGE &&
817 match(CmpI->getOperand(1), m_Zero())) ||
818 (Pred == CmpInst::ICMP_SLT &&
819 match(CmpI->getOperand(1), m_Zero())) ||
820 (Pred == CmpInst::ICMP_SLE &&
821 match(CmpI->getOperand(1), m_ConstantInt<-1>()))) {
822 bool Inverted =
823 Pred == CmpInst::ICMP_SGT || Pred == CmpInst::ICMP_SGE;
824 return SelectInfo.insert({I, {CmpI, true, Inverted, 0}}).first;
825 }
826 }
827 return SelectInfo.end();
828 }
829
830 // An BinOp(Aux(X), Y) can also be treated like a select, with condition X
831 // and values Y|1 and Y.
832 // `Aux` can be either `ZExt(1bit)`, `SExt(1bit)` or `XShr(Val), ValBitSize
833 // - 1` `BinOp` can be Add, Sub, Or
834 Value *X;
835 auto MatchZExtOrSExtPattern =
836 m_c_BinOp(m_Value(), m_OneUse(m_ZExtOrSExt(m_Value(X))));
837 auto MatchShiftPattern =
838 m_c_BinOp(m_Value(), m_OneUse(m_Shr(m_Value(X), m_ConstantInt(Shift))));
839
840 // This check is unnecessary, but it prevents costly access to the
841 // SelectInfo map.
842 if ((match(I, MatchZExtOrSExtPattern) && X->getType()->isIntegerTy(1)) ||
843 (match(I, MatchShiftPattern) &&
844 X->getType()->getIntegerBitWidth() == Shift->getZExtValue() + 1)) {
845 if (I->getOpcode() != Instruction::Add &&
846 I->getOpcode() != Instruction::Sub &&
847 I->getOpcode() != Instruction::Or)
848 return SelectInfo.end();
849
850 if (I->getOpcode() == Instruction::Or && I->getType()->isIntegerTy(1))
851 return SelectInfo.end();
852
853 // Iterate through the operands and find one that depends on a recognised
854 // sign-extending auxiliary select-like instruction. The operand index does
855 // not matter for Add and Or. However, for Sub, we can only safely
856 // transform when the operand is the second one.
857 unsigned Idx = I->getOpcode() == Instruction::Sub ? 1 : 0;
858 for (; Idx < 2; Idx++) {
859 auto *Op = I->getOperand(Idx);
860 auto It = SelectInfo.find(Op);
861 if (It != SelectInfo.end() && It->second.IsAuxiliary) {
862 Cond = It->second.Cond;
863 bool Inverted = It->second.IsInverted;
864 return SelectInfo.insert({I, {Cond, false, Inverted, Idx}}).first;
865 }
866 }
867 }
868 return SelectInfo.end();
869 };
870
871 bool AlreadyProcessed = false;
872 BasicBlock::iterator BBIt = BB.begin();
873 DenseMap<Value *, SelectLikeInfo>::iterator It;
874 while (BBIt != BB.end()) {
875 Instruction *I = &*BBIt++;
876 if (I->isDebugOrPseudoInst())
877 continue;
878
879 if (!AlreadyProcessed)
880 It = ProcessSelectInfo(I);
881 else
882 AlreadyProcessed = false;
883
884 if (It == SelectInfo.end() || It->second.IsAuxiliary)
885 continue;
886
887 if (!TTI->shouldTreatInstructionLikeSelect(I))
888 continue;
889
890 Value *Cond = It->second.Cond;
891 // Vector conditions are not supported.
892 if (!Cond->getType()->isIntegerTy(1))
893 continue;
894
895 SelectGroup SIGroup = {Cond, {}};
896 SIGroup.Selects.emplace_back(I, It->second.IsInverted,
897 It->second.ConditionIdx);
898
899 // If the select type is not supported, no point optimizing it.
900 // Instruction selection will take care of it.
901 if (!isSelectKindSupported(SIGroup.Selects.front()))
902 continue;
903
904 while (BBIt != BB.end()) {
905 Instruction *NI = &*BBIt;
906 // Debug/pseudo instructions should be skipped and not prevent the
907 // formation of a select group.
908 if (NI->isDebugOrPseudoInst()) {
909 ++BBIt;
910 continue;
911 }
912
913 It = ProcessSelectInfo(NI);
914 if (It == SelectInfo.end()) {
915 AlreadyProcessed = true;
916 break;
917 }
918
919 // Auxiliary with same condition
920 auto [CurrCond, IsAux, IsRev, CondIdx] = It->second;
921 if (Cond != CurrCond) {
922 AlreadyProcessed = true;
923 break;
924 }
925
926 if (!IsAux)
927 SIGroup.Selects.emplace_back(NI, IsRev, CondIdx);
928 ++BBIt;
929 }
930 LLVM_DEBUG({
931 dbgs() << "New Select group (" << SIGroup.Selects.size() << ") with\n";
932 for (auto &SI : SIGroup.Selects)
933 dbgs() << " " << *SI.getI() << "\n";
934 });
935
936 SIGroups.push_back(SIGroup);
937 }
938}
939
940void SelectOptimizeImpl::findProfitableSIGroupsBase(
941 SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
942 for (SelectGroup &ASI : SIGroups) {
943 ++NumSelectOptAnalyzed;
944 if (isConvertToBranchProfitableBase(ASI))
945 ProfSIGroups.push_back(ASI);
946 }
947}
948
949static void EmitAndPrintRemark(OptimizationRemarkEmitter *ORE,
950 DiagnosticInfoOptimizationBase &Rem) {
951 LLVM_DEBUG(dbgs() << Rem.getMsg() << "\n");
952 ORE->emit(Rem);
953}
954
955void SelectOptimizeImpl::findProfitableSIGroupsInnerLoops(
956 const Loop *L, SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
957 NumSelectOptAnalyzed += SIGroups.size();
958 // For each select group in an inner-most loop,
959 // a branch is more preferable than a select/conditional-move if:
960 // i) conversion to branches for all the select groups of the loop satisfies
961 // loop-level heuristics including reducing the loop's critical path by
962 // some threshold (see SelectOptimizeImpl::checkLoopHeuristics); and
963 // ii) the total cost of the select group is cheaper with a branch compared
964 // to its predicated version. The cost is in terms of latency and the cost
965 // of a select group is the cost of its most expensive select instruction
966 // (assuming infinite resources and thus fully leveraging available ILP).
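// For example (illustrative costs), a group whose most expensive select has
// PredCost 3 and NonPredCost 2 cycles is converted, provided the loop-level
// checks above also pass.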
967
969 CostInfo LoopCost[2] = {{Scaled64::getZero(), Scaled64::getZero()},
970 {Scaled64::getZero(), Scaled64::getZero()}};
971 if (!computeLoopCosts(L, SIGroups, InstCostMap, LoopCost) ||
972 !checkLoopHeuristics(L, LoopCost)) {
973 return;
974 }
975
976 for (SelectGroup &ASI : SIGroups) {
977 // Assuming infinite resources, the cost of a group of instructions is the
978 // cost of the most expensive instruction of the group.
979 Scaled64 SelectCost = Scaled64::getZero(), BranchCost = Scaled64::getZero();
980 for (SelectLike &SI : ASI.Selects) {
981 const auto &ICM = InstCostMap[SI.getI()];
982 SelectCost = std::max(SelectCost, ICM.PredCost);
983 BranchCost = std::max(BranchCost, ICM.NonPredCost);
984 }
985 if (BranchCost < SelectCost) {
986 OptimizationRemark OR(DEBUG_TYPE, "SelectOpti",
987 ASI.Selects.front().getI());
988 OR << "Profitable to convert to branch (loop analysis). BranchCost="
989 << BranchCost.toString() << ", SelectCost=" << SelectCost.toString()
990 << ". ";
991 EmitAndPrintRemark(ORE, OR);
992 ++NumSelectConvertedLoop;
993 ProfSIGroups.push_back(ASI);
994 } else {
995 OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti",
996 ASI.Selects.front().getI());
997 ORmiss << "Select is more profitable (loop analysis). BranchCost="
998 << BranchCost.toString()
999 << ", SelectCost=" << SelectCost.toString() << ". ";
1000 EmitAndPrintRemark(ORE, ORmiss);
1001 }
1002 }
1003}
1004
1005bool SelectOptimizeImpl::isConvertToBranchProfitableBase(
1006 const SelectGroup &ASI) {
1007 const SelectLike &SI = ASI.Selects.front();
1008 LLVM_DEBUG(dbgs() << "Analyzing select group containing " << *SI.getI()
1009 << "\n");
1010 OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", SI.getI());
1011 OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", SI.getI());
1012
1013 // Skip cold basic blocks. Better to optimize for size for cold blocks.
1014 if (PSI->isColdBlock(SI.getI()->getParent(), BFI)) {
1015 ++NumSelectColdBB;
1016 ORmiss << "Not converted to branch because of cold basic block. ";
1017 EmitAndPrintRemark(ORE, ORmiss);
1018 return false;
1019 }
1020
1021 // If unpredictable, branch form is less profitable.
1022 if (SI.getI()->getMetadata(LLVMContext::MD_unpredictable)) {
1023 ++NumSelectUnPred;
1024 ORmiss << "Not converted to branch because of unpredictable branch. ";
1025 EmitAndPrintRemark(ORE, ORmiss);
1026 return false;
1027 }
1028
1029 // If highly predictable, branch form is more profitable, unless a
1030 // predictable select is inexpensive in the target architecture.
1031 if (isSelectHighlyPredictable(SI) && TLI->isPredictableSelectExpensive()) {
1032 ++NumSelectConvertedHighPred;
1033 OR << "Converted to branch because of highly predictable branch. ";
1034 EmitAndPrintRemark(ORE, OR);
1035 return true;
1036 }
1037
1038 // Look for expensive instructions in the cold operand's (if any) dependence
1039 // slice of any of the selects in the group.
1040 if (hasExpensiveColdOperand(ASI)) {
1041 ++NumSelectConvertedExpColdOperand;
1042 OR << "Converted to branch because of expensive cold operand.";
1043 EmitAndPrintRemark(ORE, OR);
1044 return true;
1045 }
1046
1047 // If latch has a select group with several elements, it is usually profitable
1048 // to convert it to branches. We let `optimizeSelectsInnerLoops` decide if
1049 // conversion is profitable for innermost loops.
1050 auto *BB = SI.getI()->getParent();
1051 auto *L = LI->getLoopFor(BB);
1052 if (L && !L->isInnermost() && L->getLoopLatch() == BB &&
1053 ASI.Selects.size() >= 3) {
1054 OR << "Converted to branch because select group in the latch block is big.";
1055 EmitAndPrintRemark(ORE, OR);
1056 return true;
1057 }
1058
1059 ORmiss << "Not profitable to convert to branch (base heuristic).";
1060 EmitAndPrintRemark(ORE, ORmiss);
1061 return false;
1062}
1063
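// Rounds the integer division Numerator / Denominator to the nearest value,
// e.g., divideNearest(7, 2) == 4 and divideNearest(5, 4) == 1.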
1064static InstructionCost divideNearest(const InstructionCost &Numerator,
1065 uint64_t Denominator) {
1066 return (Numerator + (Denominator / 2)) / Denominator;
1067}
1068
1069static bool extractBranchWeights(const SelectOptimizeImpl::SelectLike SI,
1070 uint64_t &TrueVal, uint64_t &FalseVal) {
1071 if (isa<SelectInst>(SI.getI()))
1072 return extractBranchWeights(*SI.getI(), TrueVal, FalseVal);
1073 return false;
1074}
1075
1076bool SelectOptimizeImpl::hasExpensiveColdOperand(const SelectGroup &ASI) {
1077 bool ColdOperand = false;
1078 uint64_t TrueWeight, FalseWeight, TotalWeight;
1079 if (extractBranchWeights(ASI.Selects.front(), TrueWeight, FalseWeight)) {
1080 uint64_t MinWeight = std::min(TrueWeight, FalseWeight);
1081 TotalWeight = TrueWeight + FalseWeight;
1082 // Is there a path with frequency <ColdOperandThreshold% (default:20%) ?
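// For example (illustrative weights), TrueWeight=90 and FalseWeight=10 give
// 100 * 20 > 100 * 10, so the false path is considered cold.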
1083 ColdOperand = TotalWeight * ColdOperandThreshold > 100 * MinWeight;
1084 } else if (PSI->hasProfileSummary()) {
1085 OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti",
1086 ASI.Selects.front().getI());
1087 ORmiss << "Profile data available but missing branch-weights metadata for "
1088 "select instruction. ";
1089 EmitAndPrintRemark(ORE, ORmiss);
1090 }
1091 if (!ColdOperand)
1092 return false;
1093 // Check if the cold path's dependence slice is expensive for any of the
1094 // selects of the group.
1095 for (SelectLike SI : ASI.Selects) {
1096 Instruction *ColdI = nullptr;
1097 uint64_t HotWeight;
1098 if (TrueWeight < FalseWeight) {
1099 ColdI = dyn_cast_or_null<Instruction>(SI.getTrueValue());
1100 HotWeight = FalseWeight;
1101 } else {
1102 ColdI = dyn_cast_or_null<Instruction>(SI.getFalseValue());
1103 HotWeight = TrueWeight;
1104 }
1105 if (ColdI) {
1106 std::stack<Instruction *> ColdSlice;
1107 getExclBackwardsSlice(ColdI, ColdSlice, SI.getI());
1108 InstructionCost SliceCost = 0;
1109 while (!ColdSlice.empty()) {
1110 SliceCost += TTI->getInstructionCost(ColdSlice.top(),
1111 TargetTransformInfo::TCK_Latency);
1112 ColdSlice.pop();
1113 }
1114 // The colder the cold value operand of the select is, the more expensive
1115 // the cmov becomes, since the cold value operand is computed on every
1116 // execution. Thus, the colder the cold operand is, the more its cost counts.
1117 // Get nearest integer cost adjusted for coldness.
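// For example (illustrative values), SliceCost=2, HotWeight=90 and
// TotalWeight=100 give AdjSliceCost = divideNearest(180, 100) = 2.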
1118 InstructionCost AdjSliceCost =
1119 divideNearest(SliceCost * HotWeight, TotalWeight);
1120 if (AdjSliceCost >=
1121 ColdOperandMaxCostMultiplier * TargetTransformInfo::TCC_Expensive)
1122 return true;
1123 }
1124 }
1125 return false;
1126}
1127
1128// Check if it is safe to move LoadI next to the SI.
1129// Conservatively assume it is safe only if there is no instruction
1130// modifying memory in-between the load and the select instruction.
1131static bool isSafeToSinkLoad(Instruction *LoadI, Instruction *SI) {
1132 // Assume loads from different basic blocks are unsafe to move.
1133 if (LoadI->getParent() != SI->getParent())
1134 return false;
1135 auto It = LoadI->getIterator();
1136 while (&*It != SI) {
1137 if (It->mayWriteToMemory())
1138 return false;
1139 It++;
1140 }
1141 return true;
1142}
1143
1144// For a given source instruction, collect its backwards dependence slice
1145// consisting of instructions exclusively computed for the purpose of producing
1146// the operands of the source instruction. As an approximation
1147// (sufficiently-accurate in practice), we populate this set with the
1148// instructions of the backwards dependence slice that only have one-use and
1149// form a one-use chain that leads to the source instruction.
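// For example (illustrative IR), for slice source %m in
//   %a = add i32 %x, %y
//   %m = mul i32 %a, 3
// where %a and %m each have a single use, the collected slice contains both
// %m and %a.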
1150void SelectOptimizeImpl::getExclBackwardsSlice(Instruction *I,
1151 std::stack<Instruction *> &Slice,
1152 Instruction *SI,
1153 bool ForSinking) {
1154 SmallPtrSet<Instruction *, 2> Visited;
1155 std::queue<Instruction *> Worklist;
1156 Worklist.push(I);
1157 while (!Worklist.empty()) {
1158 Instruction *II = Worklist.front();
1159 Worklist.pop();
1160
1161 // Avoid cycles.
1162 if (!Visited.insert(II).second)
1163 continue;
1164
1165 if (!II->hasOneUse())
1166 continue;
1167
1168 // Cannot soundly sink instructions with side-effects.
1169 // Terminator or phi instructions cannot be sunk.
1170 // Avoid sinking other select instructions (should be handled separately).
1171 if (ForSinking && (II->isTerminator() || II->mayHaveSideEffects() ||
1172 isa<SelectInst>(II) || isa<PHINode>(II)))
1173 continue;
1174
1175 // Avoid sinking loads in order not to skip state-modifying instructions,
1176 // that may alias with the loaded address.
1177 // Only allow sinking of loads within the same basic block that are
1178 // conservatively proven to be safe.
1179 if (ForSinking && II->mayReadFromMemory() && !isSafeToSinkLoad(II, SI))
1180 continue;
1181
1182 // Avoid considering instructions with less frequency than the source
1183 // instruction (i.e., avoid colder code regions of the dependence slice).
1184 if (BFI->getBlockFreq(II->getParent()) < BFI->getBlockFreq(I->getParent()))
1185 continue;
1186
1187 // Eligible one-use instruction added to the dependence slice.
1188 Slice.push(II);
1189
1190 // Explore all the operands of the current instruction to expand the slice.
1191 for (Value *Op : II->operand_values())
1192 if (auto *OpI = dyn_cast<Instruction>(Op))
1193 Worklist.push(OpI);
1194 }
1195}
1196
1197bool SelectOptimizeImpl::isSelectHighlyPredictable(const SelectLike SI) {
1198 uint64_t TrueWeight, FalseWeight;
1199 if (extractBranchWeights(SI, TrueWeight, FalseWeight)) {
1200 uint64_t Max = std::max(TrueWeight, FalseWeight);
1201 uint64_t Sum = TrueWeight + FalseWeight;
1202 if (Sum != 0) {
1203 auto Probability = BranchProbability::getBranchProbability(Max, Sum);
1204 if (Probability > TTI->getPredictableBranchThreshold())
1205 return true;
1206 }
1207 }
1208 return false;
1209}
1210
1211bool SelectOptimizeImpl::checkLoopHeuristics(const Loop *L,
1212 const CostInfo LoopCost[2]) {
1213 // Loop-level checks to determine if a non-predicated version (with branches)
1214 // of the loop is more profitable than its predicated version.
1215
1216 if (DisableLoopLevelHeuristics)
1217 return true;
1218
1219 OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti",
1220 &*L->getHeader()->getFirstNonPHIIt());
1221
1222 if (LoopCost[0].NonPredCost > LoopCost[0].PredCost ||
1223 LoopCost[1].NonPredCost >= LoopCost[1].PredCost) {
1224 ORmissL << "No select conversion in the loop due to no reduction of loop's "
1225 "critical path. ";
1226 EmitAndPrintRemark(ORE, ORmissL);
1227 return false;
1228 }
1229
1230 Scaled64 Gain[2] = {LoopCost[0].PredCost - LoopCost[0].NonPredCost,
1231 LoopCost[1].PredCost - LoopCost[1].NonPredCost};
1232
1233 // Profitably converting to branches need to reduce the loop's critical path
1234 // by at least some threshold (absolute gain of GainCycleThreshold cycles and
1235 // relative gain of 12.5%).
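// For example (illustrative costs), PredCost=40 and NonPredCost=30 on the
// second analyzed iteration give Gain[1]=10, which passes both the 4-cycle
// and the relative (10 * 8 >= 40) thresholds.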
1236 if (Gain[1] < Scaled64::get(GainCycleThreshold) ||
1237 Gain[1] * Scaled64::get(GainRelativeThreshold) < LoopCost[1].PredCost) {
1238 Scaled64 RelativeGain = Scaled64::get(100) * Gain[1] / LoopCost[1].PredCost;
1239 ORmissL << "No select conversion in the loop due to small reduction of "
1240 "loop's critical path. Gain="
1241 << Gain[1].toString()
1242 << ", RelativeGain=" << RelativeGain.toString() << "%. ";
1243 EmitAndPrintRemark(ORE, ORmissL);
1244 return false;
1245 }
1246
1247 // If the loop's critical path involves loop-carried dependences, the gradient
1248 // of the gain needs to be at least GainGradientThreshold% (defaults to 25%).
1249 // This check ensures that the latency reduction for the loop's critical path
1250 // keeps decreasing with sufficient rate beyond the two analyzed loop
1251 // iterations.
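// For example (illustrative costs), Gain[0]=4, Gain[1]=6, PredCost[0]=20 and
// PredCost[1]=36 give GradientGain = 100 * 2 / 16 = 12.5%, below the default
// 25% threshold, so conversion is rejected.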
1252 if (Gain[1] > Gain[0]) {
1253 Scaled64 GradientGain = Scaled64::get(100) * (Gain[1] - Gain[0]) /
1254 (LoopCost[1].PredCost - LoopCost[0].PredCost);
1255 if (GradientGain < Scaled64::get(GainGradientThreshold)) {
1256 ORmissL << "No select conversion in the loop due to small gradient gain. "
1257 "GradientGain="
1258 << GradientGain.toString() << "%. ";
1259 EmitAndPrintRemark(ORE, ORmissL);
1260 return false;
1261 }
1262 }
1263 // If the gain decreases it is not profitable to convert.
1264 else if (Gain[1] < Gain[0]) {
1265 ORmissL
1266 << "No select conversion in the loop due to negative gradient gain. ";
1267 EmitAndPrintRemark(ORE, ORmissL);
1268 return false;
1269 }
1270
1271 // Non-predicated version of the loop is more profitable than its
1272 // predicated version.
1273 return true;
1274}
1275
1276// Computes instruction and loop-critical-path costs for both the predicated
1277// and non-predicated version of the given loop.
1278// Returns false if unable to compute these costs due to invalid cost of loop
1279// instruction(s).
1280bool SelectOptimizeImpl::computeLoopCosts(
1281 const Loop *L, const SelectGroups &SIGroups,
1282 DenseMap<const Instruction *, CostInfo> &InstCostMap, CostInfo *LoopCost) {
1283 LLVM_DEBUG(dbgs() << "Calculating Latency / IPredCost / INonPredCost of loop "
1284 << L->getHeader()->getName() << "\n");
1285 const auto SImap = getSImap(SIGroups);
1286 const auto SGmap = getSGmap(SIGroups);
1287 // Compute instruction and loop-critical-path costs across two iterations for
1288 // both predicated and non-predicated version.
1289 const unsigned Iterations = 2;
1290 for (unsigned Iter = 0; Iter < Iterations; ++Iter) {
1291 // Cost of the loop's critical path.
1292 CostInfo &MaxCost = LoopCost[Iter];
1293 for (BasicBlock *BB : L->getBlocks()) {
1294 for (const Instruction &I : *BB) {
1295 if (I.isDebugOrPseudoInst())
1296 continue;
1297 // Compute the predicated and non-predicated cost of the instruction.
1298 Scaled64 IPredCost = Scaled64::getZero(),
1299 INonPredCost = Scaled64::getZero();
1300
1301 // Assume infinite resources that allow to fully exploit the available
1302 // instruction-level parallelism.
1303 // InstCost = InstLatency + max(Op1Cost, Op2Cost, … OpNCost)
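// For example (illustrative latencies), if the operands cost 3 and 5 and the
// instruction's own latency is 1, its cost is 5 + 1 = 6.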
1304 for (const Use &U : I.operands()) {
1305 auto UI = dyn_cast<Instruction>(U.get());
1306 if (!UI)
1307 continue;
1308 if (auto It = InstCostMap.find(UI); It != InstCostMap.end()) {
1309 IPredCost = std::max(IPredCost, It->second.PredCost);
1310 INonPredCost = std::max(INonPredCost, It->second.NonPredCost);
1311 }
1312 }
1313 auto ILatency = computeInstCost(&I);
1314 if (!ILatency) {
1315 OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti", &I);
1316 ORmissL << "Invalid instruction cost preventing analysis and "
1317 "optimization of the inner-most loop containing this "
1318 "instruction. ";
1319 EmitAndPrintRemark(ORE, ORmissL);
1320 return false;
1321 }
1322 IPredCost += Scaled64::get(*ILatency);
1323 INonPredCost += Scaled64::get(*ILatency);
1324
1325 // For a select that can be converted to branch,
1326 // compute its cost as a branch (non-predicated cost).
1327 //
1328 // BranchCost = PredictedPathCost + MispredictCost
1329 // PredictedPathCost = TrueOpCost * TrueProb + FalseOpCost * FalseProb
1330 // MispredictCost = max(MispredictPenalty, CondCost) * MispredictRate
1331 if (auto It = SImap.find(&I); It != SImap.end()) {
1332 auto SI = It->second;
1333 const auto *SG = SGmap.at(&I);
1334 Scaled64 TrueOpCost = SI.getOpCostOnBranch(true, InstCostMap, TTI);
1335 Scaled64 FalseOpCost = SI.getOpCostOnBranch(false, InstCostMap, TTI);
1336 Scaled64 PredictedPathCost =
1337 getPredictedPathCost(TrueOpCost, FalseOpCost, SI);
1338
1339 Scaled64 CondCost = Scaled64::getZero();
1340 if (auto *CI = dyn_cast<Instruction>(SG->Condition))
1341 if (auto It = InstCostMap.find(CI); It != InstCostMap.end())
1342 CondCost = It->second.NonPredCost;
1343 Scaled64 MispredictCost = getMispredictionCost(SI, CondCost);
1344
1345 INonPredCost = PredictedPathCost + MispredictCost;
1346 }
1347 LLVM_DEBUG(dbgs() << " " << ILatency << "/" << IPredCost << "/"
1348 << INonPredCost << " for " << I << "\n");
1349
1350 InstCostMap[&I] = {IPredCost, INonPredCost};
1351 MaxCost.PredCost = std::max(MaxCost.PredCost, IPredCost);
1352 MaxCost.NonPredCost = std::max(MaxCost.NonPredCost, INonPredCost);
1353 }
1354 }
1355 LLVM_DEBUG(dbgs() << "Iteration " << Iter + 1
1356 << " MaxCost = " << MaxCost.PredCost << " "
1357 << MaxCost.NonPredCost << "\n");
1358 }
1359 return true;
1360}
1361
1362SmallDenseMap<const Instruction *, SelectOptimizeImpl::SelectLike, 2>
1363SelectOptimizeImpl::getSImap(const SelectGroups &SIGroups) {
1364 SmallDenseMap<const Instruction *, SelectLike, 2> SImap;
1365 for (const SelectGroup &ASI : SIGroups)
1366 for (const SelectLike &SI : ASI.Selects)
1367 SImap.try_emplace(SI.getI(), SI);
1368 return SImap;
1369}
1370
1371SmallDenseMap<const Instruction *, const SelectOptimizeImpl::SelectGroup *, 2>
1372SelectOptimizeImpl::getSGmap(const SelectGroups &SIGroups) {
1373 SmallDenseMap<const Instruction *, const SelectGroup *, 2> SImap;
1374 for (const SelectGroup &ASI : SIGroups)
1375 for (const SelectLike &SI : ASI.Selects)
1376 SImap.try_emplace(SI.getI(), &ASI);
1377 return SImap;
1378}
1379
1380std::optional<uint64_t>
1381SelectOptimizeImpl::computeInstCost(const Instruction *I) {
1382 InstructionCost ICost =
1383 TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency);
1384 if (ICost.isValid())
1385 return std::optional<uint64_t>(ICost.getValue());
1386 return std::nullopt;
1387}
1388
1389SelectOptimizeImpl::Scaled64
1390SelectOptimizeImpl::getMispredictionCost(const SelectLike SI,
1391 const Scaled64 CondCost) {
1392 uint64_t MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty;
1393
1394 // Account for the default misprediction rate when using a branch
1395 // (conservatively set to 25% by default).
1396 uint64_t MispredictRate = MispredictDefaultRate;
1397 // If the select condition is obviously predictable, then the misprediction
1398 // rate is zero.
1399 if (isSelectHighlyPredictable(SI))
1400 MispredictRate = 0;
1401
1402 // CondCost is included to account for cases where the computation of the
1403 // condition is part of a long dependence chain (potentially loop-carried)
1404 // that would delay detection of a misprediction and increase its cost.
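// For example (illustrative values), MispredictPenalty=14, CondCost=20 and the
// default 25% rate give max(14, 20) * 25 / 100 = 5.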
1405 Scaled64 MispredictCost =
1406 std::max(Scaled64::get(MispredictPenalty), CondCost) *
1407 Scaled64::get(MispredictRate);
1408 MispredictCost /= Scaled64::get(100);
1409
1410 return MispredictCost;
1411}
1412
1413// Returns the cost of a branch when the prediction is correct.
1414// TrueCost * TrueProbability + FalseCost * FalseProbability.
1415SelectOptimizeImpl::Scaled64
1416SelectOptimizeImpl::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
1417 const SelectLike SI) {
1418 Scaled64 PredPathCost;
1419 uint64_t TrueWeight, FalseWeight;
1420 if (extractBranchWeights(SI, TrueWeight, FalseWeight)) {
1421 uint64_t SumWeight = TrueWeight + FalseWeight;
1422 if (SumWeight != 0) {
1423 PredPathCost = TrueCost * Scaled64::get(TrueWeight) +
1424 FalseCost * Scaled64::get(FalseWeight);
1425 PredPathCost /= Scaled64::get(SumWeight);
1426 return PredPathCost;
1427 }
1428 }
1429 // Without branch weight metadata, we assume 75% for the one path and 25% for
1430 // the other, and pick the result with the biggest cost.
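// For example (illustrative costs), TrueCost=4 and FalseCost=2 give
// max(4*3 + 2, 2*3 + 4) / 4 = 14 / 4 = 3.5.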
1431 PredPathCost = std::max(TrueCost * Scaled64::get(3) + FalseCost,
1432 FalseCost * Scaled64::get(3) + TrueCost);
1433 PredPathCost /= Scaled64::get(4);
1434 return PredPathCost;
1435}
1436
1437bool SelectOptimizeImpl::isSelectKindSupported(const SelectLike SI) {
1438 TargetLowering::SelectSupportKind SelectKind;
1439 if (SI.getType()->isVectorTy())
1440 SelectKind = TargetLowering::ScalarCondVectorVal;
1441 else
1442 SelectKind = TargetLowering::ScalarValSelect;
1443 return TLI->isSelectSupported(SelectKind);
1444}
Definition DenseMap.h:178
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:256
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator
Definition DenseMap.h:74
iterator end()
Definition DenseMap.h:81
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
Common features for diagnostics dealing with optimization remarks that are used by both IR and MIR pa...
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2776
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isDebugOrPseudoInst() const LLVM_READONLY
Return true if the instruction is a DbgInfoIntrinsic or PseudoProbeInst.
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Analysis pass that exposes the LoopInfo for a function.
Definition LoopInfo.h:569
iterator end() const
iterator begin() const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
The legacy pass manager's analysis pass to compute loop information.
Definition LoopInfo.h:596
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
OptimizationRemarkEmitter legacy analysis pass.
The optimization diagnostic interface.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Pass interface - Implemented by all 'passes'.
Definition Pass.h:99
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
bool isColdBlock(const BBType *BB, BFIT *BFI) const
Returns true if BasicBlock BB is considered cold.
Simple representation of a scaled number.
static ScaledNumber get(uint64_t N)
static ScaledNumber getZero()
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Analysis pass providing the TargetTransformInfo.
virtual bool isSelectSupported(SelectSupportKind) const
SelectSupportKind
Enum that describes what type of support for selects the target has.
bool isPredictableSelectExpensive() const
Return true if selects are only cheaper than branches if the branch is unlikely to be predicted right...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Target-Independent Code Generator Pass Configuration Options.
Provide an instruction scheduling machine model to CodeGen passes.
const MCSchedModel * getMCSchedModel() const
LLVM_ABI void init(const TargetSubtargetInfo *TSInfo, bool EnableSModel=true, bool EnableSItins=true)
Initialize the machine model for instruction scheduling.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetLowering * getTargetLowering() const
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
@ TCK_Latency
The latency of instruction.
LLVM_ABI InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
@ TCC_Expensive
The cost of a 'div' instruction on x86.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:403
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
bool match(Val *V, const Pattern &P)
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1730
InstructionCost Cost
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
LLVM_ABI FunctionPass * createSelectOptimizePass()
This pass converts conditional moves to conditional jumps when profitable.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2198
LLVM_ABI bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
constexpr T divideNearest(U Numerator, V Denominator)
Returns (Numerator / Denominator) rounded by round-half-up.
Definition MathExtras.h:458
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
TargetTransformInfo TTI
DWARFExpression::Operation Op
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
unsigned MispredictPenalty
Definition MCSchedule.h:311