LLVM 17.0.0git
SampleProfileLoaderBaseImpl.h
Go to the documentation of this file.
1////===- SampleProfileLoadBaseImpl.h - Profile loader base impl --*- C++-*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file provides the interface for the sampled PGO profile loader base
11/// implementation.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_TRANSFORMS_UTILS_SAMPLEPROFILELOADERBASEIMPL_H
16#define LLVM_TRANSFORMS_UTILS_SAMPLEPROFILELOADERBASEIMPL_H
17
18#include "llvm/ADT/ArrayRef.h"
19#include "llvm/ADT/DenseMap.h"
20#include "llvm/ADT/DenseSet.h"
23#include "llvm/ADT/SmallSet.h"
28#include "llvm/IR/BasicBlock.h"
29#include "llvm/IR/CFG.h"
31#include "llvm/IR/DebugLoc.h"
32#include "llvm/IR/Dominators.h"
33#include "llvm/IR/Function.h"
34#include "llvm/IR/Instruction.h"
36#include "llvm/IR/Module.h"
44
45namespace llvm {
46using namespace sampleprof;
47using namespace sampleprofutil;
49
50namespace vfs {
51class FileSystem;
52} // namespace vfs
53
54#define DEBUG_TYPE "sample-profile-impl"
55
56namespace afdo_detail {
57
58template <typename BlockT> struct IRTraits;
59template <> struct IRTraits<BasicBlock> {
64 using LoopT = Loop;
65 using LoopInfoPtrT = std::unique_ptr<LoopInfo>;
66 using DominatorTreePtrT = std::unique_ptr<DominatorTree>;
68 using PostDominatorTreePtrT = std::unique_ptr<PostDominatorTree>;
73 static Function &getFunction(Function &F) { return F; }
74 static const BasicBlock *getEntryBB(const Function *F) {
75 return &F->getEntryBlock();
76 }
78 static succ_range getSuccessors(BasicBlock *BB) { return successors(BB); }
79};
80
81} // end namespace afdo_detail
82
83extern cl::opt<bool> SampleProfileUseProfi;
84
85template <typename BT> class SampleProfileLoaderBaseImpl {
86public:
87 SampleProfileLoaderBaseImpl(std::string Name, std::string RemapName,
89 : Filename(Name), RemappingFilename(RemapName), FS(std::move(FS)) {}
90 void dump() { Reader->dump(); }
91
111
115 using Edge = std::pair<const BasicBlockT *, const BasicBlockT *>;
119
120protected:
123
126 }
129 }
132 }
135 }
136
137 unsigned getFunctionLoc(FunctionT &Func);
143 virtual const FunctionSamples *
151 ArrayRef<BasicBlockT *> Descendants,
152 PostDominatorTreeT *DomTree);
155 BlockWeightMap &SampleBlockWeights,
157 uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
159 bool propagateThroughEdges(FunctionT &F, bool UpdateBlockCount);
160 void clearFunctionData(bool ResetDT = true);
162 bool
164 const DenseSet<GlobalValue::GUID> &InlinedGUIDs);
166 const DenseSet<GlobalValue::GUID> &InlinedGUIDs);
167 void
169 const DenseSet<GlobalValue::GUID> &InlinedGUIDs);
171
172 /// Map basic blocks to their computed weights.
173 ///
174 /// The weight of a basic block is defined to be the maximum
175 /// of all the instruction weights in that block.
177
178 /// Map edges to their computed weights.
179 ///
180 /// Edge weights are computed by propagating basic block weights in
181 /// SampleProfile::propagateWeights.
183
184 /// Set of visited blocks during propagation.
186
187 /// Set of visited edges during propagation.
189
190 /// Equivalence classes for block weights.
191 ///
192 /// Two blocks BB1 and BB2 are in the same equivalence class if they
193 /// dominate and post-dominate each other, and they are in the same loop
194 /// nest. When this happens, the two blocks are guaranteed to execute
195 /// the same number of times.
197
198 /// Dominance, post-dominance and loop information.
202
203 /// Predecessors for each basic block in the CFG.
205
206 /// Successors for each basic block in the CFG.
208
209 /// Profile coverage tracker.
211
212 /// Profile reader object.
213 std::unique_ptr<SampleProfileReader> Reader;
214
215 /// Samples collected for the body of this function.
217
218 /// Name of the profile file to load.
219 std::string Filename;
220
221 /// Name of the profile remapping file to load.
222 std::string RemappingFilename;
223
224 /// VirtualFileSystem to load profile files from.
226
227 /// Profile Summary Info computed from sample profile.
229
230 /// Optimization Remark Emitter used to emit diagnostic remarks.
232};
233
234/// Clear all the per-function data used to load samples and propagate weights.
235template <typename BT>
237 BlockWeights.clear();
238 EdgeWeights.clear();
239 VisitedBlocks.clear();
240 VisitedEdges.clear();
241 EquivalenceClass.clear();
242 if (ResetDT) {
243 DT = nullptr;
244 PDT = nullptr;
245 LI = nullptr;
246 }
247 Predecessors.clear();
248 Successors.clear();
249 CoverageTracker.clear();
250}
251
252#ifndef NDEBUG
253/// Print the weight of edge \p E on stream \p OS.
254///
255/// \param OS Stream to emit the output to.
256/// \param E Edge to print.
257template <typename BT>
259 OS << "weight[" << E.first->getName() << "->" << E.second->getName()
260 << "]: " << EdgeWeights[E] << "\n";
261}
262
263/// Print the equivalence class of block \p BB on stream \p OS.
264///
265/// \param OS Stream to emit the output to.
266/// \param BB Block to print.
267template <typename BT>
269 raw_ostream &OS, const BasicBlockT *BB) {
270 const BasicBlockT *Equiv = EquivalenceClass[BB];
271 OS << "equivalence[" << BB->getName()
272 << "]: " << ((Equiv) ? EquivalenceClass[BB]->getName() : "NONE") << "\n";
273}
274
275/// Print the weight of block \p BB on stream \p OS.
276///
277/// \param OS Stream to emit the output to.
278/// \param BB Block to print.
279template <typename BT>
281 raw_ostream &OS, const BasicBlockT *BB) const {
282 const auto &I = BlockWeights.find(BB);
283 uint64_t W = (I == BlockWeights.end() ? 0 : I->second);
284 OS << "weight[" << BB->getName() << "]: " << W << "\n";
285}
286#endif
287
288/// Get the weight for an instruction.
289///
290/// The "weight" of an instruction \p Inst is the number of samples
291/// collected on that instruction at runtime. To retrieve it, we
292/// need to compute the line number of \p Inst relative to the start of its
293/// function. We use HeaderLineno to compute the offset. We then
294/// look up the samples collected for \p Inst using BodySamples.
295///
296/// \param Inst Instruction to query.
297///
298/// \returns the weight of \p Inst.
299template <typename BT>
302 return getInstWeightImpl(Inst);
303}
304
305template <typename BT>
308 const FunctionSamples *FS = findFunctionSamples(Inst);
309 if (!FS)
310 return std::error_code();
311
312 const DebugLoc &DLoc = Inst.getDebugLoc();
313 if (!DLoc)
314 return std::error_code();
315
316 const DILocation *DIL = DLoc;
317 uint32_t LineOffset = FunctionSamples::getOffset(DIL);
318 uint32_t Discriminator;
320 Discriminator = DIL->getDiscriminator();
321 else
322 Discriminator = DIL->getBaseDiscriminator();
323
324 ErrorOr<uint64_t> R = FS->findSamplesAt(LineOffset, Discriminator);
325 if (R) {
326 bool FirstMark =
327 CoverageTracker.markSamplesUsed(FS, LineOffset, Discriminator, R.get());
328 if (FirstMark) {
329 ORE->emit([&]() {
330 OptRemarkAnalysisT Remark(DEBUG_TYPE, "AppliedSamples", &Inst);
331 Remark << "Applied " << ore::NV("NumSamples", *R);
332 Remark << " samples from profile (offset: ";
333 Remark << ore::NV("LineOffset", LineOffset);
334 if (Discriminator) {
335 Remark << ".";
336 Remark << ore::NV("Discriminator", Discriminator);
337 }
338 Remark << ")";
339 return Remark;
340 });
341 }
342 LLVM_DEBUG(dbgs() << " " << DLoc.getLine() << "." << Discriminator << ":"
343 << Inst << " (line offset: " << LineOffset << "."
344 << Discriminator << " - weight: " << R.get() << ")\n");
345 }
346 return R;
347}
348
349/// Compute the weight of a basic block.
350///
351/// The weight of basic block \p BB is the maximum weight of all the
352/// instructions in BB.
353///
354/// \param BB The basic block to query.
355///
356/// \returns the weight for \p BB.
357template <typename BT>
360 uint64_t Max = 0;
361 bool HasWeight = false;
362 for (auto &I : *BB) {
363 const ErrorOr<uint64_t> &R = getInstWeight(I);
364 if (R) {
365 Max = std::max(Max, R.get());
366 HasWeight = true;
367 }
368 }
369 return HasWeight ? ErrorOr<uint64_t>(Max) : std::error_code();
370}
371
372/// Compute and store the weights of every basic block.
373///
374/// This populates the BlockWeights map by computing
375/// the weights of every basic block in the CFG.
376///
377/// \param F The function to query.
378template <typename BT>
380 bool Changed = false;
381 LLVM_DEBUG(dbgs() << "Block weights\n");
382 for (const auto &BB : F) {
383 ErrorOr<uint64_t> Weight = getBlockWeight(&BB);
384 if (Weight) {
385 BlockWeights[&BB] = Weight.get();
386 VisitedBlocks.insert(&BB);
387 Changed = true;
388 }
389 LLVM_DEBUG(printBlockWeight(dbgs(), &BB));
390 }
391
392 return Changed;
393}
394
395/// Get the FunctionSamples for an instruction.
396///
397/// The FunctionSamples of an instruction \p Inst is the inlined instance
398/// in which that instruction is coming from. We traverse the inline stack
399/// of that instruction, and match it with the tree nodes in the profile.
400///
401/// \param Inst Instruction to query.
402///
403/// \returns the FunctionSamples pointer to the inlined instance.
404template <typename BT>
406 const InstructionT &Inst) const {
407 const DILocation *DIL = Inst.getDebugLoc();
408 if (!DIL)
409 return Samples;
410
411 auto it = DILocation2SampleMap.try_emplace(DIL, nullptr);
412 if (it.second) {
413 it.first->second = Samples->findFunctionSamples(DIL, Reader->getRemapper());
414 }
415 return it.first->second;
416}
417
418/// Find equivalence classes for the given block.
419///
420/// This finds all the blocks that are guaranteed to execute the same
421/// number of times as \p BB1. To do this, it traverses all the
422/// descendants of \p BB1 in the dominator or post-dominator tree.
423///
424/// A block BB2 will be in the same equivalence class as \p BB1 if
425/// the following holds:
426///
427/// 1- \p BB1 is a descendant of BB2 in the opposite tree. So, if BB2
428/// is a descendant of \p BB1 in the dominator tree, then BB2 should
429/// dominate BB1 in the post-dominator tree.
430///
431/// 2- Both BB2 and \p BB1 must be in the same loop.
432///
433/// For every block BB2 that meets those two requirements, we set BB2's
434/// equivalence class to \p BB1.
435///
436/// \param BB1 Block to check.
437/// \param Descendants Descendants of \p BB1 in either the dom or pdom tree.
438/// \param DomTree Opposite dominator tree. If \p Descendants is filled
439/// with blocks from \p BB1's dominator tree, then
440/// this is the post-dominator tree, and vice versa.
441template <typename BT>
443 BasicBlockT *BB1, ArrayRef<BasicBlockT *> Descendants,
444 PostDominatorTreeT *DomTree) {
445 const BasicBlockT *EC = EquivalenceClass[BB1];
446 uint64_t Weight = BlockWeights[EC];
447 for (const auto *BB2 : Descendants) {
448 bool IsDomParent = DomTree->dominates(BB2, BB1);
449 bool IsInSameLoop = LI->getLoopFor(BB1) == LI->getLoopFor(BB2);
450 if (BB1 != BB2 && IsDomParent && IsInSameLoop) {
451 EquivalenceClass[BB2] = EC;
452 // If BB2 is visited, then the entire EC should be marked as visited.
453 if (VisitedBlocks.count(BB2)) {
454 VisitedBlocks.insert(EC);
455 }
456
457 // If BB2 is heavier than BB1, make BB2 have the same weight
458 // as BB1.
459 //
460 // Note that we don't worry about the opposite situation here
461 // (when BB2 is lighter than BB1). We will deal with this
462 // during the propagation phase. Right now, we just want to
463 // make sure that BB1 has the largest weight of all the
464 // members of its equivalence set.
465 Weight = std::max(Weight, BlockWeights[BB2]);
466 }
467 }
468 const BasicBlockT *EntryBB = getEntryBB(EC->getParent());
469 if (EC == EntryBB) {
470 BlockWeights[EC] = Samples->getHeadSamples() + 1;
471 } else {
472 BlockWeights[EC] = Weight;
473 }
474}
475
476/// Find equivalence classes.
477///
478/// Since samples may be missing from blocks, we can fill in the gaps by setting
479/// the weights of all the blocks in the same equivalence class to the same
480/// weight. To compute the concept of equivalence, we use dominance and loop
481/// information. Two blocks B1 and B2 are in the same equivalence class if B1
482/// dominates B2, B2 post-dominates B1 and both are in the same loop.
483///
484/// \param F The function to query.
485template <typename BT>
488 LLVM_DEBUG(dbgs() << "\nBlock equivalence classes\n");
489 // Find equivalence sets based on dominance and post-dominance information.
490 for (auto &BB : F) {
491 BasicBlockT *BB1 = &BB;
492
493 // Compute BB1's equivalence class once.
494 if (EquivalenceClass.count(BB1)) {
495 LLVM_DEBUG(printBlockEquivalence(dbgs(), BB1));
496 continue;
497 }
498
499 // By default, blocks are in their own equivalence class.
500 EquivalenceClass[BB1] = BB1;
501
502 // Traverse all the blocks dominated by BB1. We are looking for
503 // every basic block BB2 such that:
504 //
505 // 1- BB1 dominates BB2.
506 // 2- BB2 post-dominates BB1.
507 // 3- BB1 and BB2 are in the same loop nest.
508 //
509 // If all those conditions hold, it means that BB2 is executed
510 // as many times as BB1, so they are placed in the same equivalence
511 // class by making BB2's equivalence class be BB1.
512 DominatedBBs.clear();
513 DT->getDescendants(BB1, DominatedBBs);
514 findEquivalencesFor(BB1, DominatedBBs, &*PDT);
515
516 LLVM_DEBUG(printBlockEquivalence(dbgs(), BB1));
517 }
518
519 // Assign weights to equivalence classes.
520 //
521 // All the basic blocks in the same equivalence class will execute
522 // the same number of times. Since we know that the head block in
523 // each equivalence class has the largest weight, assign that weight
524 // to all the blocks in that equivalence class.
526 dbgs() << "\nAssign the same weight to all blocks in the same class\n");
527 for (auto &BI : F) {
528 const BasicBlockT *BB = &BI;
529 const BasicBlockT *EquivBB = EquivalenceClass[BB];
530 if (BB != EquivBB)
531 BlockWeights[BB] = BlockWeights[EquivBB];
532 LLVM_DEBUG(printBlockWeight(dbgs(), BB));
533 }
534}
535
536/// Visit the given edge to decide if it has a valid weight.
537///
538/// If \p E has not been visited before, we copy to \p UnknownEdge
539/// and increment the count of unknown edges.
540///
541/// \param E Edge to visit.
542/// \param NumUnknownEdges Current number of unknown edges.
543/// \param UnknownEdge Set if E has not been visited before.
544///
545/// \returns E's weight, if known. Otherwise, return 0.
546template <typename BT>
548 unsigned *NumUnknownEdges,
549 Edge *UnknownEdge) {
550 if (!VisitedEdges.count(E)) {
551 (*NumUnknownEdges)++;
552 *UnknownEdge = E;
553 return 0;
554 }
555
556 return EdgeWeights[E];
557}
558
559/// Propagate weights through incoming/outgoing edges.
560///
561/// If the weight of a basic block is known, and there is only one edge
562/// with an unknown weight, we can calculate the weight of that edge.
563///
564/// Similarly, if all the edges have a known count, we can calculate the
565/// count of the basic block, if needed.
566///
567/// \param F Function to process.
568/// \param UpdateBlockCount Whether we should update basic block counts that
569/// has already been annotated.
570///
571/// \returns True if new weights were assigned to edges or blocks.
572template <typename BT>
574 FunctionT &F, bool UpdateBlockCount) {
575 bool Changed = false;
576 LLVM_DEBUG(dbgs() << "\nPropagation through edges\n");
577 for (const auto &BI : F) {
578 const BasicBlockT *BB = &BI;
579 const BasicBlockT *EC = EquivalenceClass[BB];
580
581 // Visit all the predecessor and successor edges to determine
582 // which ones have a weight assigned already. Note that it doesn't
583 // matter that we only keep track of a single unknown edge. The
584 // only case we are interested in handling is when only a single
585 // edge is unknown (see setEdgeOrBlockWeight).
586 for (unsigned i = 0; i < 2; i++) {
587 uint64_t TotalWeight = 0;
588 unsigned NumUnknownEdges = 0, NumTotalEdges = 0;
589 Edge UnknownEdge, SelfReferentialEdge, SingleEdge;
590
591 if (i == 0) {
592 // First, visit all predecessor edges.
593 NumTotalEdges = Predecessors[BB].size();
594 for (auto *Pred : Predecessors[BB]) {
595 Edge E = std::make_pair(Pred, BB);
596 TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
597 if (E.first == E.second)
598 SelfReferentialEdge = E;
599 }
600 if (NumTotalEdges == 1) {
601 SingleEdge = std::make_pair(Predecessors[BB][0], BB);
602 }
603 } else {
604 // On the second round, visit all successor edges.
605 NumTotalEdges = Successors[BB].size();
606 for (auto *Succ : Successors[BB]) {
607 Edge E = std::make_pair(BB, Succ);
608 TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
609 }
610 if (NumTotalEdges == 1) {
611 SingleEdge = std::make_pair(BB, Successors[BB][0]);
612 }
613 }
614
615 // After visiting all the edges, there are three cases that we
616 // can handle immediately:
617 //
618 // - All the edge weights are known (i.e., NumUnknownEdges == 0).
619 // In this case, we simply check that the sum of all the edges
620 // is the same as BB's weight. If not, we change BB's weight
621 // to match. Additionally, if BB had not been visited before,
622 // we mark it visited.
623 //
624 // - Only one edge is unknown and BB has already been visited.
625 // In this case, we can compute the weight of the edge by
626 // subtracting the total block weight from all the known
627 // edge weights. If the edges weight more than BB, then the
628 // edge of the last remaining edge is set to zero.
629 //
630 // - There exists a self-referential edge and the weight of BB is
631 // known. In this case, this edge can be based on BB's weight.
632 // We add up all the other known edges and set the weight on
633 // the self-referential edge as we did in the previous case.
634 //
635 // In any other case, we must continue iterating. Eventually,
636 // all edges will get a weight, or iteration will stop when
637 // it reaches SampleProfileMaxPropagateIterations.
638 if (NumUnknownEdges <= 1) {
639 uint64_t &BBWeight = BlockWeights[EC];
640 if (NumUnknownEdges == 0) {
641 if (!VisitedBlocks.count(EC)) {
642 // If we already know the weight of all edges, the weight of the
643 // basic block can be computed. It should be no larger than the sum
644 // of all edge weights.
645 if (TotalWeight > BBWeight) {
646 BBWeight = TotalWeight;
647 Changed = true;
648 LLVM_DEBUG(dbgs() << "All edge weights for " << BB->getName()
649 << " known. Set weight for block: ";
650 printBlockWeight(dbgs(), BB););
651 }
652 } else if (NumTotalEdges == 1 &&
653 EdgeWeights[SingleEdge] < BlockWeights[EC]) {
654 // If there is only one edge for the visited basic block, use the
655 // block weight to adjust edge weight if edge weight is smaller.
656 EdgeWeights[SingleEdge] = BlockWeights[EC];
657 Changed = true;
658 }
659 } else if (NumUnknownEdges == 1 && VisitedBlocks.count(EC)) {
660 // If there is a single unknown edge and the block has been
661 // visited, then we can compute E's weight.
662 if (BBWeight >= TotalWeight)
663 EdgeWeights[UnknownEdge] = BBWeight - TotalWeight;
664 else
665 EdgeWeights[UnknownEdge] = 0;
666 const BasicBlockT *OtherEC;
667 if (i == 0)
668 OtherEC = EquivalenceClass[UnknownEdge.first];
669 else
670 OtherEC = EquivalenceClass[UnknownEdge.second];
671 // Edge weights should never exceed the BB weights it connects.
672 if (VisitedBlocks.count(OtherEC) &&
673 EdgeWeights[UnknownEdge] > BlockWeights[OtherEC])
674 EdgeWeights[UnknownEdge] = BlockWeights[OtherEC];
675 VisitedEdges.insert(UnknownEdge);
676 Changed = true;
677 LLVM_DEBUG(dbgs() << "Set weight for edge: ";
678 printEdgeWeight(dbgs(), UnknownEdge));
679 }
680 } else if (VisitedBlocks.count(EC) && BlockWeights[EC] == 0) {
681 // If a block Weights 0, all its in/out edges should weight 0.
682 if (i == 0) {
683 for (auto *Pred : Predecessors[BB]) {
684 Edge E = std::make_pair(Pred, BB);
685 EdgeWeights[E] = 0;
686 VisitedEdges.insert(E);
687 }
688 } else {
689 for (auto *Succ : Successors[BB]) {
690 Edge E = std::make_pair(BB, Succ);
691 EdgeWeights[E] = 0;
692 VisitedEdges.insert(E);
693 }
694 }
695 } else if (SelfReferentialEdge.first && VisitedBlocks.count(EC)) {
696 uint64_t &BBWeight = BlockWeights[BB];
697 // We have a self-referential edge and the weight of BB is known.
698 if (BBWeight >= TotalWeight)
699 EdgeWeights[SelfReferentialEdge] = BBWeight - TotalWeight;
700 else
701 EdgeWeights[SelfReferentialEdge] = 0;
702 VisitedEdges.insert(SelfReferentialEdge);
703 Changed = true;
704 LLVM_DEBUG(dbgs() << "Set self-referential edge weight to: ";
705 printEdgeWeight(dbgs(), SelfReferentialEdge));
706 }
707 if (UpdateBlockCount && !VisitedBlocks.count(EC) && TotalWeight > 0) {
708 BlockWeights[EC] = TotalWeight;
709 VisitedBlocks.insert(EC);
710 Changed = true;
711 }
712 }
713 }
714
715 return Changed;
716}
717
718/// Build in/out edge lists for each basic block in the CFG.
719///
720/// We are interested in unique edges. If a block B1 has multiple
721/// edges to another block B2, we only add a single B1->B2 edge.
722template <typename BT>
724 for (auto &BI : F) {
725 BasicBlockT *B1 = &BI;
726
727 // Add predecessors for B1.
729 if (!Predecessors[B1].empty())
730 llvm_unreachable("Found a stale predecessors list in a basic block.");
731 for (auto *B2 : getPredecessors(B1))
732 if (Visited.insert(B2).second)
733 Predecessors[B1].push_back(B2);
734
735 // Add successors for B1.
736 Visited.clear();
737 if (!Successors[B1].empty())
738 llvm_unreachable("Found a stale successors list in a basic block.");
739 for (auto *B2 : getSuccessors(B1))
740 if (Visited.insert(B2).second)
741 Successors[B1].push_back(B2);
742 }
743}
744
745/// Propagate weights into edges
746///
747/// The following rules are applied to every block BB in the CFG:
748///
749/// - If BB has a single predecessor/successor, then the weight
750/// of that edge is the weight of the block.
751///
752/// - If all incoming or outgoing edges are known except one, and the
753/// weight of the block is already known, the weight of the unknown
754/// edge will be the weight of the block minus the sum of all the known
755/// edges. If the sum of all the known edges is larger than BB's weight,
756/// we set the unknown edge weight to zero.
757///
758/// - If there is a self-referential edge, and the weight of the block is
759/// known, the weight for that edge is set to the weight of the block
760/// minus the weight of the other incoming edges to that block (if
761/// known).
762template <typename BT>
764 // Flow-based profile inference is only usable with BasicBlock instantiation
765 // of SampleProfileLoaderBaseImpl.
767 // Prepare block sample counts for inference.
768 BlockWeightMap SampleBlockWeights;
769 for (const auto &BI : F) {
770 ErrorOr<uint64_t> Weight = getBlockWeight(&BI);
771 if (Weight)
772 SampleBlockWeights[&BI] = Weight.get();
773 }
774 // Fill in BlockWeights and EdgeWeights using an inference algorithm.
775 applyProfi(F, Successors, SampleBlockWeights, BlockWeights, EdgeWeights);
776 } else {
777 bool Changed = true;
778 unsigned I = 0;
779
780 // If BB weight is larger than its corresponding loop's header BB weight,
781 // use the BB weight to replace the loop header BB weight.
782 for (auto &BI : F) {
783 BasicBlockT *BB = &BI;
784 LoopT *L = LI->getLoopFor(BB);
785 if (!L) {
786 continue;
787 }
788 BasicBlockT *Header = L->getHeader();
789 if (Header && BlockWeights[BB] > BlockWeights[Header]) {
790 BlockWeights[Header] = BlockWeights[BB];
791 }
792 }
793
794 // Propagate until we converge or we go past the iteration limit.
795 while (Changed && I++ < SampleProfileMaxPropagateIterations) {
796 Changed = propagateThroughEdges(F, false);
797 }
798
799 // The first propagation propagates BB counts from annotated BBs to unknown
800 // BBs. The 2nd propagation pass resets edges weights, and use all BB
801 // weights to propagate edge weights.
802 VisitedEdges.clear();
803 Changed = true;
804 while (Changed && I++ < SampleProfileMaxPropagateIterations) {
805 Changed = propagateThroughEdges(F, false);
806 }
807
808 // The 3rd propagation pass allows adjust annotated BB weights that are
809 // obviously wrong.
810 Changed = true;
811 while (Changed && I++ < SampleProfileMaxPropagateIterations) {
812 Changed = propagateThroughEdges(F, true);
813 }
814 }
815}
816
817template <typename BT>
819 FunctionT &F, BlockEdgeMap &Successors, BlockWeightMap &SampleBlockWeights,
820 BlockWeightMap &BlockWeights, EdgeWeightMap &EdgeWeights) {
821 auto Infer = SampleProfileInference<BT>(F, Successors, SampleBlockWeights);
822 Infer.apply(BlockWeights, EdgeWeights);
823}
824
825/// Generate branch weight metadata for all branches in \p F.
826///
827/// Branch weights are computed out of instruction samples using a
828/// propagation heuristic. Propagation proceeds in 3 phases:
829///
830/// 1- Assignment of block weights. All the basic blocks in the function
831/// are initial assigned the same weight as their most frequently
832/// executed instruction.
833///
834/// 2- Creation of equivalence classes. Since samples may be missing from
835/// blocks, we can fill in the gaps by setting the weights of all the
836/// blocks in the same equivalence class to the same weight. To compute
837/// the concept of equivalence, we use dominance and loop information.
838/// Two blocks B1 and B2 are in the same equivalence class if B1
839/// dominates B2, B2 post-dominates B1 and both are in the same loop.
840///
841/// 3- Propagation of block weights into edges. This uses a simple
842/// propagation heuristic. The following rules are applied to every
843/// block BB in the CFG:
844///
845/// - If BB has a single predecessor/successor, then the weight
846/// of that edge is the weight of the block.
847///
848/// - If all the edges are known except one, and the weight of the
849/// block is already known, the weight of the unknown edge will
850/// be the weight of the block minus the sum of all the known
851/// edges. If the sum of all the known edges is larger than BB's weight,
852/// we set the unknown edge weight to zero.
853///
854/// - If there is a self-referential edge, and the weight of the block is
855/// known, the weight for that edge is set to the weight of the block
856/// minus the weight of the other incoming edges to that block (if
857/// known).
858///
859/// Since this propagation is not guaranteed to finalize for every CFG, we
860/// only allow it to proceed for a limited number of iterations (controlled
861/// by -sample-profile-max-propagate-iterations).
862///
863/// FIXME: Try to replace this propagation heuristic with a scheme
864/// that is guaranteed to finalize. A work-list approach similar to
865/// the standard value propagation algorithm used by SSA-CCP might
866/// work here.
867///
868/// \param F The function to query.
869///
870/// \returns true if \p F was modified. Returns false, otherwise.
871template <typename BT>
873 FunctionT &F, const DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
874 bool Changed = (InlinedGUIDs.size() != 0);
875
876 // Compute basic block weights.
877 Changed |= computeBlockWeights(F);
878
879 if (Changed) {
880 // Initialize propagation.
881 initWeightPropagation(F, InlinedGUIDs);
882
883 // Propagate weights to all edges.
884 propagateWeights(F);
885
886 // Post-process propagated weights.
887 finalizeWeightPropagation(F, InlinedGUIDs);
888 }
889
890 return Changed;
891}
892
893template <typename BT>
895 FunctionT &F, const DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
896 // Add an entry count to the function using the samples gathered at the
897 // function entry.
898 // Sets the GUIDs that are inlined in the profiled binary. This is used
899 // for ThinLink to make correct liveness analysis, and also make the IR
900 // match the profiled binary before annotation.
902 ProfileCount(Samples->getHeadSamples() + 1, Function::PCT_Real),
903 &InlinedGUIDs);
904
906 // Compute dominance and loop info needed for propagation.
907 computeDominanceAndLoopInfo(F);
908
909 // Find equivalence classes.
910 findEquivalenceClasses(F);
911 }
912
913 // Before propagation starts, build, for each block, a list of
914 // unique predecessors and successors. This is necessary to handle
915 // identical edges in multiway branches. Since we visit all blocks and all
916 // edges of the CFG, it is cleaner to build these lists once at the start
917 // of the pass.
918 buildEdges(F);
919}
920
921template <typename BT>
923 FunctionT &F, const DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
924 // If we utilize a flow-based count inference, then we trust the computed
925 // counts and set the entry count as computed by the algorithm. This is
926 // primarily done to sync the counts produced by profi and BFI inference,
927 // which uses the entry count for mass propagation.
928 // If profi produces a zero-value for the entry count, we fallback to
929 // Samples->getHeadSamples() + 1 to avoid functions with zero count.
931 const BasicBlockT *EntryBB = getEntryBB(&F);
932 ErrorOr<uint64_t> EntryWeight = getBlockWeight(EntryBB);
933 if (BlockWeights[EntryBB] > 0) {
935 ProfileCount(BlockWeights[EntryBB], Function::PCT_Real),
936 &InlinedGUIDs);
937 }
938 }
939}
940
941template <typename BT>
943 // If coverage checking was requested, compute it now.
944 const Function &Func = getFunction(F);
946 unsigned Used = CoverageTracker.countUsedRecords(Samples, PSI);
947 unsigned Total = CoverageTracker.countBodyRecords(Samples, PSI);
948 unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
949 if (Coverage < SampleProfileRecordCoverage) {
950 Func.getContext().diagnose(DiagnosticInfoSampleProfile(
951 Func.getSubprogram()->getFilename(), getFunctionLoc(F),
952 Twine(Used) + " of " + Twine(Total) + " available profile records (" +
953 Twine(Coverage) + "%) were applied",
954 DS_Warning));
955 }
956 }
957
959 uint64_t Used = CoverageTracker.getTotalUsedSamples();
960 uint64_t Total = CoverageTracker.countBodySamples(Samples, PSI);
961 unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
962 if (Coverage < SampleProfileSampleCoverage) {
963 Func.getContext().diagnose(DiagnosticInfoSampleProfile(
964 Func.getSubprogram()->getFilename(), getFunctionLoc(F),
965 Twine(Used) + " of " + Twine(Total) + " available profile samples (" +
966 Twine(Coverage) + "%) were applied",
967 DS_Warning));
968 }
969 }
970}
971
972/// Get the line number for the function header.
973///
974/// This looks up function \p F in the current compilation unit and
975/// retrieves the line number where the function is defined. This is
976/// line 0 for all the samples read from the profile file. Every line
977/// number is relative to this line.
978///
979/// \param F Function object to query.
980///
981/// \returns the line number where \p F is defined. If it returns 0,
982/// it means that there is no debug information available for \p F.
983template <typename BT>
985 const Function &Func = getFunction(F);
986 if (DISubprogram *S = Func.getSubprogram())
987 return S->getLine();
988
990 return 0;
991
992 // If the start of \p F is missing, emit a diagnostic to inform the user
993 // about the missed opportunity.
994 Func.getContext().diagnose(DiagnosticInfoSampleProfile(
995 "No debug information found in function " + Func.getName() +
996 ": Function profile not used",
997 DS_Warning));
998 return 0;
999}
1000
1001template <typename BT>
1003 FunctionT &F) {
1004 DT.reset(new DominatorTree);
1005 DT->recalculate(F);
1006
1007 PDT.reset(new PostDominatorTree(F));
1008
1009 LI.reset(new LoopInfo);
1010 LI->analyze(*DT);
1011}
1012
1013#undef DEBUG_TYPE
1014
1015} // namespace llvm
1016#endif // LLVM_TRANSFORMS_UTILS_SAMPLEPROFILELOADERBASEIMPL_H
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
std::string Name
static Function * getFunction(Constant *C)
Definition: Evaluator.cpp:236
#define DEBUG_TYPE
This file defines a set of templates that efficiently compute a dominator tree over a generic graph.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This file defines the RefCountedBase, ThreadSafeRefCountedBase, and IntrusiveRefCntPtr classes.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Module.h This file contains the declarations for the Module class.
static StringRef getName(Value *V)
raw_pwrite_stream & OS
This file provides the interface for the profile inference algorithm, profi.
This file provides the utility functions for the sampled PGO loader base implementation.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Debug location.
unsigned getBaseDiscriminator() const
Returns the base discriminator stored in the discriminator.
Subprogram description.
A debug info location.
Definition: DebugLoc.h:33
unsigned getLine() const
Definition: DebugLoc.cpp:24
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
Diagnostic information for the sample profiler.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
Represents either an error or a value T.
Definition: ErrorOr.h:56
reference get()
Definition: ErrorOr.h:150
Class to represent profile counts.
Definition: Function.h:252
void setEntryCount(ProfileCount Count, const DenseSet< GlobalValue::GUID > *Imports=nullptr)
Set the entry count for this function.
Definition: Function.cpp:2063
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
Diagnostic information for optimization analysis remarks.
The optimization diagnostic interface.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
Analysis providing profile information.
Sample profile inference pass.
bool computeAndPropagateWeights(FunctionT &F, const DenseSet< GlobalValue::GUID > &InlinedGUIDs)
Generate branch weight metadata for all branches in F.
typename afdo_detail::IRTraits< BT >::BlockFrequencyInfoT BlockFrequencyInfoT
std::string Filename
Name of the profile file to load.
EquivalenceClassMap EquivalenceClass
Equivalence classes for block weights.
const BasicBlockT * getEntryBB(const FunctionT *F)
typename afdo_detail::IRTraits< BT >::OptRemarkEmitterT OptRemarkEmitterT
ErrorOr< uint64_t > getBlockWeight(const BasicBlockT *BB)
Compute the weight of a basic block.
ProfileSummaryInfo * PSI
Profile Summary Info computed from sample profile.
typename afdo_detail::IRTraits< BT >::PostDominatorTreeT PostDominatorTreeT
unsigned getFunctionLoc(FunctionT &Func)
Get the line number for the function header.
ErrorOr< uint64_t > getInstWeightImpl(const InstructionT &Inst)
virtual ErrorOr< uint64_t > getInstWeight(const InstructionT &Inst)
Get the weight for an instruction.
IntrusiveRefCntPtr< vfs::FileSystem > FS
VirtualFileSystem to load profile files from.
typename afdo_detail::IRTraits< BT >::DominatorTreePtrT DominatorTreePtrT
SmallPtrSet< const BasicBlockT *, 32 > VisitedBlocks
Set of visited blocks during propagation.
typename afdo_detail::IRTraits< BT >::PredRangeT PredRangeT
void applyProfi(FunctionT &F, BlockEdgeMap &Successors, BlockWeightMap &SampleBlockWeights, BlockWeightMap &BlockWeights, EdgeWeightMap &EdgeWeights)
typename afdo_detail::IRTraits< BT >::PostDominatorTreePtrT PostDominatorTreePtrT
void printBlockWeight(raw_ostream &OS, const BasicBlockT *BB) const
Print the weight of block BB on stream OS.
void printBlockEquivalence(raw_ostream &OS, const BasicBlockT *BB)
Print the equivalence class of block BB on stream OS.
std::string RemappingFilename
Name of the profile remapping file to load.
SampleCoverageTracker CoverageTracker
Profile coverage tracker.
SmallSet< Edge, 32 > VisitedEdges
Set of visited edges during propagation.
BlockEdgeMap Predecessors
Predecessors for each basic block in the CFG.
EdgeWeightMap EdgeWeights
Map edges to their computed weights.
bool propagateThroughEdges(FunctionT &F, bool UpdateBlockCount)
Propagate weights through incoming/outgoing edges.
typename afdo_detail::IRTraits< BT >::FunctionT FunctionT
uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge)
Visit the given edge to decide if it has a valid weight.
void initWeightPropagation(FunctionT &F, const DenseSet< GlobalValue::GUID > &InlinedGUIDs)
typename afdo_detail::IRTraits< BT >::InstructionT InstructionT
BlockEdgeMap Successors
Successors for each basic block in the CFG.
void finalizeWeightPropagation(FunctionT &F, const DenseSet< GlobalValue::GUID > &InlinedGUIDs)
typename afdo_detail::IRTraits< BT >::SuccRangeT SuccRangeT
bool computeBlockWeights(FunctionT &F)
Compute and store the weights of every basic block.
typename afdo_detail::IRTraits< BT >::OptRemarkAnalysisT OptRemarkAnalysisT
virtual const FunctionSamples * findFunctionSamples(const InstructionT &I) const
Get the FunctionSamples for an instruction.
FunctionSamples * Samples
Samples collected for the body of this function.
BlockWeightMap BlockWeights
Map basic blocks to their computed weights.
typename afdo_detail::IRTraits< BT >::LoopT LoopT
void findEquivalenceClasses(FunctionT &F)
Find equivalence classes.
OptRemarkEmitterT * ORE
Optimization Remark Emitter used to emit diagnostic remarks.
void clearFunctionData(bool ResetDT=true)
Clear all the per-function data used to load samples and propagate weights.
typename afdo_detail::IRTraits< BT >::BasicBlockT BasicBlockT
void buildEdges(FunctionT &F)
Build in/out edge lists for each basic block in the CFG.
void findEquivalencesFor(BasicBlockT *BB1, ArrayRef< BasicBlockT * > Descendants, PostDominatorTreeT *DomTree)
Find equivalence classes for the given block.
DominatorTreePtrT DT
Dominance, post-dominance and loop information.
DenseMap< const DILocation *, const FunctionSamples * > DILocation2SampleMap
void printEdgeWeight(raw_ostream &OS, Edge E)
Print the weight of edge E on stream OS.
typename afdo_detail::IRTraits< BT >::LoopInfoPtrT LoopInfoPtrT
SampleProfileLoaderBaseImpl(std::string Name, std::string RemapName, IntrusiveRefCntPtr< vfs::FileSystem > FS)
void propagateWeights(FunctionT &F)
Propagate weights into edges.
std::pair< const BasicBlockT *, const BasicBlockT * > Edge
std::unique_ptr< SampleProfileReader > Reader
Profile reader object.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
size_type size() const
Definition: DenseSet.h:81
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
Representation of the samples collected for a function.
Definition: SampleProf.h:726
static unsigned getOffset(const DILocation *DIL)
Returns the line offset to the start line of the subprogram.
Definition: SampleProf.cpp:216
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
Function::ProfileCount ProfileCount
auto successors(const MachineBasicBlock *BB)
cl::opt< unsigned > SampleProfileSampleCoverage
cl::opt< unsigned > SampleProfileRecordCoverage
cl::opt< unsigned > SampleProfileMaxPropagateIterations
cl::opt< bool > SampleProfileUseProfi
cl::opt< bool > EnableFSDiscriminator
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
iterator_range< succ_iterator > succ_range
Definition: CFG.h:244
cl::opt< bool > NoWarnSampleUnused
iterator_range< pred_iterator > pred_range
Definition: CFG.h:107
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1946
@ DS_Warning
auto predecessors(const MachineBasicBlock *BB)
Definition: BitVector.h:858
std::unique_ptr< PostDominatorTree > PostDominatorTreePtrT
static pred_range getPredecessors(BasicBlock *BB)
static succ_range getSuccessors(BasicBlock *BB)
std::unique_ptr< DominatorTree > DominatorTreePtrT
static const BasicBlock * getEntryBB(const Function *F)