LLVM 23.0.0git
CtxProfAnalysis.cpp
Go to the documentation of this file.
1//===- CtxProfAnalysis.cpp - contextual profile analysis ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Implementation of the contextual profile analysis, which maintains contextual
10// profiling info through IPO passes.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/APInt.h"
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/Analysis/CFG.h"
18#include "llvm/IR/Analysis.h"
19#include "llvm/IR/Dominators.h"
21#include "llvm/IR/Module.h"
22#include "llvm/IR/PassManager.h"
26#include "llvm/Support/Path.h"
27#include <deque>
28#include <memory>
29
30#define DEBUG_TYPE "ctx_prof"
31
32using namespace llvm;
33
34namespace llvm {
35
37 UseCtxProfile("use-ctx-profile", cl::init(""), cl::Hidden,
38 cl::desc("Use the specified contextual profile file"));
39
41 "ctx-profile-printer-level",
44 "everything", "print everything - most verbose"),
46 "just the yaml representation of the profile")),
47 cl::desc("Verbosity level of the contextual profile printer pass."));
48
50 "ctx-profile-force-is-specialized", cl::init(false),
51 cl::desc("Treat the given module as-if it were containing the "
52 "post-thinlink module containing the root"));
53
54const char *AssignGUIDPass::GUIDMetadataName = "guid";
55
57 friend class ProfileAnnotator;
58 class BBInfo;
59 struct EdgeInfo {
60 BBInfo *const Src;
61 BBInfo *const Dest;
62 std::optional<uint64_t> Count;
63
64 explicit EdgeInfo(BBInfo &Src, BBInfo &Dest) : Src(&Src), Dest(&Dest) {}
65 };
66
67 class BBInfo {
68 std::optional<uint64_t> Count;
69 // OutEdges is dimensioned to match the number of terminator operands.
70 // Entries in the vector match the index in the terminator operand list. In
71 // some cases - see `shouldExcludeEdge` and its implementation - an entry
72 // will be nullptr.
73 // InEdges doesn't have the above constraint.
76 size_t UnknownCountOutEdges = 0;
77 size_t UnknownCountInEdges = 0;
78
79 // Pass AssumeAllKnown when we try to propagate counts from edges to BBs -
80 // because all the edge counters must be known.
81 // Return std::nullopt if there were no edges to sum. The user can decide
82 // how to interpret that.
83 std::optional<uint64_t> getEdgeSum(const SmallVector<EdgeInfo *> &Edges,
84 bool AssumeAllKnown) const {
85 std::optional<uint64_t> Sum;
86 for (const auto *E : Edges) {
87 // `Edges` may be `OutEdges`, case in which `E` could be nullptr.
88 if (E) {
89 if (!Sum.has_value())
90 Sum = 0;
91 *Sum += (AssumeAllKnown ? *E->Count : E->Count.value_or(0U));
92 }
93 }
94 return Sum;
95 }
96
97 bool computeCountFrom(const SmallVector<EdgeInfo *> &Edges) {
98 assert(!Count.has_value());
99 Count = getEdgeSum(Edges, true);
100 return Count.has_value();
101 }
102
103 void setSingleUnknownEdgeCount(SmallVector<EdgeInfo *> &Edges) {
104 uint64_t KnownSum = getEdgeSum(Edges, false).value_or(0U);
105 uint64_t EdgeVal = *Count > KnownSum ? *Count - KnownSum : 0U;
106 EdgeInfo *E = nullptr;
107 for (auto *I : Edges)
108 if (I && !I->Count.has_value()) {
109 E = I;
110#ifdef NDEBUG
111 break;
112#else
113 assert((!E || E == I) &&
114 "Expected exactly one edge to have an unknown count, "
115 "found a second one");
116 continue;
117#endif
118 }
119 assert(E && "Expected exactly one edge to have an unknown count");
120 assert(!E->Count.has_value());
121 E->Count = EdgeVal;
122 assert(E->Src->UnknownCountOutEdges > 0);
123 assert(E->Dest->UnknownCountInEdges > 0);
124 --E->Src->UnknownCountOutEdges;
125 --E->Dest->UnknownCountInEdges;
126 }
127
128 public:
129 BBInfo(size_t NumInEdges, size_t NumOutEdges, std::optional<uint64_t> Count)
130 : Count(Count) {
131 // For in edges, we just want to pre-allocate enough space, since we know
132 // it at this stage. For out edges, we will insert edges at the indices
133 // corresponding to positions in this BB's terminator instruction, so we
134 // construct a default (nullptr values)-initialized vector. A nullptr edge
135 // corresponds to those that are excluded (see shouldExcludeEdge).
136 InEdges.reserve(NumInEdges);
137 OutEdges.resize(NumOutEdges);
138 }
139
140 bool tryTakeCountFromKnownOutEdges(const BasicBlock &BB) {
141 if (!UnknownCountOutEdges) {
142 return computeCountFrom(OutEdges);
143 }
144 return false;
145 }
146
147 bool tryTakeCountFromKnownInEdges(const BasicBlock &BB) {
148 if (!UnknownCountInEdges) {
149 return computeCountFrom(InEdges);
150 }
151 return false;
152 }
153
154 void addInEdge(EdgeInfo &Info) {
155 InEdges.push_back(&Info);
156 ++UnknownCountInEdges;
157 }
158
159 // For the out edges, we care about the position we place them in, which is
160 // the position in terminator instruction's list (at construction). Later,
161 // we build branch_weights metadata with edge frequency values matching
162 // these positions.
163 void addOutEdge(size_t Index, EdgeInfo &Info) {
164 OutEdges[Index] = &Info;
165 ++UnknownCountOutEdges;
166 }
167
168 bool hasCount() const { return Count.has_value(); }
169
170 uint64_t getCount() const { return *Count; }
171
172 bool trySetSingleUnknownInEdgeCount() {
173 if (UnknownCountInEdges == 1) {
174 setSingleUnknownEdgeCount(InEdges);
175 return true;
176 }
177 return false;
178 }
179
180 bool trySetSingleUnknownOutEdgeCount() {
181 if (UnknownCountOutEdges == 1) {
182 setSingleUnknownEdgeCount(OutEdges);
183 return true;
184 }
185 return false;
186 }
187 size_t getNumOutEdges() const { return OutEdges.size(); }
188
189 uint64_t getEdgeCount(size_t Index) const {
190 if (auto *E = OutEdges[Index])
191 return *E->Count;
192 return 0U;
193 }
194 };
195
196 const Function &F;
197 ArrayRef<uint64_t> Counters;
198 // To be accessed through getBBInfo() after construction.
199 std::map<const BasicBlock *, BBInfo> BBInfos;
200 std::vector<EdgeInfo> EdgeInfos;
201
202 // The only criteria for exclusion is faux suspend -> exit edges in presplit
203 // coroutines. The API serves for readability, currently.
204 bool shouldExcludeEdge(const BasicBlock &Src, const BasicBlock &Dest) const {
205 return llvm::isPresplitCoroSuspendExitEdge(Src, Dest);
206 }
207
208 BBInfo &getBBInfo(const BasicBlock &BB) { return BBInfos.find(&BB)->second; }
209
210 const BBInfo &getBBInfo(const BasicBlock &BB) const {
211 return BBInfos.find(&BB)->second;
212 }
213
214 // validation function after we propagate the counters: all BBs and edges'
215 // counters must have a value.
216 bool allCountersAreAssigned() const {
217 for (const auto &BBInfo : BBInfos)
218 if (!BBInfo.second.hasCount())
219 return false;
220 for (const auto &EdgeInfo : EdgeInfos)
221 if (!EdgeInfo.Count.has_value())
222 return false;
223 return true;
224 }
225
226 /// Check that all paths from the entry basic block that use edges with
227 /// non-zero counts arrive at a basic block with no successors (i.e. "exit")
228 bool allTakenPathsExit() const {
229 std::deque<const BasicBlock *> Worklist;
230 DenseSet<const BasicBlock *> Visited;
231 Worklist.push_back(&F.getEntryBlock());
232 bool HitExit = false;
233 while (!Worklist.empty()) {
234 const auto *BB = Worklist.front();
235 Worklist.pop_front();
236 if (!Visited.insert(BB).second)
237 continue;
238 if (succ_size(BB) == 0) {
240 return false;
241 HitExit = true;
242 continue;
243 }
244 if (succ_size(BB) == 1) {
245 Worklist.push_back(BB->getUniqueSuccessor());
246 continue;
247 }
248 const auto &BBInfo = getBBInfo(*BB);
249 bool HasAWayOut = false;
250 for (auto I = 0U; I < BB->getTerminator()->getNumSuccessors(); ++I) {
251 const auto *Succ = BB->getTerminator()->getSuccessor(I);
252 if (!shouldExcludeEdge(*BB, *Succ)) {
253 if (BBInfo.getEdgeCount(I) > 0) {
254 HasAWayOut = true;
255 Worklist.push_back(Succ);
256 }
257 }
258 }
259 if (!HasAWayOut)
260 return false;
261 }
262 return HitExit;
263 }
264
265 bool allNonColdSelectsHaveProfile() const {
266 for (const auto &BB : F) {
267 if (getBBInfo(BB).getCount() > 0) {
268 for (const auto &I : BB) {
269 if (const auto *SI = dyn_cast<SelectInst>(&I)) {
270 if (const auto *Inst = CtxProfAnalysis::getSelectInstrumentation(
271 *const_cast<SelectInst *>(SI))) {
272 auto Index = Inst->getIndex()->getZExtValue();
273 assert(Index < Counters.size());
274 if (Counters[Index] == 0)
275 return false;
276 }
277 }
278 }
279 }
280 }
281 return true;
282 }
283
284 // This is an adaptation of PGOUseFunc::populateCounters.
285 // FIXME(mtrofin): look into factoring the code to share one implementation.
286 void propagateCounterValues() {
287 bool KeepGoing = true;
288 while (KeepGoing) {
289 KeepGoing = false;
290 for (const auto &BB : F) {
291 auto &Info = getBBInfo(BB);
292 if (!Info.hasCount())
293 KeepGoing |= Info.tryTakeCountFromKnownOutEdges(BB) ||
294 Info.tryTakeCountFromKnownInEdges(BB);
295 if (Info.hasCount()) {
296 KeepGoing |= Info.trySetSingleUnknownOutEdgeCount();
297 KeepGoing |= Info.trySetSingleUnknownInEdgeCount();
298 }
299 }
300 }
301 assert(allCountersAreAssigned() &&
302 "[ctx-prof] Expected all counters have been assigned.");
303 assert(allTakenPathsExit() &&
304 "[ctx-prof] Encountered a BB with more than one successor, where "
305 "all outgoing edges have a 0 count. This occurs in non-exiting "
306 "functions (message pumps, usually) which are not supported in the "
307 "contextual profiling case");
308 assert(allNonColdSelectsHaveProfile() &&
309 "[ctx-prof] All non-cold select instructions were expected to have "
310 "a profile.");
311 }
312
313public:
315 : F(F), Counters(Counters) {
316 assert(!F.isDeclaration());
317 assert(!Counters.empty());
318 size_t NrEdges = 0;
319 for (const auto &BB : F) {
320 std::optional<uint64_t> Count;
322 const_cast<BasicBlock &>(BB))) {
323 auto Index = Ins->getIndex()->getZExtValue();
324 assert(Index < Counters.size() &&
325 "The index must be inside the counters vector by construction - "
326 "tripping this assertion indicates a bug in how the contextual "
327 "profile is managed by IPO transforms");
328 (void)Index;
329 Count = Counters[Ins->getIndex()->getZExtValue()];
330 } else if (isa<UnreachableInst>(BB.getTerminator())) {
331 // The program presumably didn't crash.
332 Count = 0;
333 }
334 auto [It, Ins] =
335 BBInfos.insert({&BB, {pred_size(&BB), succ_size(&BB), Count}});
336 (void)Ins;
337 assert(Ins && "We iterate through the function's BBs, no reason to "
338 "insert one more than once");
339 NrEdges += llvm::count_if(successors(&BB), [&](const auto *Succ) {
340 return !shouldExcludeEdge(BB, *Succ);
341 });
342 }
343 // Pre-allocate the vector, we want references to its contents to be stable.
344 EdgeInfos.reserve(NrEdges);
345 for (const auto &BB : F) {
346 auto &Info = getBBInfo(BB);
347 for (auto I = 0U; I < BB.getTerminator()->getNumSuccessors(); ++I) {
348 const auto *Succ = BB.getTerminator()->getSuccessor(I);
349 if (!shouldExcludeEdge(BB, *Succ)) {
350 auto &EI = EdgeInfos.emplace_back(getBBInfo(BB), getBBInfo(*Succ));
351 Info.addOutEdge(I, EI);
352 getBBInfo(*Succ).addInEdge(EI);
353 }
354 }
355 }
356 assert(EdgeInfos.capacity() == NrEdges &&
357 "The capacity of EdgeInfos should have stayed unchanged it was "
358 "populated, because we need pointers to its contents to be stable");
359 propagateCounterValues();
360 }
361
362 uint64_t getBBCount(const BasicBlock &BB) { return getBBInfo(BB).getCount(); }
363};
364
365} // namespace llvm
366
368 ArrayRef<uint64_t> RawCounters)
369 : PImpl(std::make_unique<ProfileAnnotatorImpl>(F, RawCounters)) {}
370
372
374 return PImpl->getBBCount(BB);
375}
376
378 uint64_t &TrueCount,
379 uint64_t &FalseCount) const {
380 const auto &BBInfo = PImpl->getBBInfo(*SI.getParent());
381 TrueCount = FalseCount = 0;
382 if (BBInfo.getCount() == 0)
383 return false;
384
386 if (!Step)
387 return false;
388 auto Index = Step->getIndex()->getZExtValue();
389 assert(Index < PImpl->Counters.size() &&
390 "The index of the step instruction must be inside the "
391 "counters vector by "
392 "construction - tripping this assertion indicates a bug in "
393 "how the contextual profile is managed by IPO transforms");
394 auto TotalCount = BBInfo.getCount();
395 TrueCount = PImpl->Counters[Index];
396 FalseCount = (TotalCount > TrueCount ? TotalCount - TrueCount : 0U);
397 return true;
398}
399
402 uint64_t &MaxCount) const {
403 Profile.clear();
404
405 if (succ_size(&BB) < 2)
406 return false;
407
408 auto *Term = BB.getTerminator();
409 Profile.resize(Term->getNumSuccessors());
410
411 const auto &BBInfo = PImpl->getBBInfo(BB);
412 MaxCount = 0;
413 for (unsigned SuccIdx = 0, Size = BBInfo.getNumOutEdges(); SuccIdx < Size;
414 ++SuccIdx) {
415 uint64_t EdgeCount = BBInfo.getEdgeCount(SuccIdx);
416 if (EdgeCount > MaxCount)
417 MaxCount = EdgeCount;
418 Profile[SuccIdx] = EdgeCount;
419 }
420 return MaxCount > 0;
421}
422
424 for (auto &F : M.functions()) {
425 if (F.isDeclaration())
426 continue;
427 if (F.getMetadata(GUIDMetadataName))
428 continue;
429 const GlobalValue::GUID GUID = F.getGUID();
430 F.setMetadata(GUIDMetadataName,
431 MDNode::get(M.getContext(),
432 {ConstantAsMetadata::get(ConstantInt::get(
433 Type::getInt64Ty(M.getContext()), GUID))}));
434 }
436}
437
439 if (F.isDeclaration()) {
441 return F.getGUID();
442 }
443 auto *MD = F.getMetadata(GUIDMetadataName);
444 if (!MD)
445 reportFatalUsageError("this pass requires GUID metadata to be available.");
446 return cast<ConstantInt>(cast<ConstantAsMetadata>(MD->getOperand(0))
447 ->getValue()
448 ->stripPointerCasts())
449 ->getZExtValue();
450}
452
453CtxProfAnalysis::CtxProfAnalysis(std::optional<StringRef> Profile)
454 : Profile([&]() -> std::optional<StringRef> {
455 if (Profile)
456 return *Profile;
457 if (UseCtxProfile.getNumOccurrences())
458 return UseCtxProfile;
459 return std::nullopt;
460 }()) {}
461
464 if (!Profile)
465 return {};
467 if (auto EC = MB.getError()) {
468 M.getContext().emitError("could not open contextual profile file: " +
469 EC.message());
470 return {};
471 }
472 PGOCtxProfileReader Reader(MB.get()->getBuffer());
473 auto MaybeProfiles = Reader.loadProfiles();
474 if (!MaybeProfiles) {
475 M.getContext().emitError("contextual profile file is invalid: " +
476 toString(MaybeProfiles.takeError()));
477 return {};
478 }
479
480 // FIXME: We should drive this from ThinLTO, but for the time being, use the
481 // module name as indicator.
482 // We want to *only* keep the contextual profiles in modules that capture
483 // context trees. That allows us to compute specific PSIs, for example.
484 auto DetermineRootsInModule = [&M]() -> const DenseSet<GlobalValue::GUID> {
485 DenseSet<GlobalValue::GUID> ProfileRootsInModule;
486 auto ModName = M.getName();
487 auto Filename = sys::path::filename(ModName);
488 // Drop the file extension.
489 Filename = Filename.substr(0, Filename.find_last_of('.'));
490 // See if it parses
491 APInt Guid;
492 // getAsInteger returns true if there are more chars to read other than the
493 // integer. So the "false" test is what we want.
494 if (!Filename.getAsInteger(0, Guid))
495 ProfileRootsInModule.insert(Guid.getZExtValue());
496 return ProfileRootsInModule;
497 };
498 const auto ProfileRootsInModule = DetermineRootsInModule();
500
501 // the logic from here on allows for modules that contain - by design - more
502 // than one root. We currently don't support that, because the determination
503 // happens based on the module name matching the root guid, but the logic can
504 // avoid assuming that.
505 if (!ProfileRootsInModule.empty()) {
506 Result.IsInSpecializedModule = true;
507 // Trim first the roots that aren't in this module.
508 for (auto &[RootGuid, _] :
509 llvm::make_early_inc_range(MaybeProfiles->Contexts))
510 if (!ProfileRootsInModule.contains(RootGuid))
511 MaybeProfiles->Contexts.erase(RootGuid);
512 // we can also drop the flat profiles
513 MaybeProfiles->FlatProfiles.clear();
514 }
515
516 for (const auto &F : M) {
517 if (F.isDeclaration())
518 continue;
519 auto GUID = AssignGUIDPass::getGUID(F);
520 assert(GUID && "guid not found for defined function");
521 const auto &Entry = F.begin();
522 uint32_t MaxCounters = 0; // we expect at least a counter.
523 for (const auto &I : *Entry)
524 if (auto *C = dyn_cast<InstrProfIncrementInst>(&I)) {
525 MaxCounters =
526 static_cast<uint32_t>(C->getNumCounters()->getZExtValue());
527 break;
528 }
529 if (!MaxCounters)
530 continue;
531 uint32_t MaxCallsites = 0;
532 for (const auto &BB : F)
533 for (const auto &I : BB)
534 if (auto *C = dyn_cast<InstrProfCallsite>(&I)) {
535 MaxCallsites =
536 static_cast<uint32_t>(C->getNumCounters()->getZExtValue());
537 break;
538 }
539 auto [It, Ins] = Result.FuncInfo.insert(
540 {GUID, PGOContextualProfile::FunctionInfo(F.getName())});
541 (void)Ins;
542 assert(Ins);
543 It->second.NextCallsiteIndex = MaxCallsites;
544 It->second.NextCounterIndex = MaxCounters;
545 }
546 // If we made it this far, the Result is valid - which we mark by setting
547 // .Profiles.
548 Result.Profiles = std::move(*MaybeProfiles);
549 Result.initIndex();
550 return Result;
551}
552
554PGOContextualProfile::getDefinedFunctionGUID(const Function &F) const {
555 if (auto It = FuncInfo.find(AssignGUIDPass::getGUID(F)); It != FuncInfo.end())
556 return It->first;
557 return 0;
558}
559
562
566 if (C.contexts().empty()) {
567 OS << "No contextual profile was provided.\n";
568 return PreservedAnalyses::all();
569 }
570
571 if (Mode == PrintMode::Everything) {
572 OS << "Function Info:\n";
573 for (const auto &[Guid, FuncInfo] : C.FuncInfo)
574 OS << Guid << " : " << FuncInfo.Name
575 << ". MaxCounterID: " << FuncInfo.NextCounterIndex
576 << ". MaxCallsiteID: " << FuncInfo.NextCallsiteIndex << "\n";
577 }
578
579 if (Mode == PrintMode::Everything)
580 OS << "\nCurrent Profile:\n";
581 convertCtxProfToYaml(OS, C.profiles());
582 OS << "\n";
583 if (Mode == PrintMode::YAML)
584 return PreservedAnalyses::all();
585
586 OS << "\nFlat Profile:\n";
587 auto Flat = C.flatten();
588 for (const auto &[Guid, Counters] : Flat) {
589 OS << Guid << " : ";
590 for (auto V : Counters)
591 OS << V << " ";
592 OS << "\n";
593 }
594 return PreservedAnalyses::all();
595}
596
599 return nullptr;
600 for (auto *Prev = CB.getPrevNode(); Prev; Prev = Prev->getPrevNode()) {
601 if (auto *IPC = dyn_cast<InstrProfCallsite>(Prev))
602 return IPC;
603 assert(!isa<CallBase>(Prev) &&
604 "didn't expect to find another call, that's not the callsite "
605 "instrumentation, before an instrumentable callsite");
606 }
607 return nullptr;
608}
609
611 for (auto &I : BB)
612 if (auto *Incr = dyn_cast<InstrProfIncrementInst>(&I))
614 return Incr;
615 return nullptr;
616}
617
620 Instruction *Prev = &SI;
621 while ((Prev = Prev->getPrevNode()))
622 if (auto *Step = dyn_cast<InstrProfIncrementInstStep>(Prev))
623 return Step;
624 return nullptr;
625}
626
627template <class ProfTy>
628static void preorderVisitOneRoot(ProfTy &Profile,
629 function_ref<void(ProfTy &)> Visitor) {
630 std::function<void(ProfTy &)> Traverser = [&](auto &Ctx) {
631 Visitor(Ctx);
632 for (auto &[_, SubCtxSet] : Ctx.callsites())
633 for (auto &[__, Subctx] : SubCtxSet)
634 Traverser(Subctx);
635 };
636 Traverser(Profile);
637}
638
639template <class ProfilesTy, class ProfTy>
640static void preorderVisit(ProfilesTy &Profiles,
641 function_ref<void(ProfTy &)> Visitor) {
642 for (auto &[_, P] : Profiles)
644}
645
646void PGOContextualProfile::initIndex() {
647 // Initialize the head of the index list for each function. We don't need it
648 // after this point.
649 DenseMap<GlobalValue::GUID, PGOCtxProfContext *> InsertionPoints;
650 for (auto &[Guid, FI] : FuncInfo)
651 InsertionPoints[Guid] = &FI.Index;
653 Profiles.Contexts, [&](PGOCtxProfContext &Ctx) {
654 auto InsertIt = InsertionPoints.find(Ctx.guid());
655 if (InsertIt == InsertionPoints.end())
656 return;
657 // Insert at the end of the list. Since we traverse in preorder, it
658 // means that when we iterate the list from the beginning, we'd
659 // encounter the contexts in the order we would have, should we have
660 // performed a full preorder traversal.
661 InsertIt->second->Next = &Ctx;
662 Ctx.Previous = InsertIt->second;
663 InsertIt->second = &Ctx;
664 });
665}
666
668 return ForceIsInSpecializedModule.getNumOccurrences() > 0
670 : IsInSpecializedModule;
671}
672
675 GlobalValue::GUID G = getDefinedFunctionGUID(F);
676 for (auto *Node = FuncInfo.find(G)->second.Index.Next; Node;
677 Node = Node->Next)
678 V(*reinterpret_cast<PGOCtxProfContext *>(Node));
679}
680
682 if (!F)
684 const PGOCtxProfContext>(Profiles.Contexts, V);
686 GlobalValue::GUID G = getDefinedFunctionGUID(*F);
687 for (const auto *Node = FuncInfo.find(G)->second.Index.Next; Node;
688 Node = Node->Next)
689 V(*reinterpret_cast<const PGOCtxProfContext *>(Node));
690}
691
694 auto Accummulate = [](SmallVectorImpl<uint64_t> &Into,
695 const SmallVectorImpl<uint64_t> &From,
696 uint64_t SamplingRate) {
697 if (Into.empty())
698 Into.resize(From.size());
699 assert(Into.size() == From.size() &&
700 "All contexts corresponding to a function should have the exact "
701 "same number of counters.");
702 for (size_t I = 0, E = Into.size(); I < E; ++I)
703 Into[I] += From[I] * SamplingRate;
704 };
705
706 for (const auto &[_, CtxRoot] : Profiles.Contexts) {
707 const uint64_t SamplingFactor = CtxRoot.getTotalRootEntryCount();
709 CtxRoot, [&](const PGOCtxProfContext &Ctx) {
710 Accummulate(Flat[Ctx.guid()], Ctx.counters(), SamplingFactor);
711 });
712
713 for (const auto &[G, Unh] : CtxRoot.getUnhandled())
714 Accummulate(Flat[G], Unh, SamplingFactor);
715 }
716 // We don't sample "Flat" currently, so sampling rate is 1.
717 for (const auto &[G, FC] : Profiles.FlatProfiles)
718 Accummulate(Flat[G], FC, /*SamplingRate=*/1);
719 return Flat;
720}
721
725 for (const auto &[_, CtxRoot] : Profiles.Contexts) {
726 const uint64_t TotalRootEntryCount = CtxRoot.getTotalRootEntryCount();
728 CtxRoot, [&](const PGOCtxProfContext &Ctx) {
729 auto &Targets = Ret[Ctx.guid()];
730 for (const auto &[ID, SubctxSet] : Ctx.callsites())
731 for (const auto &Subctx : SubctxSet)
732 Targets[ID][Subctx.first] +=
733 Subctx.second.getEntrycount() * TotalRootEntryCount;
734 });
735 }
736 return Ret;
737}
738
740 CallBase &IC, Result &Profile,
741 SetVector<std::pair<CallBase *, Function *>> &Candidates) {
742 const auto *Instr = CtxProfAnalysis::getCallsiteInstrumentation(IC);
743 if (!Instr)
744 return;
745 Module &M = *IC.getParent()->getModule();
746 const uint32_t CallID = Instr->getIndex()->getZExtValue();
747 Profile.visit(
748 [&](const PGOCtxProfContext &Ctx) {
749 const auto &Targets = Ctx.callsites().find(CallID);
750 if (Targets == Ctx.callsites().end())
751 return;
752 for (const auto &[Guid, _] : Targets->second)
753 if (auto Name = Profile.getFunctionName(Guid); !Name.empty())
754 if (auto *Target = M.getFunction(Name))
755 if (Target->hasFnAttribute(Attribute::AlwaysInline))
756 Candidates.insert({&IC, Target});
757 },
758 IC.getCaller());
759}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
static void preorderVisit(ProfilesTy &Profiles, function_ref< void(ProfTy &)> Visitor)
static void preorderVisitOneRoot(ProfTy &Profile, function_ref< void(ProfTy &)> Visitor)
#define _
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
Load MIR Sample Profile
static constexpr StringLiteral Filename
#define P(N)
Reader for contextual iFDO profile, which comes in bitstream format.
ModuleAnalysisManager MAM
This file contains some templates that are useful if you are working with the STL at all.
Class for arbitrary precision integers.
Definition APInt.h:78
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
static LLVM_ABI uint64_t getGUID(const Function &F)
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
Assign a GUID if one is not already assign, as a function metadata named GUIDMetadataName.
static LLVM_ABI const char * GUIDMetadataName
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
const Instruction & front() const
Definition BasicBlock.h:484
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
LLVM_ABI CtxProfAnalysisPrinterPass(raw_ostream &OS)
LLVM_ABI PGOContextualProfile run(Module &M, ModuleAnalysisManager &MAM)
static LLVM_ABI InstrProfIncrementInst * getBBInstrumentation(BasicBlock &BB)
Get the instruction instrumenting a BB, or nullptr if not present.
static LLVM_ABI InstrProfIncrementInstStep * getSelectInstrumentation(SelectInst &SI)
Get the step instrumentation associated with a select
static LLVM_ABI void collectIndirectCallPromotionList(CallBase &IC, Result &Profile, SetVector< std::pair< CallBase *, Function * > > &Candidates)
static LLVM_ABI InstrProfCallsite * getCallsiteInstrumentation(CallBase &CB)
Get the instruction instrumenting a callsite, or nullptr if that cannot be found.
static LLVM_ABI AnalysisKey Key
PGOContextualProfile Result
LLVM_ABI CtxProfAnalysis(std::optional< StringRef > Profile=std::nullopt)
Implements a dense probed hash-table based set.
Definition DenseSet.h:289
Represents either an error or a value T.
Definition ErrorOr.h:56
reference get()
Definition ErrorOr.h:149
std::error_code getError() const
Definition ErrorOr.h:152
uint64_t GUID
Declare a type to represent a global unique identifier for a global value.
static bool isExternalLinkage(LinkageTypes Linkage)
This represents the llvm.instrprof.callsite intrinsic.
static bool canInstrumentCallsite(const CallBase &CB)
This represents the llvm.instrprof.increment.step intrinsic.
This represents the llvm.instrprof.increment intrinsic.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1561
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
The instrumented contextual profile, produced by the CtxProfAnalysis.
LLVM_ABI void visit(ConstVisitor, const Function *F=nullptr) const
LLVM_ABI const CtxProfFlatProfile flatten() const
LLVM_ABI bool isInSpecializedModule() const
LLVM_ABI void update(Visitor, const Function &F)
function_ref< void(PGOCtxProfContext &)> Visitor
function_ref< void(const PGOCtxProfContext &)> ConstVisitor
LLVM_ABI const CtxProfFlatIndirectCallProfile flattenVirtCalls() const
bool isFunctionKnown(const Function &F) const
A node (context) in the loaded contextual profile, suitable for mutation during IPO passes.
std::map< GlobalValue::GUID, PGOCtxProfContext > CallTargetMapTy
LLVM_ABI Expected< PGOCtxProfile > loadProfiles()
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
uint64_t getBBCount(const BasicBlock &BB)
ProfileAnnotatorImpl(const Function &F, ArrayRef< uint64_t > Counters)
LLVM_ABI ~ProfileAnnotator()
LLVM_ABI bool getSelectInstrProfile(SelectInst &SI, uint64_t &TrueCount, uint64_t &FalseCount) const
LLVM_ABI bool getOutgoingBranchWeights(BasicBlock &BB, SmallVectorImpl< uint64_t > &Profile, uint64_t &MaxCount) const
LLVM_ABI uint64_t getBBCount(const BasicBlock &BB) const
LLVM_ABI ProfileAnnotator(const Function &F, ArrayRef< uint64_t > RawCounters)
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
Definition SetVector.h:57
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void resize(size_type N)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
Target - Wrapper for Target specific information.
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:212
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
Pass manager infrastructure for declaring and invalidating analyses.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
LLVM_ABI StringRef filename(StringRef path LLVM_LIFETIME_BOUND, Style style=Style::native)
Get filename.
Definition Path.cpp:594
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
std::map< GlobalValue::GUID, SmallVector< uint64_t, 1 > > CtxProfFlatProfile
static cl::opt< bool > ForceIsInSpecializedModule("ctx-profile-force-is-specialized", cl::init(false), cl::desc("Treat the given module as-if it were containing the " "post-thinlink module containing the root"))
auto successors(const MachineBasicBlock *BB)
cl::opt< std::string > UseCtxProfile("use-ctx-profile", cl::init(""), cl::Hidden, cl::desc("Use the specified contextual profile file"))
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
auto pred_size(const MachineBasicBlock *BB)
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
auto succ_size(const MachineBasicBlock *BB)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2018
DenseMap< GlobalValue::GUID, DenseMap< uint32_t, FlatIndirectTargets > > CtxProfFlatIndirectCallProfile
LLVM_ABI bool isPresplitCoroSuspendExitEdge(const BasicBlock &Src, const BasicBlock &Dest)
Definition CFG.cpp:424
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
static cl::opt< CtxProfAnalysisPrinterPass::PrintMode > PrintLevel("ctx-profile-printer-level", cl::init(CtxProfAnalysisPrinterPass::PrintMode::YAML), cl::Hidden, cl::values(clEnumValN(CtxProfAnalysisPrinterPass::PrintMode::Everything, "everything", "print everything - most verbose"), clEnumValN(CtxProfAnalysisPrinterPass::PrintMode::YAML, "yaml", "just the yaml representation of the profile")), cl::desc("Verbosity level of the contextual profile printer pass."))
LLVM_ABI void convertCtxProfToYaml(raw_ostream &OS, const PGOCtxProfile &Profile)
@ TotalRootEntryCount
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:860
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition Analysis.h:29