LLVM 23.0.0git
CtxProfAnalysis.cpp
Go to the documentation of this file.
1//===- CtxProfAnalysis.cpp - contextual profile analysis ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Implementation of the contextual profile analysis, which maintains contextual
10// profiling info through IPO passes.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/APInt.h"
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/Analysis/CFG.h"
18#include "llvm/IR/Analysis.h"
19#include "llvm/IR/Dominators.h"
21#include "llvm/IR/Module.h"
22#include "llvm/IR/PassManager.h"
26#include "llvm/Support/Path.h"
27#include <deque>
28#include <memory>
29
30#define DEBUG_TYPE "ctx_prof"
31
32using namespace llvm;
33
34namespace llvm {
35
37 UseCtxProfile("use-ctx-profile", cl::init(""), cl::Hidden,
38 cl::desc("Use the specified contextual profile file"));
39
41 "ctx-profile-printer-level",
44 "everything", "print everything - most verbose"),
46 "just the yaml representation of the profile")),
47 cl::desc("Verbosity level of the contextual profile printer pass."));
48
50 "ctx-profile-force-is-specialized", cl::init(false),
51 cl::desc("Treat the given module as-if it were containing the "
52 "post-thinlink module containing the root"));
53
55 friend class ProfileAnnotator;
56 class BBInfo;
57 struct EdgeInfo {
58 BBInfo *const Src;
59 BBInfo *const Dest;
60 std::optional<uint64_t> Count;
61
62 explicit EdgeInfo(BBInfo &Src, BBInfo &Dest) : Src(&Src), Dest(&Dest) {}
63 };
64
65 class BBInfo {
66 std::optional<uint64_t> Count;
67 // OutEdges is dimensioned to match the number of terminator operands.
68 // Entries in the vector match the index in the terminator operand list. In
69 // some cases - see `shouldExcludeEdge` and its implementation - an entry
70 // will be nullptr.
71 // InEdges doesn't have the above constraint.
74 size_t UnknownCountOutEdges = 0;
75 size_t UnknownCountInEdges = 0;
76
77 // Pass AssumeAllKnown when we try to propagate counts from edges to BBs -
78 // because all the edge counters must be known.
79 // Return std::nullopt if there were no edges to sum. The user can decide
80 // how to interpret that.
81 std::optional<uint64_t> getEdgeSum(const SmallVector<EdgeInfo *> &Edges,
82 bool AssumeAllKnown) const {
83 std::optional<uint64_t> Sum;
84 for (const auto *E : Edges) {
85 // `Edges` may be `OutEdges`, case in which `E` could be nullptr.
86 if (E) {
87 if (!Sum.has_value())
88 Sum = 0;
89 *Sum += (AssumeAllKnown ? *E->Count : E->Count.value_or(0U));
90 }
91 }
92 return Sum;
93 }
94
95 bool computeCountFrom(const SmallVector<EdgeInfo *> &Edges) {
96 assert(!Count.has_value());
97 Count = getEdgeSum(Edges, true);
98 return Count.has_value();
99 }
100
101 void setSingleUnknownEdgeCount(SmallVector<EdgeInfo *> &Edges) {
102 uint64_t KnownSum = getEdgeSum(Edges, false).value_or(0U);
103 uint64_t EdgeVal = *Count > KnownSum ? *Count - KnownSum : 0U;
104 EdgeInfo *E = nullptr;
105 for (auto *I : Edges)
106 if (I && !I->Count.has_value()) {
107 E = I;
108#ifdef NDEBUG
109 break;
110#else
111 assert((!E || E == I) &&
112 "Expected exactly one edge to have an unknown count, "
113 "found a second one");
114 continue;
115#endif
116 }
117 assert(E && "Expected exactly one edge to have an unknown count");
118 assert(!E->Count.has_value());
119 E->Count = EdgeVal;
120 assert(E->Src->UnknownCountOutEdges > 0);
121 assert(E->Dest->UnknownCountInEdges > 0);
122 --E->Src->UnknownCountOutEdges;
123 --E->Dest->UnknownCountInEdges;
124 }
125
126 public:
127 BBInfo(size_t NumInEdges, size_t NumOutEdges, std::optional<uint64_t> Count)
128 : Count(Count) {
129 // For in edges, we just want to pre-allocate enough space, since we know
130 // it at this stage. For out edges, we will insert edges at the indices
131 // corresponding to positions in this BB's terminator instruction, so we
132 // construct a default (nullptr values)-initialized vector. A nullptr edge
133 // corresponds to those that are excluded (see shouldExcludeEdge).
134 InEdges.reserve(NumInEdges);
135 OutEdges.resize(NumOutEdges);
136 }
137
138 bool tryTakeCountFromKnownOutEdges(const BasicBlock &BB) {
139 if (!UnknownCountOutEdges) {
140 return computeCountFrom(OutEdges);
141 }
142 return false;
143 }
144
145 bool tryTakeCountFromKnownInEdges(const BasicBlock &BB) {
146 if (!UnknownCountInEdges) {
147 return computeCountFrom(InEdges);
148 }
149 return false;
150 }
151
152 void addInEdge(EdgeInfo &Info) {
153 InEdges.push_back(&Info);
154 ++UnknownCountInEdges;
155 }
156
157 // For the out edges, we care about the position we place them in, which is
158 // the position in terminator instruction's list (at construction). Later,
159 // we build branch_weights metadata with edge frequency values matching
160 // these positions.
161 void addOutEdge(size_t Index, EdgeInfo &Info) {
162 OutEdges[Index] = &Info;
163 ++UnknownCountOutEdges;
164 }
165
166 bool hasCount() const { return Count.has_value(); }
167
168 uint64_t getCount() const { return *Count; }
169
170 bool trySetSingleUnknownInEdgeCount() {
171 if (UnknownCountInEdges == 1) {
172 setSingleUnknownEdgeCount(InEdges);
173 return true;
174 }
175 return false;
176 }
177
178 bool trySetSingleUnknownOutEdgeCount() {
179 if (UnknownCountOutEdges == 1) {
180 setSingleUnknownEdgeCount(OutEdges);
181 return true;
182 }
183 return false;
184 }
185 size_t getNumOutEdges() const { return OutEdges.size(); }
186
187 uint64_t getEdgeCount(size_t Index) const {
188 if (auto *E = OutEdges[Index])
189 return *E->Count;
190 return 0U;
191 }
192 };
193
194 const Function &F;
195 ArrayRef<uint64_t> Counters;
196 // To be accessed through getBBInfo() after construction.
197 std::map<const BasicBlock *, BBInfo> BBInfos;
198 std::vector<EdgeInfo> EdgeInfos;
199
200 // The only criteria for exclusion is faux suspend -> exit edges in presplit
201 // coroutines. The API serves for readability, currently.
202 bool shouldExcludeEdge(const BasicBlock &Src, const BasicBlock &Dest) const {
203 return llvm::isPresplitCoroSuspendExitEdge(Src, Dest);
204 }
205
206 BBInfo &getBBInfo(const BasicBlock &BB) { return BBInfos.find(&BB)->second; }
207
208 const BBInfo &getBBInfo(const BasicBlock &BB) const {
209 return BBInfos.find(&BB)->second;
210 }
211
212 // validation function after we propagate the counters: all BBs and edges'
213 // counters must have a value.
214 bool allCountersAreAssigned() const {
215 for (const auto &BBInfo : BBInfos)
216 if (!BBInfo.second.hasCount())
217 return false;
218 for (const auto &EdgeInfo : EdgeInfos)
219 if (!EdgeInfo.Count.has_value())
220 return false;
221 return true;
222 }
223
224 /// Check that all paths from the entry basic block that use edges with
225 /// non-zero counts arrive at a basic block with no successors (i.e. "exit")
226 bool allTakenPathsExit() const {
227 std::deque<const BasicBlock *> Worklist;
228 DenseSet<const BasicBlock *> Visited;
229 Worklist.push_back(&F.getEntryBlock());
230 bool HitExit = false;
231 while (!Worklist.empty()) {
232 const auto *BB = Worklist.front();
233 Worklist.pop_front();
234 if (!Visited.insert(BB).second)
235 continue;
236 if (succ_size(BB) == 0) {
238 return false;
239 HitExit = true;
240 continue;
241 }
242 if (succ_size(BB) == 1) {
243 Worklist.push_back(BB->getUniqueSuccessor());
244 continue;
245 }
246 const auto &BBInfo = getBBInfo(*BB);
247 bool HasAWayOut = false;
248 for (auto I = 0U; I < BB->getTerminator()->getNumSuccessors(); ++I) {
249 const auto *Succ = BB->getTerminator()->getSuccessor(I);
250 if (!shouldExcludeEdge(*BB, *Succ)) {
251 if (BBInfo.getEdgeCount(I) > 0) {
252 HasAWayOut = true;
253 Worklist.push_back(Succ);
254 }
255 }
256 }
257 if (!HasAWayOut)
258 return false;
259 }
260 return HitExit;
261 }
262
263 bool allNonColdSelectsHaveProfile() const {
264 for (const auto &BB : F) {
265 if (getBBInfo(BB).getCount() > 0) {
266 for (const auto &I : BB) {
267 if (const auto *SI = dyn_cast<SelectInst>(&I)) {
268 if (const auto *Inst = CtxProfAnalysis::getSelectInstrumentation(
269 *const_cast<SelectInst *>(SI))) {
270 auto Index = Inst->getIndex()->getZExtValue();
271 assert(Index < Counters.size());
272 if (Counters[Index] == 0)
273 return false;
274 }
275 }
276 }
277 }
278 }
279 return true;
280 }
281
282 // This is an adaptation of PGOUseFunc::populateCounters.
283 // FIXME(mtrofin): look into factoring the code to share one implementation.
284 void propagateCounterValues() {
285 bool KeepGoing = true;
286 while (KeepGoing) {
287 KeepGoing = false;
288 for (const auto &BB : F) {
289 auto &Info = getBBInfo(BB);
290 if (!Info.hasCount())
291 KeepGoing |= Info.tryTakeCountFromKnownOutEdges(BB) ||
292 Info.tryTakeCountFromKnownInEdges(BB);
293 if (Info.hasCount()) {
294 KeepGoing |= Info.trySetSingleUnknownOutEdgeCount();
295 KeepGoing |= Info.trySetSingleUnknownInEdgeCount();
296 }
297 }
298 }
299 assert(allCountersAreAssigned() &&
300 "[ctx-prof] Expected all counters have been assigned.");
301 assert(allTakenPathsExit() &&
302 "[ctx-prof] Encountered a BB with more than one successor, where "
303 "all outgoing edges have a 0 count. This occurs in non-exiting "
304 "functions (message pumps, usually) which are not supported in the "
305 "contextual profiling case");
306 assert(allNonColdSelectsHaveProfile() &&
307 "[ctx-prof] All non-cold select instructions were expected to have "
308 "a profile.");
309 }
310
311public:
313 : F(F), Counters(Counters) {
314 assert(!F.isDeclaration());
315 assert(!Counters.empty());
316 size_t NrEdges = 0;
317 for (const auto &BB : F) {
318 std::optional<uint64_t> Count;
320 const_cast<BasicBlock &>(BB))) {
321 auto Index = Ins->getIndex()->getZExtValue();
322 assert(Index < Counters.size() &&
323 "The index must be inside the counters vector by construction - "
324 "tripping this assertion indicates a bug in how the contextual "
325 "profile is managed by IPO transforms");
326 (void)Index;
327 Count = Counters[Ins->getIndex()->getZExtValue()];
328 } else if (isa<UnreachableInst>(BB.getTerminator())) {
329 // The program presumably didn't crash.
330 Count = 0;
331 }
332 auto [It, Ins] =
333 BBInfos.insert({&BB, {pred_size(&BB), succ_size(&BB), Count}});
334 (void)Ins;
335 assert(Ins && "We iterate through the function's BBs, no reason to "
336 "insert one more than once");
337 NrEdges += llvm::count_if(successors(&BB), [&](const auto *Succ) {
338 return !shouldExcludeEdge(BB, *Succ);
339 });
340 }
341 // Pre-allocate the vector, we want references to its contents to be stable.
342 EdgeInfos.reserve(NrEdges);
343 for (const auto &BB : F) {
344 auto &Info = getBBInfo(BB);
345 for (auto I = 0U; I < BB.getTerminator()->getNumSuccessors(); ++I) {
346 const auto *Succ = BB.getTerminator()->getSuccessor(I);
347 if (!shouldExcludeEdge(BB, *Succ)) {
348 auto &EI = EdgeInfos.emplace_back(getBBInfo(BB), getBBInfo(*Succ));
349 Info.addOutEdge(I, EI);
350 getBBInfo(*Succ).addInEdge(EI);
351 }
352 }
353 }
354 assert(EdgeInfos.capacity() == NrEdges &&
355 "The capacity of EdgeInfos should have stayed unchanged it was "
356 "populated, because we need pointers to its contents to be stable");
357 propagateCounterValues();
358 }
359
360 uint64_t getBBCount(const BasicBlock &BB) { return getBBInfo(BB).getCount(); }
361};
362
363} // namespace llvm
364
366 ArrayRef<uint64_t> RawCounters)
367 : PImpl(std::make_unique<ProfileAnnotatorImpl>(F, RawCounters)) {}
368
370
372 return PImpl->getBBCount(BB);
373}
374
376 uint64_t &TrueCount,
377 uint64_t &FalseCount) const {
378 const auto &BBInfo = PImpl->getBBInfo(*SI.getParent());
379 TrueCount = FalseCount = 0;
380 if (BBInfo.getCount() == 0)
381 return false;
382
384 if (!Step)
385 return false;
386 auto Index = Step->getIndex()->getZExtValue();
387 assert(Index < PImpl->Counters.size() &&
388 "The index of the step instruction must be inside the "
389 "counters vector by "
390 "construction - tripping this assertion indicates a bug in "
391 "how the contextual profile is managed by IPO transforms");
392 auto TotalCount = BBInfo.getCount();
393 TrueCount = PImpl->Counters[Index];
394 FalseCount = (TotalCount > TrueCount ? TotalCount - TrueCount : 0U);
395 return true;
396}
397
400 uint64_t &MaxCount) const {
401 Profile.clear();
402
403 if (succ_size(&BB) < 2)
404 return false;
405
406 auto *Term = BB.getTerminator();
407 Profile.resize(Term->getNumSuccessors());
408
409 const auto &BBInfo = PImpl->getBBInfo(BB);
410 MaxCount = 0;
411 for (unsigned SuccIdx = 0, Size = BBInfo.getNumOutEdges(); SuccIdx < Size;
412 ++SuccIdx) {
413 uint64_t EdgeCount = BBInfo.getEdgeCount(SuccIdx);
414 if (EdgeCount > MaxCount)
415 MaxCount = EdgeCount;
416 Profile[SuccIdx] = EdgeCount;
417 }
418 return MaxCount > 0;
419}
420
422
423CtxProfAnalysis::CtxProfAnalysis(std::optional<StringRef> Profile)
424 : Profile([&]() -> std::optional<StringRef> {
425 if (Profile)
426 return *Profile;
427 if (UseCtxProfile.getNumOccurrences())
428 return UseCtxProfile;
429 return std::nullopt;
430 }()) {}
431
434 if (!Profile)
435 return {};
437 if (auto EC = MB.getError()) {
438 M.getContext().emitError("could not open contextual profile file: " +
439 EC.message());
440 return {};
441 }
442 PGOCtxProfileReader Reader(MB.get()->getBuffer());
443 auto MaybeProfiles = Reader.loadProfiles();
444 if (!MaybeProfiles) {
445 M.getContext().emitError("contextual profile file is invalid: " +
446 toString(MaybeProfiles.takeError()));
447 return {};
448 }
449
450 // FIXME: We should drive this from ThinLTO, but for the time being, use the
451 // module name as indicator.
452 // We want to *only* keep the contextual profiles in modules that capture
453 // context trees. That allows us to compute specific PSIs, for example.
454 auto DetermineRootsInModule = [&M]() -> const DenseSet<GlobalValue::GUID> {
455 DenseSet<GlobalValue::GUID> ProfileRootsInModule;
456 auto ModName = M.getName();
457 auto Filename = sys::path::filename(ModName);
458 // Drop the file extension.
459 Filename = Filename.substr(0, Filename.find_last_of('.'));
460 // See if it parses
461 APInt Guid;
462 // getAsInteger returns true if there are more chars to read other than the
463 // integer. So the "false" test is what we want.
464 if (!Filename.getAsInteger(0, Guid))
465 ProfileRootsInModule.insert(Guid.getZExtValue());
466 return ProfileRootsInModule;
467 };
468 const auto ProfileRootsInModule = DetermineRootsInModule();
470
471 // the logic from here on allows for modules that contain - by design - more
472 // than one root. We currently don't support that, because the determination
473 // happens based on the module name matching the root guid, but the logic can
474 // avoid assuming that.
475 if (!ProfileRootsInModule.empty()) {
476 Result.IsInSpecializedModule = true;
477 // Trim first the roots that aren't in this module.
478 for (auto &[RootGuid, _] :
479 llvm::make_early_inc_range(MaybeProfiles->Contexts))
480 if (!ProfileRootsInModule.contains(RootGuid))
481 MaybeProfiles->Contexts.erase(RootGuid);
482 // we can also drop the flat profiles
483 MaybeProfiles->FlatProfiles.clear();
484 }
485
486 for (const auto &F : M) {
487 if (F.isDeclaration())
488 continue;
489 auto GUID = F.getGUID();
490 assert(GUID && "guid not found for defined function");
491 const auto &Entry = F.begin();
492 uint32_t MaxCounters = 0; // we expect at least a counter.
493 for (const auto &I : *Entry)
494 if (auto *C = dyn_cast<InstrProfIncrementInst>(&I)) {
495 MaxCounters =
496 static_cast<uint32_t>(C->getNumCounters()->getZExtValue());
497 break;
498 }
499 if (!MaxCounters)
500 continue;
501 uint32_t MaxCallsites = 0;
502 for (const auto &BB : F)
503 for (const auto &I : BB)
504 if (auto *C = dyn_cast<InstrProfCallsite>(&I)) {
505 MaxCallsites =
506 static_cast<uint32_t>(C->getNumCounters()->getZExtValue());
507 break;
508 }
509 auto [It, Ins] = Result.FuncInfo.insert(
510 {GUID, PGOContextualProfile::FunctionInfo(F.getName())});
511 (void)Ins;
512 assert(Ins);
513 It->second.NextCallsiteIndex = MaxCallsites;
514 It->second.NextCounterIndex = MaxCounters;
515 }
516 // If we made it this far, the Result is valid - which we mark by setting
517 // .Profiles.
518 Result.Profiles = std::move(*MaybeProfiles);
519 Result.initIndex();
520 return Result;
521}
522
525
529 if (C.contexts().empty()) {
530 OS << "No contextual profile was provided.\n";
531 return PreservedAnalyses::all();
532 }
533
534 if (Mode == PrintMode::Everything) {
535 OS << "Function Info:\n";
536 for (const auto &[Guid, FuncInfo] : C.FuncInfo)
537 OS << Guid << " : " << FuncInfo.Name
538 << ". MaxCounterID: " << FuncInfo.NextCounterIndex
539 << ". MaxCallsiteID: " << FuncInfo.NextCallsiteIndex << "\n";
540 }
541
542 if (Mode == PrintMode::Everything)
543 OS << "\nCurrent Profile:\n";
544 convertCtxProfToYaml(OS, C.profiles());
545 OS << "\n";
546 if (Mode == PrintMode::YAML)
547 return PreservedAnalyses::all();
548
549 OS << "\nFlat Profile:\n";
550 auto Flat = C.flatten();
551 for (const auto &[Guid, Counters] : Flat) {
552 OS << Guid << " : ";
553 for (auto V : Counters)
554 OS << V << " ";
555 OS << "\n";
556 }
557 return PreservedAnalyses::all();
558}
559
562 return nullptr;
563 for (auto *Prev = CB.getPrevNode(); Prev; Prev = Prev->getPrevNode()) {
564 if (auto *IPC = dyn_cast<InstrProfCallsite>(Prev))
565 return IPC;
566 assert(!isa<CallBase>(Prev) &&
567 "didn't expect to find another call, that's not the callsite "
568 "instrumentation, before an instrumentable callsite");
569 }
570 return nullptr;
571}
572
574 for (auto &I : BB)
575 if (auto *Incr = dyn_cast<InstrProfIncrementInst>(&I))
577 return Incr;
578 return nullptr;
579}
580
583 Instruction *Prev = &SI;
584 while ((Prev = Prev->getPrevNode()))
585 if (auto *Step = dyn_cast<InstrProfIncrementInstStep>(Prev))
586 return Step;
587 return nullptr;
588}
589
590template <class ProfTy>
591static void preorderVisitOneRoot(ProfTy &Profile,
592 function_ref<void(ProfTy &)> Visitor) {
593 std::function<void(ProfTy &)> Traverser = [&](auto &Ctx) {
594 Visitor(Ctx);
595 for (auto &[_, SubCtxSet] : Ctx.callsites())
596 for (auto &[__, Subctx] : SubCtxSet)
597 Traverser(Subctx);
598 };
599 Traverser(Profile);
600}
601
602template <class ProfilesTy, class ProfTy>
603static void preorderVisit(ProfilesTy &Profiles,
604 function_ref<void(ProfTy &)> Visitor) {
605 for (auto &[_, P] : Profiles)
607}
608
609void PGOContextualProfile::initIndex() {
610 // Initialize the head of the index list for each function. We don't need it
611 // after this point.
612 DenseMap<GlobalValue::GUID, PGOCtxProfContext *> InsertionPoints;
613 for (auto &[Guid, FI] : FuncInfo)
614 InsertionPoints[Guid] = &FI.Index;
616 Profiles.Contexts, [&](PGOCtxProfContext &Ctx) {
617 auto InsertIt = InsertionPoints.find(Ctx.guid());
618 if (InsertIt == InsertionPoints.end())
619 return;
620 // Insert at the end of the list. Since we traverse in preorder, it
621 // means that when we iterate the list from the beginning, we'd
622 // encounter the contexts in the order we would have, should we have
623 // performed a full preorder traversal.
624 InsertIt->second->Next = &Ctx;
625 Ctx.Previous = InsertIt->second;
626 InsertIt->second = &Ctx;
627 });
628}
629
631 return ForceIsInSpecializedModule.getNumOccurrences() > 0
633 : IsInSpecializedModule;
634}
635
638 GlobalValue::GUID G = F.getGUID();
639 for (auto *Node = FuncInfo.find(G)->second.Index.Next; Node;
640 Node = Node->Next)
641 V(*reinterpret_cast<PGOCtxProfContext *>(Node));
642}
643
645 if (!F)
647 const PGOCtxProfContext>(Profiles.Contexts, V);
649 GlobalValue::GUID G = F->getGUID();
650 for (const auto *Node = FuncInfo.find(G)->second.Index.Next; Node;
651 Node = Node->Next)
652 V(*reinterpret_cast<const PGOCtxProfContext *>(Node));
653}
654
657 auto Accummulate = [](SmallVectorImpl<uint64_t> &Into,
658 const SmallVectorImpl<uint64_t> &From,
659 uint64_t SamplingRate) {
660 if (Into.empty())
661 Into.resize(From.size());
662 assert(Into.size() == From.size() &&
663 "All contexts corresponding to a function should have the exact "
664 "same number of counters.");
665 for (size_t I = 0, E = Into.size(); I < E; ++I)
666 Into[I] += From[I] * SamplingRate;
667 };
668
669 for (const auto &[_, CtxRoot] : Profiles.Contexts) {
670 const uint64_t SamplingFactor = CtxRoot.getTotalRootEntryCount();
672 CtxRoot, [&](const PGOCtxProfContext &Ctx) {
673 Accummulate(Flat[Ctx.guid()], Ctx.counters(), SamplingFactor);
674 });
675
676 for (const auto &[G, Unh] : CtxRoot.getUnhandled())
677 Accummulate(Flat[G], Unh, SamplingFactor);
678 }
679 // We don't sample "Flat" currently, so sampling rate is 1.
680 for (const auto &[G, FC] : Profiles.FlatProfiles)
681 Accummulate(Flat[G], FC, /*SamplingRate=*/1);
682 return Flat;
683}
684
688 for (const auto &[_, CtxRoot] : Profiles.Contexts) {
689 const uint64_t TotalRootEntryCount = CtxRoot.getTotalRootEntryCount();
691 CtxRoot, [&](const PGOCtxProfContext &Ctx) {
692 auto &Targets = Ret[Ctx.guid()];
693 for (const auto &[ID, SubctxSet] : Ctx.callsites())
694 for (const auto &Subctx : SubctxSet)
695 Targets[ID][Subctx.first] +=
696 Subctx.second.getEntrycount() * TotalRootEntryCount;
697 });
698 }
699 return Ret;
700}
701
703 CallBase &IC, Result &Profile,
704 SetVector<std::pair<CallBase *, Function *>> &Candidates) {
705 const auto *Instr = CtxProfAnalysis::getCallsiteInstrumentation(IC);
706 if (!Instr)
707 return;
708 Module &M = *IC.getParent()->getModule();
709 const uint32_t CallID = Instr->getIndex()->getZExtValue();
710 Profile.visit(
711 [&](const PGOCtxProfContext &Ctx) {
712 const auto &Targets = Ctx.callsites().find(CallID);
713 if (Targets == Ctx.callsites().end())
714 return;
715 for (const auto &[Guid, _] : Targets->second)
716 if (auto Name = Profile.getFunctionName(Guid); !Name.empty())
717 if (auto *Target = M.getFunction(Name))
718 if (Target->hasFnAttribute(Attribute::AlwaysInline))
719 Candidates.insert({&IC, Target});
720 },
721 IC.getCaller());
722}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
static void preorderVisit(ProfilesTy &Profiles, function_ref< void(ProfTy &)> Visitor)
static void preorderVisitOneRoot(ProfTy &Profile, function_ref< void(ProfTy &)> Visitor)
#define _
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
Load MIR Sample Profile
static constexpr StringLiteral Filename
#define P(N)
Reader for contextual iFDO profile, which comes in bitstream format.
ModuleAnalysisManager MAM
This file contains some templates that are useful if you are working with the STL at all.
Class for arbitrary precision integers.
Definition APInt.h:78
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
const Instruction & front() const
Definition BasicBlock.h:484
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
LLVM_ABI CtxProfAnalysisPrinterPass(raw_ostream &OS)
LLVM_ABI PGOContextualProfile run(Module &M, ModuleAnalysisManager &MAM)
static LLVM_ABI InstrProfIncrementInst * getBBInstrumentation(BasicBlock &BB)
Get the instruction instrumenting a BB, or nullptr if not present.
static LLVM_ABI InstrProfIncrementInstStep * getSelectInstrumentation(SelectInst &SI)
Get the step instrumentation associated with a select
static LLVM_ABI void collectIndirectCallPromotionList(CallBase &IC, Result &Profile, SetVector< std::pair< CallBase *, Function * > > &Candidates)
static LLVM_ABI InstrProfCallsite * getCallsiteInstrumentation(CallBase &CB)
Get the instruction instrumenting a callsite, or nullptr if that cannot be found.
static LLVM_ABI AnalysisKey Key
PGOContextualProfile Result
LLVM_ABI CtxProfAnalysis(std::optional< StringRef > Profile=std::nullopt)
Implements a dense probed hash-table based set.
Definition DenseSet.h:289
Represents either an error or a value T.
Definition ErrorOr.h:56
reference get()
Definition ErrorOr.h:149
std::error_code getError() const
Definition ErrorOr.h:152
uint64_t GUID
Declare a type to represent a global unique identifier for a global value.
This represents the llvm.instrprof.callsite intrinsic.
static bool canInstrumentCallsite(const CallBase &CB)
This represents the llvm.instrprof.increment.step intrinsic.
This represents the llvm.instrprof.increment intrinsic.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:68
The instrumented contextual profile, produced by the CtxProfAnalysis.
LLVM_ABI void visit(ConstVisitor, const Function *F=nullptr) const
LLVM_ABI const CtxProfFlatProfile flatten() const
LLVM_ABI bool isInSpecializedModule() const
LLVM_ABI void update(Visitor, const Function &F)
function_ref< void(PGOCtxProfContext &)> Visitor
function_ref< void(const PGOCtxProfContext &)> ConstVisitor
LLVM_ABI const CtxProfFlatIndirectCallProfile flattenVirtCalls() const
bool isFunctionKnown(const Function &F) const
A node (context) in the loaded contextual profile, suitable for mutation during IPO passes.
std::map< GlobalValue::GUID, PGOCtxProfContext > CallTargetMapTy
LLVM_ABI Expected< PGOCtxProfile > loadProfiles()
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
uint64_t getBBCount(const BasicBlock &BB)
ProfileAnnotatorImpl(const Function &F, ArrayRef< uint64_t > Counters)
LLVM_ABI ~ProfileAnnotator()
LLVM_ABI bool getSelectInstrProfile(SelectInst &SI, uint64_t &TrueCount, uint64_t &FalseCount) const
LLVM_ABI bool getOutgoingBranchWeights(BasicBlock &BB, SmallVectorImpl< uint64_t > &Profile, uint64_t &MaxCount) const
LLVM_ABI uint64_t getBBCount(const BasicBlock &BB) const
LLVM_ABI ProfileAnnotator(const Function &F, ArrayRef< uint64_t > RawCounters)
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
Definition SetVector.h:57
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void resize(size_type N)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
Target - Wrapper for Target specific information.
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:212
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
Pass manager infrastructure for declaring and invalidating analyses.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
LLVM_ABI StringRef filename(StringRef path LLVM_LIFETIME_BOUND, Style style=Style::native)
Get filename.
Definition Path.cpp:594
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
std::map< GlobalValue::GUID, SmallVector< uint64_t, 1 > > CtxProfFlatProfile
static cl::opt< bool > ForceIsInSpecializedModule("ctx-profile-force-is-specialized", cl::init(false), cl::desc("Treat the given module as-if it were containing the " "post-thinlink module containing the root"))
auto successors(const MachineBasicBlock *BB)
cl::opt< std::string > UseCtxProfile("use-ctx-profile", cl::init(""), cl::Hidden, cl::desc("Use the specified contextual profile file"))
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
auto pred_size(const MachineBasicBlock *BB)
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
auto succ_size(const MachineBasicBlock *BB)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2018
DenseMap< GlobalValue::GUID, DenseMap< uint32_t, FlatIndirectTargets > > CtxProfFlatIndirectCallProfile
LLVM_ABI bool isPresplitCoroSuspendExitEdge(const BasicBlock &Src, const BasicBlock &Dest)
Definition CFG.cpp:424
static cl::opt< CtxProfAnalysisPrinterPass::PrintMode > PrintLevel("ctx-profile-printer-level", cl::init(CtxProfAnalysisPrinterPass::PrintMode::YAML), cl::Hidden, cl::values(clEnumValN(CtxProfAnalysisPrinterPass::PrintMode::Everything, "everything", "print everything - most verbose"), clEnumValN(CtxProfAnalysisPrinterPass::PrintMode::YAML, "yaml", "just the yaml representation of the profile")), cl::desc("Verbosity level of the contextual profile printer pass."))
LLVM_ABI void convertCtxProfToYaml(raw_ostream &OS, const PGOCtxProfile &Profile)
@ TotalRootEntryCount
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:874
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition Analysis.h:29