30#define DEBUG_TYPE "ctx_prof"
38 cl::desc(
"Use the specified contextual profile file"));
41 "ctx-profile-printer-level",
44 "everything",
"print everything - most verbose"),
46 "just the yaml representation of the profile")),
47 cl::desc(
"Verbosity level of the contextual profile printer pass."));
50 "ctx-profile-force-is-specialized",
cl::init(
false),
51 cl::desc(
"Treat the given module as-if it were containing the "
52 "post-thinlink module containing the root"));
60 std::optional<uint64_t>
Count;
62 explicit EdgeInfo(BBInfo &Src, BBInfo &Dest) : Src(&Src), Dest(&Dest) {}
66 std::optional<uint64_t>
Count;
74 size_t UnknownCountOutEdges = 0;
75 size_t UnknownCountInEdges = 0;
82 bool AssumeAllKnown)
const {
83 std::optional<uint64_t> Sum;
84 for (
const auto *
E : Edges) {
89 *Sum += (AssumeAllKnown ? *
E->Count :
E->Count.value_or(0U));
95 bool computeCountFrom(
const SmallVector<EdgeInfo *> &Edges) {
96 assert(!Count.has_value());
97 Count = getEdgeSum(Edges,
true);
98 return Count.has_value();
101 void setSingleUnknownEdgeCount(SmallVector<EdgeInfo *> &Edges) {
102 uint64_t KnownSum = getEdgeSum(Edges,
false).value_or(0U);
103 uint64_t EdgeVal = *Count > KnownSum ? *Count - KnownSum : 0
U;
104 EdgeInfo *
E =
nullptr;
105 for (
auto *
I : Edges)
106 if (
I && !
I->Count.has_value()) {
112 "Expected exactly one edge to have an unknown count, "
113 "found a second one");
117 assert(
E &&
"Expected exactly one edge to have an unknown count");
120 assert(
E->Src->UnknownCountOutEdges > 0);
121 assert(
E->Dest->UnknownCountInEdges > 0);
122 --
E->Src->UnknownCountOutEdges;
123 --
E->Dest->UnknownCountInEdges;
127 BBInfo(
size_t NumInEdges,
size_t NumOutEdges, std::optional<uint64_t> Count)
134 InEdges.reserve(NumInEdges);
135 OutEdges.resize(NumOutEdges);
138 bool tryTakeCountFromKnownOutEdges(
const BasicBlock &BB) {
139 if (!UnknownCountOutEdges) {
140 return computeCountFrom(OutEdges);
145 bool tryTakeCountFromKnownInEdges(
const BasicBlock &BB) {
146 if (!UnknownCountInEdges) {
147 return computeCountFrom(InEdges);
152 void addInEdge(EdgeInfo &Info) {
153 InEdges.push_back(&Info);
154 ++UnknownCountInEdges;
161 void addOutEdge(
size_t Index, EdgeInfo &Info) {
163 ++UnknownCountOutEdges;
166 bool hasCount()
const {
return Count.has_value(); }
168 uint64_t getCount()
const {
return *Count; }
170 bool trySetSingleUnknownInEdgeCount() {
171 if (UnknownCountInEdges == 1) {
172 setSingleUnknownEdgeCount(InEdges);
178 bool trySetSingleUnknownOutEdgeCount() {
179 if (UnknownCountOutEdges == 1) {
180 setSingleUnknownEdgeCount(OutEdges);
185 size_t getNumOutEdges()
const {
return OutEdges.size(); }
187 uint64_t getEdgeCount(
size_t Index)
const {
188 if (
auto *
E = OutEdges[Index])
195 ArrayRef<uint64_t> Counters;
197 std::map<const BasicBlock *, BBInfo> BBInfos;
198 std::vector<EdgeInfo> EdgeInfos;
202 bool shouldExcludeEdge(
const BasicBlock &Src,
const BasicBlock &Dest)
const {
206 BBInfo &getBBInfo(
const BasicBlock &BB) {
return BBInfos.find(&BB)->second; }
208 const BBInfo &getBBInfo(
const BasicBlock &BB)
const {
209 return BBInfos.find(&BB)->second;
214 bool allCountersAreAssigned()
const {
215 for (
const auto &BBInfo : BBInfos)
216 if (!BBInfo.second.hasCount())
218 for (
const auto &EdgeInfo : EdgeInfos)
219 if (!EdgeInfo.Count.has_value())
226 bool allTakenPathsExit()
const {
227 std::deque<const BasicBlock *> Worklist;
228 DenseSet<const BasicBlock *> Visited;
229 Worklist.push_back(&F.getEntryBlock());
230 bool HitExit =
false;
231 while (!Worklist.empty()) {
232 const auto *BB = Worklist.
front();
233 Worklist.pop_front();
234 if (!Visited.
insert(BB).second)
246 const auto &BBInfo = getBBInfo(*BB);
247 bool HasAWayOut =
false;
250 if (!shouldExcludeEdge(*BB, *Succ)) {
251 if (BBInfo.getEdgeCount(
I) > 0) {
253 Worklist.push_back(Succ);
263 bool allNonColdSelectsHaveProfile()
const {
264 for (
const auto &BB : F) {
265 if (getBBInfo(BB).getCount() > 0) {
266 for (
const auto &
I : BB) {
269 *
const_cast<SelectInst *
>(SI))) {
270 auto Index = Inst->getIndex()->getZExtValue();
271 assert(Index < Counters.size());
272 if (Counters[Index] == 0)
284 void propagateCounterValues() {
285 bool KeepGoing =
true;
288 for (
const auto &BB : F) {
289 auto &
Info = getBBInfo(BB);
290 if (!
Info.hasCount())
291 KeepGoing |=
Info.tryTakeCountFromKnownOutEdges(BB) ||
292 Info.tryTakeCountFromKnownInEdges(BB);
293 if (
Info.hasCount()) {
294 KeepGoing |=
Info.trySetSingleUnknownOutEdgeCount();
295 KeepGoing |=
Info.trySetSingleUnknownInEdgeCount();
299 assert(allCountersAreAssigned() &&
300 "[ctx-prof] Expected all counters have been assigned.");
301 assert(allTakenPathsExit() &&
302 "[ctx-prof] Encountered a BB with more than one successor, where "
303 "all outgoing edges have a 0 count. This occurs in non-exiting "
304 "functions (message pumps, usually) which are not supported in the "
305 "contextual profiling case");
306 assert(allNonColdSelectsHaveProfile() &&
307 "[ctx-prof] All non-cold select instructions were expected to have "
313 : F(F), Counters(Counters) {
314 assert(!F.isDeclaration());
315 assert(!Counters.empty());
317 for (
const auto &BB : F) {
318 std::optional<uint64_t>
Count;
321 auto Index = Ins->getIndex()->getZExtValue();
322 assert(Index < Counters.size() &&
323 "The index must be inside the counters vector by construction - "
324 "tripping this assertion indicates a bug in how the contextual "
325 "profile is managed by IPO transforms");
327 Count = Counters[Ins->getIndex()->getZExtValue()];
335 assert(Ins &&
"We iterate through the function's BBs, no reason to "
336 "insert one more than once");
338 return !shouldExcludeEdge(BB, *Succ);
342 EdgeInfos.reserve(NrEdges);
343 for (
const auto &BB : F) {
344 auto &Info = getBBInfo(BB);
345 for (
auto I = 0U;
I < BB.getTerminator()->getNumSuccessors(); ++
I) {
346 const auto *Succ = BB.getTerminator()->getSuccessor(
I);
347 if (!shouldExcludeEdge(BB, *Succ)) {
348 auto &EI = EdgeInfos.emplace_back(getBBInfo(BB), getBBInfo(*Succ));
349 Info.addOutEdge(
I, EI);
350 getBBInfo(*Succ).addInEdge(EI);
354 assert(EdgeInfos.capacity() == NrEdges &&
355 "The capacity of EdgeInfos should have stayed unchanged it was "
356 "populated, because we need pointers to its contents to be stable");
357 propagateCounterValues();
372 return PImpl->getBBCount(BB);
378 const auto &BBInfo = PImpl->getBBInfo(*
SI.getParent());
379 TrueCount = FalseCount = 0;
380 if (BBInfo.getCount() == 0)
386 auto Index = Step->getIndex()->getZExtValue();
388 "The index of the step instruction must be inside the "
389 "counters vector by "
390 "construction - tripping this assertion indicates a bug in "
391 "how the contextual profile is managed by IPO transforms");
392 auto TotalCount = BBInfo.getCount();
393 TrueCount = PImpl->Counters[Index];
394 FalseCount = (TotalCount > TrueCount ? TotalCount - TrueCount : 0U);
407 Profile.resize(Term->getNumSuccessors());
409 const auto &BBInfo = PImpl->getBBInfo(BB);
411 for (
unsigned SuccIdx = 0,
Size = BBInfo.getNumOutEdges(); SuccIdx <
Size;
413 uint64_t EdgeCount = BBInfo.getEdgeCount(SuccIdx);
414 if (EdgeCount > MaxCount)
415 MaxCount = EdgeCount;
438 M.getContext().emitError(
"could not open contextual profile file: " +
444 if (!MaybeProfiles) {
445 M.getContext().emitError(
"contextual profile file is invalid: " +
446 toString(MaybeProfiles.takeError()));
456 auto ModName = M.getName();
465 ProfileRootsInModule.
insert(
Guid.getZExtValue());
466 return ProfileRootsInModule;
468 const auto ProfileRootsInModule = DetermineRootsInModule();
475 if (!ProfileRootsInModule.empty()) {
476 Result.IsInSpecializedModule =
true;
478 for (
auto &[RootGuid,
_] :
480 if (!ProfileRootsInModule.contains(RootGuid))
481 MaybeProfiles->Contexts.erase(RootGuid);
483 MaybeProfiles->FlatProfiles.clear();
486 for (
const auto &
F : M) {
487 if (
F.isDeclaration())
489 auto GUID =
F.getGUID();
490 assert(GUID &&
"guid not found for defined function");
491 const auto &Entry =
F.begin();
493 for (
const auto &
I : *Entry)
496 static_cast<uint32_t>(
C->getNumCounters()->getZExtValue());
502 for (
const auto &BB :
F)
503 for (
const auto &
I : BB)
506 static_cast<uint32_t>(
C->getNumCounters()->getZExtValue());
509 auto [It, Ins] =
Result.FuncInfo.insert(
510 {GUID, PGOContextualProfile::FunctionInfo(
F.getName())});
513 It->second.NextCallsiteIndex = MaxCallsites;
514 It->second.NextCounterIndex = MaxCounters;
518 Result.Profiles = std::move(*MaybeProfiles);
529 if (
C.contexts().empty()) {
530 OS <<
"No contextual profile was provided.\n";
535 OS <<
"Function Info:\n";
536 for (
const auto &[
Guid, FuncInfo] :
C.FuncInfo)
537 OS <<
Guid <<
" : " << FuncInfo.Name
538 <<
". MaxCounterID: " << FuncInfo.NextCounterIndex
539 <<
". MaxCallsiteID: " << FuncInfo.NextCallsiteIndex <<
"\n";
543 OS <<
"\nCurrent Profile:\n";
549 OS <<
"\nFlat Profile:\n";
550 auto Flat =
C.flatten();
567 "didn't expect to find another call, that's not the callsite "
568 "instrumentation, before an instrumentable callsite");
590template <
class ProfTy>
593 std::function<void(ProfTy &)> Traverser = [&](
auto &Ctx) {
595 for (
auto &[
_, SubCtxSet] : Ctx.callsites())
596 for (
auto &[__, Subctx] : SubCtxSet)
602template <
class ProfilesTy,
class ProfTy>
605 for (
auto &[
_,
P] : Profiles)
609void PGOContextualProfile::initIndex() {
612 DenseMap<GlobalValue::GUID, PGOCtxProfContext *> InsertionPoints;
613 for (
auto &[
Guid, FI] : FuncInfo)
614 InsertionPoints[
Guid] = &FI.Index;
616 Profiles.Contexts, [&](PGOCtxProfContext &Ctx) {
617 auto InsertIt = InsertionPoints.find(Ctx.guid());
618 if (InsertIt == InsertionPoints.end())
624 InsertIt->second->Next = &Ctx;
625 Ctx.Previous = InsertIt->second;
626 InsertIt->second = &Ctx;
633 : IsInSpecializedModule;
639 for (
auto *
Node = FuncInfo.find(
G)->second.Index.Next;
Node;
650 for (
const auto *
Node = FuncInfo.find(
G)->second.Index.Next;
Node;
663 "All contexts corresponding to a function should have the exact "
664 "same number of counters.");
665 for (
size_t I = 0, E = Into.
size();
I < E; ++
I)
666 Into[
I] += From[
I] * SamplingRate;
669 for (
const auto &[
_, CtxRoot] : Profiles.Contexts) {
670 const uint64_t SamplingFactor = CtxRoot.getTotalRootEntryCount();
673 Accummulate(Flat[Ctx.guid()], Ctx.counters(), SamplingFactor);
676 for (
const auto &[
G, Unh] : CtxRoot.getUnhandled())
677 Accummulate(Flat[
G], Unh, SamplingFactor);
680 for (
const auto &[
G, FC] : Profiles.FlatProfiles)
681 Accummulate(Flat[
G], FC, 1);
688 for (
const auto &[
_, CtxRoot] : Profiles.Contexts) {
692 auto &Targets = Ret[Ctx.guid()];
693 for (
const auto &[
ID, SubctxSet] : Ctx.callsites())
694 for (
const auto &Subctx : SubctxSet)
695 Targets[
ID][Subctx.first] +=
704 SetVector<std::pair<CallBase *, Function *>> &Candidates) {
709 const uint32_t CallID = Instr->getIndex()->getZExtValue();
712 const auto &Targets = Ctx.callsites().find(CallID);
713 if (Targets == Ctx.callsites().end())
715 for (
const auto &[
Guid,
_] : Targets->second)
716 if (
auto Name = Profile.getFunctionName(
Guid); !Name.empty())
717 if (
auto *
Target = M.getFunction(Name))
718 if (
Target->hasFnAttribute(Attribute::AlwaysInline))
719 Candidates.insert({&IC,
Target});
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
static void preorderVisit(ProfilesTy &Profiles, function_ref< void(ProfTy &)> Visitor)
static void preorderVisitOneRoot(ProfTy &Profile, function_ref< void(ProfTy &)> Visitor)
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
static constexpr StringLiteral Filename
Reader for contextual iFDO profile, which comes in bitstream format.
ModuleAnalysisManager MAM
Class for arbitrary precision integers.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM Basic Block Representation.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
const Instruction & front() const
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
LLVM_ABI CtxProfAnalysisPrinterPass(raw_ostream &OS)
LLVM_ABI PGOContextualProfile run(Module &M, ModuleAnalysisManager &MAM)
static LLVM_ABI InstrProfIncrementInst * getBBInstrumentation(BasicBlock &BB)
Get the instruction instrumenting a BB, or nullptr if not present.
static LLVM_ABI InstrProfIncrementInstStep * getSelectInstrumentation(SelectInst &SI)
Get the step instrumentation associated with a select
static LLVM_ABI void collectIndirectCallPromotionList(CallBase &IC, Result &Profile, SetVector< std::pair< CallBase *, Function * > > &Candidates)
static LLVM_ABI InstrProfCallsite * getCallsiteInstrumentation(CallBase &CB)
Get the instruction instrumenting a callsite, or nullptr if that cannot be found.
static LLVM_ABI AnalysisKey Key
PGOContextualProfile Result
LLVM_ABI CtxProfAnalysis(std::optional< StringRef > Profile=std::nullopt)
Implements a dense probed hash-table based set.
Represents either an error or a value T.
std::error_code getError() const
uint64_t GUID
Declare a type to represent a global unique identifier for a global value.
This represents the llvm.instrprof.callsite intrinsic.
static bool canInstrumentCallsite(const CallBase &CB)
This represents the llvm.instrprof.increment.step intrinsic.
This represents the llvm.instrprof.increment intrinsic.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
A Module instance is used to store all the information related to an LLVM module.
The instrumented contextual profile, produced by the CtxProfAnalysis.
LLVM_ABI void visit(ConstVisitor, const Function *F=nullptr) const
LLVM_ABI const CtxProfFlatProfile flatten() const
LLVM_ABI bool isInSpecializedModule() const
LLVM_ABI void update(Visitor, const Function &F)
function_ref< void(PGOCtxProfContext &)> Visitor
function_ref< void(const PGOCtxProfContext &)> ConstVisitor
LLVM_ABI const CtxProfFlatIndirectCallProfile flattenVirtCalls() const
bool isFunctionKnown(const Function &F) const
A node (context) in the loaded contextual profile, suitable for mutation during IPO passes.
std::map< GlobalValue::GUID, PGOCtxProfContext > CallTargetMapTy
LLVM_ABI Expected< PGOCtxProfile > loadProfiles()
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
uint64_t getBBCount(const BasicBlock &BB)
ProfileAnnotatorImpl(const Function &F, ArrayRef< uint64_t > Counters)
friend class ProfileAnnotator
LLVM_ABI ~ProfileAnnotator()
LLVM_ABI bool getSelectInstrProfile(SelectInst &SI, uint64_t &TrueCount, uint64_t &FalseCount) const
LLVM_ABI bool getOutgoingBranchWeights(BasicBlock &BB, SmallVectorImpl< uint64_t > &Profile, uint64_t &MaxCount) const
LLVM_ABI uint64_t getBBCount(const BasicBlock &BB) const
LLVM_ABI ProfileAnnotator(const Function &F, ArrayRef< uint64_t > RawCounters)
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
Target - Wrapper for Target specific information.
std::pair< iterator, bool > insert(const ValueT &V)
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
This class implements an extremely fast bulk output stream that can only output to a stream.
Pass manager infrastructure for declaring and invalidating analyses.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
LLVM_ABI StringRef filename(StringRef path LLVM_LIFETIME_BOUND, Style style=Style::native)
Get filename.
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
std::map< GlobalValue::GUID, SmallVector< uint64_t, 1 > > CtxProfFlatProfile
static cl::opt< bool > ForceIsInSpecializedModule("ctx-profile-force-is-specialized", cl::init(false), cl::desc("Treat the given module as-if it were containing the " "post-thinlink module containing the root"))
auto successors(const MachineBasicBlock *BB)
cl::opt< std::string > UseCtxProfile("use-ctx-profile", cl::init(""), cl::Hidden, cl::desc("Use the specified contextual profile file"))
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto pred_size(const MachineBasicBlock *BB)
FunctionAddr VTableAddr Count
auto succ_size(const MachineBasicBlock *BB)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
DenseMap< GlobalValue::GUID, DenseMap< uint32_t, FlatIndirectTargets > > CtxProfFlatIndirectCallProfile
LLVM_ABI bool isPresplitCoroSuspendExitEdge(const BasicBlock &Src, const BasicBlock &Dest)
static cl::opt< CtxProfAnalysisPrinterPass::PrintMode > PrintLevel("ctx-profile-printer-level", cl::init(CtxProfAnalysisPrinterPass::PrintMode::YAML), cl::Hidden, cl::values(clEnumValN(CtxProfAnalysisPrinterPass::PrintMode::Everything, "everything", "print everything - most verbose"), clEnumValN(CtxProfAnalysisPrinterPass::PrintMode::YAML, "yaml", "just the yaml representation of the profile")), cl::desc("Verbosity level of the contextual profile printer pass."))
LLVM_ABI void convertCtxProfToYaml(raw_ostream &OS, const PGOCtxProfile &Profile)
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Implement std::hash so that hash_code can be used in STL containers.
A special type used by analysis passes to provide an address that identifies that particular analysis...