30#define DEBUG_TYPE "ctx_prof"
38 cl::desc(
"Use the specified contextual profile file"));
41 "ctx-profile-printer-level",
44 "everything",
"print everything - most verbose"),
46 "just the yaml representation of the profile")),
47 cl::desc(
"Verbosity level of the contextual profile printer pass."));
50 "ctx-profile-force-is-specialized",
cl::init(
false),
51 cl::desc(
"Treat the given module as-if it were containing the "
52 "post-thinlink module containing the root"));
62 std::optional<uint64_t>
Count;
64 explicit EdgeInfo(BBInfo &Src, BBInfo &Dest) : Src(&Src), Dest(&Dest) {}
68 std::optional<uint64_t>
Count;
76 size_t UnknownCountOutEdges = 0;
77 size_t UnknownCountInEdges = 0;
84 bool AssumeAllKnown)
const {
85 std::optional<uint64_t> Sum;
86 for (
const auto *
E : Edges) {
91 *Sum += (AssumeAllKnown ? *
E->Count :
E->Count.value_or(0U));
97 bool computeCountFrom(
const SmallVector<EdgeInfo *> &Edges) {
98 assert(!Count.has_value());
99 Count = getEdgeSum(Edges,
true);
100 return Count.has_value();
103 void setSingleUnknownEdgeCount(SmallVector<EdgeInfo *> &Edges) {
104 uint64_t KnownSum = getEdgeSum(Edges,
false).value_or(0U);
105 uint64_t EdgeVal = *Count > KnownSum ? *Count - KnownSum : 0
U;
106 EdgeInfo *
E =
nullptr;
107 for (
auto *
I : Edges)
108 if (
I && !
I->Count.has_value()) {
114 "Expected exactly one edge to have an unknown count, "
115 "found a second one");
119 assert(
E &&
"Expected exactly one edge to have an unknown count");
122 assert(
E->Src->UnknownCountOutEdges > 0);
123 assert(
E->Dest->UnknownCountInEdges > 0);
124 --
E->Src->UnknownCountOutEdges;
125 --
E->Dest->UnknownCountInEdges;
129 BBInfo(
size_t NumInEdges,
size_t NumOutEdges, std::optional<uint64_t> Count)
136 InEdges.reserve(NumInEdges);
137 OutEdges.resize(NumOutEdges);
140 bool tryTakeCountFromKnownOutEdges(
const BasicBlock &BB) {
141 if (!UnknownCountOutEdges) {
142 return computeCountFrom(OutEdges);
147 bool tryTakeCountFromKnownInEdges(
const BasicBlock &BB) {
148 if (!UnknownCountInEdges) {
149 return computeCountFrom(InEdges);
154 void addInEdge(EdgeInfo &
Info) {
155 InEdges.push_back(&
Info);
156 ++UnknownCountInEdges;
163 void addOutEdge(
size_t Index, EdgeInfo &
Info) {
165 ++UnknownCountOutEdges;
168 bool hasCount()
const {
return Count.has_value(); }
170 uint64_t getCount()
const {
return *Count; }
172 bool trySetSingleUnknownInEdgeCount() {
173 if (UnknownCountInEdges == 1) {
174 setSingleUnknownEdgeCount(InEdges);
180 bool trySetSingleUnknownOutEdgeCount() {
181 if (UnknownCountOutEdges == 1) {
182 setSingleUnknownEdgeCount(OutEdges);
187 size_t getNumOutEdges()
const {
return OutEdges.size(); }
189 uint64_t getEdgeCount(
size_t Index)
const {
190 if (
auto *
E = OutEdges[Index])
197 ArrayRef<uint64_t> Counters;
199 std::map<const BasicBlock *, BBInfo> BBInfos;
200 std::vector<EdgeInfo> EdgeInfos;
204 bool shouldExcludeEdge(
const BasicBlock &Src,
const BasicBlock &Dest)
const {
208 BBInfo &getBBInfo(
const BasicBlock &BB) {
return BBInfos.find(&BB)->second; }
210 const BBInfo &getBBInfo(
const BasicBlock &BB)
const {
211 return BBInfos.find(&BB)->second;
216 bool allCountersAreAssigned()
const {
217 for (
const auto &BBInfo : BBInfos)
218 if (!BBInfo.second.hasCount())
220 for (
const auto &EdgeInfo : EdgeInfos)
221 if (!EdgeInfo.Count.has_value())
228 bool allTakenPathsExit()
const {
229 std::deque<const BasicBlock *> Worklist;
230 DenseSet<const BasicBlock *> Visited;
231 Worklist.push_back(&F.getEntryBlock());
232 bool HitExit =
false;
233 while (!Worklist.empty()) {
234 const auto *BB = Worklist.
front();
235 Worklist.pop_front();
236 if (!Visited.
insert(BB).second)
248 const auto &BBInfo = getBBInfo(*BB);
249 bool HasAWayOut =
false;
252 if (!shouldExcludeEdge(*BB, *Succ)) {
253 if (BBInfo.getEdgeCount(
I) > 0) {
255 Worklist.push_back(Succ);
265 bool allNonColdSelectsHaveProfile()
const {
266 for (
const auto &BB : F) {
267 if (getBBInfo(BB).getCount() > 0) {
268 for (
const auto &
I : BB) {
271 *
const_cast<SelectInst *
>(SI))) {
272 auto Index = Inst->getIndex()->getZExtValue();
273 assert(Index < Counters.size());
274 if (Counters[Index] == 0)
286 void propagateCounterValues() {
287 bool KeepGoing =
true;
290 for (
const auto &BB : F) {
291 auto &
Info = getBBInfo(BB);
292 if (!
Info.hasCount())
293 KeepGoing |=
Info.tryTakeCountFromKnownOutEdges(BB) ||
294 Info.tryTakeCountFromKnownInEdges(BB);
295 if (
Info.hasCount()) {
296 KeepGoing |=
Info.trySetSingleUnknownOutEdgeCount();
297 KeepGoing |=
Info.trySetSingleUnknownInEdgeCount();
301 assert(allCountersAreAssigned() &&
302 "[ctx-prof] Expected all counters have been assigned.");
303 assert(allTakenPathsExit() &&
304 "[ctx-prof] Encountered a BB with more than one successor, where "
305 "all outgoing edges have a 0 count. This occurs in non-exiting "
306 "functions (message pumps, usually) which are not supported in the "
307 "contextual profiling case");
308 assert(allNonColdSelectsHaveProfile() &&
309 "[ctx-prof] All non-cold select instructions were expected to have "
315 : F(F), Counters(Counters) {
316 assert(!F.isDeclaration());
317 assert(!Counters.empty());
319 for (
const auto &BB : F) {
320 std::optional<uint64_t>
Count;
323 auto Index = Ins->getIndex()->getZExtValue();
324 assert(Index < Counters.size() &&
325 "The index must be inside the counters vector by construction - "
326 "tripping this assertion indicates a bug in how the contextual "
327 "profile is managed by IPO transforms");
329 Count = Counters[Ins->getIndex()->getZExtValue()];
337 assert(Ins &&
"We iterate through the function's BBs, no reason to "
338 "insert one more than once");
340 return !shouldExcludeEdge(BB, *Succ);
344 EdgeInfos.reserve(NrEdges);
345 for (
const auto &BB : F) {
346 auto &Info = getBBInfo(BB);
347 for (
auto I = 0U;
I < BB.getTerminator()->getNumSuccessors(); ++
I) {
348 const auto *Succ = BB.getTerminator()->getSuccessor(
I);
349 if (!shouldExcludeEdge(BB, *Succ)) {
350 auto &EI = EdgeInfos.emplace_back(getBBInfo(BB), getBBInfo(*Succ));
351 Info.addOutEdge(
I, EI);
352 getBBInfo(*Succ).addInEdge(EI);
356 assert(EdgeInfos.capacity() == NrEdges &&
357 "The capacity of EdgeInfos should have stayed unchanged it was "
358 "populated, because we need pointers to its contents to be stable");
359 propagateCounterValues();
374 return PImpl->getBBCount(BB);
380 const auto &BBInfo = PImpl->getBBInfo(*
SI.getParent());
381 TrueCount = FalseCount = 0;
382 if (BBInfo.getCount() == 0)
388 auto Index = Step->getIndex()->getZExtValue();
390 "The index of the step instruction must be inside the "
391 "counters vector by "
392 "construction - tripping this assertion indicates a bug in "
393 "how the contextual profile is managed by IPO transforms");
394 auto TotalCount = BBInfo.getCount();
395 TrueCount = PImpl->Counters[Index];
396 FalseCount = (TotalCount > TrueCount ? TotalCount - TrueCount : 0U);
409 Profile.resize(Term->getNumSuccessors());
411 const auto &BBInfo = PImpl->getBBInfo(BB);
413 for (
unsigned SuccIdx = 0,
Size = BBInfo.getNumOutEdges(); SuccIdx <
Size;
415 uint64_t EdgeCount = BBInfo.getEdgeCount(SuccIdx);
416 if (EdgeCount > MaxCount)
417 MaxCount = EdgeCount;
424 for (
auto &
F : M.functions()) {
425 if (
F.isDeclaration())
432 {ConstantAsMetadata::get(ConstantInt::get(
433 Type::getInt64Ty(M.getContext()), GUID))}));
439 if (
F.isDeclaration()) {
444 assert(MD &&
"guid not found for defined function");
447 ->stripPointerCasts())
467 M.getContext().emitError(
"could not open contextual profile file: " +
473 if (!MaybeProfiles) {
474 M.getContext().emitError(
"contextual profile file is invalid: " +
475 toString(MaybeProfiles.takeError()));
485 auto ModName = M.getName();
488 Filename = Filename.substr(0, Filename.find_last_of(
'.'));
493 if (!Filename.getAsInteger(0,
Guid))
494 ProfileRootsInModule.
insert(
Guid.getZExtValue());
495 return ProfileRootsInModule;
497 const auto ProfileRootsInModule = DetermineRootsInModule();
504 if (!ProfileRootsInModule.empty()) {
505 Result.IsInSpecializedModule =
true;
507 for (
auto &[RootGuid,
_] :
509 if (!ProfileRootsInModule.contains(RootGuid))
510 MaybeProfiles->Contexts.erase(RootGuid);
512 MaybeProfiles->FlatProfiles.clear();
515 for (
const auto &
F : M) {
516 if (
F.isDeclaration())
519 assert(GUID &&
"guid not found for defined function");
520 const auto &Entry =
F.begin();
522 for (
const auto &
I : *Entry)
525 static_cast<uint32_t>(
C->getNumCounters()->getZExtValue());
531 for (
const auto &BB :
F)
532 for (
const auto &
I : BB)
535 static_cast<uint32_t>(
C->getNumCounters()->getZExtValue());
538 auto [It, Ins] =
Result.FuncInfo.insert(
539 {GUID, PGOContextualProfile::FunctionInfo(
F.getName())});
542 It->second.NextCallsiteIndex = MaxCallsites;
543 It->second.NextCounterIndex = MaxCounters;
547 Result.Profiles = std::move(*MaybeProfiles);
553PGOContextualProfile::getDefinedFunctionGUID(
const Function &
F)
const {
565 if (
C.contexts().empty()) {
566 OS <<
"No contextual profile was provided.\n";
571 OS <<
"Function Info:\n";
572 for (
const auto &[
Guid, FuncInfo] :
C.FuncInfo)
573 OS <<
Guid <<
" : " << FuncInfo.Name
574 <<
". MaxCounterID: " << FuncInfo.NextCounterIndex
575 <<
". MaxCallsiteID: " << FuncInfo.NextCallsiteIndex <<
"\n";
579 OS <<
"\nCurrent Profile:\n";
585 OS <<
"\nFlat Profile:\n";
586 auto Flat =
C.flatten();
603 "didn't expect to find another call, that's not the callsite "
604 "instrumentation, before an instrumentable callsite");
626template <
class ProfTy>
629 std::function<void(ProfTy &)> Traverser = [&](
auto &Ctx) {
631 for (
auto &[
_, SubCtxSet] : Ctx.callsites())
632 for (
auto &[__, Subctx] : SubCtxSet)
638template <
class ProfilesTy,
class ProfTy>
641 for (
auto &[
_,
P] : Profiles)
645void PGOContextualProfile::initIndex() {
648 DenseMap<GlobalValue::GUID, PGOCtxProfContext *> InsertionPoints;
649 for (
auto &[
Guid, FI] : FuncInfo)
650 InsertionPoints[
Guid] = &FI.Index;
652 Profiles.Contexts, [&](PGOCtxProfContext &Ctx) {
653 auto InsertIt = InsertionPoints.find(Ctx.guid());
654 if (InsertIt == InsertionPoints.end())
660 InsertIt->second->Next = &Ctx;
661 Ctx.Previous = InsertIt->second;
662 InsertIt->second = &Ctx;
669 : IsInSpecializedModule;
675 for (
auto *
Node = FuncInfo.find(
G)->second.Index.Next;
Node;
686 for (
const auto *
Node = FuncInfo.find(
G)->second.Index.Next;
Node;
699 "All contexts corresponding to a function should have the exact "
700 "same number of counters.");
701 for (
size_t I = 0, E = Into.
size();
I < E; ++
I)
702 Into[
I] += From[
I] * SamplingRate;
705 for (
const auto &[
_, CtxRoot] : Profiles.Contexts) {
706 const uint64_t SamplingFactor = CtxRoot.getTotalRootEntryCount();
709 Accummulate(Flat[Ctx.guid()], Ctx.counters(), SamplingFactor);
712 for (
const auto &[
G, Unh] : CtxRoot.getUnhandled())
713 Accummulate(Flat[
G], Unh, SamplingFactor);
716 for (
const auto &[
G, FC] : Profiles.FlatProfiles)
717 Accummulate(Flat[
G], FC, 1);
724 for (
const auto &[
_, CtxRoot] : Profiles.Contexts) {
728 auto &Targets = Ret[Ctx.guid()];
729 for (
const auto &[
ID, SubctxSet] : Ctx.callsites())
730 for (
const auto &Subctx : SubctxSet)
731 Targets[
ID][Subctx.first] +=
740 SetVector<std::pair<CallBase *, Function *>> &Candidates) {
745 const uint32_t CallID = Instr->getIndex()->getZExtValue();
748 const auto &Targets = Ctx.callsites().find(CallID);
749 if (Targets == Ctx.callsites().end())
751 for (
const auto &[
Guid,
_] : Targets->second)
752 if (
auto Name = Profile.getFunctionName(
Guid); !Name.empty())
753 if (
auto *
Target = M.getFunction(Name))
754 if (
Target->hasFnAttribute(Attribute::AlwaysInline))
755 Candidates.insert({&IC,
Target});
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
static void preorderVisit(ProfilesTy &Profiles, function_ref< void(ProfTy &)> Visitor)
static void preorderVisitOneRoot(ProfTy &Profile, function_ref< void(ProfTy &)> Visitor)
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
Reader for contextual iFDO profile, which comes in bitstream format.
ModuleAnalysisManager MAM
Class for arbitrary precision integers.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
static LLVM_ABI uint64_t getGUID(const Function &F)
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
Assign a GUID if one is not already assign, as a function metadata named GUIDMetadataName.
static LLVM_ABI const char * GUIDMetadataName
LLVM Basic Block Representation.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
const Instruction & front() const
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
LLVM_ABI CtxProfAnalysisPrinterPass(raw_ostream &OS)
LLVM_ABI PGOContextualProfile run(Module &M, ModuleAnalysisManager &MAM)
static LLVM_ABI InstrProfIncrementInst * getBBInstrumentation(BasicBlock &BB)
Get the instruction instrumenting a BB, or nullptr if not present.
static LLVM_ABI InstrProfIncrementInstStep * getSelectInstrumentation(SelectInst &SI)
Get the step instrumentation associated with a select
static LLVM_ABI void collectIndirectCallPromotionList(CallBase &IC, Result &Profile, SetVector< std::pair< CallBase *, Function * > > &Candidates)
static LLVM_ABI InstrProfCallsite * getCallsiteInstrumentation(CallBase &CB)
Get the instruction instrumenting a callsite, or nullptr if that cannot be found.
static LLVM_ABI AnalysisKey Key
PGOContextualProfile Result
LLVM_ABI CtxProfAnalysis(std::optional< StringRef > Profile=std::nullopt)
Implements a dense probed hash-table based set.
Represents either an error or a value T.
std::error_code getError() const
uint64_t GUID
Declare a type to represent a global unique identifier for a global value.
static bool isExternalLinkage(LinkageTypes Linkage)
This represents the llvm.instrprof.callsite intrinsic.
static bool canInstrumentCallsite(const CallBase &CB)
This represents the llvm.instrprof.increment.step intrinsic.
This represents the llvm.instrprof.increment intrinsic.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
A Module instance is used to store all the information related to an LLVM module.
The instrumented contextual profile, produced by the CtxProfAnalysis.
LLVM_ABI void visit(ConstVisitor, const Function *F=nullptr) const
LLVM_ABI const CtxProfFlatProfile flatten() const
LLVM_ABI bool isInSpecializedModule() const
LLVM_ABI void update(Visitor, const Function &F)
function_ref< void(PGOCtxProfContext &)> Visitor
function_ref< void(const PGOCtxProfContext &)> ConstVisitor
LLVM_ABI const CtxProfFlatIndirectCallProfile flattenVirtCalls() const
bool isFunctionKnown(const Function &F) const
A node (context) in the loaded contextual profile, suitable for mutation during IPO passes.
std::map< GlobalValue::GUID, PGOCtxProfContext > CallTargetMapTy
LLVM_ABI Expected< PGOCtxProfile > loadProfiles()
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
uint64_t getBBCount(const BasicBlock &BB)
ProfileAnnotatorImpl(const Function &F, ArrayRef< uint64_t > Counters)
friend class ProfileAnnotator
LLVM_ABI ~ProfileAnnotator()
LLVM_ABI bool getSelectInstrProfile(SelectInst &SI, uint64_t &TrueCount, uint64_t &FalseCount) const
LLVM_ABI bool getOutgoingBranchWeights(BasicBlock &BB, SmallVectorImpl< uint64_t > &Profile, uint64_t &MaxCount) const
LLVM_ABI uint64_t getBBCount(const BasicBlock &BB) const
LLVM_ABI ProfileAnnotator(const Function &F, ArrayRef< uint64_t > RawCounters)
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Target - Wrapper for Target specific information.
std::pair< iterator, bool > insert(const ValueT &V)
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
This class implements an extremely fast bulk output stream that can only output to a stream.
Pass manager infrastructure for declaring and invalidating analyses.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
LLVM_ABI StringRef filename(StringRef path LLVM_LIFETIME_BOUND, Style style=Style::native)
Get filename.
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
std::map< GlobalValue::GUID, SmallVector< uint64_t, 1 > > CtxProfFlatProfile
static cl::opt< bool > ForceIsInSpecializedModule("ctx-profile-force-is-specialized", cl::init(false), cl::desc("Treat the given module as-if it were containing the " "post-thinlink module containing the root"))
auto successors(const MachineBasicBlock *BB)
cl::opt< std::string > UseCtxProfile("use-ctx-profile", cl::init(""), cl::Hidden, cl::desc("Use the specified contextual profile file"))
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto pred_size(const MachineBasicBlock *BB)
FunctionAddr VTableAddr Count
auto succ_size(const MachineBasicBlock *BB)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
DenseMap< GlobalValue::GUID, DenseMap< uint32_t, FlatIndirectTargets > > CtxProfFlatIndirectCallProfile
LLVM_ABI bool isPresplitCoroSuspendExitEdge(const BasicBlock &Src, const BasicBlock &Dest)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
static cl::opt< CtxProfAnalysisPrinterPass::PrintMode > PrintLevel("ctx-profile-printer-level", cl::init(CtxProfAnalysisPrinterPass::PrintMode::YAML), cl::Hidden, cl::values(clEnumValN(CtxProfAnalysisPrinterPass::PrintMode::Everything, "everything", "print everything - most verbose"), clEnumValN(CtxProfAnalysisPrinterPass::PrintMode::YAML, "yaml", "just the yaml representation of the profile")), cl::desc("Verbosity level of the contextual profile printer pass."))
LLVM_ABI void convertCtxProfToYaml(raw_ostream &OS, const PGOCtxProfile &Profile)
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Implement std::hash so that hash_code can be used in STL containers.
A special type used by analysis passes to provide an address that identifies that particular analysis...