56#define DEBUG_TYPE "memprof-context-disambiguation"
59 "Number of function clones created during whole program analysis");
61 "Number of function clones created during ThinLTO backend");
63 "Number of functions that had clones created during ThinLTO backend");
65 FunctionCloneDuplicatesThinBackend,
66 "Number of function clone duplicates detected during ThinLTO backend");
67STATISTIC(AllocTypeNotCold,
"Number of not cold static allocations (possibly "
68 "cloned) during whole program analysis");
69STATISTIC(AllocTypeCold,
"Number of cold static allocations (possibly cloned) "
70 "during whole program analysis");
72 "Number of not cold static allocations (possibly cloned) during "
74STATISTIC(AllocTypeColdThinBackend,
"Number of cold static allocations "
75 "(possibly cloned) during ThinLTO backend");
77 "Number of original (not cloned) allocations with memprof profiles "
78 "during ThinLTO backend");
80 AllocVersionsThinBackend,
81 "Number of allocation versions (including clones) during ThinLTO backend");
83 "Maximum number of allocation versions created for an original "
84 "allocation during ThinLTO backend");
86 "Number of unclonable ambigous allocations during ThinLTO backend");
88 "Number of edges removed due to mismatched callees (profiled vs IR)");
90 "Number of profiled callees found via tail calls");
92 "Aggregate depth of profiled callees found via tail calls");
94 "Maximum depth of profiled callees found via tail calls");
96 "Number of profiled callees found via multiple tail call chains");
97STATISTIC(DeferredBackedges,
"Number of backedges with deferred cloning");
98STATISTIC(NewMergedNodes,
"Number of new nodes created during merging");
99STATISTIC(NonNewMergedNodes,
"Number of non new nodes used during merging");
101 "Number of missing alloc nodes for context ids");
103 "Number of calls skipped during cloning due to unexpected operand");
105 "Number of callsites assigned to call multiple non-matching clones");
106STATISTIC(TotalMergeInvokes,
"Number of merge invocations for nodes");
107STATISTIC(TotalMergeIters,
"Number of merge iterations for nodes");
108STATISTIC(MaxMergeIters,
"Max merge iterations for nodes");
109STATISTIC(NumImportantContextIds,
"Number of important context ids");
110STATISTIC(NumFixupEdgeIdsInserted,
"Number of fixup edge ids inserted");
111STATISTIC(NumFixupEdgesAdded,
"Number of fixup edges added");
112STATISTIC(NumFixedContexts,
"Number of contexts with fixed edges");
114 "Number of aliasees prevailing in a different module than its alias");
119 cl::desc(
"Specify the path prefix of the MemProf dot files."));
123 cl::desc(
"Export graph to dot files."));
128 cl::desc(
"Iteratively apply merging on a node to catch new callers"));
138 "memprof-dot-scope",
cl::desc(
"Scope of graph to export to dot"),
143 "Export only nodes with contexts feeding given "
144 "-memprof-dot-alloc-id"),
146 "Export only nodes with given -memprof-dot-context-id")));
150 cl::desc(
"Id of alloc to export if -memprof-dot-scope=alloc "
151 "or to highlight if -memprof-dot-scope=all"));
155 cl::desc(
"Id of context to export if -memprof-dot-scope=context or to "
156 "highlight otherwise"));
160 cl::desc(
"Dump CallingContextGraph to stdout after each stage."));
164 cl::desc(
"Perform verification checks on CallingContextGraph."));
168 cl::desc(
"Perform frequent verification checks on nodes."));
171 "memprof-import-summary",
172 cl::desc(
"Import summary to use for testing the ThinLTO backend via opt"),
178 cl::desc(
"Max depth to recursively search for missing "
179 "frames through tail calls."));
184 cl::desc(
"Allow cloning of callsites involved in recursive cycles"));
188 cl::desc(
"Allow cloning of contexts through recursive cycles"));
195 cl::desc(
"Merge clones before assigning functions"));
204 cl::desc(
"Allow cloning of contexts having recursive cycles"));
210 cl::desc(
"Minimum absolute count for promoted target to be inlinable"));
214 "enable-memprof-context-disambiguation",
cl::Hidden,
215 cl::desc(
"Enable MemProf context disambiguation"));
221 cl::desc(
"Linking with hot/cold operator new interfaces"));
226 "Require target function definition when promoting indirect calls"));
233 cl::desc(
"Number of largest cold contexts to consider important"));
237 cl::desc(
"Enables edge fixup for important contexts"));
259template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
260class CallsiteContextGraph {
262 CallsiteContextGraph() =
default;
263 CallsiteContextGraph(
const CallsiteContextGraph &) =
default;
264 CallsiteContextGraph(CallsiteContextGraph &&) =
default;
268 EmitRemark =
nullptr,
269 bool AllowExtraAnalysis =
false);
273 void identifyClones();
280 bool assignFunctions();
286 EmitRemark =
nullptr)
const;
289 const CallsiteContextGraph &CCG) {
295 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
297 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
299 void exportToDot(std::string Label)
const;
302 struct FuncInfo final
303 :
public std::pair<FuncTy *, unsigned > {
304 using Base = std::pair<FuncTy *, unsigned>;
306 FuncInfo(FuncTy *
F =
nullptr,
unsigned CloneNo = 0) :
Base(
F, CloneNo) {}
307 explicit operator bool()
const {
return this->first !=
nullptr; }
308 FuncTy *func()
const {
return this->first; }
309 unsigned cloneNo()
const {
return this->second; }
313 struct CallInfo final :
public std::pair<CallTy, unsigned > {
314 using Base = std::pair<CallTy, unsigned>;
316 CallInfo(CallTy
Call =
nullptr,
unsigned CloneNo = 0)
318 explicit operator bool()
const {
return (
bool)this->first; }
319 CallTy call()
const {
return this->first; }
320 unsigned cloneNo()
const {
return this->second; }
321 void setCloneNo(
unsigned N) { this->second =
N; }
323 if (!
operator bool()) {
329 OS <<
"\t(clone " << cloneNo() <<
")";
355 bool Recursive =
false;
382 std::vector<std::shared_ptr<ContextEdge>> CalleeEdges;
386 std::vector<std::shared_ptr<ContextEdge>> CallerEdges;
390 bool useCallerEdgesForContextInfo()
const {
395 assert(!CalleeEdges.empty() || CallerEdges.empty() || IsAllocation ||
413 for (
auto &Edge : CalleeEdges.empty() ? CallerEdges : CalleeEdges)
414 Count += Edge->getContextIds().size();
418 CalleeEdges, useCallerEdgesForContextInfo()
420 : std::vector<std::shared_ptr<ContextEdge>>());
421 for (
const auto &Edge : Edges)
428 uint8_t computeAllocType()
const {
433 CalleeEdges, useCallerEdgesForContextInfo()
435 : std::vector<std::shared_ptr<ContextEdge>>());
436 for (
const auto &Edge : Edges) {
447 bool emptyContextIds()
const {
449 CalleeEdges, useCallerEdgesForContextInfo()
451 : std::vector<std::shared_ptr<ContextEdge>>());
452 for (
const auto &Edge : Edges) {
453 if (!Edge->getContextIds().empty())
460 std::vector<ContextNode *> Clones;
463 ContextNode *CloneOf =
nullptr;
465 ContextNode(
bool IsAllocation) : IsAllocation(IsAllocation),
Call() {}
467 ContextNode(
bool IsAllocation, CallInfo
C)
468 : IsAllocation(IsAllocation),
Call(
C) {}
470 void addClone(ContextNode *Clone) {
472 CloneOf->Clones.push_back(Clone);
473 Clone->CloneOf = CloneOf;
475 Clones.push_back(Clone);
477 Clone->CloneOf =
this;
481 ContextNode *getOrigNode() {
488 unsigned int ContextId);
490 ContextEdge *findEdgeFromCallee(
const ContextNode *Callee);
491 ContextEdge *findEdgeFromCaller(
const ContextNode *Caller);
492 void eraseCalleeEdge(
const ContextEdge *Edge);
493 void eraseCallerEdge(
const ContextEdge *Edge);
495 void setCall(CallInfo
C) {
Call = std::move(
C); }
497 bool hasCall()
const {
return (
bool)
Call.call(); }
503 bool isRemoved()
const {
539 bool IsBackedge =
false;
546 : Callee(Callee), Caller(Caller), AllocTypes(
AllocType),
547 ContextIds(std::move(ContextIds)) {}
553 inline void clear() {
563 inline bool isRemoved()
const {
564 if (Callee || Caller)
585 void removeNoneTypeCalleeEdges(ContextNode *
Node);
586 void removeNoneTypeCallerEdges(ContextNode *
Node);
588 recursivelyRemoveNoneTypeCalleeEdges(ContextNode *
Node,
594 template <
class NodeT,
class IteratorT>
595 std::vector<uint64_t>
600 ContextNode *addAllocNode(CallInfo
Call,
const FuncTy *
F);
603 template <
class NodeT,
class IteratorT>
604 void addStackNodesForMIB(
608 std::map<uint64_t, uint32_t> &TotalSizeToContextIdTopNCold);
613 void updateStackNodes();
622 void fixupImportantContexts();
626 void handleCallsitesWithMultipleTargets();
629 void markBackedges();
639 bool partitionCallsByCallee(
641 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode);
648 std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;
655 using EdgeIter =
typename std::vector<std::shared_ptr<ContextEdge>>
::iterator;
660 struct CallContextInfo {
664 std::vector<uint64_t> StackIds;
678 void removeEdgeFromGraph(ContextEdge *Edge, EdgeIter *EI =
nullptr,
679 bool CalleeIter =
true);
687 void assignStackNodesPostOrder(
701 void propagateDuplicateContextIds(
707 void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode,
715 return static_cast<const DerivedCCG *
>(
this)->getStackId(IdOrIndex);
725 calleesMatch(CallTy
Call, EdgeIter &EI,
730 const FuncTy *getCalleeFunc(CallTy
Call) {
731 return static_cast<DerivedCCG *
>(
this)->getCalleeFunc(
Call);
737 bool calleeMatchesFunc(
738 CallTy
Call,
const FuncTy *Func,
const FuncTy *CallerFunc,
739 std::vector<std::pair<CallTy, FuncTy *>> &FoundCalleeChain) {
740 return static_cast<DerivedCCG *
>(
this)->calleeMatchesFunc(
741 Call, Func, CallerFunc, FoundCalleeChain);
745 bool sameCallee(CallTy Call1, CallTy Call2) {
746 return static_cast<DerivedCCG *
>(
this)->sameCallee(Call1, Call2);
751 std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy
Call) {
752 return static_cast<DerivedCCG *
>(
this)->getStackIdsWithContextNodesForCall(
758 return static_cast<DerivedCCG *
>(
this)->getLastStackId(
Call);
764 static_cast<DerivedCCG *
>(
this)->updateAllocationCall(
Call,
AllocType);
769 return static_cast<const DerivedCCG *
>(
this)->getAllocationCallType(
Call);
774 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc) {
775 static_cast<DerivedCCG *
>(
this)->updateCall(CallerCall, CalleeFunc);
781 FuncInfo cloneFunctionForCallsite(
783 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
784 return static_cast<DerivedCCG *
>(
this)->cloneFunctionForCallsite(
785 Func,
Call, CallMap, CallsWithMetadataInFunc, CloneNo);
790 std::string getLabel(
const FuncTy *Func,
const CallTy
Call,
791 unsigned CloneNo)
const {
792 return static_cast<const DerivedCCG *
>(
this)->getLabel(Func,
Call, CloneNo);
796 ContextNode *createNewNode(
bool IsAllocation,
const FuncTy *
F =
nullptr,
797 CallInfo
C = CallInfo()) {
798 NodeOwner.push_back(std::make_unique<ContextNode>(IsAllocation,
C));
799 auto *NewNode = NodeOwner.back().get();
801 NodeToCallingFunc[NewNode] =
F;
802 NewNode->NodeId = NodeOwner.size();
807 ContextNode *getNodeForInst(
const CallInfo &
C);
808 ContextNode *getNodeForAlloc(
const CallInfo &
C);
809 ContextNode *getNodeForStackId(
uint64_t StackId);
831 moveEdgeToNewCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
838 void moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
839 ContextNode *NewCallee,
840 bool NewClone =
false,
848 void moveCalleeEdgeToNewCaller(
const std::shared_ptr<ContextEdge> &Edge,
849 ContextNode *NewCaller);
860 void mergeNodeCalleeClones(
865 void findOtherCallersToShareMerge(
866 ContextNode *
Node, std::vector<std::shared_ptr<ContextEdge>> &CalleeEdges,
894 struct ImportantContextInfo {
896 std::vector<uint64_t> StackIds;
899 unsigned MaxLength = 0;
903 std::map<std::vector<uint64_t>, ContextNode *> StackIdsToNode;
912 void recordStackNode(std::vector<uint64_t> &StackIds, ContextNode *
Node,
926 auto Size = StackIds.size();
927 for (
auto Id : Ids) {
928 auto &Entry = ImportantContextIdInfo[Id];
929 Entry.StackIdsToNode[StackIds] =
Node;
931 if (
Size > Entry.MaxLength)
932 Entry.MaxLength =
Size;
941 std::vector<std::unique_ptr<ContextNode>> NodeOwner;
947 unsigned int LastContextId = 0;
950template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
952 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode;
953template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
955 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge;
956template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
958 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::FuncInfo;
959template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
961 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::CallInfo;
964class ModuleCallsiteContextGraph
965 :
public CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
968 ModuleCallsiteContextGraph(
970 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter);
973 friend CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
976 uint64_t getStackId(uint64_t IdOrIndex)
const;
978 bool calleeMatchesFunc(
979 Instruction *
Call,
const Function *Func,
const Function *CallerFunc,
980 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain);
981 bool sameCallee(Instruction *Call1, Instruction *Call2);
982 bool findProfiledCalleeThroughTailCalls(
983 const Function *ProfiledCallee,
Value *CurCallee,
unsigned Depth,
984 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
985 bool &FoundMultipleCalleeChains);
986 uint64_t getLastStackId(Instruction *
Call);
987 std::vector<uint64_t> getStackIdsWithContextNodesForCall(Instruction *
Call);
990 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
991 CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
993 cloneFunctionForCallsite(FuncInfo &Func, CallInfo &
Call,
994 DenseMap<CallInfo, CallInfo> &CallMap,
995 std::vector<CallInfo> &CallsWithMetadataInFunc,
997 std::string getLabel(
const Function *Func,
const Instruction *
Call,
998 unsigned CloneNo)
const;
1001 llvm::function_ref<OptimizationRemarkEmitter &(
Function *)> OREGetter;
1007struct IndexCall :
public PointerUnion<CallsiteInfo *, AllocInfo *> {
1008 IndexCall() : PointerUnion() {}
1009 IndexCall(std::nullptr_t) : IndexCall() {}
1010 IndexCall(CallsiteInfo *StackNode) : PointerUnion(StackNode) {}
1011 IndexCall(AllocInfo *AllocNode) : PointerUnion(AllocNode) {}
1012 IndexCall(PointerUnion PT) : PointerUnion(PT) {}
1014 IndexCall *operator->() {
return this; }
1016 void print(raw_ostream &OS)
const {
1017 PointerUnion<CallsiteInfo *, AllocInfo *>
Base = *
this;
1042class IndexCallsiteContextGraph
1043 :
public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
1046 IndexCallsiteContextGraph(
1047 ModuleSummaryIndex &Index,
1051 ~IndexCallsiteContextGraph() {
1056 for (
auto &
I : FunctionCalleesToSynthesizedCallsiteInfos) {
1058 for (
auto &Callsite :
I.second)
1059 FS->addCallsite(std::move(*Callsite.second));
1064 friend CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
1067 uint64_t getStackId(uint64_t IdOrIndex)
const;
1068 const FunctionSummary *getCalleeFunc(IndexCall &
Call);
1069 bool calleeMatchesFunc(
1070 IndexCall &
Call,
const FunctionSummary *Func,
1071 const FunctionSummary *CallerFunc,
1072 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain);
1073 bool sameCallee(IndexCall &Call1, IndexCall &Call2);
1074 bool findProfiledCalleeThroughTailCalls(
1075 ValueInfo ProfiledCallee, ValueInfo CurCallee,
unsigned Depth,
1076 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
1077 bool &FoundMultipleCalleeChains);
1078 uint64_t getLastStackId(IndexCall &
Call);
1079 std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &
Call);
1082 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
1083 CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
1084 IndexCall>::FuncInfo
1085 cloneFunctionForCallsite(FuncInfo &Func, CallInfo &
Call,
1086 DenseMap<CallInfo, CallInfo> &CallMap,
1087 std::vector<CallInfo> &CallsWithMetadataInFunc,
1089 std::string getLabel(
const FunctionSummary *Func,
const IndexCall &
Call,
1090 unsigned CloneNo)
const;
1091 DenseSet<GlobalValue::GUID> findAliaseeGUIDsPrevailingInDifferentModule();
1095 std::map<const FunctionSummary *, ValueInfo> FSToVIMap;
1097 const ModuleSummaryIndex &Index;
1105 DenseMap<FunctionSummary *,
1106 std::map<ValueInfo, std::unique_ptr<CallsiteInfo>>>
1107 FunctionCalleesToSynthesizedCallsiteInfos;
1118 :
public DenseMapInfo<std::pair<IndexCall, unsigned>> {};
1121 :
public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {};
1142template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1143bool allocTypesMatch(
1144 const std::vector<uint8_t> &InAllocTypes,
1145 const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>>
1149 assert(InAllocTypes.size() == Edges.size());
1151 InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(), Edges.end(),
1153 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &r) {
1157 if (l == (uint8_t)AllocationType::None ||
1158 r->AllocTypes == (uint8_t)AllocationType::None)
1160 return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes);
1169template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1170bool allocTypesMatchClone(
1171 const std::vector<uint8_t> &InAllocTypes,
1172 const ContextNode<DerivedCCG, FuncTy, CallTy> *Clone) {
1173 const ContextNode<DerivedCCG, FuncTy, CallTy> *
Node = Clone->CloneOf;
1177 assert(InAllocTypes.size() ==
Node->CalleeEdges.size());
1181 for (
const auto &
E : Clone->CalleeEdges) {
1183 EdgeCalleeMap[
E->Callee] =
E->AllocTypes;
1187 for (
unsigned I = 0;
I <
Node->CalleeEdges.size();
I++) {
1188 auto Iter = EdgeCalleeMap.
find(
Node->CalleeEdges[
I]->Callee);
1190 if (Iter == EdgeCalleeMap.
end())
1198 if (allocTypeToUse(Iter->second) != allocTypeToUse(InAllocTypes[
I]))
1206template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1207typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1208CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst(
1209 const CallInfo &
C) {
1210 ContextNode *
Node = getNodeForAlloc(
C);
1214 return NonAllocationCallToContextNodeMap.lookup(
C);
1217template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1218typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1219CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForAlloc(
1220 const CallInfo &
C) {
1221 return AllocationCallToContextNodeMap.lookup(
C);
1224template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1225typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1226CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForStackId(
1228 auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId);
1229 if (StackEntryNode != StackEntryIdToContextNodeMap.end())
1230 return StackEntryNode->second;
1234template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1235void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1237 unsigned int ContextId) {
1238 for (
auto &
Edge : CallerEdges) {
1239 if (
Edge->Caller == Caller) {
1241 Edge->getContextIds().insert(ContextId);
1245 std::shared_ptr<ContextEdge>
Edge = std::make_shared<ContextEdge>(
1246 this, Caller, (uint8_t)
AllocType, DenseSet<uint32_t>({ContextId}));
1247 CallerEdges.push_back(
Edge);
1251template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1252void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::removeEdgeFromGraph(
1253 ContextEdge *
Edge, EdgeIter *EI,
bool CalleeIter) {
1269 auto CalleeCallerCount =
Callee->CallerEdges.size();
1270 auto CallerCalleeCount =
Caller->CalleeEdges.size();
1275 }
else if (CalleeIter) {
1277 *EI =
Caller->CalleeEdges.erase(*EI);
1280 *EI =
Callee->CallerEdges.erase(*EI);
1282 assert(
Callee->CallerEdges.size() < CalleeCallerCount);
1283 assert(
Caller->CalleeEdges.size() < CallerCalleeCount);
1286template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1287void CallsiteContextGraph<
1288 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *Node) {
1289 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();) {
1291 if (
Edge->AllocTypes == (uint8_t)AllocationType::None) {
1293 removeEdgeFromGraph(
Edge.get(), &EI,
true);
1299template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1300void CallsiteContextGraph<
1301 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCallerEdges(ContextNode *Node) {
1302 for (
auto EI =
Node->CallerEdges.begin(); EI !=
Node->CallerEdges.end();) {
1304 if (
Edge->AllocTypes == (uint8_t)AllocationType::None) {
1306 Edge->Caller->eraseCalleeEdge(
Edge.get());
1307 EI =
Node->CallerEdges.erase(EI);
1313template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1314typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1315CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1316 findEdgeFromCallee(
const ContextNode *Callee) {
1317 for (
const auto &
Edge : CalleeEdges)
1318 if (
Edge->Callee == Callee)
1323template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1324typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1325CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1326 findEdgeFromCaller(
const ContextNode *Caller) {
1327 for (
const auto &
Edge : CallerEdges)
1328 if (
Edge->Caller == Caller)
1333template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1334void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1335 eraseCalleeEdge(
const ContextEdge *
Edge) {
1337 CalleeEdges, [
Edge](
const std::shared_ptr<ContextEdge> &CalleeEdge) {
1338 return CalleeEdge.get() ==
Edge;
1340 assert(EI != CalleeEdges.end());
1341 CalleeEdges.erase(EI);
1344template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1345void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1346 eraseCallerEdge(
const ContextEdge *
Edge) {
1348 CallerEdges, [
Edge](
const std::shared_ptr<ContextEdge> &CallerEdge) {
1349 return CallerEdge.get() ==
Edge;
1351 assert(EI != CallerEdges.end());
1352 CallerEdges.erase(EI);
1355template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1356uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType(
1357 DenseSet<uint32_t> &ContextIds)
const {
1359 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
1360 uint8_t
AllocType = (uint8_t)AllocationType::None;
1361 for (
auto Id : ContextIds) {
1362 AllocType |= (uint8_t)ContextIdToAllocationType.at(Id);
1370template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1372CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypesImpl(
1373 const DenseSet<uint32_t> &Node1Ids,
1374 const DenseSet<uint32_t> &Node2Ids)
const {
1376 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
1377 uint8_t
AllocType = (uint8_t)AllocationType::None;
1378 for (
auto Id : Node1Ids) {
1379 if (!Node2Ids.
count(Id))
1381 AllocType |= (uint8_t)ContextIdToAllocationType.at(Id);
1389template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1390uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypes(
1391 const DenseSet<uint32_t> &Node1Ids,
1392 const DenseSet<uint32_t> &Node2Ids)
const {
1393 if (Node1Ids.
size() < Node2Ids.
size())
1394 return intersectAllocTypesImpl(Node1Ids, Node2Ids);
1396 return intersectAllocTypesImpl(Node2Ids, Node1Ids);
1399template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1400typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1401CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode(
1402 CallInfo
Call,
const FuncTy *
F) {
1404 ContextNode *AllocNode = createNewNode(
true,
F,
Call);
1405 AllocationCallToContextNodeMap[
Call] = AllocNode;
1407 AllocNode->OrigStackOrAllocId = LastContextId;
1410 AllocNode->AllocTypes = (uint8_t)AllocationType::None;
1426template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1427template <
class NodeT,
class IteratorT>
1428void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
1429 ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext,
1432 std::map<uint64_t, uint32_t> &TotalSizeToContextIdTopNCold) {
1438 ContextIdToAllocationType[++LastContextId] =
AllocType;
1440 bool IsImportant =
false;
1441 if (!ContextSizeInfo.
empty()) {
1442 auto &
Entry = ContextIdToContextSizeInfos[LastContextId];
1446 uint64_t TotalCold = 0;
1447 for (
auto &CSI : ContextSizeInfo)
1448 TotalCold += CSI.TotalSize;
1454 TotalCold > TotalSizeToContextIdTopNCold.begin()->first) {
1457 auto IdToRemove = TotalSizeToContextIdTopNCold.begin()->second;
1458 TotalSizeToContextIdTopNCold.erase(
1459 TotalSizeToContextIdTopNCold.begin());
1460 assert(ImportantContextIdInfo.count(IdToRemove));
1461 ImportantContextIdInfo.erase(IdToRemove);
1463 TotalSizeToContextIdTopNCold[TotalCold] = LastContextId;
1467 Entry.insert(
Entry.begin(), ContextSizeInfo.begin(), ContextSizeInfo.end());
1471 AllocNode->AllocTypes |= (uint8_t)
AllocType;
1476 ContextNode *PrevNode = AllocNode;
1480 SmallSet<uint64_t, 8> StackIdSet;
1483 ContextIter != StackContext.
end(); ++ContextIter) {
1484 auto StackId = getStackId(*ContextIter);
1486 ImportantContextIdInfo[LastContextId].StackIds.push_back(StackId);
1487 ContextNode *StackNode = getNodeForStackId(StackId);
1489 StackNode = createNewNode(
false);
1490 StackEntryIdToContextNodeMap[StackId] = StackNode;
1491 StackNode->OrigStackOrAllocId = StackId;
1496 auto Ins = StackIdSet.
insert(StackId);
1498 StackNode->Recursive =
true;
1500 StackNode->AllocTypes |= (uint8_t)
AllocType;
1501 PrevNode->addOrUpdateCallerEdge(StackNode,
AllocType, LastContextId);
1502 PrevNode = StackNode;
1506template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1508CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds(
1509 const DenseSet<uint32_t> &StackSequenceContextIds,
1510 DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
1511 DenseSet<uint32_t> NewContextIds;
1512 for (
auto OldId : StackSequenceContextIds) {
1513 NewContextIds.
insert(++LastContextId);
1514 OldToNewContextIds[OldId].insert(LastContextId);
1515 assert(ContextIdToAllocationType.count(OldId));
1517 ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId];
1518 auto CSI = ContextIdToContextSizeInfos.find(OldId);
1519 if (CSI != ContextIdToContextSizeInfos.end())
1520 ContextIdToContextSizeInfos[LastContextId] = CSI->second;
1521 if (DotAllocContextIds.
contains(OldId))
1522 DotAllocContextIds.
insert(LastContextId);
1524 return NewContextIds;
1527template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1528void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1529 propagateDuplicateContextIds(
1530 const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
1532 auto GetNewIds = [&OldToNewContextIds](
const DenseSet<uint32_t> &ContextIds) {
1533 DenseSet<uint32_t> NewIds;
1534 for (
auto Id : ContextIds)
1535 if (
auto NewId = OldToNewContextIds.find(Id);
1536 NewId != OldToNewContextIds.end())
1542 auto UpdateCallers = [&](ContextNode *
Node,
1543 DenseSet<const ContextEdge *> &Visited,
1544 auto &&UpdateCallers) ->
void {
1545 for (
const auto &
Edge :
Node->CallerEdges) {
1549 ContextNode *NextNode =
Edge->Caller;
1550 DenseSet<uint32_t> NewIdsToAdd = GetNewIds(
Edge->getContextIds());
1553 if (!NewIdsToAdd.
empty()) {
1554 Edge->getContextIds().insert_range(NewIdsToAdd);
1555 UpdateCallers(NextNode, Visited, UpdateCallers);
1560 DenseSet<const ContextEdge *> Visited;
1561 for (
auto &Entry : AllocationCallToContextNodeMap) {
1563 UpdateCallers(Node, Visited, UpdateCallers);
1567template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1568void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
1569 ContextNode *NewNode, ContextNode *OrigNode,
bool TowardsCallee,
1572 DenseSet<uint32_t> RemainingContextIds) {
1574 TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges;
1575 DenseSet<uint32_t> RecursiveContextIds;
1576 DenseSet<uint32_t> AllCallerContextIds;
1581 for (
auto &CE : OrigEdges) {
1582 AllCallerContextIds.
reserve(
CE->getContextIds().size());
1583 for (
auto Id :
CE->getContextIds())
1584 if (!AllCallerContextIds.
insert(Id).second)
1585 RecursiveContextIds.
insert(Id);
1589 for (
auto EI = OrigEdges.begin(); EI != OrigEdges.end();) {
1591 DenseSet<uint32_t> NewEdgeContextIds;
1592 DenseSet<uint32_t> NotFoundContextIds;
1596 set_subtract(
Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds,
1597 NotFoundContextIds);
1600 if (RecursiveContextIds.
empty()) {
1603 RemainingContextIds.
swap(NotFoundContextIds);
1613 DenseSet<uint32_t> NonRecursiveRemainingCurEdgeIds =
1615 set_subtract(RemainingContextIds, NonRecursiveRemainingCurEdgeIds);
1618 if (NewEdgeContextIds.
empty()) {
1622 if (TowardsCallee) {
1623 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1624 auto NewEdge = std::make_shared<ContextEdge>(
1625 Edge->Callee, NewNode, NewAllocType, std::move(NewEdgeContextIds));
1626 NewNode->CalleeEdges.push_back(NewEdge);
1627 NewEdge->Callee->CallerEdges.push_back(NewEdge);
1629 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1630 auto NewEdge = std::make_shared<ContextEdge>(
1631 NewNode,
Edge->Caller, NewAllocType, std::move(NewEdgeContextIds));
1632 NewNode->CallerEdges.push_back(NewEdge);
1633 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
1636 if (
Edge->getContextIds().empty()) {
1637 removeEdgeFromGraph(
Edge.get(), &EI, TowardsCallee);
1644template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1646 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) {
1650 assert(!Edge->ContextIds.empty());
1653template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1655 bool CheckEdges =
true) {
1656 if (
Node->isRemoved())
1660 auto NodeContextIds =
Node->getContextIds();
1664 if (
Node->CallerEdges.size()) {
1666 Node->CallerEdges.front()->ContextIds);
1670 set_union(CallerEdgeContextIds, Edge->ContextIds);
1677 NodeContextIds == CallerEdgeContextIds ||
1680 if (
Node->CalleeEdges.size()) {
1682 Node->CalleeEdges.front()->ContextIds);
1686 set_union(CalleeEdgeContextIds, Edge->getContextIds());
1692 NodeContextIds == CalleeEdgeContextIds);
1701 for (
const auto &
E :
Node->CalleeEdges)
1707template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1708void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1709 assignStackNodesPostOrder(ContextNode *Node,
1710 DenseSet<const ContextNode *> &Visited,
1711 DenseMap<uint64_t, std::vector<CallContextInfo>>
1712 &StackIdToMatchingCalls,
1713 DenseMap<CallInfo, CallInfo> &CallToMatchingCall,
1714 const DenseSet<uint32_t> &ImportantContextIds) {
1722 auto CallerEdges =
Node->CallerEdges;
1723 for (
auto &
Edge : CallerEdges) {
1725 if (
Edge->isRemoved()) {
1729 assignStackNodesPostOrder(
Edge->Caller, Visited, StackIdToMatchingCalls,
1730 CallToMatchingCall, ImportantContextIds);
1739 if (
Node->IsAllocation ||
1740 !StackIdToMatchingCalls.count(
Node->OrigStackOrAllocId))
1743 auto &Calls = StackIdToMatchingCalls[
Node->OrigStackOrAllocId];
1747 if (Calls.size() == 1) {
1748 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[0];
1749 if (Ids.size() == 1) {
1750 assert(SavedContextIds.empty());
1752 assert(Node == getNodeForStackId(Ids[0]));
1753 if (
Node->Recursive)
1756 NonAllocationCallToContextNodeMap[
Call] =
Node;
1758 recordStackNode(Ids, Node,
Node->getContextIds(), ImportantContextIds);
1766 uint64_t LastId =
Node->OrigStackOrAllocId;
1767 ContextNode *LastNode = getNodeForStackId(LastId);
1770 assert(LastNode == Node);
1772 ContextNode *LastNode =
Node;
1777 DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
1779 [[maybe_unused]]
bool PrevIterCreatedNode =
false;
1780 bool CreatedNode =
false;
1781 for (
unsigned I = 0;
I < Calls.size();
1782 I++, PrevIterCreatedNode = CreatedNode) {
1783 CreatedNode =
false;
1784 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
1787 if (SavedContextIds.empty()) {
1794 auto MatchingCall = CallToMatchingCall[
Call];
1795 if (!NonAllocationCallToContextNodeMap.contains(MatchingCall)) {
1799 assert(
I > 0 && !PrevIterCreatedNode);
1802 NonAllocationCallToContextNodeMap[MatchingCall]->MatchingCalls.push_back(
1807 assert(LastId == Ids.back());
1816 ContextNode *PrevNode = LastNode;
1820 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
1822 ContextNode *CurNode = getNodeForStackId(Id);
1826 assert(!CurNode->Recursive);
1828 auto *
Edge = CurNode->findEdgeFromCaller(PrevNode);
1840 if (SavedContextIds.empty()) {
1849 ContextNode *NewNode = createNewNode(
false, Func,
Call);
1850 NonAllocationCallToContextNodeMap[
Call] = NewNode;
1852 NewNode->AllocTypes = computeAllocType(SavedContextIds);
1854 ContextNode *FirstNode = getNodeForStackId(Ids[0]);
1860 connectNewNode(NewNode, FirstNode,
true, SavedContextIds);
1865 connectNewNode(NewNode, LastNode,
false, SavedContextIds);
1870 for (
auto Id : Ids) {
1871 ContextNode *CurNode = getNodeForStackId(Id);
1878 auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode);
1885 set_subtract(PrevEdge->getContextIds(), SavedContextIds);
1886 if (PrevEdge->getContextIds().empty())
1887 removeEdgeFromGraph(PrevEdge);
1892 CurNode->AllocTypes = CurNode->CalleeEdges.empty()
1893 ? (uint8_t)AllocationType::None
1894 : CurNode->computeAllocType();
1898 recordStackNode(Ids, NewNode, SavedContextIds, ImportantContextIds);
1902 for (
auto Id : Ids) {
1903 ContextNode *CurNode = getNodeForStackId(Id);
1912template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1913void CallsiteContextGraph<DerivedCCG, FuncTy,
1914 CallTy>::fixupImportantContexts() {
1915 if (ImportantContextIdInfo.empty())
1919 NumImportantContextIds = ImportantContextIdInfo.size();
1925 exportToDot(
"beforestackfixup");
1950 for (
auto &[CurContextId, Info] : ImportantContextIdInfo) {
1951 if (
Info.StackIdsToNode.empty())
1954 ContextNode *PrevNode =
nullptr;
1955 ContextNode *CurNode =
nullptr;
1956 DenseSet<const ContextEdge *> VisitedEdges;
1957 ArrayRef<uint64_t> AllStackIds(
Info.StackIds);
1960 for (
unsigned I = 0;
I < AllStackIds.size();
I++, PrevNode = CurNode) {
1964 auto LenToEnd = AllStackIds.size() -
I;
1972 auto CheckStackIds = AllStackIds.slice(
I, Len);
1973 auto EntryIt =
Info.StackIdsToNode.find(CheckStackIds);
1974 if (EntryIt ==
Info.StackIdsToNode.end())
1976 CurNode = EntryIt->second;
1993 auto *CurEdge = PrevNode->findEdgeFromCaller(CurNode);
1996 if (CurEdge->getContextIds().insert(CurContextId).second) {
1997 NumFixupEdgeIdsInserted++;
2002 NumFixupEdgesAdded++;
2003 DenseSet<uint32_t> ContextIds({CurContextId});
2004 auto AllocType = computeAllocType(ContextIds);
2005 auto NewEdge = std::make_shared<ContextEdge>(
2006 PrevNode, CurNode,
AllocType, std::move(ContextIds));
2007 PrevNode->CallerEdges.push_back(NewEdge);
2008 CurNode->CalleeEdges.push_back(NewEdge);
2010 CurEdge = NewEdge.get();
2013 VisitedEdges.
insert(CurEdge);
2016 for (
auto &
Edge : PrevNode->CallerEdges) {
2020 Edge->getContextIds().erase(CurContextId);
2028template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2029void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
2037 DenseMap<uint64_t, std::vector<CallContextInfo>> StackIdToMatchingCalls;
2038 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
2039 for (
auto &
Call : CallsWithMetadata) {
2041 if (AllocationCallToContextNodeMap.count(
Call))
2043 auto StackIdsWithContextNodes =
2044 getStackIdsWithContextNodesForCall(
Call.call());
2047 if (StackIdsWithContextNodes.empty())
2051 StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back(
2052 {
Call.call(), StackIdsWithContextNodes,
Func, {}});
2062 DenseMap<uint32_t, DenseSet<uint32_t>> OldToNewContextIds;
2066 DenseMap<CallInfo, CallInfo> CallToMatchingCall;
2067 for (
auto &It : StackIdToMatchingCalls) {
2068 auto &Calls = It.getSecond();
2070 if (Calls.size() == 1) {
2071 auto &Ids = Calls[0].StackIds;
2072 if (Ids.size() == 1)
2085 DenseMap<const FuncTy *, unsigned> FuncToIndex;
2086 for (
const auto &[Idx, CallCtxInfo] :
enumerate(Calls))
2087 FuncToIndex.
insert({CallCtxInfo.Func, Idx});
2090 [&FuncToIndex](
const CallContextInfo &
A,
const CallContextInfo &
B) {
2091 return A.StackIds.size() >
B.StackIds.size() ||
2092 (
A.StackIds.size() ==
B.StackIds.size() &&
2093 (
A.StackIds <
B.StackIds ||
2094 (
A.StackIds ==
B.StackIds &&
2095 FuncToIndex[
A.Func] < FuncToIndex[
B.Func])));
2101 uint64_t LastId = It.getFirst();
2102 ContextNode *LastNode = getNodeForStackId(LastId);
2106 if (LastNode->Recursive)
2111 DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
2119 DenseSet<const FuncTy *> MatchingIdsFuncSet;
2122 for (
unsigned I = 0;
I < Calls.size();
I++) {
2123 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
2124 assert(SavedContextIds.empty());
2125 assert(LastId == Ids.back());
2130 if (
I > 0 && Ids != Calls[
I - 1].StackIds)
2131 MatchingIdsFuncSet.
clear();
2138 DenseSet<uint32_t> StackSequenceContextIds = LastNodeContextIds;
2140 ContextNode *PrevNode = LastNode;
2141 ContextNode *CurNode = LastNode;
2146 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
2148 CurNode = getNodeForStackId(Id);
2152 if (CurNode->Recursive) {
2157 auto *
Edge = CurNode->findEdgeFromCaller(PrevNode);
2178 if (StackSequenceContextIds.
empty()) {
2191 if (Ids.back() != getLastStackId(
Call)) {
2192 for (
const auto &PE : LastNode->CallerEdges) {
2193 set_subtract(StackSequenceContextIds, PE->getContextIds());
2194 if (StackSequenceContextIds.
empty())
2198 if (StackSequenceContextIds.
empty())
2210 MatchingIdsFuncSet.
insert(Func);
2217 bool DuplicateContextIds =
false;
2218 for (
unsigned J =
I + 1; J < Calls.size(); J++) {
2219 auto &CallCtxInfo = Calls[J];
2220 auto &NextIds = CallCtxInfo.StackIds;
2223 auto *NextFunc = CallCtxInfo.Func;
2224 if (NextFunc != Func) {
2227 DuplicateContextIds =
true;
2230 auto &NextCall = CallCtxInfo.Call;
2231 CallToMatchingCall[NextCall] =
Call;
2242 OldToNewContextIds.
reserve(OldToNewContextIds.
size() +
2243 StackSequenceContextIds.
size());
2246 ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds)
2247 : StackSequenceContextIds;
2248 assert(!SavedContextIds.empty());
2250 if (!DuplicateContextIds) {
2254 set_subtract(LastNodeContextIds, StackSequenceContextIds);
2255 if (LastNodeContextIds.
empty())
2262 propagateDuplicateContextIds(OldToNewContextIds);
2272 DenseSet<const ContextNode *> Visited;
2274 ImportantContextIdInfo.keys());
2275 for (
auto &Entry : AllocationCallToContextNodeMap)
2276 assignStackNodesPostOrder(
Entry.second, Visited, StackIdToMatchingCalls,
2277 CallToMatchingCall, ImportantContextIds);
2279 fixupImportantContexts();
2285uint64_t ModuleCallsiteContextGraph::getLastStackId(Instruction *
Call) {
2286 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
2288 return CallsiteContext.
back();
2291uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &
Call) {
2293 CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
2296 return Index.getStackIdAtIndex(CallsiteContext.
back());
2318 auto Pos =
F.getName().find_last_of(
'.');
2321 bool Err =
F.getName().drop_front(Pos + 1).getAsInteger(10, CloneNo);
2327std::string ModuleCallsiteContextGraph::getLabel(
const Function *Func,
2328 const Instruction *
Call,
2329 unsigned CloneNo)
const {
2335std::string IndexCallsiteContextGraph::getLabel(
const FunctionSummary *Func,
2336 const IndexCall &
Call,
2337 unsigned CloneNo)
const {
2338 auto VI = FSToVIMap.find(Func);
2339 assert(VI != FSToVIMap.end());
2342 return CallerName +
" -> alloc";
2345 return CallerName +
" -> " +
2347 Callsite->Clones[CloneNo]);
2351std::vector<uint64_t>
2352ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall(
2353 Instruction *
Call) {
2354 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
2356 return getStackIdsWithContextNodes<MDNode, MDNode::op_iterator>(
2360std::vector<uint64_t>
2361IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &
Call) {
2363 CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
2365 return getStackIdsWithContextNodes<CallsiteInfo,
2366 SmallVector<unsigned>::const_iterator>(
2370template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2371template <
class NodeT,
class IteratorT>
2372std::vector<uint64_t>
2373CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes(
2374 CallStack<NodeT, IteratorT> &CallsiteContext) {
2375 std::vector<uint64_t> StackIds;
2376 for (
auto IdOrIndex : CallsiteContext) {
2377 auto StackId = getStackId(IdOrIndex);
2378 ContextNode *
Node = getNodeForStackId(StackId);
2381 StackIds.push_back(StackId);
2386ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
2388 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter)
2389 :
Mod(
M), OREGetter(OREGetter) {
2393 std::map<uint64_t, uint32_t> TotalSizeToContextIdTopNCold;
2395 std::vector<CallInfo> CallsWithMetadata;
2396 for (
auto &BB :
F) {
2397 for (
auto &
I : BB) {
2400 if (
auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof)) {
2401 CallsWithMetadata.push_back(&
I);
2402 auto *AllocNode = addAllocNode(&
I, &
F);
2403 auto *CallsiteMD =
I.getMetadata(LLVMContext::MD_callsite);
2407 for (
auto &MDOp : MemProfMD->operands()) {
2409 std::vector<ContextTotalSize> ContextSizeInfo;
2411 if (MIBMD->getNumOperands() > 2) {
2412 for (
unsigned I = 2;
I < MIBMD->getNumOperands();
I++) {
2413 MDNode *ContextSizePair =
2422 ContextSizeInfo.push_back({FullStackId, TotalSize});
2428 addStackNodesForMIB<MDNode, MDNode::op_iterator>(
2429 AllocNode, StackContext, CallsiteContext,
2431 TotalSizeToContextIdTopNCold);
2436 DotAllocContextIds = AllocNode->getContextIds();
2440 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
2441 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
2444 else if (
I.getMetadata(LLVMContext::MD_callsite)) {
2445 CallsWithMetadata.push_back(&
I);
2449 if (!CallsWithMetadata.empty())
2450 FuncToCallsWithMetadata[&
F] = CallsWithMetadata;
2454 dbgs() <<
"CCG before updating call stack chains:\n";
2459 exportToDot(
"prestackupdate");
2464 exportToDot(
"poststackupdate");
2466 handleCallsitesWithMultipleTargets();
2471 for (
auto &FuncEntry : FuncToCallsWithMetadata)
2472 for (
auto &
Call : FuncEntry.second)
2473 Call.call()->setMetadata(LLVMContext::MD_callsite,
nullptr);
2479IndexCallsiteContextGraph::findAliaseeGUIDsPrevailingInDifferentModule() {
2480 DenseSet<GlobalValue::GUID> AliaseeGUIDs;
2481 for (
auto &
I : Index) {
2483 for (
auto &S :
VI.getSummaryList()) {
2488 auto *AliaseeSummary = &AS->getAliasee();
2496 !isPrevailing(
VI.getGUID(), S.get()))
2501 auto AliaseeGUID = AS->getAliaseeGUID();
2503 if (!isPrevailing(AliaseeGUID, AliaseeSummary))
2504 AliaseeGUIDs.
insert(AliaseeGUID);
2507 AliaseesPrevailingInDiffModuleFromAlias += AliaseeGUIDs.
size();
2508 return AliaseeGUIDs;
2511IndexCallsiteContextGraph::IndexCallsiteContextGraph(
2512 ModuleSummaryIndex &Index,
2522 findAliaseeGUIDsPrevailingInDifferentModule();
2526 std::map<uint64_t, uint32_t> TotalSizeToContextIdTopNCold;
2527 for (
auto &
I : Index) {
2528 auto VI = Index.getValueInfo(
I);
2529 if (GUIDsToSkip.
contains(VI.getGUID()))
2531 for (
auto &S : VI.getSummaryList()) {
2540 !isPrevailing(VI.getGUID(), S.get()))
2545 std::vector<CallInfo> CallsWithMetadata;
2546 if (!
FS->allocs().empty()) {
2547 for (
auto &AN :
FS->mutableAllocs()) {
2552 if (AN.MIBs.empty())
2554 IndexCall AllocCall(&AN);
2555 CallsWithMetadata.push_back(AllocCall);
2556 auto *AllocNode = addAllocNode(AllocCall, FS);
2564 AN.ContextSizeInfos.size() == AN.MIBs.size());
2566 for (
auto &MIB : AN.MIBs) {
2569 std::vector<ContextTotalSize> ContextSizeInfo;
2570 if (!AN.ContextSizeInfos.empty()) {
2571 for (
auto [FullStackId, TotalSize] : AN.ContextSizeInfos[
I])
2572 ContextSizeInfo.push_back({FullStackId, TotalSize});
2574 addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>(
2575 AllocNode, StackContext, EmptyContext, MIB.AllocType,
2576 ContextSizeInfo, TotalSizeToContextIdTopNCold);
2582 DotAllocContextIds = AllocNode->getContextIds();
2588 AN.Versions[0] = (
uint8_t)allocTypeToUse(AllocNode->AllocTypes);
2592 if (!
FS->callsites().empty())
2593 for (
auto &SN :
FS->mutableCallsites()) {
2594 IndexCall StackNodeCall(&SN);
2595 CallsWithMetadata.push_back(StackNodeCall);
2598 if (!CallsWithMetadata.empty())
2599 FuncToCallsWithMetadata[
FS] = CallsWithMetadata;
2601 if (!
FS->allocs().empty() || !
FS->callsites().empty())
2607 dbgs() <<
"CCG before updating call stack chains:\n";
2612 exportToDot(
"prestackupdate");
2617 exportToDot(
"poststackupdate");
2619 handleCallsitesWithMultipleTargets();
2624template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2625void CallsiteContextGraph<DerivedCCG, FuncTy,
2626 CallTy>::handleCallsitesWithMultipleTargets() {
2641 std::vector<std::pair<CallInfo, ContextNode *>> NewCallToNode;
2642 for (
auto &Entry : NonAllocationCallToContextNodeMap) {
2643 auto *
Node = Entry.second;
2652 std::vector<CallInfo> AllCalls;
2653 AllCalls.reserve(
Node->MatchingCalls.size() + 1);
2654 AllCalls.push_back(
Node->Call);
2668 if (partitionCallsByCallee(
Node, AllCalls, NewCallToNode))
2671 auto It = AllCalls.begin();
2673 for (; It != AllCalls.end(); ++It) {
2676 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();
2679 if (!Edge->Callee->hasCall())
2681 assert(NodeToCallingFunc.count(Edge->Callee));
2683 if (!calleesMatch(
ThisCall.call(), EI, TailCallToContextNodeMap)) {
2692 if (
Node->Call != ThisCall) {
2693 Node->setCall(ThisCall);
2704 Node->MatchingCalls.clear();
2707 if (It == AllCalls.end()) {
2708 RemovedEdgesWithMismatchedCallees++;
2712 Node->setCall(CallInfo());
2717 for (++It; It != AllCalls.end(); ++It) {
2721 Node->MatchingCalls.push_back(ThisCall);
2730 NonAllocationCallToContextNodeMap.remove_if([](
const auto &it) {
2731 return !it.second->hasCall() || it.second->Call != it.first;
2735 for (
auto &[
Call,
Node] : NewCallToNode)
2736 NonAllocationCallToContextNodeMap[
Call] =
Node;
2740 for (
auto &[
Call,
Node] : TailCallToContextNodeMap)
2741 NonAllocationCallToContextNodeMap[
Call] =
Node;
2744template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2745bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::partitionCallsByCallee(
2747 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode) {
2751 struct CallsWithSameCallee {
2752 std::vector<CallInfo> Calls;
2753 ContextNode *
Node =
nullptr;
2759 for (
auto ThisCall : AllCalls) {
2760 auto *
F = getCalleeFunc(
ThisCall.call());
2762 CalleeFuncToCallInfo[
F].Calls.push_back(ThisCall);
2771 for (
const auto &Edge :
Node->CalleeEdges) {
2772 if (!Edge->Callee->hasCall())
2774 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee];
2775 if (CalleeFuncToCallInfo.
contains(ProfiledCalleeFunc))
2776 CalleeNodeToCallInfo[Edge->Callee] =
2777 &CalleeFuncToCallInfo[ProfiledCalleeFunc];
2783 if (CalleeNodeToCallInfo.
empty())
2795 ContextNode *UnmatchedCalleesNode =
nullptr;
2797 bool UsedOrigNode =
false;
2802 auto CalleeEdges =
Node->CalleeEdges;
2803 for (
auto &Edge : CalleeEdges) {
2804 if (!Edge->Callee->hasCall())
2809 ContextNode *CallerNodeToUse =
nullptr;
2813 if (!CalleeNodeToCallInfo.
contains(Edge->Callee)) {
2814 if (!UnmatchedCalleesNode)
2815 UnmatchedCalleesNode =
2816 createNewNode(
false, NodeToCallingFunc[
Node]);
2817 CallerNodeToUse = UnmatchedCalleesNode;
2821 auto *Info = CalleeNodeToCallInfo[Edge->Callee];
2824 if (!UsedOrigNode) {
2827 Node->MatchingCalls.clear();
2828 UsedOrigNode =
true;
2831 createNewNode(
false, NodeToCallingFunc[
Node]);
2832 assert(!Info->Calls.empty());
2835 Info->Node->setCall(Info->Calls.front());
2841 NewCallToNode.push_back({Info->Node->Call, Info->Node});
2843 CallerNodeToUse = Info->Node;
2847 if (CallerNodeToUse ==
Node)
2850 moveCalleeEdgeToNewCaller(Edge, CallerNodeToUse);
2857 for (
auto &
I : CalleeNodeToCallInfo)
2858 removeNoneTypeCallerEdges(
I.second->Node);
2859 if (UnmatchedCalleesNode)
2860 removeNoneTypeCallerEdges(UnmatchedCalleesNode);
2861 removeNoneTypeCallerEdges(
Node);
2871uint64_t IndexCallsiteContextGraph::getStackId(uint64_t IdOrIndex)
const {
2874 return Index.getStackIdAtIndex(IdOrIndex);
2877template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2878bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::calleesMatch(
2879 CallTy
Call, EdgeIter &EI,
2880 MapVector<CallInfo, ContextNode *> &TailCallToContextNodeMap) {
2882 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[
Edge->Callee];
2883 const FuncTy *CallerFunc = NodeToCallingFunc[
Edge->Caller];
2886 std::vector<std::pair<CallTy, FuncTy *>> FoundCalleeChain;
2887 if (!calleeMatchesFunc(
Call, ProfiledCalleeFunc, CallerFunc,
2892 if (FoundCalleeChain.empty())
2896 auto *CurEdge =
Callee->findEdgeFromCaller(Caller);
2900 CurEdge->ContextIds.insert_range(
Edge->ContextIds);
2901 CurEdge->AllocTypes |=
Edge->AllocTypes;
2906 auto NewEdge = std::make_shared<ContextEdge>(
2907 Callee, Caller,
Edge->AllocTypes,
Edge->ContextIds);
2908 Callee->CallerEdges.push_back(NewEdge);
2909 if (Caller ==
Edge->Caller) {
2913 EI =
Caller->CalleeEdges.insert(EI, NewEdge);
2916 "Iterator position not restored after insert and increment");
2918 Caller->CalleeEdges.push_back(NewEdge);
2923 auto *CurCalleeNode =
Edge->Callee;
2924 for (
auto &[NewCall, Func] : FoundCalleeChain) {
2925 ContextNode *NewNode =
nullptr;
2927 if (TailCallToContextNodeMap.
count(NewCall)) {
2928 NewNode = TailCallToContextNodeMap[NewCall];
2929 NewNode->AllocTypes |=
Edge->AllocTypes;
2931 FuncToCallsWithMetadata[
Func].push_back({NewCall});
2933 NewNode = createNewNode(
false, Func, NewCall);
2934 TailCallToContextNodeMap[NewCall] = NewNode;
2935 NewNode->AllocTypes =
Edge->AllocTypes;
2939 AddEdge(NewNode, CurCalleeNode);
2941 CurCalleeNode = NewNode;
2945 AddEdge(
Edge->Caller, CurCalleeNode);
2953 removeEdgeFromGraph(
Edge.get(), &EI,
true);
2965bool ModuleCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
2966 const Function *ProfiledCallee,
Value *CurCallee,
unsigned Depth,
2967 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
2968 bool &FoundMultipleCalleeChains) {
2975 FoundCalleeChain.push_back({Callsite,
F});
2990 bool FoundSingleCalleeChain =
false;
2991 for (
auto &BB : *CalleeFunc) {
2992 for (
auto &
I : BB) {
2994 if (!CB || !CB->isTailCall())
2996 auto *CalledValue = CB->getCalledOperand();
2997 auto *CalledFunction = CB->getCalledFunction();
2998 if (CalledValue && !CalledFunction) {
2999 CalledValue = CalledValue->stripPointerCasts();
3006 assert(!CalledFunction &&
3007 "Expected null called function in callsite for alias");
3010 if (!CalledFunction)
3012 if (CalledFunction == ProfiledCallee) {
3013 if (FoundSingleCalleeChain) {
3014 FoundMultipleCalleeChains =
true;
3017 FoundSingleCalleeChain =
true;
3018 FoundProfiledCalleeCount++;
3019 FoundProfiledCalleeDepth +=
Depth;
3020 if (
Depth > FoundProfiledCalleeMaxDepth)
3021 FoundProfiledCalleeMaxDepth =
Depth;
3022 SaveCallsiteInfo(&
I, CalleeFunc);
3023 }
else if (findProfiledCalleeThroughTailCalls(
3024 ProfiledCallee, CalledFunction,
Depth + 1,
3025 FoundCalleeChain, FoundMultipleCalleeChains)) {
3028 assert(!FoundMultipleCalleeChains);
3029 if (FoundSingleCalleeChain) {
3030 FoundMultipleCalleeChains =
true;
3033 FoundSingleCalleeChain =
true;
3034 SaveCallsiteInfo(&
I, CalleeFunc);
3035 }
else if (FoundMultipleCalleeChains)
3040 return FoundSingleCalleeChain;
3043const Function *ModuleCallsiteContextGraph::getCalleeFunc(Instruction *
Call) {
3045 if (!CB->getCalledOperand() || CB->isIndirectCall())
3047 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
3054bool ModuleCallsiteContextGraph::calleeMatchesFunc(
3055 Instruction *
Call,
const Function *Func,
const Function *CallerFunc,
3056 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain) {
3058 if (!CB->getCalledOperand() || CB->isIndirectCall())
3060 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
3062 if (CalleeFunc == Func)
3065 if (Alias && Alias->getAliasee() == Func)
3076 bool FoundMultipleCalleeChains =
false;
3077 if (!findProfiledCalleeThroughTailCalls(Func, CalleeVal,
Depth,
3079 FoundMultipleCalleeChains)) {
3080 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: "
3081 <<
Func->getName() <<
" from " << CallerFunc->
getName()
3082 <<
" that actually called " << CalleeVal->getName()
3083 << (FoundMultipleCalleeChains
3084 ?
" (found multiple possible chains)"
3087 if (FoundMultipleCalleeChains)
3088 FoundProfiledCalleeNonUniquelyCount++;
3095bool ModuleCallsiteContextGraph::sameCallee(Instruction *Call1,
3096 Instruction *Call2) {
3098 if (!CB1->getCalledOperand() || CB1->isIndirectCall())
3100 auto *CalleeVal1 = CB1->getCalledOperand()->stripPointerCasts();
3103 if (!CB2->getCalledOperand() || CB2->isIndirectCall())
3105 auto *CalleeVal2 = CB2->getCalledOperand()->stripPointerCasts();
3107 return CalleeFunc1 == CalleeFunc2;
3110bool IndexCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
3111 ValueInfo ProfiledCallee, ValueInfo CurCallee,
unsigned Depth,
3112 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
3113 bool &FoundMultipleCalleeChains) {
3119 auto CreateAndSaveCallsiteInfo = [&](ValueInfo
Callee, FunctionSummary *
FS) {
3122 if (!FunctionCalleesToSynthesizedCallsiteInfos.count(FS) ||
3123 !FunctionCalleesToSynthesizedCallsiteInfos[FS].count(Callee))
3126 FunctionCalleesToSynthesizedCallsiteInfos[
FS][
Callee] =
3127 std::make_unique<CallsiteInfo>(Callee, SmallVector<unsigned>());
3128 CallsiteInfo *NewCallsiteInfo =
3129 FunctionCalleesToSynthesizedCallsiteInfos[
FS][
Callee].get();
3130 FoundCalleeChain.push_back({NewCallsiteInfo,
FS});
3137 bool FoundSingleCalleeChain =
false;
3140 !isPrevailing(CurCallee.
getGUID(), S.get()))
3145 auto FSVI = CurCallee;
3148 FSVI = AS->getAliaseeVI();
3149 for (
auto &CallEdge :
FS->calls()) {
3150 if (!CallEdge.second.hasTailCall())
3152 if (CallEdge.first == ProfiledCallee) {
3153 if (FoundSingleCalleeChain) {
3154 FoundMultipleCalleeChains =
true;
3157 FoundSingleCalleeChain =
true;
3158 FoundProfiledCalleeCount++;
3159 FoundProfiledCalleeDepth +=
Depth;
3160 if (
Depth > FoundProfiledCalleeMaxDepth)
3161 FoundProfiledCalleeMaxDepth =
Depth;
3162 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
3164 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
3165 FSToVIMap[
FS] = FSVI;
3166 }
else if (findProfiledCalleeThroughTailCalls(
3167 ProfiledCallee, CallEdge.first,
Depth + 1,
3168 FoundCalleeChain, FoundMultipleCalleeChains)) {
3171 assert(!FoundMultipleCalleeChains);
3172 if (FoundSingleCalleeChain) {
3173 FoundMultipleCalleeChains =
true;
3176 FoundSingleCalleeChain =
true;
3177 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
3179 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
3180 FSToVIMap[
FS] = FSVI;
3181 }
else if (FoundMultipleCalleeChains)
3186 return FoundSingleCalleeChain;
3189const FunctionSummary *
3190IndexCallsiteContextGraph::getCalleeFunc(IndexCall &
Call) {
3192 if (
Callee.getSummaryList().empty())
3197bool IndexCallsiteContextGraph::calleeMatchesFunc(
3198 IndexCall &
Call,
const FunctionSummary *Func,
3199 const FunctionSummary *CallerFunc,
3200 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain) {
3204 AliasSummary *Alias =
3205 Callee.getSummaryList().empty()
3208 assert(FSToVIMap.count(Func));
3209 auto FuncVI = FSToVIMap[
Func];
3210 if (Callee == FuncVI ||
3225 bool FoundMultipleCalleeChains =
false;
3226 if (!findProfiledCalleeThroughTailCalls(
3227 FuncVI, Callee,
Depth, FoundCalleeChain, FoundMultipleCalleeChains)) {
3228 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: " << FuncVI
3229 <<
" from " << FSToVIMap[CallerFunc]
3230 <<
" that actually called " << Callee
3231 << (FoundMultipleCalleeChains
3232 ?
" (found multiple possible chains)"
3235 if (FoundMultipleCalleeChains)
3236 FoundProfiledCalleeNonUniquelyCount++;
3243bool IndexCallsiteContextGraph::sameCallee(IndexCall &Call1, IndexCall &Call2) {
3246 return Callee1 == Callee2;
3249template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3250void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump()
3256template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3257void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
3258 raw_ostream &OS)
const {
3259 OS <<
"Node " <<
this <<
"\n";
3263 OS <<
" (recursive)";
3265 if (!MatchingCalls.empty()) {
3266 OS <<
"\tMatchingCalls:\n";
3267 for (
auto &MatchingCall : MatchingCalls) {
3269 MatchingCall.print(OS);
3273 OS <<
"\tNodeId: " <<
NodeId <<
"\n";
3275 OS <<
"\tContextIds:";
3277 auto ContextIds = getContextIds();
3278 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
3279 std::sort(SortedIds.begin(), SortedIds.end());
3280 for (
auto Id : SortedIds)
3283 OS <<
"\tCalleeEdges:\n";
3284 for (
auto &
Edge : CalleeEdges)
3285 OS <<
"\t\t" << *
Edge <<
" (Callee NodeId: " <<
Edge->Callee->NodeId
3287 OS <<
"\tCallerEdges:\n";
3288 for (
auto &
Edge : CallerEdges)
3289 OS <<
"\t\t" << *
Edge <<
" (Caller NodeId: " <<
Edge->Caller->NodeId
3291 if (!Clones.empty()) {
3294 for (
auto *
C : Clones)
3295 OS <<
LS <<
C <<
" NodeId: " <<
C->NodeId;
3297 }
else if (CloneOf) {
3298 OS <<
"\tClone of " << CloneOf <<
" NodeId: " << CloneOf->NodeId <<
"\n";
3302template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3303void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::dump()
3309template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3310void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::print(
3311 raw_ostream &OS)
const {
3312 OS <<
"Edge from Callee " <<
Callee <<
" to Caller: " <<
Caller
3313 << (IsBackedge ?
" (BE)" :
"")
3315 OS <<
" ContextIds:";
3316 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
3317 std::sort(SortedIds.begin(), SortedIds.end());
3318 for (
auto Id : SortedIds)
3322template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3323void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::dump()
const {
3327template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3328void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
3329 raw_ostream &OS)
const {
3330 OS <<
"Callsite Context Graph:\n";
3331 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3333 if (
Node->isRemoved())
3340template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3341void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::printTotalSizes(
3343 function_ref<
void(StringRef, StringRef,
const Twine &)> EmitRemark)
const {
3344 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3346 if (
Node->isRemoved())
3348 if (!
Node->IsAllocation)
3350 DenseSet<uint32_t> ContextIds =
Node->getContextIds();
3351 auto AllocTypeFromCall = getAllocationCallType(
Node->Call);
3352 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
3353 std::sort(SortedIds.begin(), SortedIds.end());
3354 for (
auto Id : SortedIds) {
3355 auto TypeI = ContextIdToAllocationType.find(Id);
3356 assert(TypeI != ContextIdToAllocationType.end());
3357 auto CSI = ContextIdToContextSizeInfos.find(Id);
3358 if (CSI != ContextIdToContextSizeInfos.end()) {
3359 for (
auto &Info : CSI->second) {
3362 " full allocation context " + std::to_string(
Info.FullStackId) +
3363 " with total size " + std::to_string(
Info.TotalSize) +
" is " +
3365 if (allocTypeToUse(
Node->AllocTypes) != AllocTypeFromCall)
3367 " due to cold byte percent";
3369 Msg +=
" (internal context id " + std::to_string(Id) +
")";
3373 EmitRemark(
DEBUG_TYPE,
"MemProfReport", Msg);
3381 if (allocTypeToUse(
Node->AllocTypes) != AllocTypeFromCall)
3383 " due to cold byte percent";
3385 Msg +=
" (internal context id " + std::to_string(Id) +
")";
3389 EmitRemark(
DEBUG_TYPE,
"MemProfReport", Msg);
3395template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3396void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check()
const {
3397 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3400 for (
auto &
Edge :
Node->CallerEdges)
3405template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3407 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3408 using NodeRef =
const ContextNode<DerivedCCG, FuncTy, CallTy> *;
3410 using NodePtrTy = std::unique_ptr<ContextNode<DerivedCCG, FuncTy, CallTy>>;
3426 return G->NodeOwner.begin()->get();
3429 using EdgePtrTy = std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>;
3430 static const ContextNode<DerivedCCG, FuncTy, CallTy> *
3449template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3463 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3469 std::string LabelString =
3470 (
Twine(
"OrigId: ") + (
Node->IsAllocation ?
"Alloc" :
"") +
3473 LabelString +=
"\n";
3474 if (
Node->hasCall()) {
3475 auto Func =
G->NodeToCallingFunc.find(
Node);
3476 assert(Func !=
G->NodeToCallingFunc.end());
3478 G->getLabel(Func->second,
Node->Call.call(),
Node->Call.cloneNo());
3479 for (
auto &MatchingCall :
Node->MatchingCalls) {
3480 LabelString +=
"\n";
3481 LabelString +=
G->getLabel(Func->second, MatchingCall.call(),
3482 MatchingCall.cloneNo());
3485 LabelString +=
"null call";
3486 if (
Node->Recursive)
3487 LabelString +=
" (recursive)";
3489 LabelString +=
" (external)";
3495 auto ContextIds =
Node->getContextIds();
3499 bool Highlight =
false;
3508 std::string AttributeString = (
Twine(
"tooltip=\"") + getNodeId(
Node) +
" " +
3509 getContextIds(ContextIds) +
"\"")
3513 AttributeString +=
",fontsize=\"30\"";
3515 (
Twine(
",fillcolor=\"") + getColor(
Node->AllocTypes, Highlight) +
"\"")
3517 if (
Node->CloneOf) {
3518 AttributeString +=
",color=\"blue\"";
3519 AttributeString +=
",style=\"filled,bold,dashed\"";
3521 AttributeString +=
",style=\"filled\"";
3522 return AttributeString;
3527 auto &Edge = *(ChildIter.getCurrent());
3532 bool Highlight =
false;
3541 auto Color = getColor(Edge->AllocTypes, Highlight);
3542 std::string AttributeString =
3543 (
Twine(
"tooltip=\"") + getContextIds(Edge->ContextIds) +
"\"" +
3545 Twine(
",fillcolor=\"") + Color +
"\"" +
Twine(
",color=\"") + Color +
3548 if (Edge->IsBackedge)
3549 AttributeString +=
",style=\"dotted\"";
3552 AttributeString +=
",penwidth=\"2.0\",weight=\"2\"";
3553 return AttributeString;
3559 if (
Node->isRemoved())
3572 std::string IdString =
"ContextIds:";
3573 if (ContextIds.
size() < 100) {
3574 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
3575 std::sort(SortedIds.begin(), SortedIds.end());
3576 for (
auto Id : SortedIds)
3577 IdString += (
" " +
Twine(Id)).str();
3579 IdString += (
" (" + Twine(ContextIds.
size()) +
" ids)").str();
3584 static std::string getColor(uint8_t AllocTypes,
bool Highlight) {
3590 if (AllocTypes == (uint8_t)AllocationType::NotCold)
3592 return !
DoHighlight || Highlight ?
"brown1" :
"lightpink";
3593 if (AllocTypes == (uint8_t)AllocationType::Cold)
3594 return !
DoHighlight || Highlight ?
"cyan" :
"lightskyblue";
3596 ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold))
3597 return Highlight ?
"magenta" :
"mediumorchid1";
3601 static std::string getNodeId(NodeRef Node) {
3602 std::stringstream SStream;
3603 SStream << std::hex <<
"N0x" << (
unsigned long long)Node;
3604 std::string
Result = SStream.str();
3613template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3618template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3619void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot(
3620 std::string Label)
const {
3625template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3626typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
3627CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
3628 const std::shared_ptr<ContextEdge> &
Edge,
3629 DenseSet<uint32_t> ContextIdsToMove) {
3631 assert(NodeToCallingFunc.count(Node));
3632 ContextNode *Clone =
3633 createNewNode(
Node->IsAllocation, NodeToCallingFunc[Node],
Node->Call);
3634 Node->addClone(Clone);
3635 Clone->MatchingCalls =
Node->MatchingCalls;
3636 moveEdgeToExistingCalleeClone(
Edge, Clone,
true,
3641template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3642void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3643 moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &
Edge,
3644 ContextNode *NewCallee,
bool NewClone,
3645 DenseSet<uint32_t> ContextIdsToMove) {
3648 assert(NewCallee->getOrigNode() ==
Edge->Callee->getOrigNode());
3650 bool EdgeIsRecursive =
Edge->Callee ==
Edge->Caller;
3652 ContextNode *OldCallee =
Edge->Callee;
3656 auto ExistingEdgeToNewCallee = NewCallee->findEdgeFromCaller(
Edge->Caller);
3660 if (ContextIdsToMove.
empty())
3661 ContextIdsToMove =
Edge->getContextIds();
3665 if (
Edge->getContextIds().size() == ContextIdsToMove.
size()) {
3668 NewCallee->AllocTypes |=
Edge->AllocTypes;
3670 if (ExistingEdgeToNewCallee) {
3673 ExistingEdgeToNewCallee->getContextIds().insert_range(ContextIdsToMove);
3674 ExistingEdgeToNewCallee->AllocTypes |=
Edge->AllocTypes;
3675 assert(
Edge->ContextIds == ContextIdsToMove);
3676 removeEdgeFromGraph(
Edge.get());
3679 Edge->Callee = NewCallee;
3680 NewCallee->CallerEdges.push_back(
Edge);
3682 OldCallee->eraseCallerEdge(
Edge.get());
3689 auto CallerEdgeAllocType = computeAllocType(ContextIdsToMove);
3690 if (ExistingEdgeToNewCallee) {
3693 ExistingEdgeToNewCallee->getContextIds().insert_range(ContextIdsToMove);
3694 ExistingEdgeToNewCallee->AllocTypes |= CallerEdgeAllocType;
3697 auto NewEdge = std::make_shared<ContextEdge>(
3698 NewCallee,
Edge->Caller, CallerEdgeAllocType, ContextIdsToMove);
3699 Edge->Caller->CalleeEdges.push_back(NewEdge);
3700 NewCallee->CallerEdges.push_back(NewEdge);
3704 NewCallee->AllocTypes |= CallerEdgeAllocType;
3706 Edge->AllocTypes = computeAllocType(
Edge->ContextIds);
3711 for (
auto &OldCalleeEdge : OldCallee->CalleeEdges) {
3712 ContextNode *CalleeToUse = OldCalleeEdge->Callee;
3716 if (CalleeToUse == OldCallee) {
3720 if (EdgeIsRecursive) {
3724 CalleeToUse = NewCallee;
3728 DenseSet<uint32_t> EdgeContextIdsToMove =
3730 set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove);
3731 OldCalleeEdge->AllocTypes =
3732 computeAllocType(OldCalleeEdge->getContextIds());
3739 if (
auto *NewCalleeEdge = NewCallee->findEdgeFromCallee(CalleeToUse)) {
3740 NewCalleeEdge->getContextIds().insert_range(EdgeContextIdsToMove);
3741 NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
3745 auto NewEdge = std::make_shared<ContextEdge>(
3746 CalleeToUse, NewCallee, computeAllocType(EdgeContextIdsToMove),
3747 EdgeContextIdsToMove);
3748 NewCallee->CalleeEdges.push_back(NewEdge);
3749 NewEdge->Callee->CallerEdges.push_back(NewEdge);
3753 OldCallee->AllocTypes = OldCallee->computeAllocType();
3755 assert((OldCallee->AllocTypes == (uint8_t)AllocationType::None) ==
3756 OldCallee->emptyContextIds());
3760 for (
const auto &OldCalleeEdge : OldCallee->CalleeEdges)
3763 for (
const auto &NewCalleeEdge : NewCallee->CalleeEdges)
3769template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3770void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3771 moveCalleeEdgeToNewCaller(
const std::shared_ptr<ContextEdge> &
Edge,
3772 ContextNode *NewCaller) {
3773 auto *OldCallee =
Edge->Callee;
3774 auto *NewCallee = OldCallee;
3777 bool Recursive =
Edge->Caller ==
Edge->Callee;
3779 NewCallee = NewCaller;
3781 ContextNode *OldCaller =
Edge->Caller;
3782 OldCaller->eraseCalleeEdge(
Edge.get());
3786 auto ExistingEdgeToNewCaller = NewCaller->findEdgeFromCallee(NewCallee);
3788 if (ExistingEdgeToNewCaller) {
3791 ExistingEdgeToNewCaller->getContextIds().insert_range(
3792 Edge->getContextIds());
3793 ExistingEdgeToNewCaller->AllocTypes |=
Edge->AllocTypes;
3794 Edge->ContextIds.clear();
3795 Edge->AllocTypes = (uint8_t)AllocationType::None;
3796 OldCallee->eraseCallerEdge(
Edge.get());
3799 Edge->Caller = NewCaller;
3800 NewCaller->CalleeEdges.push_back(
Edge);
3802 assert(NewCallee == NewCaller);
3805 Edge->Callee = NewCallee;
3806 NewCallee->CallerEdges.push_back(
Edge);
3807 OldCallee->eraseCallerEdge(
Edge.get());
3813 NewCaller->AllocTypes |=
Edge->AllocTypes;
3820 bool IsNewNode = NewCaller->CallerEdges.empty();
3829 for (
auto &OldCallerEdge : OldCaller->CallerEdges) {
3830 auto OldCallerCaller = OldCallerEdge->Caller;
3834 OldCallerEdge->getContextIds(),
Edge->getContextIds());
3835 if (OldCaller == OldCallerCaller) {
3836 OldCallerCaller = NewCaller;
3842 set_subtract(OldCallerEdge->getContextIds(), EdgeContextIdsToMove);
3843 OldCallerEdge->AllocTypes =
3844 computeAllocType(OldCallerEdge->getContextIds());
3849 auto *ExistingCallerEdge = NewCaller->findEdgeFromCaller(OldCallerCaller);
3853 if (ExistingCallerEdge) {
3854 ExistingCallerEdge->getContextIds().insert_range(EdgeContextIdsToMove);
3855 ExistingCallerEdge->AllocTypes |=
3856 computeAllocType(EdgeContextIdsToMove);
3859 auto NewEdge = std::make_shared<ContextEdge>(
3860 NewCaller, OldCallerCaller, computeAllocType(EdgeContextIdsToMove),
3861 EdgeContextIdsToMove);
3862 NewCaller->CallerEdges.push_back(NewEdge);
3863 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
3868 OldCaller->AllocTypes = OldCaller->computeAllocType();
3870 assert((OldCaller->AllocTypes == (uint8_t)AllocationType::None) ==
3871 OldCaller->emptyContextIds());
3875 for (
const auto &OldCallerEdge : OldCaller->CallerEdges)
3878 for (
const auto &NewCallerEdge : NewCaller->CallerEdges)
3884template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3885void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3886 recursivelyRemoveNoneTypeCalleeEdges(
3887 ContextNode *Node, DenseSet<const ContextNode *> &Visited) {
3892 removeNoneTypeCalleeEdges(Node);
3894 for (
auto *Clone :
Node->Clones)
3895 recursivelyRemoveNoneTypeCalleeEdges(Clone, Visited);
3899 auto CallerEdges =
Node->CallerEdges;
3900 for (
auto &
Edge : CallerEdges) {
3902 if (
Edge->isRemoved()) {
3906 recursivelyRemoveNoneTypeCalleeEdges(
Edge->Caller, Visited);
3911template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3912void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::markBackedges() {
3917 DenseSet<const ContextNode *> Visited;
3918 DenseSet<const ContextNode *> CurrentStack;
3919 for (
auto &Entry : NonAllocationCallToContextNodeMap) {
3921 if (
Node->isRemoved())
3924 if (!
Node->CallerEdges.empty())
3926 markBackedges(Node, Visited, CurrentStack);
3932template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3933void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::markBackedges(
3934 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
3935 DenseSet<const ContextNode *> &CurrentStack) {
3936 auto I = Visited.
insert(Node);
3940 for (
auto &CalleeEdge :
Node->CalleeEdges) {
3941 auto *
Callee = CalleeEdge->Callee;
3942 if (Visited.
count(Callee)) {
3945 if (CurrentStack.
count(Callee))
3946 CalleeEdge->IsBackedge =
true;
3949 CurrentStack.
insert(Callee);
3950 markBackedges(Callee, Visited, CurrentStack);
3951 CurrentStack.
erase(Callee);
3955template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3956void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() {
3957 DenseSet<const ContextNode *> Visited;
3958 for (
auto &Entry : AllocationCallToContextNodeMap) {
3960 identifyClones(
Entry.second, Visited,
Entry.second->getContextIds());
3963 for (
auto &Entry : AllocationCallToContextNodeMap)
3964 recursivelyRemoveNoneTypeCalleeEdges(
Entry.second, Visited);
3977template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3978void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
3979 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
3980 const DenseSet<uint32_t> &AllocContextIds) {
3990 if (!
Node->hasCall())
4009 auto CallerEdges =
Node->CallerEdges;
4010 for (
auto &
Edge : CallerEdges) {
4012 if (
Edge->isRemoved()) {
4018 if (
Edge->IsBackedge) {
4025 if (!Visited.
count(
Edge->Caller) && !
Edge->Caller->CloneOf) {
4026 identifyClones(
Edge->Caller, Visited, AllocContextIds);
4049 const unsigned AllocTypeCloningPriority[] = { 3, 4,
4053 [&](
const std::shared_ptr<ContextEdge> &
A,
4054 const std::shared_ptr<ContextEdge> &
B) {
4057 if (A->ContextIds.empty())
4063 if (B->ContextIds.empty())
4066 if (A->AllocTypes == B->AllocTypes)
4069 return *A->ContextIds.begin() < *B->ContextIds.begin();
4070 return AllocTypeCloningPriority[A->AllocTypes] <
4071 AllocTypeCloningPriority[B->AllocTypes];
4074 assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
4076 DenseSet<uint32_t> RecursiveContextIds;
4081 DenseSet<uint32_t> AllCallerContextIds;
4082 for (
auto &CE :
Node->CallerEdges) {
4085 AllCallerContextIds.
reserve(
CE->getContextIds().size());
4086 for (
auto Id :
CE->getContextIds())
4087 if (!AllCallerContextIds.
insert(Id).second)
4088 RecursiveContextIds.
insert(Id);
4098 auto CallerEdges =
Node->CallerEdges;
4099 for (
auto &CallerEdge : CallerEdges) {
4101 if (CallerEdge->isRemoved()) {
4105 assert(CallerEdge->Callee == Node);
4114 if (!CallerEdge->Caller->hasCall())
4119 auto CallerEdgeContextsForAlloc =
4121 if (!RecursiveContextIds.
empty())
4122 CallerEdgeContextsForAlloc =
4124 if (CallerEdgeContextsForAlloc.empty())
4127 auto CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
4131 std::vector<uint8_t> CalleeEdgeAllocTypesForCallerEdge;
4132 CalleeEdgeAllocTypesForCallerEdge.reserve(
Node->CalleeEdges.size());
4133 for (
auto &CalleeEdge :
Node->CalleeEdges)
4134 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
4135 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
4151 assert(CallerEdge->AllocTypes != (uint8_t)AllocationType::None);
4152 assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
4153 if (!CallerEdge->IsBackedge &&
4154 allocTypeToUse(CallerAllocTypeForAlloc) ==
4155 allocTypeToUse(
Node->AllocTypes) &&
4156 allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
4157 CalleeEdgeAllocTypesForCallerEdge,
Node->CalleeEdges)) {
4161 if (CallerEdge->IsBackedge) {
4165 DeferredBackedges++;
4178 if (CallerEdge->IsBackedge && !CallerEdge->Caller->CloneOf &&
4179 !Visited.
count(CallerEdge->Caller)) {
4180 const auto OrigIdCount = CallerEdge->getContextIds().size();
4183 identifyClones(CallerEdge->Caller, Visited, CallerEdgeContextsForAlloc);
4184 removeNoneTypeCalleeEdges(CallerEdge->Caller);
4188 bool UpdatedEdge =
false;
4189 if (OrigIdCount > CallerEdge->getContextIds().size()) {
4190 for (
auto E :
Node->CallerEdges) {
4192 if (
E->Caller->CloneOf != CallerEdge->Caller)
4196 auto CallerEdgeContextsForAllocNew =
4198 if (CallerEdgeContextsForAllocNew.empty())
4208 CallerEdgeContextsForAlloc.swap(CallerEdgeContextsForAllocNew);
4218 if (CallerEdge->isRemoved())
4228 CallerEdgeContextsForAlloc, CallerEdge->getContextIds());
4229 if (CallerEdgeContextsForAlloc.empty())
4234 CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
4235 CalleeEdgeAllocTypesForCallerEdge.clear();
4236 for (
auto &CalleeEdge :
Node->CalleeEdges) {
4237 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
4238 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
4244 ContextNode *Clone =
nullptr;
4245 for (
auto *CurClone :
Node->Clones) {
4246 if (allocTypeToUse(CurClone->AllocTypes) !=
4247 allocTypeToUse(CallerAllocTypeForAlloc))
4254 assert(!BothSingleAlloc ||
4255 CurClone->AllocTypes == CallerAllocTypeForAlloc);
4261 if (BothSingleAlloc || allocTypesMatchClone<DerivedCCG, FuncTy, CallTy>(
4262 CalleeEdgeAllocTypesForCallerEdge, CurClone)) {
4270 moveEdgeToExistingCalleeClone(CallerEdge, Clone,
false,
4271 CallerEdgeContextsForAlloc);
4273 Clone = moveEdgeToNewCalleeClone(CallerEdge, CallerEdgeContextsForAlloc);
4276 assert(Clone->AllocTypes != (uint8_t)AllocationType::None);
4283 assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
4289void ModuleCallsiteContextGraph::updateAllocationCall(
4294 "memprof", AllocTypeString);
4297 .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofAttribute",
Call.call())
4298 <<
ore::NV(
"AllocationCall",
Call.call()) <<
" in clone "
4300 <<
" marked with memprof allocation attribute "
4301 <<
ore::NV(
"Attribute", AllocTypeString));
4304void IndexCallsiteContextGraph::updateAllocationCall(CallInfo &
Call,
4308 assert(AI->Versions.size() >
Call.cloneNo());
4313ModuleCallsiteContextGraph::getAllocationCallType(
const CallInfo &
Call)
const {
4315 if (!CB->getAttributes().hasFnAttr(
"memprof"))
4316 return AllocationType::None;
4317 return CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
4318 ? AllocationType::Cold
4319 : AllocationType::NotCold;
4323IndexCallsiteContextGraph::getAllocationCallType(
const CallInfo &
Call)
const {
4325 assert(AI->Versions.size() >
Call.cloneNo());
4329void ModuleCallsiteContextGraph::updateCall(CallInfo &CallerCall,
4330 FuncInfo CalleeFunc) {
4331 auto *CurF = getCalleeFunc(CallerCall.call());
4332 auto NewCalleeCloneNo = CalleeFunc.cloneNo();
4339 if (CurCalleeCloneNo != NewCalleeCloneNo) {
4341 << CurCalleeCloneNo <<
" now " << NewCalleeCloneNo
4343 MismatchedCloneAssignments++;
4346 if (NewCalleeCloneNo > 0)
4347 cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func());
4348 OREGetter(CallerCall.call()->getFunction())
4349 .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofCall", CallerCall.call())
4350 <<
ore::NV(
"Call", CallerCall.call()) <<
" in clone "
4351 <<
ore::NV(
"Caller", CallerCall.call()->getFunction())
4352 <<
" assigned to call function clone "
4353 <<
ore::NV(
"Callee", CalleeFunc.func()));
4356void IndexCallsiteContextGraph::updateCall(CallInfo &CallerCall,
4357 FuncInfo CalleeFunc) {
4360 "Caller cannot be an allocation which should not have profiled calls");
4361 assert(CI->Clones.size() > CallerCall.cloneNo());
4362 auto NewCalleeCloneNo = CalleeFunc.cloneNo();
4363 auto &CurCalleeCloneNo = CI->Clones[CallerCall.cloneNo()];
4368 if (CurCalleeCloneNo != 0 && CurCalleeCloneNo != NewCalleeCloneNo) {
4370 << CurCalleeCloneNo <<
" now " << NewCalleeCloneNo
4372 MismatchedCloneAssignments++;
4374 CurCalleeCloneNo = NewCalleeCloneNo;
4386 SP->replaceLinkageName(MDName);
4390 TempDISubprogram NewDecl = Decl->
clone();
4391 NewDecl->replaceLinkageName(MDName);
4395CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
4397ModuleCallsiteContextGraph::cloneFunctionForCallsite(
4398 FuncInfo &Func, CallInfo &
Call, DenseMap<CallInfo, CallInfo> &CallMap,
4399 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
4404 assert(!
Func.func()->getParent()->getFunction(Name));
4405 NewFunc->setName(Name);
4407 for (
auto &Inst : CallsWithMetadataInFunc) {
4409 assert(Inst.cloneNo() == 0);
4412 OREGetter(
Func.func())
4413 .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofClone",
Func.func())
4414 <<
"created clone " <<
ore::NV(
"NewFunction", NewFunc));
4415 return {NewFunc, CloneNo};
4418CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
4419 IndexCall>::FuncInfo
4420IndexCallsiteContextGraph::cloneFunctionForCallsite(
4421 FuncInfo &Func, CallInfo &
Call, DenseMap<CallInfo, CallInfo> &CallMap,
4422 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
4436 for (
auto &Inst : CallsWithMetadataInFunc) {
4438 assert(Inst.cloneNo() == 0);
4440 assert(AI->Versions.size() == CloneNo);
4443 AI->Versions.push_back(0);
4446 assert(CI && CI->Clones.size() == CloneNo);
4449 CI->Clones.push_back(0);
4451 CallMap[Inst] = {Inst.call(), CloneNo};
4453 return {
Func.func(), CloneNo};
4470template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4471void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones() {
4477 DenseMap<uint32_t, ContextNode *> ContextIdToAllocationNode;
4478 for (
auto &Entry : AllocationCallToContextNodeMap) {
4480 for (
auto Id :
Node->getContextIds())
4481 ContextIdToAllocationNode[
Id] =
Node->getOrigNode();
4482 for (
auto *Clone :
Node->Clones) {
4483 for (
auto Id : Clone->getContextIds())
4484 ContextIdToAllocationNode[
Id] = Clone->getOrigNode();
4491 DenseSet<const ContextNode *> Visited;
4492 for (
auto &Entry : AllocationCallToContextNodeMap) {
4495 mergeClones(Node, Visited, ContextIdToAllocationNode);
4501 auto Clones =
Node->Clones;
4502 for (
auto *Clone : Clones)
4503 mergeClones(Clone, Visited, ContextIdToAllocationNode);
4507 dbgs() <<
"CCG after merging:\n";
4511 exportToDot(
"aftermerge");
4519template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4520void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones(
4521 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
4522 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {
4532 bool FoundUnvisited =
true;
4534 while (FoundUnvisited) {
4536 FoundUnvisited =
false;
4539 auto CallerEdges =
Node->CallerEdges;
4540 for (
auto CallerEdge : CallerEdges) {
4542 if (CallerEdge->Callee != Node)
4547 FoundUnvisited =
true;
4548 mergeClones(CallerEdge->Caller, Visited, ContextIdToAllocationNode);
4552 TotalMergeInvokes++;
4553 TotalMergeIters += Iters;
4554 if (Iters > MaxMergeIters)
4555 MaxMergeIters = Iters;
4558 mergeNodeCalleeClones(Node, Visited, ContextIdToAllocationNode);
4561template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4562void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeNodeCalleeClones(
4563 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
4564 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {
4566 if (
Node->emptyContextIds())
4571 MapVector<ContextNode *, std::vector<std::shared_ptr<ContextEdge>>>
4572 OrigNodeToCloneEdges;
4573 for (
const auto &
E :
Node->CalleeEdges) {
4578 OrigNodeToCloneEdges[
Base].push_back(
E);
4584 auto CalleeCallerEdgeLessThan = [](
const std::shared_ptr<ContextEdge> &
A,
4585 const std::shared_ptr<ContextEdge> &
B) {
4586 if (
A->Callee->CallerEdges.size() !=
B->Callee->CallerEdges.size())
4587 return A->Callee->CallerEdges.size() <
B->Callee->CallerEdges.size();
4588 if (
A->Callee->CloneOf && !
B->Callee->CloneOf)
4590 else if (!
A->Callee->CloneOf &&
B->Callee->CloneOf)
4594 return *
A->ContextIds.begin() < *
B->ContextIds.begin();
4599 for (
auto Entry : OrigNodeToCloneEdges) {
4602 auto &CalleeEdges =
Entry.second;
4603 auto NumCalleeClones = CalleeEdges.size();
4605 if (NumCalleeClones == 1)
4616 DenseSet<ContextNode *> OtherCallersToShareMerge;
4617 findOtherCallersToShareMerge(Node, CalleeEdges, ContextIdToAllocationNode,
4618 OtherCallersToShareMerge);
4623 ContextNode *MergeNode =
nullptr;
4624 DenseMap<ContextNode *, unsigned> CallerToMoveCount;
4625 for (
auto CalleeEdge : CalleeEdges) {
4626 auto *OrigCallee = CalleeEdge->Callee;
4632 if (CalleeEdge->Callee->CallerEdges.size() == 1) {
4633 MergeNode = OrigCallee;
4634 NonNewMergedNodes++;
4641 if (!OtherCallersToShareMerge.
empty()) {
4642 bool MoveAllCallerEdges =
true;
4643 for (
auto CalleeCallerE : OrigCallee->CallerEdges) {
4644 if (CalleeCallerE == CalleeEdge)
4646 if (!OtherCallersToShareMerge.
contains(CalleeCallerE->Caller)) {
4647 MoveAllCallerEdges =
false;
4653 if (MoveAllCallerEdges) {
4654 MergeNode = OrigCallee;
4655 NonNewMergedNodes++;
4662 assert(MergeNode != OrigCallee);
4663 moveEdgeToExistingCalleeClone(CalleeEdge, MergeNode,
4666 MergeNode = moveEdgeToNewCalleeClone(CalleeEdge);
4671 if (!OtherCallersToShareMerge.
empty()) {
4675 auto OrigCalleeCallerEdges = OrigCallee->CallerEdges;
4676 for (
auto &CalleeCallerE : OrigCalleeCallerEdges) {
4677 if (CalleeCallerE == CalleeEdge)
4679 if (!OtherCallersToShareMerge.
contains(CalleeCallerE->Caller))
4681 CallerToMoveCount[CalleeCallerE->Caller]++;
4682 moveEdgeToExistingCalleeClone(CalleeCallerE, MergeNode,
4686 removeNoneTypeCalleeEdges(OrigCallee);
4687 removeNoneTypeCalleeEdges(MergeNode);
4705template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4706void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
4707 findOtherCallersToShareMerge(
4709 std::vector<std::shared_ptr<ContextEdge>> &CalleeEdges,
4710 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode,
4711 DenseSet<ContextNode *> &OtherCallersToShareMerge) {
4712 auto NumCalleeClones = CalleeEdges.size();
4715 DenseMap<ContextNode *, unsigned> OtherCallersToSharedCalleeEdgeCount;
4718 unsigned PossibleOtherCallerNodes = 0;
4722 if (CalleeEdges[0]->
Callee->CallerEdges.size() < 2)
4728 DenseMap<ContextEdge *, DenseSet<ContextNode *>> CalleeEdgeToAllocNodes;
4729 for (
auto CalleeEdge : CalleeEdges) {
4730 assert(CalleeEdge->Callee->CallerEdges.size() > 1);
4733 for (
auto CalleeCallerEdges : CalleeEdge->Callee->CallerEdges) {
4734 if (CalleeCallerEdges->Caller == Node) {
4735 assert(CalleeCallerEdges == CalleeEdge);
4738 OtherCallersToSharedCalleeEdgeCount[CalleeCallerEdges->Caller]++;
4741 if (OtherCallersToSharedCalleeEdgeCount[CalleeCallerEdges->Caller] ==
4743 PossibleOtherCallerNodes++;
4747 for (
auto Id : CalleeEdge->getContextIds()) {
4748 auto *
Alloc = ContextIdToAllocationNode.
lookup(Id);
4752 MissingAllocForContextId++;
4755 CalleeEdgeToAllocNodes[CalleeEdge.get()].
insert(
Alloc);
4762 for (
auto CalleeEdge : CalleeEdges) {
4763 assert(CalleeEdge->Callee->CallerEdges.size() > 1);
4765 if (!PossibleOtherCallerNodes)
4767 auto &CurCalleeAllocNodes = CalleeEdgeToAllocNodes[CalleeEdge.get()];
4769 for (
auto &CalleeCallerE : CalleeEdge->Callee->CallerEdges) {
4771 if (CalleeCallerE == CalleeEdge)
4775 if (OtherCallersToSharedCalleeEdgeCount[CalleeCallerE->Caller] !=
4780 for (
auto Id : CalleeCallerE->getContextIds()) {
4781 auto *
Alloc = ContextIdToAllocationNode.
lookup(Id);
4786 if (!CurCalleeAllocNodes.contains(
Alloc)) {
4787 OtherCallersToSharedCalleeEdgeCount[CalleeCallerE->Caller] = 0;
4788 PossibleOtherCallerNodes--;
4795 if (!PossibleOtherCallerNodes)
4800 for (
auto &[OtherCaller,
Count] : OtherCallersToSharedCalleeEdgeCount) {
4801 if (
Count != NumCalleeClones)
4803 OtherCallersToShareMerge.
insert(OtherCaller);
4848template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4849bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
4856 DenseMap<ContextNode *, FuncInfo> CallsiteToCalleeFuncCloneMap;
4860 auto RecordCalleeFuncOfCallsite = [&](ContextNode *
Caller,
4861 const FuncInfo &CalleeFunc) {
4863 CallsiteToCalleeFuncCloneMap[
Caller] = CalleeFunc;
4867 struct FuncCloneInfo {
4872 DenseMap<CallInfo, CallInfo> CallMap;
4900 DenseMap<const ContextNode *, std::map<unsigned, SmallVector<CallInfo, 0>>>
4901 UnassignedCallClones;
4905 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
4906 FuncInfo OrigFunc(Func);
4911 std::vector<FuncCloneInfo> FuncCloneInfos;
4912 for (
auto &
Call : CallsWithMetadata) {
4913 ContextNode *
Node = getNodeForInst(
Call);
4917 if (!Node ||
Node->Clones.empty())
4920 "Not having a call should have prevented cloning");
4924 std::map<FuncInfo, ContextNode *> FuncCloneToCurNodeCloneMap;
4928 auto AssignCallsiteCloneToFuncClone = [&](
const FuncInfo &FuncClone,
4930 ContextNode *CallsiteClone,
4933 FuncCloneToCurNodeCloneMap[FuncClone] = CallsiteClone;
4935 assert(FuncCloneInfos.size() > FuncClone.cloneNo());
4936 DenseMap<CallInfo, CallInfo> &CallMap =
4937 FuncCloneInfos[FuncClone.cloneNo()].CallMap;
4938 CallInfo CallClone(
Call);
4939 if (
auto It = CallMap.
find(
Call); It != CallMap.
end())
4940 CallClone = It->second;
4941 CallsiteClone->setCall(CallClone);
4943 for (
auto &MatchingCall :
Node->MatchingCalls) {
4944 CallInfo CallClone(MatchingCall);
4945 if (
auto It = CallMap.
find(MatchingCall); It != CallMap.
end())
4946 CallClone = It->second;
4948 MatchingCall = CallClone;
4956 auto MoveEdgeToNewCalleeCloneAndSetUp =
4957 [&](
const std::shared_ptr<ContextEdge> &
Edge) {
4958 ContextNode *OrigCallee =
Edge->Callee;
4959 ContextNode *NewClone = moveEdgeToNewCalleeClone(
Edge);
4960 removeNoneTypeCalleeEdges(NewClone);
4961 assert(NewClone->AllocTypes != (uint8_t)AllocationType::None);
4965 if (CallsiteToCalleeFuncCloneMap.
count(OrigCallee))
4966 RecordCalleeFuncOfCallsite(
4967 NewClone, CallsiteToCalleeFuncCloneMap[OrigCallee]);
4974 std::deque<ContextNode *> ClonesWorklist;
4976 if (!
Node->emptyContextIds())
4977 ClonesWorklist.push_back(Node);
4983 unsigned NodeCloneCount = 0;
4984 while (!ClonesWorklist.empty()) {
4985 ContextNode *Clone = ClonesWorklist.front();
4986 ClonesWorklist.pop_front();
4995 if (FuncCloneInfos.size() < NodeCloneCount) {
4997 if (NodeCloneCount == 1) {
5002 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &
E) {
5003 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
5007 FuncCloneInfos.push_back(
5008 {OrigFunc, DenseMap<CallInfo, CallInfo>()});
5009 AssignCallsiteCloneToFuncClone(
5010 OrigFunc,
Call, Clone,
5011 AllocationCallToContextNodeMap.count(
Call));
5012 for (
auto &CE : Clone->CallerEdges) {
5014 if (!
CE->Caller->hasCall())
5016 RecordCalleeFuncOfCallsite(
CE->Caller, OrigFunc);
5026 FuncInfo PreviousAssignedFuncClone;
5028 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &
E) {
5029 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
5031 bool CallerAssignedToCloneOfFunc =
false;
5032 if (EI != Clone->CallerEdges.end()) {
5033 const std::shared_ptr<ContextEdge> &
Edge = *EI;
5034 PreviousAssignedFuncClone =
5035 CallsiteToCalleeFuncCloneMap[
Edge->Caller];
5036 CallerAssignedToCloneOfFunc =
true;
5041 DenseMap<CallInfo, CallInfo> NewCallMap;
5042 unsigned CloneNo = FuncCloneInfos.size();
5043 assert(CloneNo > 0 &&
"Clone 0 is the original function, which "
5044 "should already exist in the map");
5045 FuncInfo NewFuncClone = cloneFunctionForCallsite(
5046 OrigFunc,
Call, NewCallMap, CallsWithMetadata, CloneNo);
5047 FuncCloneInfos.push_back({NewFuncClone, std::move(NewCallMap)});
5048 FunctionClonesAnalysis++;
5054 if (!CallerAssignedToCloneOfFunc) {
5055 AssignCallsiteCloneToFuncClone(
5056 NewFuncClone,
Call, Clone,
5057 AllocationCallToContextNodeMap.count(
Call));
5058 for (
auto &CE : Clone->CallerEdges) {
5060 if (!
CE->Caller->hasCall())
5062 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
5074 auto CallerEdges = Clone->CallerEdges;
5075 for (
auto CE : CallerEdges) {
5077 if (
CE->isRemoved()) {
5083 if (!
CE->Caller->hasCall())
5086 if (!CallsiteToCalleeFuncCloneMap.
count(
CE->Caller) ||
5090 CallsiteToCalleeFuncCloneMap[
CE->Caller] !=
5091 PreviousAssignedFuncClone)
5094 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
5107 auto CalleeEdges =
CE->Caller->CalleeEdges;
5108 for (
auto CalleeEdge : CalleeEdges) {
5111 if (CalleeEdge->isRemoved()) {
5116 ContextNode *
Callee = CalleeEdge->Callee;
5120 if (Callee == Clone || !
Callee->hasCall())
5125 if (Callee == CalleeEdge->Caller)
5127 ContextNode *NewClone =
5128 MoveEdgeToNewCalleeCloneAndSetUp(CalleeEdge);
5131 removeNoneTypeCalleeEdges(Callee);
5139 CallInfo OrigCall(
Callee->getOrigNode()->Call);
5140 OrigCall.setCloneNo(0);
5141 DenseMap<CallInfo, CallInfo> &CallMap =
5142 FuncCloneInfos[NewFuncClone.cloneNo()].CallMap;
5144 CallInfo NewCall(CallMap[OrigCall]);
5146 NewClone->setCall(NewCall);
5148 for (
auto &MatchingCall : NewClone->MatchingCalls) {
5149 CallInfo OrigMatchingCall(MatchingCall);
5150 OrigMatchingCall.setCloneNo(0);
5152 CallInfo NewCall(CallMap[OrigMatchingCall]);
5155 MatchingCall = NewCall;
5164 auto FindFirstAvailFuncClone = [&]() {
5169 for (
auto &CF : FuncCloneInfos) {
5170 if (!FuncCloneToCurNodeCloneMap.count(CF.FuncClone))
5171 return CF.FuncClone;
5174 "Expected an available func clone for this callsite clone");
5191 std::map<FuncInfo, ContextNode *> FuncCloneToNewCallsiteCloneMap;
5192 FuncInfo FuncCloneAssignedToCurCallsiteClone;
5196 auto CloneCallerEdges = Clone->CallerEdges;
5197 for (
auto &
Edge : CloneCallerEdges) {
5201 if (
Edge->isRemoved())
5204 if (!
Edge->Caller->hasCall())
5208 if (CallsiteToCalleeFuncCloneMap.
count(
Edge->Caller)) {
5209 FuncInfo FuncCloneCalledByCaller =
5210 CallsiteToCalleeFuncCloneMap[
Edge->Caller];
5220 if ((FuncCloneToCurNodeCloneMap.count(FuncCloneCalledByCaller) &&
5221 FuncCloneToCurNodeCloneMap[FuncCloneCalledByCaller] !=
5229 (FuncCloneAssignedToCurCallsiteClone &&
5230 FuncCloneAssignedToCurCallsiteClone !=
5231 FuncCloneCalledByCaller)) {
5246 if (FuncCloneToNewCallsiteCloneMap.count(
5247 FuncCloneCalledByCaller)) {
5248 ContextNode *NewClone =
5249 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller];
5250 moveEdgeToExistingCalleeClone(
Edge, NewClone);
5252 removeNoneTypeCalleeEdges(NewClone);
5255 ContextNode *NewClone = MoveEdgeToNewCalleeCloneAndSetUp(
Edge);
5256 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller] =
5259 ClonesWorklist.push_back(NewClone);
5263 removeNoneTypeCalleeEdges(Clone);
5271 if (!FuncCloneAssignedToCurCallsiteClone) {
5272 FuncCloneAssignedToCurCallsiteClone = FuncCloneCalledByCaller;
5274 AssignCallsiteCloneToFuncClone(
5275 FuncCloneCalledByCaller,
Call, Clone,
5276 AllocationCallToContextNodeMap.count(
Call));
5280 assert(FuncCloneAssignedToCurCallsiteClone ==
5281 FuncCloneCalledByCaller);
5290 if (!FuncCloneAssignedToCurCallsiteClone) {
5291 FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
5292 assert(FuncCloneAssignedToCurCallsiteClone);
5294 AssignCallsiteCloneToFuncClone(
5295 FuncCloneAssignedToCurCallsiteClone,
Call, Clone,
5296 AllocationCallToContextNodeMap.count(
Call));
5298 assert(FuncCloneToCurNodeCloneMap
5299 [FuncCloneAssignedToCurCallsiteClone] == Clone);
5301 RecordCalleeFuncOfCallsite(
Edge->Caller,
5302 FuncCloneAssignedToCurCallsiteClone);
5322 if (!FuncCloneAssignedToCurCallsiteClone) {
5323 FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
5324 assert(FuncCloneAssignedToCurCallsiteClone &&
5325 "No available func clone for this callsite clone");
5326 AssignCallsiteCloneToFuncClone(
5327 FuncCloneAssignedToCurCallsiteClone,
Call, Clone,
5328 AllocationCallToContextNodeMap.contains(
Call));
5333 for (
const auto &PE :
Node->CalleeEdges)
5335 for (
const auto &CE :
Node->CallerEdges)
5337 for (
auto *Clone :
Node->Clones) {
5339 for (
const auto &PE : Clone->CalleeEdges)
5341 for (
const auto &CE : Clone->CallerEdges)
5347 if (FuncCloneInfos.size() < 2)
5353 for (
auto &
Call : CallsWithMetadata) {
5354 ContextNode *
Node = getNodeForInst(
Call);
5355 if (!Node || !
Node->hasCall() ||
Node->emptyContextIds())
5361 if (
Node->Clones.size() + 1 >= FuncCloneInfos.size())
5365 DenseSet<unsigned> NodeCallClones;
5366 for (
auto *
C :
Node->Clones)
5367 NodeCallClones.
insert(
C->Call.cloneNo());
5370 for (
auto &FC : FuncCloneInfos) {
5375 if (++
I == 1 || NodeCallClones.
contains(
I)) {
5380 auto &CallVector = UnassignedCallClones[
Node][
I];
5381 DenseMap<CallInfo, CallInfo> &CallMap =
FC.CallMap;
5382 if (
auto It = CallMap.
find(
Call); It != CallMap.
end()) {
5383 CallInfo CallClone = It->second;
5384 CallVector.push_back(CallClone);
5388 assert(
false &&
"Expected to find call in CallMap");
5391 for (
auto &MatchingCall :
Node->MatchingCalls) {
5392 if (
auto It = CallMap.
find(MatchingCall); It != CallMap.
end()) {
5393 CallInfo CallClone = It->second;
5394 CallVector.push_back(CallClone);
5398 assert(
false &&
"Expected to find call in CallMap");
5406 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
5408 auto UpdateCalls = [&](ContextNode *
Node,
5409 DenseSet<const ContextNode *> &Visited,
5410 auto &&UpdateCalls) {
5411 auto Inserted = Visited.insert(Node);
5415 for (
auto *Clone :
Node->Clones)
5416 UpdateCalls(Clone, Visited, UpdateCalls);
5418 for (
auto &
Edge :
Node->CallerEdges)
5419 UpdateCalls(
Edge->Caller, Visited, UpdateCalls);
5423 if (!
Node->hasCall() ||
Node->emptyContextIds())
5426 if (
Node->IsAllocation) {
5427 auto AT = allocTypeToUse(
Node->AllocTypes);
5433 !ContextIdToContextSizeInfos.empty()) {
5434 uint64_t TotalCold = 0;
5436 for (
auto Id :
Node->getContextIds()) {
5437 auto TypeI = ContextIdToAllocationType.find(Id);
5438 assert(TypeI != ContextIdToAllocationType.end());
5439 auto CSI = ContextIdToContextSizeInfos.find(Id);
5440 if (CSI != ContextIdToContextSizeInfos.end()) {
5441 for (
auto &Info : CSI->second) {
5443 if (TypeI->second == AllocationType::Cold)
5444 TotalCold +=
Info.TotalSize;
5449 AT = AllocationType::Cold;
5451 updateAllocationCall(
Node->Call, AT);
5456 if (!CallsiteToCalleeFuncCloneMap.
count(Node))
5459 auto CalleeFunc = CallsiteToCalleeFuncCloneMap[
Node];
5460 updateCall(
Node->Call, CalleeFunc);
5462 for (
auto &
Call :
Node->MatchingCalls)
5463 updateCall(
Call, CalleeFunc);
5467 if (!UnassignedCallClones.
contains(Node))
5469 DenseSet<unsigned> NodeCallClones;
5470 for (
auto *
C :
Node->Clones)
5471 NodeCallClones.
insert(
C->Call.cloneNo());
5473 auto &ClonedCalls = UnassignedCallClones[
Node];
5474 for (
auto &[CloneNo, CallVector] : ClonedCalls) {
5478 if (NodeCallClones.
contains(CloneNo))
5481 for (
auto &
Call : CallVector)
5482 updateCall(
Call, CalleeFunc);
5491 DenseSet<const ContextNode *> Visited;
5492 for (
auto &Entry : AllocationCallToContextNodeMap)
5493 UpdateCalls(
Entry.second, Visited, UpdateCalls);
5504 for (
auto &SN : FS->callsites()) {
5509 SN.Clones.size() >
I &&
5510 "Callsite summary has fewer entries than other summaries in function");
5511 if (SN.Clones.size() <=
I || !SN.Clones[
I])
5518 for (
auto &AN : FS->allocs()) {
5522 assert(AN.Versions.size() >
I &&
5523 "Alloc summary has fewer entries than other summaries in function");
5524 if (AN.Versions.size() <=
I ||
5541 NewGV->takeName(DeclGV);
5548 auto CloneFuncAliases = [&](
Function *NewF,
unsigned I) {
5549 if (!FuncToAliasMap.count(&
F))
5551 for (
auto *
A : FuncToAliasMap[&
F]) {
5553 auto *PrevA = M.getNamedAlias(AliasName);
5555 A->getType()->getPointerAddressSpace(),
5556 A->getLinkage(), AliasName, NewF);
5557 NewA->copyAttributesFrom(
A);
5559 TakeDeclNameAndReplace(PrevA, NewA);
5568 FunctionsClonedThinBackend++;
5585 for (
unsigned I = 1;
I < NumClones;
I++) {
5586 VMaps.
emplace_back(std::make_unique<ValueToValueMapTy>());
5593 FunctionCloneDuplicatesThinBackend++;
5594 auto *Func = HashToFunc[Hash];
5595 if (Func->hasAvailableExternallyLinkage()) {
5601 auto Decl = M.getOrInsertFunction(Name, Func->getFunctionType());
5603 <<
"created clone decl " <<
ore::NV(
"Decl", Decl.getCallee()));
5606 auto *PrevF = M.getFunction(Name);
5609 TakeDeclNameAndReplace(PrevF, Alias);
5611 <<
"created clone alias " <<
ore::NV(
"Alias", Alias));
5614 CloneFuncAliases(Func,
I);
5618 HashToFunc[Hash] = NewF;
5619 FunctionClonesThinBackend++;
5622 for (
auto &BB : *NewF) {
5623 for (
auto &Inst : BB) {
5624 Inst.setMetadata(LLVMContext::MD_memprof,
nullptr);
5625 Inst.setMetadata(LLVMContext::MD_callsite,
nullptr);
5630 TakeDeclNameAndReplace(PrevF, NewF);
5632 NewF->setName(Name);
5635 <<
"created clone " <<
ore::NV(
"NewFunction", NewF));
5638 CloneFuncAliases(NewF,
I);
5647 const Function *CallingFunc =
nullptr) {
5666 auto SrcFileMD =
F.getMetadata(
"thinlto_src_file");
5672 if (!SrcFileMD &&
F.isDeclaration()) {
5676 SrcFileMD = CallingFunc->getMetadata(
"thinlto_src_file");
5681 assert(SrcFileMD || OrigName ==
F.getName());
5683 StringRef SrcFile = M.getSourceFileName();
5695 if (!TheFnVI && OrigName ==
F.getName() &&
F.hasLocalLinkage() &&
5696 F.getName().contains(
'.')) {
5697 OrigName =
F.getName().rsplit(
'.').first;
5706 assert(TheFnVI ||
F.isDeclaration());
5710bool MemProfContextDisambiguation::initializeIndirectCallPromotionInfo(
5712 ICallAnalysis = std::make_unique<ICallPromotionAnalysis>();
5713 Symtab = std::make_unique<InstrProfSymtab>();
5724 if (
Error E = Symtab->create(M,
true,
false)) {
5725 std::string SymtabFailure =
toString(std::move(
E));
5726 M.getContext().emitError(
"Failed to create symtab: " + SymtabFailure);
5739 auto MIBIter = AllocNode.
MIBs.begin();
5740 for (
auto &MDOp : MemProfMD->
operands()) {
5742 auto StackIdIndexIter = MIBIter->StackIdIndices.begin();
5747 auto ContextIterBegin =
5751 (ContextIterBegin != StackContext.
end() && *ContextIterBegin == 0) ? 1
5753 for (
auto ContextIter = ContextIterBegin; ContextIter != StackContext.
end();
5758 if (LastStackContextId == *ContextIter)
5760 LastStackContextId = *ContextIter;
5761 assert(StackIdIndexIter != MIBIter->StackIdIndices.end());
5771bool MemProfContextDisambiguation::applyImport(
Module &M) {
5778 std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
5780 for (
auto &
A :
M.aliases()) {
5781 auto *Aliasee =
A.getAliaseeObject();
5783 FuncToAliasMap[
F].insert(&
A);
5786 if (!initializeIndirectCallPromotionInfo(M))
5793 OptimizationRemarkEmitter ORE(&
F);
5796 bool ClonesCreated =
false;
5797 unsigned NumClonesCreated = 0;
5798 auto CloneFuncIfNeeded = [&](
unsigned NumClones, FunctionSummary *
FS) {
5808 if (ClonesCreated) {
5809 assert(NumClonesCreated == NumClones);
5816 ClonesCreated =
true;
5817 NumClonesCreated = NumClones;
5820 auto CloneCallsite = [&](
const CallsiteInfo &StackNode, CallBase *CB,
5821 Function *CalledFunction, FunctionSummary *
FS) {
5823 CloneFuncIfNeeded(StackNode.
Clones.
size(), FS);
5835 if (CalledFunction != CB->getCalledOperand() &&
5836 (!GA || CalledFunction != GA->getAliaseeObject())) {
5837 SkippedCallsCloning++;
5843 auto CalleeOrigName = CalledFunction->getName();
5844 for (
unsigned J = 0; J < StackNode.
Clones.
size(); J++) {
5847 if (J > 0 && VMaps[J - 1]->
empty())
5851 if (!StackNode.
Clones[J])
5853 auto NewF =
M.getOrInsertFunction(
5855 CalledFunction->getFunctionType());
5869 ORE.emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofCall", CBClone)
5870 <<
ore::NV(
"Call", CBClone) <<
" in clone "
5872 <<
" assigned to call function clone "
5873 <<
ore::NV(
"Callee", NewF.getCallee()));
5887 ImportSummary->findSummaryInModule(TheFnVI,
M.getModuleIdentifier());
5891 auto SrcModuleMD =
F.getMetadata(
"thinlto_src_module");
5893 "enable-import-metadata is needed to emit thinlto_src_module");
5894 StringRef SrcModule =
5897 if (GVS->modulePath() == SrcModule) {
5898 GVSummary = GVS.get();
5923 if (
FS->allocs().empty() &&
FS->callsites().empty())
5926 auto SI =
FS->callsites().begin();
5927 auto AI =
FS->allocs().begin();
5932 DenseMap<ValueInfo, CallsiteInfo> MapTailCallCalleeVIToCallsite;
5935 for (
auto CallsiteIt =
FS->callsites().rbegin();
5936 CallsiteIt !=
FS->callsites().rend(); CallsiteIt++) {
5937 auto &Callsite = *CallsiteIt;
5941 if (!Callsite.StackIdIndices.empty())
5943 MapTailCallCalleeVIToCallsite.
insert({Callsite.Callee, Callsite});
5952 for (
auto &BB :
F) {
5953 for (
auto &
I : BB) {
5959 auto *CalledValue = CB->getCalledOperand();
5960 auto *CalledFunction = CB->getCalledFunction();
5961 if (CalledValue && !CalledFunction) {
5962 CalledValue = CalledValue->stripPointerCasts();
5969 assert(!CalledFunction &&
5970 "Expected null called function in callsite for alias");
5974 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
5975 I.getMetadata(LLVMContext::MD_callsite));
5976 auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof);
5982 if (CB->getAttributes().hasFnAttr(
"memprof") && !MemProfMD) {
5983 CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
5984 ? AllocTypeColdThinBackend++
5985 : AllocTypeNotColdThinBackend++;
5986 OrigAllocsThinBackend++;
5987 AllocVersionsThinBackend++;
5988 if (!MaxAllocVersionsThinBackend)
5989 MaxAllocVersionsThinBackend = 1;
5996 auto &AllocNode = *(AI++);
6004 CloneFuncIfNeeded(AllocNode.Versions.size(), FS);
6006 OrigAllocsThinBackend++;
6007 AllocVersionsThinBackend += AllocNode.Versions.size();
6008 if (MaxAllocVersionsThinBackend < AllocNode.Versions.size())
6009 MaxAllocVersionsThinBackend = AllocNode.Versions.size();
6019 if (AllocNode.Versions.size() == 1 &&
6022 AllocationType::NotCold ||
6024 AllocationType::None);
6025 UnclonableAllocsThinBackend++;
6031 return Type == ((uint8_t)AllocationType::NotCold |
6032 (uint8_t)AllocationType::Cold);
6036 for (
unsigned J = 0; J < AllocNode.Versions.size(); J++) {
6039 if (J > 0 && VMaps[J - 1]->
empty())
6042 if (AllocNode.Versions[J] == (uint8_t)AllocationType::None)
6045 AllocTy == AllocationType::Cold ? AllocTypeColdThinBackend++
6046 : AllocTypeNotColdThinBackend++;
6061 ORE.emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofAttribute", CBClone)
6062 <<
ore::NV(
"AllocationCall", CBClone) <<
" in clone "
6064 <<
" marked with memprof allocation attribute "
6065 <<
ore::NV(
"Attribute", AllocTypeString));
6067 }
else if (!CallsiteContext.empty()) {
6068 if (!CalledFunction) {
6072 assert(!CI || !CI->isInlineAsm());
6082 recordICPInfo(CB,
FS->callsites(), SI, ICallAnalysisInfo);
6088 CloneFuncIfNeeded(NumClones, FS);
6093 assert(SI !=
FS->callsites().end());
6094 auto &StackNode = *(
SI++);
6100 for (
auto StackId : CallsiteContext) {
6102 assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) ==
6108 CloneCallsite(StackNode, CB, CalledFunction, FS);
6110 }
else if (CB->isTailCall() && CalledFunction) {
6113 ValueInfo CalleeVI =
6115 if (CalleeVI && MapTailCallCalleeVIToCallsite.
count(CalleeVI)) {
6116 auto Callsite = MapTailCallCalleeVIToCallsite.
find(CalleeVI);
6117 assert(Callsite != MapTailCallCalleeVIToCallsite.
end());
6118 CloneCallsite(Callsite->second, CB, CalledFunction, FS);
6125 performICP(M,
FS->callsites(), VMaps, ICallAnalysisInfo, ORE);
6135 for (
auto &BB :
F) {
6136 for (
auto &
I : BB) {
6139 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
6140 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
6148unsigned MemProfContextDisambiguation::recordICPInfo(
6153 uint32_t NumCandidates;
6154 uint64_t TotalCount;
6155 auto CandidateProfileData =
6156 ICallAnalysis->getPromotionCandidatesForInstruction(
6158 if (CandidateProfileData.empty())
6164 bool ICPNeeded =
false;
6165 unsigned NumClones = 0;
6166 size_t CallsiteInfoStartIndex = std::distance(AllCallsites.
begin(), SI);
6167 for (
const auto &Candidate : CandidateProfileData) {
6169 auto CalleeValueInfo =
6171 ImportSummary->getValueInfo(Candidate.Value);
6174 assert(!CalleeValueInfo ||
SI->Callee == CalleeValueInfo);
6176 auto &StackNode = *(
SI++);
6181 [](
unsigned CloneNo) { return CloneNo != 0; });
6191 ICallAnalysisInfo.
push_back({CB, CandidateProfileData.vec(), NumCandidates,
6192 TotalCount, CallsiteInfoStartIndex});
6196void MemProfContextDisambiguation::performICP(
6198 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
6200 OptimizationRemarkEmitter &ORE) {
6207 for (
auto &Info : ICallAnalysisInfo) {
6210 auto TotalCount =
Info.TotalCount;
6211 unsigned NumClones = 0;
6214 for (
auto &Candidate :
Info.CandidateProfileData) {
6225 Function *TargetFunction = Symtab->getFunction(Candidate.Value);
6226 if (TargetFunction ==
nullptr ||
6234 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnableToFindTarget", CB)
6235 <<
"Memprof cannot promote indirect call: target with md5sum "
6236 <<
ore::NV(
"target md5sum", Candidate.Value) <<
" not found";
6241 RemainingCandidates.
push_back(Candidate);
6246 const char *Reason =
nullptr;
6249 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnableToPromote", CB)
6250 <<
"Memprof cannot promote indirect call to "
6251 <<
ore::NV(
"TargetFunction", TargetFunction)
6252 <<
" with count of " <<
ore::NV(
"TotalCount", TotalCount)
6255 RemainingCandidates.
push_back(Candidate);
6264 CallBase *CBClone = CB;
6265 for (
unsigned J = 0; J < NumClones; J++) {
6268 if (J > 0 && VMaps[J - 1]->
empty())
6278 TotalCount, isSamplePGO, &ORE);
6279 auto *TargetToUse = TargetFunction;
6282 if (StackNode.
Clones[J]) {
6301 <<
ore::NV(
"Call", CBClone) <<
" in clone "
6303 <<
" promoted and assigned to call function clone "
6304 <<
ore::NV(
"Callee", TargetToUse));
6308 TotalCount -= Candidate.Count;
6312 CallBase *CBClone = CB;
6313 for (
unsigned J = 0; J < NumClones; J++) {
6316 if (J > 0 && VMaps[J - 1]->
empty())
6322 CBClone->
setMetadata(LLVMContext::MD_prof,
nullptr);
6325 if (TotalCount != 0)
6327 IPVK_IndirectCallTarget,
Info.NumCandidates);
6332template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
6333bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process(
6334 function_ref<
void(StringRef, StringRef,
const Twine &)> EmitRemark,
6335 bool AllowExtraAnalysis) {
6337 dbgs() <<
"CCG before cloning:\n";
6341 exportToDot(
"postbuild");
6354 dbgs() <<
"CCG after cloning:\n";
6358 exportToDot(
"cloned");
6360 bool Changed = assignFunctions();
6363 dbgs() <<
"CCG after assigning function clones:\n";
6367 exportToDot(
"clonefuncassign");
6370 printTotalSizes(
errs(), EmitRemark);
6375bool MemProfContextDisambiguation::processModule(
6377 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) {
6382 return applyImport(M);
6395 ModuleCallsiteContextGraph CCG(M, OREGetter);
6398 return CCG.process();
6403 : ImportSummary(Summary), isSamplePGO(isSamplePGO) {
6408 "-memprof-dot-scope=alloc requires -memprof-dot-alloc-id");
6412 "-memprof-dot-scope=context requires -memprof-dot-context-id");
6416 "-memprof-dot-scope=all can't have both -memprof-dot-alloc-id and "
6417 "-memprof-dot-context-id");
6418 if (ImportSummary) {
6428 auto ReadSummaryFile =
6430 if (!ReadSummaryFile) {
6437 if (!ImportSummaryForTestingOrErr) {
6443 ImportSummaryForTesting = std::move(*ImportSummaryForTestingOrErr);
6444 ImportSummary = ImportSummaryForTesting.get();
6453 if (!processModule(M, OREGetter))
6472 bool AllowExtraAnalysis =
6475 IndexCallsiteContextGraph CCG(Index, isPrevailing);
6476 CCG.process(EmitRemark, AllowExtraAnalysis);
6491 for (
auto &BB :
F) {
6492 for (
auto &
I : BB) {
6496 if (CI->hasFnAttr(
"memprof")) {
6497 CI->removeFnAttr(
"memprof");
6500 if (!CI->hasMetadata(LLVMContext::MD_callsite)) {
6501 assert(!CI->hasMetadata(LLVMContext::MD_memprof));
6507 CI->setMetadata(LLVMContext::MD_memprof,
nullptr);
6508 CI->setMetadata(LLVMContext::MD_callsite,
nullptr);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Unify divergent function exit nodes
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
Module.h This file contains the declarations for the Module class.
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
static cl::opt< unsigned > TailCallSearchDepth("memprof-tail-call-search-depth", cl::init(5), cl::Hidden, cl::desc("Max depth to recursively search for missing " "frames through tail calls."))
uint64_t ComputeHash(const FunctionSummary *FS, unsigned I)
static cl::opt< DotScope > DotGraphScope("memprof-dot-scope", cl::desc("Scope of graph to export to dot"), cl::Hidden, cl::init(DotScope::All), cl::values(clEnumValN(DotScope::All, "all", "Export full callsite graph"), clEnumValN(DotScope::Alloc, "alloc", "Export only nodes with contexts feeding given " "-memprof-dot-alloc-id"), clEnumValN(DotScope::Context, "context", "Export only nodes with given -memprof-dot-context-id")))
static cl::opt< bool > DoMergeIteration("memprof-merge-iteration", cl::init(true), cl::Hidden, cl::desc("Iteratively apply merging on a node to catch new callers"))
static bool isMemProfClone(const Function &F)
static cl::opt< unsigned > AllocIdForDot("memprof-dot-alloc-id", cl::init(0), cl::Hidden, cl::desc("Id of alloc to export if -memprof-dot-scope=alloc " "or to highlight if -memprof-dot-scope=all"))
static cl::opt< unsigned > ContextIdForDot("memprof-dot-context-id", cl::init(0), cl::Hidden, cl::desc("Id of context to export if -memprof-dot-scope=context or to " "highlight otherwise"))
static cl::opt< bool > ExportToDot("memprof-export-to-dot", cl::init(false), cl::Hidden, cl::desc("Export graph to dot files."))
static void checkEdge(const std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > &Edge)
static cl::opt< bool > AllowRecursiveCallsites("memprof-allow-recursive-callsites", cl::init(true), cl::Hidden, cl::desc("Allow cloning of callsites involved in recursive cycles"))
bool checkColdOrNotCold(uint8_t AllocType)
static ValueInfo findValueInfoForFunc(const Function &F, const Module &M, const ModuleSummaryIndex *ImportSummary, const Function *CallingFunc=nullptr)
static cl::opt< bool > CloneRecursiveContexts("memprof-clone-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts through recursive cycles"))
static std::string getAllocTypeString(uint8_t AllocTypes)
bool DOTGraphTraits< constCallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * >::DoHighlight
static unsigned getMemProfCloneNum(const Function &F)
static cl::opt< unsigned > MemProfICPNoInlineThreshold("memprof-icp-noinline-threshold", cl::init(0), cl::Hidden, cl::desc("Minimum absolute count for promoted target to be inlinable"))
static SmallVector< std::unique_ptr< ValueToValueMapTy >, 4 > createFunctionClones(Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE, std::map< const Function *, SmallPtrSet< const GlobalAlias *, 1 > > &FuncToAliasMap, FunctionSummary *FS)
static cl::opt< bool > VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden, cl::desc("Perform verification checks on CallingContextGraph."))
static void checkNode(const ContextNode< DerivedCCG, FuncTy, CallTy > *Node, bool CheckEdges=true)
static cl::opt< bool > MergeClones("memprof-merge-clones", cl::init(true), cl::Hidden, cl::desc("Merge clones before assigning functions"))
static std::string getMemProfFuncName(Twine Base, unsigned CloneNo)
static cl::opt< std::string > MemProfImportSummary("memprof-import-summary", cl::desc("Import summary to use for testing the ThinLTO backend via opt"), cl::Hidden)
static const std::string MemProfCloneSuffix
static void updateSubprogramLinkageName(Function *NewFunc, StringRef Name)
static cl::opt< bool > AllowRecursiveContexts("memprof-allow-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts having recursive cycles"))
static cl::opt< std::string > DotFilePathPrefix("memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path prefix of the MemProf dot files."))
static cl::opt< bool > VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden, cl::desc("Perform frequent verification checks on nodes."))
static void checkAllocContextIds(const AllocInfo &AllocNode, const MDNode *MemProfMD, const CallStack< MDNode, MDNode::op_iterator > &CallsiteContext, const ModuleSummaryIndex *ImportSummary)
static cl::opt< bool > DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden, cl::desc("Dump CallingContextGraph to stdout after each stage."))
This is the interface to build a ModuleSummaryIndex for a module.
ModuleSummaryIndex.h This file contains the declarations the classes that hold the module index and s...
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
FunctionAnalysisManager FAM
std::pair< BasicBlock *, BasicBlock * > Edge
This file defines generic set operations that may be used on set's of different types,...
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
void print(OutputBuffer &OB) const
ValueInfo getAliaseeVI() const
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool empty() const
Check if the array is empty.
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
void setCalledOperand(Value *V)
Subprogram description. Uses SubclassData1.
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
iterator find(const_arg_type_t< KeyT > Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Implements a dense probed hash-table based set.
Function summary information to aid decisions and implementation of importing.
FunctionType * getFunctionType() const
Returns the FunctionType for me.
DISubprogram * getSubprogram() const
Get the attached subprogram.
const Function & getFunction() const
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
static LLVM_ABI GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Function and variable summary information to aid decisions and implementation of importing.
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
static bool isLocalLinkage(LinkageTypes Linkage)
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
uint64_t GUID
Declare a type to represent a global unique identifier for a global value.
Module * getParent()
Get the module that this global value is contained inside of...
LLVM_ABI void eraseFromParent()
This method unlinks 'this' from the containing module and deletes it.
static LLVM_ABI std::string getGlobalIdentifier(StringRef Name, GlobalValue::LinkageTypes Linkage, StringRef FileName)
Return the modified name for a global value suitable to be used as the key for a global lookup (e....
bool isWeakForLinker() const
@ InternalLinkage
Rename collisions when linking (static functions).
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
This is an important class for using LLVM in a threaded context.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
LLVM_ABI TempMDNode clone() const
Create a (temporary) clone of this.
static std::enable_if_t< std::is_base_of< MDNode, T >::value, T * > replaceWithUniqued(std::unique_ptr< T, TempMDNodeDeleter > N)
Replace a temporary node with a uniqued one.
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
size_type count(const KeyT &Key) const
LLVM_ABI MemProfContextDisambiguation(const ModuleSummaryIndex *Summary=nullptr, bool isSamplePGO=false)
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
Class to hold module path string table and global value map, and encapsulate methods for operating on...
static StringRef getOriginalNameBeforePromote(StringRef Name)
Helper to obtain the unpromoted name for a global value (or the original name if not promoted).
ValueInfo getValueInfo(const GlobalValueSummaryMapTy::value_type &R) const
Return a ValueInfo for the index value_type (convenient when iterating index).
uint64_t getStackIdAtIndex(unsigned Index) const
A Module instance is used to store all the information related to an LLVM module.
LLVMContext & getContext() const
Get the global data context.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
A discriminated union of two or more pointer types, with the discriminator in the low bits of the poi...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
A class that wrap the SHA1 algorithm.
LLVM_ABI void update(ArrayRef< uint8_t > Data)
Digest more data.
LLVM_ABI std::array< uint8_t, 20 > result()
Return the current raw 160-bits SHA1 for the digested data since the last call to init().
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
void insert_range(Range &&R)
void swap(DenseSetImpl &RHS)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
bool erase(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
An efficient, type-erasing, non-owning reference to a callable.
Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresp...
CallStackIterator beginAfterSharedPrefix(const CallStack &Other)
CallStackIterator end() const
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
@ CE
Windows NT (Windows on ARM)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
LLVM_ABI AllocationType getMIBAllocType(const MDNode *MIB)
Returns the allocation type from an MIB metadata node.
LLVM_ABI bool metadataMayIncludeContextSizeInfo()
Whether the alloc memprof metadata may include context size info for some MIBs (but possibly not all)...
LLVM_ABI bool hasSingleAllocType(uint8_t AllocTypes)
True if the AllocTypes bitmask contains just a single type.
LLVM_ABI std::string getAllocTypeAttributeString(AllocationType Type)
Returns the string to use in attributes with the given type.
LLVM_ABI MDNode * getMIBStackNode(const MDNode *MIB)
Returns the stack node from an MIB metadata node.
LLVM_ABI void removeAnyExistingAmbiguousAttribute(CallBase *CB)
Removes any existing "ambiguous" memprof attribute.
DiagnosticInfoOptimizationBase::Argument NV
LLVM_ABI CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< NodeBase * > Node
NodeAddr< FuncNode * > Func
friend class Instruction
Iterator for Instructions in a `BasicBlock.
uint64_t read64le(const void *P)
void write32le(void *P, uint32_t V)
This is an optimization pass for GlobalISel generic memory operations.
cl::opt< unsigned > MinClonedColdBytePercent("memprof-cloning-cold-threshold", cl::init(100), cl::Hidden, cl::desc("Min percent of cold bytes to hint alloc cold during cloning"))
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
LLVM_ABI void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner={})
Log all errors (if any) in E to OS.
FunctionAddr VTableAddr Value
void stable_sort(R &&Range)
cl::opt< bool > MemProfReportHintedSizes("memprof-report-hinted-sizes", cl::init(false), cl::Hidden, cl::desc("Report total allocation sizes of hinted allocations"))
LLVM_ABI bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI bool mayHaveMemprofSummary(const CallBase *CB)
Returns true if the instruction could have memprof metadata, used to ensure consistency between summa...
constexpr from_range_t from_range
static cl::opt< bool > MemProfRequireDefinitionForPromotion("memprof-require-definition-for-promotion", cl::init(false), cl::Hidden, cl::desc("Require target function definition when promoting indirect calls"))
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
cl::opt< unsigned > MemProfTopNImportant("memprof-top-n-important", cl::init(10), cl::Hidden, cl::desc("Number of largest cold contexts to consider important"))
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
void set_subtract(S1Ty &S1, const S2Ty &S2)
set_subtract(A, B) - Compute A := A - B
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
bool set_intersects(const S1Ty &S1, const S2Ty &S2)
set_intersects(A, B) - Return true iff A ^ B is non empty
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
LLVM_ABI Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndex(MemoryBufferRef Buffer)
Parse the specified bitcode buffer, returning the module summary index.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
cl::opt< unsigned > MaxSummaryIndirectEdges("module-summary-max-indirect-edges", cl::init(0), cl::Hidden, cl::desc("Max number of summary edges added from " "indirect call profile metadata"))
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
FunctionAddr VTableAddr Count
bool set_union(S1Ty &S1, const S2Ty &S2)
set_union(A, B) - Compute A := A u B, return whether A changed.
cl::opt< bool > SupportsHotColdNew
Indicate we are linking with an allocator that supports hot/cold operator new interfaces.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
S1Ty set_intersection(const S1Ty &S1, const S2Ty &S2)
set_intersection(A, B) - Return A ^ B
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
S1Ty set_difference(const S1Ty &S1, const S2Ty &S2)
set_difference(A, B) - Return A - B
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Expected< T > errorOrToExpected(ErrorOr< T > &&EO)
Convert an ErrorOr<T> to an Expected<T>.
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
LLVM_ABI Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
cl::opt< bool > MemProfFixupImportant("memprof-fixup-important", cl::init(true), cl::Hidden, cl::desc("Enables edge fixup for important contexts"))
DOTGraphTraits(bool IsSimple=false)
typename GTraits::NodeRef NodeRef
static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter, GraphType G)
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
typename GTraits::ChildIteratorType ChildIteratorType
static std::string getNodeAttributes(NodeRef Node, GraphType G)
static bool isNodeHidden(NodeRef Node, GraphType G)
static std::string getNodeLabel(NodeRef Node, GraphType G)
GraphTraits< GraphType > GTraits
static NodeRef getNode(const NodePtrTy &P)
static const ContextNode< DerivedCCG, FuncTy, CallTy > * GetCallee(const EdgePtrTy &P)
static ChildIteratorType child_end(NodeRef N)
std::unique_ptr< ContextNode< DerivedCCG, FuncTy, CallTy > > NodePtrTy
mapped_iterator< typename std::vector< std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > >::const_iterator, decltype(&GetCallee)> ChildIteratorType
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
const ContextNode< DerivedCCG, FuncTy, CallTy > * NodeRef
mapped_iterator< typename std::vector< NodePtrTy >::const_iterator, decltype(&getNode)> nodes_iterator
static ChildIteratorType child_begin(NodeRef N)
static NodeRef getEntryNode(GraphType G)
static nodes_iterator nodes_begin(GraphType G)
static nodes_iterator nodes_end(GraphType G)
std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > EdgePtrTy
Summary of memprof metadata on allocations.
std::vector< MIBInfo > MIBs
SmallVector< unsigned > StackIdIndices
SmallVector< unsigned > Clones
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits(bool simple=false)
An information struct used to provide DenseMap with the various necessary components for a given valu...
typename GraphType::UnknownGraphTypeError NodeRef
Struct that holds a reference to a particular GUID in a global value summary.
ArrayRef< std::unique_ptr< GlobalValueSummary > > getSummaryList() const
GlobalValue::GUID getGUID() const
PointerUnion< CallsiteInfo *, AllocInfo * > SimpleType
static SimpleType getSimplifiedValue(IndexCall &Val)
const PointerUnion< CallsiteInfo *, AllocInfo * > SimpleType
static SimpleType getSimplifiedValue(const IndexCall &Val)
Define a template that can be specialized by smart pointers to reflect the fact that they are automat...