52#include <unordered_map>
57#define DEBUG_TYPE "memprof-context-disambiguation"
60 "Number of function clones created during whole program analysis");
62 "Number of function clones created during ThinLTO backend");
64 "Number of functions that had clones created during ThinLTO backend");
66 FunctionCloneDuplicatesThinBackend,
67 "Number of function clone duplicates detected during ThinLTO backend");
68STATISTIC(AllocTypeNotCold,
"Number of not cold static allocations (possibly "
69 "cloned) during whole program analysis");
70STATISTIC(AllocTypeCold,
"Number of cold static allocations (possibly cloned) "
71 "during whole program analysis");
73 "Number of not cold static allocations (possibly cloned) during "
75STATISTIC(AllocTypeColdThinBackend,
"Number of cold static allocations "
76 "(possibly cloned) during ThinLTO backend");
78 "Number of original (not cloned) allocations with memprof profiles "
79 "during ThinLTO backend");
81 AllocVersionsThinBackend,
82 "Number of allocation versions (including clones) during ThinLTO backend");
84 "Maximum number of allocation versions created for an original "
85 "allocation during ThinLTO backend");
87 "Number of unclonable ambigous allocations during ThinLTO backend");
89 "Number of edges removed due to mismatched callees (profiled vs IR)");
91 "Number of profiled callees found via tail calls");
93 "Aggregate depth of profiled callees found via tail calls");
95 "Maximum depth of profiled callees found via tail calls");
97 "Number of profiled callees found via multiple tail call chains");
98STATISTIC(DeferredBackedges,
"Number of backedges with deferred cloning");
99STATISTIC(NewMergedNodes,
"Number of new nodes created during merging");
100STATISTIC(NonNewMergedNodes,
"Number of non new nodes used during merging");
102 "Number of missing alloc nodes for context ids");
104 "Number of calls skipped during cloning due to unexpected operand");
106 "Number of callsites assigned to call multiple non-matching clones");
107STATISTIC(TotalMergeInvokes,
"Number of merge invocations for nodes");
108STATISTIC(TotalMergeIters,
"Number of merge iterations for nodes");
109STATISTIC(MaxMergeIters,
"Max merge iterations for nodes");
110STATISTIC(NumImportantContextIds,
"Number of important context ids");
111STATISTIC(NumFixupEdgeIdsInserted,
"Number of fixup edge ids inserted");
112STATISTIC(NumFixupEdgesAdded,
"Number of fixup edges added");
113STATISTIC(NumFixedContexts,
"Number of contexts with fixed edges");
118 cl::desc(
"Specify the path prefix of the MemProf dot files."));
122 cl::desc(
"Export graph to dot files."));
127 cl::desc(
"Iteratively apply merging on a node to catch new callers"));
137 "memprof-dot-scope",
cl::desc(
"Scope of graph to export to dot"),
142 "Export only nodes with contexts feeding given "
143 "-memprof-dot-alloc-id"),
145 "Export only nodes with given -memprof-dot-context-id")));
149 cl::desc(
"Id of alloc to export if -memprof-dot-scope=alloc "
150 "or to highlight if -memprof-dot-scope=all"));
154 cl::desc(
"Id of context to export if -memprof-dot-scope=context or to "
155 "highlight otherwise"));
159 cl::desc(
"Dump CallingContextGraph to stdout after each stage."));
163 cl::desc(
"Perform verification checks on CallingContextGraph."));
167 cl::desc(
"Perform frequent verification checks on nodes."));
170 "memprof-import-summary",
171 cl::desc(
"Import summary to use for testing the ThinLTO backend via opt"),
177 cl::desc(
"Max depth to recursively search for missing "
178 "frames through tail calls."));
183 cl::desc(
"Allow cloning of callsites involved in recursive cycles"));
187 cl::desc(
"Allow cloning of contexts through recursive cycles"));
194 cl::desc(
"Merge clones before assigning functions"));
203 cl::desc(
"Allow cloning of contexts having recursive cycles"));
209 cl::desc(
"Minimum absolute count for promoted target to be inlinable"));
220 cl::desc(
"Linking with hot/cold operator new interfaces"));
225 "Require target function definition when promoting indirect calls"));
232 cl::desc(
"Number of largest cold contexts to consider important"));
236 cl::desc(
"Enables edge fixup for important contexts"));
256template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
257class CallsiteContextGraph {
259 CallsiteContextGraph() =
default;
260 CallsiteContextGraph(
const CallsiteContextGraph &) =
default;
261 CallsiteContextGraph(CallsiteContextGraph &&) =
default;
268 void identifyClones();
275 bool assignFunctions();
282 const CallsiteContextGraph &CCG) {
287 friend struct GraphTraits<
288 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
289 friend struct DOTGraphTraits<
290 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
292 void exportToDot(std::string Label)
const;
295 struct FuncInfo final
296 :
public std::pair<FuncTy *, unsigned > {
297 using Base = std::pair<FuncTy *, unsigned>;
298 FuncInfo(
const Base &
B) : Base(
B) {}
299 FuncInfo(FuncTy *
F =
nullptr,
unsigned CloneNo = 0) : Base(
F, CloneNo) {}
300 explicit operator bool()
const {
return this->first !=
nullptr; }
301 FuncTy *func()
const {
return this->first; }
302 unsigned cloneNo()
const {
return this->second; }
306 struct CallInfo final :
public std::pair<CallTy, unsigned > {
307 using Base = std::pair<CallTy, unsigned>;
308 CallInfo(
const Base &
B) : Base(
B) {}
309 CallInfo(CallTy
Call =
nullptr,
unsigned CloneNo = 0)
310 : Base(
Call, CloneNo) {}
311 explicit operator bool()
const {
return (
bool)this->first; }
312 CallTy call()
const {
return this->first; }
313 unsigned cloneNo()
const {
return this->second; }
314 void setCloneNo(
unsigned N) { this->second =
N; }
315 void print(raw_ostream &OS)
const {
316 if (!
operator bool()) {
322 OS <<
"\t(clone " << cloneNo() <<
")";
328 friend raw_ostream &
operator<<(raw_ostream &OS,
const CallInfo &
Call) {
348 bool Recursive =
false;
352 uint8_t AllocTypes = 0;
371 uint64_t OrigStackOrAllocId = 0;
375 std::vector<std::shared_ptr<ContextEdge>> CalleeEdges;
379 std::vector<std::shared_ptr<ContextEdge>> CallerEdges;
383 bool useCallerEdgesForContextInfo()
const {
388 assert(!CalleeEdges.empty() || CallerEdges.empty() || IsAllocation ||
400 DenseSet<uint32_t> getContextIds()
const {
406 for (
auto &
Edge : CalleeEdges.empty() ? CallerEdges : CalleeEdges)
408 DenseSet<uint32_t> ContextIds;
411 CalleeEdges, useCallerEdgesForContextInfo()
413 : std::vector<std::shared_ptr<ContextEdge>>());
414 for (
const auto &
Edge : Edges)
421 uint8_t computeAllocType()
const {
423 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
424 uint8_t
AllocType = (uint8_t)AllocationType::None;
426 CalleeEdges, useCallerEdgesForContextInfo()
428 : std::vector<std::shared_ptr<ContextEdge>>());
429 for (
const auto &
Edge : Edges) {
440 bool emptyContextIds()
const {
442 CalleeEdges, useCallerEdgesForContextInfo()
444 : std::vector<std::shared_ptr<ContextEdge>>());
445 for (
const auto &
Edge : Edges) {
446 if (!
Edge->getContextIds().empty())
453 std::vector<ContextNode *> Clones;
456 ContextNode *CloneOf =
nullptr;
458 ContextNode(
bool IsAllocation) : IsAllocation(IsAllocation), Call() {}
460 ContextNode(
bool IsAllocation, CallInfo
C)
461 : IsAllocation(IsAllocation), Call(
C) {}
463 void addClone(ContextNode *Clone) {
465 CloneOf->Clones.push_back(Clone);
466 Clone->CloneOf = CloneOf;
468 Clones.push_back(Clone);
470 Clone->CloneOf =
this;
474 ContextNode *getOrigNode() {
481 unsigned int ContextId);
483 ContextEdge *findEdgeFromCallee(
const ContextNode *Callee);
484 ContextEdge *findEdgeFromCaller(
const ContextNode *Caller);
485 void eraseCalleeEdge(
const ContextEdge *
Edge);
486 void eraseCallerEdge(
const ContextEdge *
Edge);
488 void setCall(CallInfo
C) { Call =
C; }
490 bool hasCall()
const {
return (
bool)Call.call(); }
492 void printCall(raw_ostream &OS)
const { Call.print(OS); }
496 bool isRemoved()
const {
502 (AllocTypes == (uint8_t)AllocationType::None) ==
504 return AllocTypes == (uint8_t)AllocationType::None;
508 void print(raw_ostream &OS)
const;
510 friend raw_ostream &
operator<<(raw_ostream &OS,
const ContextNode &Node) {
524 uint8_t AllocTypes = 0;
532 bool IsBackedge =
false;
535 DenseSet<uint32_t> ContextIds;
537 ContextEdge(ContextNode *Callee, ContextNode *Caller, uint8_t
AllocType,
538 DenseSet<uint32_t> ContextIds)
539 : Callee(Callee), Caller(Caller), AllocTypes(
AllocType),
540 ContextIds(std::
move(ContextIds)) {}
542 DenseSet<uint32_t> &getContextIds() {
return ContextIds; }
546 inline void clear() {
548 AllocTypes = (uint8_t)AllocationType::None;
556 inline bool isRemoved()
const {
557 if (Callee || Caller)
562 assert(AllocTypes == (uint8_t)AllocationType::None);
563 assert(ContextIds.empty());
568 void print(raw_ostream &OS)
const;
570 friend raw_ostream &
operator<<(raw_ostream &OS,
const ContextEdge &
Edge) {
578 void removeNoneTypeCalleeEdges(ContextNode *Node);
579 void removeNoneTypeCallerEdges(ContextNode *Node);
581 recursivelyRemoveNoneTypeCalleeEdges(ContextNode *Node,
582 DenseSet<const ContextNode *> &Visited);
587 template <
class NodeT,
class IteratorT>
588 std::vector<uint64_t>
589 getStackIdsWithContextNodes(CallStack<NodeT, IteratorT> &CallsiteContext);
593 ContextNode *addAllocNode(CallInfo
Call,
const FuncTy *
F);
596 template <
class NodeT,
class IteratorT>
597 void addStackNodesForMIB(
598 ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext,
601 std::map<uint64_t, uint32_t> &TotalSizeToContextIdTopNCold);
606 void updateStackNodes();
615 void fixupImportantContexts();
619 void handleCallsitesWithMultipleTargets();
622 void markBackedges();
632 bool partitionCallsByCallee(
634 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode);
638 MapVector<FuncTy *, std::vector<CallInfo>> FuncToCallsWithMetadata;
641 std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;
645 DenseSet<uint32_t> DotAllocContextIds;
648 using EdgeIter =
typename std::vector<std::shared_ptr<ContextEdge>>::iterator;
653 struct CallContextInfo {
657 std::vector<uint64_t> StackIds;
662 DenseSet<uint32_t> ContextIds;
671 void removeEdgeFromGraph(ContextEdge *
Edge, EdgeIter *EI =
nullptr,
672 bool CalleeIter =
true);
680 void assignStackNodesPostOrder(
681 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
682 DenseMap<uint64_t, std::vector<CallContextInfo>> &StackIdToMatchingCalls,
683 DenseMap<CallInfo, CallInfo> &CallToMatchingCall,
684 const DenseSet<uint32_t> &ImportantContextIds);
689 DenseSet<uint32_t> duplicateContextIds(
690 const DenseSet<uint32_t> &StackSequenceContextIds,
691 DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds);
694 void propagateDuplicateContextIds(
695 const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds);
700 void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode,
702 DenseSet<uint32_t> RemainingContextIds);
707 uint64_t getStackId(uint64_t IdOrIndex)
const {
708 return static_cast<const DerivedCCG *
>(
this)->getStackId(IdOrIndex);
718 calleesMatch(CallTy
Call, EdgeIter &EI,
719 MapVector<CallInfo, ContextNode *> &TailCallToContextNodeMap);
723 const FuncTy *getCalleeFunc(CallTy
Call) {
724 return static_cast<DerivedCCG *
>(
this)->getCalleeFunc(
Call);
730 bool calleeMatchesFunc(
731 CallTy
Call,
const FuncTy *Func,
const FuncTy *CallerFunc,
732 std::vector<std::pair<CallTy, FuncTy *>> &FoundCalleeChain) {
733 return static_cast<DerivedCCG *
>(
this)->calleeMatchesFunc(
734 Call, Func, CallerFunc, FoundCalleeChain);
738 bool sameCallee(CallTy Call1, CallTy Call2) {
739 return static_cast<DerivedCCG *
>(
this)->sameCallee(Call1, Call2);
744 std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy
Call) {
745 return static_cast<DerivedCCG *
>(
this)->getStackIdsWithContextNodesForCall(
750 uint64_t getLastStackId(CallTy
Call) {
751 return static_cast<DerivedCCG *
>(
this)->getLastStackId(
Call);
756 AllocType == AllocationType::Cold ? AllocTypeCold++ : AllocTypeNotCold++;
757 static_cast<DerivedCCG *
>(
this)->updateAllocationCall(
Call,
AllocType);
762 return static_cast<const DerivedCCG *
>(
this)->getAllocationCallType(
Call);
767 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc) {
768 static_cast<DerivedCCG *
>(
this)->updateCall(CallerCall, CalleeFunc);
774 FuncInfo cloneFunctionForCallsite(
775 FuncInfo &Func, CallInfo &
Call, DenseMap<CallInfo, CallInfo> &CallMap,
776 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
777 return static_cast<DerivedCCG *
>(
this)->cloneFunctionForCallsite(
778 Func,
Call, CallMap, CallsWithMetadataInFunc, CloneNo);
783 std::string getLabel(
const FuncTy *Func,
const CallTy
Call,
784 unsigned CloneNo)
const {
785 return static_cast<const DerivedCCG *
>(
this)->getLabel(Func,
Call, CloneNo);
789 ContextNode *createNewNode(
bool IsAllocation,
const FuncTy *
F =
nullptr,
790 CallInfo
C = CallInfo()) {
791 NodeOwner.push_back(std::make_unique<ContextNode>(IsAllocation,
C));
792 auto *NewNode = NodeOwner.back().get();
794 NodeToCallingFunc[NewNode] =
F;
795 NewNode->NodeId = NodeOwner.size();
800 ContextNode *getNodeForInst(
const CallInfo &
C);
801 ContextNode *getNodeForAlloc(
const CallInfo &
C);
802 ContextNode *getNodeForStackId(uint64_t StackId);
806 uint8_t computeAllocType(DenseSet<uint32_t> &ContextIds)
const;
811 uint8_t intersectAllocTypesImpl(
const DenseSet<uint32_t> &Node1Ids,
812 const DenseSet<uint32_t> &Node2Ids)
const;
816 uint8_t intersectAllocTypes(
const DenseSet<uint32_t> &Node1Ids,
817 const DenseSet<uint32_t> &Node2Ids)
const;
824 moveEdgeToNewCalleeClone(
const std::shared_ptr<ContextEdge> &
Edge,
825 DenseSet<uint32_t> ContextIdsToMove = {});
831 void moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &
Edge,
832 ContextNode *NewCallee,
833 bool NewClone =
false,
834 DenseSet<uint32_t> ContextIdsToMove = {});
841 void moveCalleeEdgeToNewCaller(
const std::shared_ptr<ContextEdge> &
Edge,
842 ContextNode *NewCaller);
845 void markBackedges(ContextNode *Node, DenseSet<const ContextNode *> &Visited,
846 DenseSet<const ContextNode *> &CurrentStack);
850 mergeClones(ContextNode *Node, DenseSet<const ContextNode *> &Visited,
851 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode);
853 void mergeNodeCalleeClones(
854 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
855 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode);
858 void findOtherCallersToShareMerge(
859 ContextNode *Node, std::vector<std::shared_ptr<ContextEdge>> &CalleeEdges,
860 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode,
861 DenseSet<ContextNode *> &OtherCallersToShareMerge);
867 void identifyClones(ContextNode *Node, DenseSet<const ContextNode *> &Visited,
868 const DenseSet<uint32_t> &AllocContextIds);
871 DenseMap<uint32_t, AllocationType> ContextIdToAllocationType;
877 DenseMap<uint32_t, std::vector<ContextTotalSize>> ContextIdToContextSizeInfos;
883 DenseMap<uint64_t, ContextNode *> StackEntryIdToContextNodeMap;
887 struct ImportantContextInfo {
889 std::vector<uint64_t> StackIds;
892 unsigned MaxLength = 0;
896 std::map<std::vector<uint64_t>, ContextNode *> StackIdsToNode;
900 DenseMap<uint32_t, ImportantContextInfo> ImportantContextIdInfo;
905 void recordStackNode(std::vector<uint64_t> &StackIds, ContextNode *Node,
909 const DenseSet<uint32_t> &NodeContextIds,
910 const DenseSet<uint32_t> &ImportantContextIds) {
915 DenseSet<uint32_t> Ids =
919 auto Size = StackIds.size();
920 for (
auto Id : Ids) {
921 auto &
Entry = ImportantContextIdInfo[
Id];
930 MapVector<CallInfo, ContextNode *> AllocationCallToContextNodeMap;
931 MapVector<CallInfo, ContextNode *> NonAllocationCallToContextNodeMap;
934 std::vector<std::unique_ptr<ContextNode>> NodeOwner;
940 unsigned int LastContextId = 0;
943template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
945 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode;
946template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
948 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge;
949template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
951 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::FuncInfo;
952template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
954 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::CallInfo;
957class ModuleCallsiteContextGraph
958 :
public CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
961 ModuleCallsiteContextGraph(
963 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter);
966 friend CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
969 uint64_t getStackId(uint64_t IdOrIndex)
const;
971 bool calleeMatchesFunc(
972 Instruction *
Call,
const Function *Func,
const Function *CallerFunc,
973 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain);
974 bool sameCallee(Instruction *Call1, Instruction *Call2);
975 bool findProfiledCalleeThroughTailCalls(
976 const Function *ProfiledCallee,
Value *CurCallee,
unsigned Depth,
977 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
978 bool &FoundMultipleCalleeChains);
979 uint64_t getLastStackId(Instruction *
Call);
980 std::vector<uint64_t> getStackIdsWithContextNodesForCall(Instruction *
Call);
983 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
984 CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
986 cloneFunctionForCallsite(FuncInfo &Func, CallInfo &
Call,
987 DenseMap<CallInfo, CallInfo> &CallMap,
988 std::vector<CallInfo> &CallsWithMetadataInFunc,
990 std::string getLabel(
const Function *Func,
const Instruction *
Call,
991 unsigned CloneNo)
const;
994 llvm::function_ref<OptimizationRemarkEmitter &(
Function *)> OREGetter;
1000struct IndexCall :
public PointerUnion<CallsiteInfo *, AllocInfo *> {
1001 IndexCall() : PointerUnion() {}
1002 IndexCall(std::nullptr_t) : IndexCall() {}
1003 IndexCall(CallsiteInfo *StackNode) : PointerUnion(StackNode) {}
1004 IndexCall(AllocInfo *AllocNode) : PointerUnion(AllocNode) {}
1005 IndexCall(PointerUnion PT) : PointerUnion(PT) {}
1007 IndexCall *operator->() {
return this; }
1009 void print(raw_ostream &OS)
const {
1010 PointerUnion<CallsiteInfo *, AllocInfo *>
Base = *
this;
1035class IndexCallsiteContextGraph
1036 :
public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
1039 IndexCallsiteContextGraph(
1040 ModuleSummaryIndex &Index,
1044 ~IndexCallsiteContextGraph() {
1049 for (
auto &
I : FunctionCalleesToSynthesizedCallsiteInfos) {
1051 for (
auto &Callsite :
I.second)
1052 FS->addCallsite(*Callsite.second);
1057 friend CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
1060 uint64_t getStackId(uint64_t IdOrIndex)
const;
1061 const FunctionSummary *getCalleeFunc(IndexCall &
Call);
1062 bool calleeMatchesFunc(
1063 IndexCall &
Call,
const FunctionSummary *Func,
1064 const FunctionSummary *CallerFunc,
1065 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain);
1066 bool sameCallee(IndexCall &Call1, IndexCall &Call2);
1067 bool findProfiledCalleeThroughTailCalls(
1068 ValueInfo ProfiledCallee, ValueInfo CurCallee,
unsigned Depth,
1069 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
1070 bool &FoundMultipleCalleeChains);
1071 uint64_t getLastStackId(IndexCall &
Call);
1072 std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &
Call);
1075 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
1076 CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
1077 IndexCall>::FuncInfo
1078 cloneFunctionForCallsite(FuncInfo &Func, CallInfo &
Call,
1079 DenseMap<CallInfo, CallInfo> &CallMap,
1080 std::vector<CallInfo> &CallsWithMetadataInFunc,
1082 std::string getLabel(
const FunctionSummary *Func,
const IndexCall &
Call,
1083 unsigned CloneNo)
const;
1087 std::map<const FunctionSummary *, ValueInfo> FSToVIMap;
1089 const ModuleSummaryIndex &Index;
1097 std::unordered_map<FunctionSummary *,
1098 std::map<ValueInfo, std::unique_ptr<CallsiteInfo>>>
1099 FunctionCalleesToSynthesizedCallsiteInfos;
1110 :
public DenseMapInfo<std::pair<IndexCall, unsigned>> {};
1113 :
public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {};
1134template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1135bool allocTypesMatch(
1136 const std::vector<uint8_t> &InAllocTypes,
1137 const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>>
1141 assert(InAllocTypes.size() == Edges.size());
1143 InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(), Edges.end(),
1145 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &r) {
1149 if (l == (uint8_t)AllocationType::None ||
1150 r->AllocTypes == (uint8_t)AllocationType::None)
1152 return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes);
1161template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1162bool allocTypesMatchClone(
1163 const std::vector<uint8_t> &InAllocTypes,
1164 const ContextNode<DerivedCCG, FuncTy, CallTy> *Clone) {
1165 const ContextNode<DerivedCCG, FuncTy, CallTy> *
Node = Clone->CloneOf;
1169 assert(InAllocTypes.size() ==
Node->CalleeEdges.size());
1173 for (
const auto &
E : Clone->CalleeEdges) {
1175 EdgeCalleeMap[
E->Callee] =
E->AllocTypes;
1179 for (
unsigned I = 0;
I <
Node->CalleeEdges.size();
I++) {
1180 auto Iter = EdgeCalleeMap.
find(
Node->CalleeEdges[
I]->Callee);
1182 if (Iter == EdgeCalleeMap.
end())
1190 if (allocTypeToUse(Iter->second) != allocTypeToUse(InAllocTypes[
I]))
1198template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1199typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1200CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst(
1201 const CallInfo &
C) {
1202 ContextNode *
Node = getNodeForAlloc(
C);
1206 return NonAllocationCallToContextNodeMap.lookup(
C);
1209template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1210typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1211CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForAlloc(
1212 const CallInfo &
C) {
1213 return AllocationCallToContextNodeMap.lookup(
C);
1216template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1217typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1218CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForStackId(
1220 auto StackEntryNode = StackEntryIdToContextNodeMap.
find(StackId);
1221 if (StackEntryNode != StackEntryIdToContextNodeMap.
end())
1222 return StackEntryNode->second;
1226template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1227void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1229 unsigned int ContextId) {
1230 for (
auto &
Edge : CallerEdges) {
1231 if (
Edge->Caller == Caller) {
1233 Edge->getContextIds().insert(ContextId);
1237 std::shared_ptr<ContextEdge>
Edge = std::make_shared<ContextEdge>(
1238 this, Caller, (uint8_t)
AllocType, DenseSet<uint32_t>({ContextId}));
1239 CallerEdges.push_back(
Edge);
1243template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1244void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::removeEdgeFromGraph(
1245 ContextEdge *
Edge, EdgeIter *EI,
bool CalleeIter) {
1261 auto CalleeCallerCount =
Callee->CallerEdges.size();
1262 auto CallerCalleeCount =
Caller->CalleeEdges.size();
1267 }
else if (CalleeIter) {
1269 *EI =
Caller->CalleeEdges.erase(*EI);
1272 *EI =
Callee->CallerEdges.erase(*EI);
1274 assert(
Callee->CallerEdges.size() < CalleeCallerCount);
1275 assert(
Caller->CalleeEdges.size() < CallerCalleeCount);
1278template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1279void CallsiteContextGraph<
1280 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *Node) {
1281 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();) {
1283 if (
Edge->AllocTypes == (uint8_t)AllocationType::None) {
1285 removeEdgeFromGraph(
Edge.get(), &EI,
true);
1291template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1292void CallsiteContextGraph<
1293 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCallerEdges(ContextNode *Node) {
1294 for (
auto EI =
Node->CallerEdges.begin(); EI !=
Node->CallerEdges.end();) {
1296 if (
Edge->AllocTypes == (uint8_t)AllocationType::None) {
1298 Edge->Caller->eraseCalleeEdge(
Edge.get());
1299 EI =
Node->CallerEdges.erase(EI);
1305template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1306typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1307CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1308 findEdgeFromCallee(
const ContextNode *Callee) {
1309 for (
const auto &
Edge : CalleeEdges)
1310 if (
Edge->Callee == Callee)
1315template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1316typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1317CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1318 findEdgeFromCaller(
const ContextNode *Caller) {
1319 for (
const auto &
Edge : CallerEdges)
1320 if (
Edge->Caller == Caller)
1325template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1326void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1327 eraseCalleeEdge(
const ContextEdge *
Edge) {
1329 CalleeEdges, [
Edge](
const std::shared_ptr<ContextEdge> &CalleeEdge) {
1330 return CalleeEdge.get() ==
Edge;
1332 assert(EI != CalleeEdges.end());
1333 CalleeEdges.erase(EI);
1336template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1337void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1338 eraseCallerEdge(
const ContextEdge *
Edge) {
1340 CallerEdges, [
Edge](
const std::shared_ptr<ContextEdge> &CallerEdge) {
1341 return CallerEdge.get() ==
Edge;
1343 assert(EI != CallerEdges.end());
1344 CallerEdges.erase(EI);
1347template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1348uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType(
1349 DenseSet<uint32_t> &ContextIds)
const {
1351 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
1352 uint8_t
AllocType = (uint8_t)AllocationType::None;
1353 for (
auto Id : ContextIds) {
1354 AllocType |= (uint8_t)ContextIdToAllocationType.
at(Id);
1362template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1364CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypesImpl(
1365 const DenseSet<uint32_t> &Node1Ids,
1366 const DenseSet<uint32_t> &Node2Ids)
const {
1368 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
1369 uint8_t
AllocType = (uint8_t)AllocationType::None;
1370 for (
auto Id : Node1Ids) {
1371 if (!Node2Ids.
count(Id))
1373 AllocType |= (uint8_t)ContextIdToAllocationType.
at(Id);
1381template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1382uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypes(
1383 const DenseSet<uint32_t> &Node1Ids,
1384 const DenseSet<uint32_t> &Node2Ids)
const {
1385 if (Node1Ids.
size() < Node2Ids.
size())
1386 return intersectAllocTypesImpl(Node1Ids, Node2Ids);
1388 return intersectAllocTypesImpl(Node2Ids, Node1Ids);
1391template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1392typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1393CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode(
1394 CallInfo
Call,
const FuncTy *
F) {
1396 ContextNode *AllocNode = createNewNode(
true,
F,
Call);
1397 AllocationCallToContextNodeMap[
Call] = AllocNode;
1399 AllocNode->OrigStackOrAllocId = LastContextId;
1402 AllocNode->AllocTypes = (uint8_t)AllocationType::None;
1418template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1419template <
class NodeT,
class IteratorT>
1420void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
1421 ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext,
1424 std::map<uint64_t, uint32_t> &TotalSizeToContextIdTopNCold) {
1430 ContextIdToAllocationType[++LastContextId] =
AllocType;
1432 bool IsImportant =
false;
1433 if (!ContextSizeInfo.
empty()) {
1434 auto &
Entry = ContextIdToContextSizeInfos[LastContextId];
1438 uint64_t TotalCold = 0;
1439 for (
auto &CSI : ContextSizeInfo)
1440 TotalCold += CSI.TotalSize;
1446 TotalCold > TotalSizeToContextIdTopNCold.begin()->first) {
1449 auto IdToRemove = TotalSizeToContextIdTopNCold.begin()->second;
1450 TotalSizeToContextIdTopNCold.erase(
1451 TotalSizeToContextIdTopNCold.begin());
1453 ImportantContextIdInfo.
erase(IdToRemove);
1455 TotalSizeToContextIdTopNCold[TotalCold] = LastContextId;
1459 Entry.insert(
Entry.begin(), ContextSizeInfo.begin(), ContextSizeInfo.end());
1463 AllocNode->AllocTypes |= (uint8_t)
AllocType;
1468 ContextNode *PrevNode = AllocNode;
1472 SmallSet<uint64_t, 8> StackIdSet;
1475 ContextIter != StackContext.
end(); ++ContextIter) {
1476 auto StackId = getStackId(*ContextIter);
1478 ImportantContextIdInfo[LastContextId].StackIds.push_back(StackId);
1479 ContextNode *StackNode = getNodeForStackId(StackId);
1481 StackNode = createNewNode(
false);
1482 StackEntryIdToContextNodeMap[StackId] = StackNode;
1483 StackNode->OrigStackOrAllocId = StackId;
1490 StackNode->Recursive =
true;
1492 StackNode->AllocTypes |= (uint8_t)
AllocType;
1493 PrevNode->addOrUpdateCallerEdge(StackNode,
AllocType, LastContextId);
1494 PrevNode = StackNode;
1498template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1500CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds(
1501 const DenseSet<uint32_t> &StackSequenceContextIds,
1502 DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
1503 DenseSet<uint32_t> NewContextIds;
1504 for (
auto OldId : StackSequenceContextIds) {
1505 NewContextIds.
insert(++LastContextId);
1506 OldToNewContextIds[OldId].insert(LastContextId);
1509 ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId];
1510 if (DotAllocContextIds.
contains(OldId))
1511 DotAllocContextIds.
insert(LastContextId);
1513 return NewContextIds;
1516template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1517void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1518 propagateDuplicateContextIds(
1519 const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
1521 auto GetNewIds = [&OldToNewContextIds](
const DenseSet<uint32_t> &ContextIds) {
1522 DenseSet<uint32_t> NewIds;
1523 for (
auto Id : ContextIds)
1524 if (
auto NewId = OldToNewContextIds.find(Id);
1525 NewId != OldToNewContextIds.end())
1531 auto UpdateCallers = [&](ContextNode *
Node,
1532 DenseSet<const ContextEdge *> &Visited,
1533 auto &&UpdateCallers) ->
void {
1534 for (
const auto &
Edge :
Node->CallerEdges) {
1538 ContextNode *NextNode =
Edge->Caller;
1539 DenseSet<uint32_t> NewIdsToAdd = GetNewIds(
Edge->getContextIds());
1542 if (!NewIdsToAdd.
empty()) {
1543 Edge->getContextIds().insert_range(NewIdsToAdd);
1544 UpdateCallers(NextNode, Visited, UpdateCallers);
1549 DenseSet<const ContextEdge *> Visited;
1550 for (
auto &Entry : AllocationCallToContextNodeMap) {
1552 UpdateCallers(Node, Visited, UpdateCallers);
1556template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1557void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
1558 ContextNode *NewNode, ContextNode *OrigNode,
bool TowardsCallee,
1561 DenseSet<uint32_t> RemainingContextIds) {
1563 TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges;
1564 DenseSet<uint32_t> RecursiveContextIds;
1565 DenseSet<uint32_t> AllCallerContextIds;
1570 for (
auto &CE : OrigEdges) {
1571 AllCallerContextIds.
reserve(
CE->getContextIds().size());
1572 for (
auto Id :
CE->getContextIds())
1573 if (!AllCallerContextIds.
insert(Id).second)
1574 RecursiveContextIds.
insert(Id);
1578 for (
auto EI = OrigEdges.begin(); EI != OrigEdges.end();) {
1580 DenseSet<uint32_t> NewEdgeContextIds;
1581 DenseSet<uint32_t> NotFoundContextIds;
1585 set_subtract(
Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds,
1586 NotFoundContextIds);
1589 if (RecursiveContextIds.
empty()) {
1592 RemainingContextIds.
swap(NotFoundContextIds);
1602 DenseSet<uint32_t> NonRecursiveRemainingCurEdgeIds =
1604 set_subtract(RemainingContextIds, NonRecursiveRemainingCurEdgeIds);
1607 if (NewEdgeContextIds.
empty()) {
1611 if (TowardsCallee) {
1612 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1613 auto NewEdge = std::make_shared<ContextEdge>(
1614 Edge->Callee, NewNode, NewAllocType, std::move(NewEdgeContextIds));
1615 NewNode->CalleeEdges.push_back(NewEdge);
1616 NewEdge->Callee->CallerEdges.push_back(NewEdge);
1618 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1619 auto NewEdge = std::make_shared<ContextEdge>(
1620 NewNode,
Edge->Caller, NewAllocType, std::move(NewEdgeContextIds));
1621 NewNode->CallerEdges.push_back(NewEdge);
1622 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
1625 if (
Edge->getContextIds().empty()) {
1626 removeEdgeFromGraph(
Edge.get(), &EI, TowardsCallee);
1633template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1635 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) {
1639 assert(!Edge->ContextIds.empty());
1642template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1644 bool CheckEdges =
true) {
1645 if (
Node->isRemoved())
1649 auto NodeContextIds =
Node->getContextIds();
1653 if (
Node->CallerEdges.size()) {
1655 Node->CallerEdges.front()->ContextIds);
1659 set_union(CallerEdgeContextIds, Edge->ContextIds);
1666 NodeContextIds == CallerEdgeContextIds ||
1669 if (
Node->CalleeEdges.size()) {
1671 Node->CalleeEdges.front()->ContextIds);
1675 set_union(CalleeEdgeContextIds, Edge->getContextIds());
1681 NodeContextIds == CalleeEdgeContextIds);
1690 for (
const auto &
E :
Node->CalleeEdges)
1696template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1697void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1698 assignStackNodesPostOrder(ContextNode *Node,
1699 DenseSet<const ContextNode *> &Visited,
1700 DenseMap<uint64_t, std::vector<CallContextInfo>>
1701 &StackIdToMatchingCalls,
1702 DenseMap<CallInfo, CallInfo> &CallToMatchingCall,
1703 const DenseSet<uint32_t> &ImportantContextIds) {
1711 auto CallerEdges =
Node->CallerEdges;
1712 for (
auto &
Edge : CallerEdges) {
1714 if (
Edge->isRemoved()) {
1718 assignStackNodesPostOrder(
Edge->Caller, Visited, StackIdToMatchingCalls,
1719 CallToMatchingCall, ImportantContextIds);
1728 if (
Node->IsAllocation ||
1729 !StackIdToMatchingCalls.count(
Node->OrigStackOrAllocId))
1732 auto &Calls = StackIdToMatchingCalls[
Node->OrigStackOrAllocId];
1736 if (Calls.size() == 1) {
1737 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[0];
1738 if (Ids.size() == 1) {
1739 assert(SavedContextIds.empty());
1741 assert(Node == getNodeForStackId(Ids[0]));
1742 if (
Node->Recursive)
1745 NonAllocationCallToContextNodeMap[
Call] =
Node;
1747 recordStackNode(Ids, Node,
Node->getContextIds(), ImportantContextIds);
1755 uint64_t LastId =
Node->OrigStackOrAllocId;
1756 ContextNode *LastNode = getNodeForStackId(LastId);
1759 assert(LastNode == Node);
1761 ContextNode *LastNode =
Node;
1766 DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
1768 [[maybe_unused]]
bool PrevIterCreatedNode =
false;
1769 bool CreatedNode =
false;
1770 for (
unsigned I = 0;
I < Calls.size();
1771 I++, PrevIterCreatedNode = CreatedNode) {
1772 CreatedNode =
false;
1773 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
1776 if (SavedContextIds.empty()) {
1783 auto MatchingCall = CallToMatchingCall[
Call];
1784 if (!NonAllocationCallToContextNodeMap.contains(MatchingCall)) {
1788 assert(
I > 0 && !PrevIterCreatedNode);
1791 NonAllocationCallToContextNodeMap[MatchingCall]->MatchingCalls.push_back(
1796 assert(LastId == Ids.back());
1805 ContextNode *PrevNode = LastNode;
1809 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
1811 ContextNode *CurNode = getNodeForStackId(Id);
1815 assert(!CurNode->Recursive);
1817 auto *
Edge = CurNode->findEdgeFromCaller(PrevNode);
1829 if (SavedContextIds.empty()) {
1838 ContextNode *NewNode = createNewNode(
false, Func,
Call);
1839 NonAllocationCallToContextNodeMap[
Call] = NewNode;
1841 NewNode->AllocTypes = computeAllocType(SavedContextIds);
1843 ContextNode *FirstNode = getNodeForStackId(Ids[0]);
1849 connectNewNode(NewNode, FirstNode,
true, SavedContextIds);
1854 connectNewNode(NewNode, LastNode,
false, SavedContextIds);
1859 for (
auto Id : Ids) {
1860 ContextNode *CurNode = getNodeForStackId(Id);
1867 auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode);
1874 set_subtract(PrevEdge->getContextIds(), SavedContextIds);
1875 if (PrevEdge->getContextIds().empty())
1876 removeEdgeFromGraph(PrevEdge);
1881 CurNode->AllocTypes = CurNode->CalleeEdges.empty()
1882 ? (uint8_t)AllocationType::None
1883 : CurNode->computeAllocType();
1887 recordStackNode(Ids, NewNode, SavedContextIds, ImportantContextIds);
1891 for (
auto Id : Ids) {
1892 ContextNode *CurNode = getNodeForStackId(Id);
1901template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1902void CallsiteContextGraph<DerivedCCG, FuncTy,
1903 CallTy>::fixupImportantContexts() {
1904 if (ImportantContextIdInfo.
empty())
1908 NumImportantContextIds = ImportantContextIdInfo.
size();
1914 exportToDot(
"beforestackfixup");
1939 for (
auto &[CurContextId,
Info] : ImportantContextIdInfo) {
1940 if (
Info.StackIdsToNode.empty())
1943 ContextNode *PrevNode =
nullptr;
1944 ContextNode *CurNode =
nullptr;
1945 DenseSet<const ContextEdge *> VisitedEdges;
1946 ArrayRef<uint64_t> AllStackIds(
Info.StackIds);
1949 for (
unsigned I = 0;
I < AllStackIds.size();
I++, PrevNode = CurNode) {
1953 auto LenToEnd = AllStackIds.size() -
I;
1961 auto CheckStackIds = AllStackIds.slice(
I, Len);
1962 auto EntryIt =
Info.StackIdsToNode.find(CheckStackIds);
1963 if (EntryIt ==
Info.StackIdsToNode.end())
1965 CurNode = EntryIt->second;
1982 auto *CurEdge = PrevNode->findEdgeFromCaller(CurNode);
1985 if (CurEdge->getContextIds().insert(CurContextId).second) {
1986 NumFixupEdgeIdsInserted++;
1991 NumFixupEdgesAdded++;
1992 DenseSet<uint32_t> ContextIds({CurContextId});
1993 auto AllocType = computeAllocType(ContextIds);
1994 auto NewEdge = std::make_shared<ContextEdge>(
1995 PrevNode, CurNode,
AllocType, std::move(ContextIds));
1996 PrevNode->CallerEdges.push_back(NewEdge);
1997 CurNode->CalleeEdges.push_back(NewEdge);
1999 CurEdge = NewEdge.get();
2002 VisitedEdges.
insert(CurEdge);
2005 for (
auto &
Edge : PrevNode->CallerEdges) {
2009 Edge->getContextIds().erase(CurContextId);
2017template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2018void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
2026 DenseMap<uint64_t, std::vector<CallContextInfo>> StackIdToMatchingCalls;
2027 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
2028 for (
auto &
Call : CallsWithMetadata) {
2030 if (AllocationCallToContextNodeMap.count(
Call))
2032 auto StackIdsWithContextNodes =
2033 getStackIdsWithContextNodesForCall(
Call.call());
2036 if (StackIdsWithContextNodes.empty())
2040 StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back(
2041 {
Call.call(), StackIdsWithContextNodes,
Func, {}});
2051 DenseMap<uint32_t, DenseSet<uint32_t>> OldToNewContextIds;
2055 DenseMap<CallInfo, CallInfo> CallToMatchingCall;
2056 for (
auto &It : StackIdToMatchingCalls) {
2057 auto &Calls = It.getSecond();
2059 if (Calls.size() == 1) {
2060 auto &Ids = Calls[0].StackIds;
2061 if (Ids.size() == 1)
2074 DenseMap<const FuncTy *, unsigned> FuncToIndex;
2075 for (
const auto &[Idx, CallCtxInfo] :
enumerate(Calls))
2076 FuncToIndex.
insert({CallCtxInfo.Func, Idx});
2079 [&FuncToIndex](
const CallContextInfo &
A,
const CallContextInfo &
B) {
2080 return A.StackIds.size() >
B.StackIds.size() ||
2081 (
A.StackIds.size() ==
B.StackIds.size() &&
2082 (
A.StackIds <
B.StackIds ||
2083 (
A.StackIds ==
B.StackIds &&
2084 FuncToIndex[
A.Func] < FuncToIndex[
B.Func])));
2090 uint64_t LastId = It.getFirst();
2091 ContextNode *LastNode = getNodeForStackId(LastId);
2095 if (LastNode->Recursive)
2100 DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
2108 DenseSet<const FuncTy *> MatchingIdsFuncSet;
2111 for (
unsigned I = 0;
I < Calls.size();
I++) {
2112 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
2113 assert(SavedContextIds.empty());
2114 assert(LastId == Ids.back());
2119 if (
I > 0 && Ids != Calls[
I - 1].StackIds)
2120 MatchingIdsFuncSet.
clear();
2127 DenseSet<uint32_t> StackSequenceContextIds = LastNodeContextIds;
2129 ContextNode *PrevNode = LastNode;
2130 ContextNode *CurNode = LastNode;
2135 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
2137 CurNode = getNodeForStackId(Id);
2141 if (CurNode->Recursive) {
2146 auto *
Edge = CurNode->findEdgeFromCaller(PrevNode);
2167 if (StackSequenceContextIds.
empty()) {
2180 if (Ids.back() != getLastStackId(
Call)) {
2181 for (
const auto &PE : LastNode->CallerEdges) {
2182 set_subtract(StackSequenceContextIds, PE->getContextIds());
2183 if (StackSequenceContextIds.
empty())
2187 if (StackSequenceContextIds.
empty())
2199 MatchingIdsFuncSet.
insert(Func);
2206 bool DuplicateContextIds =
false;
2207 for (
unsigned J =
I + 1; J < Calls.size(); J++) {
2208 auto &CallCtxInfo = Calls[J];
2209 auto &NextIds = CallCtxInfo.StackIds;
2212 auto *NextFunc = CallCtxInfo.Func;
2213 if (NextFunc != Func) {
2216 DuplicateContextIds =
true;
2219 auto &NextCall = CallCtxInfo.Call;
2220 CallToMatchingCall[NextCall] =
Call;
2231 OldToNewContextIds.
reserve(OldToNewContextIds.
size() +
2232 StackSequenceContextIds.
size());
2235 ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds)
2236 : StackSequenceContextIds;
2237 assert(!SavedContextIds.empty());
2239 if (!DuplicateContextIds) {
2243 set_subtract(LastNodeContextIds, StackSequenceContextIds);
2244 if (LastNodeContextIds.
empty())
2251 propagateDuplicateContextIds(OldToNewContextIds);
2261 DenseSet<const ContextNode *> Visited;
2263 ImportantContextIdInfo.keys());
2264 for (
auto &Entry : AllocationCallToContextNodeMap)
2265 assignStackNodesPostOrder(
Entry.second, Visited, StackIdToMatchingCalls,
2266 CallToMatchingCall, ImportantContextIds);
2268 fixupImportantContexts();
2274uint64_t ModuleCallsiteContextGraph::getLastStackId(Instruction *
Call) {
2275 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
2277 return CallsiteContext.
back();
2280uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &
Call) {
2282 CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
2285 return Index.getStackIdAtIndex(CallsiteContext.
back());
2307 auto Pos =
F.getName().find_last_of(
'.');
2310 bool Err =
F.getName().drop_front(Pos + 1).getAsInteger(10, CloneNo);
2316std::string ModuleCallsiteContextGraph::getLabel(
const Function *Func,
2317 const Instruction *
Call,
2318 unsigned CloneNo)
const {
2324std::string IndexCallsiteContextGraph::getLabel(
const FunctionSummary *Func,
2325 const IndexCall &
Call,
2326 unsigned CloneNo)
const {
2327 auto VI = FSToVIMap.find(Func);
2328 assert(VI != FSToVIMap.end());
2331 return CallerName +
" -> alloc";
2334 return CallerName +
" -> " +
2336 Callsite->Clones[CloneNo]);
2340std::vector<uint64_t>
2341ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall(
2342 Instruction *
Call) {
2343 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
2345 return getStackIdsWithContextNodes<MDNode, MDNode::op_iterator>(
2349std::vector<uint64_t>
2350IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &
Call) {
2352 CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
2354 return getStackIdsWithContextNodes<CallsiteInfo,
2355 SmallVector<unsigned>::const_iterator>(
2359template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2360template <
class NodeT,
class IteratorT>
2361std::vector<uint64_t>
2362CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes(
2363 CallStack<NodeT, IteratorT> &CallsiteContext) {
2364 std::vector<uint64_t> StackIds;
2365 for (
auto IdOrIndex : CallsiteContext) {
2366 auto StackId = getStackId(IdOrIndex);
2367 ContextNode *
Node = getNodeForStackId(StackId);
2370 StackIds.push_back(StackId);
2375ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
2377 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter)
2378 :
Mod(
M), OREGetter(OREGetter) {
2382 std::map<uint64_t, uint32_t> TotalSizeToContextIdTopNCold;
2384 std::vector<CallInfo> CallsWithMetadata;
2385 for (
auto &BB :
F) {
2386 for (
auto &
I : BB) {
2389 if (
auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof)) {
2390 CallsWithMetadata.push_back(&
I);
2391 auto *AllocNode = addAllocNode(&
I, &
F);
2392 auto *CallsiteMD =
I.getMetadata(LLVMContext::MD_callsite);
2396 for (
auto &MDOp : MemProfMD->operands()) {
2398 std::vector<ContextTotalSize> ContextSizeInfo;
2400 if (MIBMD->getNumOperands() > 2) {
2401 for (
unsigned I = 2;
I < MIBMD->getNumOperands();
I++) {
2402 MDNode *ContextSizePair =
2411 ContextSizeInfo.push_back({FullStackId, TotalSize});
2417 addStackNodesForMIB<MDNode, MDNode::op_iterator>(
2418 AllocNode, StackContext, CallsiteContext,
2420 TotalSizeToContextIdTopNCold);
2425 DotAllocContextIds = AllocNode->getContextIds();
2429 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
2430 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
2433 else if (
I.getMetadata(LLVMContext::MD_callsite)) {
2434 CallsWithMetadata.push_back(&
I);
2438 if (!CallsWithMetadata.empty())
2439 FuncToCallsWithMetadata[&
F] = CallsWithMetadata;
2443 dbgs() <<
"CCG before updating call stack chains:\n";
2448 exportToDot(
"prestackupdate");
2453 exportToDot(
"poststackupdate");
2455 handleCallsitesWithMultipleTargets();
2460 for (
auto &FuncEntry : FuncToCallsWithMetadata)
2461 for (
auto &
Call : FuncEntry.second)
2462 Call.call()->setMetadata(LLVMContext::MD_callsite,
nullptr);
2465IndexCallsiteContextGraph::IndexCallsiteContextGraph(
2469 : Index(Index), isPrevailing(isPrevailing) {
2473 std::map<uint64_t, uint32_t> TotalSizeToContextIdTopNCold;
2474 for (
auto &
I : Index) {
2475 auto VI = Index.getValueInfo(
I);
2476 for (
auto &S : VI.getSummaryList()) {
2485 !isPrevailing(VI.getGUID(), S.get()))
2490 std::vector<CallInfo> CallsWithMetadata;
2491 if (!
FS->allocs().empty()) {
2492 for (
auto &AN :
FS->mutableAllocs()) {
2497 if (AN.MIBs.empty())
2499 IndexCall AllocCall(&AN);
2500 CallsWithMetadata.push_back(AllocCall);
2501 auto *AllocNode = addAllocNode(AllocCall, FS);
2509 AN.ContextSizeInfos.size() == AN.MIBs.size());
2511 for (
auto &MIB : AN.MIBs) {
2514 std::vector<ContextTotalSize> ContextSizeInfo;
2515 if (!AN.ContextSizeInfos.empty()) {
2516 for (
auto [FullStackId, TotalSize] : AN.ContextSizeInfos[
I])
2517 ContextSizeInfo.push_back({FullStackId, TotalSize});
2519 addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>(
2520 AllocNode, StackContext, EmptyContext, MIB.AllocType,
2521 ContextSizeInfo, TotalSizeToContextIdTopNCold);
2527 DotAllocContextIds = AllocNode->getContextIds();
2533 AN.Versions[0] = (
uint8_t)allocTypeToUse(AllocNode->AllocTypes);
2537 if (!
FS->callsites().empty())
2538 for (
auto &SN :
FS->mutableCallsites()) {
2539 IndexCall StackNodeCall(&SN);
2540 CallsWithMetadata.push_back(StackNodeCall);
2543 if (!CallsWithMetadata.empty())
2544 FuncToCallsWithMetadata[
FS] = CallsWithMetadata;
2546 if (!
FS->allocs().empty() || !
FS->callsites().empty())
2552 dbgs() <<
"CCG before updating call stack chains:\n";
2557 exportToDot(
"prestackupdate");
2562 exportToDot(
"poststackupdate");
2564 handleCallsitesWithMultipleTargets();
2569template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2570void CallsiteContextGraph<DerivedCCG, FuncTy,
2571 CallTy>::handleCallsitesWithMultipleTargets() {
2586 std::vector<std::pair<CallInfo, ContextNode *>> NewCallToNode;
2587 for (
auto &Entry : NonAllocationCallToContextNodeMap) {
2588 auto *
Node = Entry.second;
2597 std::vector<CallInfo> AllCalls;
2598 AllCalls.reserve(
Node->MatchingCalls.size() + 1);
2599 AllCalls.push_back(
Node->Call);
2613 if (partitionCallsByCallee(
Node, AllCalls, NewCallToNode))
2616 auto It = AllCalls.begin();
2618 for (; It != AllCalls.end(); ++It) {
2621 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();
2624 if (!Edge->Callee->hasCall())
2626 assert(NodeToCallingFunc.count(Edge->Callee));
2628 if (!calleesMatch(
ThisCall.call(), EI, TailCallToContextNodeMap)) {
2637 if (
Node->Call != ThisCall) {
2638 Node->setCall(ThisCall);
2649 Node->MatchingCalls.clear();
2652 if (It == AllCalls.end()) {
2653 RemovedEdgesWithMismatchedCallees++;
2657 Node->setCall(CallInfo());
2662 for (++It; It != AllCalls.end(); ++It) {
2666 Node->MatchingCalls.push_back(ThisCall);
2675 NonAllocationCallToContextNodeMap.remove_if([](
const auto &it) {
2676 return !it.second->hasCall() || it.second->Call != it.first;
2680 for (
auto &[
Call,
Node] : NewCallToNode)
2681 NonAllocationCallToContextNodeMap[
Call] =
Node;
2685 for (
auto &[
Call,
Node] : TailCallToContextNodeMap)
2686 NonAllocationCallToContextNodeMap[
Call] =
Node;
2689template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2690bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::partitionCallsByCallee(
2692 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode) {
2696 struct CallsWithSameCallee {
2697 std::vector<CallInfo> Calls;
2698 ContextNode *
Node =
nullptr;
2704 for (
auto ThisCall : AllCalls) {
2705 auto *
F = getCalleeFunc(
ThisCall.call());
2707 CalleeFuncToCallInfo[
F].Calls.push_back(ThisCall);
2716 for (
const auto &Edge :
Node->CalleeEdges) {
2717 if (!Edge->Callee->hasCall())
2719 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee];
2720 if (CalleeFuncToCallInfo.
contains(ProfiledCalleeFunc))
2721 CalleeNodeToCallInfo[Edge->Callee] =
2722 &CalleeFuncToCallInfo[ProfiledCalleeFunc];
2728 if (CalleeNodeToCallInfo.
empty())
2740 ContextNode *UnmatchedCalleesNode =
nullptr;
2742 bool UsedOrigNode =
false;
2747 auto CalleeEdges =
Node->CalleeEdges;
2748 for (
auto &Edge : CalleeEdges) {
2749 if (!Edge->Callee->hasCall())
2754 ContextNode *CallerNodeToUse =
nullptr;
2758 if (!CalleeNodeToCallInfo.
contains(Edge->Callee)) {
2759 if (!UnmatchedCalleesNode)
2760 UnmatchedCalleesNode =
2761 createNewNode(
false, NodeToCallingFunc[
Node]);
2762 CallerNodeToUse = UnmatchedCalleesNode;
2766 auto *
Info = CalleeNodeToCallInfo[Edge->Callee];
2769 if (!UsedOrigNode) {
2772 Node->MatchingCalls.clear();
2773 UsedOrigNode =
true;
2776 createNewNode(
false, NodeToCallingFunc[
Node]);
2780 Info->Node->setCall(
Info->Calls.front());
2786 NewCallToNode.push_back({
Info->Node->Call,
Info->Node});
2788 CallerNodeToUse =
Info->Node;
2792 if (CallerNodeToUse ==
Node)
2795 moveCalleeEdgeToNewCaller(Edge, CallerNodeToUse);
2802 for (
auto &
I : CalleeNodeToCallInfo)
2803 removeNoneTypeCallerEdges(
I.second->Node);
2804 if (UnmatchedCalleesNode)
2805 removeNoneTypeCallerEdges(UnmatchedCalleesNode);
2806 removeNoneTypeCallerEdges(
Node);
2816uint64_t IndexCallsiteContextGraph::getStackId(uint64_t IdOrIndex)
const {
2819 return Index.getStackIdAtIndex(IdOrIndex);
2822template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2823bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::calleesMatch(
2824 CallTy
Call, EdgeIter &EI,
2825 MapVector<CallInfo, ContextNode *> &TailCallToContextNodeMap) {
2827 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[
Edge->Callee];
2828 const FuncTy *CallerFunc = NodeToCallingFunc[
Edge->Caller];
2831 std::vector<std::pair<CallTy, FuncTy *>> FoundCalleeChain;
2832 if (!calleeMatchesFunc(
Call, ProfiledCalleeFunc, CallerFunc,
2837 if (FoundCalleeChain.empty())
2841 auto *CurEdge =
Callee->findEdgeFromCaller(Caller);
2845 CurEdge->ContextIds.insert_range(
Edge->ContextIds);
2846 CurEdge->AllocTypes |=
Edge->AllocTypes;
2851 auto NewEdge = std::make_shared<ContextEdge>(
2852 Callee, Caller,
Edge->AllocTypes,
Edge->ContextIds);
2853 Callee->CallerEdges.push_back(NewEdge);
2854 if (Caller ==
Edge->Caller) {
2858 EI =
Caller->CalleeEdges.insert(EI, NewEdge);
2861 "Iterator position not restored after insert and increment");
2863 Caller->CalleeEdges.push_back(NewEdge);
2868 auto *CurCalleeNode =
Edge->Callee;
2869 for (
auto &[NewCall, Func] : FoundCalleeChain) {
2870 ContextNode *NewNode =
nullptr;
2872 if (TailCallToContextNodeMap.
count(NewCall)) {
2873 NewNode = TailCallToContextNodeMap[NewCall];
2874 NewNode->AllocTypes |=
Edge->AllocTypes;
2876 FuncToCallsWithMetadata[
Func].push_back({NewCall});
2878 NewNode = createNewNode(
false, Func, NewCall);
2879 TailCallToContextNodeMap[NewCall] = NewNode;
2880 NewNode->AllocTypes =
Edge->AllocTypes;
2884 AddEdge(NewNode, CurCalleeNode);
2886 CurCalleeNode = NewNode;
2890 AddEdge(
Edge->Caller, CurCalleeNode);
2898 removeEdgeFromGraph(
Edge.get(), &EI,
true);
2910bool ModuleCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
2911 const Function *ProfiledCallee,
Value *CurCallee,
unsigned Depth,
2912 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
2913 bool &FoundMultipleCalleeChains) {
2920 FoundCalleeChain.push_back({Callsite,
F});
2935 bool FoundSingleCalleeChain =
false;
2936 for (
auto &BB : *CalleeFunc) {
2937 for (
auto &
I : BB) {
2939 if (!CB || !CB->isTailCall())
2941 auto *CalledValue = CB->getCalledOperand();
2942 auto *CalledFunction = CB->getCalledFunction();
2943 if (CalledValue && !CalledFunction) {
2944 CalledValue = CalledValue->stripPointerCasts();
2951 assert(!CalledFunction &&
2952 "Expected null called function in callsite for alias");
2955 if (!CalledFunction)
2957 if (CalledFunction == ProfiledCallee) {
2958 if (FoundSingleCalleeChain) {
2959 FoundMultipleCalleeChains =
true;
2962 FoundSingleCalleeChain =
true;
2963 FoundProfiledCalleeCount++;
2964 FoundProfiledCalleeDepth +=
Depth;
2965 if (
Depth > FoundProfiledCalleeMaxDepth)
2966 FoundProfiledCalleeMaxDepth =
Depth;
2967 SaveCallsiteInfo(&
I, CalleeFunc);
2968 }
else if (findProfiledCalleeThroughTailCalls(
2969 ProfiledCallee, CalledFunction,
Depth + 1,
2970 FoundCalleeChain, FoundMultipleCalleeChains)) {
2973 assert(!FoundMultipleCalleeChains);
2974 if (FoundSingleCalleeChain) {
2975 FoundMultipleCalleeChains =
true;
2978 FoundSingleCalleeChain =
true;
2979 SaveCallsiteInfo(&
I, CalleeFunc);
2980 }
else if (FoundMultipleCalleeChains)
2985 return FoundSingleCalleeChain;
2988const Function *ModuleCallsiteContextGraph::getCalleeFunc(Instruction *
Call) {
2990 if (!CB->getCalledOperand() || CB->isIndirectCall())
2992 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
2999bool ModuleCallsiteContextGraph::calleeMatchesFunc(
3000 Instruction *
Call,
const Function *Func,
const Function *CallerFunc,
3001 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain) {
3003 if (!CB->getCalledOperand() || CB->isIndirectCall())
3005 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
3007 if (CalleeFunc == Func)
3010 if (Alias && Alias->getAliasee() == Func)
3021 bool FoundMultipleCalleeChains =
false;
3022 if (!findProfiledCalleeThroughTailCalls(Func, CalleeVal,
Depth,
3024 FoundMultipleCalleeChains)) {
3025 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: "
3026 <<
Func->getName() <<
" from " << CallerFunc->
getName()
3027 <<
" that actually called " << CalleeVal->getName()
3028 << (FoundMultipleCalleeChains
3029 ?
" (found multiple possible chains)"
3032 if (FoundMultipleCalleeChains)
3033 FoundProfiledCalleeNonUniquelyCount++;
3040bool ModuleCallsiteContextGraph::sameCallee(Instruction *Call1,
3041 Instruction *Call2) {
3043 if (!CB1->getCalledOperand() || CB1->isIndirectCall())
3045 auto *CalleeVal1 = CB1->getCalledOperand()->stripPointerCasts();
3048 if (!CB2->getCalledOperand() || CB2->isIndirectCall())
3050 auto *CalleeVal2 = CB2->getCalledOperand()->stripPointerCasts();
3052 return CalleeFunc1 == CalleeFunc2;
3055bool IndexCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
3056 ValueInfo ProfiledCallee, ValueInfo CurCallee,
unsigned Depth,
3057 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
3058 bool &FoundMultipleCalleeChains) {
3064 auto CreateAndSaveCallsiteInfo = [&](ValueInfo
Callee, FunctionSummary *
FS) {
3067 if (!FunctionCalleesToSynthesizedCallsiteInfos.count(FS) ||
3068 !FunctionCalleesToSynthesizedCallsiteInfos[FS].count(Callee))
3071 FunctionCalleesToSynthesizedCallsiteInfos[
FS][
Callee] =
3072 std::make_unique<CallsiteInfo>(Callee, SmallVector<unsigned>());
3073 CallsiteInfo *NewCallsiteInfo =
3074 FunctionCalleesToSynthesizedCallsiteInfos[
FS][
Callee].get();
3075 FoundCalleeChain.push_back({NewCallsiteInfo,
FS});
3082 bool FoundSingleCalleeChain =
false;
3085 !isPrevailing(CurCallee.
getGUID(), S.get()))
3090 auto FSVI = CurCallee;
3093 FSVI = AS->getAliaseeVI();
3094 for (
auto &CallEdge :
FS->calls()) {
3095 if (!CallEdge.second.hasTailCall())
3097 if (CallEdge.first == ProfiledCallee) {
3098 if (FoundSingleCalleeChain) {
3099 FoundMultipleCalleeChains =
true;
3102 FoundSingleCalleeChain =
true;
3103 FoundProfiledCalleeCount++;
3104 FoundProfiledCalleeDepth +=
Depth;
3105 if (
Depth > FoundProfiledCalleeMaxDepth)
3106 FoundProfiledCalleeMaxDepth =
Depth;
3107 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
3109 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
3110 FSToVIMap[
FS] = FSVI;
3111 }
else if (findProfiledCalleeThroughTailCalls(
3112 ProfiledCallee, CallEdge.first,
Depth + 1,
3113 FoundCalleeChain, FoundMultipleCalleeChains)) {
3116 assert(!FoundMultipleCalleeChains);
3117 if (FoundSingleCalleeChain) {
3118 FoundMultipleCalleeChains =
true;
3121 FoundSingleCalleeChain =
true;
3122 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
3124 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
3125 FSToVIMap[
FS] = FSVI;
3126 }
else if (FoundMultipleCalleeChains)
3131 return FoundSingleCalleeChain;
3134const FunctionSummary *
3135IndexCallsiteContextGraph::getCalleeFunc(IndexCall &
Call) {
3137 if (
Callee.getSummaryList().empty())
3142bool IndexCallsiteContextGraph::calleeMatchesFunc(
3143 IndexCall &
Call,
const FunctionSummary *Func,
3144 const FunctionSummary *CallerFunc,
3145 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain) {
3149 AliasSummary *Alias =
3150 Callee.getSummaryList().empty()
3153 assert(FSToVIMap.count(Func));
3154 auto FuncVI = FSToVIMap[
Func];
3155 if (Callee == FuncVI ||
3170 bool FoundMultipleCalleeChains =
false;
3171 if (!findProfiledCalleeThroughTailCalls(
3172 FuncVI, Callee,
Depth, FoundCalleeChain, FoundMultipleCalleeChains)) {
3173 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: " << FuncVI
3174 <<
" from " << FSToVIMap[CallerFunc]
3175 <<
" that actually called " << Callee
3176 << (FoundMultipleCalleeChains
3177 ?
" (found multiple possible chains)"
3180 if (FoundMultipleCalleeChains)
3181 FoundProfiledCalleeNonUniquelyCount++;
3188bool IndexCallsiteContextGraph::sameCallee(IndexCall &Call1, IndexCall &Call2) {
3191 return Callee1 == Callee2;
3194template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3195void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump()
3201template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3202void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
3203 raw_ostream &OS)
const {
3204 OS <<
"Node " <<
this <<
"\n";
3208 OS <<
" (recursive)";
3210 if (!MatchingCalls.
empty()) {
3211 OS <<
"\tMatchingCalls:\n";
3212 for (
auto &MatchingCall : MatchingCalls) {
3214 MatchingCall.print(OS);
3218 OS <<
"\tNodeId: " <<
NodeId <<
"\n";
3220 OS <<
"\tContextIds:";
3222 auto ContextIds = getContextIds();
3223 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
3224 std::sort(SortedIds.begin(), SortedIds.end());
3225 for (
auto Id : SortedIds)
3228 OS <<
"\tCalleeEdges:\n";
3229 for (
auto &
Edge : CalleeEdges)
3230 OS <<
"\t\t" << *
Edge <<
" (Callee NodeId: " <<
Edge->Callee->NodeId
3232 OS <<
"\tCallerEdges:\n";
3233 for (
auto &
Edge : CallerEdges)
3234 OS <<
"\t\t" << *
Edge <<
" (Caller NodeId: " <<
Edge->Caller->NodeId
3236 if (!Clones.empty()) {
3239 for (
auto *
C : Clones) {
3243 OS <<
C <<
" NodeId: " <<
C->NodeId;
3246 }
else if (CloneOf) {
3247 OS <<
"\tClone of " << CloneOf <<
" NodeId: " << CloneOf->NodeId <<
"\n";
3251template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3252void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::dump()
3258template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3259void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::print(
3260 raw_ostream &OS)
const {
3261 OS <<
"Edge from Callee " <<
Callee <<
" to Caller: " <<
Caller
3262 << (IsBackedge ?
" (BE)" :
"")
3264 OS <<
" ContextIds:";
3265 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
3266 std::sort(SortedIds.begin(), SortedIds.end());
3267 for (
auto Id : SortedIds)
3271template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3272void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::dump()
const {
3276template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3277void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
3278 raw_ostream &OS)
const {
3279 OS <<
"Callsite Context Graph:\n";
3280 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3282 if (
Node->isRemoved())
3289template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3290void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::printTotalSizes(
3291 raw_ostream &OS)
const {
3292 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3294 if (
Node->isRemoved())
3296 if (!
Node->IsAllocation)
3298 DenseSet<uint32_t> ContextIds =
Node->getContextIds();
3299 auto AllocTypeFromCall = getAllocationCallType(
Node->Call);
3300 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
3301 std::sort(SortedIds.begin(), SortedIds.end());
3302 for (
auto Id : SortedIds) {
3303 auto TypeI = ContextIdToAllocationType.
find(Id);
3304 assert(TypeI != ContextIdToAllocationType.
end());
3305 auto CSI = ContextIdToContextSizeInfos.
find(Id);
3306 if (CSI != ContextIdToContextSizeInfos.
end()) {
3307 for (
auto &
Info : CSI->second) {
3308 OS <<
"MemProf hinting: "
3310 <<
" full allocation context " <<
Info.FullStackId
3311 <<
" with total size " <<
Info.TotalSize <<
" is "
3313 if (allocTypeToUse(
Node->AllocTypes) != AllocTypeFromCall)
3315 <<
" due to cold byte percent";
3317 OS <<
" (context id " <<
Id <<
")";
3325template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3326void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check()
const {
3327 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3330 for (
auto &
Edge :
Node->CallerEdges)
3335template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3337 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3338 using NodeRef =
const ContextNode<DerivedCCG, FuncTy, CallTy> *;
3340 using NodePtrTy = std::unique_ptr<ContextNode<DerivedCCG, FuncTy, CallTy>>;
3356 return G->NodeOwner.begin()->get();
3359 using EdgePtrTy = std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>;
3360 static const ContextNode<DerivedCCG, FuncTy, CallTy> *
3379template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3393 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3399 std::string LabelString =
3400 (
Twine(
"OrigId: ") + (
Node->IsAllocation ?
"Alloc" :
"") +
3403 LabelString +=
"\n";
3404 if (
Node->hasCall()) {
3405 auto Func =
G->NodeToCallingFunc.find(
Node);
3406 assert(Func !=
G->NodeToCallingFunc.end());
3408 G->getLabel(Func->second,
Node->Call.call(),
Node->Call.cloneNo());
3410 LabelString +=
"null call";
3411 if (
Node->Recursive)
3412 LabelString +=
" (recursive)";
3414 LabelString +=
" (external)";
3420 auto ContextIds =
Node->getContextIds();
3424 bool Highlight =
false;
3433 std::string AttributeString = (
Twine(
"tooltip=\"") + getNodeId(
Node) +
" " +
3434 getContextIds(ContextIds) +
"\"")
3438 AttributeString +=
",fontsize=\"30\"";
3440 (
Twine(
",fillcolor=\"") + getColor(
Node->AllocTypes, Highlight) +
"\"")
3442 if (
Node->CloneOf) {
3443 AttributeString +=
",color=\"blue\"";
3444 AttributeString +=
",style=\"filled,bold,dashed\"";
3446 AttributeString +=
",style=\"filled\"";
3447 return AttributeString;
3452 auto &Edge = *(ChildIter.getCurrent());
3457 bool Highlight =
false;
3466 auto Color = getColor(Edge->AllocTypes, Highlight);
3467 std::string AttributeString =
3468 (
Twine(
"tooltip=\"") + getContextIds(Edge->ContextIds) +
"\"" +
3470 Twine(
",fillcolor=\"") + Color +
"\"" +
Twine(
",color=\"") + Color +
3473 if (Edge->IsBackedge)
3474 AttributeString +=
",style=\"dotted\"";
3477 AttributeString +=
",penwidth=\"2.0\",weight=\"2\"";
3478 return AttributeString;
3484 if (
Node->isRemoved())
3497 std::string IdString =
"ContextIds:";
3498 if (ContextIds.
size() < 100) {
3499 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
3500 std::sort(SortedIds.begin(), SortedIds.end());
3501 for (
auto Id : SortedIds)
3502 IdString += (
" " +
Twine(Id)).str();
3504 IdString += (
" (" + Twine(ContextIds.
size()) +
" ids)").str();
3509 static std::string getColor(uint8_t AllocTypes,
bool Highlight) {
3515 if (AllocTypes == (uint8_t)AllocationType::NotCold)
3517 return !
DoHighlight || Highlight ?
"brown1" :
"lightpink";
3518 if (AllocTypes == (uint8_t)AllocationType::Cold)
3519 return !
DoHighlight || Highlight ?
"cyan" :
"lightskyblue";
3521 ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold))
3522 return Highlight ?
"magenta" :
"mediumorchid1";
3526 static std::string getNodeId(NodeRef Node) {
3527 std::stringstream SStream;
3528 SStream << std::hex <<
"N0x" << (
unsigned long long)Node;
3529 std::string
Result = SStream.str();
3538template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3543template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3544void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot(
3545 std::string Label)
const {
3550template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3551typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
3552CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
3553 const std::shared_ptr<ContextEdge> &
Edge,
3554 DenseSet<uint32_t> ContextIdsToMove) {
3556 assert(NodeToCallingFunc.count(Node));
3557 ContextNode *Clone =
3558 createNewNode(
Node->IsAllocation, NodeToCallingFunc[Node],
Node->Call);
3559 Node->addClone(Clone);
3560 Clone->MatchingCalls =
Node->MatchingCalls;
3561 moveEdgeToExistingCalleeClone(
Edge, Clone,
true,
3566template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3567void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3568 moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &
Edge,
3569 ContextNode *NewCallee,
bool NewClone,
3570 DenseSet<uint32_t> ContextIdsToMove) {
3573 assert(NewCallee->getOrigNode() ==
Edge->Callee->getOrigNode());
3575 bool EdgeIsRecursive =
Edge->Callee ==
Edge->Caller;
3577 ContextNode *OldCallee =
Edge->Callee;
3581 auto ExistingEdgeToNewCallee = NewCallee->findEdgeFromCaller(
Edge->Caller);
3585 if (ContextIdsToMove.
empty())
3586 ContextIdsToMove =
Edge->getContextIds();
3590 if (
Edge->getContextIds().size() == ContextIdsToMove.
size()) {
3593 NewCallee->AllocTypes |=
Edge->AllocTypes;
3595 if (ExistingEdgeToNewCallee) {
3598 ExistingEdgeToNewCallee->getContextIds().insert_range(ContextIdsToMove);
3599 ExistingEdgeToNewCallee->AllocTypes |=
Edge->AllocTypes;
3600 assert(
Edge->ContextIds == ContextIdsToMove);
3601 removeEdgeFromGraph(
Edge.get());
3604 Edge->Callee = NewCallee;
3605 NewCallee->CallerEdges.push_back(
Edge);
3607 OldCallee->eraseCallerEdge(
Edge.get());
3614 auto CallerEdgeAllocType = computeAllocType(ContextIdsToMove);
3615 if (ExistingEdgeToNewCallee) {
3618 ExistingEdgeToNewCallee->getContextIds().insert_range(ContextIdsToMove);
3619 ExistingEdgeToNewCallee->AllocTypes |= CallerEdgeAllocType;
3622 auto NewEdge = std::make_shared<ContextEdge>(
3623 NewCallee,
Edge->Caller, CallerEdgeAllocType, ContextIdsToMove);
3624 Edge->Caller->CalleeEdges.push_back(NewEdge);
3625 NewCallee->CallerEdges.push_back(NewEdge);
3629 NewCallee->AllocTypes |= CallerEdgeAllocType;
3631 Edge->AllocTypes = computeAllocType(
Edge->ContextIds);
3636 for (
auto &OldCalleeEdge : OldCallee->CalleeEdges) {
3637 ContextNode *CalleeToUse = OldCalleeEdge->Callee;
3641 if (CalleeToUse == OldCallee) {
3645 if (EdgeIsRecursive) {
3649 CalleeToUse = NewCallee;
3653 DenseSet<uint32_t> EdgeContextIdsToMove =
3655 set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove);
3656 OldCalleeEdge->AllocTypes =
3657 computeAllocType(OldCalleeEdge->getContextIds());
3664 if (
auto *NewCalleeEdge = NewCallee->findEdgeFromCallee(CalleeToUse)) {
3665 NewCalleeEdge->getContextIds().insert_range(EdgeContextIdsToMove);
3666 NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
3670 auto NewEdge = std::make_shared<ContextEdge>(
3671 CalleeToUse, NewCallee, computeAllocType(EdgeContextIdsToMove),
3672 EdgeContextIdsToMove);
3673 NewCallee->CalleeEdges.push_back(NewEdge);
3674 NewEdge->Callee->CallerEdges.push_back(NewEdge);
3678 OldCallee->AllocTypes = OldCallee->computeAllocType();
3680 assert((OldCallee->AllocTypes == (uint8_t)AllocationType::None) ==
3681 OldCallee->emptyContextIds());
3685 for (
const auto &OldCalleeEdge : OldCallee->CalleeEdges)
3688 for (
const auto &NewCalleeEdge : NewCallee->CalleeEdges)
3694template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3695void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3696 moveCalleeEdgeToNewCaller(
const std::shared_ptr<ContextEdge> &
Edge,
3697 ContextNode *NewCaller) {
3698 auto *OldCallee =
Edge->Callee;
3699 auto *NewCallee = OldCallee;
3702 bool Recursive =
Edge->Caller ==
Edge->Callee;
3704 NewCallee = NewCaller;
3706 ContextNode *OldCaller =
Edge->Caller;
3707 OldCaller->eraseCalleeEdge(
Edge.get());
3711 auto ExistingEdgeToNewCaller = NewCaller->findEdgeFromCallee(NewCallee);
3713 if (ExistingEdgeToNewCaller) {
3716 ExistingEdgeToNewCaller->getContextIds().insert_range(
3717 Edge->getContextIds());
3718 ExistingEdgeToNewCaller->AllocTypes |=
Edge->AllocTypes;
3719 Edge->ContextIds.clear();
3720 Edge->AllocTypes = (uint8_t)AllocationType::None;
3721 OldCallee->eraseCallerEdge(
Edge.get());
3724 Edge->Caller = NewCaller;
3725 NewCaller->CalleeEdges.push_back(
Edge);
3727 assert(NewCallee == NewCaller);
3730 Edge->Callee = NewCallee;
3731 NewCallee->CallerEdges.push_back(
Edge);
3732 OldCallee->eraseCallerEdge(
Edge.get());
3738 NewCaller->AllocTypes |=
Edge->AllocTypes;
3745 bool IsNewNode = NewCaller->CallerEdges.empty();
3754 for (
auto &OldCallerEdge : OldCaller->CallerEdges) {
3755 auto OldCallerCaller = OldCallerEdge->Caller;
3759 OldCallerEdge->getContextIds(),
Edge->getContextIds());
3760 if (OldCaller == OldCallerCaller) {
3761 OldCallerCaller = NewCaller;
3767 set_subtract(OldCallerEdge->getContextIds(), EdgeContextIdsToMove);
3768 OldCallerEdge->AllocTypes =
3769 computeAllocType(OldCallerEdge->getContextIds());
3774 auto *ExistingCallerEdge = NewCaller->findEdgeFromCaller(OldCallerCaller);
3778 if (ExistingCallerEdge) {
3779 ExistingCallerEdge->getContextIds().insert_range(EdgeContextIdsToMove);
3780 ExistingCallerEdge->AllocTypes |=
3781 computeAllocType(EdgeContextIdsToMove);
3784 auto NewEdge = std::make_shared<ContextEdge>(
3785 NewCaller, OldCallerCaller, computeAllocType(EdgeContextIdsToMove),
3786 EdgeContextIdsToMove);
3787 NewCaller->CallerEdges.push_back(NewEdge);
3788 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
3793 OldCaller->AllocTypes = OldCaller->computeAllocType();
3795 assert((OldCaller->AllocTypes == (uint8_t)AllocationType::None) ==
3796 OldCaller->emptyContextIds());
3800 for (
const auto &OldCallerEdge : OldCaller->CallerEdges)
3803 for (
const auto &NewCallerEdge : NewCaller->CallerEdges)
3809template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3810void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3811 recursivelyRemoveNoneTypeCalleeEdges(
3812 ContextNode *Node, DenseSet<const ContextNode *> &Visited) {
3817 removeNoneTypeCalleeEdges(Node);
3819 for (
auto *Clone :
Node->Clones)
3820 recursivelyRemoveNoneTypeCalleeEdges(Clone, Visited);
3824 auto CallerEdges =
Node->CallerEdges;
3825 for (
auto &
Edge : CallerEdges) {
3827 if (
Edge->isRemoved()) {
3831 recursivelyRemoveNoneTypeCalleeEdges(
Edge->Caller, Visited);
3836template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3837void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::markBackedges() {
3842 DenseSet<const ContextNode *> Visited;
3843 DenseSet<const ContextNode *> CurrentStack;
3844 for (
auto &Entry : NonAllocationCallToContextNodeMap) {
3846 if (
Node->isRemoved())
3849 if (!
Node->CallerEdges.empty())
3851 markBackedges(Node, Visited, CurrentStack);
3857template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3858void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::markBackedges(
3859 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
3860 DenseSet<const ContextNode *> &CurrentStack) {
3861 auto I = Visited.
insert(Node);
3865 for (
auto &CalleeEdge :
Node->CalleeEdges) {
3866 auto *
Callee = CalleeEdge->Callee;
3867 if (Visited.
count(Callee)) {
3870 if (CurrentStack.
count(Callee))
3871 CalleeEdge->IsBackedge =
true;
3874 CurrentStack.
insert(Callee);
3875 markBackedges(Callee, Visited, CurrentStack);
3876 CurrentStack.
erase(Callee);
3880template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3881void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() {
3882 DenseSet<const ContextNode *> Visited;
3883 for (
auto &Entry : AllocationCallToContextNodeMap) {
3885 identifyClones(
Entry.second, Visited,
Entry.second->getContextIds());
3888 for (
auto &Entry : AllocationCallToContextNodeMap)
3889 recursivelyRemoveNoneTypeCalleeEdges(
Entry.second, Visited);
3902template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3903void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
3904 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
3905 const DenseSet<uint32_t> &AllocContextIds) {
3915 if (!
Node->hasCall())
3934 auto CallerEdges =
Node->CallerEdges;
3935 for (
auto &
Edge : CallerEdges) {
3937 if (
Edge->isRemoved()) {
3943 if (
Edge->IsBackedge) {
3950 if (!Visited.
count(
Edge->Caller) && !
Edge->Caller->CloneOf) {
3951 identifyClones(
Edge->Caller, Visited, AllocContextIds);
3974 const unsigned AllocTypeCloningPriority[] = { 3, 4,
3978 [&](
const std::shared_ptr<ContextEdge> &
A,
3979 const std::shared_ptr<ContextEdge> &
B) {
3982 if (A->ContextIds.empty())
3988 if (B->ContextIds.empty())
3991 if (A->AllocTypes == B->AllocTypes)
3994 return *A->ContextIds.begin() < *B->ContextIds.begin();
3995 return AllocTypeCloningPriority[A->AllocTypes] <
3996 AllocTypeCloningPriority[B->AllocTypes];
3999 assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
4001 DenseSet<uint32_t> RecursiveContextIds;
4006 DenseSet<uint32_t> AllCallerContextIds;
4007 for (
auto &CE :
Node->CallerEdges) {
4010 AllCallerContextIds.
reserve(
CE->getContextIds().size());
4011 for (
auto Id :
CE->getContextIds())
4012 if (!AllCallerContextIds.
insert(Id).second)
4013 RecursiveContextIds.
insert(Id);
4023 auto CallerEdges =
Node->CallerEdges;
4024 for (
auto &CallerEdge : CallerEdges) {
4026 if (CallerEdge->isRemoved()) {
4030 assert(CallerEdge->Callee == Node);
4039 if (!CallerEdge->Caller->hasCall())
4044 auto CallerEdgeContextsForAlloc =
4046 if (!RecursiveContextIds.
empty())
4047 CallerEdgeContextsForAlloc =
4049 if (CallerEdgeContextsForAlloc.empty())
4052 auto CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
4056 std::vector<uint8_t> CalleeEdgeAllocTypesForCallerEdge;
4057 CalleeEdgeAllocTypesForCallerEdge.reserve(
Node->CalleeEdges.size());
4058 for (
auto &CalleeEdge :
Node->CalleeEdges)
4059 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
4060 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
4076 assert(CallerEdge->AllocTypes != (uint8_t)AllocationType::None);
4077 assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
4078 if (!CallerEdge->IsBackedge &&
4079 allocTypeToUse(CallerAllocTypeForAlloc) ==
4080 allocTypeToUse(
Node->AllocTypes) &&
4081 allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
4082 CalleeEdgeAllocTypesForCallerEdge,
Node->CalleeEdges)) {
4086 if (CallerEdge->IsBackedge) {
4090 DeferredBackedges++;
4103 if (CallerEdge->IsBackedge && !CallerEdge->Caller->CloneOf &&
4104 !Visited.
count(CallerEdge->Caller)) {
4105 const auto OrigIdCount = CallerEdge->getContextIds().size();
4108 identifyClones(CallerEdge->Caller, Visited, CallerEdgeContextsForAlloc);
4109 removeNoneTypeCalleeEdges(CallerEdge->Caller);
4113 bool UpdatedEdge =
false;
4114 if (OrigIdCount > CallerEdge->getContextIds().size()) {
4115 for (
auto E :
Node->CallerEdges) {
4117 if (
E->Caller->CloneOf != CallerEdge->Caller)
4121 auto CallerEdgeContextsForAllocNew =
4123 if (CallerEdgeContextsForAllocNew.empty())
4133 CallerEdgeContextsForAlloc.swap(CallerEdgeContextsForAllocNew);
4143 if (CallerEdge->isRemoved())
4153 CallerEdgeContextsForAlloc, CallerEdge->getContextIds());
4154 if (CallerEdgeContextsForAlloc.empty())
4159 CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
4160 CalleeEdgeAllocTypesForCallerEdge.clear();
4161 for (
auto &CalleeEdge :
Node->CalleeEdges) {
4162 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
4163 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
4169 ContextNode *Clone =
nullptr;
4170 for (
auto *CurClone :
Node->Clones) {
4171 if (allocTypeToUse(CurClone->AllocTypes) !=
4172 allocTypeToUse(CallerAllocTypeForAlloc))
4179 assert(!BothSingleAlloc ||
4180 CurClone->AllocTypes == CallerAllocTypeForAlloc);
4186 if (BothSingleAlloc || allocTypesMatchClone<DerivedCCG, FuncTy, CallTy>(
4187 CalleeEdgeAllocTypesForCallerEdge, CurClone)) {
4195 moveEdgeToExistingCalleeClone(CallerEdge, Clone,
false,
4196 CallerEdgeContextsForAlloc);
4198 Clone = moveEdgeToNewCalleeClone(CallerEdge, CallerEdgeContextsForAlloc);
4201 assert(Clone->AllocTypes != (uint8_t)AllocationType::None);
4208 assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
4214void ModuleCallsiteContextGraph::updateAllocationCall(
4219 "memprof", AllocTypeString);
4222 .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofAttribute",
Call.call())
4223 <<
ore::NV(
"AllocationCall",
Call.call()) <<
" in clone "
4225 <<
" marked with memprof allocation attribute "
4226 <<
ore::NV(
"Attribute", AllocTypeString));
4229void IndexCallsiteContextGraph::updateAllocationCall(CallInfo &
Call,
4233 assert(AI->Versions.size() >
Call.cloneNo());
4238ModuleCallsiteContextGraph::getAllocationCallType(
const CallInfo &
Call)
const {
4240 if (!CB->getAttributes().hasFnAttr(
"memprof"))
4241 return AllocationType::None;
4242 return CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
4243 ? AllocationType::Cold
4244 : AllocationType::NotCold;
4248IndexCallsiteContextGraph::getAllocationCallType(
const CallInfo &
Call)
const {
4250 assert(AI->Versions.size() >
Call.cloneNo());
4254void ModuleCallsiteContextGraph::updateCall(CallInfo &CallerCall,
4255 FuncInfo CalleeFunc) {
4256 auto *CurF = getCalleeFunc(CallerCall.call());
4257 auto NewCalleeCloneNo = CalleeFunc.cloneNo();
4264 if (CurCalleeCloneNo != NewCalleeCloneNo) {
4266 << CurCalleeCloneNo <<
" now " << NewCalleeCloneNo
4268 MismatchedCloneAssignments++;
4271 if (NewCalleeCloneNo > 0)
4272 cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func());
4273 OREGetter(CallerCall.call()->getFunction())
4274 .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofCall", CallerCall.call())
4275 <<
ore::NV(
"Call", CallerCall.call()) <<
" in clone "
4276 <<
ore::NV(
"Caller", CallerCall.call()->getFunction())
4277 <<
" assigned to call function clone "
4278 <<
ore::NV(
"Callee", CalleeFunc.func()));
4281void IndexCallsiteContextGraph::updateCall(CallInfo &CallerCall,
4282 FuncInfo CalleeFunc) {
4285 "Caller cannot be an allocation which should not have profiled calls");
4286 assert(CI->Clones.size() > CallerCall.cloneNo());
4287 auto NewCalleeCloneNo = CalleeFunc.cloneNo();
4288 auto &CurCalleeCloneNo = CI->Clones[CallerCall.cloneNo()];
4293 if (CurCalleeCloneNo != 0 && CurCalleeCloneNo != NewCalleeCloneNo) {
4295 << CurCalleeCloneNo <<
" now " << NewCalleeCloneNo
4297 MismatchedCloneAssignments++;
4299 CurCalleeCloneNo = NewCalleeCloneNo;
4311 SP->replaceLinkageName(MDName);
4315 TempDISubprogram NewDecl = Decl->
clone();
4316 NewDecl->replaceLinkageName(MDName);
4320CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
4322ModuleCallsiteContextGraph::cloneFunctionForCallsite(
4323 FuncInfo &Func, CallInfo &
Call, DenseMap<CallInfo, CallInfo> &CallMap,
4324 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
4329 assert(!
Func.func()->getParent()->getFunction(Name));
4330 NewFunc->setName(Name);
4332 for (
auto &Inst : CallsWithMetadataInFunc) {
4334 assert(Inst.cloneNo() == 0);
4337 OREGetter(
Func.func())
4338 .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofClone",
Func.func())
4339 <<
"created clone " <<
ore::NV(
"NewFunction", NewFunc));
4340 return {NewFunc, CloneNo};
4343CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
4344 IndexCall>::FuncInfo
4345IndexCallsiteContextGraph::cloneFunctionForCallsite(
4346 FuncInfo &Func, CallInfo &
Call, DenseMap<CallInfo, CallInfo> &CallMap,
4347 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
4361 for (
auto &Inst : CallsWithMetadataInFunc) {
4363 assert(Inst.cloneNo() == 0);
4365 assert(AI->Versions.size() == CloneNo);
4368 AI->Versions.push_back(0);
4371 assert(CI && CI->Clones.size() == CloneNo);
4374 CI->Clones.push_back(0);
4376 CallMap[Inst] = {Inst.call(), CloneNo};
4378 return {
Func.func(), CloneNo};
4395template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4396void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones() {
4402 DenseMap<uint32_t, ContextNode *> ContextIdToAllocationNode;
4403 for (
auto &Entry : AllocationCallToContextNodeMap) {
4405 for (
auto Id :
Node->getContextIds())
4406 ContextIdToAllocationNode[
Id] =
Node->getOrigNode();
4407 for (
auto *Clone :
Node->Clones) {
4408 for (
auto Id : Clone->getContextIds())
4409 ContextIdToAllocationNode[
Id] = Clone->getOrigNode();
4416 DenseSet<const ContextNode *> Visited;
4417 for (
auto &Entry : AllocationCallToContextNodeMap) {
4420 mergeClones(Node, Visited, ContextIdToAllocationNode);
4426 auto Clones =
Node->Clones;
4427 for (
auto *Clone : Clones)
4428 mergeClones(Clone, Visited, ContextIdToAllocationNode);
4432 dbgs() <<
"CCG after merging:\n";
4436 exportToDot(
"aftermerge");
4444template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4445void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones(
4446 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
4447 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {
4457 bool FoundUnvisited =
true;
4459 while (FoundUnvisited) {
4461 FoundUnvisited =
false;
4464 auto CallerEdges =
Node->CallerEdges;
4465 for (
auto CallerEdge : CallerEdges) {
4467 if (CallerEdge->Callee != Node)
4472 FoundUnvisited =
true;
4473 mergeClones(CallerEdge->Caller, Visited, ContextIdToAllocationNode);
4477 TotalMergeInvokes++;
4478 TotalMergeIters += Iters;
4479 if (Iters > MaxMergeIters)
4480 MaxMergeIters = Iters;
4483 mergeNodeCalleeClones(Node, Visited, ContextIdToAllocationNode);
4486template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4487void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeNodeCalleeClones(
4488 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
4489 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {
4491 if (
Node->emptyContextIds())
4496 MapVector<ContextNode *, std::vector<std::shared_ptr<ContextEdge>>>
4497 OrigNodeToCloneEdges;
4498 for (
const auto &
E :
Node->CalleeEdges) {
4503 OrigNodeToCloneEdges[
Base].push_back(
E);
4509 auto CalleeCallerEdgeLessThan = [](
const std::shared_ptr<ContextEdge> &
A,
4510 const std::shared_ptr<ContextEdge> &
B) {
4511 if (
A->Callee->CallerEdges.size() !=
B->Callee->CallerEdges.size())
4512 return A->Callee->CallerEdges.size() <
B->Callee->CallerEdges.size();
4513 if (
A->Callee->CloneOf && !
B->Callee->CloneOf)
4515 else if (!
A->Callee->CloneOf &&
B->Callee->CloneOf)
4519 return *
A->ContextIds.begin() < *
B->ContextIds.begin();
4524 for (
auto Entry : OrigNodeToCloneEdges) {
4527 auto &CalleeEdges =
Entry.second;
4528 auto NumCalleeClones = CalleeEdges.size();
4530 if (NumCalleeClones == 1)
4541 DenseSet<ContextNode *> OtherCallersToShareMerge;
4542 findOtherCallersToShareMerge(Node, CalleeEdges, ContextIdToAllocationNode,
4543 OtherCallersToShareMerge);
4548 ContextNode *MergeNode =
nullptr;
4549 DenseMap<ContextNode *, unsigned> CallerToMoveCount;
4550 for (
auto CalleeEdge : CalleeEdges) {
4551 auto *OrigCallee = CalleeEdge->Callee;
4557 if (CalleeEdge->Callee->CallerEdges.size() == 1) {
4558 MergeNode = OrigCallee;
4559 NonNewMergedNodes++;
4566 if (!OtherCallersToShareMerge.
empty()) {
4567 bool MoveAllCallerEdges =
true;
4568 for (
auto CalleeCallerE : OrigCallee->CallerEdges) {
4569 if (CalleeCallerE == CalleeEdge)
4571 if (!OtherCallersToShareMerge.
contains(CalleeCallerE->Caller)) {
4572 MoveAllCallerEdges =
false;
4578 if (MoveAllCallerEdges) {
4579 MergeNode = OrigCallee;
4580 NonNewMergedNodes++;
4587 assert(MergeNode != OrigCallee);
4588 moveEdgeToExistingCalleeClone(CalleeEdge, MergeNode,
4591 MergeNode = moveEdgeToNewCalleeClone(CalleeEdge);
4596 if (!OtherCallersToShareMerge.
empty()) {
4600 auto OrigCalleeCallerEdges = OrigCallee->CallerEdges;
4601 for (
auto &CalleeCallerE : OrigCalleeCallerEdges) {
4602 if (CalleeCallerE == CalleeEdge)
4604 if (!OtherCallersToShareMerge.
contains(CalleeCallerE->Caller))
4606 CallerToMoveCount[CalleeCallerE->Caller]++;
4607 moveEdgeToExistingCalleeClone(CalleeCallerE, MergeNode,
4611 removeNoneTypeCalleeEdges(OrigCallee);
4612 removeNoneTypeCalleeEdges(MergeNode);
4630template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4631void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
4632 findOtherCallersToShareMerge(
4634 std::vector<std::shared_ptr<ContextEdge>> &CalleeEdges,
4635 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode,
4636 DenseSet<ContextNode *> &OtherCallersToShareMerge) {
4637 auto NumCalleeClones = CalleeEdges.size();
4640 DenseMap<ContextNode *, unsigned> OtherCallersToSharedCalleeEdgeCount;
4643 unsigned PossibleOtherCallerNodes = 0;
4647 if (CalleeEdges[0]->
Callee->CallerEdges.size() < 2)
4653 DenseMap<ContextEdge *, DenseSet<ContextNode *>> CalleeEdgeToAllocNodes;
4654 for (
auto CalleeEdge : CalleeEdges) {
4655 assert(CalleeEdge->Callee->CallerEdges.size() > 1);
4658 for (
auto CalleeCallerEdges : CalleeEdge->Callee->CallerEdges) {
4659 if (CalleeCallerEdges->Caller == Node) {
4660 assert(CalleeCallerEdges == CalleeEdge);
4663 OtherCallersToSharedCalleeEdgeCount[CalleeCallerEdges->Caller]++;
4666 if (OtherCallersToSharedCalleeEdgeCount[CalleeCallerEdges->Caller] ==
4668 PossibleOtherCallerNodes++;
4672 for (
auto Id : CalleeEdge->getContextIds()) {
4673 auto *
Alloc = ContextIdToAllocationNode.
lookup(Id);
4677 MissingAllocForContextId++;
4680 CalleeEdgeToAllocNodes[CalleeEdge.get()].
insert(
Alloc);
4687 for (
auto CalleeEdge : CalleeEdges) {
4688 assert(CalleeEdge->Callee->CallerEdges.size() > 1);
4690 if (!PossibleOtherCallerNodes)
4692 auto &CurCalleeAllocNodes = CalleeEdgeToAllocNodes[CalleeEdge.get()];
4694 for (
auto &CalleeCallerE : CalleeEdge->Callee->CallerEdges) {
4696 if (CalleeCallerE == CalleeEdge)
4700 if (OtherCallersToSharedCalleeEdgeCount[CalleeCallerE->Caller] !=
4705 for (
auto Id : CalleeCallerE->getContextIds()) {
4706 auto *
Alloc = ContextIdToAllocationNode.
lookup(Id);
4711 if (!CurCalleeAllocNodes.contains(
Alloc)) {
4712 OtherCallersToSharedCalleeEdgeCount[CalleeCallerE->Caller] = 0;
4713 PossibleOtherCallerNodes--;
4720 if (!PossibleOtherCallerNodes)
4725 for (
auto &[OtherCaller,
Count] : OtherCallersToSharedCalleeEdgeCount) {
4726 if (
Count != NumCalleeClones)
4728 OtherCallersToShareMerge.
insert(OtherCaller);
4773template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4774bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
4781 DenseMap<ContextNode *, FuncInfo> CallsiteToCalleeFuncCloneMap;
4785 auto RecordCalleeFuncOfCallsite = [&](ContextNode *
Caller,
4786 const FuncInfo &CalleeFunc) {
4788 CallsiteToCalleeFuncCloneMap[
Caller] = CalleeFunc;
4792 struct FuncCloneInfo {
4797 DenseMap<CallInfo, CallInfo> CallMap;
4825 DenseMap<const ContextNode *, std::map<unsigned, SmallVector<CallInfo, 0>>>
4826 UnassignedCallClones;
4830 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
4831 FuncInfo OrigFunc(Func);
4836 std::vector<FuncCloneInfo> FuncCloneInfos;
4837 for (
auto &
Call : CallsWithMetadata) {
4838 ContextNode *
Node = getNodeForInst(
Call);
4842 if (!Node ||
Node->Clones.empty())
4845 "Not having a call should have prevented cloning");
4849 std::map<FuncInfo, ContextNode *> FuncCloneToCurNodeCloneMap;
4853 auto AssignCallsiteCloneToFuncClone = [&](
const FuncInfo &FuncClone,
4855 ContextNode *CallsiteClone,
4858 FuncCloneToCurNodeCloneMap[FuncClone] = CallsiteClone;
4860 assert(FuncCloneInfos.size() > FuncClone.cloneNo());
4861 DenseMap<CallInfo, CallInfo> &CallMap =
4862 FuncCloneInfos[FuncClone.cloneNo()].CallMap;
4863 CallInfo CallClone(
Call);
4864 if (
auto It = CallMap.
find(
Call); It != CallMap.
end())
4865 CallClone = It->second;
4866 CallsiteClone->setCall(CallClone);
4868 for (
auto &MatchingCall :
Node->MatchingCalls) {
4869 CallInfo CallClone(MatchingCall);
4870 if (
auto It = CallMap.
find(MatchingCall); It != CallMap.
end())
4871 CallClone = It->second;
4873 MatchingCall = CallClone;
4881 auto MoveEdgeToNewCalleeCloneAndSetUp =
4882 [&](
const std::shared_ptr<ContextEdge> &
Edge) {
4883 ContextNode *OrigCallee =
Edge->Callee;
4884 ContextNode *NewClone = moveEdgeToNewCalleeClone(
Edge);
4885 removeNoneTypeCalleeEdges(NewClone);
4886 assert(NewClone->AllocTypes != (uint8_t)AllocationType::None);
4890 if (CallsiteToCalleeFuncCloneMap.
count(OrigCallee))
4891 RecordCalleeFuncOfCallsite(
4892 NewClone, CallsiteToCalleeFuncCloneMap[OrigCallee]);
4899 std::deque<ContextNode *> ClonesWorklist;
4901 if (!
Node->emptyContextIds())
4902 ClonesWorklist.push_back(Node);
4908 unsigned NodeCloneCount = 0;
4909 while (!ClonesWorklist.empty()) {
4910 ContextNode *Clone = ClonesWorklist.front();
4911 ClonesWorklist.pop_front();
4920 if (FuncCloneInfos.size() < NodeCloneCount) {
4922 if (NodeCloneCount == 1) {
4927 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &
E) {
4928 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
4932 FuncCloneInfos.push_back(
4933 {OrigFunc, DenseMap<CallInfo, CallInfo>()});
4934 AssignCallsiteCloneToFuncClone(
4935 OrigFunc,
Call, Clone,
4936 AllocationCallToContextNodeMap.count(
Call));
4937 for (
auto &CE : Clone->CallerEdges) {
4939 if (!
CE->Caller->hasCall())
4941 RecordCalleeFuncOfCallsite(
CE->Caller, OrigFunc);
4951 FuncInfo PreviousAssignedFuncClone;
4953 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &
E) {
4954 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
4956 bool CallerAssignedToCloneOfFunc =
false;
4957 if (EI != Clone->CallerEdges.end()) {
4958 const std::shared_ptr<ContextEdge> &
Edge = *EI;
4959 PreviousAssignedFuncClone =
4960 CallsiteToCalleeFuncCloneMap[
Edge->Caller];
4961 CallerAssignedToCloneOfFunc =
true;
4966 DenseMap<CallInfo, CallInfo> NewCallMap;
4967 unsigned CloneNo = FuncCloneInfos.size();
4968 assert(CloneNo > 0 &&
"Clone 0 is the original function, which "
4969 "should already exist in the map");
4970 FuncInfo NewFuncClone = cloneFunctionForCallsite(
4971 OrigFunc,
Call, NewCallMap, CallsWithMetadata, CloneNo);
4972 FuncCloneInfos.push_back({NewFuncClone, std::move(NewCallMap)});
4973 FunctionClonesAnalysis++;
4979 if (!CallerAssignedToCloneOfFunc) {
4980 AssignCallsiteCloneToFuncClone(
4981 NewFuncClone,
Call, Clone,
4982 AllocationCallToContextNodeMap.count(
Call));
4983 for (
auto &CE : Clone->CallerEdges) {
4985 if (!
CE->Caller->hasCall())
4987 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
4999 auto CallerEdges = Clone->CallerEdges;
5000 for (
auto CE : CallerEdges) {
5002 if (
CE->isRemoved()) {
5008 if (!
CE->Caller->hasCall())
5011 if (!CallsiteToCalleeFuncCloneMap.
count(
CE->Caller) ||
5015 CallsiteToCalleeFuncCloneMap[
CE->Caller] !=
5016 PreviousAssignedFuncClone)
5019 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
5032 auto CalleeEdges =
CE->Caller->CalleeEdges;
5033 for (
auto CalleeEdge : CalleeEdges) {
5036 if (CalleeEdge->isRemoved()) {
5041 ContextNode *
Callee = CalleeEdge->Callee;
5045 if (Callee == Clone || !
Callee->hasCall())
5050 if (Callee == CalleeEdge->Caller)
5052 ContextNode *NewClone =
5053 MoveEdgeToNewCalleeCloneAndSetUp(CalleeEdge);
5056 removeNoneTypeCalleeEdges(Callee);
5064 CallInfo OrigCall(
Callee->getOrigNode()->Call);
5065 OrigCall.setCloneNo(0);
5066 DenseMap<CallInfo, CallInfo> &CallMap =
5067 FuncCloneInfos[NewFuncClone.cloneNo()].CallMap;
5069 CallInfo NewCall(CallMap[OrigCall]);
5071 NewClone->setCall(NewCall);
5073 for (
auto &MatchingCall : NewClone->MatchingCalls) {
5074 CallInfo OrigMatchingCall(MatchingCall);
5075 OrigMatchingCall.setCloneNo(0);
5077 CallInfo NewCall(CallMap[OrigMatchingCall]);
5080 MatchingCall = NewCall;
5089 auto FindFirstAvailFuncClone = [&]() {
5094 for (
auto &CF : FuncCloneInfos) {
5095 if (!FuncCloneToCurNodeCloneMap.count(CF.FuncClone))
5096 return CF.FuncClone;
5099 "Expected an available func clone for this callsite clone");
5116 std::map<FuncInfo, ContextNode *> FuncCloneToNewCallsiteCloneMap;
5117 FuncInfo FuncCloneAssignedToCurCallsiteClone;
5121 auto CloneCallerEdges = Clone->CallerEdges;
5122 for (
auto &
Edge : CloneCallerEdges) {
5126 if (
Edge->isRemoved())
5129 if (!
Edge->Caller->hasCall())
5133 if (CallsiteToCalleeFuncCloneMap.
count(
Edge->Caller)) {
5134 FuncInfo FuncCloneCalledByCaller =
5135 CallsiteToCalleeFuncCloneMap[
Edge->Caller];
5145 if ((FuncCloneToCurNodeCloneMap.count(FuncCloneCalledByCaller) &&
5146 FuncCloneToCurNodeCloneMap[FuncCloneCalledByCaller] !=
5154 (FuncCloneAssignedToCurCallsiteClone &&
5155 FuncCloneAssignedToCurCallsiteClone !=
5156 FuncCloneCalledByCaller)) {
5171 if (FuncCloneToNewCallsiteCloneMap.count(
5172 FuncCloneCalledByCaller)) {
5173 ContextNode *NewClone =
5174 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller];
5175 moveEdgeToExistingCalleeClone(
Edge, NewClone);
5177 removeNoneTypeCalleeEdges(NewClone);
5180 ContextNode *NewClone = MoveEdgeToNewCalleeCloneAndSetUp(
Edge);
5181 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller] =
5184 ClonesWorklist.push_back(NewClone);
5188 removeNoneTypeCalleeEdges(Clone);
5196 if (!FuncCloneAssignedToCurCallsiteClone) {
5197 FuncCloneAssignedToCurCallsiteClone = FuncCloneCalledByCaller;
5199 AssignCallsiteCloneToFuncClone(
5200 FuncCloneCalledByCaller,
Call, Clone,
5201 AllocationCallToContextNodeMap.count(
Call));
5205 assert(FuncCloneAssignedToCurCallsiteClone ==
5206 FuncCloneCalledByCaller);
5215 if (!FuncCloneAssignedToCurCallsiteClone) {
5216 FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
5217 assert(FuncCloneAssignedToCurCallsiteClone);
5219 AssignCallsiteCloneToFuncClone(
5220 FuncCloneAssignedToCurCallsiteClone,
Call, Clone,
5221 AllocationCallToContextNodeMap.count(
Call));
5223 assert(FuncCloneToCurNodeCloneMap
5224 [FuncCloneAssignedToCurCallsiteClone] == Clone);
5226 RecordCalleeFuncOfCallsite(
Edge->Caller,
5227 FuncCloneAssignedToCurCallsiteClone);
5247 if (!FuncCloneAssignedToCurCallsiteClone) {
5248 FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
5249 assert(FuncCloneAssignedToCurCallsiteClone &&
5250 "No available func clone for this callsite clone");
5251 AssignCallsiteCloneToFuncClone(
5252 FuncCloneAssignedToCurCallsiteClone,
Call, Clone,
5253 AllocationCallToContextNodeMap.contains(
Call));
5258 for (
const auto &PE :
Node->CalleeEdges)
5260 for (
const auto &CE :
Node->CallerEdges)
5262 for (
auto *Clone :
Node->Clones) {
5264 for (
const auto &PE : Clone->CalleeEdges)
5266 for (
const auto &CE : Clone->CallerEdges)
5272 if (FuncCloneInfos.size() < 2)
5278 for (
auto &
Call : CallsWithMetadata) {
5279 ContextNode *
Node = getNodeForInst(
Call);
5280 if (!Node || !
Node->hasCall() ||
Node->emptyContextIds())
5286 if (
Node->Clones.size() + 1 >= FuncCloneInfos.size())
5290 DenseSet<unsigned> NodeCallClones;
5291 for (
auto *
C :
Node->Clones)
5292 NodeCallClones.
insert(
C->Call.cloneNo());
5295 for (
auto &FC : FuncCloneInfos) {
5300 if (++
I == 1 || NodeCallClones.
contains(
I)) {
5305 auto &CallVector = UnassignedCallClones[
Node][
I];
5306 DenseMap<CallInfo, CallInfo> &CallMap =
FC.CallMap;
5307 if (
auto It = CallMap.
find(
Call); It != CallMap.
end()) {
5308 CallInfo CallClone = It->second;
5309 CallVector.push_back(CallClone);
5313 assert(
false &&
"Expected to find call in CallMap");
5316 for (
auto &MatchingCall :
Node->MatchingCalls) {
5317 if (
auto It = CallMap.
find(MatchingCall); It != CallMap.
end()) {
5318 CallInfo CallClone = It->second;
5319 CallVector.push_back(CallClone);
5323 assert(
false &&
"Expected to find call in CallMap");
5331 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
5333 auto UpdateCalls = [&](ContextNode *
Node,
5334 DenseSet<const ContextNode *> &Visited,
5335 auto &&UpdateCalls) {
5336 auto Inserted = Visited.insert(Node);
5340 for (
auto *Clone :
Node->Clones)
5341 UpdateCalls(Clone, Visited, UpdateCalls);
5343 for (
auto &
Edge :
Node->CallerEdges)
5344 UpdateCalls(
Edge->Caller, Visited, UpdateCalls);
5348 if (!
Node->hasCall() ||
Node->emptyContextIds())
5351 if (
Node->IsAllocation) {
5352 auto AT = allocTypeToUse(
Node->AllocTypes);
5358 !ContextIdToContextSizeInfos.
empty()) {
5359 uint64_t TotalCold = 0;
5361 for (
auto Id :
Node->getContextIds()) {
5362 auto TypeI = ContextIdToAllocationType.
find(Id);
5363 assert(TypeI != ContextIdToAllocationType.
end());
5364 auto CSI = ContextIdToContextSizeInfos.
find(Id);
5365 if (CSI != ContextIdToContextSizeInfos.
end()) {
5366 for (
auto &
Info : CSI->second) {
5368 if (TypeI->second == AllocationType::Cold)
5369 TotalCold +=
Info.TotalSize;
5374 AT = AllocationType::Cold;
5376 updateAllocationCall(
Node->Call, AT);
5381 if (!CallsiteToCalleeFuncCloneMap.
count(Node))
5384 auto CalleeFunc = CallsiteToCalleeFuncCloneMap[
Node];
5385 updateCall(
Node->Call, CalleeFunc);
5387 for (
auto &
Call :
Node->MatchingCalls)
5388 updateCall(
Call, CalleeFunc);
5392 if (!UnassignedCallClones.
contains(Node))
5394 DenseSet<unsigned> NodeCallClones;
5395 for (
auto *
C :
Node->Clones)
5396 NodeCallClones.
insert(
C->Call.cloneNo());
5398 auto &ClonedCalls = UnassignedCallClones[
Node];
5399 for (
auto &[CloneNo, CallVector] : ClonedCalls) {
5403 if (NodeCallClones.
contains(CloneNo))
5406 for (
auto &
Call : CallVector)
5407 updateCall(
Call, CalleeFunc);
5416 DenseSet<const ContextNode *> Visited;
5417 for (
auto &Entry : AllocationCallToContextNodeMap)
5418 UpdateCalls(
Entry.second, Visited, UpdateCalls);
5429 for (
auto &SN : FS->callsites()) {
5434 SN.Clones.size() >
I &&
5435 "Callsite summary has fewer entries than other summaries in function");
5436 if (SN.Clones.size() <=
I || !SN.Clones[
I])
5443 for (
auto &AN : FS->allocs()) {
5447 assert(AN.Versions.size() >
I &&
5448 "Alloc summary has fewer entries than other summaries in function");
5449 if (AN.Versions.size() <=
I ||
5466 NewGV->takeName(DeclGV);
5473 auto CloneFuncAliases = [&](
Function *NewF,
unsigned I) {
5474 if (!FuncToAliasMap.count(&
F))
5476 for (
auto *
A : FuncToAliasMap[&
F]) {
5478 auto *PrevA = M.getNamedAlias(AliasName);
5480 A->getType()->getPointerAddressSpace(),
5481 A->getLinkage(), AliasName, NewF);
5482 NewA->copyAttributesFrom(
A);
5484 TakeDeclNameAndReplace(PrevA, NewA);
5493 FunctionsClonedThinBackend++;
5510 for (
unsigned I = 1;
I < NumClones;
I++) {
5511 VMaps.
emplace_back(std::make_unique<ValueToValueMapTy>());
5518 FunctionCloneDuplicatesThinBackend++;
5519 auto *Func = HashToFunc[Hash];
5520 if (Func->hasAvailableExternallyLinkage()) {
5526 auto Decl = M.getOrInsertFunction(Name, Func->getFunctionType());
5528 <<
"created clone decl " <<
ore::NV(
"Decl", Decl.getCallee()));
5531 auto *PrevF = M.getFunction(Name);
5534 TakeDeclNameAndReplace(PrevF, Alias);
5536 <<
"created clone alias " <<
ore::NV(
"Alias", Alias));
5539 CloneFuncAliases(Func,
I);
5543 HashToFunc[Hash] = NewF;
5544 FunctionClonesThinBackend++;
5547 for (
auto &BB : *NewF) {
5548 for (
auto &Inst : BB) {
5549 Inst.setMetadata(LLVMContext::MD_memprof,
nullptr);
5550 Inst.setMetadata(LLVMContext::MD_callsite,
nullptr);
5555 TakeDeclNameAndReplace(PrevF, NewF);
5557 NewF->setName(Name);
5560 <<
"created clone " <<
ore::NV(
"NewFunction", NewF));
5563 CloneFuncAliases(NewF,
I);
5572 const Function *CallingFunc =
nullptr) {
5591 auto SrcFileMD =
F.getMetadata(
"thinlto_src_file");
5597 if (!SrcFileMD &&
F.isDeclaration()) {
5601 SrcFileMD = CallingFunc->getMetadata(
"thinlto_src_file");
5606 assert(SrcFileMD || OrigName ==
F.getName());
5608 StringRef SrcFile = M.getSourceFileName();
5620 if (!TheFnVI && OrigName ==
F.getName() &&
F.hasLocalLinkage() &&
5621 F.getName().contains(
'.')) {
5622 OrigName =
F.getName().rsplit(
'.').first;
5631 assert(TheFnVI ||
F.isDeclaration());
5635bool MemProfContextDisambiguation::initializeIndirectCallPromotionInfo(
5637 ICallAnalysis = std::make_unique<ICallPromotionAnalysis>();
5638 Symtab = std::make_unique<InstrProfSymtab>();
5649 if (
Error E = Symtab->create(M,
true,
false)) {
5650 std::string SymtabFailure =
toString(std::move(
E));
5651 M.getContext().emitError(
"Failed to create symtab: " + SymtabFailure);
5664 auto MIBIter = AllocNode.
MIBs.begin();
5665 for (
auto &MDOp : MemProfMD->
operands()) {
5667 auto StackIdIndexIter = MIBIter->StackIdIndices.begin();
5672 auto ContextIterBegin =
5676 (ContextIterBegin != StackContext.
end() && *ContextIterBegin == 0) ? 1
5678 for (
auto ContextIter = ContextIterBegin; ContextIter != StackContext.
end();
5683 if (LastStackContextId == *ContextIter)
5685 LastStackContextId = *ContextIter;
5686 assert(StackIdIndexIter != MIBIter->StackIdIndices.end());
5696bool MemProfContextDisambiguation::applyImport(
Module &M) {
5703 std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
5705 for (
auto &
A :
M.aliases()) {
5706 auto *Aliasee =
A.getAliaseeObject();
5708 FuncToAliasMap[
F].insert(&
A);
5711 if (!initializeIndirectCallPromotionInfo(M))
5718 OptimizationRemarkEmitter ORE(&
F);
5721 bool ClonesCreated =
false;
5722 unsigned NumClonesCreated = 0;
5723 auto CloneFuncIfNeeded = [&](
unsigned NumClones, FunctionSummary *
FS) {
5733 if (ClonesCreated) {
5734 assert(NumClonesCreated == NumClones);
5741 ClonesCreated =
true;
5742 NumClonesCreated = NumClones;
5745 auto CloneCallsite = [&](
const CallsiteInfo &StackNode, CallBase *CB,
5746 Function *CalledFunction, FunctionSummary *
FS) {
5748 CloneFuncIfNeeded(StackNode.
Clones.
size(), FS);
5760 if (CalledFunction != CB->getCalledOperand() &&
5761 (!GA || CalledFunction != GA->getAliaseeObject())) {
5762 SkippedCallsCloning++;
5768 auto CalleeOrigName = CalledFunction->getName();
5769 for (
unsigned J = 0; J < StackNode.
Clones.
size(); J++) {
5772 if (J > 0 && VMaps[J - 1]->
empty())
5776 if (!StackNode.
Clones[J])
5778 auto NewF =
M.getOrInsertFunction(
5780 CalledFunction->getFunctionType());
5794 ORE.emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofCall", CBClone)
5795 <<
ore::NV(
"Call", CBClone) <<
" in clone "
5797 <<
" assigned to call function clone "
5798 <<
ore::NV(
"Callee", NewF.getCallee()));
5812 ImportSummary->findSummaryInModule(TheFnVI,
M.getModuleIdentifier());
5816 auto SrcModuleMD =
F.getMetadata(
"thinlto_src_module");
5818 "enable-import-metadata is needed to emit thinlto_src_module");
5819 StringRef SrcModule =
5822 if (GVS->modulePath() == SrcModule) {
5823 GVSummary = GVS.get();
5827 assert(GVSummary && GVSummary->modulePath() == SrcModule);
5837 if (
FS->allocs().empty() &&
FS->callsites().empty())
5840 auto SI =
FS->callsites().begin();
5841 auto AI =
FS->allocs().begin();
5846 DenseMap<ValueInfo, CallsiteInfo> MapTailCallCalleeVIToCallsite;
5849 for (
auto CallsiteIt =
FS->callsites().rbegin();
5850 CallsiteIt !=
FS->callsites().rend(); CallsiteIt++) {
5851 auto &Callsite = *CallsiteIt;
5855 if (!Callsite.StackIdIndices.empty())
5857 MapTailCallCalleeVIToCallsite.
insert({Callsite.Callee, Callsite});
5866 for (
auto &BB :
F) {
5867 for (
auto &
I : BB) {
5873 auto *CalledValue = CB->getCalledOperand();
5874 auto *CalledFunction = CB->getCalledFunction();
5875 if (CalledValue && !CalledFunction) {
5876 CalledValue = CalledValue->stripPointerCasts();
5883 assert(!CalledFunction &&
5884 "Expected null called function in callsite for alias");
5888 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
5889 I.getMetadata(LLVMContext::MD_callsite));
5890 auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof);
5896 if (CB->getAttributes().hasFnAttr(
"memprof") && !MemProfMD) {
5897 CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
5898 ? AllocTypeColdThinBackend++
5899 : AllocTypeNotColdThinBackend++;
5900 OrigAllocsThinBackend++;
5901 AllocVersionsThinBackend++;
5902 if (!MaxAllocVersionsThinBackend)
5903 MaxAllocVersionsThinBackend = 1;
5910 auto &AllocNode = *(AI++);
5918 CloneFuncIfNeeded(AllocNode.Versions.size(), FS);
5920 OrigAllocsThinBackend++;
5921 AllocVersionsThinBackend += AllocNode.Versions.size();
5922 if (MaxAllocVersionsThinBackend < AllocNode.Versions.size())
5923 MaxAllocVersionsThinBackend = AllocNode.Versions.size();
5933 if (AllocNode.Versions.size() == 1 &&
5936 AllocationType::NotCold ||
5938 AllocationType::None);
5939 UnclonableAllocsThinBackend++;
5945 return Type == ((uint8_t)AllocationType::NotCold |
5946 (uint8_t)AllocationType::Cold);
5950 for (
unsigned J = 0; J < AllocNode.Versions.size(); J++) {
5953 if (J > 0 && VMaps[J - 1]->
empty())
5956 if (AllocNode.Versions[J] == (uint8_t)AllocationType::None)
5959 AllocTy == AllocationType::Cold ? AllocTypeColdThinBackend++
5960 : AllocTypeNotColdThinBackend++;
5975 ORE.emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofAttribute", CBClone)
5976 <<
ore::NV(
"AllocationCall", CBClone) <<
" in clone "
5978 <<
" marked with memprof allocation attribute "
5979 <<
ore::NV(
"Attribute", AllocTypeString));
5981 }
else if (!CallsiteContext.empty()) {
5982 if (!CalledFunction) {
5986 assert(!CI || !CI->isInlineAsm());
5996 recordICPInfo(CB,
FS->callsites(), SI, ICallAnalysisInfo);
6002 CloneFuncIfNeeded(NumClones, FS);
6007 assert(SI !=
FS->callsites().end());
6008 auto &StackNode = *(
SI++);
6014 for (
auto StackId : CallsiteContext) {
6016 assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) ==
6022 CloneCallsite(StackNode, CB, CalledFunction, FS);
6024 }
else if (CB->isTailCall() && CalledFunction) {
6027 ValueInfo CalleeVI =
6029 if (CalleeVI && MapTailCallCalleeVIToCallsite.
count(CalleeVI)) {
6030 auto Callsite = MapTailCallCalleeVIToCallsite.
find(CalleeVI);
6031 assert(Callsite != MapTailCallCalleeVIToCallsite.
end());
6032 CloneCallsite(Callsite->second, CB, CalledFunction, FS);
6039 performICP(M,
FS->callsites(), VMaps, ICallAnalysisInfo, ORE);
6049 for (
auto &BB :
F) {
6050 for (
auto &
I : BB) {
6053 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
6054 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
6062unsigned MemProfContextDisambiguation::recordICPInfo(
6067 uint32_t NumCandidates;
6068 uint64_t TotalCount;
6069 auto CandidateProfileData =
6070 ICallAnalysis->getPromotionCandidatesForInstruction(CB, TotalCount,
6072 if (CandidateProfileData.empty())
6078 bool ICPNeeded =
false;
6079 unsigned NumClones = 0;
6080 size_t CallsiteInfoStartIndex = std::distance(AllCallsites.
begin(), SI);
6081 for (
const auto &Candidate : CandidateProfileData) {
6083 auto CalleeValueInfo =
6085 ImportSummary->getValueInfo(Candidate.Value);
6088 assert(!CalleeValueInfo ||
SI->Callee == CalleeValueInfo);
6090 auto &StackNode = *(
SI++);
6095 [](
unsigned CloneNo) { return CloneNo != 0; });
6105 ICallAnalysisInfo.
push_back({CB, CandidateProfileData.vec(), NumCandidates,
6106 TotalCount, CallsiteInfoStartIndex});
6110void MemProfContextDisambiguation::performICP(
6112 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
6114 OptimizationRemarkEmitter &ORE) {
6121 for (
auto &
Info : ICallAnalysisInfo) {
6124 auto TotalCount =
Info.TotalCount;
6125 unsigned NumPromoted = 0;
6126 unsigned NumClones = 0;
6128 for (
auto &Candidate :
Info.CandidateProfileData) {
6139 Function *TargetFunction = Symtab->getFunction(Candidate.Value);
6140 if (TargetFunction ==
nullptr ||
6148 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnableToFindTarget", CB)
6149 <<
"Memprof cannot promote indirect call: target with md5sum "
6150 <<
ore::NV(
"target md5sum", Candidate.Value) <<
" not found";
6159 const char *Reason =
nullptr;
6162 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnableToPromote", CB)
6163 <<
"Memprof cannot promote indirect call to "
6164 <<
ore::NV(
"TargetFunction", TargetFunction)
6165 <<
" with count of " <<
ore::NV(
"TotalCount", TotalCount)
6176 CallBase *CBClone = CB;
6177 for (
unsigned J = 0; J < NumClones; J++) {
6180 if (J > 0 && VMaps[J - 1]->
empty())
6190 TotalCount, isSamplePGO, &ORE);
6191 auto *TargetToUse = TargetFunction;
6194 if (StackNode.
Clones[J]) {
6213 <<
ore::NV(
"Call", CBClone) <<
" in clone "
6215 <<
" promoted and assigned to call function clone "
6216 <<
ore::NV(
"Callee", TargetToUse));
6220 TotalCount -= Candidate.Count;
6225 CallBase *CBClone = CB;
6226 for (
unsigned J = 0; J < NumClones; J++) {
6229 if (J > 0 && VMaps[J - 1]->
empty())
6235 CBClone->
setMetadata(LLVMContext::MD_prof,
nullptr);
6238 if (TotalCount != 0)
6240 M, *CBClone,
ArrayRef(
Info.CandidateProfileData).slice(NumPromoted),
6241 TotalCount, IPVK_IndirectCallTarget,
Info.NumCandidates);
6246template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
6247bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {
6249 dbgs() <<
"CCG before cloning:\n";
6253 exportToDot(
"postbuild");
6266 dbgs() <<
"CCG after cloning:\n";
6270 exportToDot(
"cloned");
6272 bool Changed = assignFunctions();
6275 dbgs() <<
"CCG after assigning function clones:\n";
6279 exportToDot(
"clonefuncassign");
6282 printTotalSizes(
errs());
6287bool MemProfContextDisambiguation::processModule(
6289 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) {
6294 return applyImport(M);
6307 ModuleCallsiteContextGraph CCG(M, OREGetter);
6308 return CCG.process();
6313 : ImportSummary(Summary), isSamplePGO(isSamplePGO) {
6318 "-memprof-dot-scope=alloc requires -memprof-dot-alloc-id");
6322 "-memprof-dot-scope=context requires -memprof-dot-context-id");
6326 "-memprof-dot-scope=all can't have both -memprof-dot-alloc-id and "
6327 "-memprof-dot-context-id");
6328 if (ImportSummary) {
6338 auto ReadSummaryFile =
6340 if (!ReadSummaryFile) {
6347 if (!ImportSummaryForTestingOrErr) {
6353 ImportSummaryForTesting = std::move(*ImportSummaryForTestingOrErr);
6354 ImportSummary = ImportSummaryForTesting.get();
6363 if (!processModule(M, OREGetter))
6380 IndexCallsiteContextGraph CCG(Index, isPrevailing);
6396 for (
auto &BB :
F) {
6397 for (
auto &
I : BB) {
6401 if (CI->hasFnAttr(
"memprof")) {
6402 CI->removeFnAttr(
"memprof");
6405 if (!CI->hasMetadata(LLVMContext::MD_callsite)) {
6406 assert(!CI->hasMetadata(LLVMContext::MD_memprof));
6412 CI->setMetadata(LLVMContext::MD_memprof,
nullptr);
6413 CI->setMetadata(LLVMContext::MD_callsite,
nullptr);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Prepare AGPR Alloc
Unify divergent function exit nodes
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
Module.h This file contains the declarations for the Module class.
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
static cl::opt< unsigned > TailCallSearchDepth("memprof-tail-call-search-depth", cl::init(5), cl::Hidden, cl::desc("Max depth to recursively search for missing " "frames through tail calls."))
uint64_t ComputeHash(const FunctionSummary *FS, unsigned I)
static cl::opt< DotScope > DotGraphScope("memprof-dot-scope", cl::desc("Scope of graph to export to dot"), cl::Hidden, cl::init(DotScope::All), cl::values(clEnumValN(DotScope::All, "all", "Export full callsite graph"), clEnumValN(DotScope::Alloc, "alloc", "Export only nodes with contexts feeding given " "-memprof-dot-alloc-id"), clEnumValN(DotScope::Context, "context", "Export only nodes with given -memprof-dot-context-id")))
static cl::opt< bool > DoMergeIteration("memprof-merge-iteration", cl::init(true), cl::Hidden, cl::desc("Iteratively apply merging on a node to catch new callers"))
static bool isMemProfClone(const Function &F)
static cl::opt< unsigned > AllocIdForDot("memprof-dot-alloc-id", cl::init(0), cl::Hidden, cl::desc("Id of alloc to export if -memprof-dot-scope=alloc " "or to highlight if -memprof-dot-scope=all"))
static cl::opt< unsigned > ContextIdForDot("memprof-dot-context-id", cl::init(0), cl::Hidden, cl::desc("Id of context to export if -memprof-dot-scope=context or to " "highlight otherwise"))
static cl::opt< bool > ExportToDot("memprof-export-to-dot", cl::init(false), cl::Hidden, cl::desc("Export graph to dot files."))
static void checkEdge(const std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > &Edge)
static cl::opt< bool > AllowRecursiveCallsites("memprof-allow-recursive-callsites", cl::init(true), cl::Hidden, cl::desc("Allow cloning of callsites involved in recursive cycles"))
bool checkColdOrNotCold(uint8_t AllocType)
static ValueInfo findValueInfoForFunc(const Function &F, const Module &M, const ModuleSummaryIndex *ImportSummary, const Function *CallingFunc=nullptr)
static cl::opt< bool > CloneRecursiveContexts("memprof-clone-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts through recursive cycles"))
static std::string getAllocTypeString(uint8_t AllocTypes)
static cl::opt< unsigned > MemProfICPNoInlineThreshold("memprof-icp-noinline-threshold", cl::init(2), cl::Hidden, cl::desc("Minimum absolute count for promoted target to be inlinable"))
bool DOTGraphTraits< constCallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * >::DoHighlight
static unsigned getMemProfCloneNum(const Function &F)
static SmallVector< std::unique_ptr< ValueToValueMapTy >, 4 > createFunctionClones(Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE, std::map< const Function *, SmallPtrSet< const GlobalAlias *, 1 > > &FuncToAliasMap, FunctionSummary *FS)
static cl::opt< bool > VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden, cl::desc("Perform verification checks on CallingContextGraph."))
static void checkNode(const ContextNode< DerivedCCG, FuncTy, CallTy > *Node, bool CheckEdges=true)
static cl::opt< bool > MergeClones("memprof-merge-clones", cl::init(true), cl::Hidden, cl::desc("Merge clones before assigning functions"))
static std::string getMemProfFuncName(Twine Base, unsigned CloneNo)
static cl::opt< std::string > MemProfImportSummary("memprof-import-summary", cl::desc("Import summary to use for testing the ThinLTO backend via opt"), cl::Hidden)
static const std::string MemProfCloneSuffix
static void updateSubprogramLinkageName(Function *NewFunc, StringRef Name)
static cl::opt< bool > AllowRecursiveContexts("memprof-allow-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts having recursive cycles"))
static cl::opt< std::string > DotFilePathPrefix("memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path prefix of the MemProf dot files."))
static cl::opt< bool > VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden, cl::desc("Perform frequent verification checks on nodes."))
static void checkAllocContextIds(const AllocInfo &AllocNode, const MDNode *MemProfMD, const CallStack< MDNode, MDNode::op_iterator > &CallsiteContext, const ModuleSummaryIndex *ImportSummary)
static cl::opt< bool > DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden, cl::desc("Dump CallingContextGraph to stdout after each stage."))
This is the interface to build a ModuleSummaryIndex for a module.
ModuleSummaryIndex.h This file contains the declarations the classes that hold the module index and s...
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
FunctionAnalysisManager FAM
std::pair< BasicBlock *, BasicBlock * > Edge
This file defines generic set operations that may be used on set's of different types,...
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
ValueInfo getAliaseeVI() const
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool empty() const
empty - Check if the array is empty.
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
void setCalledOperand(Value *V)
Subprogram description. Uses SubclassData1.
ValueT & at(const_arg_type_t< KeyT > Val)
at - Return the entry for the specified key, or abort if no such entry exists.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
bool erase(const KeyT &Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Implements a dense probed hash-table based set.
Function summary information to aid decisions and implementation of importing.
FunctionType * getFunctionType() const
Returns the FunctionType for me.
DISubprogram * getSubprogram() const
Get the attached subprogram.
const Function & getFunction() const
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
static LLVM_ABI GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Function and variable summary information to aid decisions and implementation of importing.
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
static bool isLocalLinkage(LinkageTypes Linkage)
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
uint64_t GUID
Declare a type to represent a global unique identifier for a global value.
Module * getParent()
Get the module that this global value is contained inside of...
LLVM_ABI void eraseFromParent()
This method unlinks 'this' from the containing module and deletes it.
static LLVM_ABI std::string getGlobalIdentifier(StringRef Name, GlobalValue::LinkageTypes Linkage, StringRef FileName)
Return the modified name for a global value suitable to be used as the key for a global lookup (e....
@ InternalLinkage
Rename collisions when linking (static functions).
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
LLVM_ABI TempMDNode clone() const
Create a (temporary) clone of this.
static std::enable_if_t< std::is_base_of< MDNode, T >::value, T * > replaceWithUniqued(std::unique_ptr< T, TempMDNodeDeleter > N)
Replace a temporary node with a uniqued one.
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
size_type count(const KeyT &Key) const
MemProfContextDisambiguation(const ModuleSummaryIndex *Summary=nullptr, bool isSamplePGO=false)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
Class to hold module path string table and global value map, and encapsulate methods for operating on...
static StringRef getOriginalNameBeforePromote(StringRef Name)
Helper to obtain the unpromoted name for a global value (or the original name if not promoted).
ValueInfo getValueInfo(const GlobalValueSummaryMapTy::value_type &R) const
Return a ValueInfo for the index value_type (convenient when iterating index).
uint64_t getStackIdAtIndex(unsigned Index) const
A Module instance is used to store all the information related to an LLVM module.
LLVMContext & getContext() const
Get the global data context.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
A discriminated union of two or more pointer types, with the discriminator in the low bit of the poin...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
A class that wrap the SHA1 algorithm.
LLVM_ABI void update(ArrayRef< uint8_t > Data)
Digest more data.
LLVM_ABI std::array< uint8_t, 20 > result()
Return the current raw 160-bits SHA1 for the digested data since the last call to init().
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI void print(raw_ostream &O, bool IsForDebug=false) const
Implement operator<< on Value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
void insert_range(Range &&R)
void swap(DenseSetImpl &RHS)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
bool erase(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
An efficient, type-erasing, non-owning reference to a callable.
Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresp...
CallStackIterator beginAfterSharedPrefix(const CallStack &Other)
CallStackIterator end() const
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
@ CE
Windows NT (Windows on ARM)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
LLVM_ABI AllocationType getMIBAllocType(const MDNode *MIB)
Returns the allocation type from an MIB metadata node.
LLVM_ABI bool metadataMayIncludeContextSizeInfo()
Whether the alloc memprof metadata may include context size info for some MIBs (but possibly not all)...
LLVM_ABI bool hasSingleAllocType(uint8_t AllocTypes)
True if the AllocTypes bitmask contains just a single type.
LLVM_ABI std::string getAllocTypeAttributeString(AllocationType Type)
Returns the string to use in attributes with the given type.
LLVM_ABI MDNode * getMIBStackNode(const MDNode *MIB)
Returns the stack node from an MIB metadata node.
LLVM_ABI void removeAnyExistingAmbiguousAttribute(CallBase *CB)
Removes any existing "ambiguous" memprof attribute.
DiagnosticInfoOptimizationBase::Argument NV
LLVM_ABI CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< NodeBase * > Node
NodeAddr< FuncNode * > Func
friend class Instruction
Iterator for Instructions in a `BasicBlock.
uint64_t read64le(const void *P)
void write32le(void *P, uint32_t V)
This is an optimization pass for GlobalISel generic memory operations.
cl::opt< unsigned > MinClonedColdBytePercent("memprof-cloning-cold-threshold", cl::init(100), cl::Hidden, cl::desc("Min percent of cold bytes to hint alloc cold during cloning"))
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
LLVM_ABI void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner={})
Log all errors (if any) in E to OS.
FunctionAddr VTableAddr Value
void stable_sort(R &&Range)
cl::opt< bool > MemProfReportHintedSizes("memprof-report-hinted-sizes", cl::init(false), cl::Hidden, cl::desc("Report total allocation sizes of hinted allocations"))
LLVM_ABI bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI bool mayHaveMemprofSummary(const CallBase *CB)
Returns true if the instruction could have memprof metadata, used to ensure consistency between summa...
constexpr from_range_t from_range
static cl::opt< bool > MemProfRequireDefinitionForPromotion("memprof-require-definition-for-promotion", cl::init(false), cl::Hidden, cl::desc("Require target function definition when promoting indirect calls"))
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
cl::opt< unsigned > MemProfTopNImportant("memprof-top-n-important", cl::init(10), cl::Hidden, cl::desc("Number of largest cold contexts to consider important"))
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
void set_subtract(S1Ty &S1, const S2Ty &S2)
set_subtract(A, B) - Compute A := A - B
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
bool set_intersects(const S1Ty &S1, const S2Ty &S2)
set_intersects(A, B) - Return true iff A ^ B is non empty
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
LLVM_ABI Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndex(MemoryBufferRef Buffer)
Parse the specified bitcode buffer, returning the module summary index.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
FunctionAddr VTableAddr Count
bool set_union(S1Ty &S1, const S2Ty &S2)
set_union(A, B) - Compute A := A u B, return whether A changed.
cl::opt< bool > SupportsHotColdNew
Indicate we are linking with an allocator that supports hot/cold operator new interfaces.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
S1Ty set_intersection(const S1Ty &S1, const S2Ty &S2)
set_intersection(A, B) - Return A ^ B
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
S1Ty set_difference(const S1Ty &S1, const S2Ty &S2)
set_difference(A, B) - Return A - B
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Expected< T > errorOrToExpected(ErrorOr< T > &&EO)
Convert an ErrorOr<T> to an Expected<T>.
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
LLVM_ABI Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
cl::opt< bool > MemProfFixupImportant("memprof-fixup-important", cl::init(true), cl::Hidden, cl::desc("Enables edge fixup for important contexts"))
DOTGraphTraits(bool IsSimple=false)
typename GTraits::NodeRef NodeRef
static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter, GraphType G)
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
typename GTraits::ChildIteratorType ChildIteratorType
static std::string getNodeAttributes(NodeRef Node, GraphType G)
static bool isNodeHidden(NodeRef Node, GraphType G)
static std::string getNodeLabel(NodeRef Node, GraphType G)
GraphTraits< GraphType > GTraits
static NodeRef getNode(const NodePtrTy &P)
static const ContextNode< DerivedCCG, FuncTy, CallTy > * GetCallee(const EdgePtrTy &P)
static ChildIteratorType child_end(NodeRef N)
std::unique_ptr< ContextNode< DerivedCCG, FuncTy, CallTy > > NodePtrTy
mapped_iterator< typename std::vector< std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > >::const_iterator, decltype(&GetCallee)> ChildIteratorType
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
const ContextNode< DerivedCCG, FuncTy, CallTy > * NodeRef
mapped_iterator< typename std::vector< NodePtrTy >::const_iterator, decltype(&getNode)> nodes_iterator
static ChildIteratorType child_begin(NodeRef N)
static NodeRef getEntryNode(GraphType G)
static nodes_iterator nodes_begin(GraphType G)
static nodes_iterator nodes_end(GraphType G)
std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > EdgePtrTy
Summary of memprof metadata on allocations.
std::vector< MIBInfo > MIBs
SmallVector< unsigned > StackIdIndices
SmallVector< unsigned > Clones
DefaultDOTGraphTraits(bool simple=false)
An information struct used to provide DenseMap with the various necessary components for a given valu...
typename GraphType::UnknownGraphTypeError NodeRef
Struct that holds a reference to a particular GUID in a global value summary.
ArrayRef< std::unique_ptr< GlobalValueSummary > > getSummaryList() const
GlobalValue::GUID getGUID() const
PointerUnion< CallsiteInfo *, AllocInfo * > SimpleType
static SimpleType getSimplifiedValue(IndexCall &Val)
const PointerUnion< CallsiteInfo *, AllocInfo * > SimpleType
static SimpleType getSimplifiedValue(const IndexCall &Val)
Define a template that can be specialized by smart pointers to reflect the fact that they are automat...