52#include <unordered_map>
57#define DEBUG_TYPE "memprof-context-disambiguation"
60 "Number of function clones created during whole program analysis");
62 "Number of function clones created during ThinLTO backend");
64 "Number of functions that had clones created during ThinLTO backend");
66 FunctionCloneDuplicatesThinBackend,
67 "Number of function clone duplicates detected during ThinLTO backend");
// NOTE(review): this file appears to be a corrupted/partial extraction — the
// upstream file's line numbers are fused into the text (e.g. the leading
// "68"/"69" below) and interior lines are elided throughout. Restore from the
// upstream source before making code changes; comments below annotate only
// what the visible text establishes.
//
// STATISTIC counters tallying how many static allocations were classified
// not-cold vs cold (including clones) during whole-program analysis, per the
// description strings.
68STATISTIC(AllocTypeNotCold,
"Number of not cold static allocations (possibly "
69 "cloned) during whole program analysis");
70STATISTIC(AllocTypeCold,
"Number of cold static allocations (possibly cloned) "
71 "during whole program analysis");
73 "Number of not cold static allocations (possibly cloned) during "
75STATISTIC(AllocTypeColdThinBackend,
"Number of cold static allocations "
76 "(possibly cloned) during ThinLTO backend");
78 "Number of original (not cloned) allocations with memprof profiles "
79 "during ThinLTO backend");
81 AllocVersionsThinBackend,
82 "Number of allocation versions (including clones) during ThinLTO backend");
84 "Maximum number of allocation versions created for an original "
85 "allocation during ThinLTO backend");
87 "Number of unclonable ambigous allocations during ThinLTO backend");
89 "Number of edges removed due to mismatched callees (profiled vs IR)");
91 "Number of profiled callees found via tail calls");
93 "Aggregate depth of profiled callees found via tail calls");
95 "Maximum depth of profiled callees found via tail calls");
97 "Number of profiled callees found via multiple tail call chains");
// Counters for the cloning/merging phases, per their description strings:
// backedges whose cloning was deferred, and nodes created vs reused while
// merging. (Leading "98"/"99"/"100" are line numbers fused in by a broken
// extraction — NOTE(review): repair from upstream.)
98STATISTIC(DeferredBackedges,
"Number of backedges with deferred cloning");
99STATISTIC(NewMergedNodes,
"Number of new nodes created during merging");
100STATISTIC(NonNewMergedNodes,
"Number of non new nodes used during merging");
102 "Number of missing alloc nodes for context ids");
104 "Number of calls skipped during cloning due to unexpected operand");
106 "Number of callsites assigned to call multiple non-matching clones");
// Counters for node-merge iteration behavior (total invocations, total and
// max iterations) and for the important-context edge-fixup bookkeeping
// (context ids tracked, fixup edge ids/edges inserted, contexts fixed), per
// the description strings. (Leading digits are fused upstream line numbers —
// NOTE(review): repair from upstream.)
107STATISTIC(TotalMergeInvokes,
"Number of merge invocations for nodes");
108STATISTIC(TotalMergeIters,
"Number of merge iterations for nodes");
109STATISTIC(MaxMergeIters,
"Max merge iterations for nodes");
110STATISTIC(NumImportantContextIds,
"Number of important context ids");
111STATISTIC(NumFixupEdgeIdsInserted,
"Number of fixup edge ids inserted");
112STATISTIC(NumFixupEdgesAdded,
"Number of fixup edges added");
113STATISTIC(NumFixedContexts,
"Number of contexts with fixed edges");
118 cl::desc(
"Specify the path prefix of the MemProf dot files."));
122 cl::desc(
"Export graph to dot files."));
127 cl::desc(
"Iteratively apply merging on a node to catch new callers"));
137 "memprof-dot-scope",
cl::desc(
"Scope of graph to export to dot"),
142 "Export only nodes with contexts feeding given "
143 "-memprof-dot-alloc-id"),
145 "Export only nodes with given -memprof-dot-context-id")));
149 cl::desc(
"Id of alloc to export if -memprof-dot-scope=alloc "
150 "or to highlight if -memprof-dot-scope=all"));
154 cl::desc(
"Id of context to export if -memprof-dot-scope=context or to "
155 "highlight otherwise"));
159 cl::desc(
"Dump CallingContextGraph to stdout after each stage."));
163 cl::desc(
"Perform verification checks on CallingContextGraph."));
167 cl::desc(
"Perform frequent verification checks on nodes."));
170 "memprof-import-summary",
171 cl::desc(
"Import summary to use for testing the ThinLTO backend via opt"),
177 cl::desc(
"Max depth to recursively search for missing "
178 "frames through tail calls."));
183 cl::desc(
"Allow cloning of callsites involved in recursive cycles"));
187 cl::desc(
"Allow cloning of contexts through recursive cycles"));
194 cl::desc(
"Merge clones before assigning functions"));
203 cl::desc(
"Allow cloning of contexts having recursive cycles"));
209 cl::desc(
"Minimum absolute count for promoted target to be inlinable"));
220 cl::desc(
"Linking with hot/cold operator new interfaces"));
225 "Require target function definition when promoting indirect calls"));
232 cl::desc(
"Number of largest cold contexts to consider important"));
236 cl::desc(
"Enables edge fixup for important contexts"));
258template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
259class CallsiteContextGraph {
// Defaulted default/copy/move construction for the graph; the class manages
// no raw resources in these visible lines (Rule of Zero for construction).
// (Leading "261"-"277" digits are fused upstream line numbers; surrounding
// class-body lines are elided — NOTE(review): repair from upstream.)
261 CallsiteContextGraph() =
default;
262 CallsiteContextGraph(
const CallsiteContextGraph &) =
default;
263 CallsiteContextGraph(CallsiteContextGraph &&) =
default;
// Entry points of the analysis, declared here and defined out of line:
// identifyClones presumably performs the context-driven clone identification
// and assignFunctions the clone-to-function assignment — TODO confirm against
// the (elided) definitions before relying on this description.
270 void identifyClones();
277 bool assignFunctions();
284 const CallsiteContextGraph &CCG) {
290 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
292 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
294 void exportToDot(std::string Label)
const;
297 struct FuncInfo final
298 :
public std::pair<FuncTy *, unsigned > {
299 using Base = std::pair<FuncTy *, unsigned>;
301 FuncInfo(FuncTy *
F =
nullptr,
unsigned CloneNo = 0) :
Base(
F, CloneNo) {}
302 explicit operator bool()
const {
return this->first !=
nullptr; }
303 FuncTy *func()
const {
return this->first; }
304 unsigned cloneNo()
const {
return this->second; }
308 struct CallInfo final :
public std::pair<CallTy, unsigned > {
309 using Base = std::pair<CallTy, unsigned>;
311 CallInfo(CallTy
Call =
nullptr,
unsigned CloneNo = 0)
313 explicit operator bool()
const {
return (
bool)this->first; }
314 CallTy call()
const {
return this->first; }
315 unsigned cloneNo()
const {
return this->second; }
316 void setCloneNo(
unsigned N) { this->second =
N; }
318 if (!
operator bool()) {
324 OS <<
"\t(clone " << cloneNo() <<
")";
350 bool Recursive =
false;
377 std::vector<std::shared_ptr<ContextEdge>> CalleeEdges;
381 std::vector<std::shared_ptr<ContextEdge>> CallerEdges;
385 bool useCallerEdgesForContextInfo()
const {
390 assert(!CalleeEdges.empty() || CallerEdges.empty() || IsAllocation ||
408 for (
auto &Edge : CalleeEdges.empty() ? CallerEdges : CalleeEdges)
409 Count += Edge->getContextIds().size();
413 CalleeEdges, useCallerEdgesForContextInfo()
415 : std::vector<std::shared_ptr<ContextEdge>>());
416 for (
const auto &Edge : Edges)
423 uint8_t computeAllocType()
const {
428 CalleeEdges, useCallerEdgesForContextInfo()
430 : std::vector<std::shared_ptr<ContextEdge>>());
431 for (
const auto &Edge : Edges) {
442 bool emptyContextIds()
const {
444 CalleeEdges, useCallerEdgesForContextInfo()
446 : std::vector<std::shared_ptr<ContextEdge>>());
447 for (
const auto &Edge : Edges) {
448 if (!Edge->getContextIds().empty())
455 std::vector<ContextNode *> Clones;
458 ContextNode *CloneOf =
nullptr;
460 ContextNode(
bool IsAllocation) : IsAllocation(IsAllocation),
Call() {}
462 ContextNode(
bool IsAllocation, CallInfo
C)
463 : IsAllocation(IsAllocation),
Call(
C) {}
465 void addClone(ContextNode *Clone) {
467 CloneOf->Clones.push_back(Clone);
468 Clone->CloneOf = CloneOf;
470 Clones.push_back(Clone);
472 Clone->CloneOf =
this;
476 ContextNode *getOrigNode() {
483 unsigned int ContextId);
485 ContextEdge *findEdgeFromCallee(
const ContextNode *Callee);
486 ContextEdge *findEdgeFromCaller(
const ContextNode *Caller);
487 void eraseCalleeEdge(
const ContextEdge *Edge);
488 void eraseCallerEdge(
const ContextEdge *Edge);
490 void setCall(CallInfo
C) {
Call =
C; }
492 bool hasCall()
const {
return (
bool)
Call.call(); }
498 bool isRemoved()
const {
534 bool IsBackedge =
false;
541 : Callee(Callee), Caller(Caller), AllocTypes(
AllocType),
542 ContextIds(std::move(ContextIds)) {}
548 inline void clear() {
558 inline bool isRemoved()
const {
559 if (Callee || Caller)
580 void removeNoneTypeCalleeEdges(ContextNode *
Node);
581 void removeNoneTypeCallerEdges(ContextNode *
Node);
583 recursivelyRemoveNoneTypeCalleeEdges(ContextNode *
Node,
589 template <
class NodeT,
class IteratorT>
590 std::vector<uint64_t>
595 ContextNode *addAllocNode(CallInfo
Call,
const FuncTy *
F);
598 template <
class NodeT,
class IteratorT>
599 void addStackNodesForMIB(
603 std::map<uint64_t, uint32_t> &TotalSizeToContextIdTopNCold);
608 void updateStackNodes();
617 void fixupImportantContexts();
621 void handleCallsitesWithMultipleTargets();
624 void markBackedges();
634 bool partitionCallsByCallee(
636 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode);
643 std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;
650 using EdgeIter =
typename std::vector<std::shared_ptr<ContextEdge>>
::iterator;
655 struct CallContextInfo {
659 std::vector<uint64_t> StackIds;
673 void removeEdgeFromGraph(ContextEdge *Edge, EdgeIter *EI =
nullptr,
674 bool CalleeIter =
true);
682 void assignStackNodesPostOrder(
696 void propagateDuplicateContextIds(
702 void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode,
710 return static_cast<const DerivedCCG *
>(
this)->getStackId(IdOrIndex);
720 calleesMatch(CallTy
Call, EdgeIter &EI,
725 const FuncTy *getCalleeFunc(CallTy
Call) {
726 return static_cast<DerivedCCG *
>(
this)->getCalleeFunc(
Call);
732 bool calleeMatchesFunc(
733 CallTy
Call,
const FuncTy *Func,
const FuncTy *CallerFunc,
734 std::vector<std::pair<CallTy, FuncTy *>> &FoundCalleeChain) {
735 return static_cast<DerivedCCG *
>(
this)->calleeMatchesFunc(
736 Call, Func, CallerFunc, FoundCalleeChain);
740 bool sameCallee(CallTy Call1, CallTy Call2) {
741 return static_cast<DerivedCCG *
>(
this)->sameCallee(Call1, Call2);
746 std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy
Call) {
747 return static_cast<DerivedCCG *
>(
this)->getStackIdsWithContextNodesForCall(
753 return static_cast<DerivedCCG *
>(
this)->getLastStackId(
Call);
759 static_cast<DerivedCCG *
>(
this)->updateAllocationCall(
Call,
AllocType);
764 return static_cast<const DerivedCCG *
>(
this)->getAllocationCallType(
Call);
769 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc) {
770 static_cast<DerivedCCG *
>(
this)->updateCall(CallerCall, CalleeFunc);
776 FuncInfo cloneFunctionForCallsite(
778 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
779 return static_cast<DerivedCCG *
>(
this)->cloneFunctionForCallsite(
780 Func,
Call, CallMap, CallsWithMetadataInFunc, CloneNo);
785 std::string getLabel(
const FuncTy *Func,
const CallTy
Call,
786 unsigned CloneNo)
const {
787 return static_cast<const DerivedCCG *
>(
this)->getLabel(Func,
Call, CloneNo);
791 ContextNode *createNewNode(
bool IsAllocation,
const FuncTy *
F =
nullptr,
792 CallInfo
C = CallInfo()) {
793 NodeOwner.push_back(std::make_unique<ContextNode>(IsAllocation,
C));
794 auto *NewNode = NodeOwner.back().get();
796 NodeToCallingFunc[NewNode] =
F;
797 NewNode->NodeId = NodeOwner.size();
802 ContextNode *getNodeForInst(
const CallInfo &
C);
803 ContextNode *getNodeForAlloc(
const CallInfo &
C);
804 ContextNode *getNodeForStackId(
uint64_t StackId);
826 moveEdgeToNewCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
833 void moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
834 ContextNode *NewCallee,
835 bool NewClone =
false,
843 void moveCalleeEdgeToNewCaller(
const std::shared_ptr<ContextEdge> &Edge,
844 ContextNode *NewCaller);
855 void mergeNodeCalleeClones(
860 void findOtherCallersToShareMerge(
861 ContextNode *
Node, std::vector<std::shared_ptr<ContextEdge>> &CalleeEdges,
889 struct ImportantContextInfo {
891 std::vector<uint64_t> StackIds;
894 unsigned MaxLength = 0;
898 std::map<std::vector<uint64_t>, ContextNode *> StackIdsToNode;
907 void recordStackNode(std::vector<uint64_t> &StackIds, ContextNode *
Node,
921 auto Size = StackIds.size();
922 for (
auto Id : Ids) {
923 auto &Entry = ImportantContextIdInfo[Id];
924 Entry.StackIdsToNode[StackIds] =
Node;
926 if (
Size > Entry.MaxLength)
927 Entry.MaxLength =
Size;
936 std::vector<std::unique_ptr<ContextNode>> NodeOwner;
942 unsigned int LastContextId = 0;
945template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
947 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode;
948template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
950 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge;
951template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
953 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::FuncInfo;
954template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
956 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::CallInfo;
959class ModuleCallsiteContextGraph
960 :
public CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
963 ModuleCallsiteContextGraph(
965 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter);
968 friend CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
971 uint64_t getStackId(uint64_t IdOrIndex)
const;
973 bool calleeMatchesFunc(
974 Instruction *
Call,
const Function *Func,
const Function *CallerFunc,
975 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain);
976 bool sameCallee(Instruction *Call1, Instruction *Call2);
977 bool findProfiledCalleeThroughTailCalls(
978 const Function *ProfiledCallee,
Value *CurCallee,
unsigned Depth,
979 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
980 bool &FoundMultipleCalleeChains);
981 uint64_t getLastStackId(Instruction *
Call);
982 std::vector<uint64_t> getStackIdsWithContextNodesForCall(Instruction *
Call);
985 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
986 CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
988 cloneFunctionForCallsite(FuncInfo &Func, CallInfo &
Call,
989 DenseMap<CallInfo, CallInfo> &CallMap,
990 std::vector<CallInfo> &CallsWithMetadataInFunc,
992 std::string getLabel(
const Function *Func,
const Instruction *
Call,
993 unsigned CloneNo)
const;
996 llvm::function_ref<OptimizationRemarkEmitter &(
Function *)> OREGetter;
// IndexCall: a thin wrapper over PointerUnion<CallsiteInfo *, AllocInfo *>
// providing implicit conversions from either summary-index record type (and
// from nullptr), so index callsites and allocations can be handled through a
// single "call" handle type. (Leading "1002"-"1009" digits are fused upstream
// line numbers; the struct's remaining members/closing brace are elided —
// NOTE(review): repair from upstream.)
1002struct IndexCall :
public PointerUnion<CallsiteInfo *, AllocInfo *> {
1003 IndexCall() : PointerUnion() {}
1004 IndexCall(std::nullptr_t) : IndexCall() {}
1005 IndexCall(CallsiteInfo *StackNode) : PointerUnion(StackNode) {}
1006 IndexCall(AllocInfo *AllocNode) : PointerUnion(AllocNode) {}
1007 IndexCall(PointerUnion PT) : PointerUnion(PT) {}
// operator-> returns the IndexCall itself, letting callers use arrow syntax
// on a value as if it were a pointer to itself.
1009 IndexCall *operator->() {
return this; }
1011 void print(raw_ostream &OS)
const {
1012 PointerUnion<CallsiteInfo *, AllocInfo *>
Base = *
this;
1037class IndexCallsiteContextGraph
1038 :
public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
1041 IndexCallsiteContextGraph(
1042 ModuleSummaryIndex &Index,
1046 ~IndexCallsiteContextGraph() {
1051 for (
auto &
I : FunctionCalleesToSynthesizedCallsiteInfos) {
1053 for (
auto &Callsite :
I.second)
1054 FS->addCallsite(*Callsite.second);
1059 friend CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
1062 uint64_t getStackId(uint64_t IdOrIndex)
const;
1063 const FunctionSummary *getCalleeFunc(IndexCall &
Call);
1064 bool calleeMatchesFunc(
1065 IndexCall &
Call,
const FunctionSummary *Func,
1066 const FunctionSummary *CallerFunc,
1067 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain);
1068 bool sameCallee(IndexCall &Call1, IndexCall &Call2);
1069 bool findProfiledCalleeThroughTailCalls(
1070 ValueInfo ProfiledCallee, ValueInfo CurCallee,
unsigned Depth,
1071 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
1072 bool &FoundMultipleCalleeChains);
1073 uint64_t getLastStackId(IndexCall &
Call);
1074 std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &
Call);
1077 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
1078 CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
1079 IndexCall>::FuncInfo
1080 cloneFunctionForCallsite(FuncInfo &Func, CallInfo &
Call,
1081 DenseMap<CallInfo, CallInfo> &CallMap,
1082 std::vector<CallInfo> &CallsWithMetadataInFunc,
1084 std::string getLabel(
const FunctionSummary *Func,
const IndexCall &
Call,
1085 unsigned CloneNo)
const;
1089 std::map<const FunctionSummary *, ValueInfo> FSToVIMap;
1091 const ModuleSummaryIndex &Index;
1099 std::unordered_map<FunctionSummary *,
1100 std::map<ValueInfo, std::unique_ptr<CallsiteInfo>>>
1101 FunctionCalleesToSynthesizedCallsiteInfos;
1112 :
public DenseMapInfo<std::pair<IndexCall, unsigned>> {};
1115 :
public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {};
1136template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1137bool allocTypesMatch(
1138 const std::vector<uint8_t> &InAllocTypes,
1139 const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>>
1143 assert(InAllocTypes.size() == Edges.size());
1145 InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(), Edges.end(),
1147 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &r) {
1151 if (l == (uint8_t)AllocationType::None ||
1152 r->AllocTypes == (uint8_t)AllocationType::None)
1154 return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes);
1163template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1164bool allocTypesMatchClone(
1165 const std::vector<uint8_t> &InAllocTypes,
1166 const ContextNode<DerivedCCG, FuncTy, CallTy> *Clone) {
1167 const ContextNode<DerivedCCG, FuncTy, CallTy> *
Node = Clone->CloneOf;
1171 assert(InAllocTypes.size() ==
Node->CalleeEdges.size());
1175 for (
const auto &
E : Clone->CalleeEdges) {
1177 EdgeCalleeMap[
E->Callee] =
E->AllocTypes;
1181 for (
unsigned I = 0;
I <
Node->CalleeEdges.size();
I++) {
1182 auto Iter = EdgeCalleeMap.
find(
Node->CalleeEdges[
I]->Callee);
1184 if (Iter == EdgeCalleeMap.
end())
1192 if (allocTypeToUse(Iter->second) != allocTypeToUse(InAllocTypes[
I]))
1200template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1201typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1202CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst(
1203 const CallInfo &
C) {
1204 ContextNode *
Node = getNodeForAlloc(
C);
1208 return NonAllocationCallToContextNodeMap.lookup(
C);
1211template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1212typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1213CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForAlloc(
1214 const CallInfo &
C) {
1215 return AllocationCallToContextNodeMap.lookup(
C);
1218template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1219typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1220CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForStackId(
1222 auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId);
1223 if (StackEntryNode != StackEntryIdToContextNodeMap.end())
1224 return StackEntryNode->second;
1228template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1229void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1231 unsigned int ContextId) {
1232 for (
auto &
Edge : CallerEdges) {
1233 if (
Edge->Caller == Caller) {
1235 Edge->getContextIds().insert(ContextId);
1239 std::shared_ptr<ContextEdge>
Edge = std::make_shared<ContextEdge>(
1240 this, Caller, (uint8_t)
AllocType, DenseSet<uint32_t>({ContextId}));
1241 CallerEdges.push_back(
Edge);
1245template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1246void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::removeEdgeFromGraph(
1247 ContextEdge *
Edge, EdgeIter *EI,
bool CalleeIter) {
1263 auto CalleeCallerCount =
Callee->CallerEdges.size();
1264 auto CallerCalleeCount =
Caller->CalleeEdges.size();
1269 }
else if (CalleeIter) {
1271 *EI =
Caller->CalleeEdges.erase(*EI);
1274 *EI =
Callee->CallerEdges.erase(*EI);
1276 assert(
Callee->CallerEdges.size() < CalleeCallerCount);
1277 assert(
Caller->CalleeEdges.size() < CallerCalleeCount);
1280template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1281void CallsiteContextGraph<
1282 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *Node) {
1283 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();) {
1285 if (
Edge->AllocTypes == (uint8_t)AllocationType::None) {
1287 removeEdgeFromGraph(
Edge.get(), &EI,
true);
1293template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1294void CallsiteContextGraph<
1295 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCallerEdges(ContextNode *Node) {
1296 for (
auto EI =
Node->CallerEdges.begin(); EI !=
Node->CallerEdges.end();) {
1298 if (
Edge->AllocTypes == (uint8_t)AllocationType::None) {
1300 Edge->Caller->eraseCalleeEdge(
Edge.get());
1301 EI =
Node->CallerEdges.erase(EI);
1307template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1308typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1309CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1310 findEdgeFromCallee(
const ContextNode *Callee) {
1311 for (
const auto &
Edge : CalleeEdges)
1312 if (
Edge->Callee == Callee)
1317template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1318typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1319CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1320 findEdgeFromCaller(
const ContextNode *Caller) {
1321 for (
const auto &
Edge : CallerEdges)
1322 if (
Edge->Caller == Caller)
1327template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1328void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1329 eraseCalleeEdge(
const ContextEdge *
Edge) {
1331 CalleeEdges, [
Edge](
const std::shared_ptr<ContextEdge> &CalleeEdge) {
1332 return CalleeEdge.get() ==
Edge;
1334 assert(EI != CalleeEdges.end());
1335 CalleeEdges.erase(EI);
1338template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1339void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1340 eraseCallerEdge(
const ContextEdge *
Edge) {
1342 CallerEdges, [
Edge](
const std::shared_ptr<ContextEdge> &CallerEdge) {
1343 return CallerEdge.get() ==
Edge;
1345 assert(EI != CallerEdges.end());
1346 CallerEdges.erase(EI);
1349template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1350uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType(
1351 DenseSet<uint32_t> &ContextIds)
const {
1353 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
1354 uint8_t
AllocType = (uint8_t)AllocationType::None;
1355 for (
auto Id : ContextIds) {
1356 AllocType |= (uint8_t)ContextIdToAllocationType.at(Id);
1364template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1366CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypesImpl(
1367 const DenseSet<uint32_t> &Node1Ids,
1368 const DenseSet<uint32_t> &Node2Ids)
const {
1370 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
1371 uint8_t
AllocType = (uint8_t)AllocationType::None;
1372 for (
auto Id : Node1Ids) {
1373 if (!Node2Ids.
count(Id))
1375 AllocType |= (uint8_t)ContextIdToAllocationType.at(Id);
1383template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1384uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypes(
1385 const DenseSet<uint32_t> &Node1Ids,
1386 const DenseSet<uint32_t> &Node2Ids)
const {
1387 if (Node1Ids.
size() < Node2Ids.
size())
1388 return intersectAllocTypesImpl(Node1Ids, Node2Ids);
1390 return intersectAllocTypesImpl(Node2Ids, Node1Ids);
1393template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1394typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1395CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode(
1396 CallInfo
Call,
const FuncTy *
F) {
1398 ContextNode *AllocNode = createNewNode(
true,
F,
Call);
1399 AllocationCallToContextNodeMap[
Call] = AllocNode;
1401 AllocNode->OrigStackOrAllocId = LastContextId;
1404 AllocNode->AllocTypes = (uint8_t)AllocationType::None;
1420template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1421template <
class NodeT,
class IteratorT>
1422void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
1423 ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext,
1426 std::map<uint64_t, uint32_t> &TotalSizeToContextIdTopNCold) {
1432 ContextIdToAllocationType[++LastContextId] =
AllocType;
1434 bool IsImportant =
false;
1435 if (!ContextSizeInfo.
empty()) {
1436 auto &
Entry = ContextIdToContextSizeInfos[LastContextId];
1440 uint64_t TotalCold = 0;
1441 for (
auto &CSI : ContextSizeInfo)
1442 TotalCold += CSI.TotalSize;
1448 TotalCold > TotalSizeToContextIdTopNCold.begin()->first) {
1451 auto IdToRemove = TotalSizeToContextIdTopNCold.begin()->second;
1452 TotalSizeToContextIdTopNCold.erase(
1453 TotalSizeToContextIdTopNCold.begin());
1454 assert(ImportantContextIdInfo.count(IdToRemove));
1455 ImportantContextIdInfo.erase(IdToRemove);
1457 TotalSizeToContextIdTopNCold[TotalCold] = LastContextId;
1461 Entry.insert(
Entry.begin(), ContextSizeInfo.begin(), ContextSizeInfo.end());
1465 AllocNode->AllocTypes |= (uint8_t)
AllocType;
1470 ContextNode *PrevNode = AllocNode;
1474 SmallSet<uint64_t, 8> StackIdSet;
1477 ContextIter != StackContext.
end(); ++ContextIter) {
1478 auto StackId = getStackId(*ContextIter);
1480 ImportantContextIdInfo[LastContextId].StackIds.push_back(StackId);
1481 ContextNode *StackNode = getNodeForStackId(StackId);
1483 StackNode = createNewNode(
false);
1484 StackEntryIdToContextNodeMap[StackId] = StackNode;
1485 StackNode->OrigStackOrAllocId = StackId;
1490 auto Ins = StackIdSet.
insert(StackId);
1492 StackNode->Recursive =
true;
1494 StackNode->AllocTypes |= (uint8_t)
AllocType;
1495 PrevNode->addOrUpdateCallerEdge(StackNode,
AllocType, LastContextId);
1496 PrevNode = StackNode;
1500template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1502CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds(
1503 const DenseSet<uint32_t> &StackSequenceContextIds,
1504 DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
1505 DenseSet<uint32_t> NewContextIds;
1506 for (
auto OldId : StackSequenceContextIds) {
1507 NewContextIds.
insert(++LastContextId);
1508 OldToNewContextIds[OldId].insert(LastContextId);
1509 assert(ContextIdToAllocationType.count(OldId));
1511 ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId];
1512 if (DotAllocContextIds.
contains(OldId))
1513 DotAllocContextIds.
insert(LastContextId);
1515 return NewContextIds;
1518template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1519void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1520 propagateDuplicateContextIds(
1521 const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
1523 auto GetNewIds = [&OldToNewContextIds](
const DenseSet<uint32_t> &ContextIds) {
1524 DenseSet<uint32_t> NewIds;
1525 for (
auto Id : ContextIds)
1526 if (
auto NewId = OldToNewContextIds.find(Id);
1527 NewId != OldToNewContextIds.end())
1533 auto UpdateCallers = [&](ContextNode *
Node,
1534 DenseSet<const ContextEdge *> &Visited,
1535 auto &&UpdateCallers) ->
void {
1536 for (
const auto &
Edge :
Node->CallerEdges) {
1540 ContextNode *NextNode =
Edge->Caller;
1541 DenseSet<uint32_t> NewIdsToAdd = GetNewIds(
Edge->getContextIds());
1544 if (!NewIdsToAdd.
empty()) {
1545 Edge->getContextIds().insert_range(NewIdsToAdd);
1546 UpdateCallers(NextNode, Visited, UpdateCallers);
1551 DenseSet<const ContextEdge *> Visited;
1552 for (
auto &Entry : AllocationCallToContextNodeMap) {
1554 UpdateCallers(Node, Visited, UpdateCallers);
1558template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1559void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
1560 ContextNode *NewNode, ContextNode *OrigNode,
bool TowardsCallee,
1563 DenseSet<uint32_t> RemainingContextIds) {
1565 TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges;
1566 DenseSet<uint32_t> RecursiveContextIds;
1567 DenseSet<uint32_t> AllCallerContextIds;
1572 for (
auto &CE : OrigEdges) {
1573 AllCallerContextIds.
reserve(
CE->getContextIds().size());
1574 for (
auto Id :
CE->getContextIds())
1575 if (!AllCallerContextIds.
insert(Id).second)
1576 RecursiveContextIds.
insert(Id);
1580 for (
auto EI = OrigEdges.begin(); EI != OrigEdges.end();) {
1582 DenseSet<uint32_t> NewEdgeContextIds;
1583 DenseSet<uint32_t> NotFoundContextIds;
1587 set_subtract(
Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds,
1588 NotFoundContextIds);
1591 if (RecursiveContextIds.
empty()) {
1594 RemainingContextIds.
swap(NotFoundContextIds);
1604 DenseSet<uint32_t> NonRecursiveRemainingCurEdgeIds =
1606 set_subtract(RemainingContextIds, NonRecursiveRemainingCurEdgeIds);
1609 if (NewEdgeContextIds.
empty()) {
1613 if (TowardsCallee) {
1614 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1615 auto NewEdge = std::make_shared<ContextEdge>(
1616 Edge->Callee, NewNode, NewAllocType, std::move(NewEdgeContextIds));
1617 NewNode->CalleeEdges.push_back(NewEdge);
1618 NewEdge->Callee->CallerEdges.push_back(NewEdge);
1620 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1621 auto NewEdge = std::make_shared<ContextEdge>(
1622 NewNode,
Edge->Caller, NewAllocType, std::move(NewEdgeContextIds));
1623 NewNode->CallerEdges.push_back(NewEdge);
1624 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
1627 if (
Edge->getContextIds().empty()) {
1628 removeEdgeFromGraph(
Edge.get(), &EI, TowardsCallee);
1635template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1637 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) {
1641 assert(!Edge->ContextIds.empty());
1644template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1646 bool CheckEdges =
true) {
1647 if (
Node->isRemoved())
1651 auto NodeContextIds =
Node->getContextIds();
1655 if (
Node->CallerEdges.size()) {
1657 Node->CallerEdges.front()->ContextIds);
1661 set_union(CallerEdgeContextIds, Edge->ContextIds);
1668 NodeContextIds == CallerEdgeContextIds ||
1671 if (
Node->CalleeEdges.size()) {
1673 Node->CalleeEdges.front()->ContextIds);
1677 set_union(CalleeEdgeContextIds, Edge->getContextIds());
1683 NodeContextIds == CalleeEdgeContextIds);
1692 for (
const auto &
E :
Node->CalleeEdges)
1698template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1699void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1700 assignStackNodesPostOrder(ContextNode *Node,
1701 DenseSet<const ContextNode *> &Visited,
1702 DenseMap<uint64_t, std::vector<CallContextInfo>>
1703 &StackIdToMatchingCalls,
1704 DenseMap<CallInfo, CallInfo> &CallToMatchingCall,
1705 const DenseSet<uint32_t> &ImportantContextIds) {
1713 auto CallerEdges =
Node->CallerEdges;
1714 for (
auto &
Edge : CallerEdges) {
1716 if (
Edge->isRemoved()) {
1720 assignStackNodesPostOrder(
Edge->Caller, Visited, StackIdToMatchingCalls,
1721 CallToMatchingCall, ImportantContextIds);
1730 if (
Node->IsAllocation ||
1731 !StackIdToMatchingCalls.count(
Node->OrigStackOrAllocId))
1734 auto &Calls = StackIdToMatchingCalls[
Node->OrigStackOrAllocId];
1738 if (Calls.size() == 1) {
1739 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[0];
1740 if (Ids.size() == 1) {
1741 assert(SavedContextIds.empty());
1743 assert(Node == getNodeForStackId(Ids[0]));
1744 if (
Node->Recursive)
1747 NonAllocationCallToContextNodeMap[
Call] =
Node;
1749 recordStackNode(Ids, Node,
Node->getContextIds(), ImportantContextIds);
1757 uint64_t LastId =
Node->OrigStackOrAllocId;
1758 ContextNode *LastNode = getNodeForStackId(LastId);
1761 assert(LastNode == Node);
1763 ContextNode *LastNode =
Node;
1768 DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
1770 [[maybe_unused]]
bool PrevIterCreatedNode =
false;
1771 bool CreatedNode =
false;
1772 for (
unsigned I = 0;
I < Calls.size();
1773 I++, PrevIterCreatedNode = CreatedNode) {
1774 CreatedNode =
false;
1775 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
1778 if (SavedContextIds.empty()) {
1785 auto MatchingCall = CallToMatchingCall[
Call];
1786 if (!NonAllocationCallToContextNodeMap.contains(MatchingCall)) {
1790 assert(
I > 0 && !PrevIterCreatedNode);
1793 NonAllocationCallToContextNodeMap[MatchingCall]->MatchingCalls.push_back(
1798 assert(LastId == Ids.back());
1807 ContextNode *PrevNode = LastNode;
1811 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
1813 ContextNode *CurNode = getNodeForStackId(Id);
1817 assert(!CurNode->Recursive);
1819 auto *
Edge = CurNode->findEdgeFromCaller(PrevNode);
1831 if (SavedContextIds.empty()) {
1840 ContextNode *NewNode = createNewNode(
false, Func,
Call);
1841 NonAllocationCallToContextNodeMap[
Call] = NewNode;
1843 NewNode->AllocTypes = computeAllocType(SavedContextIds);
1845 ContextNode *FirstNode = getNodeForStackId(Ids[0]);
1851 connectNewNode(NewNode, FirstNode,
true, SavedContextIds);
1856 connectNewNode(NewNode, LastNode,
false, SavedContextIds);
1861 for (
auto Id : Ids) {
1862 ContextNode *CurNode = getNodeForStackId(Id);
1869 auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode);
1876 set_subtract(PrevEdge->getContextIds(), SavedContextIds);
1877 if (PrevEdge->getContextIds().empty())
1878 removeEdgeFromGraph(PrevEdge);
1883 CurNode->AllocTypes = CurNode->CalleeEdges.empty()
1884 ? (uint8_t)AllocationType::None
1885 : CurNode->computeAllocType();
1889 recordStackNode(Ids, NewNode, SavedContextIds, ImportantContextIds);
1893 for (
auto Id : Ids) {
1894 ContextNode *CurNode = getNodeForStackId(Id);
1903template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1904void CallsiteContextGraph<DerivedCCG, FuncTy,
1905 CallTy>::fixupImportantContexts() {
1906 if (ImportantContextIdInfo.empty())
1910 NumImportantContextIds = ImportantContextIdInfo.size();
1916 exportToDot(
"beforestackfixup");
1941 for (
auto &[CurContextId,
Info] : ImportantContextIdInfo) {
1942 if (
Info.StackIdsToNode.empty())
1945 ContextNode *PrevNode =
nullptr;
1946 ContextNode *CurNode =
nullptr;
1947 DenseSet<const ContextEdge *> VisitedEdges;
1948 ArrayRef<uint64_t> AllStackIds(
Info.StackIds);
1951 for (
unsigned I = 0;
I < AllStackIds.size();
I++, PrevNode = CurNode) {
1955 auto LenToEnd = AllStackIds.size() -
I;
1963 auto CheckStackIds = AllStackIds.slice(
I, Len);
1964 auto EntryIt =
Info.StackIdsToNode.find(CheckStackIds);
1965 if (EntryIt ==
Info.StackIdsToNode.end())
1967 CurNode = EntryIt->second;
1984 auto *CurEdge = PrevNode->findEdgeFromCaller(CurNode);
1987 if (CurEdge->getContextIds().insert(CurContextId).second) {
1988 NumFixupEdgeIdsInserted++;
1993 NumFixupEdgesAdded++;
1994 DenseSet<uint32_t> ContextIds({CurContextId});
1995 auto AllocType = computeAllocType(ContextIds);
1996 auto NewEdge = std::make_shared<ContextEdge>(
1997 PrevNode, CurNode,
AllocType, std::move(ContextIds));
1998 PrevNode->CallerEdges.push_back(NewEdge);
1999 CurNode->CalleeEdges.push_back(NewEdge);
2001 CurEdge = NewEdge.get();
2004 VisitedEdges.
insert(CurEdge);
2007 for (
auto &
Edge : PrevNode->CallerEdges) {
2011 Edge->getContextIds().erase(CurContextId);
2019template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2020void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
2028 DenseMap<uint64_t, std::vector<CallContextInfo>> StackIdToMatchingCalls;
2029 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
2030 for (
auto &
Call : CallsWithMetadata) {
2032 if (AllocationCallToContextNodeMap.count(
Call))
2034 auto StackIdsWithContextNodes =
2035 getStackIdsWithContextNodesForCall(
Call.call());
2038 if (StackIdsWithContextNodes.empty())
2042 StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back(
2043 {
Call.call(), StackIdsWithContextNodes,
Func, {}});
2053 DenseMap<uint32_t, DenseSet<uint32_t>> OldToNewContextIds;
2057 DenseMap<CallInfo, CallInfo> CallToMatchingCall;
2058 for (
auto &It : StackIdToMatchingCalls) {
2059 auto &Calls = It.getSecond();
2061 if (Calls.size() == 1) {
2062 auto &Ids = Calls[0].StackIds;
2063 if (Ids.size() == 1)
2076 DenseMap<const FuncTy *, unsigned> FuncToIndex;
2077 for (
const auto &[Idx, CallCtxInfo] :
enumerate(Calls))
2078 FuncToIndex.
insert({CallCtxInfo.Func, Idx});
2081 [&FuncToIndex](
const CallContextInfo &
A,
const CallContextInfo &
B) {
2082 return A.StackIds.size() >
B.StackIds.size() ||
2083 (
A.StackIds.size() ==
B.StackIds.size() &&
2084 (
A.StackIds <
B.StackIds ||
2085 (
A.StackIds ==
B.StackIds &&
2086 FuncToIndex[
A.Func] < FuncToIndex[
B.Func])));
2092 uint64_t LastId = It.getFirst();
2093 ContextNode *LastNode = getNodeForStackId(LastId);
2097 if (LastNode->Recursive)
2102 DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
2110 DenseSet<const FuncTy *> MatchingIdsFuncSet;
2113 for (
unsigned I = 0;
I < Calls.size();
I++) {
2114 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
2115 assert(SavedContextIds.empty());
2116 assert(LastId == Ids.back());
2121 if (
I > 0 && Ids != Calls[
I - 1].StackIds)
2122 MatchingIdsFuncSet.
clear();
2129 DenseSet<uint32_t> StackSequenceContextIds = LastNodeContextIds;
2131 ContextNode *PrevNode = LastNode;
2132 ContextNode *CurNode = LastNode;
2137 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
2139 CurNode = getNodeForStackId(Id);
2143 if (CurNode->Recursive) {
2148 auto *
Edge = CurNode->findEdgeFromCaller(PrevNode);
2169 if (StackSequenceContextIds.
empty()) {
2182 if (Ids.back() != getLastStackId(
Call)) {
2183 for (
const auto &PE : LastNode->CallerEdges) {
2184 set_subtract(StackSequenceContextIds, PE->getContextIds());
2185 if (StackSequenceContextIds.
empty())
2189 if (StackSequenceContextIds.
empty())
2201 MatchingIdsFuncSet.
insert(Func);
2208 bool DuplicateContextIds =
false;
2209 for (
unsigned J =
I + 1; J < Calls.size(); J++) {
2210 auto &CallCtxInfo = Calls[J];
2211 auto &NextIds = CallCtxInfo.StackIds;
2214 auto *NextFunc = CallCtxInfo.Func;
2215 if (NextFunc != Func) {
2218 DuplicateContextIds =
true;
2221 auto &NextCall = CallCtxInfo.Call;
2222 CallToMatchingCall[NextCall] =
Call;
2233 OldToNewContextIds.
reserve(OldToNewContextIds.
size() +
2234 StackSequenceContextIds.
size());
2237 ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds)
2238 : StackSequenceContextIds;
2239 assert(!SavedContextIds.empty());
2241 if (!DuplicateContextIds) {
2245 set_subtract(LastNodeContextIds, StackSequenceContextIds);
2246 if (LastNodeContextIds.
empty())
2253 propagateDuplicateContextIds(OldToNewContextIds);
2263 DenseSet<const ContextNode *> Visited;
2265 ImportantContextIdInfo.keys());
2266 for (
auto &Entry : AllocationCallToContextNodeMap)
2267 assignStackNodesPostOrder(
Entry.second, Visited, StackIdToMatchingCalls,
2268 CallToMatchingCall, ImportantContextIds);
2270 fixupImportantContexts();
2276uint64_t ModuleCallsiteContextGraph::getLastStackId(Instruction *
Call) {
2277 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
2279 return CallsiteContext.
back();
2282uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &
Call) {
2284 CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
2287 return Index.getStackIdAtIndex(CallsiteContext.
back());
2309 auto Pos =
F.getName().find_last_of(
'.');
2312 bool Err =
F.getName().drop_front(Pos + 1).getAsInteger(10, CloneNo);
2318std::string ModuleCallsiteContextGraph::getLabel(
const Function *Func,
2319 const Instruction *
Call,
2320 unsigned CloneNo)
const {
2326std::string IndexCallsiteContextGraph::getLabel(
const FunctionSummary *Func,
2327 const IndexCall &
Call,
2328 unsigned CloneNo)
const {
2329 auto VI = FSToVIMap.find(Func);
2330 assert(VI != FSToVIMap.end());
2333 return CallerName +
" -> alloc";
2336 return CallerName +
" -> " +
2338 Callsite->Clones[CloneNo]);
2342std::vector<uint64_t>
2343ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall(
2344 Instruction *
Call) {
2345 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
2347 return getStackIdsWithContextNodes<MDNode, MDNode::op_iterator>(
2351std::vector<uint64_t>
2352IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &
Call) {
2354 CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
2356 return getStackIdsWithContextNodes<CallsiteInfo,
2357 SmallVector<unsigned>::const_iterator>(
2361template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2362template <
class NodeT,
class IteratorT>
2363std::vector<uint64_t>
2364CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes(
2365 CallStack<NodeT, IteratorT> &CallsiteContext) {
2366 std::vector<uint64_t> StackIds;
2367 for (
auto IdOrIndex : CallsiteContext) {
2368 auto StackId = getStackId(IdOrIndex);
2369 ContextNode *
Node = getNodeForStackId(StackId);
2372 StackIds.push_back(StackId);
2377ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
2379 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter)
2380 :
Mod(
M), OREGetter(OREGetter) {
2384 std::map<uint64_t, uint32_t> TotalSizeToContextIdTopNCold;
2386 std::vector<CallInfo> CallsWithMetadata;
2387 for (
auto &BB :
F) {
2388 for (
auto &
I : BB) {
2391 if (
auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof)) {
2392 CallsWithMetadata.push_back(&
I);
2393 auto *AllocNode = addAllocNode(&
I, &
F);
2394 auto *CallsiteMD =
I.getMetadata(LLVMContext::MD_callsite);
2398 for (
auto &MDOp : MemProfMD->operands()) {
2400 std::vector<ContextTotalSize> ContextSizeInfo;
2402 if (MIBMD->getNumOperands() > 2) {
2403 for (
unsigned I = 2;
I < MIBMD->getNumOperands();
I++) {
2404 MDNode *ContextSizePair =
2413 ContextSizeInfo.push_back({FullStackId, TotalSize});
2419 addStackNodesForMIB<MDNode, MDNode::op_iterator>(
2420 AllocNode, StackContext, CallsiteContext,
2422 TotalSizeToContextIdTopNCold);
2427 DotAllocContextIds = AllocNode->getContextIds();
2431 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
2432 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
2435 else if (
I.getMetadata(LLVMContext::MD_callsite)) {
2436 CallsWithMetadata.push_back(&
I);
2440 if (!CallsWithMetadata.empty())
2441 FuncToCallsWithMetadata[&
F] = CallsWithMetadata;
2445 dbgs() <<
"CCG before updating call stack chains:\n";
2450 exportToDot(
"prestackupdate");
2455 exportToDot(
"poststackupdate");
2457 handleCallsitesWithMultipleTargets();
2462 for (
auto &FuncEntry : FuncToCallsWithMetadata)
2463 for (
auto &
Call : FuncEntry.second)
2464 Call.call()->setMetadata(LLVMContext::MD_callsite,
nullptr);
2467IndexCallsiteContextGraph::IndexCallsiteContextGraph(
2471 : Index(Index), isPrevailing(isPrevailing) {
2475 std::map<uint64_t, uint32_t> TotalSizeToContextIdTopNCold;
2476 for (
auto &
I : Index) {
2477 auto VI = Index.getValueInfo(
I);
2478 for (
auto &S : VI.getSummaryList()) {
2487 !isPrevailing(VI.getGUID(), S.get()))
2492 std::vector<CallInfo> CallsWithMetadata;
2493 if (!
FS->allocs().empty()) {
2494 for (
auto &AN :
FS->mutableAllocs()) {
2499 if (AN.MIBs.empty())
2501 IndexCall AllocCall(&AN);
2502 CallsWithMetadata.push_back(AllocCall);
2503 auto *AllocNode = addAllocNode(AllocCall, FS);
2511 AN.ContextSizeInfos.size() == AN.MIBs.size());
2513 for (
auto &MIB : AN.MIBs) {
2516 std::vector<ContextTotalSize> ContextSizeInfo;
2517 if (!AN.ContextSizeInfos.empty()) {
2518 for (
auto [FullStackId, TotalSize] : AN.ContextSizeInfos[
I])
2519 ContextSizeInfo.push_back({FullStackId, TotalSize});
2521 addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>(
2522 AllocNode, StackContext, EmptyContext, MIB.AllocType,
2523 ContextSizeInfo, TotalSizeToContextIdTopNCold);
2529 DotAllocContextIds = AllocNode->getContextIds();
2535 AN.Versions[0] = (
uint8_t)allocTypeToUse(AllocNode->AllocTypes);
2539 if (!
FS->callsites().empty())
2540 for (
auto &SN :
FS->mutableCallsites()) {
2541 IndexCall StackNodeCall(&SN);
2542 CallsWithMetadata.push_back(StackNodeCall);
2545 if (!CallsWithMetadata.empty())
2546 FuncToCallsWithMetadata[
FS] = CallsWithMetadata;
2548 if (!
FS->allocs().empty() || !
FS->callsites().empty())
2554 dbgs() <<
"CCG before updating call stack chains:\n";
2559 exportToDot(
"prestackupdate");
2564 exportToDot(
"poststackupdate");
2566 handleCallsitesWithMultipleTargets();
2571template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2572void CallsiteContextGraph<DerivedCCG, FuncTy,
2573 CallTy>::handleCallsitesWithMultipleTargets() {
2588 std::vector<std::pair<CallInfo, ContextNode *>> NewCallToNode;
2589 for (
auto &Entry : NonAllocationCallToContextNodeMap) {
2590 auto *
Node = Entry.second;
2599 std::vector<CallInfo> AllCalls;
2600 AllCalls.reserve(
Node->MatchingCalls.size() + 1);
2601 AllCalls.push_back(
Node->Call);
2615 if (partitionCallsByCallee(
Node, AllCalls, NewCallToNode))
2618 auto It = AllCalls.begin();
2620 for (; It != AllCalls.end(); ++It) {
2623 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();
2626 if (!Edge->Callee->hasCall())
2628 assert(NodeToCallingFunc.count(Edge->Callee));
2630 if (!calleesMatch(
ThisCall.call(), EI, TailCallToContextNodeMap)) {
2639 if (
Node->Call != ThisCall) {
2640 Node->setCall(ThisCall);
2651 Node->MatchingCalls.clear();
2654 if (It == AllCalls.end()) {
2655 RemovedEdgesWithMismatchedCallees++;
2659 Node->setCall(CallInfo());
2664 for (++It; It != AllCalls.end(); ++It) {
2668 Node->MatchingCalls.push_back(ThisCall);
2677 NonAllocationCallToContextNodeMap.remove_if([](
const auto &it) {
2678 return !it.second->hasCall() || it.second->Call != it.first;
2682 for (
auto &[
Call,
Node] : NewCallToNode)
2683 NonAllocationCallToContextNodeMap[
Call] =
Node;
2687 for (
auto &[
Call,
Node] : TailCallToContextNodeMap)
2688 NonAllocationCallToContextNodeMap[
Call] =
Node;
2691template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2692bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::partitionCallsByCallee(
2694 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode) {
2698 struct CallsWithSameCallee {
2699 std::vector<CallInfo> Calls;
2700 ContextNode *
Node =
nullptr;
2706 for (
auto ThisCall : AllCalls) {
2707 auto *
F = getCalleeFunc(
ThisCall.call());
2709 CalleeFuncToCallInfo[
F].Calls.push_back(ThisCall);
2718 for (
const auto &Edge :
Node->CalleeEdges) {
2719 if (!Edge->Callee->hasCall())
2721 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee];
2722 if (CalleeFuncToCallInfo.
contains(ProfiledCalleeFunc))
2723 CalleeNodeToCallInfo[Edge->Callee] =
2724 &CalleeFuncToCallInfo[ProfiledCalleeFunc];
2730 if (CalleeNodeToCallInfo.
empty())
2742 ContextNode *UnmatchedCalleesNode =
nullptr;
2744 bool UsedOrigNode =
false;
2749 auto CalleeEdges =
Node->CalleeEdges;
2750 for (
auto &Edge : CalleeEdges) {
2751 if (!Edge->Callee->hasCall())
2756 ContextNode *CallerNodeToUse =
nullptr;
2760 if (!CalleeNodeToCallInfo.
contains(Edge->Callee)) {
2761 if (!UnmatchedCalleesNode)
2762 UnmatchedCalleesNode =
2763 createNewNode(
false, NodeToCallingFunc[
Node]);
2764 CallerNodeToUse = UnmatchedCalleesNode;
2768 auto *
Info = CalleeNodeToCallInfo[Edge->Callee];
2771 if (!UsedOrigNode) {
2774 Node->MatchingCalls.clear();
2775 UsedOrigNode =
true;
2778 createNewNode(
false, NodeToCallingFunc[
Node]);
2782 Info->Node->setCall(
Info->Calls.front());
2788 NewCallToNode.push_back({
Info->Node->Call,
Info->Node});
2790 CallerNodeToUse =
Info->Node;
2794 if (CallerNodeToUse ==
Node)
2797 moveCalleeEdgeToNewCaller(Edge, CallerNodeToUse);
2804 for (
auto &
I : CalleeNodeToCallInfo)
2805 removeNoneTypeCallerEdges(
I.second->Node);
2806 if (UnmatchedCalleesNode)
2807 removeNoneTypeCallerEdges(UnmatchedCalleesNode);
2808 removeNoneTypeCallerEdges(
Node);
2818uint64_t IndexCallsiteContextGraph::getStackId(uint64_t IdOrIndex)
const {
2821 return Index.getStackIdAtIndex(IdOrIndex);
2824template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2825bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::calleesMatch(
2826 CallTy
Call, EdgeIter &EI,
2827 MapVector<CallInfo, ContextNode *> &TailCallToContextNodeMap) {
2829 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[
Edge->Callee];
2830 const FuncTy *CallerFunc = NodeToCallingFunc[
Edge->Caller];
2833 std::vector<std::pair<CallTy, FuncTy *>> FoundCalleeChain;
2834 if (!calleeMatchesFunc(
Call, ProfiledCalleeFunc, CallerFunc,
2839 if (FoundCalleeChain.empty())
2843 auto *CurEdge =
Callee->findEdgeFromCaller(Caller);
2847 CurEdge->ContextIds.insert_range(
Edge->ContextIds);
2848 CurEdge->AllocTypes |=
Edge->AllocTypes;
2853 auto NewEdge = std::make_shared<ContextEdge>(
2854 Callee, Caller,
Edge->AllocTypes,
Edge->ContextIds);
2855 Callee->CallerEdges.push_back(NewEdge);
2856 if (Caller ==
Edge->Caller) {
2860 EI =
Caller->CalleeEdges.insert(EI, NewEdge);
2863 "Iterator position not restored after insert and increment");
2865 Caller->CalleeEdges.push_back(NewEdge);
2870 auto *CurCalleeNode =
Edge->Callee;
2871 for (
auto &[NewCall, Func] : FoundCalleeChain) {
2872 ContextNode *NewNode =
nullptr;
2874 if (TailCallToContextNodeMap.
count(NewCall)) {
2875 NewNode = TailCallToContextNodeMap[NewCall];
2876 NewNode->AllocTypes |=
Edge->AllocTypes;
2878 FuncToCallsWithMetadata[
Func].push_back({NewCall});
2880 NewNode = createNewNode(
false, Func, NewCall);
2881 TailCallToContextNodeMap[NewCall] = NewNode;
2882 NewNode->AllocTypes =
Edge->AllocTypes;
2886 AddEdge(NewNode, CurCalleeNode);
2888 CurCalleeNode = NewNode;
2892 AddEdge(
Edge->Caller, CurCalleeNode);
2900 removeEdgeFromGraph(
Edge.get(), &EI,
true);
2912bool ModuleCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
2913 const Function *ProfiledCallee,
Value *CurCallee,
unsigned Depth,
2914 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
2915 bool &FoundMultipleCalleeChains) {
2922 FoundCalleeChain.push_back({Callsite,
F});
2937 bool FoundSingleCalleeChain =
false;
2938 for (
auto &BB : *CalleeFunc) {
2939 for (
auto &
I : BB) {
2941 if (!CB || !CB->isTailCall())
2943 auto *CalledValue = CB->getCalledOperand();
2944 auto *CalledFunction = CB->getCalledFunction();
2945 if (CalledValue && !CalledFunction) {
2946 CalledValue = CalledValue->stripPointerCasts();
2953 assert(!CalledFunction &&
2954 "Expected null called function in callsite for alias");
2957 if (!CalledFunction)
2959 if (CalledFunction == ProfiledCallee) {
2960 if (FoundSingleCalleeChain) {
2961 FoundMultipleCalleeChains =
true;
2964 FoundSingleCalleeChain =
true;
2965 FoundProfiledCalleeCount++;
2966 FoundProfiledCalleeDepth +=
Depth;
2967 if (
Depth > FoundProfiledCalleeMaxDepth)
2968 FoundProfiledCalleeMaxDepth =
Depth;
2969 SaveCallsiteInfo(&
I, CalleeFunc);
2970 }
else if (findProfiledCalleeThroughTailCalls(
2971 ProfiledCallee, CalledFunction,
Depth + 1,
2972 FoundCalleeChain, FoundMultipleCalleeChains)) {
2975 assert(!FoundMultipleCalleeChains);
2976 if (FoundSingleCalleeChain) {
2977 FoundMultipleCalleeChains =
true;
2980 FoundSingleCalleeChain =
true;
2981 SaveCallsiteInfo(&
I, CalleeFunc);
2982 }
else if (FoundMultipleCalleeChains)
2987 return FoundSingleCalleeChain;
2990const Function *ModuleCallsiteContextGraph::getCalleeFunc(Instruction *
Call) {
2992 if (!CB->getCalledOperand() || CB->isIndirectCall())
2994 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
3001bool ModuleCallsiteContextGraph::calleeMatchesFunc(
3002 Instruction *
Call,
const Function *Func,
const Function *CallerFunc,
3003 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain) {
3005 if (!CB->getCalledOperand() || CB->isIndirectCall())
3007 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
3009 if (CalleeFunc == Func)
3012 if (Alias && Alias->getAliasee() == Func)
3023 bool FoundMultipleCalleeChains =
false;
3024 if (!findProfiledCalleeThroughTailCalls(Func, CalleeVal,
Depth,
3026 FoundMultipleCalleeChains)) {
3027 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: "
3028 <<
Func->getName() <<
" from " << CallerFunc->
getName()
3029 <<
" that actually called " << CalleeVal->getName()
3030 << (FoundMultipleCalleeChains
3031 ?
" (found multiple possible chains)"
3034 if (FoundMultipleCalleeChains)
3035 FoundProfiledCalleeNonUniquelyCount++;
3042bool ModuleCallsiteContextGraph::sameCallee(Instruction *Call1,
3043 Instruction *Call2) {
3045 if (!CB1->getCalledOperand() || CB1->isIndirectCall())
3047 auto *CalleeVal1 = CB1->getCalledOperand()->stripPointerCasts();
3050 if (!CB2->getCalledOperand() || CB2->isIndirectCall())
3052 auto *CalleeVal2 = CB2->getCalledOperand()->stripPointerCasts();
3054 return CalleeFunc1 == CalleeFunc2;
3057bool IndexCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
3058 ValueInfo ProfiledCallee, ValueInfo CurCallee,
unsigned Depth,
3059 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
3060 bool &FoundMultipleCalleeChains) {
3066 auto CreateAndSaveCallsiteInfo = [&](ValueInfo
Callee, FunctionSummary *
FS) {
3069 if (!FunctionCalleesToSynthesizedCallsiteInfos.count(FS) ||
3070 !FunctionCalleesToSynthesizedCallsiteInfos[FS].count(Callee))
3073 FunctionCalleesToSynthesizedCallsiteInfos[
FS][
Callee] =
3074 std::make_unique<CallsiteInfo>(Callee, SmallVector<unsigned>());
3075 CallsiteInfo *NewCallsiteInfo =
3076 FunctionCalleesToSynthesizedCallsiteInfos[
FS][
Callee].get();
3077 FoundCalleeChain.push_back({NewCallsiteInfo,
FS});
3084 bool FoundSingleCalleeChain =
false;
3087 !isPrevailing(CurCallee.
getGUID(), S.get()))
3092 auto FSVI = CurCallee;
3095 FSVI = AS->getAliaseeVI();
3096 for (
auto &CallEdge :
FS->calls()) {
3097 if (!CallEdge.second.hasTailCall())
3099 if (CallEdge.first == ProfiledCallee) {
3100 if (FoundSingleCalleeChain) {
3101 FoundMultipleCalleeChains =
true;
3104 FoundSingleCalleeChain =
true;
3105 FoundProfiledCalleeCount++;
3106 FoundProfiledCalleeDepth +=
Depth;
3107 if (
Depth > FoundProfiledCalleeMaxDepth)
3108 FoundProfiledCalleeMaxDepth =
Depth;
3109 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
3111 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
3112 FSToVIMap[
FS] = FSVI;
3113 }
else if (findProfiledCalleeThroughTailCalls(
3114 ProfiledCallee, CallEdge.first,
Depth + 1,
3115 FoundCalleeChain, FoundMultipleCalleeChains)) {
3118 assert(!FoundMultipleCalleeChains);
3119 if (FoundSingleCalleeChain) {
3120 FoundMultipleCalleeChains =
true;
3123 FoundSingleCalleeChain =
true;
3124 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
3126 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
3127 FSToVIMap[
FS] = FSVI;
3128 }
else if (FoundMultipleCalleeChains)
3133 return FoundSingleCalleeChain;
3136const FunctionSummary *
3137IndexCallsiteContextGraph::getCalleeFunc(IndexCall &
Call) {
3139 if (
Callee.getSummaryList().empty())
3144bool IndexCallsiteContextGraph::calleeMatchesFunc(
3145 IndexCall &
Call,
const FunctionSummary *Func,
3146 const FunctionSummary *CallerFunc,
3147 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain) {
3151 AliasSummary *Alias =
3152 Callee.getSummaryList().empty()
3155 assert(FSToVIMap.count(Func));
3156 auto FuncVI = FSToVIMap[
Func];
3157 if (Callee == FuncVI ||
3172 bool FoundMultipleCalleeChains =
false;
3173 if (!findProfiledCalleeThroughTailCalls(
3174 FuncVI, Callee,
Depth, FoundCalleeChain, FoundMultipleCalleeChains)) {
3175 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: " << FuncVI
3176 <<
" from " << FSToVIMap[CallerFunc]
3177 <<
" that actually called " << Callee
3178 << (FoundMultipleCalleeChains
3179 ?
" (found multiple possible chains)"
3182 if (FoundMultipleCalleeChains)
3183 FoundProfiledCalleeNonUniquelyCount++;
3190bool IndexCallsiteContextGraph::sameCallee(IndexCall &Call1, IndexCall &Call2) {
3193 return Callee1 == Callee2;
3196template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3197void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump()
3203template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3204void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
3205 raw_ostream &OS)
const {
3206 OS <<
"Node " <<
this <<
"\n";
3210 OS <<
" (recursive)";
3212 if (!MatchingCalls.empty()) {
3213 OS <<
"\tMatchingCalls:\n";
3214 for (
auto &MatchingCall : MatchingCalls) {
3216 MatchingCall.print(OS);
3220 OS <<
"\tNodeId: " <<
NodeId <<
"\n";
3222 OS <<
"\tContextIds:";
3224 auto ContextIds = getContextIds();
3225 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
3226 std::sort(SortedIds.begin(), SortedIds.end());
3227 for (
auto Id : SortedIds)
3230 OS <<
"\tCalleeEdges:\n";
3231 for (
auto &
Edge : CalleeEdges)
3232 OS <<
"\t\t" << *
Edge <<
" (Callee NodeId: " <<
Edge->Callee->NodeId
3234 OS <<
"\tCallerEdges:\n";
3235 for (
auto &
Edge : CallerEdges)
3236 OS <<
"\t\t" << *
Edge <<
" (Caller NodeId: " <<
Edge->Caller->NodeId
3238 if (!Clones.empty()) {
3241 for (
auto *
C : Clones) {
3245 OS <<
C <<
" NodeId: " <<
C->NodeId;
3248 }
else if (CloneOf) {
3249 OS <<
"\tClone of " << CloneOf <<
" NodeId: " << CloneOf->NodeId <<
"\n";
3253template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3254void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::dump()
3260template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3261void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::print(
3262 raw_ostream &OS)
const {
3263 OS <<
"Edge from Callee " <<
Callee <<
" to Caller: " <<
Caller
3264 << (IsBackedge ?
" (BE)" :
"")
3266 OS <<
" ContextIds:";
3267 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
3268 std::sort(SortedIds.begin(), SortedIds.end());
3269 for (
auto Id : SortedIds)
3273template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3274void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::dump()
const {
3278template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3279void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
3280 raw_ostream &OS)
const {
3281 OS <<
"Callsite Context Graph:\n";
3282 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3284 if (
Node->isRemoved())
3291template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3292void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::printTotalSizes(
3293 raw_ostream &OS)
const {
3294 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3296 if (
Node->isRemoved())
3298 if (!
Node->IsAllocation)
3300 DenseSet<uint32_t> ContextIds =
Node->getContextIds();
3301 auto AllocTypeFromCall = getAllocationCallType(
Node->Call);
3302 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
3303 std::sort(SortedIds.begin(), SortedIds.end());
3304 for (
auto Id : SortedIds) {
3305 auto TypeI = ContextIdToAllocationType.find(Id);
3306 assert(TypeI != ContextIdToAllocationType.end());
3307 auto CSI = ContextIdToContextSizeInfos.find(Id);
3308 if (CSI != ContextIdToContextSizeInfos.end()) {
3309 for (
auto &
Info : CSI->second) {
3310 OS <<
"MemProf hinting: "
3312 <<
" full allocation context " <<
Info.FullStackId
3313 <<
" with total size " <<
Info.TotalSize <<
" is "
3315 if (allocTypeToUse(
Node->AllocTypes) != AllocTypeFromCall)
3317 <<
" due to cold byte percent";
3319 OS <<
" (context id " <<
Id <<
")";
3327template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3328void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check()
const {
3329 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3332 for (
auto &
Edge :
Node->CallerEdges)
3337template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3339 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3340 using NodeRef =
const ContextNode<DerivedCCG, FuncTy, CallTy> *;
3342 using NodePtrTy = std::unique_ptr<ContextNode<DerivedCCG, FuncTy, CallTy>>;
3358 return G->NodeOwner.begin()->get();
3361 using EdgePtrTy = std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>;
3362 static const ContextNode<DerivedCCG, FuncTy, CallTy> *
3381template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3395 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3401 std::string LabelString =
3402 (
Twine(
"OrigId: ") + (
Node->IsAllocation ?
"Alloc" :
"") +
3405 LabelString +=
"\n";
3406 if (
Node->hasCall()) {
3407 auto Func =
G->NodeToCallingFunc.find(
Node);
3408 assert(Func !=
G->NodeToCallingFunc.end());
3410 G->getLabel(Func->second,
Node->Call.call(),
Node->Call.cloneNo());
3412 LabelString +=
"null call";
3413 if (
Node->Recursive)
3414 LabelString +=
" (recursive)";
3416 LabelString +=
" (external)";
3422 auto ContextIds =
Node->getContextIds();
3426 bool Highlight =
false;
3435 std::string AttributeString = (
Twine(
"tooltip=\"") + getNodeId(
Node) +
" " +
3436 getContextIds(ContextIds) +
"\"")
3440 AttributeString +=
",fontsize=\"30\"";
3442 (
Twine(
",fillcolor=\"") + getColor(
Node->AllocTypes, Highlight) +
"\"")
3444 if (
Node->CloneOf) {
3445 AttributeString +=
",color=\"blue\"";
3446 AttributeString +=
",style=\"filled,bold,dashed\"";
3448 AttributeString +=
",style=\"filled\"";
3449 return AttributeString;
3454 auto &Edge = *(ChildIter.getCurrent());
3459 bool Highlight =
false;
3468 auto Color = getColor(Edge->AllocTypes, Highlight);
3469 std::string AttributeString =
3470 (
Twine(
"tooltip=\"") + getContextIds(Edge->ContextIds) +
"\"" +
3472 Twine(
",fillcolor=\"") + Color +
"\"" +
Twine(
",color=\"") + Color +
3475 if (Edge->IsBackedge)
3476 AttributeString +=
",style=\"dotted\"";
3479 AttributeString +=
",penwidth=\"2.0\",weight=\"2\"";
3480 return AttributeString;
3486 if (
Node->isRemoved())
3499 std::string IdString =
"ContextIds:";
3500 if (ContextIds.
size() < 100) {
3501 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
3502 std::sort(SortedIds.begin(), SortedIds.end());
3503 for (
auto Id : SortedIds)
3504 IdString += (
" " +
Twine(Id)).str();
3506 IdString += (
" (" + Twine(ContextIds.
size()) +
" ids)").str();
3511 static std::string getColor(uint8_t AllocTypes,
bool Highlight) {
3517 if (AllocTypes == (uint8_t)AllocationType::NotCold)
3519 return !
DoHighlight || Highlight ?
"brown1" :
"lightpink";
3520 if (AllocTypes == (uint8_t)AllocationType::Cold)
3521 return !
DoHighlight || Highlight ?
"cyan" :
"lightskyblue";
3523 ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold))
3524 return Highlight ?
"magenta" :
"mediumorchid1";
3528 static std::string getNodeId(NodeRef Node) {
3529 std::stringstream SStream;
3530 SStream << std::hex <<
"N0x" << (
unsigned long long)Node;
3531 std::string
Result = SStream.str();
3540template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3545template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3546void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot(
3547 std::string Label)
const {
3552template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3553typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
3554CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
3555 const std::shared_ptr<ContextEdge> &
Edge,
3556 DenseSet<uint32_t> ContextIdsToMove) {
3558 assert(NodeToCallingFunc.count(Node));
3559 ContextNode *Clone =
3560 createNewNode(
Node->IsAllocation, NodeToCallingFunc[Node],
Node->Call);
3561 Node->addClone(Clone);
3562 Clone->MatchingCalls =
Node->MatchingCalls;
3563 moveEdgeToExistingCalleeClone(
Edge, Clone,
true,
// Moves the context ids in ContextIdsToMove from Edge onto NewCallee, which
// must be a clone of Edge's current callee (asserted below via getOrigNode).
// If ContextIdsToMove is empty, all of Edge's context ids are moved. NewClone
// flags that NewCallee was freshly created for this move.
// NOTE(review): the embedded original line numbers jump in places here
// (e.g. 3603 -> 3606, 3627 -> 3631), so some lines — likely comments,
// else-branches and closing braces — appear to have been lost in extraction.
// Comments below only describe what the visible lines do.
3568template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3569void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3570 moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &
Edge,
3571 ContextNode *NewCallee,
bool NewClone,
3572 DenseSet<uint32_t> ContextIdsToMove) {
// NewCallee must be a clone of the node this edge currently targets.
3575 assert(NewCallee->getOrigNode() ==
Edge->Callee->getOrigNode());
// Remember whether the edge is a self-recursion before rewiring it.
3577 bool EdgeIsRecursive =
Edge->Callee ==
Edge->Caller;
3579 ContextNode *OldCallee =
Edge->Callee;
// Reuse an existing edge from the same caller into NewCallee if one exists.
3583 auto ExistingEdgeToNewCallee = NewCallee->findEdgeFromCaller(
Edge->Caller);
// Empty set means "move everything on this edge".
3587 if (ContextIdsToMove.
empty())
3588 ContextIdsToMove =
Edge->getContextIds();
// Case 1: moving all of Edge's context ids — the edge itself migrates.
3592 if (
Edge->getContextIds().size() == ContextIdsToMove.
size()) {
3595 NewCallee->AllocTypes |=
Edge->AllocTypes;
3597 if (ExistingEdgeToNewCallee) {
// Merge into the pre-existing edge and drop the now-redundant Edge.
3600 ExistingEdgeToNewCallee->getContextIds().insert_range(ContextIdsToMove);
3601 ExistingEdgeToNewCallee->AllocTypes |=
Edge->AllocTypes;
3602 assert(
Edge->ContextIds == ContextIdsToMove);
3603 removeEdgeFromGraph(
Edge.get());
// (else path — some lines missing here) Re-point Edge at NewCallee.
3606 Edge->Callee = NewCallee;
3607 NewCallee->CallerEdges.push_back(
Edge);
3609 OldCallee->eraseCallerEdge(
Edge.get());
// Case 2: moving only a subset — Edge stays, its ids are split.
3616 auto CallerEdgeAllocType = computeAllocType(ContextIdsToMove);
3617 if (ExistingEdgeToNewCallee) {
3620 ExistingEdgeToNewCallee->getContextIds().insert_range(ContextIdsToMove);
3621 ExistingEdgeToNewCallee->AllocTypes |= CallerEdgeAllocType;
// (else path — some lines missing) Create a fresh edge for the moved subset.
3624 auto NewEdge = std::make_shared<ContextEdge>(
3625 NewCallee,
Edge->Caller, CallerEdgeAllocType, ContextIdsToMove);
3626 Edge->Caller->CalleeEdges.push_back(NewEdge);
3627 NewCallee->CallerEdges.push_back(NewEdge);
3631 NewCallee->AllocTypes |= CallerEdgeAllocType;
// Recompute Edge's alloc type from whatever ids remain on it.
3633 Edge->AllocTypes = computeAllocType(
Edge->ContextIds);
// Redistribute OldCallee's callee edges for the moved context ids.
3638 for (
auto &OldCalleeEdge : OldCallee->CalleeEdges) {
3639 ContextNode *CalleeToUse = OldCalleeEdge->Callee;
// Handle a recursive self-edge on the old callee.
3643 if (CalleeToUse == OldCallee) {
3647 if (EdgeIsRecursive) {
3651 CalleeToUse = NewCallee;
// Split out the moved ids from this callee edge (intersection computation
// appears truncated by extraction just before set_subtract).
3655 DenseSet<uint32_t> EdgeContextIdsToMove =
3657 set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove);
3658 OldCalleeEdge->AllocTypes =
3659 computeAllocType(OldCalleeEdge->getContextIds());
// Merge into an existing edge from NewCallee if present, else make one.
3666 if (
auto *NewCalleeEdge = NewCallee->findEdgeFromCallee(CalleeToUse)) {
3667 NewCalleeEdge->getContextIds().insert_range(EdgeContextIdsToMove);
3668 NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
3672 auto NewEdge = std::make_shared<ContextEdge>(
3673 CalleeToUse, NewCallee, computeAllocType(EdgeContextIdsToMove),
3674 EdgeContextIdsToMove);
3675 NewCallee->CalleeEdges.push_back(NewEdge);
3676 NewEdge->Callee->CallerEdges.push_back(NewEdge);
// OldCallee lost ids; recompute its aggregate alloc type and sanity check.
3680 OldCallee->AllocTypes = OldCallee->computeAllocType();
3682 assert((OldCallee->AllocTypes == (uint8_t)AllocationType::None) ==
3683 OldCallee->emptyContextIds());
// Debug-only consistency walks over both nodes' callee edges (bodies of
// these loops appear to have been dropped by extraction).
3687 for (
const auto &OldCalleeEdge : OldCallee->CalleeEdges)
3690 for (
const auto &NewCalleeEdge : NewCallee->CalleeEdges)
// Re-points Edge so that its caller becomes NewCaller, merging into any
// pre-existing parallel edge. Also migrates OldCaller's caller edges whose
// context ids overlap the moved edge.
// NOTE(review): embedded original line numbers jump (e.g. 3723 -> 3726,
// 3757 -> 3761), so some lines (else-branches, an intersection computation,
// loop bodies) appear to be missing from this extraction.
3696template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3697void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3698 moveCalleeEdgeToNewCaller(
const std::shared_ptr<ContextEdge> &
Edge,
3699 ContextNode *NewCaller) {
3700 auto *OldCallee =
Edge->Callee;
3701 auto *NewCallee = OldCallee;
// For a recursive edge the callee must be retargeted to NewCaller too.
3704 bool Recursive =
Edge->Caller ==
Edge->Callee;
3706 NewCallee = NewCaller;
3708 ContextNode *OldCaller =
Edge->Caller;
3709 OldCaller->eraseCalleeEdge(
Edge.get());
// Merge into an existing NewCaller -> NewCallee edge when there is one.
3713 auto ExistingEdgeToNewCaller = NewCaller->findEdgeFromCallee(NewCallee);
3715 if (ExistingEdgeToNewCaller) {
3718 ExistingEdgeToNewCaller->getContextIds().insert_range(
3719 Edge->getContextIds());
3720 ExistingEdgeToNewCaller->AllocTypes |=
Edge->AllocTypes;
// Edge is emptied and detached; its ids now live on the existing edge.
3721 Edge->ContextIds.clear();
3722 Edge->AllocTypes = (uint8_t)AllocationType::None;
3723 OldCallee->eraseCallerEdge(
Edge.get());
// (else path — lines missing) Otherwise re-point the edge at NewCaller.
3726 Edge->Caller = NewCaller;
3727 NewCaller->CalleeEdges.push_back(
Edge);
// Recursive case: callee must also be rewired onto the new node.
3729 assert(NewCallee == NewCaller);
3732 Edge->Callee = NewCallee;
3733 NewCallee->CallerEdges.push_back(
Edge);
3734 OldCallee->eraseCallerEdge(
Edge.get());
3740 NewCaller->AllocTypes |=
Edge->AllocTypes;
// Whether NewCaller had no callers before this migration.
3747 bool IsNewNode = NewCaller->CallerEdges.empty();
// Move the overlapping portion of each of OldCaller's caller edges over to
// NewCaller, since those contexts now flow through NewCaller.
3756 for (
auto &OldCallerEdge : OldCaller->CallerEdges) {
3757 auto OldCallerCaller = OldCallerEdge->Caller;
// (intersection computation truncated by extraction just above these args)
3761 OldCallerEdge->getContextIds(),
Edge->getContextIds());
3762 if (OldCaller == OldCallerCaller) {
// Recursive caller edge: it should now come from NewCaller.
3763 OldCallerCaller = NewCaller;
3769 set_subtract(OldCallerEdge->getContextIds(), EdgeContextIdsToMove);
3770 OldCallerEdge->AllocTypes =
3771 computeAllocType(OldCallerEdge->getContextIds());
3776 auto *ExistingCallerEdge = NewCaller->findEdgeFromCaller(OldCallerCaller);
3780 if (ExistingCallerEdge) {
3781 ExistingCallerEdge->getContextIds().insert_range(EdgeContextIdsToMove);
3782 ExistingCallerEdge->AllocTypes |=
3783 computeAllocType(EdgeContextIdsToMove);
// (else path — lines missing) Otherwise create the caller edge fresh.
3786 auto NewEdge = std::make_shared<ContextEdge>(
3787 NewCaller, OldCallerCaller, computeAllocType(EdgeContextIdsToMove),
3788 EdgeContextIdsToMove);
3789 NewCaller->CallerEdges.push_back(NewEdge);
3790 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
// OldCaller lost context ids; recompute and sanity-check its alloc type.
3795 OldCaller->AllocTypes = OldCaller->computeAllocType();
3797 assert((OldCaller->AllocTypes == (uint8_t)AllocationType::None) ==
3798 OldCaller->emptyContextIds());
// Debug-only consistency walks (loop bodies appear dropped by extraction).
3802 for (
const auto &OldCallerEdge : OldCaller->CallerEdges)
3805 for (
const auto &NewCallerEdge : NewCaller->CallerEdges)
// Removes None-typed callee edges from Node, its clones, and (transitively)
// all callers reachable via caller edges, using Visited to terminate.
// NOTE(review): line-number gaps suggest a Visited-insert guard and brace
// lines were dropped by extraction.
3811template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3812void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3813 recursivelyRemoveNoneTypeCalleeEdges(
3814 ContextNode *Node, DenseSet<const ContextNode *> &Visited) {
3819 removeNoneTypeCalleeEdges(Node);
// Clean all clones of this node as well.
3821 for (
auto *Clone :
Node->Clones)
3822 recursivelyRemoveNoneTypeCalleeEdges(Clone, Visited);
// Iterate over a copy since recursion may mutate Node->CallerEdges.
3826 auto CallerEdges =
Node->CallerEdges;
3827 for (
auto &
Edge : CallerEdges) {
// Skip edges removed by an earlier recursive call.
3829 if (
Edge->isRemoved()) {
3833 recursivelyRemoveNoneTypeCalleeEdges(
Edge->Caller, Visited);
// Driver: runs backedge marking (DFS) starting from every root callsite
// node, i.e. nodes in NonAllocationCallToContextNodeMap with no callers.
3838template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3839void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::markBackedges() {
3844 DenseSet<const ContextNode *> Visited;
// CurrentStack tracks the nodes on the active DFS path.
3845 DenseSet<const ContextNode *> CurrentStack;
3846 for (
auto &Entry : NonAllocationCallToContextNodeMap) {
// Skip nodes already removed from the graph.
3848 if (
Node->isRemoved())
// Only start DFS from roots (nodes that have no caller edges).
3851 if (!
Node->CallerEdges.empty())
3853 markBackedges(Node, Visited, CurrentStack);
// DFS worker for markBackedges: marks a callee edge IsBackedge when its
// callee is currently on the DFS stack (i.e. the edge closes a cycle).
3859template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3860void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::markBackedges(
3861 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
3862 DenseSet<const ContextNode *> &CurrentStack) {
3863 auto I = Visited.
insert(Node);
3867 for (
auto &CalleeEdge :
Node->CalleeEdges) {
3868 auto *
Callee = CalleeEdge->Callee;
3869 if (Visited.
count(Callee)) {
// Already-visited callee: it is a backedge only if still on the stack.
3872 if (CurrentStack.
count(Callee))
3873 CalleeEdge->IsBackedge =
true;
// Recurse with the callee pushed onto the active DFS stack.
3876 CurrentStack.
insert(Callee);
3877 markBackedges(Callee, Visited, CurrentStack);
3878 CurrentStack.
erase(Callee);
// Driver: performs cloning analysis starting from each allocation node,
// then strips the None-typed callee edges produced along the way.
3882template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3883void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() {
3884 DenseSet<const ContextNode *> Visited;
3885 for (
auto &Entry : AllocationCallToContextNodeMap) {
// Seed each walk with the allocation node's full set of context ids.
3887 identifyClones(
Entry.second, Visited,
Entry.second->getContextIds());
// Second pass: clean up None-typed callee edges across the graph.
3890 for (
auto &Entry : AllocationCallToContextNodeMap)
3891 recursivelyRemoveNoneTypeCalleeEdges(
Entry.second, Visited);
// Per-node cloning analysis: after recursing to callers, decides for each
// caller edge whether its contexts' allocation-type needs differ from this
// node's, and if so moves the edge onto an existing or new clone of Node.
// NOTE(review): many original lines are missing from this extraction (the
// embedded numbers jump widely, e.g. 3953 -> 3976, 4062 -> 4078); guards,
// sort call, continues and braces are not all visible. Comments describe
// only the visible lines.
3904template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3905void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
3906 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
3907 const DenseSet<uint32_t> &AllocContextIds) {
// Nothing to clone for nodes without an associated call.
3917 if (!
Node->hasCall())
// Recurse to callers first (copy, since recursion may mutate the list).
3936 auto CallerEdges =
Node->CallerEdges;
3937 for (
auto &
Edge : CallerEdges) {
3939 if (
Edge->isRemoved()) {
// Backedges are deferred; handled later in the caller-edge loop below.
3945 if (
Edge->IsBackedge) {
3952 if (!Visited.
count(
Edge->Caller) && !
Edge->Caller->CloneOf) {
3953 identifyClones(
Edge->Caller, Visited, AllocContextIds);
// Priority table used to order caller edges by alloc type when sorting
// (the sort invocation itself is not visible in this extraction).
3976 const unsigned AllocTypeCloningPriority[] = { 3, 4,
3980 [&](
const std::shared_ptr<ContextEdge> &
A,
3981 const std::shared_ptr<ContextEdge> &
B) {
// Empty-id edges ordered deterministically (returns truncated).
3984 if (A->ContextIds.empty())
3990 if (B->ContextIds.empty())
// Tie-break equal alloc types by smallest context id for determinism.
3993 if (A->AllocTypes == B->AllocTypes)
3996 return *A->ContextIds.begin() < *B->ContextIds.begin();
3997 return AllocTypeCloningPriority[A->AllocTypes] <
3998 AllocTypeCloningPriority[B->AllocTypes];
4001 assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
// Context ids seen on more than one caller edge are recursive contexts.
4003 DenseSet<uint32_t> RecursiveContextIds;
4008 DenseSet<uint32_t> AllCallerContextIds;
4009 for (
auto &CE :
Node->CallerEdges) {
4012 AllCallerContextIds.
reserve(
CE->getContextIds().size());
4013 for (
auto Id :
CE->getContextIds())
// A duplicate insert means the id reaches Node along 2+ caller edges.
4014 if (!AllCallerContextIds.
insert(Id).second)
4015 RecursiveContextIds.
insert(Id);
// Main loop: examine each caller edge (copy; moves mutate the list).
4025 auto CallerEdges =
Node->CallerEdges;
4026 for (
auto &CallerEdge : CallerEdges) {
4028 if (CallerEdge->isRemoved()) {
4032 assert(CallerEdge->Callee == Node);
// Callers without calls cannot be updated; skip them.
4041 if (!CallerEdge->Caller->hasCall())
// Restrict to contexts of the allocation being analyzed (the initializer
// expression is truncated in this extraction).
4046 auto CallerEdgeContextsForAlloc =
4048 if (!RecursiveContextIds.
empty())
4049 CallerEdgeContextsForAlloc =
4051 if (CallerEdgeContextsForAlloc.empty())
4054 auto CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
// Per-callee-edge alloc types restricted to this caller's contexts; used
// to test whether cloning would change anything.
4058 std::vector<uint8_t> CalleeEdgeAllocTypesForCallerEdge;
4059 CalleeEdgeAllocTypesForCallerEdge.reserve(
Node->CalleeEdges.size());
4060 for (
auto &CalleeEdge :
Node->CalleeEdges)
4061 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
4062 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
4078 assert(CallerEdge->AllocTypes != (uint8_t)AllocationType::None);
4079 assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
// If this caller's needs already match Node, no clone is required
// (the continue/body lines after this condition are not visible).
4080 if (!CallerEdge->IsBackedge &&
4081 allocTypeToUse(CallerAllocTypeForAlloc) ==
4082 allocTypeToUse(
Node->AllocTypes) &&
4083 allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
4084 CalleeEdgeAllocTypesForCallerEdge,
Node->CalleeEdges)) {
// Count deferred backedge cloning for statistics.
4088 if (CallerEdge->IsBackedge) {
4092 DeferredBackedges++;
// Deferred backedge handling: recurse into the caller now, then detect
// whether its edges were rearranged by that recursion.
4105 if (CallerEdge->IsBackedge && !CallerEdge->Caller->CloneOf &&
4106 !Visited.
count(CallerEdge->Caller)) {
4107 const auto OrigIdCount = CallerEdge->getContextIds().size();
4110 identifyClones(CallerEdge->Caller, Visited, CallerEdgeContextsForAlloc);
4111 removeNoneTypeCalleeEdges(CallerEdge->Caller);
4115 bool UpdatedEdge =
false;
// If the edge lost ids, look for the edge(s) from the caller's clones.
4116 if (OrigIdCount > CallerEdge->getContextIds().size()) {
4117 for (
auto E :
Node->CallerEdges) {
4119 if (
E->Caller->CloneOf != CallerEdge->Caller)
4123 auto CallerEdgeContextsForAllocNew =
4125 if (CallerEdgeContextsForAllocNew.empty())
// Adopt the recomputed context-id set for the rest of this iteration.
4135 CallerEdgeContextsForAlloc.swap(CallerEdgeContextsForAllocNew);
// The recursion may have removed the edge entirely.
4145 if (CallerEdge->isRemoved())
// Re-intersect with whatever ids remain on the edge.
4155 CallerEdgeContextsForAlloc, CallerEdge->getContextIds());
4156 if (CallerEdgeContextsForAlloc.empty())
// Recompute alloc types after the backedge handling above.
4161 CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
4162 CalleeEdgeAllocTypesForCallerEdge.clear();
4163 for (
auto &CalleeEdge :
Node->CalleeEdges) {
4164 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
4165 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
// Try to reuse an existing clone whose alloc-type profile matches.
4171 ContextNode *Clone =
nullptr;
4172 for (
auto *CurClone :
Node->Clones) {
4173 if (allocTypeToUse(CurClone->AllocTypes) !=
4174 allocTypeToUse(CallerAllocTypeForAlloc))
4181 assert(!BothSingleAlloc ||
4182 CurClone->AllocTypes == CallerAllocTypeForAlloc);
// Single-alloc-type match or full per-callee-edge match suffices.
4188 if (BothSingleAlloc || allocTypesMatchClone<DerivedCCG, FuncTy, CallTy>(
4189 CalleeEdgeAllocTypesForCallerEdge, CurClone)) {
// Move the edge onto the matching clone, or create a brand-new one.
4197 moveEdgeToExistingCalleeClone(CallerEdge, Clone,
false,
4198 CallerEdgeContextsForAlloc);
4200 Clone = moveEdgeToNewCalleeClone(CallerEdge, CallerEdgeContextsForAlloc);
4203 assert(Clone->AllocTypes != (uint8_t)AllocationType::None);
4210 assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
// IR flavor of updateAllocationCall: tags the allocation call with a
// "memprof" attribute (value AllocTypeString) and emits an optimization
// remark. NOTE(review): the parameter list and several statements (lines
// 4217-4223 of the original) were lost in extraction — presumably the
// attribute construction that 4221 is the tail of; confirm against the
// original file.
4216void ModuleCallsiteContextGraph::updateAllocationCall(
4221 "memprof", AllocTypeString);
// Emit a remark recording which clone got which allocation attribute.
4224 .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofAttribute",
Call.call())
4225 <<
ore::NV(
"AllocationCall",
Call.call()) <<
" in clone "
4227 <<
" marked with memprof allocation attribute "
4228 <<
ore::NV(
"Attribute", AllocTypeString));
// Summary-index flavor of updateAllocationCall. Only the bounds assert on
// the alloc-info Versions array is visible; the statements recording the
// allocation type (original lines 4232-4238) were lost in extraction.
4231void IndexCallsiteContextGraph::updateAllocationCall(CallInfo &
Call,
4235 assert(AI->Versions.size() >
Call.cloneNo());
// Reads back the allocation type previously recorded on an IR call: no
// "memprof" fn attribute means None; attribute value "cold" maps to Cold,
// anything else to NotCold. (CB's initialization from Call is not visible
// in this extraction.)
4240ModuleCallsiteContextGraph::getAllocationCallType(
const CallInfo &
Call)
const {
4242 if (!CB->getAttributes().hasFnAttr(
"memprof"))
4243 return AllocationType::None;
4244 return CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
4245 ? AllocationType::Cold
4246 : AllocationType::NotCold;
// Summary-index flavor: only the bounds assert on AI->Versions is visible;
// the lookup of AI and the return of the recorded version's type (original
// lines 4251, 4253) were lost in extraction.
4250IndexCallsiteContextGraph::getAllocationCallType(
const CallInfo &
Call)
const {
4252 assert(AI->Versions.size() >
Call.cloneNo());
// IR flavor of updateCall: redirects CallerCall to the assigned function
// clone CalleeFunc, counting mismatches when the call was already assigned
// to a different clone, and emits an optimization remark.
// NOTE(review): several lines are missing (e.g. the derivation of
// CurCalleeCloneNo around 4260-4265 and the dbgs() stream open at 4267).
4256void ModuleCallsiteContextGraph::updateCall(CallInfo &CallerCall,
4257 FuncInfo CalleeFunc) {
4258 auto *CurF = getCalleeFunc(CallerCall.call());
4259 auto NewCalleeCloneNo = CalleeFunc.cloneNo();
// Record (and count) conflicting clone assignments for this callsite.
4266 if (CurCalleeCloneNo != NewCalleeCloneNo) {
4268 << CurCalleeCloneNo <<
" now " << NewCalleeCloneNo
4270 MismatchedCloneAssignments++;
// Clone 0 is the original function; only rewrite for real clones.
4273 if (NewCalleeCloneNo > 0)
4274 cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func());
4275 OREGetter(CallerCall.call()->getFunction())
4276 .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofCall", CallerCall.call())
4277 <<
ore::NV(
"Call", CallerCall.call()) <<
" in clone "
4278 <<
ore::NV(
"Caller", CallerCall.call()->getFunction())
4279 <<
" assigned to call function clone "
4280 <<
ore::NV(
"Callee", CalleeFunc.func()));
// Summary-index flavor of updateCall: records the assigned callee clone
// number in the callsite summary's Clones array, counting conflicting
// reassignments (a previously-assigned nonzero clone differing from the
// new one).
4283void IndexCallsiteContextGraph::updateCall(CallInfo &CallerCall,
4284 FuncInfo CalleeFunc) {
4287 "Caller cannot be an allocation which should not have profiled calls");
4288 assert(CI->Clones.size() > CallerCall.cloneNo());
4289 auto NewCalleeCloneNo = CalleeFunc.cloneNo();
4290 auto &CurCalleeCloneNo = CI->Clones[CallerCall.cloneNo()];
// 0 means "not yet assigned"; any other differing value is a mismatch.
4295 if (CurCalleeCloneNo != 0 && CurCalleeCloneNo != NewCalleeCloneNo) {
4297 << CurCalleeCloneNo <<
" now " << NewCalleeCloneNo
4299 MismatchedCloneAssignments++;
4301 CurCalleeCloneNo = NewCalleeCloneNo;
4313 SP->replaceLinkageName(MDName);
4317 TempDISubprogram NewDecl = Decl->
clone();
4318 NewDecl->replaceLinkageName(MDName);
// IR flavor: creates function clone number CloneNo of Func (the actual
// CloneFunction call and Name construction are among the lines lost in
// extraction, e.g. 4327-4330), renames it, records the per-call clone
// mapping for every memprof call in the function, emits a remark, and
// returns the new FuncInfo {NewFunc, CloneNo}.
4322CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
4324ModuleCallsiteContextGraph::cloneFunctionForCallsite(
4325 FuncInfo &Func, CallInfo &
Call, DenseMap<CallInfo, CallInfo> &CallMap,
4326 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
// The chosen clone name must not collide with an existing function.
4331 assert(!
Func.func()->getParent()->getFunction(Name));
4332 NewFunc->setName(Name);
// Map each original call (clone 0) to its counterpart in the new clone
// (the CallMap insertion lines appear dropped by extraction).
4334 for (
auto &Inst : CallsWithMetadataInFunc) {
4336 assert(Inst.cloneNo() == 0);
4339 OREGetter(
Func.func())
4340 .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofClone",
Func.func())
4341 <<
"created clone " <<
ore::NV(
"NewFunction", NewFunc));
4342 return {NewFunc, CloneNo};
// Summary-index flavor: no IR is cloned here. Instead each alloc/callsite
// summary in the function grows one entry (initialized to 0) so that its
// Versions/Clones arrays have a slot for clone number CloneNo, and CallMap
// records the per-call clone mapping. Returns the same function with the
// new clone number.
4345CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
4346 IndexCall>::FuncInfo
4347IndexCallsiteContextGraph::cloneFunctionForCallsite(
4348 FuncInfo &Func, CallInfo &
Call, DenseMap<CallInfo, CallInfo> &CallMap,
4349 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
4363 for (
auto &Inst : CallsWithMetadataInFunc) {
// Only original (clone-0) calls are expected here.
4365 assert(Inst.cloneNo() == 0);
// Allocation summaries grow their Versions array by one slot.
4367 assert(AI->Versions.size() == CloneNo);
4370 AI->Versions.push_back(0);
// Callsite summaries grow their Clones array by one slot.
4373 assert(CI && CI->Clones.size() == CloneNo);
4376 CI->Clones.push_back(0);
4378 CallMap[Inst] = {Inst.call(), CloneNo};
4380 return {
Func.func(), CloneNo};
// Driver for clone merging: first builds a map from each context id to its
// (original) allocation node, then invokes per-node merging from every
// allocation node and its clones. Optionally dumps/exports the graph after
// merging (the guard conditions around the dump are not visible here).
4397template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4398void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones() {
4404 DenseMap<uint32_t, ContextNode *> ContextIdToAllocationNode;
4405 for (
auto &Entry : AllocationCallToContextNodeMap) {
// Map ids on the node itself, always to the original (unclonned) node.
4407 for (
auto Id :
Node->getContextIds())
4408 ContextIdToAllocationNode[
Id] =
Node->getOrigNode();
// And ids living on each clone, likewise mapped to the orig node.
4409 for (
auto *Clone :
Node->Clones) {
4410 for (
auto Id : Clone->getContextIds())
4411 ContextIdToAllocationNode[
Id] = Clone->getOrigNode();
4418 DenseSet<const ContextNode *> Visited;
4419 for (
auto &Entry : AllocationCallToContextNodeMap) {
4422 mergeClones(Node, Visited, ContextIdToAllocationNode);
// Copy the clone list: merging may modify Node->Clones while iterating.
4428 auto Clones =
Node->Clones;
4429 for (
auto *Clone : Clones)
4430 mergeClones(Clone, Visited, ContextIdToAllocationNode);
// Debug dump / dot export after the merge (guards not visible).
4434 dbgs() <<
"CCG after merging:\n";
4438 exportToDot(
"aftermerge");
// Per-node merge walk: repeatedly recurses into unvisited callers until a
// full pass finds none (caller sets can change as merging rewires edges),
// updating the merge-iteration statistics, then merges this node's own
// callee clones. NOTE(review): the Visited check that makes FoundUnvisited
// true only for unvisited callers, and the Iters increment, are among lines
// dropped by extraction.
4446template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4447void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones(
4448 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
4449 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {
4459 bool FoundUnvisited =
true;
4461 while (FoundUnvisited) {
4463 FoundUnvisited =
false;
// Copy the caller list; recursion may mutate Node->CallerEdges.
4466 auto CallerEdges =
Node->CallerEdges;
4467 for (
auto CallerEdge : CallerEdges) {
// Skip edges that no longer point at this node (moved by merging).
4469 if (CallerEdge->Callee != Node)
4474 FoundUnvisited =
true;
4475 mergeClones(CallerEdge->Caller, Visited, ContextIdToAllocationNode);
// Statistics for the fixed-point iteration above.
4479 TotalMergeInvokes++;
4480 TotalMergeIters += Iters;
4481 if (Iters > MaxMergeIters)
4482 MaxMergeIters = Iters;
// Now merge clones among this node's callees.
4485 mergeNodeCalleeClones(Node, Visited, ContextIdToAllocationNode);
// Merges, per original callee, all of Node's callee edges that target
// different clones of that callee into a single merge node, optionally
// sharing the merge with other caller nodes that call the exact same set
// of clones (computed by findOtherCallersToShareMerge).
// NOTE(review): number gaps (e.g. 4521 -> 4526, 4590 missing args) show
// lines dropped by extraction — including the sort using
// CalleeCallerEdgeLessThan and several continue/brace lines.
4488template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4489void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeNodeCalleeClones(
4490 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
4491 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {
// Nothing to do for dead nodes.
4493 if (
Node->emptyContextIds())
// Group Node's callee edges by the original (un-cloned) callee node.
4498 MapVector<ContextNode *, std::vector<std::shared_ptr<ContextEdge>>>
4499 OrigNodeToCloneEdges;
4500 for (
const auto &
E :
Node->CalleeEdges) {
4505 OrigNodeToCloneEdges[
Base].push_back(
E);
// Deterministic ordering: fewer caller edges first, originals before
// clones, then smallest context id.
4511 auto CalleeCallerEdgeLessThan = [](
const std::shared_ptr<ContextEdge> &
A,
4512 const std::shared_ptr<ContextEdge> &
B) {
4513 if (
A->Callee->CallerEdges.size() !=
B->Callee->CallerEdges.size())
4514 return A->Callee->CallerEdges.size() <
B->Callee->CallerEdges.size();
4515 if (
A->Callee->CloneOf && !
B->Callee->CloneOf)
4517 else if (!
A->Callee->CloneOf &&
B->Callee->CloneOf)
4521 return *
A->ContextIds.begin() < *
B->ContextIds.begin();
4526 for (
auto Entry : OrigNodeToCloneEdges) {
4529 auto &CalleeEdges =
Entry.second;
4530 auto NumCalleeClones = CalleeEdges.size();
// A single clone needs no merging.
4532 if (NumCalleeClones == 1)
// Find other caller nodes that call the same set of callee clones and
// can therefore share the merged node.
4543 DenseSet<ContextNode *> OtherCallersToShareMerge;
4544 findOtherCallersToShareMerge(Node, CalleeEdges, ContextIdToAllocationNode,
4545 OtherCallersToShareMerge);
4550 ContextNode *MergeNode =
nullptr;
4551 DenseMap<ContextNode *, unsigned> CallerToMoveCount;
4552 for (
auto CalleeEdge : CalleeEdges) {
4553 auto *OrigCallee = CalleeEdge->Callee;
// Reuse a clone as the merge node when it has just this one caller.
4559 if (CalleeEdge->Callee->CallerEdges.size() == 1) {
4560 MergeNode = OrigCallee;
4561 NonNewMergedNodes++;
// Also reuse it when every one of its other callers will share the
// merge anyway (so all of its edges move together).
4568 if (!OtherCallersToShareMerge.
empty()) {
4569 bool MoveAllCallerEdges =
true;
4570 for (
auto CalleeCallerE : OrigCallee->CallerEdges) {
4571 if (CalleeCallerE == CalleeEdge)
4573 if (!OtherCallersToShareMerge.
contains(CalleeCallerE->Caller)) {
4574 MoveAllCallerEdges =
false;
4580 if (MoveAllCallerEdges) {
4581 MergeNode = OrigCallee;
4582 NonNewMergedNodes++;
// Otherwise move this edge into the existing merge node, or create a
// new merge node from the first edge.
4589 assert(MergeNode != OrigCallee);
4590 moveEdgeToExistingCalleeClone(CalleeEdge, MergeNode,
4593 MergeNode = moveEdgeToNewCalleeClone(CalleeEdge);
// Move the sharing callers' edges into the merge node as well.
4598 if (!OtherCallersToShareMerge.
empty()) {
// Copy: the moves below mutate OrigCallee->CallerEdges.
4602 auto OrigCalleeCallerEdges = OrigCallee->CallerEdges;
4603 for (
auto &CalleeCallerE : OrigCalleeCallerEdges) {
4604 if (CalleeCallerE == CalleeEdge)
4606 if (!OtherCallersToShareMerge.
contains(CalleeCallerE->Caller))
4608 CallerToMoveCount[CalleeCallerE->Caller]++;
4609 moveEdgeToExistingCalleeClone(CalleeCallerE, MergeNode,
// Clean up None-typed edges produced by the moves.
4613 removeNoneTypeCalleeEdges(OrigCallee);
4614 removeNoneTypeCalleeEdges(MergeNode);
// Identifies other caller nodes that call the exact same set of callee
// clones as Node (CalleeEdges) and whose contexts reach no allocations
// beyond those reached via Node's edges, so they can safely share the
// merged callee node. Results are returned in OtherCallersToShareMerge.
4632template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4633void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
4634 findOtherCallersToShareMerge(
4636 std::vector<std::shared_ptr<ContextEdge>> &CalleeEdges,
4637 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode,
4638 DenseSet<ContextNode *> &OtherCallersToShareMerge) {
4639 auto NumCalleeClones = CalleeEdges.size();
// For each candidate caller, how many of the callee clones it also calls.
4642 DenseMap<ContextNode *, unsigned> OtherCallersToSharedCalleeEdgeCount;
4645 unsigned PossibleOtherCallerNodes = 0;
// CalleeEdges is sorted so that the clone with the fewest callers comes
// first; if it has a single caller (Node) no other caller can share.
4649 if (CalleeEdges[0]->
Callee->CallerEdges.size() < 2)
// First pass: count shared clones per other caller, and collect the set
// of allocation nodes reached through each of Node's callee edges.
4655 DenseMap<ContextEdge *, DenseSet<ContextNode *>> CalleeEdgeToAllocNodes;
4656 for (
auto CalleeEdge : CalleeEdges) {
4657 assert(CalleeEdge->Callee->CallerEdges.size() > 1);
4660 for (
auto CalleeCallerEdges : CalleeEdge->Callee->CallerEdges) {
4661 if (CalleeCallerEdges->Caller == Node) {
4662 assert(CalleeCallerEdges == CalleeEdge);
4665 OtherCallersToSharedCalleeEdgeCount[CalleeCallerEdges->Caller]++;
// A caller sharing all NumCalleeClones edges becomes a candidate
// (the comparison's RHS is truncated by extraction here).
4668 if (OtherCallersToSharedCalleeEdgeCount[CalleeCallerEdges->Caller] ==
4670 PossibleOtherCallerNodes++;
// Record which allocation nodes this edge's contexts reach.
4674 for (
auto Id : CalleeEdge->getContextIds()) {
4675 auto *
Alloc = ContextIdToAllocationNode.
lookup(Id);
// Contexts with no alloc node are counted but tolerated.
4679 MissingAllocForContextId++;
4682 CalleeEdgeToAllocNodes[CalleeEdge.get()].
insert(
Alloc);
// Second pass: disqualify candidates whose contexts reach allocations
// that Node's corresponding edge does not reach.
4689 for (
auto CalleeEdge : CalleeEdges) {
4690 assert(CalleeEdge->Callee->CallerEdges.size() > 1);
4692 if (!PossibleOtherCallerNodes)
4694 auto &CurCalleeAllocNodes = CalleeEdgeToAllocNodes[CalleeEdge.get()];
4696 for (
auto &CalleeCallerE : CalleeEdge->Callee->CallerEdges) {
4698 if (CalleeCallerE == CalleeEdge)
// Only candidates that shared all clones need checking (RHS of this
// comparison is truncated by extraction).
4702 if (OtherCallersToSharedCalleeEdgeCount[CalleeCallerE->Caller] !=
4707 for (
auto Id : CalleeCallerE->getContextIds()) {
4708 auto *
Alloc = ContextIdToAllocationNode.
lookup(Id);
// Reaching an allocation outside Node's set disqualifies this caller.
4713 if (!CurCalleeAllocNodes.contains(
Alloc)) {
4714 OtherCallersToSharedCalleeEdgeCount[CalleeCallerE->Caller] = 0;
4715 PossibleOtherCallerNodes--;
4722 if (!PossibleOtherCallerNodes)
// Surviving candidates (count still equal to NumCalleeClones) share the merge.
4727 for (
auto &[OtherCaller,
Count] : OtherCallersToSharedCalleeEdgeCount) {
4728 if (
Count != NumCalleeClones)
4730 OtherCallersToShareMerge.
insert(OtherCaller);
// Assigns each callsite-graph clone to a function clone: walks every
// function's memprof calls, creates function clones as needed so that each
// callsite clone lives in exactly one function clone, records caller->callee
// function-clone assignments, and finally (via the recursive UpdateCalls
// lambda) rewrites allocation calls and call targets.
// NOTE(review): this extraction is missing many original lines throughout
// (the embedded numbers jump repeatedly), including returns, continues,
// else-branches and brace lines; comments below describe only what the
// visible lines establish.
4775template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4776bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
// Maps a callsite node to the function clone its calls should target.
4783 DenseMap<ContextNode *, FuncInfo> CallsiteToCalleeFuncCloneMap;
4787 auto RecordCalleeFuncOfCallsite = [&](ContextNode *
Caller,
4788 const FuncInfo &CalleeFunc) {
4790 CallsiteToCalleeFuncCloneMap[
Caller] = CalleeFunc;
// Per-function-clone bookkeeping: the clone identity plus the original
// call -> cloned call mapping inside that clone.
4794 struct FuncCloneInfo {
4799 DenseMap<CallInfo, CallInfo> CallMap;
// Call clones created in function clones for which no graph clone was
// assigned; they must still be redirected at the end.
4827 DenseMap<const ContextNode *, std::map<unsigned, SmallVector<CallInfo, 0>>>
4828 UnassignedCallClones;
4832 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
4833 FuncInfo OrigFunc(Func);
// Index i here holds the info for function clone number i.
4838 std::vector<FuncCloneInfo> FuncCloneInfos;
4839 for (
auto &
Call : CallsWithMetadata) {
4840 ContextNode *
Node = getNodeForInst(
Call);
// Only calls whose node was cloned need assignment work.
4844 if (!Node ||
Node->Clones.empty())
4847 "Not having a call should have prevented cloning");
// Which callsite clone is currently assigned to each function clone.
4851 std::map<FuncInfo, ContextNode *> FuncCloneToCurNodeCloneMap;
// Binds a callsite clone to a function clone: records the mapping and
// points the clone (and its matching calls) at the cloned call
// instructions from that function clone's CallMap.
4855 auto AssignCallsiteCloneToFuncClone = [&](
const FuncInfo &FuncClone,
4857 ContextNode *CallsiteClone,
4860 FuncCloneToCurNodeCloneMap[FuncClone] = CallsiteClone;
4862 assert(FuncCloneInfos.size() > FuncClone.cloneNo());
4863 DenseMap<CallInfo, CallInfo> &CallMap =
4864 FuncCloneInfos[FuncClone.cloneNo()].CallMap;
4865 CallInfo CallClone(
Call);
4866 if (
auto It = CallMap.
find(
Call); It != CallMap.
end())
4867 CallClone = It->second;
4868 CallsiteClone->setCall(CallClone);
// Matching calls of the node share the same function clone.
4870 for (
auto &MatchingCall :
Node->MatchingCalls) {
4871 CallInfo CallClone(MatchingCall);
4872 if (
auto It = CallMap.
find(MatchingCall); It != CallMap.
end())
4873 CallClone = It->second;
4875 MatchingCall = CallClone;
// Helper: split an edge onto a new callee clone and propagate the
// callee's existing function-clone assignment, if any.
4883 auto MoveEdgeToNewCalleeCloneAndSetUp =
4884 [&](
const std::shared_ptr<ContextEdge> &
Edge) {
4885 ContextNode *OrigCallee =
Edge->Callee;
4886 ContextNode *NewClone = moveEdgeToNewCalleeClone(
Edge);
4887 removeNoneTypeCalleeEdges(NewClone);
4888 assert(NewClone->AllocTypes != (uint8_t)AllocationType::None);
4892 if (CallsiteToCalleeFuncCloneMap.
count(OrigCallee))
4893 RecordCalleeFuncOfCallsite(
4894 NewClone, CallsiteToCalleeFuncCloneMap[OrigCallee]);
// Worklist of this node's clones needing a function-clone assignment
// (clone additions during processing are appended).
4901 std::deque<ContextNode *> ClonesWorklist;
4903 if (!
Node->emptyContextIds())
4904 ClonesWorklist.push_back(Node);
4910 unsigned NodeCloneCount = 0;
4911 while (!ClonesWorklist.empty()) {
4912 ContextNode *Clone = ClonesWorklist.front();
4913 ClonesWorklist.pop_front();
// Need more function clones than currently exist for this clone.
4922 if (FuncCloneInfos.size() < NodeCloneCount) {
// First clone: try to keep it in the original function.
4924 if (NodeCloneCount == 1) {
// Check whether any caller was already assigned a func clone
// (find_if invocation truncated by extraction).
4929 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &
E) {
4930 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
4934 FuncCloneInfos.push_back(
4935 {OrigFunc, DenseMap<CallInfo, CallInfo>()});
4936 AssignCallsiteCloneToFuncClone(
4937 OrigFunc,
Call, Clone,
4938 AllocationCallToContextNodeMap.count(
Call));
4939 for (
auto &CE : Clone->CallerEdges) {
4941 if (!
CE->Caller->hasCall())
4943 RecordCalleeFuncOfCallsite(
CE->Caller, OrigFunc);
// Otherwise a fresh function clone is required; remember any func
// clone a caller was already assigned to.
4953 FuncInfo PreviousAssignedFuncClone;
4955 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &
E) {
4956 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
4958 bool CallerAssignedToCloneOfFunc =
false;
4959 if (EI != Clone->CallerEdges.end()) {
4960 const std::shared_ptr<ContextEdge> &
Edge = *EI;
4961 PreviousAssignedFuncClone =
4962 CallsiteToCalleeFuncCloneMap[
Edge->Caller];
4963 CallerAssignedToCloneOfFunc =
true;
// Create the new function clone.
4968 DenseMap<CallInfo, CallInfo> NewCallMap;
4969 unsigned CloneNo = FuncCloneInfos.size();
4970 assert(CloneNo > 0 &&
"Clone 0 is the original function, which "
4971 "should already exist in the map");
4972 FuncInfo NewFuncClone = cloneFunctionForCallsite(
4973 OrigFunc,
Call, NewCallMap, CallsWithMetadata, CloneNo);
4974 FuncCloneInfos.push_back({NewFuncClone, std::move(NewCallMap)});
4975 FunctionClonesAnalysis++;
// No caller constrained us: bind this clone (and its callers) to
// the new function clone directly.
4981 if (!CallerAssignedToCloneOfFunc) {
4982 AssignCallsiteCloneToFuncClone(
4983 NewFuncClone,
Call, Clone,
4984 AllocationCallToContextNodeMap.count(
Call));
4985 for (
auto &CE : Clone->CallerEdges) {
4987 if (!
CE->Caller->hasCall())
4989 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
// Some caller was already assigned: migrate callers assigned to
// PreviousAssignedFuncClone over to the new function clone, cloning
// their other callees as needed.
5001 auto CallerEdges = Clone->CallerEdges;
5002 for (
auto CE : CallerEdges) {
5004 if (
CE->isRemoved()) {
5010 if (!
CE->Caller->hasCall())
5013 if (!CallsiteToCalleeFuncCloneMap.
count(
CE->Caller) ||
5017 CallsiteToCalleeFuncCloneMap[
CE->Caller] !=
5018 PreviousAssignedFuncClone)
5021 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
// The caller's other callee edges must also be split into the
// new function clone (copy; loop mutates the list).
5034 auto CalleeEdges =
CE->Caller->CalleeEdges;
5035 for (
auto CalleeEdge : CalleeEdges) {
5038 if (CalleeEdge->isRemoved()) {
5043 ContextNode *
Callee = CalleeEdge->Callee;
5047 if (Callee == Clone || !
Callee->hasCall())
// Skip recursive self-edges.
5052 if (Callee == CalleeEdge->Caller)
5054 ContextNode *NewClone =
5055 MoveEdgeToNewCalleeCloneAndSetUp(CalleeEdge);
5058 removeNoneTypeCalleeEdges(Callee);
// Point the new clone at the corresponding cloned call in
// the new function clone, via the orig (clone-0) call.
5066 CallInfo OrigCall(
Callee->getOrigNode()->Call);
5067 OrigCall.setCloneNo(0);
5068 DenseMap<CallInfo, CallInfo> &CallMap =
5069 FuncCloneInfos[NewFuncClone.cloneNo()].CallMap;
5071 CallInfo NewCall(CallMap[OrigCall]);
5073 NewClone->setCall(NewCall);
// Same treatment for the clone's matching calls.
5075 for (
auto &MatchingCall : NewClone->MatchingCalls) {
5076 CallInfo OrigMatchingCall(MatchingCall);
5077 OrigMatchingCall.setCloneNo(0);
5079 CallInfo NewCall(CallMap[OrigMatchingCall]);
5082 MatchingCall = NewCall;
// Returns the first function clone with no callsite clone assigned
// yet (assertion string visible below if none exists).
5091 auto FindFirstAvailFuncClone = [&]() {
5096 for (
auto &CF : FuncCloneInfos) {
5097 if (!FuncCloneToCurNodeCloneMap.count(CF.FuncClone))
5098 return CF.FuncClone;
5101 "Expected an available func clone for this callsite clone");
// Reconcile each caller's previously assigned function clone with
// this callsite clone, splitting the clone further on conflicts.
5118 std::map<FuncInfo, ContextNode *> FuncCloneToNewCallsiteCloneMap;
5119 FuncInfo FuncCloneAssignedToCurCallsiteClone;
// Copy; the loop below can add/move caller edges.
5123 auto CloneCallerEdges = Clone->CallerEdges;
5124 for (
auto &
Edge : CloneCallerEdges) {
5128 if (
Edge->isRemoved())
5131 if (!
Edge->Caller->hasCall())
// Caller already committed to some function clone.
5135 if (CallsiteToCalleeFuncCloneMap.
count(
Edge->Caller)) {
5136 FuncInfo FuncCloneCalledByCaller =
5137 CallsiteToCalleeFuncCloneMap[
Edge->Caller];
// Conflict: that function clone already hosts a different
// callsite clone, or this callsite clone is already bound to a
// different function clone.
5147 if ((FuncCloneToCurNodeCloneMap.count(FuncCloneCalledByCaller) &&
5148 FuncCloneToCurNodeCloneMap[FuncCloneCalledByCaller] !=
5156 (FuncCloneAssignedToCurCallsiteClone &&
5157 FuncCloneAssignedToCurCallsiteClone !=
5158 FuncCloneCalledByCaller)) {
// Reuse a callsite clone already split for that func clone...
5173 if (FuncCloneToNewCallsiteCloneMap.count(
5174 FuncCloneCalledByCaller)) {
5175 ContextNode *NewClone =
5176 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller];
5177 moveEdgeToExistingCalleeClone(
Edge, NewClone);
5179 removeNoneTypeCalleeEdges(NewClone);
// ...or split off a fresh one and queue it for assignment.
5182 ContextNode *NewClone = MoveEdgeToNewCalleeCloneAndSetUp(
Edge);
5183 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller] =
5186 ClonesWorklist.push_back(NewClone);
5190 removeNoneTypeCalleeEdges(Clone);
// No conflict: adopt the caller's function clone for this
// callsite clone if not yet assigned.
5198 if (!FuncCloneAssignedToCurCallsiteClone) {
5199 FuncCloneAssignedToCurCallsiteClone = FuncCloneCalledByCaller;
5201 AssignCallsiteCloneToFuncClone(
5202 FuncCloneCalledByCaller,
Call, Clone,
5203 AllocationCallToContextNodeMap.count(
Call));
5207 assert(FuncCloneAssignedToCurCallsiteClone ==
5208 FuncCloneCalledByCaller);
// Caller not yet committed: bind this clone to the first free
// function clone and record the caller's assignment.
5217 if (!FuncCloneAssignedToCurCallsiteClone) {
5218 FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
5219 assert(FuncCloneAssignedToCurCallsiteClone);
5221 AssignCallsiteCloneToFuncClone(
5222 FuncCloneAssignedToCurCallsiteClone,
Call, Clone,
5223 AllocationCallToContextNodeMap.count(
Call));
5225 assert(FuncCloneToCurNodeCloneMap
5226 [FuncCloneAssignedToCurCallsiteClone] == Clone);
5228 RecordCalleeFuncOfCallsite(
Edge->Caller,
5229 FuncCloneAssignedToCurCallsiteClone);
// Fallback after the caller loop: clone still unassigned gets the
// first available function clone.
5249 if (!FuncCloneAssignedToCurCallsiteClone) {
5250 FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
5251 assert(FuncCloneAssignedToCurCallsiteClone &&
5252 "No available func clone for this callsite clone");
5253 AssignCallsiteCloneToFuncClone(
5254 FuncCloneAssignedToCurCallsiteClone,
Call, Clone,
5255 AllocationCallToContextNodeMap.contains(
Call));
// Debug-only consistency walks (loop bodies lost in extraction).
5260 for (
const auto &PE :
Node->CalleeEdges)
5262 for (
const auto &CE :
Node->CallerEdges)
5264 for (
auto *Clone :
Node->Clones) {
5266 for (
const auto &PE : Clone->CalleeEdges)
5268 for (
const auto &CE : Clone->CallerEdges)
// With multiple function clones, collect call clones that never got a
// graph-clone assignment so they can still be redirected later.
5274 if (FuncCloneInfos.size() < 2)
5280 for (
auto &
Call : CallsWithMetadata) {
5281 ContextNode *
Node = getNodeForInst(
Call);
5282 if (!Node || !
Node->hasCall() ||
Node->emptyContextIds())
// All function clones covered by graph clones: nothing unassigned.
5288 if (
Node->Clones.size() + 1 >= FuncCloneInfos.size())
// Clone numbers already used by this node's graph clones.
5292 DenseSet<unsigned> NodeCallClones;
5293 for (
auto *
C :
Node->Clones)
5294 NodeCallClones.
insert(
C->Call.cloneNo());
5297 for (
auto &FC : FuncCloneInfos) {
// Skip the original function and clones already accounted for.
5302 if (++
I == 1 || NodeCallClones.
contains(
I)) {
5307 auto &CallVector = UnassignedCallClones[
Node][
I];
5308 DenseMap<CallInfo, CallInfo> &CallMap =
FC.CallMap;
5309 if (
auto It = CallMap.
find(
Call); It != CallMap.
end()) {
5310 CallInfo CallClone = It->second;
5311 CallVector.push_back(CallClone);
5315 assert(
false &&
"Expected to find call in CallMap");
// Matching calls get recorded for the same clone number too.
5318 for (
auto &MatchingCall :
Node->MatchingCalls) {
5319 if (
auto It = CallMap.
find(MatchingCall); It != CallMap.
end()) {
5320 CallInfo CallClone = It->second;
5321 CallVector.push_back(CallClone);
5325 assert(
false &&
"Expected to find call in CallMap");
// Combined "both" alloc-type mask (its named constant declaration is
// among the lines lost in extraction).
5333 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
// Recursive lambda: walk clones and callers, then rewrite this node's
// allocation attribute or call target.
5335 auto UpdateCalls = [&](ContextNode *
Node,
5336 DenseSet<const ContextNode *> &Visited,
5337 auto &&UpdateCalls) {
5338 auto Inserted = Visited.insert(Node);
5342 for (
auto *Clone :
Node->Clones)
5343 UpdateCalls(Clone, Visited, UpdateCalls);
5345 for (
auto &
Edge :
Node->CallerEdges)
5346 UpdateCalls(
Edge->Caller, Visited, UpdateCalls);
5350 if (!
Node->hasCall() ||
Node->emptyContextIds())
// Allocation node: decide the attribute from the node's alloc types,
// optionally re-deriving cold-ness from context size info.
5353 if (
Node->IsAllocation) {
5354 auto AT = allocTypeToUse(
Node->AllocTypes);
5360 !ContextIdToContextSizeInfos.empty()) {
5361 uint64_t TotalCold = 0;
5363 for (
auto Id :
Node->getContextIds()) {
5364 auto TypeI = ContextIdToAllocationType.find(Id);
5365 assert(TypeI != ContextIdToAllocationType.end());
5366 auto CSI = ContextIdToContextSizeInfos.find(Id);
5367 if (CSI != ContextIdToContextSizeInfos.end()) {
5368 for (
auto &
Info : CSI->second) {
5370 if (TypeI->second == AllocationType::Cold)
5371 TotalCold +=
Info.TotalSize;
// (Threshold comparison lost in extraction) force Cold here.
5376 AT = AllocationType::Cold;
5378 updateAllocationCall(
Node->Call, AT);
// Non-allocation callsite: redirect to the assigned function clone.
5383 if (!CallsiteToCalleeFuncCloneMap.
count(Node))
5386 auto CalleeFunc = CallsiteToCalleeFuncCloneMap[
Node];
5387 updateCall(
Node->Call, CalleeFunc);
5389 for (
auto &
Call :
Node->MatchingCalls)
5390 updateCall(
Call, CalleeFunc);
// Also redirect the recorded unassigned call clones for this node.
5394 if (!UnassignedCallClones.
contains(Node))
5396 DenseSet<unsigned> NodeCallClones;
5397 for (
auto *
C :
Node->Clones)
5398 NodeCallClones.
insert(
C->Call.cloneNo());
5400 auto &ClonedCalls = UnassignedCallClones[
Node];
5401 for (
auto &[CloneNo, CallVector] : ClonedCalls) {
// Skip clone numbers that did get a graph clone after all.
5405 if (NodeCallClones.
contains(CloneNo))
5408 for (
auto &
Call : CallVector)
5409 updateCall(
Call, CalleeFunc);
// Drive the rewriting from every allocation node.
5418 DenseSet<const ContextNode *> Visited;
5419 for (
auto &Entry : AllocationCallToContextNodeMap)
5420 UpdateCalls(
Entry.second, Visited, UpdateCalls);
5431 for (
auto &SN : FS->callsites()) {
5436 SN.Clones.size() >
I &&
5437 "Callsite summary has fewer entries than other summaries in function");
5438 if (SN.Clones.size() <=
I || !SN.Clones[
I])
5445 for (
auto &AN : FS->allocs()) {
5449 assert(AN.Versions.size() >
I &&
5450 "Alloc summary has fewer entries than other summaries in function");
5451 if (AN.Versions.size() <=
I ||
5468 NewGV->takeName(DeclGV);
5475 auto CloneFuncAliases = [&](
Function *NewF,
unsigned I) {
5476 if (!FuncToAliasMap.count(&
F))
5478 for (
auto *
A : FuncToAliasMap[&
F]) {
5480 auto *PrevA = M.getNamedAlias(AliasName);
5482 A->getType()->getPointerAddressSpace(),
5483 A->getLinkage(), AliasName, NewF);
5484 NewA->copyAttributesFrom(
A);
5486 TakeDeclNameAndReplace(PrevA, NewA);
5495 FunctionsClonedThinBackend++;
5512 for (
unsigned I = 1;
I < NumClones;
I++) {
5513 VMaps.
emplace_back(std::make_unique<ValueToValueMapTy>());
5520 FunctionCloneDuplicatesThinBackend++;
5521 auto *Func = HashToFunc[Hash];
5522 if (Func->hasAvailableExternallyLinkage()) {
5528 auto Decl = M.getOrInsertFunction(Name, Func->getFunctionType());
5530 <<
"created clone decl " <<
ore::NV(
"Decl", Decl.getCallee()));
5533 auto *PrevF = M.getFunction(Name);
5536 TakeDeclNameAndReplace(PrevF, Alias);
5538 <<
"created clone alias " <<
ore::NV(
"Alias", Alias));
5541 CloneFuncAliases(Func,
I);
5545 HashToFunc[Hash] = NewF;
5546 FunctionClonesThinBackend++;
5549 for (
auto &BB : *NewF) {
5550 for (
auto &Inst : BB) {
5551 Inst.setMetadata(LLVMContext::MD_memprof,
nullptr);
5552 Inst.setMetadata(LLVMContext::MD_callsite,
nullptr);
5557 TakeDeclNameAndReplace(PrevF, NewF);
5559 NewF->setName(Name);
5562 <<
"created clone " <<
ore::NV(
"NewFunction", NewF));
5565 CloneFuncAliases(NewF,
I);
5574 const Function *CallingFunc =
nullptr) {
5593 auto SrcFileMD =
F.getMetadata(
"thinlto_src_file");
5599 if (!SrcFileMD &&
F.isDeclaration()) {
5603 SrcFileMD = CallingFunc->getMetadata(
"thinlto_src_file");
5608 assert(SrcFileMD || OrigName ==
F.getName());
5610 StringRef SrcFile = M.getSourceFileName();
5622 if (!TheFnVI && OrigName ==
F.getName() &&
F.hasLocalLinkage() &&
5623 F.getName().contains(
'.')) {
5624 OrigName =
F.getName().rsplit(
'.').first;
5633 assert(TheFnVI ||
F.isDeclaration());
5637bool MemProfContextDisambiguation::initializeIndirectCallPromotionInfo(
5639 ICallAnalysis = std::make_unique<ICallPromotionAnalysis>();
5640 Symtab = std::make_unique<InstrProfSymtab>();
5651 if (
Error E = Symtab->create(M,
true,
false)) {
5652 std::string SymtabFailure =
toString(std::move(
E));
5653 M.getContext().emitError(
"Failed to create symtab: " + SymtabFailure);
5666 auto MIBIter = AllocNode.
MIBs.begin();
5667 for (
auto &MDOp : MemProfMD->
operands()) {
5669 auto StackIdIndexIter = MIBIter->StackIdIndices.begin();
5674 auto ContextIterBegin =
5678 (ContextIterBegin != StackContext.
end() && *ContextIterBegin == 0) ? 1
5680 for (
auto ContextIter = ContextIterBegin; ContextIter != StackContext.
end();
5685 if (LastStackContextId == *ContextIter)
5687 LastStackContextId = *ContextIter;
5688 assert(StackIdIndexIter != MIBIter->StackIdIndices.end());
5698bool MemProfContextDisambiguation::applyImport(
Module &M) {
5705 std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
5707 for (
auto &
A :
M.aliases()) {
5708 auto *Aliasee =
A.getAliaseeObject();
5710 FuncToAliasMap[
F].insert(&
A);
5713 if (!initializeIndirectCallPromotionInfo(M))
5720 OptimizationRemarkEmitter ORE(&
F);
5723 bool ClonesCreated =
false;
5724 unsigned NumClonesCreated = 0;
5725 auto CloneFuncIfNeeded = [&](
unsigned NumClones, FunctionSummary *
FS) {
5735 if (ClonesCreated) {
5736 assert(NumClonesCreated == NumClones);
5743 ClonesCreated =
true;
5744 NumClonesCreated = NumClones;
5747 auto CloneCallsite = [&](
const CallsiteInfo &StackNode, CallBase *CB,
5748 Function *CalledFunction, FunctionSummary *
FS) {
5750 CloneFuncIfNeeded(StackNode.
Clones.
size(), FS);
5762 if (CalledFunction != CB->getCalledOperand() &&
5763 (!GA || CalledFunction != GA->getAliaseeObject())) {
5764 SkippedCallsCloning++;
5770 auto CalleeOrigName = CalledFunction->getName();
5771 for (
unsigned J = 0; J < StackNode.
Clones.
size(); J++) {
5774 if (J > 0 && VMaps[J - 1]->
empty())
5778 if (!StackNode.
Clones[J])
5780 auto NewF =
M.getOrInsertFunction(
5782 CalledFunction->getFunctionType());
5796 ORE.emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofCall", CBClone)
5797 <<
ore::NV(
"Call", CBClone) <<
" in clone "
5799 <<
" assigned to call function clone "
5800 <<
ore::NV(
"Callee", NewF.getCallee()));
5814 ImportSummary->findSummaryInModule(TheFnVI,
M.getModuleIdentifier());
5818 auto SrcModuleMD =
F.getMetadata(
"thinlto_src_module");
5820 "enable-import-metadata is needed to emit thinlto_src_module");
5821 StringRef SrcModule =
5824 if (GVS->modulePath() == SrcModule) {
5825 GVSummary = GVS.get();
5829 assert(GVSummary && GVSummary->modulePath() == SrcModule);
5839 if (
FS->allocs().empty() &&
FS->callsites().empty())
5842 auto SI =
FS->callsites().begin();
5843 auto AI =
FS->allocs().begin();
5848 DenseMap<ValueInfo, CallsiteInfo> MapTailCallCalleeVIToCallsite;
5851 for (
auto CallsiteIt =
FS->callsites().rbegin();
5852 CallsiteIt !=
FS->callsites().rend(); CallsiteIt++) {
5853 auto &Callsite = *CallsiteIt;
5857 if (!Callsite.StackIdIndices.empty())
5859 MapTailCallCalleeVIToCallsite.
insert({Callsite.Callee, Callsite});
5868 for (
auto &BB :
F) {
5869 for (
auto &
I : BB) {
5875 auto *CalledValue = CB->getCalledOperand();
5876 auto *CalledFunction = CB->getCalledFunction();
5877 if (CalledValue && !CalledFunction) {
5878 CalledValue = CalledValue->stripPointerCasts();
5885 assert(!CalledFunction &&
5886 "Expected null called function in callsite for alias");
5890 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
5891 I.getMetadata(LLVMContext::MD_callsite));
5892 auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof);
5898 if (CB->getAttributes().hasFnAttr(
"memprof") && !MemProfMD) {
5899 CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
5900 ? AllocTypeColdThinBackend++
5901 : AllocTypeNotColdThinBackend++;
5902 OrigAllocsThinBackend++;
5903 AllocVersionsThinBackend++;
5904 if (!MaxAllocVersionsThinBackend)
5905 MaxAllocVersionsThinBackend = 1;
5912 auto &AllocNode = *(AI++);
5920 CloneFuncIfNeeded(AllocNode.Versions.size(), FS);
5922 OrigAllocsThinBackend++;
5923 AllocVersionsThinBackend += AllocNode.Versions.size();
5924 if (MaxAllocVersionsThinBackend < AllocNode.Versions.size())
5925 MaxAllocVersionsThinBackend = AllocNode.Versions.size();
5935 if (AllocNode.Versions.size() == 1 &&
5938 AllocationType::NotCold ||
5940 AllocationType::None);
5941 UnclonableAllocsThinBackend++;
5947 return Type == ((uint8_t)AllocationType::NotCold |
5948 (uint8_t)AllocationType::Cold);
5952 for (
unsigned J = 0; J < AllocNode.Versions.size(); J++) {
5955 if (J > 0 && VMaps[J - 1]->
empty())
5958 if (AllocNode.Versions[J] == (uint8_t)AllocationType::None)
5961 AllocTy == AllocationType::Cold ? AllocTypeColdThinBackend++
5962 : AllocTypeNotColdThinBackend++;
5977 ORE.emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofAttribute", CBClone)
5978 <<
ore::NV(
"AllocationCall", CBClone) <<
" in clone "
5980 <<
" marked with memprof allocation attribute "
5981 <<
ore::NV(
"Attribute", AllocTypeString));
5983 }
else if (!CallsiteContext.empty()) {
5984 if (!CalledFunction) {
5988 assert(!CI || !CI->isInlineAsm());
5998 recordICPInfo(CB,
FS->callsites(), SI, ICallAnalysisInfo);
6004 CloneFuncIfNeeded(NumClones, FS);
6009 assert(SI !=
FS->callsites().end());
6010 auto &StackNode = *(
SI++);
6016 for (
auto StackId : CallsiteContext) {
6018 assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) ==
6024 CloneCallsite(StackNode, CB, CalledFunction, FS);
6026 }
else if (CB->isTailCall() && CalledFunction) {
6029 ValueInfo CalleeVI =
6031 if (CalleeVI && MapTailCallCalleeVIToCallsite.
count(CalleeVI)) {
6032 auto Callsite = MapTailCallCalleeVIToCallsite.
find(CalleeVI);
6033 assert(Callsite != MapTailCallCalleeVIToCallsite.
end());
6034 CloneCallsite(Callsite->second, CB, CalledFunction, FS);
6041 performICP(M,
FS->callsites(), VMaps, ICallAnalysisInfo, ORE);
6051 for (
auto &BB :
F) {
6052 for (
auto &
I : BB) {
6055 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
6056 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
6064unsigned MemProfContextDisambiguation::recordICPInfo(
6069 uint32_t NumCandidates;
6070 uint64_t TotalCount;
6071 auto CandidateProfileData =
6072 ICallAnalysis->getPromotionCandidatesForInstruction(
6074 if (CandidateProfileData.empty())
6080 bool ICPNeeded =
false;
6081 unsigned NumClones = 0;
6082 size_t CallsiteInfoStartIndex = std::distance(AllCallsites.
begin(), SI);
6083 for (
const auto &Candidate : CandidateProfileData) {
6085 auto CalleeValueInfo =
6087 ImportSummary->getValueInfo(Candidate.Value);
6090 assert(!CalleeValueInfo ||
SI->Callee == CalleeValueInfo);
6092 auto &StackNode = *(
SI++);
6097 [](
unsigned CloneNo) { return CloneNo != 0; });
6107 ICallAnalysisInfo.
push_back({CB, CandidateProfileData.vec(), NumCandidates,
6108 TotalCount, CallsiteInfoStartIndex});
6112void MemProfContextDisambiguation::performICP(
6114 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
6116 OptimizationRemarkEmitter &ORE) {
6123 for (
auto &
Info : ICallAnalysisInfo) {
6126 auto TotalCount =
Info.TotalCount;
6127 unsigned NumPromoted = 0;
6128 unsigned NumClones = 0;
6130 for (
auto &Candidate :
Info.CandidateProfileData) {
6141 Function *TargetFunction = Symtab->getFunction(Candidate.Value);
6142 if (TargetFunction ==
nullptr ||
6150 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnableToFindTarget", CB)
6151 <<
"Memprof cannot promote indirect call: target with md5sum "
6152 <<
ore::NV(
"target md5sum", Candidate.Value) <<
" not found";
6161 const char *Reason =
nullptr;
6164 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnableToPromote", CB)
6165 <<
"Memprof cannot promote indirect call to "
6166 <<
ore::NV(
"TargetFunction", TargetFunction)
6167 <<
" with count of " <<
ore::NV(
"TotalCount", TotalCount)
6178 CallBase *CBClone = CB;
6179 for (
unsigned J = 0; J < NumClones; J++) {
6182 if (J > 0 && VMaps[J - 1]->
empty())
6192 TotalCount, isSamplePGO, &ORE);
6193 auto *TargetToUse = TargetFunction;
6196 if (StackNode.
Clones[J]) {
6215 <<
ore::NV(
"Call", CBClone) <<
" in clone "
6217 <<
" promoted and assigned to call function clone "
6218 <<
ore::NV(
"Callee", TargetToUse));
6222 TotalCount -= Candidate.Count;
6227 CallBase *CBClone = CB;
6228 for (
unsigned J = 0; J < NumClones; J++) {
6231 if (J > 0 && VMaps[J - 1]->
empty())
6237 CBClone->
setMetadata(LLVMContext::MD_prof,
nullptr);
6240 if (TotalCount != 0)
6242 M, *CBClone,
ArrayRef(
Info.CandidateProfileData).slice(NumPromoted),
6243 TotalCount, IPVK_IndirectCallTarget,
Info.NumCandidates);
6248template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
6249bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {
6251 dbgs() <<
"CCG before cloning:\n";
6255 exportToDot(
"postbuild");
6268 dbgs() <<
"CCG after cloning:\n";
6272 exportToDot(
"cloned");
6274 bool Changed = assignFunctions();
6277 dbgs() <<
"CCG after assigning function clones:\n";
6281 exportToDot(
"clonefuncassign");
6284 printTotalSizes(
errs());
6289bool MemProfContextDisambiguation::processModule(
6291 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) {
6296 return applyImport(M);
6309 ModuleCallsiteContextGraph CCG(M, OREGetter);
6310 return CCG.process();
6315 : ImportSummary(Summary), isSamplePGO(isSamplePGO) {
6320 "-memprof-dot-scope=alloc requires -memprof-dot-alloc-id");
6324 "-memprof-dot-scope=context requires -memprof-dot-context-id");
6328 "-memprof-dot-scope=all can't have both -memprof-dot-alloc-id and "
6329 "-memprof-dot-context-id");
6330 if (ImportSummary) {
6340 auto ReadSummaryFile =
6342 if (!ReadSummaryFile) {
6349 if (!ImportSummaryForTestingOrErr) {
6355 ImportSummaryForTesting = std::move(*ImportSummaryForTestingOrErr);
6356 ImportSummary = ImportSummaryForTesting.get();
6365 if (!processModule(M, OREGetter))
6382 IndexCallsiteContextGraph CCG(Index, isPrevailing);
6398 for (
auto &BB :
F) {
6399 for (
auto &
I : BB) {
6403 if (CI->hasFnAttr(
"memprof")) {
6404 CI->removeFnAttr(
"memprof");
6407 if (!CI->hasMetadata(LLVMContext::MD_callsite)) {
6408 assert(!CI->hasMetadata(LLVMContext::MD_memprof));
6414 CI->setMetadata(LLVMContext::MD_memprof,
nullptr);
6415 CI->setMetadata(LLVMContext::MD_callsite,
nullptr);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Prepare AGPR Alloc
Unify divergent function exit nodes
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
Module.h This file contains the declarations for the Module class.
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
static cl::opt< unsigned > TailCallSearchDepth("memprof-tail-call-search-depth", cl::init(5), cl::Hidden, cl::desc("Max depth to recursively search for missing " "frames through tail calls."))
uint64_t ComputeHash(const FunctionSummary *FS, unsigned I)
static cl::opt< DotScope > DotGraphScope("memprof-dot-scope", cl::desc("Scope of graph to export to dot"), cl::Hidden, cl::init(DotScope::All), cl::values(clEnumValN(DotScope::All, "all", "Export full callsite graph"), clEnumValN(DotScope::Alloc, "alloc", "Export only nodes with contexts feeding given " "-memprof-dot-alloc-id"), clEnumValN(DotScope::Context, "context", "Export only nodes with given -memprof-dot-context-id")))
static cl::opt< bool > DoMergeIteration("memprof-merge-iteration", cl::init(true), cl::Hidden, cl::desc("Iteratively apply merging on a node to catch new callers"))
static bool isMemProfClone(const Function &F)
static cl::opt< unsigned > AllocIdForDot("memprof-dot-alloc-id", cl::init(0), cl::Hidden, cl::desc("Id of alloc to export if -memprof-dot-scope=alloc " "or to highlight if -memprof-dot-scope=all"))
static cl::opt< unsigned > ContextIdForDot("memprof-dot-context-id", cl::init(0), cl::Hidden, cl::desc("Id of context to export if -memprof-dot-scope=context or to " "highlight otherwise"))
static cl::opt< bool > ExportToDot("memprof-export-to-dot", cl::init(false), cl::Hidden, cl::desc("Export graph to dot files."))
static void checkEdge(const std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > &Edge)
static cl::opt< bool > AllowRecursiveCallsites("memprof-allow-recursive-callsites", cl::init(true), cl::Hidden, cl::desc("Allow cloning of callsites involved in recursive cycles"))
bool checkColdOrNotCold(uint8_t AllocType)
static ValueInfo findValueInfoForFunc(const Function &F, const Module &M, const ModuleSummaryIndex *ImportSummary, const Function *CallingFunc=nullptr)
static cl::opt< bool > CloneRecursiveContexts("memprof-clone-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts through recursive cycles"))
static std::string getAllocTypeString(uint8_t AllocTypes)
static cl::opt< unsigned > MemProfICPNoInlineThreshold("memprof-icp-noinline-threshold", cl::init(2), cl::Hidden, cl::desc("Minimum absolute count for promoted target to be inlinable"))
bool DOTGraphTraits< constCallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * >::DoHighlight
static unsigned getMemProfCloneNum(const Function &F)
static SmallVector< std::unique_ptr< ValueToValueMapTy >, 4 > createFunctionClones(Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE, std::map< const Function *, SmallPtrSet< const GlobalAlias *, 1 > > &FuncToAliasMap, FunctionSummary *FS)
static cl::opt< bool > VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden, cl::desc("Perform verification checks on CallingContextGraph."))
static void checkNode(const ContextNode< DerivedCCG, FuncTy, CallTy > *Node, bool CheckEdges=true)
static cl::opt< bool > MergeClones("memprof-merge-clones", cl::init(true), cl::Hidden, cl::desc("Merge clones before assigning functions"))
static std::string getMemProfFuncName(Twine Base, unsigned CloneNo)
static cl::opt< std::string > MemProfImportSummary("memprof-import-summary", cl::desc("Import summary to use for testing the ThinLTO backend via opt"), cl::Hidden)
static const std::string MemProfCloneSuffix
static void updateSubprogramLinkageName(Function *NewFunc, StringRef Name)
static cl::opt< bool > AllowRecursiveContexts("memprof-allow-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts having recursive cycles"))
static cl::opt< std::string > DotFilePathPrefix("memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path prefix of the MemProf dot files."))
static cl::opt< bool > VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden, cl::desc("Perform frequent verification checks on nodes."))
static void checkAllocContextIds(const AllocInfo &AllocNode, const MDNode *MemProfMD, const CallStack< MDNode, MDNode::op_iterator > &CallsiteContext, const ModuleSummaryIndex *ImportSummary)
static cl::opt< bool > DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden, cl::desc("Dump CallingContextGraph to stdout after each stage."))
This is the interface to build a ModuleSummaryIndex for a module.
ModuleSummaryIndex.h This file contains the declarations the classes that hold the module index and s...
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
FunctionAnalysisManager FAM
std::pair< BasicBlock *, BasicBlock * > Edge
This file defines generic set operations that may be used on set's of different types,...
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
void print(OutputBuffer &OB) const
ValueInfo getAliaseeVI() const
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool empty() const
empty - Check if the array is empty.
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
void setCalledOperand(Value *V)
Subprogram description. Uses SubclassData1.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Implements a dense probed hash-table based set.
Function summary information to aid decisions and implementation of importing.
FunctionType * getFunctionType() const
Returns the FunctionType for me.
DISubprogram * getSubprogram() const
Get the attached subprogram.
const Function & getFunction() const
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
static LLVM_ABI GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Function and variable summary information to aid decisions and implementation of importing.
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
static bool isLocalLinkage(LinkageTypes Linkage)
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
uint64_t GUID
Declare a type to represent a global unique identifier for a global value.
Module * getParent()
Get the module that this global value is contained inside of...
LLVM_ABI void eraseFromParent()
This method unlinks 'this' from the containing module and deletes it.
static LLVM_ABI std::string getGlobalIdentifier(StringRef Name, GlobalValue::LinkageTypes Linkage, StringRef FileName)
Return the modified name for a global value suitable to be used as the key for a global lookup (e....
@ InternalLinkage
Rename collisions when linking (static functions).
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
LLVM_ABI TempMDNode clone() const
Create a (temporary) clone of this.
static std::enable_if_t< std::is_base_of< MDNode, T >::value, T * > replaceWithUniqued(std::unique_ptr< T, TempMDNodeDeleter > N)
Replace a temporary node with a uniqued one.
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
size_type count(const KeyT &Key) const
MemProfContextDisambiguation(const ModuleSummaryIndex *Summary=nullptr, bool isSamplePGO=false)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
Class to hold module path string table and global value map, and encapsulate methods for operating on...
static StringRef getOriginalNameBeforePromote(StringRef Name)
Helper to obtain the unpromoted name for a global value (or the original name if not promoted).
ValueInfo getValueInfo(const GlobalValueSummaryMapTy::value_type &R) const
Return a ValueInfo for the index value_type (convenient when iterating index).
uint64_t getStackIdAtIndex(unsigned Index) const
A Module instance is used to store all the information related to an LLVM module.
LLVMContext & getContext() const
Get the global data context.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
A discriminated union of two or more pointer types, with the discriminator in the low bit of the poin...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
A class that wrap the SHA1 algorithm.
LLVM_ABI void update(ArrayRef< uint8_t > Data)
Digest more data.
LLVM_ABI std::array< uint8_t, 20 > result()
Return the current raw 160-bits SHA1 for the digested data since the last call to init().
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
void insert_range(Range &&R)
void swap(DenseSetImpl &RHS)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
bool erase(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
An efficient, type-erasing, non-owning reference to a callable.
Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresp...
CallStackIterator beginAfterSharedPrefix(const CallStack &Other)
CallStackIterator end() const
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
@ CE
Windows NT (Windows on ARM)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
LLVM_ABI AllocationType getMIBAllocType(const MDNode *MIB)
Returns the allocation type from an MIB metadata node.
LLVM_ABI bool metadataMayIncludeContextSizeInfo()
Whether the alloc memprof metadata may include context size info for some MIBs (but possibly not all)...
LLVM_ABI bool hasSingleAllocType(uint8_t AllocTypes)
True if the AllocTypes bitmask contains just a single type.
LLVM_ABI std::string getAllocTypeAttributeString(AllocationType Type)
Returns the string to use in attributes with the given type.
LLVM_ABI MDNode * getMIBStackNode(const MDNode *MIB)
Returns the stack node from an MIB metadata node.
LLVM_ABI void removeAnyExistingAmbiguousAttribute(CallBase *CB)
Removes any existing "ambiguous" memprof attribute.
DiagnosticInfoOptimizationBase::Argument NV
LLVM_ABI CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< NodeBase * > Node
NodeAddr< FuncNode * > Func
friend class Instruction
Iterator for Instructions in a `BasicBlock.
uint64_t read64le(const void *P)
void write32le(void *P, uint32_t V)
This is an optimization pass for GlobalISel generic memory operations.
cl::opt< unsigned > MinClonedColdBytePercent("memprof-cloning-cold-threshold", cl::init(100), cl::Hidden, cl::desc("Min percent of cold bytes to hint alloc cold during cloning"))
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
LLVM_ABI void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner={})
Log all errors (if any) in E to OS.
FunctionAddr VTableAddr Value
void stable_sort(R &&Range)
cl::opt< bool > MemProfReportHintedSizes("memprof-report-hinted-sizes", cl::init(false), cl::Hidden, cl::desc("Report total allocation sizes of hinted allocations"))
LLVM_ABI bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
constexpr bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI bool mayHaveMemprofSummary(const CallBase *CB)
Returns true if the instruction could have memprof metadata, used to ensure consistency between summa...
constexpr from_range_t from_range
static cl::opt< bool > MemProfRequireDefinitionForPromotion("memprof-require-definition-for-promotion", cl::init(false), cl::Hidden, cl::desc("Require target function definition when promoting indirect calls"))
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
cl::opt< unsigned > MemProfTopNImportant("memprof-top-n-important", cl::init(10), cl::Hidden, cl::desc("Number of largest cold contexts to consider important"))
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
void set_subtract(S1Ty &S1, const S2Ty &S2)
set_subtract(A, B) - Compute A := A - B
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
bool set_intersects(const S1Ty &S1, const S2Ty &S2)
set_intersects(A, B) - Return true iff A ^ B is non empty
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
LLVM_ABI Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndex(MemoryBufferRef Buffer)
Parse the specified bitcode buffer, returning the module summary index.
auto dyn_cast_or_null(const Y &Val)
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
cl::opt< unsigned > MaxSummaryIndirectEdges("module-summary-max-indirect-edges", cl::init(0), cl::Hidden, cl::desc("Max number of summary edges added from " "indirect call profile metadata"))
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
FunctionAddr VTableAddr Count
bool set_union(S1Ty &S1, const S2Ty &S2)
set_union(A, B) - Compute A := A u B, return whether A changed.
cl::opt< bool > SupportsHotColdNew
Indicate we are linking with an allocator that supports hot/cold operator new interfaces.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
S1Ty set_intersection(const S1Ty &S1, const S2Ty &S2)
set_intersection(A, B) - Return A ^ B
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
S1Ty set_difference(const S1Ty &S1, const S2Ty &S2)
set_difference(A, B) - Return A - B
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Expected< T > errorOrToExpected(ErrorOr< T > &&EO)
Convert an ErrorOr<T> to an Expected<T>.
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
constexpr bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
LLVM_ABI Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
cl::opt< bool > MemProfFixupImportant("memprof-fixup-important", cl::init(true), cl::Hidden, cl::desc("Enables edge fixup for important contexts"))
DOTGraphTraits(bool IsSimple=false)
typename GTraits::NodeRef NodeRef
static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter, GraphType G)
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
typename GTraits::ChildIteratorType ChildIteratorType
static std::string getNodeAttributes(NodeRef Node, GraphType G)
static bool isNodeHidden(NodeRef Node, GraphType G)
static std::string getNodeLabel(NodeRef Node, GraphType G)
GraphTraits< GraphType > GTraits
static NodeRef getNode(const NodePtrTy &P)
static const ContextNode< DerivedCCG, FuncTy, CallTy > * GetCallee(const EdgePtrTy &P)
static ChildIteratorType child_end(NodeRef N)
std::unique_ptr< ContextNode< DerivedCCG, FuncTy, CallTy > > NodePtrTy
mapped_iterator< typename std::vector< std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > >::const_iterator, decltype(&GetCallee)> ChildIteratorType
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
const ContextNode< DerivedCCG, FuncTy, CallTy > * NodeRef
mapped_iterator< typename std::vector< NodePtrTy >::const_iterator, decltype(&getNode)> nodes_iterator
static ChildIteratorType child_begin(NodeRef N)
static NodeRef getEntryNode(GraphType G)
static nodes_iterator nodes_begin(GraphType G)
static nodes_iterator nodes_end(GraphType G)
std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > EdgePtrTy
Summary of memprof metadata on allocations.
std::vector< MIBInfo > MIBs
SmallVector< unsigned > StackIdIndices
SmallVector< unsigned > Clones
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits(bool simple=false)
An information struct used to provide DenseMap with the various necessary components for a given valu...
typename GraphType::UnknownGraphTypeError NodeRef
Struct that holds a reference to a particular GUID in a global value summary.
ArrayRef< std::unique_ptr< GlobalValueSummary > > getSummaryList() const
GlobalValue::GUID getGUID() const
PointerUnion< CallsiteInfo *, AllocInfo * > SimpleType
static SimpleType getSimplifiedValue(IndexCall &Val)
const PointerUnion< CallsiteInfo *, AllocInfo * > SimpleType
static SimpleType getSimplifiedValue(const IndexCall &Val)
Define a template that can be specialized by smart pointers to reflect the fact that they are automat...