49#include <unordered_map>
54#define DEBUG_TYPE "memprof-context-disambiguation"
57 "Number of function clones created during whole program analysis");
59 "Number of function clones created during ThinLTO backend");
61 "Number of functions that had clones created during ThinLTO backend");
62STATISTIC(AllocTypeNotCold,
"Number of not cold static allocations (possibly "
63 "cloned) during whole program analysis");
64STATISTIC(AllocTypeCold,
"Number of cold static allocations (possibly cloned) "
65 "during whole program analysis");
67 "Number of not cold static allocations (possibly cloned) during "
69STATISTIC(AllocTypeColdThinBackend,
"Number of cold static allocations "
70 "(possibly cloned) during ThinLTO backend");
72 "Number of original (not cloned) allocations with memprof profiles "
73 "during ThinLTO backend");
75 AllocVersionsThinBackend,
76 "Number of allocation versions (including clones) during ThinLTO backend");
78 "Maximum number of allocation versions created for an original "
79 "allocation during ThinLTO backend");
81 "Number of unclonable ambigous allocations during ThinLTO backend");
83 "Number of edges removed due to mismatched callees (profiled vs IR)");
85 "Number of profiled callees found via tail calls");
87 "Aggregate depth of profiled callees found via tail calls");
89 "Maximum depth of profiled callees found via tail calls");
91 "Number of profiled callees found via multiple tail call chains");
96 cl::desc(
"Specify the path prefix of the MemProf dot files."));
100 cl::desc(
"Export graph to dot files."));
104 cl::desc(
"Dump CallingContextGraph to stdout after each stage."));
108 cl::desc(
"Perform verification checks on CallingContextGraph."));
112 cl::desc(
"Perform frequent verification checks on nodes."));
115 "memprof-import-summary",
116 cl::desc(
"Import summary to use for testing the ThinLTO backend via opt"),
122 cl::desc(
"Max depth to recursively search for missing "
123 "frames through tail calls."));
128 cl::desc(
"Allow cloning of callsites involved in recursive cycles"));
137 cl::desc(
"Allow cloning of contexts through recursive cycles"));
148 cl::desc(
"Linking with hot/cold operator new interfaces"));
153 "Require target function definition when promoting indirect calls"));
174template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
175class CallsiteContextGraph {
177 CallsiteContextGraph() =
default;
178 CallsiteContextGraph(
const CallsiteContextGraph &) =
default;
179 CallsiteContextGraph(CallsiteContextGraph &&) =
default;
186 void identifyClones();
193 bool assignFunctions();
200 const CallsiteContextGraph &CCG) {
206 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
208 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
210 void exportToDot(std::string Label)
const;
213 struct FuncInfo final
214 :
public std::pair<FuncTy *, unsigned > {
215 using Base = std::pair<FuncTy *, unsigned>;
216 FuncInfo(
const Base &
B) :
Base(
B) {}
217 FuncInfo(FuncTy *
F =
nullptr,
unsigned CloneNo = 0) :
Base(
F, CloneNo) {}
218 explicit operator bool()
const {
return this->first !=
nullptr; }
219 FuncTy *
func()
const {
return this->first; }
220 unsigned cloneNo()
const {
return this->second; }
224 struct CallInfo final :
public std::pair<CallTy, unsigned > {
225 using Base = std::pair<CallTy, unsigned>;
227 CallInfo(CallTy Call =
nullptr,
unsigned CloneNo = 0)
229 explicit operator bool()
const {
return (
bool)this->first; }
230 CallTy call()
const {
return this->first; }
231 unsigned cloneNo()
const {
return this->second; }
232 void setCloneNo(
unsigned N) { this->second =
N; }
234 if (!
operator bool()) {
240 OS <<
"\t(clone " << cloneNo() <<
")";
263 bool Recursive =
false;
290 std::vector<std::shared_ptr<ContextEdge>> CalleeEdges;
294 std::vector<std::shared_ptr<ContextEdge>> CallerEdges;
298 const std::vector<std::shared_ptr<ContextEdge>> *
299 getEdgesWithAllocInfo()
const {
302 if (!CalleeEdges.empty())
304 if (!CallerEdges.empty()) {
317 auto *Edges = getEdgesWithAllocInfo();
321 for (
auto &Edge : *Edges)
322 Count += Edge->getContextIds().size();
324 for (
auto &Edge : *Edges)
325 ContextIds.
insert(Edge->getContextIds().begin(),
326 Edge->getContextIds().end());
332 uint8_t computeAllocType()
const {
333 auto *Edges = getEdgesWithAllocInfo();
335 return (
uint8_t)AllocationType::None;
339 for (
auto &Edge : *Edges) {
350 bool emptyContextIds()
const {
351 auto *Edges = getEdgesWithAllocInfo();
354 for (
auto &Edge : *Edges) {
355 if (!Edge->getContextIds().empty())
362 std::vector<ContextNode *> Clones;
365 ContextNode *CloneOf =
nullptr;
367 ContextNode(
bool IsAllocation) : IsAllocation(IsAllocation),
Call() {}
369 ContextNode(
bool IsAllocation,
CallInfo C)
370 : IsAllocation(IsAllocation),
Call(
C) {}
372 void addClone(ContextNode *Clone) {
374 CloneOf->Clones.push_back(Clone);
375 Clone->CloneOf = CloneOf;
377 Clones.push_back(Clone);
379 Clone->CloneOf =
this;
383 ContextNode *getOrigNode() {
390 unsigned int ContextId);
392 ContextEdge *findEdgeFromCallee(
const ContextNode *Callee);
393 ContextEdge *findEdgeFromCaller(
const ContextNode *Caller);
394 void eraseCalleeEdge(
const ContextEdge *Edge);
395 void eraseCallerEdge(
const ContextEdge *Edge);
399 bool hasCall()
const {
return (
bool)
Call.call(); }
405 bool isRemoved()
const {
408 return AllocTypes == (
uint8_t)AllocationType::None;
436 ContextIds(
std::
move(ContextIds)) {}
442 inline void clear() {
444 AllocTypes = (
uint8_t)AllocationType::None;
452 inline bool isRemoved()
const {
453 if (Callee || Caller)
474 void removeNoneTypeCalleeEdges(ContextNode *
Node);
475 void removeNoneTypeCallerEdges(ContextNode *
Node);
477 recursivelyRemoveNoneTypeCalleeEdges(ContextNode *
Node,
483 template <
class NodeT,
class IteratorT>
484 std::vector<uint64_t>
489 ContextNode *addAllocNode(
CallInfo Call,
const FuncTy *
F);
492 template <
class NodeT,
class IteratorT>
493 void addStackNodesForMIB(ContextNode *AllocNode,
502 void updateStackNodes();
506 void handleCallsitesWithMultipleTargets();
512 bool partitionCallsByCallee(
514 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode);
521 std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;
524 using EdgeIter =
typename std::vector<std::shared_ptr<ContextEdge>>::iterator;
529 struct CallContextInfo {
533 std::vector<uint64_t> StackIds;
547 void removeEdgeFromGraph(ContextEdge *Edge, EdgeIter *EI =
nullptr,
548 bool CalleeIter =
true);
556 void assignStackNodesPostOrder(
569 void propagateDuplicateContextIds(
575 void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode,
583 return static_cast<const DerivedCCG *
>(
this)->getStackId(IdOrIndex);
593 calleesMatch(CallTy Call, EdgeIter &EI,
598 const FuncTy *getCalleeFunc(CallTy Call) {
599 return static_cast<DerivedCCG *
>(
this)->getCalleeFunc(Call);
605 bool calleeMatchesFunc(
606 CallTy Call,
const FuncTy *Func,
const FuncTy *CallerFunc,
607 std::vector<std::pair<CallTy, FuncTy *>> &FoundCalleeChain) {
608 return static_cast<DerivedCCG *
>(
this)->calleeMatchesFunc(
609 Call, Func, CallerFunc, FoundCalleeChain);
613 bool sameCallee(CallTy Call1, CallTy Call2) {
614 return static_cast<DerivedCCG *
>(
this)->sameCallee(Call1, Call2);
619 std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy Call) {
620 return static_cast<DerivedCCG *
>(
this)->getStackIdsWithContextNodesForCall(
625 uint64_t getLastStackId(CallTy Call) {
626 return static_cast<DerivedCCG *
>(
this)->getLastStackId(Call);
631 AllocType == AllocationType::Cold ? AllocTypeCold++ : AllocTypeNotCold++;
632 static_cast<DerivedCCG *
>(
this)->updateAllocationCall(Call,
AllocType);
637 return static_cast<const DerivedCCG *
>(
this)->getAllocationCallType(Call);
642 void updateCall(
CallInfo &CallerCall, FuncInfo CalleeFunc) {
643 static_cast<DerivedCCG *
>(
this)->updateCall(CallerCall, CalleeFunc);
649 FuncInfo cloneFunctionForCallsite(
650 FuncInfo &Func,
CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
651 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
652 return static_cast<DerivedCCG *
>(
this)->cloneFunctionForCallsite(
653 Func, Call, CallMap, CallsWithMetadataInFunc, CloneNo);
658 std::string getLabel(
const FuncTy *Func,
const CallTy Call,
659 unsigned CloneNo)
const {
660 return static_cast<const DerivedCCG *
>(
this)->getLabel(Func, Call, CloneNo);
664 ContextNode *createNewNode(
bool IsAllocation,
const FuncTy *
F =
nullptr,
666 NodeOwner.push_back(std::make_unique<ContextNode>(IsAllocation,
C));
667 auto *NewNode = NodeOwner.back().get();
669 NodeToCallingFunc[NewNode] =
F;
674 ContextNode *getNodeForInst(
const CallInfo &
C);
675 ContextNode *getNodeForAlloc(
const CallInfo &
C);
676 ContextNode *getNodeForStackId(
uint64_t StackId);
698 moveEdgeToNewCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
705 void moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
706 ContextNode *NewCallee,
707 bool NewClone =
false,
715 void moveCalleeEdgeToNewCaller(
const std::shared_ptr<ContextEdge> &Edge,
716 ContextNode *NewCaller);
745 std::vector<std::unique_ptr<ContextNode>> NodeOwner;
751 unsigned int LastContextId = 0;
754template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
756 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode;
757template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
759 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge;
760template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
762 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::FuncInfo;
763template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
765 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::CallInfo;
768class ModuleCallsiteContextGraph
769 :
public CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
772 ModuleCallsiteContextGraph(
777 friend CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
782 bool calleeMatchesFunc(
784 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain);
786 bool findProfiledCalleeThroughTailCalls(
788 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
789 bool &FoundMultipleCalleeChains);
791 std::vector<uint64_t> getStackIdsWithContextNodesForCall(
Instruction *Call);
794 void updateCall(
CallInfo &CallerCall, FuncInfo CalleeFunc);
795 CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
797 cloneFunctionForCallsite(FuncInfo &Func,
CallInfo &Call,
798 std::map<CallInfo, CallInfo> &CallMap,
799 std::vector<CallInfo> &CallsWithMetadataInFunc,
802 unsigned CloneNo)
const;
811struct IndexCall :
public PointerUnion<CallsiteInfo *, AllocInfo *> {
813 IndexCall(std::nullptr_t) : IndexCall() {}
818 IndexCall *operator->() {
return this; }
822 if (
auto *AI = llvm::dyn_cast_if_present<AllocInfo *>(
Base)) {
825 auto *CI = llvm::dyn_cast_if_present<CallsiteInfo *>(
Base);
846class IndexCallsiteContextGraph
847 :
public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
850 IndexCallsiteContextGraph(
855 ~IndexCallsiteContextGraph() {
860 for (
auto &
I : FunctionCalleesToSynthesizedCallsiteInfos) {
862 for (
auto &Callsite :
I.second)
863 FS->addCallsite(*Callsite.second);
868 friend CallsiteContextGraph<IndexCallsiteContextGraph,
FunctionSummary,
873 bool calleeMatchesFunc(
876 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain);
877 bool sameCallee(IndexCall &Call1, IndexCall &Call2);
878 bool findProfiledCalleeThroughTailCalls(
880 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
881 bool &FoundMultipleCalleeChains);
882 uint64_t getLastStackId(IndexCall &Call);
883 std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &Call);
886 void updateCall(
CallInfo &CallerCall, FuncInfo CalleeFunc);
889 cloneFunctionForCallsite(FuncInfo &Func,
CallInfo &Call,
890 std::map<CallInfo, CallInfo> &CallMap,
891 std::vector<CallInfo> &CallsWithMetadataInFunc,
893 std::string getLabel(
const FunctionSummary *Func,
const IndexCall &Call,
894 unsigned CloneNo)
const;
898 std::map<const FunctionSummary *, ValueInfo> FSToVIMap;
909 std::map<ValueInfo, std::unique_ptr<CallsiteInfo>>>
910 FunctionCalleesToSynthesizedCallsiteInfos;
922 :
public DenseMapInfo<std::pair<IndexCall, unsigned>> {};
925 :
public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {};
938 ((
uint8_t)AllocationType::NotCold | (
uint8_t)AllocationType::Cold))
939 return AllocationType::NotCold;
947template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
949 const std::vector<uint8_t> &InAllocTypes,
950 const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>>
954 assert(InAllocTypes.size() == Edges.size());
956 InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(), Edges.end(),
958 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &r) {
962 if (l == (uint8_t)AllocationType::None ||
963 r->AllocTypes == (uint8_t)AllocationType::None)
965 return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes);
974template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
975bool allocTypesMatchClone(
976 const std::vector<uint8_t> &InAllocTypes,
977 const ContextNode<DerivedCCG, FuncTy, CallTy> *Clone) {
978 const ContextNode<DerivedCCG, FuncTy, CallTy> *
Node = Clone->CloneOf;
982 assert(InAllocTypes.size() ==
Node->CalleeEdges.size());
986 for (
const auto &E : Clone->CalleeEdges) {
988 EdgeCalleeMap[E->Callee] = E->AllocTypes;
992 for (
unsigned I = 0;
I <
Node->CalleeEdges.size();
I++) {
993 auto Iter = EdgeCalleeMap.
find(
Node->CalleeEdges[
I]->Callee);
995 if (Iter == EdgeCalleeMap.
end())
1000 if (InAllocTypes[
I] == (
uint8_t)AllocationType::None ||
1001 Iter->second == (
uint8_t)AllocationType::None)
1003 if (allocTypeToUse(Iter->second) != allocTypeToUse(InAllocTypes[
I]))
1011template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1012typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1013CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst(
1015 ContextNode *
Node = getNodeForAlloc(
C);
1019 return NonAllocationCallToContextNodeMap.lookup(
C);
1022template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1023typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1024CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForAlloc(
1026 return AllocationCallToContextNodeMap.lookup(
C);
1029template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1030typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1031CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForStackId(
1033 auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId);
1034 if (StackEntryNode != StackEntryIdToContextNodeMap.end())
1035 return StackEntryNode->second;
1039template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1040void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1042 unsigned int ContextId) {
1043 for (
auto &Edge : CallerEdges) {
1044 if (Edge->Caller == Caller) {
1046 Edge->getContextIds().insert(ContextId);
1050 std::shared_ptr<ContextEdge> Edge = std::make_shared<ContextEdge>(
1052 CallerEdges.push_back(Edge);
1053 Caller->CalleeEdges.push_back(Edge);
1056template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1057void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::removeEdgeFromGraph(
1058 ContextEdge *Edge, EdgeIter *EI,
bool CalleeIter) {
1059 assert(!EI || (*EI)->get() == Edge);
1064 auto *
Callee = Edge->Callee;
1065 auto *
Caller = Edge->Caller;
1073 Callee->eraseCallerEdge(Edge);
1074 Caller->eraseCalleeEdge(Edge);
1075 }
else if (CalleeIter) {
1076 Callee->eraseCallerEdge(Edge);
1077 *EI =
Caller->CalleeEdges.erase(*EI);
1079 Caller->eraseCalleeEdge(Edge);
1080 *EI =
Callee->CallerEdges.erase(*EI);
1084template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1085void CallsiteContextGraph<
1086 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *
Node) {
1087 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();) {
1089 if (Edge->AllocTypes == (
uint8_t)AllocationType::None) {
1090 assert(Edge->ContextIds.empty());
1091 removeEdgeFromGraph(Edge.get(), &EI,
true);
1097template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1098void CallsiteContextGraph<
1099 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCallerEdges(ContextNode *
Node) {
1100 for (
auto EI =
Node->CallerEdges.begin(); EI !=
Node->CallerEdges.end();) {
1102 if (Edge->AllocTypes == (
uint8_t)AllocationType::None) {
1103 assert(Edge->ContextIds.empty());
1104 Edge->Caller->eraseCalleeEdge(Edge.get());
1105 EI =
Node->CallerEdges.erase(EI);
1111template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1112typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1113CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1114 findEdgeFromCallee(
const ContextNode *Callee) {
1115 for (
const auto &Edge : CalleeEdges)
1116 if (Edge->Callee == Callee)
1121template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1122typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1123CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1124 findEdgeFromCaller(
const ContextNode *Caller) {
1125 for (
const auto &Edge : CallerEdges)
1126 if (Edge->Caller == Caller)
1131template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1132void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1133 eraseCalleeEdge(
const ContextEdge *Edge) {
1135 CalleeEdges, [Edge](
const std::shared_ptr<ContextEdge> &CalleeEdge) {
1136 return CalleeEdge.get() == Edge;
1138 assert(EI != CalleeEdges.end());
1139 CalleeEdges.erase(EI);
1142template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1143void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1144 eraseCallerEdge(
const ContextEdge *Edge) {
1146 CallerEdges, [Edge](
const std::shared_ptr<ContextEdge> &CallerEdge) {
1147 return CallerEdge.get() == Edge;
1149 assert(EI != CallerEdges.end());
1150 CallerEdges.erase(EI);
1153template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1154uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType(
1157 (
uint8_t)AllocationType::Cold | (
uint8_t)AllocationType::NotCold;
1159 for (
auto Id : ContextIds) {
1168template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1170CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypesImpl(
1173 (
uint8_t)AllocationType::Cold | (
uint8_t)AllocationType::NotCold;
1175 for (
auto Id : Node1Ids) {
1176 if (!Node2Ids.
count(Id))
1186template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1187uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypes(
1189 if (Node1Ids.
size() < Node2Ids.
size())
1190 return intersectAllocTypesImpl(Node1Ids, Node2Ids);
1192 return intersectAllocTypesImpl(Node2Ids, Node1Ids);
1195template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1196typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1197CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode(
1199 assert(!getNodeForAlloc(Call));
1200 ContextNode *AllocNode = createNewNode(
true,
F, Call);
1201 AllocationCallToContextNodeMap[
Call] = AllocNode;
1203 AllocNode->OrigStackOrAllocId = LastContextId;
1206 AllocNode->AllocTypes = (
uint8_t)AllocationType::None;
1215 if (AllocTypes & (
uint8_t)AllocationType::NotCold)
1217 if (AllocTypes & (
uint8_t)AllocationType::Cold)
1222template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1223template <
class NodeT,
class IteratorT>
1224void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
1233 ContextIdToAllocationType[++LastContextId] =
AllocType;
1235 if (!ContextSizeInfo.
empty()) {
1236 auto &
Entry = ContextIdToContextSizeInfos[LastContextId];
1246 ContextNode *PrevNode = AllocNode;
1253 ContextIter != StackContext.
end(); ++ContextIter) {
1254 auto StackId = getStackId(*ContextIter);
1255 ContextNode *
StackNode = getNodeForStackId(StackId);
1258 StackEntryIdToContextNodeMap[StackId] =
StackNode;
1259 StackNode->OrigStackOrAllocId = StackId;
1274template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1276CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds(
1280 for (
auto OldId : StackSequenceContextIds) {
1281 NewContextIds.
insert(++LastContextId);
1282 OldToNewContextIds[OldId].insert(LastContextId);
1283 assert(ContextIdToAllocationType.count(OldId));
1285 ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId];
1287 return NewContextIds;
1290template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1291void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1292 propagateDuplicateContextIds(
1297 for (
auto Id : ContextIds)
1298 if (
auto NewId = OldToNewContextIds.find(Id);
1299 NewId != OldToNewContextIds.end())
1300 NewIds.
insert(NewId->second.begin(), NewId->second.end());
1305 auto UpdateCallers = [&](ContextNode *
Node,
1307 auto &&UpdateCallers) ->
void {
1308 for (
const auto &Edge :
Node->CallerEdges) {
1309 auto Inserted = Visited.insert(Edge.get());
1312 ContextNode *NextNode = Edge->Caller;
1316 if (!NewIdsToAdd.
empty()) {
1317 Edge->getContextIds().insert(NewIdsToAdd.
begin(), NewIdsToAdd.
end());
1318 UpdateCallers(NextNode, Visited, UpdateCallers);
1324 for (
auto &Entry : AllocationCallToContextNodeMap) {
1326 UpdateCallers(
Node, Visited, UpdateCallers);
1330template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1331void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
1332 ContextNode *NewNode, ContextNode *OrigNode,
bool TowardsCallee,
1337 TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges;
1339 for (
auto EI = OrigEdges.begin(); EI != OrigEdges.end();) {
1345 set_subtract(Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds,
1346 NotFoundContextIds);
1347 RemainingContextIds.
swap(NotFoundContextIds);
1349 if (NewEdgeContextIds.
empty()) {
1353 if (TowardsCallee) {
1354 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1355 auto NewEdge = std::make_shared<ContextEdge>(
1356 Edge->Callee, NewNode, NewAllocType, std::move(NewEdgeContextIds));
1357 NewNode->CalleeEdges.push_back(NewEdge);
1358 NewEdge->Callee->CallerEdges.push_back(NewEdge);
1360 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1361 auto NewEdge = std::make_shared<ContextEdge>(
1362 NewNode, Edge->Caller, NewAllocType, std::move(NewEdgeContextIds));
1363 NewNode->CallerEdges.push_back(NewEdge);
1364 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
1367 if (Edge->getContextIds().empty()) {
1368 removeEdgeFromGraph(Edge.get(), &EI, TowardsCallee);
1375template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1377 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) {
1381 assert(!Edge->ContextIds.empty());
1384template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1386 bool CheckEdges =
true) {
1387 if (
Node->isRemoved())
1391 auto NodeContextIds =
Node->getContextIds();
1395 if (
Node->CallerEdges.size()) {
1397 Node->CallerEdges.front()->ContextIds);
1400 checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
1401 set_union(CallerEdgeContextIds, Edge->ContextIds);
1408 NodeContextIds == CallerEdgeContextIds ||
1411 if (
Node->CalleeEdges.size()) {
1413 Node->CalleeEdges.front()->ContextIds);
1416 checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
1417 set_union(CalleeEdgeContextIds, Edge->getContextIds());
1419 assert(NodeContextIds == CalleeEdgeContextIds);
1428 for (
const auto &E :
Node->CalleeEdges)
1434template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1435void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1436 assignStackNodesPostOrder(
1439 &StackIdToMatchingCalls,
1448 auto CallerEdges =
Node->CallerEdges;
1449 for (
auto &Edge : CallerEdges) {
1451 if (Edge->isRemoved()) {
1455 assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls,
1456 CallToMatchingCall);
1465 if (
Node->IsAllocation ||
1466 !StackIdToMatchingCalls.count(
Node->OrigStackOrAllocId))
1469 auto &Calls = StackIdToMatchingCalls[
Node->OrigStackOrAllocId];
1473 if (Calls.size() == 1) {
1474 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[0];
1475 if (Ids.size() == 1) {
1476 assert(SavedContextIds.empty());
1479 if (
Node->Recursive)
1481 Node->setCall(Call);
1482 NonAllocationCallToContextNodeMap[
Call] =
Node;
1492 ContextNode *LastNode = getNodeForStackId(LastId);
1497 ContextNode *LastNode =
Node;
1504 bool PrevIterCreatedNode =
false;
1505 bool CreatedNode =
false;
1506 for (
unsigned I = 0;
I < Calls.size();
1507 I++, PrevIterCreatedNode = CreatedNode) {
1508 CreatedNode =
false;
1509 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
1512 if (SavedContextIds.empty()) {
1517 if (!CallToMatchingCall.
contains(Call))
1519 auto MatchingCall = CallToMatchingCall[
Call];
1520 if (!NonAllocationCallToContextNodeMap.contains(MatchingCall)) {
1524 assert(
I > 0 && !PrevIterCreatedNode);
1527 NonAllocationCallToContextNodeMap[MatchingCall]->MatchingCalls.push_back(
1532 assert(LastId == Ids.back());
1541 ContextNode *PrevNode = LastNode;
1545 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
1547 ContextNode *CurNode = getNodeForStackId(Id);
1551 assert(!CurNode->Recursive);
1553 auto *Edge = CurNode->findEdgeFromCaller(PrevNode);
1565 if (SavedContextIds.empty()) {
1574 ContextNode *NewNode = createNewNode(
false, Func, Call);
1575 NonAllocationCallToContextNodeMap[
Call] = NewNode;
1577 NewNode->AllocTypes = computeAllocType(SavedContextIds);
1579 ContextNode *FirstNode = getNodeForStackId(Ids[0]);
1585 connectNewNode(NewNode, FirstNode,
true, SavedContextIds);
1590 connectNewNode(NewNode, LastNode,
false, SavedContextIds);
1595 for (
auto Id : Ids) {
1596 ContextNode *CurNode = getNodeForStackId(Id);
1603 auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode);
1605 set_subtract(PrevEdge->getContextIds(), SavedContextIds);
1606 if (PrevEdge->getContextIds().empty())
1607 removeEdgeFromGraph(PrevEdge);
1612 CurNode->AllocTypes = CurNode->CalleeEdges.empty()
1613 ? (
uint8_t)AllocationType::None
1614 : CurNode->computeAllocType();
1618 checkNode<DerivedCCG, FuncTy, CallTy>(NewNode,
true);
1619 for (
auto Id : Ids) {
1620 ContextNode *CurNode = getNodeForStackId(Id);
1623 checkNode<DerivedCCG, FuncTy, CallTy>(CurNode,
true);
1629template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1630void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
1639 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
1640 for (
auto &Call : CallsWithMetadata) {
1642 if (AllocationCallToContextNodeMap.count(Call))
1644 auto StackIdsWithContextNodes =
1645 getStackIdsWithContextNodesForCall(
Call.call());
1648 if (StackIdsWithContextNodes.empty())
1652 StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back(
1653 {
Call.call(), StackIdsWithContextNodes, Func, {}});
1668 for (
auto &It : StackIdToMatchingCalls) {
1669 auto &Calls = It.getSecond();
1671 if (Calls.size() == 1) {
1672 auto &Ids = Calls[0].StackIds;
1673 if (Ids.size() == 1)
1687 for (
const auto &[
Idx, CallCtxInfo] :
enumerate(Calls))
1688 FuncToIndex.
insert({CallCtxInfo.Func,
Idx});
1690 Calls.begin(), Calls.end(),
1691 [&FuncToIndex](
const CallContextInfo &
A,
const CallContextInfo &
B) {
1692 return A.StackIds.size() > B.StackIds.size() ||
1693 (A.StackIds.size() == B.StackIds.size() &&
1694 (A.StackIds < B.StackIds ||
1695 (A.StackIds == B.StackIds &&
1696 FuncToIndex[A.Func] < FuncToIndex[B.Func])));
1703 ContextNode *LastNode = getNodeForStackId(LastId);
1707 if (LastNode->Recursive)
1723 for (
unsigned I = 0;
I < Calls.size();
I++) {
1724 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
1725 assert(SavedContextIds.empty());
1726 assert(LastId == Ids.back());
1731 if (
I > 0 && Ids != Calls[
I - 1].StackIds)
1732 MatchingIdsFuncSet.
clear();
1741 ContextNode *PrevNode = LastNode;
1742 ContextNode *CurNode = LastNode;
1747 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
1749 CurNode = getNodeForStackId(Id);
1753 if (CurNode->Recursive) {
1758 auto *Edge = CurNode->findEdgeFromCaller(PrevNode);
1776 set_intersect(StackSequenceContextIds, Edge->getContextIds());
1779 if (StackSequenceContextIds.
empty()) {
1792 if (Ids.back() != getLastStackId(Call)) {
1793 for (
const auto &PE : LastNode->CallerEdges) {
1794 set_subtract(StackSequenceContextIds, PE->getContextIds());
1795 if (StackSequenceContextIds.
empty())
1799 if (StackSequenceContextIds.
empty())
1811 MatchingIdsFuncSet.
insert(Func);
1818 bool DuplicateContextIds =
false;
1819 for (
unsigned J =
I + 1; J < Calls.size(); J++) {
1820 auto &CallCtxInfo = Calls[J];
1821 auto &NextIds = CallCtxInfo.StackIds;
1824 auto *NextFunc = CallCtxInfo.Func;
1825 if (NextFunc != Func) {
1828 DuplicateContextIds =
true;
1831 auto &NextCall = CallCtxInfo.Call;
1832 CallToMatchingCall[NextCall] =
Call;
1843 OldToNewContextIds.
reserve(OldToNewContextIds.
size() +
1844 StackSequenceContextIds.
size());
1847 ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds)
1848 : StackSequenceContextIds;
1849 assert(!SavedContextIds.empty());
1851 if (!DuplicateContextIds) {
1855 set_subtract(LastNodeContextIds, StackSequenceContextIds);
1856 if (LastNodeContextIds.
empty())
1863 propagateDuplicateContextIds(OldToNewContextIds);
1874 for (
auto &Entry : AllocationCallToContextNodeMap)
1875 assignStackNodesPostOrder(
Entry.second, Visited, StackIdToMatchingCalls,
1876 CallToMatchingCall);
1883 Call->getMetadata(LLVMContext::MD_callsite));
1884 return CallsiteContext.
back();
1887uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &Call) {
1888 assert(isa<CallsiteInfo *>(Call));
1890 CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(Call));
1892 return Index.getStackIdAtIndex(CallsiteContext.
back());
1909std::string ModuleCallsiteContextGraph::getLabel(
const Function *Func,
1911 unsigned CloneNo)
const {
1912 return (
Twine(
Call->getFunction()->getName()) +
" -> " +
1913 cast<CallBase>(Call)->getCalledFunction()->getName())
1917std::string IndexCallsiteContextGraph::getLabel(
const FunctionSummary *Func,
1918 const IndexCall &Call,
1919 unsigned CloneNo)
const {
1920 auto VI = FSToVIMap.find(Func);
1921 assert(VI != FSToVIMap.end());
1922 if (isa<AllocInfo *>(Call))
1923 return (
VI->second.name() +
" -> alloc").str();
1925 auto *Callsite = dyn_cast_if_present<CallsiteInfo *>(Call);
1926 return (
VI->second.name() +
" -> " +
1928 Callsite->Clones[CloneNo]))
1933std::vector<uint64_t>
1934ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall(
1937 Call->getMetadata(LLVMContext::MD_callsite));
1938 return getStackIdsWithContextNodes<MDNode, MDNode::op_iterator>(
1942std::vector<uint64_t>
1943IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &Call) {
1944 assert(isa<CallsiteInfo *>(Call));
1946 CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(Call));
1952template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1953template <
class NodeT,
class IteratorT>
1954std::vector<uint64_t>
1955CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes(
1957 std::vector<uint64_t> StackIds;
1958 for (
auto IdOrIndex : CallsiteContext) {
1959 auto StackId = getStackId(IdOrIndex);
1960 ContextNode *
Node = getNodeForStackId(StackId);
1963 StackIds.push_back(StackId);
// ModuleCallsiteContextGraph constructor: builds the callsite context
// graph from IR metadata. For every instruction carrying !memprof
// metadata it creates an allocation node and per-MIB stack nodes; for
// instructions with only !callsite metadata it records them as callsite
// calls. After construction it fixes up stack chains
// (handleCallsitesWithMultipleTargets) and strips the consumed metadata.
1968ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
1971 :
Mod(
M), OREGetter(OREGetter) {
1973 std::vector<CallInfo> CallsWithMetadata;
1974 for (
auto &BB :
F) {
1975 for (
auto &
I : BB) {
// Only call instructions can carry memprof/callsite metadata.
1976 if (!isa<CallBase>(
I))
1978 if (
auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof)) {
1979 CallsWithMetadata.push_back(&
I);
1980 auto *AllocNode = addAllocNode(&
I, &
F);
1981 auto *CallsiteMD =
I.getMetadata(LLVMContext::MD_callsite);
// Each !memprof operand is one MIB (allocation context) node.
1985 for (
auto &MDOp : MemProfMD->operands()) {
1986 auto *MIBMD = cast<const MDNode>(MDOp);
1987 std::vector<ContextTotalSize> ContextSizeInfo;
// Operands beyond the first two are optional (full stack id,
// total size) context-size pairs.
1989 if (MIBMD->getNumOperands() > 2) {
1990 for (
unsigned I = 2;
I < MIBMD->getNumOperands();
I++) {
1991 MDNode *ContextSizePair =
1992 dyn_cast<MDNode>(MIBMD->getOperand(
I));
1994 uint64_t FullStackId = mdconst::dyn_extract<ConstantInt>(
1997 uint64_t TotalSize = mdconst::dyn_extract<ConstantInt>(
2000 ContextSizeInfo.push_back({FullStackId, TotalSize});
2006 addStackNodesForMIB<MDNode, MDNode::op_iterator>(
2007 AllocNode, StackContext, CallsiteContext,
2010 assert(AllocNode->AllocTypes != (
uint8_t)AllocationType::None);
// The memprof/callsite metadata has been absorbed into the graph;
// remove it from the instruction.
2013 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
2014 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
2017 else if (
I.getMetadata(LLVMContext::MD_callsite)) {
2018 CallsWithMetadata.push_back(&
I);
2022 if (!CallsWithMetadata.empty())
2023 FuncToCallsWithMetadata[&
F] = CallsWithMetadata;
2027 dbgs() <<
"CCG before updating call stack chains:\n";
2032 exportToDot(
"prestackupdate");
2036 handleCallsitesWithMultipleTargets();
// Strip remaining !callsite metadata from all recorded calls now that
// graph construction no longer needs it.
2039 for (
auto &FuncEntry : FuncToCallsWithMetadata)
2040 for (
auto &Call : FuncEntry.second)
2041 Call.call()->setMetadata(LLVMContext::MD_callsite,
nullptr);
// IndexCallsiteContextGraph constructor: the ThinLTO-summary analogue of
// the module constructor. Iterates all prevailing FunctionSummaries in
// the index, creating allocation nodes from their AllocInfo records (one
// stack-node chain per MIB) and callsite nodes from their CallsiteInfo
// records, then fixes up stack chains.
2044IndexCallsiteContextGraph::IndexCallsiteContextGraph(
2049 for (
auto &
I : Index) {
2051 for (
auto &S :
VI.getSummaryList()) {
// Skip non-prevailing copies of the symbol.
2060 !isPrevailing(
VI.getGUID(), S.get()))
2062 auto *
FS = dyn_cast<FunctionSummary>(S.get());
2065 std::vector<CallInfo> CallsWithMetadata;
2066 if (!
FS->allocs().empty()) {
2067 for (
auto &AN :
FS->mutableAllocs()) {
2072 if (AN.MIBs.empty())
2074 IndexCall AllocCall(&AN);
2075 CallsWithMetadata.push_back(AllocCall);
2076 auto *AllocNode = addAllocNode(AllocCall, FS);
// When present, context size info is recorded per-MIB in parallel
// with the MIB list.
2085 AN.ContextSizeInfos.size() == AN.MIBs.size());
2087 for (
auto &MIB : AN.MIBs) {
2090 std::vector<ContextTotalSize> ContextSizeInfo;
2091 if (!AN.ContextSizeInfos.empty()) {
2092 for (
auto [FullStackId, TotalSize] : AN.ContextSizeInfos[
I])
2093 ContextSizeInfo.push_back({FullStackId, TotalSize});
2095 addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>(
2096 AllocNode, StackContext, EmptyContext, MIB.AllocType,
2100 assert(AllocNode->AllocTypes != (
uint8_t)AllocationType::None);
// Record the initially chosen allocation type for version 0.
2105 AN.Versions[0] = (
uint8_t)allocTypeToUse(AllocNode->AllocTypes);
2109 if (!
FS->callsites().empty())
2110 for (
auto &SN :
FS->mutableCallsites()) {
2111 IndexCall StackNodeCall(&SN);
2112 CallsWithMetadata.push_back(StackNodeCall);
2115 if (!CallsWithMetadata.empty())
2116 FuncToCallsWithMetadata[
FS] = CallsWithMetadata;
2118 if (!
FS->allocs().empty() || !
FS->callsites().empty())
2124 dbgs() <<
"CCG before updating call stack chains:\n";
2129 exportToDot(
"prestackupdate");
2133 handleCallsitesWithMultipleTargets();
// handleCallsitesWithMultipleTargets: reconciles profiled callee targets
// with the actual IR/summary callees for every non-allocation callsite
// node. Calls matched to a node are partitioned by callee
// (partitionCallsByCallee); edges whose callee does not match any call
// are removed (RemovedEdgesWithMismatchedCallees), and tail-call chains
// discovered by calleesMatch add synthesized nodes. Finally the
// node maps are updated for the new/retargeted calls.
2136template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2137void CallsiteContextGraph<DerivedCCG, FuncTy,
2138 CallTy>::handleCallsitesWithMultipleTargets() {
2153 std::vector<std::pair<CallInfo, ContextNode *>> NewCallToNode;
2154 for (
auto &Entry : NonAllocationCallToContextNodeMap) {
// Gather the node's primary call plus all of its matching calls.
2164 std::vector<CallInfo> AllCalls;
2165 AllCalls.reserve(
Node->MatchingCalls.size() + 1);
2166 AllCalls.push_back(
Node->Call);
2167 AllCalls.insert(AllCalls.end(),
Node->MatchingCalls.begin(),
2168 Node->MatchingCalls.end());
2181 if (partitionCallsByCallee(
Node, AllCalls, NewCallToNode))
2184 auto It = AllCalls.begin();
// Find the first call whose callee matches some callee edge of the node.
2186 for (; It != AllCalls.end(); ++It) {
2189 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();
2192 if (!Edge->Callee->hasCall())
2194 assert(NodeToCallingFunc.count(Edge->Callee));
2196 if (!calleesMatch(
ThisCall.call(), EI, TailCallToContextNodeMap)) {
// A later call than the original matched; make it the node's call.
2205 if (
Node->Call != ThisCall) {
2206 Node->setCall(ThisCall);
2217 Node->MatchingCalls.clear();
// No call matched any callee edge: the profiled callees mismatch the IR.
2220 if (It == AllCalls.end()) {
2221 RemovedEdgesWithMismatchedCallees++;
// Remaining calls after the matched one stay as matching calls.
2230 for (++It; It != AllCalls.end(); ++It) {
2234 Node->MatchingCalls.push_back(ThisCall);
// Drop map entries whose node lost its call or was retargeted.
2243 NonAllocationCallToContextNodeMap.remove_if([](
const auto &it) {
2244 return !it.second->hasCall() || it.second->Call != it.first;
// Add entries for nodes created/retargeted during partitioning.
2248 for (
auto &[Call,
Node] : NewCallToNode)
2249 NonAllocationCallToContextNodeMap[
Call] =
Node;
// Add entries for tail-call chain nodes synthesized by calleesMatch.
2253 for (
auto &[Call,
Node] : TailCallToContextNodeMap)
2254 NonAllocationCallToContextNodeMap[
Call] =
Node;
// partitionCallsByCallee: when a node's matched calls target different
// callees, split the node so each callee function gets its own node
// carrying the calls that target it. Caller edges from the original node
// are moved to the per-callee (or "unmatched") nodes via
// moveCalleeEdgeToNewCaller, and leftover None-type edges are pruned.
2257template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2258bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::partitionCallsByCallee(
2260 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode) {
// Group of calls sharing one callee, plus the node assigned to them.
2264 struct CallsWithSameCallee {
2265 std::vector<CallInfo> Calls;
2266 ContextNode *
Node =
nullptr;
// Bucket every call by its (IR/summary) callee function.
2272 for (
auto ThisCall : AllCalls) {
2273 auto *
F = getCalleeFunc(
ThisCall.call());
2275 CalleeFuncToCallInfo[
F].Calls.push_back(ThisCall);
// Map each callee-edge node to the call group for its profiled callee,
// when such a group exists.
2284 for (
const auto &Edge :
Node->CalleeEdges) {
2285 if (!Edge->Callee->hasCall())
2287 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee];
2288 if (CalleeFuncToCallInfo.
contains(ProfiledCalleeFunc))
2289 CalleeNodeToCallInfo[Edge->Callee] =
2290 &CalleeFuncToCallInfo[ProfiledCalleeFunc];
// Nothing matched: no partitioning possible for this node.
2296 if (CalleeNodeToCallInfo.
empty())
// Node collecting caller edges whose callee matched none of the calls.
2308 ContextNode *UnmatchedCalleesNode =
nullptr;
// Tracks whether the original node has been reused for one group.
2310 bool UsedOrigNode =
false;
// Iterate over a copy since edges are moved during the loop.
2315 auto CalleeEdges =
Node->CalleeEdges;
2316 for (
auto &Edge : CalleeEdges) {
2317 if (!Edge->Callee->hasCall())
2322 ContextNode *CallerNodeToUse =
nullptr;
2326 if (!CalleeNodeToCallInfo.
contains(Edge->Callee)) {
2327 if (!UnmatchedCalleesNode)
2328 UnmatchedCalleesNode =
2329 createNewNode(
false, NodeToCallingFunc[
Node]);
2330 CallerNodeToUse = UnmatchedCalleesNode;
2334 auto *
Info = CalleeNodeToCallInfo[Edge->Callee];
// First matched group reuses the original node; later groups get
// freshly created nodes.
2337 if (!UsedOrigNode) {
2340 Node->MatchingCalls.clear();
2341 UsedOrigNode =
true;
2344 createNewNode(
false, NodeToCallingFunc[
Node]);
// Assign the group's first call as primary, rest as matching calls.
2348 Info->Node->setCall(
Info->Calls.front());
2349 Info->Node->MatchingCalls.insert(
Info->Node->MatchingCalls.end(),
2350 Info->Calls.begin() + 1,
2355 NewCallToNode.push_back({
Info->Node->Call,
Info->Node});
2357 CallerNodeToUse =
Info->Node;
// Edge already belongs to the right node.
2361 if (CallerNodeToUse ==
Node)
2364 moveCalleeEdgeToNewCaller(Edge, CallerNodeToUse);
// Clean up edges left with no alloc type after the moves.
2371 for (
auto &
I : CalleeNodeToCallInfo)
2372 removeNoneTypeCallerEdges(
I.second->Node);
2373 if (UnmatchedCalleesNode)
2374 removeNoneTypeCallerEdges(UnmatchedCalleesNode);
2375 removeNoneTypeCallerEdges(
Node);
// (Fragment of getStackId for the index graph — looks up the stack id
// table in the summary index; surrounding lines missing here.)
2388 return Index.getStackIdAtIndex(IdOrIndex);
// calleesMatch: checks whether the IR call's callee matches the profiled
// callee on edge *EI, possibly through a chain of tail calls
// (calleeMatchesFunc fills FoundCalleeChain). When a tail-call chain is
// found, intermediate nodes are created (or reused from
// TailCallToContextNodeMap) and spliced between the edge's caller and
// callee, and the original direct edge is removed.
2391template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2392bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::calleesMatch(
2393 CallTy Call, EdgeIter &EI,
2396 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee];
2397 const FuncTy *CallerFunc = NodeToCallingFunc[Edge->Caller];
2400 std::vector<std::pair<CallTy, FuncTy *>> FoundCalleeChain;
2401 if (!calleeMatchesFunc(Call, ProfiledCalleeFunc, CallerFunc,
// Direct match (no tail calls in between): nothing to splice.
2406 if (FoundCalleeChain.empty())
// Helper: add (or merge into) an edge Caller->Callee carrying the
// original edge's context ids and alloc types. Takes care to keep the
// caller's edge iterator EI valid when inserting into its list.
2409 auto AddEdge = [Edge, &EI](ContextNode *
Caller, ContextNode *
Callee) {
2410 auto *CurEdge =
Callee->findEdgeFromCaller(Caller);
2414 CurEdge->ContextIds.insert(Edge->ContextIds.begin(),
2415 Edge->ContextIds.end());
2416 CurEdge->AllocTypes |= Edge->AllocTypes;
2421 auto NewEdge = std::make_shared<ContextEdge>(
2422 Callee, Caller, Edge->AllocTypes, Edge->ContextIds);
2423 Callee->CallerEdges.push_back(NewEdge);
2424 if (Caller == Edge->Caller) {
2428 EI =
Caller->CalleeEdges.insert(EI, NewEdge);
2431 "Iterator position not restored after insert and increment");
2433 Caller->CalleeEdges.push_back(NewEdge);
// Walk the discovered chain from the profiled callee upward, creating
// a node per intermediate tail call and linking each to the previous.
2438 auto *CurCalleeNode = Edge->Callee;
2439 for (
auto &[NewCall, Func] : FoundCalleeChain) {
2440 ContextNode *NewNode =
nullptr;
2442 if (TailCallToContextNodeMap.
count(NewCall)) {
2443 NewNode = TailCallToContextNodeMap[NewCall];
2444 NewNode->AllocTypes |= Edge->AllocTypes;
2446 FuncToCallsWithMetadata[
Func].push_back({NewCall});
2448 NewNode = createNewNode(
false, Func, NewCall);
2449 TailCallToContextNodeMap[NewCall] = NewNode;
2450 NewNode->AllocTypes = Edge->AllocTypes;
2454 AddEdge(NewNode, CurCalleeNode);
2456 CurCalleeNode = NewNode;
// Connect the original caller to the top of the chain, then remove the
// now-redundant direct edge.
2460 AddEdge(Edge->Caller, CurCalleeNode);
2464 auto *
Caller = Edge->Caller;
2468 removeEdgeFromGraph(Edge.get(), &EI,
true);
// findProfiledCalleeThroughTailCalls (module flavor): depth-limited
// recursive search through tail calls in CurCallee's body for a path
// reaching ProfiledCallee. Records each hop in FoundCalleeChain; returns
// true only when exactly one chain exists, and flags
// FoundMultipleCalleeChains otherwise. Updates the
// FoundProfiledCallee* statistics on success.
2480bool ModuleCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
2482 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
2483 bool &FoundMultipleCalleeChains) {
2490 FoundCalleeChain.push_back({Callsite,
F});
// Resolve the callee, looking through a global alias if needed.
2493 auto *CalleeFunc = dyn_cast<Function>(CurCallee);
2495 auto *Alias = dyn_cast<GlobalAlias>(CurCallee);
2497 CalleeFunc = dyn_cast<Function>(Alias->getAliasee());
2505 bool FoundSingleCalleeChain =
false;
2506 for (
auto &BB : *CalleeFunc) {
2507 for (
auto &
I : BB) {
2508 auto *CB = dyn_cast<CallBase>(&
I);
// Only tail calls can hide frames from the profiled stack.
2509 if (!CB || !CB->isTailCall())
2511 auto *CalledValue = CB->getCalledOperand();
2512 auto *CalledFunction = CB->getCalledFunction();
2513 if (CalledValue && !CalledFunction) {
2514 CalledValue = CalledValue->stripPointerCasts();
2516 CalledFunction = dyn_cast<Function>(CalledValue);
2520 if (
auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
2521 assert(!CalledFunction &&
2522 "Expected null called function in callsite for alias");
2523 CalledFunction = dyn_cast<Function>(GA->getAliaseeObject());
2525 if (!CalledFunction)
2527 if (CalledFunction == ProfiledCallee) {
// A second path to the profiled callee makes the chain ambiguous.
2528 if (FoundSingleCalleeChain) {
2529 FoundMultipleCalleeChains =
true;
2532 FoundSingleCalleeChain =
true;
2533 FoundProfiledCalleeCount++;
2534 FoundProfiledCalleeDepth +=
Depth;
2535 if (
Depth > FoundProfiledCalleeMaxDepth)
2536 FoundProfiledCalleeMaxDepth =
Depth;
2537 SaveCallsiteInfo(&
I, CalleeFunc);
2538 }
else if (findProfiledCalleeThroughTailCalls(
2539 ProfiledCallee, CalledFunction,
Depth + 1,
2540 FoundCalleeChain, FoundMultipleCalleeChains)) {
// Recursive hit: the deeper call found a unique chain.
2543 assert(!FoundMultipleCalleeChains);
2544 if (FoundSingleCalleeChain) {
2545 FoundMultipleCalleeChains =
true;
2548 FoundSingleCalleeChain =
true;
2549 SaveCallsiteInfo(&
I, CalleeFunc);
2550 }
else if (FoundMultipleCalleeChains)
2555 return FoundSingleCalleeChain;
// getCalleeFunc fragment (module flavor): resolves a call's direct
// callee, looking through pointer casts and one level of alias.
// (Signature line missing from this extraction.)
2559 auto *CB = dyn_cast<CallBase>(Call);
2560 if (!CB->getCalledOperand() || CB->isIndirectCall())
2562 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
2563 auto *Alias = dyn_cast<GlobalAlias>(CalleeVal);
2565 return dyn_cast<Function>(Alias->getAliasee());
2566 return dyn_cast<Function>(CalleeVal);
// calleeMatchesFunc (module flavor): true when the IR call's callee is
// Func directly or via alias; otherwise attempts to justify the profiled
// callee through a unique tail-call chain, emitting a debug message and
// bumping FoundProfiledCalleeNonUniquelyCount on ambiguity.
2569bool ModuleCallsiteContextGraph::calleeMatchesFunc(
2571 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain) {
2572 auto *CB = dyn_cast<CallBase>(Call);
2573 if (!CB->getCalledOperand() || CB->isIndirectCall())
2575 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
2576 auto *CalleeFunc = dyn_cast<Function>(CalleeVal);
2577 if (CalleeFunc == Func)
2579 auto *Alias = dyn_cast<GlobalAlias>(CalleeVal);
2580 if (Alias && Alias->getAliasee() == Func)
// Fall back to searching through tail-call chains.
2591 bool FoundMultipleCalleeChains =
false;
2592 if (!findProfiledCalleeThroughTailCalls(Func, CalleeVal,
Depth,
2594 FoundMultipleCalleeChains)) {
2595 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: "
2596 <<
Func->getName() <<
" from " << CallerFunc->
getName()
2597 <<
" that actually called " << CalleeVal->getName()
2598 << (FoundMultipleCalleeChains
2599 ?
" (found multiple possible chains)"
2602 if (FoundMultipleCalleeChains)
2603 FoundProfiledCalleeNonUniquelyCount++;
// sameCallee (module flavor): two calls are "the same" when both resolve
// (through pointer casts) to the same direct callee Function.
2610bool ModuleCallsiteContextGraph::sameCallee(
Instruction *Call1,
2612 auto *CB1 = cast<CallBase>(Call1);
2613 if (!CB1->getCalledOperand() || CB1->isIndirectCall())
2615 auto *CalleeVal1 = CB1->getCalledOperand()->stripPointerCasts();
2616 auto *CalleeFunc1 = dyn_cast<Function>(CalleeVal1);
2617 auto *CB2 = cast<CallBase>(Call2);
2618 if (!CB2->getCalledOperand() || CB2->isIndirectCall())
2620 auto *CalleeVal2 = CB2->getCalledOperand()->stripPointerCasts();
2621 auto *CalleeFunc2 = dyn_cast<Function>(CalleeVal2);
2622 return CalleeFunc1 == CalleeFunc2;
// findProfiledCalleeThroughTailCalls (index flavor): summary-based
// analogue of the module version. Walks tail-call edges in the callee's
// FunctionSummary; synthesizes CallsiteInfo records (cached in
// FunctionCalleesToSynthesizedCallsiteInfos) for found hops, and keeps
// FSToVIMap consistent. Returns true only for a unique chain.
2625bool IndexCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
2627 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
2628 bool &FoundMultipleCalleeChains) {
// Create (once) and reuse a synthesized callsite record for FS->Callee.
2637 if (!FunctionCalleesToSynthesizedCallsiteInfos.count(FS) ||
2638 !FunctionCalleesToSynthesizedCallsiteInfos[
FS].count(Callee))
2641 FunctionCalleesToSynthesizedCallsiteInfos[FS][Callee] =
2644 FunctionCalleesToSynthesizedCallsiteInfos[
FS][
Callee].get();
2645 FoundCalleeChain.push_back({NewCallsiteInfo,
FS});
2652 bool FoundSingleCalleeChain =
false;
// Consider only the prevailing summary; look through aliases.
2655 !isPrevailing(CurCallee.
getGUID(), S.get()))
2657 auto *
FS = dyn_cast<FunctionSummary>(S->getBaseObject());
2660 auto FSVI = CurCallee;
2661 auto *AS = dyn_cast<AliasSummary>(S.get());
2663 FSVI = AS->getAliaseeVI();
2664 for (
auto &CallEdge :
FS->calls()) {
// Only tail calls can hide frames from the profiled stack.
2665 if (!CallEdge.second.hasTailCall())
2667 if (CallEdge.first == ProfiledCallee) {
2668 if (FoundSingleCalleeChain) {
2669 FoundMultipleCalleeChains =
true;
2672 FoundSingleCalleeChain =
true;
2673 FoundProfiledCalleeCount++;
2674 FoundProfiledCalleeDepth +=
Depth;
2675 if (
Depth > FoundProfiledCalleeMaxDepth)
2676 FoundProfiledCalleeMaxDepth =
Depth;
2677 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
// Keep the FS -> ValueInfo mapping up to date for labeling.
2679 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
2680 FSToVIMap[
FS] = FSVI;
2681 }
else if (findProfiledCalleeThroughTailCalls(
2682 ProfiledCallee, CallEdge.first,
Depth + 1,
2683 FoundCalleeChain, FoundMultipleCalleeChains)) {
2686 assert(!FoundMultipleCalleeChains);
2687 if (FoundSingleCalleeChain) {
2688 FoundMultipleCalleeChains =
true;
2691 FoundSingleCalleeChain =
true;
2692 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
2694 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
2695 FSToVIMap[
FS] = FSVI;
2696 }
else if (FoundMultipleCalleeChains)
2701 return FoundSingleCalleeChain;
// getCalleeFunc (index flavor): returns the FunctionSummary of the
// call's callee, taken from the first entry of its summary list.
2705IndexCallsiteContextGraph::getCalleeFunc(IndexCall &Call) {
2707 if (
Callee.getSummaryList().empty())
2709 return dyn_cast<FunctionSummary>(
Callee.getSummaryList()[0]->getBaseObject());
// calleeMatchesFunc (index flavor): matches the callsite's recorded
// callee ValueInfo against Func's ValueInfo (directly or via alias),
// falling back to the tail-call chain search; mirrors the module flavor.
2712bool IndexCallsiteContextGraph::calleeMatchesFunc(
2715 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain) {
2720 Callee.getSummaryList().empty()
2722 : dyn_cast<AliasSummary>(
Callee.getSummaryList()[0].get());
2723 assert(FSToVIMap.count(Func));
2724 auto FuncVI = FSToVIMap[
Func];
2725 if (Callee == FuncVI ||
// Fall back to searching through tail-call edges in the summaries.
2740 bool FoundMultipleCalleeChains =
false;
2741 if (!findProfiledCalleeThroughTailCalls(
2742 FuncVI, Callee,
Depth, FoundCalleeChain, FoundMultipleCalleeChains)) {
2743 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: " << FuncVI
2744 <<
" from " << FSToVIMap[CallerFunc]
2745 <<
" that actually called " << Callee
2746 << (FoundMultipleCalleeChains
2747 ?
" (found multiple possible chains)"
2750 if (FoundMultipleCalleeChains)
2751 FoundProfiledCalleeNonUniquelyCount++;
// sameCallee (index flavor): compare the recorded callee ValueInfos.
2758bool IndexCallsiteContextGraph::sameCallee(IndexCall &Call1, IndexCall &Call2) {
2759 ValueInfo Callee1 = dyn_cast_if_present<CallsiteInfo *>(Call1)->Callee;
2760 ValueInfo Callee2 = dyn_cast_if_present<CallsiteInfo *>(Call2)->Callee;
2761 return Callee1 == Callee2;
// Debug printing for graph nodes and edges. dump() variants presumably
// forward to print(dbgs()) — bodies not visible in this extraction.
2764template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2765void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump()
// ContextNode::print: emits the node address, its call(s), sorted
// context ids, callee/caller edge lists, and clone relationships.
2771template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2772void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
2774 OS <<
"Node " <<
this <<
"\n";
2778 OS <<
" (recursive)";
2780 if (!MatchingCalls.
empty()) {
2781 OS <<
"\tMatchingCalls:\n";
2782 for (
auto &MatchingCall : MatchingCalls) {
2784 MatchingCall.print(
OS);
2789 OS <<
"\tContextIds:";
// Sort ids for deterministic output.
2791 auto ContextIds = getContextIds();
2792 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
2793 std::sort(SortedIds.begin(), SortedIds.end());
2794 for (
auto Id : SortedIds)
2797 OS <<
"\tCalleeEdges:\n";
2798 for (
auto &Edge : CalleeEdges)
2799 OS <<
"\t\t" << *Edge <<
"\n";
2800 OS <<
"\tCallerEdges:\n";
2801 for (
auto &Edge : CallerEdges)
2802 OS <<
"\t\t" << *Edge <<
"\n";
2803 if (!Clones.empty()) {
2806 for (
auto *Clone : Clones)
2809 }
else if (CloneOf) {
2810 OS <<
"\tClone of " << CloneOf <<
"\n";
2814template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2815void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::dump()
// ContextEdge::print: emits the edge's sorted context ids.
2821template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2822void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::print(
2826 OS <<
" ContextIds:";
2827 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
2828 std::sort(SortedIds.begin(), SortedIds.end());
2829 for (
auto Id : SortedIds)
// Whole-graph dump entry point.
2833template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2834void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::dump()
const {
// CallsiteContextGraph::print: prints every live node via graph
// iteration (GraphTraits nodes<>), skipping removed nodes.
2838template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2839void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
2841 OS <<
"Callsite Context Graph:\n";
2842 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2843 for (
const auto Node : nodes<GraphType>(
this)) {
2844 if (
Node->isRemoved())
// printTotalSizes: for every allocation node, reports per-context
// "MemProf hinting" lines with full stack id, total size, and the
// allocation type chosen, noting when the hint diverged from the
// attribute on the call due to cold byte percent.
2851template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2852void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::printTotalSizes(
2854 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2855 for (
const auto Node : nodes<GraphType>(
this)) {
2856 if (
Node->isRemoved())
2858 if (!
Node->IsAllocation)
2861 auto AllocTypeFromCall = getAllocationCallType(
Node->Call);
2862 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
2863 std::sort(SortedIds.begin(), SortedIds.end());
2864 for (
auto Id : SortedIds) {
2865 auto TypeI = ContextIdToAllocationType.find(Id);
2866 assert(TypeI != ContextIdToAllocationType.end());
2867 auto CSI = ContextIdToContextSizeInfos.find(Id);
2868 if (CSI != ContextIdToContextSizeInfos.end()) {
2869 for (
auto &Info : CSI->second) {
2870 OS <<
"MemProf hinting: "
2872 <<
" full allocation context " <<
Info.FullStackId
2873 <<
" with total size " <<
Info.TotalSize <<
" is "
2875 if (allocTypeToUse(
Node->AllocTypes) != AllocTypeFromCall)
2877 <<
" due to cold byte percent";
// check: runs the per-node and per-caller-edge consistency verifiers
// over the whole graph (used by the verification cl::opts in the head).
2885template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2886void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check()
const {
2887 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2888 for (
const auto Node : nodes<GraphType>(
this)) {
2889 checkNode<DerivedCCG, FuncTy, CallTy>(
Node,
false);
2890 for (
auto &Edge :
Node->CallerEdges)
2891 checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
// GraphTraits specialization (fragment): adapts the context graph to
// LLVM's generic graph iteration — entry node is the first owned node,
// children are reached through callee edges. Many member definitions are
// missing from this extraction.
2895template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2897 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2898 using NodeRef =
const ContextNode<DerivedCCG, FuncTy, CallTy> *;
2900 using NodePtrTy = std::unique_ptr<ContextNode<DerivedCCG, FuncTy, CallTy>>;
2916 return G->NodeOwner.begin()->get();
2919 using EdgePtrTy = std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>;
2920 static const ContextNode<DerivedCCG, FuncTy, CallTy> *
2928 decltype(&GetCallee)>;
// DOTGraphTraits specialization (fragment): renders nodes/edges for
// exportToDot. Node label shows orig stack/alloc id, the call label
// (via getLabel) or "null call", plus recursive/external markers;
// colors encode alloc types; clones are drawn blue/dashed.
2939template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2944 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2950 std::string LabelString =
2951 (
Twine(
"OrigId: ") + (Node->IsAllocation ?
"Alloc" :
"") +
2952 Twine(Node->OrigStackOrAllocId))
2954 LabelString +=
"\n";
2955 if (Node->hasCall()) {
2956 auto Func =
G->NodeToCallingFunc.find(Node);
2957 assert(Func !=
G->NodeToCallingFunc.end());
2959 G->getLabel(Func->second, Node->Call.call(), Node->Call.cloneNo());
2961 LabelString +=
"null call";
2962 if (Node->Recursive)
2963 LabelString +=
" (recursive)";
2965 LabelString +=
" (external)";
// Node attributes: tooltip with context ids, fill color by alloc type.
2971 std::string AttributeString = (
Twine(
"tooltip=\"") + getNodeId(Node) +
" " +
2972 getContextIds(Node->getContextIds()) +
"\"")
2975 (
Twine(
",fillcolor=\"") + getColor(Node->AllocTypes) +
"\"").str();
2976 AttributeString +=
",style=\"filled\"";
2977 if (Node->CloneOf) {
2978 AttributeString +=
",color=\"blue\"";
2979 AttributeString +=
",style=\"filled,bold,dashed\"";
2981 AttributeString +=
",style=\"filled\"";
2982 return AttributeString;
// Edge attributes: tooltip with the edge's context ids, color by type.
2987 auto &Edge = *(ChildIter.getCurrent());
2988 return (
Twine(
"tooltip=\"") + getContextIds(Edge->ContextIds) +
"\"" +
2989 Twine(
",fillcolor=\"") + getColor(Edge->AllocTypes) +
"\"")
2996 return Node->isRemoved();
// Context-id stringifier: lists ids when few, else just the count.
3001 std::string IdString =
"ContextIds:";
3002 if (ContextIds.
size() < 100) {
3003 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
3004 std::sort(SortedIds.begin(), SortedIds.end());
3005 for (
auto Id : SortedIds)
3006 IdString += (
" " +
Twine(Id)).str();
3008 IdString += (
" (" +
Twine(ContextIds.
size()) +
" ids)").str();
3013 static std::string getColor(
uint8_t AllocTypes) {
3022 return "mediumorchid1";
// Node id for dot output: "N0x<address>".
3026 static std::string getNodeId(NodeRef
Node) {
3027 std::stringstream SStream;
3028 SStream << std::hex <<
"N0x" << (
unsigned long long)
Node;
3029 std::string
Result = SStream.str();
// exportToDot: writes the graph to a dot file tagged with Label (path
// prefix controlled by the cl::opt in the file head). Body not visible.
3034template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3035void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot(
3036 std::string Label)
const {
// moveEdgeToNewCalleeClone: clones the edge's callee node (same call,
// same function, registered via addClone, matching calls copied), then
// redirects the edge onto the fresh clone via
// moveEdgeToExistingCalleeClone with NewClone=true.
3041template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3042typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
3043CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
3044 const std::shared_ptr<ContextEdge> &Edge,
3046 ContextNode *
Node = Edge->Callee;
3048 ContextNode *Clone =
3049 createNewNode(
Node->IsAllocation, NodeToCallingFunc[
Node],
Node->Call);
3050 Node->addClone(Clone);
3051 Clone->MatchingCalls =
Node->MatchingCalls;
3052 moveEdgeToExistingCalleeClone(Edge, Clone,
true,
// moveEdgeToExistingCalleeClone: redirects (all or a subset of) the
// context ids on Edge from its current callee to NewCallee — a clone of
// the same original node. Handles two regimes: moving the whole edge
// (re-point or merge into an existing edge) vs. splitting off only
// ContextIdsToMove onto a new/merged edge. Afterwards, the moved ids
// are subtracted from the old callee's callee edges and mirrored onto
// NewCallee's callee edges, and alloc types are recomputed. Order of
// operations matters here; verified by checkNode when enabled.
3057template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3058void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3059 moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
3060 ContextNode *NewCallee,
bool NewClone,
3064 assert(NewCallee->getOrigNode() == Edge->Callee->getOrigNode());
3066 ContextNode *OldCallee = Edge->Callee;
3070 auto ExistingEdgeToNewCallee = NewCallee->findEdgeFromCaller(Edge->Caller);
// Empty set means "move everything on this edge".
3074 if (ContextIdsToMove.
empty())
3075 ContextIdsToMove = Edge->getContextIds();
// Case 1: the entire edge moves.
3079 if (Edge->getContextIds().size() == ContextIdsToMove.
size()) {
3082 NewCallee->AllocTypes |= Edge->AllocTypes;
3084 if (ExistingEdgeToNewCallee) {
// Merge into the pre-existing caller->NewCallee edge and drop Edge.
3087 ExistingEdgeToNewCallee->getContextIds().
insert(ContextIdsToMove.
begin(),
3088 ContextIdsToMove.
end());
3089 ExistingEdgeToNewCallee->AllocTypes |= Edge->AllocTypes;
3090 assert(Edge->ContextIds == ContextIdsToMove);
3091 removeEdgeFromGraph(Edge.get());
// Otherwise re-point Edge at NewCallee in place.
3094 Edge->Callee = NewCallee;
3095 NewCallee->CallerEdges.push_back(Edge);
3097 OldCallee->eraseCallerEdge(Edge.get());
// Case 2: only a subset of the ids moves; Edge keeps the rest.
3104 auto CallerEdgeAllocType = computeAllocType(ContextIdsToMove);
3105 if (ExistingEdgeToNewCallee) {
3108 ExistingEdgeToNewCallee->getContextIds().insert(ContextIdsToMove.
begin(),
3109 ContextIdsToMove.
end());
3110 ExistingEdgeToNewCallee->AllocTypes |= CallerEdgeAllocType;
3113 auto NewEdge = std::make_shared<ContextEdge>(
3114 NewCallee, Edge->Caller, CallerEdgeAllocType, ContextIdsToMove);
3115 Edge->Caller->CalleeEdges.push_back(NewEdge);
3116 NewCallee->CallerEdges.push_back(NewEdge);
3120 NewCallee->AllocTypes |= CallerEdgeAllocType;
// Recompute the residual edge's type from its remaining ids.
3122 Edge->AllocTypes = computeAllocType(Edge->ContextIds);
// Propagate the move through the old callee's outgoing edges so the
// moved context ids now flow through NewCallee instead.
3127 for (
auto &OldCalleeEdge : OldCallee->CalleeEdges) {
3132 set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove);
3133 OldCalleeEdge->AllocTypes =
3134 computeAllocType(OldCalleeEdge->getContextIds());
3141 if (
auto *NewCalleeEdge =
3142 NewCallee->findEdgeFromCallee(OldCalleeEdge->Callee)) {
3143 NewCalleeEdge->getContextIds().insert(EdgeContextIdsToMove.
begin(),
3144 EdgeContextIdsToMove.
end());
3145 NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
3149 auto NewEdge = std::make_shared<ContextEdge>(
3150 OldCalleeEdge->Callee, NewCallee,
3151 computeAllocType(EdgeContextIdsToMove), EdgeContextIdsToMove);
3152 NewCallee->CalleeEdges.push_back(NewEdge);
3153 NewEdge->Callee->CallerEdges.push_back(NewEdge);
3157 OldCallee->AllocTypes = OldCallee->computeAllocType();
3160 OldCallee->emptyContextIds());
// Optional consistency verification of everything touched.
3162 checkNode<DerivedCCG, FuncTy, CallTy>(OldCallee,
false);
3163 checkNode<DerivedCCG, FuncTy, CallTy>(NewCallee,
false);
3164 for (
const auto &OldCalleeEdge : OldCallee->CalleeEdges)
3165 checkNode<DerivedCCG, FuncTy, CallTy>(OldCalleeEdge->Callee,
3167 for (
const auto &NewCalleeEdge : NewCallee->CalleeEdges)
3168 checkNode<DerivedCCG, FuncTy, CallTy>(NewCalleeEdge->Callee,
// moveCalleeEdgeToNewCaller: the caller-side counterpart of the callee
// move — re-parents Edge from OldCaller to NewCaller (merging with an
// existing NewCaller->callee edge when present), then migrates the
// edge's context ids from OldCaller's incoming edges to NewCaller's,
// recomputing alloc types. Used by partitionCallsByCallee.
3173template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3174void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3175 moveCalleeEdgeToNewCaller(
const std::shared_ptr<ContextEdge> &Edge,
3176 ContextNode *NewCaller) {
3178 ContextNode *OldCaller = Edge->Caller;
3179 OldCaller->eraseCalleeEdge(Edge.get());
3183 auto ExistingEdgeToNewCaller = NewCaller->findEdgeFromCallee(Edge->Callee);
3185 if (ExistingEdgeToNewCaller) {
// Merge into the existing edge and retire this one.
3188 ExistingEdgeToNewCaller->getContextIds().insert(
3189 Edge->getContextIds().begin(), Edge->getContextIds().end());
3190 ExistingEdgeToNewCaller->AllocTypes |= Edge->AllocTypes;
3191 Edge->ContextIds.clear();
3193 Edge->Callee->eraseCallerEdge(Edge.get());
// Otherwise simply re-parent the edge.
3196 Edge->Caller = NewCaller;
3197 NewCaller->CalleeEdges.push_back(Edge);
3202 NewCaller->AllocTypes |= Edge->AllocTypes;
// A brand-new caller node has no incoming edges yet; in that case the
// assert below allows creating them fresh.
3209 bool IsNewNode = NewCaller->CallerEdges.empty();
3211 for (
auto &OldCallerEdge : OldCaller->CallerEdges) {
// Strip the moved ids from the old caller's incoming edge…
3216 set_subtract(OldCallerEdge->getContextIds(), EdgeContextIdsToMove);
3217 OldCallerEdge->AllocTypes =
3218 computeAllocType(OldCallerEdge->getContextIds());
// …and add them onto the corresponding edge into NewCaller.
3223 auto *ExistingCallerEdge =
3224 NewCaller->findEdgeFromCaller(OldCallerEdge->Caller);
3225 assert(IsNewNode || ExistingCallerEdge);
3226 if (ExistingCallerEdge) {
3227 ExistingCallerEdge->getContextIds().insert(EdgeContextIdsToMove.
begin(),
3228 EdgeContextIdsToMove.
end());
3229 ExistingCallerEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
3232 auto NewEdge = std::make_shared<ContextEdge>(
3233 NewCaller, OldCallerEdge->Caller,
3234 computeAllocType(EdgeContextIdsToMove), EdgeContextIdsToMove);
3235 NewCaller->CallerEdges.push_back(NewEdge);
3236 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
3240 OldCaller->AllocTypes = OldCaller->computeAllocType();
3243 OldCaller->emptyContextIds());
// Optional consistency verification of everything touched.
3245 checkNode<DerivedCCG, FuncTy, CallTy>(OldCaller,
false);
3246 checkNode<DerivedCCG, FuncTy, CallTy>(NewCaller,
false);
3247 for (
const auto &OldCallerEdge : OldCaller->CallerEdges)
3248 checkNode<DerivedCCG, FuncTy, CallTy>(OldCallerEdge->Caller,
3250 for (
const auto &NewCallerEdge : NewCaller->CallerEdges)
3251 checkNode<DerivedCCG, FuncTy, CallTy>(NewCallerEdge->Caller,
// recursivelyRemoveNoneTypeCalleeEdges: post-cloning cleanup. Removes
// None-type callee edges from this node, recurses into its clones, and
// then walks up through caller edges (copy taken because edges can be
// removed mid-iteration; removed edges are skipped).
3256template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3257void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3258 recursivelyRemoveNoneTypeCalleeEdges(
3264 removeNoneTypeCalleeEdges(
Node);
3266 for (
auto *Clone :
Node->Clones)
3267 recursivelyRemoveNoneTypeCalleeEdges(Clone, Visited);
// Iterate over a copy: the recursion may remove caller edges.
3271 auto CallerEdges =
Node->CallerEdges;
3272 for (
auto &Edge : CallerEdges) {
3274 if (Edge->isRemoved()) {
3278 recursivelyRemoveNoneTypeCalleeEdges(Edge->Caller, Visited);
// identifyClones (entry): seeds the cloning walk from every allocation
// node, then performs the None-type edge cleanup over the same set.
3282template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3283void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() {
3285 for (
auto &Entry : AllocationCallToContextNodeMap) {
3287 identifyClones(
Entry.second, Visited,
Entry.second->getContextIds());
3290 for (
auto &Entry : AllocationCallToContextNodeMap)
3291 recursivelyRemoveNoneTypeCalleeEdges(
Entry.second, Visited);
// identifyClones (worker): the core cloning decision for one node.
// Recurses caller-ward first, then — for each caller edge whose alloc
// type differs from what this node would get — moves that edge onto a
// clone (existing compatible clone or a new one) so each clone ends up
// with a single consistent allocation type. Caller edges are processed
// in a priority order over alloc types; context ids that appear via
// multiple caller edges (recursion) are excluded from the per-edge sets.
3305template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3306void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
3309 checkNode<DerivedCCG, FuncTy, CallTy>(
Node,
false);
// Nodes without a call cannot be cloned usefully.
3317 if (!
Node->hasCall())
// Recurse into callers first (copy: edges may be removed/moved).
3332 auto CallerEdges =
Node->CallerEdges;
3333 for (
auto &Edge : CallerEdges) {
3335 if (Edge->isRemoved()) {
3340 if (!Visited.
count(Edge->Caller) && !Edge->Caller->CloneOf) {
3341 identifyClones(Edge->Caller, Visited, AllocContextIds);
// Stable-sort caller edges by alloc-type priority (then by lowest
// context id for determinism); empty-id edges sort last.
3364 const unsigned AllocTypeCloningPriority[] = { 3, 4,
3367 std::stable_sort(
Node->CallerEdges.begin(),
Node->CallerEdges.end(),
3368 [&](
const std::shared_ptr<ContextEdge> &
A,
3369 const std::shared_ptr<ContextEdge> &
B) {
3372 if (A->ContextIds.empty())
3378 if (B->ContextIds.empty())
3381 if (A->AllocTypes == B->AllocTypes)
3384 return *A->ContextIds.begin() < *B->ContextIds.begin();
3385 return AllocTypeCloningPriority[A->AllocTypes] <
3386 AllocTypeCloningPriority[B->AllocTypes];
// Ids seen on more than one caller edge indicate recursion; collect
// them so they can be excluded from per-edge moves below.
3396 for (
auto &CE :
Node->CallerEdges) {
3399 AllCallerContextIds.
reserve(
CE->getContextIds().size());
3400 for (
auto Id :
CE->getContextIds())
3401 if (!AllCallerContextIds.
insert(Id).second)
3402 RecursiveContextIds.
insert(Id);
// Main cloning loop over a copy of the caller edges.
3412 auto CallerEdges =
Node->CallerEdges;
3413 for (
auto &CallerEdge : CallerEdges) {
3421 if (!CallerEdge->Caller->hasCall())
3426 auto CallerEdgeContextsForAlloc =
3428 if (!RecursiveContextIds.
empty())
3429 CallerEdgeContextsForAlloc =
3431 if (CallerEdgeContextsForAlloc.empty())
3434 auto CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
// Record, per callee edge, the alloc type restricted to this caller
// edge's contexts — used to check clone compatibility below.
3438 std::vector<uint8_t> CalleeEdgeAllocTypesForCallerEdge;
3439 CalleeEdgeAllocTypesForCallerEdge.reserve(
Node->CalleeEdges.size());
3440 for (
auto &CalleeEdge :
Node->CalleeEdges)
3441 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
3442 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
// No clone needed when this edge agrees with the node's type and all
// callee-edge types line up.
3457 if (allocTypeToUse(CallerAllocTypeForAlloc) ==
3458 allocTypeToUse(
Node->AllocTypes) &&
3459 allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
3460 CalleeEdgeAllocTypesForCallerEdge,
Node->CalleeEdges))
// Try to reuse an existing compatible clone first.
3465 ContextNode *Clone =
nullptr;
3466 for (
auto *CurClone :
Node->Clones) {
3467 if (allocTypeToUse(CurClone->AllocTypes) !=
3468 allocTypeToUse(CallerAllocTypeForAlloc))
3475 assert(!BothSingleAlloc ||
3476 CurClone->AllocTypes == CallerAllocTypeForAlloc);
3482 if (BothSingleAlloc || allocTypesMatchClone<DerivedCCG, FuncTy, CallTy>(
3483 CalleeEdgeAllocTypesForCallerEdge, CurClone)) {
3491 moveEdgeToExistingCalleeClone(CallerEdge, Clone,
false,
3492 CallerEdgeContextsForAlloc);
3494 Clone = moveEdgeToNewCalleeClone(CallerEdge, CallerEdgeContextsForAlloc);
3507 checkNode<DerivedCCG, FuncTy, CallTy>(
Node,
false);
// updateAllocationCall (module flavor): attaches the "memprof" function
// attribute (hot/cold decision string) to the allocation call and emits
// an optimization remark via OREGetter.
3510void ModuleCallsiteContextGraph::updateAllocationCall(
3514 "memprof", AllocTypeString);
3515 cast<CallBase>(
Call.call())->addFnAttr(
A);
3516 OREGetter(
Call.call()->getFunction())
3518 <<
ore::NV(
"AllocationCall",
Call.call()) <<
" in clone "
3520 <<
" marked with memprof allocation attribute "
3521 <<
ore::NV(
"Attribute", AllocTypeString));
// updateAllocationCall (index flavor): records the decision in the
// AllocInfo's Versions slot for this clone number.
3524void IndexCallsiteContextGraph::updateAllocationCall(
CallInfo &Call,
3526 auto *AI = cast<AllocInfo *>(
Call.call());
3528 assert(AI->Versions.size() >
Call.cloneNo());
// getAllocationCallType (module flavor): reads the decision back from
// the "memprof" attribute ("cold" vs not) on the call.
3533ModuleCallsiteContextGraph::getAllocationCallType(
const CallInfo &Call)
const {
3534 const auto *CB = cast<CallBase>(
Call.call());
3535 if (!CB->getAttributes().hasFnAttr(
"memprof"))
3537 return CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
// getAllocationCallType (index flavor): reads it from the AllocInfo
// Versions entry for this clone.
3543IndexCallsiteContextGraph::getAllocationCallType(
const CallInfo &Call)
const {
3544 const auto *AI = cast<AllocInfo *>(
Call.call());
3545 assert(AI->Versions.size() >
Call.cloneNo());
// updateCall (module flavor): retargets the caller's call instruction to
// the chosen function clone (when clone number > 0) and emits a remark.
3549void ModuleCallsiteContextGraph::updateCall(
CallInfo &CallerCall,
3550 FuncInfo CalleeFunc) {
3551 if (CalleeFunc.cloneNo() > 0)
3552 cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func());
3553 OREGetter(CallerCall.call()->getFunction())
3555 <<
ore::NV(
"Call", CallerCall.call()) <<
" in clone "
3556 <<
ore::NV(
"Caller", CallerCall.call()->getFunction())
3557 <<
" assigned to call function clone "
3558 <<
ore::NV(
"Callee", CalleeFunc.func()));
// updateCall (index flavor): records the chosen callee clone number in
// the CallsiteInfo's Clones slot for this caller clone.
3561void IndexCallsiteContextGraph::updateCall(
CallInfo &CallerCall,
3562 FuncInfo CalleeFunc) {
3563 auto *CI = cast<CallsiteInfo *>(CallerCall.call());
3565 "Caller cannot be an allocation which should not have profiled calls");
3566 assert(CI->Clones.size() > CallerCall.cloneNo());
3567 CI->Clones[CallerCall.cloneNo()] = CalleeFunc.cloneNo();
// cloneFunctionForCallsite (module flavor): clones the IR function,
// renames the clone, maps each recorded call (via the clone VMap) into
// CallMap with the new clone number, emits a remark, and returns the new
// FuncInfo.
3570CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
3572ModuleCallsiteContextGraph::cloneFunctionForCallsite(
3573 FuncInfo &Func,
CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
3574 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
3580 NewFunc->setName(
Name);
3581 for (
auto &Inst : CallsWithMetadataInFunc) {
// Only the original (clone 0) calls are expected here.
3583 assert(Inst.cloneNo() == 0);
3584 CallMap[Inst] = {cast<Instruction>(VMap[Inst.call()]), CloneNo};
3586 OREGetter(
Func.func())
3588 <<
"created clone " <<
ore::NV(
"NewFunction", NewFunc));
3589 return {NewFunc, CloneNo};
// cloneFunctionForCallsite (index flavor): no IR exists in the ThinLTO
// index, so "cloning" just extends each summary record (AllocInfo
// Versions / CallsiteInfo Clones) with a slot for the new clone number
// and returns the same summary with the bumped clone number.
3593 IndexCall>::FuncInfo
3594IndexCallsiteContextGraph::cloneFunctionForCallsite(
3595 FuncInfo &Func,
CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
3596 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
3602 (isa<AllocInfo *>(
Call.call())
3611 for (
auto &Inst : CallsWithMetadataInFunc) {
3613 assert(Inst.cloneNo() == 0);
3614 if (
auto *AI = dyn_cast<AllocInfo *>(Inst.call())) {
3615 assert(AI->Versions.size() == CloneNo);
// Default new version to 0 (presumably "not cold" — confirm against
// the AllocationType encoding in the full source).
3618 AI->Versions.push_back(0);
3620 auto *CI = cast<CallsiteInfo *>(Inst.call());
3621 assert(CI && CI->Clones.size() == CloneNo);
3624 CI->Clones.push_back(0);
3626 CallMap[Inst] = {Inst.call(), CloneNo};
3628 return {
Func.func(), CloneNo};
3662template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3663bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
3664 bool Changed =
false;
3672 auto RecordCalleeFuncOfCallsite = [&](ContextNode *
Caller,
3673 const FuncInfo &CalleeFunc) {
3675 CallsiteToCalleeFuncCloneMap[
Caller] = CalleeFunc;
3680 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
3681 FuncInfo OrigFunc(Func);
3685 std::map<FuncInfo, std::map<CallInfo, CallInfo>> FuncClonesToCallMap;
3686 for (
auto &Call : CallsWithMetadata) {
3687 ContextNode *
Node = getNodeForInst(Call);
3694 "Not having a call should have prevented cloning");
3698 std::map<FuncInfo, ContextNode *> FuncCloneToCurNodeCloneMap;
3702 auto AssignCallsiteCloneToFuncClone = [&](
const FuncInfo &FuncClone,
3704 ContextNode *CallsiteClone,
3707 FuncCloneToCurNodeCloneMap[FuncClone] = CallsiteClone;
3709 assert(FuncClonesToCallMap.count(FuncClone));
3710 std::map<CallInfo, CallInfo> &CallMap = FuncClonesToCallMap[FuncClone];
3712 if (CallMap.count(Call))
3713 CallClone = CallMap[
Call];
3714 CallsiteClone->setCall(CallClone);
3716 for (
auto &MatchingCall :
Node->MatchingCalls) {
3718 if (CallMap.count(MatchingCall))
3719 CallClone = CallMap[MatchingCall];
3721 MatchingCall = CallClone;
3728 std::deque<ContextNode *> ClonesWorklist;
3730 if (!
Node->emptyContextIds())
3731 ClonesWorklist.push_back(
Node);
3732 ClonesWorklist.insert(ClonesWorklist.end(),
Node->Clones.begin(),
3733 Node->Clones.end());
3738 unsigned NodeCloneCount = 0;
3739 while (!ClonesWorklist.empty()) {
3740 ContextNode *Clone = ClonesWorklist.front();
3741 ClonesWorklist.pop_front();
3744 checkNode<DerivedCCG, FuncTy, CallTy>(Clone);
3750 if (FuncClonesToCallMap.size() < NodeCloneCount) {
3752 if (NodeCloneCount == 1) {
3757 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &E) {
3758 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
3762 FuncClonesToCallMap[OrigFunc] = {};
3763 AssignCallsiteCloneToFuncClone(
3764 OrigFunc, Call, Clone,
3765 AllocationCallToContextNodeMap.count(Call));
3766 for (
auto &CE : Clone->CallerEdges) {
3768 if (!
CE->Caller->hasCall())
3770 RecordCalleeFuncOfCallsite(
CE->Caller, OrigFunc);
3780 FuncInfo PreviousAssignedFuncClone;
3782 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &E) {
3783 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
3785 bool CallerAssignedToCloneOfFunc =
false;
3786 if (EI != Clone->CallerEdges.end()) {
3787 const std::shared_ptr<ContextEdge> &Edge = *EI;
3788 PreviousAssignedFuncClone =
3789 CallsiteToCalleeFuncCloneMap[Edge->Caller];
3790 CallerAssignedToCloneOfFunc =
true;
3795 std::map<CallInfo, CallInfo> NewCallMap;
3796 unsigned CloneNo = FuncClonesToCallMap.size();
3797 assert(CloneNo > 0 &&
"Clone 0 is the original function, which "
3798 "should already exist in the map");
3799 FuncInfo NewFuncClone = cloneFunctionForCallsite(
3800 OrigFunc, Call, NewCallMap, CallsWithMetadata, CloneNo);
3801 FuncClonesToCallMap.emplace(NewFuncClone, std::move(NewCallMap));
3802 FunctionClonesAnalysis++;
3808 if (!CallerAssignedToCloneOfFunc) {
3809 AssignCallsiteCloneToFuncClone(
3810 NewFuncClone, Call, Clone,
3811 AllocationCallToContextNodeMap.count(Call));
3812 for (
auto &CE : Clone->CallerEdges) {
3814 if (!
CE->Caller->hasCall())
3816 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
3828 auto CallerEdges = Clone->CallerEdges;
3829 for (
auto CE : CallerEdges) {
3831 if (
CE->isRemoved()) {
3837 if (!
CE->Caller->hasCall())
3840 if (!CallsiteToCalleeFuncCloneMap.
count(
CE->Caller) ||
3844 CallsiteToCalleeFuncCloneMap[
CE->Caller] !=
3845 PreviousAssignedFuncClone)
3848 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
3861 auto CalleeEdges =
CE->Caller->CalleeEdges;
3862 for (
auto CalleeEdge : CalleeEdges) {
3865 if (CalleeEdge->isRemoved()) {
3870 ContextNode *
Callee = CalleeEdge->Callee;
3874 if (Callee == Clone || !
Callee->hasCall())
3876 ContextNode *NewClone = moveEdgeToNewCalleeClone(CalleeEdge);
3877 removeNoneTypeCalleeEdges(NewClone);
3880 removeNoneTypeCalleeEdges(Callee);
3885 if (CallsiteToCalleeFuncCloneMap.
count(Callee))
3886 RecordCalleeFuncOfCallsite(
3887 NewClone, CallsiteToCalleeFuncCloneMap[Callee]);
3896 OrigCall.setCloneNo(0);
3897 std::map<CallInfo, CallInfo> &CallMap =
3898 FuncClonesToCallMap[NewFuncClone];
3899 assert(CallMap.count(OrigCall));
3900 CallInfo NewCall(CallMap[OrigCall]);
3902 NewClone->setCall(NewCall);
3904 for (
auto &MatchingCall : NewClone->MatchingCalls) {
3905 CallInfo OrigMatchingCall(MatchingCall);
3906 OrigMatchingCall.setCloneNo(0);
3907 assert(CallMap.count(OrigMatchingCall));
3908 CallInfo NewCall(CallMap[OrigMatchingCall]);
3911 MatchingCall = NewCall;
3934 std::map<FuncInfo, ContextNode *> FuncCloneToNewCallsiteCloneMap;
3935 FuncInfo FuncCloneAssignedToCurCallsiteClone;
3939 auto CloneCallerEdges = Clone->CallerEdges;
3940 for (
auto &Edge : CloneCallerEdges) {
3942 if (!Edge->Caller->hasCall())
3946 if (CallsiteToCalleeFuncCloneMap.
count(Edge->Caller)) {
3947 FuncInfo FuncCloneCalledByCaller =
3948 CallsiteToCalleeFuncCloneMap[Edge->Caller];
3958 if ((FuncCloneToCurNodeCloneMap.count(FuncCloneCalledByCaller) &&
3959 FuncCloneToCurNodeCloneMap[FuncCloneCalledByCaller] !=
3967 (FuncCloneAssignedToCurCallsiteClone &&
3968 FuncCloneAssignedToCurCallsiteClone !=
3969 FuncCloneCalledByCaller)) {
3984 if (FuncCloneToNewCallsiteCloneMap.count(
3985 FuncCloneCalledByCaller)) {
3986 ContextNode *NewClone =
3987 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller];
3988 moveEdgeToExistingCalleeClone(Edge, NewClone);
3990 removeNoneTypeCalleeEdges(NewClone);
3993 ContextNode *NewClone = moveEdgeToNewCalleeClone(Edge);
3994 removeNoneTypeCalleeEdges(NewClone);
3995 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller] =
3998 ClonesWorklist.push_back(NewClone);
4003 removeNoneTypeCalleeEdges(Clone);
4011 if (!FuncCloneAssignedToCurCallsiteClone) {
4012 FuncCloneAssignedToCurCallsiteClone = FuncCloneCalledByCaller;
4014 AssignCallsiteCloneToFuncClone(
4015 FuncCloneCalledByCaller, Call, Clone,
4016 AllocationCallToContextNodeMap.count(Call));
4020 assert(FuncCloneAssignedToCurCallsiteClone ==
4021 FuncCloneCalledByCaller);
4030 if (!FuncCloneAssignedToCurCallsiteClone) {
4035 for (
auto &CF : FuncClonesToCallMap) {
4036 if (!FuncCloneToCurNodeCloneMap.count(CF.first)) {
4037 FuncCloneAssignedToCurCallsiteClone = CF.first;
4041 assert(FuncCloneAssignedToCurCallsiteClone);
4043 AssignCallsiteCloneToFuncClone(
4044 FuncCloneAssignedToCurCallsiteClone, Call, Clone,
4045 AllocationCallToContextNodeMap.count(Call));
4047 assert(FuncCloneToCurNodeCloneMap
4048 [FuncCloneAssignedToCurCallsiteClone] == Clone);
4050 RecordCalleeFuncOfCallsite(Edge->Caller,
4051 FuncCloneAssignedToCurCallsiteClone);
4056 checkNode<DerivedCCG, FuncTy, CallTy>(
Node);
4057 for (
const auto &PE :
Node->CalleeEdges)
4058 checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee);
4059 for (
const auto &CE :
Node->CallerEdges)
4060 checkNode<DerivedCCG, FuncTy, CallTy>(
CE->Caller);
4061 for (
auto *Clone :
Node->Clones) {
4062 checkNode<DerivedCCG, FuncTy, CallTy>(Clone);
4063 for (
const auto &PE : Clone->CalleeEdges)
4064 checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee);
4065 for (
const auto &CE : Clone->CallerEdges)
4066 checkNode<DerivedCCG, FuncTy, CallTy>(
CE->Caller);
4075 auto UpdateCalls = [&](ContextNode *
Node,
4077 auto &&UpdateCalls) {
4082 for (
auto *Clone :
Node->Clones)
4083 UpdateCalls(Clone, Visited, UpdateCalls);
4085 for (
auto &Edge :
Node->CallerEdges)
4086 UpdateCalls(Edge->Caller, Visited, UpdateCalls);
4090 if (!
Node->hasCall() ||
Node->emptyContextIds())
4093 if (
Node->IsAllocation) {
4094 auto AT = allocTypeToUse(
Node->AllocTypes);
4100 !ContextIdToContextSizeInfos.empty()) {
4103 for (
auto Id :
Node->getContextIds()) {
4104 auto TypeI = ContextIdToAllocationType.find(Id);
4105 assert(TypeI != ContextIdToAllocationType.end());
4106 auto CSI = ContextIdToContextSizeInfos.find(Id);
4107 if (CSI != ContextIdToContextSizeInfos.end()) {
4108 for (
auto &Info : CSI->second) {
4111 TotalCold +=
Info.TotalSize;
4118 updateAllocationCall(
Node->Call, AT);
4123 if (!CallsiteToCalleeFuncCloneMap.
count(
Node))
4126 auto CalleeFunc = CallsiteToCalleeFuncCloneMap[
Node];
4127 updateCall(
Node->Call, CalleeFunc);
4129 for (
auto &Call :
Node->MatchingCalls)
4130 updateCall(Call, CalleeFunc);
4139 for (
auto &Entry : AllocationCallToContextNodeMap)
4140 UpdateCalls(
Entry.second, Visited, UpdateCalls);
4154 FunctionsClonedThinBackend++;
4155 for (
unsigned I = 1;
I < NumClones;
I++) {
4156 VMaps.
emplace_back(std::make_unique<ValueToValueMapTy>());
4158 FunctionClonesThinBackend++;
4161 for (
auto &BB : *NewF) {
4162 for (
auto &Inst : BB) {
4163 Inst.setMetadata(LLVMContext::MD_memprof,
nullptr);
4164 Inst.setMetadata(LLVMContext::MD_callsite,
nullptr);
4168 auto *PrevF = M.getFunction(
Name);
4172 assert(PrevF->isDeclaration());
4173 NewF->takeName(PrevF);
4174 PrevF->replaceAllUsesWith(NewF);
4175 PrevF->eraseFromParent();
4177 NewF->setName(
Name);
4179 <<
"created clone " <<
ore::NV(
"NewFunction", NewF));
4182 if (!FuncToAliasMap.count(&
F))
4184 for (
auto *
A : FuncToAliasMap[&
F]) {
4186 auto *PrevA = M.getNamedAlias(
Name);
4188 A->getType()->getPointerAddressSpace(),
4189 A->getLinkage(),
Name, NewF);
4190 NewA->copyAttributesFrom(
A);
4194 assert(PrevA->isDeclaration());
4195 NewA->takeName(PrevA);
4196 PrevA->replaceAllUsesWith(NewA);
4197 PrevA->eraseFromParent();
4239bool MemProfContextDisambiguation::initializeIndirectCallPromotionInfo(
4241 ICallAnalysis = std::make_unique<ICallPromotionAnalysis>();
4242 Symtab = std::make_unique<InstrProfSymtab>();
4253 if (
Error E = Symtab->create(M,
true,
false)) {
4254 std::string SymtabFailure =
toString(std::move(
E));
4255 M.getContext().emitError(
"Failed to create symtab: " + SymtabFailure);
4261bool MemProfContextDisambiguation::applyImport(
Module &M) {
4263 bool Changed =
false;
4268 std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
4270 for (
auto &
A :
M.aliases()) {
4271 auto *Aliasee =
A.getAliaseeObject();
4272 if (
auto *
F = dyn_cast<Function>(Aliasee))
4273 FuncToAliasMap[
F].insert(&
A);
4276 if (!initializeIndirectCallPromotionInfo(M))
4286 bool ClonesCreated =
false;
4287 unsigned NumClonesCreated = 0;
4288 auto CloneFuncIfNeeded = [&](
unsigned NumClones) {
4298 if (ClonesCreated) {
4299 assert(NumClonesCreated == NumClones);
4306 ClonesCreated =
true;
4307 NumClonesCreated = NumClones;
4313 CloneFuncIfNeeded(
StackNode.Clones.size());
4320 auto CalleeOrigName = CalledFunction->getName();
4321 for (
unsigned J = 0; J <
StackNode.Clones.size(); J++) {
4326 auto NewF =
M.getOrInsertFunction(
4328 CalledFunction->getFunctionType());
4334 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
4337 <<
ore::NV(
"Call", CBClone) <<
" in clone "
4339 <<
" assigned to call function clone "
4340 <<
ore::NV(
"Callee", NewF.getCallee()));
4358 auto SrcModuleMD =
F.getMetadata(
"thinlto_src_module");
4360 "enable-import-metadata is needed to emit thinlto_src_module");
4362 dyn_cast<MDString>(SrcModuleMD->getOperand(0))->getString();
4364 if (GVS->modulePath() == SrcModule) {
4365 GVSummary = GVS.get();
4369 assert(GVSummary && GVSummary->modulePath() == SrcModule);
4374 if (isa<AliasSummary>(GVSummary))
4377 auto *
FS = cast<FunctionSummary>(GVSummary->getBaseObject());
4379 if (
FS->allocs().empty() &&
FS->callsites().empty())
4382 auto SI =
FS->callsites().begin();
4383 auto AI =
FS->allocs().begin();
4391 for (
auto CallsiteIt =
FS->callsites().rbegin();
4392 CallsiteIt !=
FS->callsites().rend(); CallsiteIt++) {
4393 auto &Callsite = *CallsiteIt;
4397 if (!Callsite.StackIdIndices.empty())
4399 MapTailCallCalleeVIToCallsite.
insert({Callsite.Callee, Callsite});
4408 for (
auto &BB :
F) {
4409 for (
auto &
I : BB) {
4410 auto *CB = dyn_cast<CallBase>(&
I);
4415 auto *CalledValue = CB->getCalledOperand();
4416 auto *CalledFunction = CB->getCalledFunction();
4417 if (CalledValue && !CalledFunction) {
4418 CalledValue = CalledValue->stripPointerCasts();
4420 CalledFunction = dyn_cast<Function>(CalledValue);
4424 if (
auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
4425 assert(!CalledFunction &&
4426 "Expected null called function in callsite for alias");
4427 CalledFunction = dyn_cast<Function>(GA->getAliaseeObject());
4431 I.getMetadata(LLVMContext::MD_callsite));
4432 auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof);
4436 if (CB->getAttributes().hasFnAttr(
"memprof")) {
4438 CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
4439 ? AllocTypeColdThinBackend++
4440 : AllocTypeNotColdThinBackend++;
4441 OrigAllocsThinBackend++;
4442 AllocVersionsThinBackend++;
4443 if (!MaxAllocVersionsThinBackend)
4444 MaxAllocVersionsThinBackend = 1;
4451 auto &AllocNode = *(AI++);
4455 auto MIBIter = AllocNode.MIBs.begin();
4456 for (
auto &MDOp : MemProfMD->operands()) {
4457 assert(MIBIter != AllocNode.MIBs.end());
4459 MIBIter->StackIdIndices.begin();
4460 auto *MIBMD = cast<const MDNode>(MDOp);
4464 auto ContextIterBegin =
4468 (ContextIterBegin != StackContext.
end() &&
4469 *ContextIterBegin == 0)
4472 for (
auto ContextIter = ContextIterBegin;
4473 ContextIter != StackContext.
end(); ++ContextIter) {
4477 if (LastStackContextId == *ContextIter)
4479 LastStackContextId = *ContextIter;
4480 assert(StackIdIndexIter != MIBIter->StackIdIndices.end());
4489 CloneFuncIfNeeded(AllocNode.Versions.size());
4491 OrigAllocsThinBackend++;
4492 AllocVersionsThinBackend += AllocNode.Versions.size();
4493 if (MaxAllocVersionsThinBackend < AllocNode.Versions.size())
4494 MaxAllocVersionsThinBackend = AllocNode.Versions.size();
4504 if (AllocNode.Versions.size() == 1 &&
4510 UnclonableAllocsThinBackend++;
4516 return Type == ((uint8_t)AllocationType::NotCold |
4517 (uint8_t)AllocationType::Cold);
4521 for (
unsigned J = 0; J < AllocNode.Versions.size(); J++) {
4527 : AllocTypeNotColdThinBackend++;
4539 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
4542 <<
ore::NV(
"AllocationCall", CBClone) <<
" in clone "
4544 <<
" marked with memprof allocation attribute "
4545 <<
ore::NV(
"Attribute", AllocTypeString));
4547 }
else if (!CallsiteContext.empty()) {
4548 if (!CalledFunction) {
4551 auto *CI = dyn_cast<CallInst>(CB);
4552 assert(!CI || !CI->isInlineAsm());
4555 assert(CalledValue && !isa<Constant>(CalledValue));
4562 recordICPInfo(CB,
FS->callsites(), SI, ICallAnalysisInfo);
4568 CloneFuncIfNeeded(NumClones);
4573 assert(SI !=
FS->callsites().end());
4579 auto StackIdIndexIter =
StackNode.StackIdIndices.begin();
4580 for (
auto StackId : CallsiteContext) {
4588 CloneCallsite(
StackNode, CB, CalledFunction);
4590 }
else if (CB->isTailCall() && CalledFunction) {
4595 if (CalleeVI && MapTailCallCalleeVIToCallsite.
count(CalleeVI)) {
4596 auto Callsite = MapTailCallCalleeVIToCallsite.
find(CalleeVI);
4597 assert(Callsite != MapTailCallCalleeVIToCallsite.
end());
4598 CloneCallsite(Callsite->second, CB, CalledFunction);
4605 performICP(M,
FS->callsites(), VMaps, ICallAnalysisInfo, ORE);
4615 for (
auto &BB :
F) {
4616 for (
auto &
I : BB) {
4617 if (!isa<CallBase>(
I))
4619 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
4620 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
4628unsigned MemProfContextDisambiguation::recordICPInfo(
4635 auto CandidateProfileData =
4636 ICallAnalysis->getPromotionCandidatesForInstruction(CB, TotalCount,
4638 if (CandidateProfileData.empty())
4644 bool ICPNeeded =
false;
4645 unsigned NumClones = 0;
4646 size_t CallsiteInfoStartIndex = std::distance(AllCallsites.
begin(), SI);
4647 for (
const auto &Candidate : CandidateProfileData) {
4649 auto CalleeValueInfo =
4654 assert(!CalleeValueInfo ||
SI->Callee == CalleeValueInfo);
4661 [](
unsigned CloneNo) { return CloneNo != 0; });
4671 ICallAnalysisInfo.
push_back({CB, CandidateProfileData.vec(), NumCandidates,
4672 TotalCount, CallsiteInfoStartIndex});
4676void MemProfContextDisambiguation::performICP(
4678 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
4687 for (
auto &
Info : ICallAnalysisInfo) {
4689 auto CallsiteIndex =
Info.CallsiteInfoStartIndex;
4690 auto TotalCount =
Info.TotalCount;
4691 unsigned NumPromoted = 0;
4692 unsigned NumClones = 0;
4694 for (
auto &Candidate :
Info.CandidateProfileData) {
4695 auto &
StackNode = AllCallsites[CallsiteIndex++];
4705 Function *TargetFunction = Symtab->getFunction(Candidate.Value);
4706 if (TargetFunction ==
nullptr ||
4715 <<
"Memprof cannot promote indirect call: target with md5sum "
4716 <<
ore::NV(
"target md5sum", Candidate.Value) <<
" not found";
4725 const char *Reason =
nullptr;
4729 <<
"Memprof cannot promote indirect call to "
4730 <<
ore::NV(
"TargetFunction", TargetFunction)
4731 <<
" with count of " <<
ore::NV(
"TotalCount", TotalCount)
4743 for (
unsigned J = 0; J < NumClones; J++) {
4746 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
4752 TotalCount, isSamplePGO, &ORE);
4753 auto *TargetToUse = TargetFunction;
4758 cast<Function>(
M.getOrInsertFunction(
4766 <<
ore::NV(
"Call", CBClone) <<
" in clone "
4768 <<
" promoted and assigned to call function clone "
4769 <<
ore::NV(
"Callee", TargetToUse));
4773 TotalCount -= Candidate.Count;
4779 for (
unsigned J = 0; J < NumClones; J++) {
4782 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
4784 CBClone->
setMetadata(LLVMContext::MD_prof,
nullptr);
4787 if (TotalCount != 0)
4790 TotalCount, IPVK_IndirectCallTarget,
Info.NumCandidates);
4795template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4796bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {
4798 dbgs() <<
"CCG before cloning:\n";
4802 exportToDot(
"postbuild");
4815 dbgs() <<
"CCG after cloning:\n";
4819 exportToDot(
"cloned");
4821 bool Changed = assignFunctions();
4824 dbgs() <<
"CCG after assigning function clones:\n";
4828 exportToDot(
"clonefuncassign");
4831 printTotalSizes(
errs());
4836bool MemProfContextDisambiguation::processModule(
4843 return applyImport(M);
4856 ModuleCallsiteContextGraph CCG(M, OREGetter);
4857 return CCG.process();
4862 : ImportSummary(Summary), isSamplePGO(isSamplePGO) {
4863 if (ImportSummary) {
4873 auto ReadSummaryFile =
4875 if (!ReadSummaryFile) {
4882 if (!ImportSummaryForTestingOrErr) {
4888 ImportSummaryForTesting = std::move(*ImportSummaryForTestingOrErr);
4889 ImportSummary = ImportSummaryForTesting.get();
4898 if (!processModule(M, OREGetter))
4915 IndexCallsiteContextGraph CCG(
Index, isPrevailing);
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
#define LLVM_ATTRIBUTE_UNUSED
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
Module.h This file contains the declarations for the Module class.
This file implements a map that provides insertion order iteration.
static SmallVector< std::unique_ptr< ValueToValueMapTy >, 4 > createFunctionClones(Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE, std::map< const Function *, SmallPtrSet< const GlobalAlias *, 1 > > &FuncToAliasMap)
static cl::opt< unsigned > TailCallSearchDepth("memprof-tail-call-search-depth", cl::init(5), cl::Hidden, cl::desc("Max depth to recursively search for missing " "frames through tail calls."))
static bool isMemProfClone(const Function &F)
static cl::opt< bool > ExportToDot("memprof-export-to-dot", cl::init(false), cl::Hidden, cl::desc("Export graph to dot files."))
static void checkEdge(const std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > &Edge)
bool checkColdOrNotCold(uint8_t AllocType)
static ValueInfo findValueInfoForFunc(const Function &F, const Module &M, const ModuleSummaryIndex *ImportSummary)
cl::opt< unsigned > MinClonedColdBytePercent
static std::string getAllocTypeString(uint8_t AllocTypes)
cl::opt< bool > MemProfReportHintedSizes
static cl::opt< bool > AllowRecursiveCallsites("memprof-allow-recursive-callsites", cl::init(false), cl::Hidden, cl::desc("Allow cloning of callsites involved in recursive cycles"))
static cl::opt< bool > VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden, cl::desc("Perform verification checks on CallingContextGraph."))
static void checkNode(const ContextNode< DerivedCCG, FuncTy, CallTy > *Node, bool CheckEdges=true)
static std::string getMemProfFuncName(Twine Base, unsigned CloneNo)
static cl::opt< std::string > MemProfImportSummary("memprof-import-summary", cl::desc("Import summary to use for testing the ThinLTO backend via opt"), cl::Hidden)
static const std::string MemProfCloneSuffix
static cl::opt< bool > AllowRecursiveContexts("memprof-allow-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts through recursive cycles"))
static cl::opt< std::string > DotFilePathPrefix("memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path prefix of the MemProf dot files."))
static cl::opt< bool > VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden, cl::desc("Perform frequent verification checks on nodes."))
static cl::opt< bool > DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden, cl::desc("Dump CallingContextGraph to stdout after each stage."))
cl::opt< bool > MemProfReportHintedSizes("memprof-report-hinted-sizes", cl::init(false), cl::Hidden, cl::desc("Report total allocation sizes of hinted allocations"))
This is the interface to build a ModuleSummaryIndex for a module.
ModuleSummaryIndex.h This file contains the declarations the classes that hold the module index and s...
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
FunctionAnalysisManager FAM
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines generic set operations that may be used on set's of different types,...
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
void print(OutputBuffer &OB) const
DomTreeNode::const_iterator end() const
Alias summary information.
ValueInfo getAliaseeVI() const
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool empty() const
empty - Check if the array is empty.
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
iterator find(const_arg_type_t< KeyT > Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Implements a dense probed hash-table based set.
Lightweight error class with error context and mandatory checking.
Function summary information to aid decisions and implementation of importing.
FunctionType * getFunctionType() const
Returns the FunctionType for me.
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Function and variable summary information to aid decisions and implementation of importing.
static bool isLocalLinkage(LinkageTypes Linkage)
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
GUID getGUID() const
Return a 64-bit global unique ID constructed from global value name (i.e.
std::string getGlobalIdentifier() const
Return the modified name for this global value suitable to be used as the key for a global lookup (e....
@ InternalLinkage
Rename collisions when linking (static functions).
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
const Function * getFunction() const
Return the function this instruction belongs to.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
const MDOperand & getOperand(unsigned I) const
unsigned getNumOperands() const
Return number of MDNode operands.
This class implements a map that also provides access to all stored values in a deterministic order.
size_type count(const KeyT &Key) const
MemProfContextDisambiguation(const ModuleSummaryIndex *Summary=nullptr, bool isSamplePGO=false)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
Class to hold module path string table and global value map, and encapsulate methods for operating on...
static StringRef getOriginalNameBeforePromote(StringRef Name)
Helper to obtain the unpromoted name for a global value (or the original name if not promoted).
ValueInfo getValueInfo(const GlobalValueSummaryMapTy::value_type &R) const
Return a ValueInfo for the index value_type (convenient when iterating index).
uint64_t getStackIdAtIndex(unsigned Index) const
GlobalValueSummary * findSummaryInModule(ValueInfo VI, StringRef ModuleId) const
Find the summary for ValueInfo VI in module ModuleId, or nullptr if not found.
GlobalValue::GUID getGUIDFromOriginalID(GlobalValue::GUID OriginalID) const
Return the GUID for OriginalId in the OidGuidMap.
A Module instance is used to store all the information related to an LLVM module.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
A discriminated union of two or more pointer types, with the discriminator in the low bit of the poin...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM Value Representation.
StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
void swap(DenseSetImpl &RHS)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
An efficient, type-erasing, non-owning reference to a callable.
Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresp...
CallStackIterator end() const
CallStackIterator beginAfterSharedPrefix(CallStack &Other)
This class implements an extremely fast bulk output stream that can only output to a stream.
@ C
The default llvm calling convention, compatible with C.
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
AllocationType getMIBAllocType(const MDNode *MIB)
Returns the allocation type from an MIB metadata node.
bool hasSingleAllocType(uint8_t AllocTypes)
True if the AllocTypes bitmask contains just a single type.
std::string getAllocTypeAttributeString(AllocationType Type)
Returns the string to use in attributes with the given type.
MDNode * getMIBStackNode(const MDNode *MIB)
Returns the stack node from an MIB metadata node.
DiagnosticInfoOptimizationBase::Argument NV
CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner={})
Log all errors (if any) in E to OS.
bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
cl::opt< bool > MemProfRequireDefinitionForPromotion("memprof-require-definition-for-promotion", cl::init(false), cl::Hidden, cl::desc("Require target function definition when promoting indirect calls"))
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void set_subtract(S1Ty &S1, const S2Ty &S2)
set_subtract(A, B) - Compute A := A - B
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr)
Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndex(MemoryBufferRef Buffer)
Parse the specified bitcode buffer, returning the module summary index.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
bool set_union(S1Ty &S1, const S2Ty &S2)
set_union(A, B) - Compute A := A u B, return whether A changed.
cl::opt< bool > SupportsHotColdNew
Indicate we are linking with an allocator that supports hot/cold operator new interfaces.
S1Ty set_intersection(const S1Ty &S1, const S2Ty &S2)
set_intersection(A, B) - Return A ^ B
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
S1Ty set_difference(const S1Ty &S1, const S2Ty &S2)
set_difference(A, B) - Return A - B
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Expected< T > errorOrToExpected(ErrorOr< T > &&EO)
Convert an ErrorOr<T> to an Expected<T>.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
const char * toString(DWARFSectionKind Kind)
Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
bool mayHaveMemprofSummary(const CallBase *CB)
Returns true if the instruction could have memprof metadata, used to ensure consistency between summa...
Implement std::hash so that hash_code can be used in STL containers.
DOTGraphTraits(bool IsSimple=false)
typename GTraits::NodeRef NodeRef
static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter, GraphType)
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
typename GTraits::ChildIteratorType ChildIteratorType
static bool isNodeHidden(NodeRef Node, GraphType)
static std::string getNodeLabel(NodeRef Node, GraphType G)
static std::string getNodeAttributes(NodeRef Node, GraphType)
static NodeRef getNode(const NodePtrTy &P)
static const ContextNode< DerivedCCG, FuncTy, CallTy > * GetCallee(const EdgePtrTy &P)
static ChildIteratorType child_end(NodeRef N)
std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > EdgePtrTy
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
const ContextNode< DerivedCCG, FuncTy, CallTy > * NodeRef
static ChildIteratorType child_begin(NodeRef N)
std::unique_ptr< ContextNode< DerivedCCG, FuncTy, CallTy > > NodePtrTy
static NodeRef getEntryNode(GraphType G)
static nodes_iterator nodes_begin(GraphType G)
static nodes_iterator nodes_end(GraphType G)
Summary of memprof metadata on allocations.
SmallVector< uint8_t > Versions
Summary of memprof callsite metadata.
SmallVector< unsigned > Clones
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
An information struct used to provide DenseMap with the various necessary components for a given valu...
Used in the streaming interface as the general argument type.
typename GraphType::UnknownGraphTypeError NodeRef
Struct that holds a reference to a particular GUID in a global value summary.
ArrayRef< std::unique_ptr< GlobalValueSummary > > getSummaryList() const
GlobalValue::GUID getGUID() const
static SimpleType getSimplifiedValue(IndexCall &Val)
static SimpleType getSimplifiedValue(const IndexCall &Val)
Define a template that can be specialized by smart pointers to reflect the fact that they are automat...