// File preamble: includes, the DEBUG_TYPE used by LLVM_DEBUG/STATISTIC,
// pass statistics counters, and cl::opt command-line flags for the MemProf
// context disambiguation pass. NOTE(review): many declaration lines are
// elided in this excerpt; the fused leading numbers are original line
// numbers left over from extraction.
48#include <unordered_map>
53#define DEBUG_TYPE "memprof-context-disambiguation"
// Counters for clones created during the whole-program phase and during the
// ThinLTO backend phase.
56 "Number of function clones created during whole program analysis");
58 "Number of function clones created during ThinLTO backend");
60 "Number of functions that had clones created during ThinLTO backend");
// Counters for how static allocations were categorized (cold vs. not cold),
// in both phases of the pass.
61STATISTIC(AllocTypeNotCold,
"Number of not cold static allocations (possibly "
62 "cloned) during whole program analysis");
63STATISTIC(AllocTypeCold,
"Number of cold static allocations (possibly cloned) "
64 "during whole program analysis");
66 "Number of not cold static allocations (possibly cloned) during "
68STATISTIC(AllocTypeColdThinBackend,
"Number of cold static allocations "
69 "(possibly cloned) during ThinLTO backend");
71 "Number of original (not cloned) allocations with memprof profiles "
72 "during ThinLTO backend");
74 AllocVersionsThinBackend,
75 "Number of allocation versions (including clones) during ThinLTO backend");
77 "Maximum number of allocation versions created for an original "
78 "allocation during ThinLTO backend");
80 "Number of unclonable ambigous allocations during ThinLTO backend");
// Edges removed when the profiled callee does not match the callee in IR.
82 "Number of edges removed due to mismatched callees (profiled vs IR)");
// Counters for profiled callees resolved by searching through tail calls.
84 "Number of profiled callees found via tail calls");
86 "Aggregate depth of profiled callees found via tail calls");
88 "Maximum depth of profiled callees found via tail calls");
90 "Number of profiled callees found via multiple tail call chains");
// Command-line options: dot-file export, graph dumping and verification,
// a testing hook for the ThinLTO backend, tail-call search depth, and
// hot/cold operator new linkage.
95 cl::desc(
"Specify the path prefix of the MemProf dot files."));
99 cl::desc(
"Export graph to dot files."));
103 cl::desc(
"Dump CallingContextGraph to stdout after each stage."));
107 cl::desc(
"Perform verification checks on CallingContextGraph."));
111 cl::desc(
"Perform frequent verification checks on nodes."));
114 "memprof-import-summary",
115 cl::desc(
"Import summary to use for testing the ThinLTO backend via opt"),
121 cl::desc(
"Max depth to recursively search for missing "
122 "frames through tail calls."));
133 cl::desc(
"Linking with hot/cold operator new interfaces"));
// CRTP base class building and manipulating the callsite context graph.
// DerivedCCG is the IR- or summary-based subclass; FuncTy/CallTy are the
// corresponding function and call representations. Nodes represent
// allocations and stack-frame callsites; edges carry the set of profiled
// context ids flowing between them.
151template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
152class CallsiteContextGraph {
154 CallsiteContextGraph() =
default;
155 CallsiteContextGraph(
const CallsiteContextGraph &) =
default;
156 CallsiteContextGraph(CallsiteContextGraph &&) =
default;
// Main driver entry points: identify needed clones, then assign them to
// function clones.
163 void identifyClones();
170 bool assignFunctions();
176 const CallsiteContextGraph &CCG) {
182 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
184 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
// Writes the graph in DOT format, tagged with the given label.
186 void exportToDot(std::string Label)
const;
// FuncInfo: a (function, clone number) pair; clone 0 is the original.
189 struct FuncInfo final
190 :
public std::pair<FuncTy *, unsigned > {
191 using Base = std::pair<FuncTy *, unsigned>;
192 FuncInfo(
const Base &
B) :
Base(
B) {}
193 FuncInfo(FuncTy *
F =
nullptr,
unsigned CloneNo = 0) :
Base(
F, CloneNo) {}
194 explicit operator bool()
const {
return this->first !=
nullptr; }
195 FuncTy *func()
const {
return this->first; }
196 unsigned cloneNo()
const {
return this->second; }
// CallInfo: a (call, clone number) pair identifying one clone of a call.
200 struct CallInfo final :
public std::pair<CallTy, unsigned > {
201 using Base = std::pair<CallTy, unsigned>;
203 CallInfo(CallTy Call =
nullptr,
unsigned CloneNo = 0)
205 explicit operator bool()
const {
return (
bool)this->first; }
206 CallTy call()
const {
return this->first; }
207 unsigned cloneNo()
const {
return this->second; }
208 void setCloneNo(
unsigned N) { this->second =
N; }
// Printing support (fragment): non-zero clones are annotated "(clone N)".
210 if (!
operator bool()) {
216 OS <<
"\t(clone " << cloneNo() <<
")";
// ContextNode state. Recursive marks a stack id seen more than once in a
// single context (such nodes are not cloned).
239 bool Recursive =
false;
// Union (bitwise OR) of the AllocationType values of this node's contexts.
255 uint8_t AllocTypes = 0;
// Edges to callees/callers; shared_ptr because each edge is held from both
// endpoints.
259 std::vector<std::shared_ptr<ContextEdge>> CalleeEdges;
263 std::vector<std::shared_ptr<ContextEdge>> CallerEdges;
// Clone bookkeeping: clones made of this node, and the original node a
// clone was made from (null on originals).
269 std::vector<ContextNode *> Clones;
272 ContextNode *CloneOf =
nullptr;
274 ContextNode(
bool IsAllocation) : IsAllocation(IsAllocation),
Call() {}
276 ContextNode(
bool IsAllocation,
CallInfo C)
277 : IsAllocation(IsAllocation),
Call(
C) {}
// Registers Clone on the original node's clone list: if this node is itself
// a clone, the new clone is attached to its CloneOf instead.
279 void addClone(ContextNode *Clone) {
281 CloneOf->Clones.push_back(Clone);
282 Clone->CloneOf = CloneOf;
284 Clones.push_back(Clone);
286 Clone->CloneOf =
this;
// Returns the original (non-clone) node for this node.
290 ContextNode *getOrigNode() {
297 unsigned int ContextId);
// Edge lookup/removal helpers on this node's edge lists.
299 ContextEdge *findEdgeFromCallee(
const ContextNode *Callee);
300 ContextEdge *findEdgeFromCaller(
const ContextNode *Caller);
301 void eraseCalleeEdge(
const ContextEdge *Edge);
302 void eraseCallerEdge(
const ContextEdge *Edge);
// True when the node has been matched to an actual call.
306 bool hasCall()
const {
return (
bool)
Call.call(); }
// A node is considered removed once its context id set is empty; it should
// then have no remaining edges.
312 bool isRemoved()
const {
315 if (ContextIds.
empty())
316 assert(CalleeEdges.empty() && CallerEdges.empty() &&
317 "Context ids empty but at least one of callee and caller edges "
319 return ContextIds.
empty();
// ContextEdge: connects a callee node to a caller node, carrying the union
// of alloc types and the set of context ids using this edge.
339 uint8_t AllocTypes = 0;
344 ContextEdge(ContextNode *Callee, ContextNode *Caller, uint8_t
AllocType,
347 ContextIds(ContextIds) {}
// Helpers to drop callee edges whose alloc type degenerated to None.
362 void removeNoneTypeCalleeEdges(ContextNode *
Node);
364 recursivelyRemoveNoneTypeCalleeEdges(ContextNode *
Node,
370 template <
class NodeT,
class IteratorT>
371 std::vector<uint64_t>
// Graph construction: create an allocation node for Call in function F,
// then add stack nodes for each MIB (memory info block) context.
376 ContextNode *addAllocNode(
CallInfo Call,
const FuncTy *
F);
379 template <
class NodeT,
class IteratorT>
380 void addStackNodesForMIB(ContextNode *AllocNode,
388 void updateStackNodes();
392 void handleCallsitesWithMultipleTargets();
// Map from each node to the function containing its call.
399 std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;
402 using EdgeIter =
typename std::vector<std::shared_ptr<ContextEdge>>::iterator;
404 using CallContextInfo = std::tuple<CallTy, std::vector<uint64_t>,
413 void assignStackNodesPostOrder(
425 void propagateDuplicateContextIds(
431 void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode,
// CRTP dispatch to the derived class for representation-specific queries.
438 return static_cast<const DerivedCCG *
>(
this)->getStackId(IdOrIndex);
449 calleesMatch(CallTy Call, EdgeIter &EI,
455 bool calleeMatchesFunc(
456 CallTy Call,
const FuncTy *Func,
const FuncTy *CallerFunc,
457 std::vector<std::pair<CallTy, FuncTy *>> &FoundCalleeChain) {
458 return static_cast<DerivedCCG *
>(
this)->calleeMatchesFunc(
459 Call, Func, CallerFunc, FoundCalleeChain);
464 std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy Call) {
465 return static_cast<DerivedCCG *
>(
this)->getStackIdsWithContextNodesForCall(
470 uint64_t getLastStackId(CallTy Call) {
471 return static_cast<DerivedCCG *
>(
this)->getLastStackId(Call);
// Updates the allocation call and bumps the cold/not-cold statistic.
476 AllocType == AllocationType::Cold ? AllocTypeCold++ : AllocTypeNotCold++;
477 static_cast<DerivedCCG *
>(
this)->updateAllocationCall(Call,
AllocType);
482 void updateCall(
CallInfo &CallerCall, FuncInfo CalleeFunc) {
483 static_cast<DerivedCCG *
>(
this)->updateCall(CallerCall, CalleeFunc);
489 FuncInfo cloneFunctionForCallsite(
490 FuncInfo &Func,
CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
491 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
492 return static_cast<DerivedCCG *
>(
this)->cloneFunctionForCallsite(
493 Func, Call, CallMap, CallsWithMetadataInFunc, CloneNo);
// Label used for nodes in DOT output.
498 std::string getLabel(
const FuncTy *Func,
const CallTy Call,
499 unsigned CloneNo)
const {
500 return static_cast<const DerivedCCG *
>(
this)->getLabel(Func, Call, CloneNo);
// Node lookup by call or stack id; unset removes the call-to-node mapping.
504 ContextNode *getNodeForInst(
const CallInfo &
C);
505 ContextNode *getNodeForAlloc(
const CallInfo &
C);
506 ContextNode *getNodeForStackId(
uint64_t StackId);
509 void unsetNodeForInst(
const CallInfo &
C);
// Edge-moving helpers used during cloning; the optional iterator lets the
// caller keep a stable position in the edge list being mutated.
532 moveEdgeToNewCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
533 EdgeIter *CallerEdgeI =
nullptr,
541 void moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
542 ContextNode *NewCallee,
543 EdgeIter *CallerEdgeI =
nullptr,
544 bool NewClone =
false,
// Core maps: context id -> allocation type, stack id -> node, node storage,
// and the last context id assigned (ids are allocated by incrementing it).
555 std::map<uint32_t, AllocationType> ContextIdToAllocationType;
561 std::map<uint64_t, ContextNode *> StackEntryIdToContextNodeMap;
568 std::vector<std::unique_ptr<ContextNode>> NodeOwner;
574 unsigned int LastContextId = 0;
// Convenience alias templates for the nested CallsiteContextGraph types
// (ContextNode, ContextEdge, FuncInfo, CallInfo). NOTE(review): the
// "template <...> using X =" opening of each alias is elided in this
// excerpt; only the aliased type remains visible.
577template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
579 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode;
580template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
582 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge;
583template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
585 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::FuncInfo;
586template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
588 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::CallInfo;
// IR (regular LTO / whole-program) specialization of the graph: FuncTy is
// llvm::Function and CallTy is Instruction*. Implements the CRTP hooks
// declared in the base class for IR-level queries and cloning.
591class ModuleCallsiteContextGraph
592 :
public CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
595 ModuleCallsiteContextGraph(
600 friend CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
// CRTP hook implementations: callee matching (including searching through
// tail-call chains), stack id extraction, and call/function cloning.
604 bool calleeMatchesFunc(
606 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain);
607 bool findProfiledCalleeThroughTailCalls(
609 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
610 bool &FoundMultipleCalleeChains);
612 std::vector<uint64_t> getStackIdsWithContextNodesForCall(
Instruction *Call);
614 void updateCall(
CallInfo &CallerCall, FuncInfo CalleeFunc);
615 CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
617 cloneFunctionForCallsite(FuncInfo &Func,
CallInfo &Call,
618 std::map<CallInfo, CallInfo> &CallMap,
619 std::vector<CallInfo> &CallsWithMetadataInFunc,
622 unsigned CloneNo)
const;
// IndexCall: the summary-based "call" representation — a tagged pointer
// union over a callsite summary (CallsiteInfo) or an allocation summary
// (AllocInfo). operator-> returns itself so it can be used like a pointer.
631struct IndexCall :
public PointerUnion<CallsiteInfo *, AllocInfo *> {
633 IndexCall(std::nullptr_t) : IndexCall() {}
638 IndexCall *operator->() {
return this; }
// Printing support (fragment): dispatch on which union member is present.
643 if (
auto *AI = llvm::dyn_cast_if_present<AllocInfo *>(getBase())) {
646 auto *CI = llvm::dyn_cast_if_present<CallsiteInfo *>(getBase());
// ThinLTO index specialization of the graph: FuncTy is FunctionSummary and
// CallTy is IndexCall. Operates purely on summaries, synthesizing
// CallsiteInfo records for callees discovered through tail calls.
654class IndexCallsiteContextGraph
655 :
public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
658 IndexCallsiteContextGraph(
// Destructor flushes the synthesized callsite records into their owning
// function summaries (they are created lazily during callee matching).
663 ~IndexCallsiteContextGraph() {
668 for (
auto &
I : FunctionCalleesToSynthesizedCallsiteInfos) {
670 for (
auto &Callsite :
I.second)
671 FS->addCallsite(*Callsite.second);
676 friend CallsiteContextGraph<IndexCallsiteContextGraph,
FunctionSummary,
// CRTP hook implementations mirroring the Module variant, expressed in
// terms of summaries and stack id indices.
680 bool calleeMatchesFunc(
683 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain);
684 bool findProfiledCalleeThroughTailCalls(
686 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
687 bool &FoundMultipleCalleeChains);
688 uint64_t getLastStackId(IndexCall &Call);
689 std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &Call);
691 void updateCall(
CallInfo &CallerCall, FuncInfo CalleeFunc);
694 cloneFunctionForCallsite(FuncInfo &Func,
CallInfo &Call,
695 std::map<CallInfo, CallInfo> &CallMap,
696 std::vector<CallInfo> &CallsWithMetadataInFunc,
698 std::string getLabel(
const FunctionSummary *Func,
const IndexCall &Call,
699 unsigned CloneNo)
const;
// Reverse map from a function summary to its ValueInfo (used for labels).
703 std::map<const FunctionSummary *, ValueInfo> FSToVIMap;
// Callsite records synthesized for tail-call chains, keyed per function;
// written back to the summaries in the destructor above.
714 std::map<ValueInfo, std::unique_ptr<CallsiteInfo>>>
715 FunctionCalleesToSynthesizedCallsiteInfos;
727 :
public DenseMapInfo<std::pair<IndexCall, unsigned>> {};
730 :
public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {};
735struct FieldSeparator {
739 FieldSeparator(
const char *Sep =
", ") : Sep(Sep) {}
756 assert(AllocTypes != (uint8_t)AllocationType::None);
758 ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold))
759 return AllocationType::NotCold;
// Compares a vector of per-context alloc types element-wise against the
// alloc types on a parallel list of edges. A None on either side is
// handled by a dedicated branch (its result is elided in this excerpt —
// presumably treated as compatible; confirm against upstream); otherwise
// both sides are normalized through allocTypeToUse before comparing.
767template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
769 const std::vector<uint8_t> &InAllocTypes,
770 const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>>
773 InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(),
775 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &r) {
779 if (l == (uint8_t)AllocationType::None ||
780 r->AllocTypes == (uint8_t)AllocationType::None)
782 return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes);
// Returns the context node for a call: first tries the allocation map,
// then falls back to the non-allocation (stack callsite) map.
788template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
789typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
790CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst(
792 ContextNode *
Node = getNodeForAlloc(
C);
// Not an allocation: look it up among matched callsites (null if absent).
796 return NonAllocationCallToContextNodeMap.lookup(
C);
// Returns the allocation node registered for this call, or null if the
// call was not recorded as an allocation.
799template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
800typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
801CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForAlloc(
803 return AllocationCallToContextNodeMap.lookup(
C);
// Returns the context node created for the given stack id, if any (the
// not-found return is elided in this excerpt).
806template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
807typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
808CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForStackId(
810 auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId);
811 if (StackEntryNode != StackEntryIdToContextNodeMap.end())
812 return StackEntryNode->second;
// Removes the mapping from a call to its node. The call can be in at most
// one of the two maps, so erase from both (short-circuiting on the first
// hit) and assert neither still contains it.
816template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
817void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::unsetNodeForInst(
819 AllocationCallToContextNodeMap.erase(
C) ||
820 NonAllocationCallToContextNodeMap.erase(
C);
821 assert(!AllocationCallToContextNodeMap.count(
C) &&
822 !NonAllocationCallToContextNodeMap.count(
C));
// Adds ContextId to an existing edge to Caller if one exists; otherwise
// creates a new shared edge and registers it on both endpoints' edge lists.
825template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
826void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
828 unsigned int ContextId) {
829 for (
auto &Edge : CallerEdges) {
830 if (Edge->Caller == Caller) {
// Existing edge: just accumulate the context id (alloc-type update line
// is elided in this excerpt).
832 Edge->getContextIds().insert(ContextId);
836 std::shared_ptr<ContextEdge> Edge = std::make_shared<ContextEdge>(
// The same shared edge object is linked from both the callee's CallerEdges
// and the caller's CalleeEdges.
838 CallerEdges.push_back(Edge);
839 Caller->CalleeEdges.push_back(Edge);
// Removes callee edges whose alloc type has degenerated to None (which
// implies an empty context id set); unlinks each such edge from the callee
// side before erasing it here, keeping the iterator valid via erase().
842template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
843void CallsiteContextGraph<
844 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *
Node) {
845 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();) {
847 if (Edge->AllocTypes == (uint8_t)AllocationType::None) {
848 assert(Edge->ContextIds.empty());
849 Edge->Callee->eraseCallerEdge(Edge.get());
850 EI =
Node->CalleeEdges.erase(EI);
// Linear scan of this node's callee edges for the one reaching Callee
// (the found/not-found returns are elided in this excerpt).
856template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
857typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
858CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
859 findEdgeFromCallee(
const ContextNode *Callee) {
860 for (
const auto &Edge : CalleeEdges)
861 if (Edge->Callee == Callee)
// Linear scan of this node's caller edges for the one coming from Caller
// (the found/not-found returns are elided in this excerpt).
866template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
867typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
868CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
869 findEdgeFromCaller(
const ContextNode *Caller) {
870 for (
const auto &Edge : CallerEdges)
871 if (Edge->Caller == Caller)
// Erases the callee-side entry for Edge from this node's CalleeEdges by
// pointer identity; the edge is expected to be present.
876template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
877void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
878 eraseCalleeEdge(
const ContextEdge *Edge) {
880 CalleeEdges, [Edge](
const std::shared_ptr<ContextEdge> &CalleeEdge) {
881 return CalleeEdge.get() == Edge;
883 assert(EI != CalleeEdges.end());
884 CalleeEdges.erase(EI);
// Erases the caller-side entry for Edge from this node's CallerEdges by
// pointer identity; the edge is expected to be present.
887template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
888void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
889 eraseCallerEdge(
const ContextEdge *Edge) {
891 CallerEdges, [Edge](
const std::shared_ptr<ContextEdge> &CallerEdge) {
892 return CallerEdge.get() == Edge;
894 assert(EI != CallerEdges.end());
895 CallerEdges.erase(EI);
898template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
899uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType(
902 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
903 uint8_t
AllocType = (uint8_t)AllocationType::None;
904 for (
auto Id : ContextIds) {
905 AllocType |= (uint8_t)ContextIdToAllocationType[Id];
// Accumulates alloc types only for ids present in both sets (skipping ids
// missing from Node2Ids); same Cold|NotCold mask idiom as computeAllocType.
913template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
915CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypesImpl(
918 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
919 uint8_t
AllocType = (uint8_t)AllocationType::None;
920 for (
auto Id : Node1Ids) {
921 if (!Node2Ids.
count(Id))
923 AllocType |= (uint8_t)ContextIdToAllocationType[Id];
// Wrapper that iterates over the smaller of the two sets for efficiency,
// delegating to intersectAllocTypesImpl.
931template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
932uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypes(
934 if (Node1Ids.
size() < Node2Ids.
size())
935 return intersectAllocTypesImpl(Node1Ids, Node2Ids);
937 return intersectAllocTypesImpl(Node2Ids, Node1Ids);
// Creates and registers a new allocation node for Call in function F.
// Records the current LastContextId as the node's original id and starts
// with AllocTypes == None (filled in later by addStackNodesForMIB).
940template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
941typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
942CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode(
944 assert(!getNodeForAlloc(Call));
946 std::make_unique<ContextNode>(
true, Call));
947 ContextNode *AllocNode = NodeOwner.back().get();
948 AllocationCallToContextNodeMap[
Call] = AllocNode;
949 NodeToCallingFunc[AllocNode] =
F;
951 AllocNode->OrigStackOrAllocId = LastContextId;
// None until the MIB contexts are attached.
954 AllocNode->AllocTypes = (uint8_t)AllocationType::None;
// Adds one profiled context (MIB) to the graph: assigns a fresh context id
// with its allocation type, then walks the stack frames creating/reusing a
// node per stack id and linking consecutive frames with caller edges that
// carry the new context id.
959template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
960template <
class NodeT,
class IteratorT>
961void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
// Fresh context id for this MIB, tagged with its allocation type.
969 ContextIdToAllocationType[++LastContextId] =
AllocType;
972 AllocNode->AllocTypes |= (uint8_t)
AllocType;
973 AllocNode->ContextIds.insert(LastContextId);
978 ContextNode *PrevNode = AllocNode;
// Walk the stack context outward from the allocation.
985 ContextIter != StackContext.
end(); ++ContextIter) {
986 auto StackId = getStackId(*ContextIter);
987 ContextNode *StackNode = getNodeForStackId(StackId);
// No node yet for this stack id: create and register one.
990 std::make_unique<ContextNode>(
false));
991 StackNode = NodeOwner.back().get();
992 StackEntryIdToContextNodeMap[StackId] = StackNode;
993 StackNode->OrigStackOrAllocId = StackId;
// Same stack id revisited within one context => recursion; such nodes are
// flagged and excluded from cloning.
997 StackNode->Recursive =
true;
998 StackNode->ContextIds.insert(LastContextId);
999 StackNode->AllocTypes |= (uint8_t)
AllocType;
1000 PrevNode->addOrUpdateCallerEdge(StackNode,
AllocType, LastContextId);
1001 PrevNode = StackNode;
// Allocates a fresh context id for each id in StackSequenceContextIds,
// recording the old->new mapping and copying the allocation type; returns
// the set of new ids.
1005template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1007CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds(
1011 for (
auto OldId : StackSequenceContextIds) {
1012 NewContextIds.
insert(++LastContextId);
1013 OldToNewContextIds[OldId].insert(LastContextId);
1014 assert(ContextIdToAllocationType.count(OldId));
// Duplicated contexts keep the allocation type of the original.
1016 ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId];
1018 return NewContextIds;
1021template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1022void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1023 propagateDuplicateContextIds(
1028 for (
auto Id : ContextIds)
1029 if (
auto NewId = OldToNewContextIds.find(Id);
1030 NewId != OldToNewContextIds.end())
1031 NewIds.
insert(NewId->second.begin(), NewId->second.end());
1036 auto UpdateCallers = [&](ContextNode *
Node,
1038 auto &&UpdateCallers) ->
void {
1039 for (
const auto &Edge :
Node->CallerEdges) {
1040 auto Inserted = Visited.insert(Edge.get());
1043 ContextNode *NextNode = Edge->Caller;
1047 if (!NewIdsToAdd.
empty()) {
1048 Edge->getContextIds().insert(NewIdsToAdd.
begin(), NewIdsToAdd.
end());
1049 NextNode->ContextIds.insert(NewIdsToAdd.
begin(), NewIdsToAdd.
end());
1050 UpdateCallers(NextNode, Visited, UpdateCallers);
1056 for (
auto &Entry : AllocationCallToContextNodeMap) {
1057 auto *
Node = Entry.second;
1062 Node->ContextIds.insert(NewIdsToAdd.
begin(), NewIdsToAdd.
end());
1063 UpdateCallers(
Node, Visited, UpdateCallers);
1067template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1068void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
1069 ContextNode *NewNode, ContextNode *OrigNode,
bool TowardsCallee) {
1074 TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges;
1076 for (
auto EI = OrigEdges.begin(); EI != OrigEdges.end();) {
1082 set_subtract(Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds,
1083 NotFoundContextIds);
1084 RemainingContextIds.
swap(NotFoundContextIds);
1086 if (NewEdgeContextIds.
empty()) {
1090 if (TowardsCallee) {
1091 auto NewEdge = std::make_shared<ContextEdge>(
1092 Edge->Callee, NewNode, computeAllocType(NewEdgeContextIds),
1094 NewNode->CalleeEdges.push_back(NewEdge);
1095 NewEdge->Callee->CallerEdges.push_back(NewEdge);
1097 auto NewEdge = std::make_shared<ContextEdge>(
1098 NewNode, Edge->Caller, computeAllocType(NewEdgeContextIds),
1100 NewNode->CallerEdges.push_back(NewEdge);
1101 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
1104 if (Edge->getContextIds().empty()) {
1105 if (TowardsCallee) {
1106 Edge->Callee->eraseCallerEdge(Edge.get());
1107 EI = OrigNode->CalleeEdges.erase(EI);
1109 Edge->Caller->eraseCalleeEdge(Edge.get());
1110 EI = OrigNode->CallerEdges.erase(EI);
// Post-order walk from allocation nodes assigning calls (recorded in
// StackIdToMatchingCalls by updateStackNodes) to context nodes. Callers
// are processed first; processing callers before the current node matters
// because assigning a call sequence may move context ids off this node.
1118template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1119void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1120 assignStackNodesPostOrder(ContextNode *
Node,
1123 &StackIdToMatchingCalls) {
// Iterate over a copy: recursion below may mutate Node->CallerEdges.
1131 auto CallerEdges =
Node->CallerEdges;
1132 for (
auto &Edge : CallerEdges) {
1136 assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls);
// Only non-allocation nodes with recorded matching calls need work.
1145 if (
Node->IsAllocation ||
1146 !StackIdToMatchingCalls.count(
Node->OrigStackOrAllocId))
1149 auto &Calls = StackIdToMatchingCalls[
Node->OrigStackOrAllocId];
// Fast path: a single call with a single stack id maps directly onto this
// node (unless the node is recursive).
1153 if (Calls.size() == 1) {
1154 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[0];
1155 if (Ids.size() == 1) {
1156 assert(SavedContextIds.empty());
1159 if (
Node->Recursive)
1161 Node->setCall(Call);
1162 NonAllocationCallToContextNodeMap[
Call] =
Node;
// General case: several calls share the last stack id; each gets its own
// new node carrying the context ids saved for it by updateStackNodes.
1171 ContextNode *LastNode = getNodeForStackId(LastId);
1175 for (
unsigned I = 0;
I < Calls.size();
I++) {
1176 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
// Empty SavedContextIds marks a call skipped during updateStackNodes.
1179 if (SavedContextIds.empty())
1182 assert(LastId == Ids.back());
1184 ContextNode *FirstNode = getNodeForStackId(Ids[0]);
// Re-verify the saved ids against the current first->last node chain
// (earlier iterations may have invalidated some of them).
1194 ContextNode *PrevNode =
nullptr;
1195 for (
auto Id : Ids) {
1196 ContextNode *CurNode = getNodeForStackId(Id);
1200 assert(!CurNode->Recursive);
1205 auto *Edge = CurNode->findEdgeFromCallee(PrevNode);
1207 SavedContextIds.clear();
1214 if (SavedContextIds.empty())
1217 if (SavedContextIds.empty())
// Create the dedicated node for this call sequence.
1221 NodeOwner.push_back(
1222 std::make_unique<ContextNode>(
false, Call));
1223 ContextNode *NewNode = NodeOwner.back().get();
1224 NodeToCallingFunc[NewNode] =
Func;
1225 NonAllocationCallToContextNodeMap[
Call] = NewNode;
1226 NewNode->ContextIds = SavedContextIds;
1227 NewNode->AllocTypes = computeAllocType(NewNode->ContextIds);
// Splice NewNode in: callee edges from the first node in the sequence,
// caller edges from the last.
1232 connectNewNode(NewNode, FirstNode,
true);
1237 connectNewNode(NewNode, LastNode,
false);
// Remove the moved context ids from the original per-frame nodes and
// their connecting edges, erasing edges that become empty.
1242 for (
auto Id : Ids) {
1243 ContextNode *CurNode = getNodeForStackId(Id);
1249 set_subtract(CurNode->ContextIds, NewNode->ContextIds);
1251 auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode);
1253 set_subtract(PrevEdge->getContextIds(), NewNode->ContextIds);
1254 if (PrevEdge->getContextIds().empty()) {
1255 PrevNode->eraseCallerEdge(PrevEdge);
1256 CurNode->eraseCalleeEdge(PrevEdge);
1264template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1265void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
1274 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
1275 for (
auto &Call : CallsWithMetadata) {
1277 if (AllocationCallToContextNodeMap.count(Call))
1279 auto StackIdsWithContextNodes =
1280 getStackIdsWithContextNodesForCall(
Call.call());
1283 if (StackIdsWithContextNodes.empty())
1287 StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back(
1288 {
Call.call(), StackIdsWithContextNodes, Func, {}});
1299 for (
auto &It : StackIdToMatchingCalls) {
1300 auto &Calls = It.getSecond();
1302 if (Calls.size() == 1) {
1303 auto &Ids = std::get<1>(Calls[0]);
1304 if (Ids.size() == 1)
1313 std::stable_sort(Calls.begin(), Calls.end(),
1314 [](
const CallContextInfo &
A,
const CallContextInfo &
B) {
1315 auto &IdsA = std::get<1>(A);
1316 auto &IdsB = std::get<1>(B);
1317 return IdsA.size() > IdsB.size() ||
1318 (IdsA.size() == IdsB.size() && IdsA < IdsB);
1325 ContextNode *LastNode = getNodeForStackId(LastId);
1329 if (LastNode->Recursive)
1337 for (
unsigned I = 0;
I < Calls.size();
I++) {
1338 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
1339 assert(SavedContextIds.empty());
1340 assert(LastId == Ids.back());
1348 ContextNode *PrevNode = LastNode;
1349 ContextNode *CurNode = LastNode;
1354 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
1356 CurNode = getNodeForStackId(Id);
1360 if (CurNode->Recursive) {
1365 auto *Edge = CurNode->findEdgeFromCaller(PrevNode);
1383 set_intersect(StackSequenceContextIds, Edge->getContextIds());
1386 if (StackSequenceContextIds.
empty()) {
1399 if (Ids.back() != getLastStackId(Call)) {
1400 for (
const auto &PE : LastNode->CallerEdges) {
1401 set_subtract(StackSequenceContextIds, PE->getContextIds());
1402 if (StackSequenceContextIds.
empty())
1406 if (StackSequenceContextIds.
empty())
1412 bool DuplicateContextIds =
false;
1413 if (
I + 1 < Calls.size()) {
1414 auto NextIds = std::get<1>(Calls[
I + 1]);
1415 DuplicateContextIds = Ids == NextIds;
1424 OldToNewContextIds.
reserve(OldToNewContextIds.
size() +
1425 StackSequenceContextIds.
size());
1428 ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds)
1429 : StackSequenceContextIds;
1430 assert(!SavedContextIds.empty());
1432 if (!DuplicateContextIds) {
1436 set_subtract(LastNodeContextIds, StackSequenceContextIds);
1437 if (LastNodeContextIds.
empty())
1444 propagateDuplicateContextIds(OldToNewContextIds);
1455 for (
auto &Entry : AllocationCallToContextNodeMap)
1456 assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls);
// Fragment of ModuleCallsiteContextGraph::getLastStackId (header elided):
// reads the call's !callsite metadata and returns the last (outermost
// recorded) frame id from the callsite context.
1461 Call->getMetadata(LLVMContext::MD_callsite));
1462 return CallsiteContext.
back();
// Summary-based variant: the callsite context stores indices into the
// index's stack id table, so the last index is translated through
// Index.getStackIdAtIndex.
1465uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &Call) {
1468 CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(
Call.getBase()));
1470 return Index.getStackIdAtIndex(CallsiteContext.
back());
// DOT label for an IR call: "<caller name> -> <called function name>".
// NOTE(review): uses getCalledFunction()->getName() directly — assumes a
// direct call with a known callee at labeling time.
1483std::string ModuleCallsiteContextGraph::getLabel(
const Function *Func,
1485 unsigned CloneNo)
const {
1486 return (
Twine(
Call->getFunction()->getName()) +
" -> " +
1487 cast<CallBase>(Call)->getCalledFunction()->getName())
// DOT label for a summary call: resolves the function's name via FSToVIMap;
// allocations are labeled "<name> -> alloc", callsites use the callee
// recorded for the requested clone number.
1491std::string IndexCallsiteContextGraph::getLabel(
const FunctionSummary *Func,
1492 const IndexCall &Call,
1493 unsigned CloneNo)
const {
1494 auto VI = FSToVIMap.find(Func);
1495 assert(VI != FSToVIMap.end());
1496 if (isa<AllocInfo *>(
Call.getBase()))
1497 return (
VI->second.name() +
" -> alloc").str();
// Otherwise it must be a callsite summary.
1499 auto *Callsite = dyn_cast_if_present<CallsiteInfo *>(
Call.getBase());
1500 return (
VI->second.name() +
" -> " +
1502 Callsite->Clones[CloneNo]))
// IR variant: builds the stack-id list for a call from its !callsite
// metadata, delegating to the shared template helper.
1507std::vector<uint64_t>
1508ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall(
1511 Call->getMetadata(LLVMContext::MD_callsite));
1512 return getStackIdsWithContextNodes<MDNode, MDNode::op_iterator>(
// Summary variant: same as above but reading the CallsiteInfo record out
// of the IndexCall union.
1516std::vector<uint64_t>
1517IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &Call) {
1520 CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(
Call.getBase()));
// Shared helper: walks a call's stack context and collects the stack ids
// that already have context nodes (stopping when an id has no node — the
// early-exit branch is elided in this excerpt).
1526template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1527template <
class NodeT,
class IteratorT>
1528std::vector<uint64_t>
1529CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes(
1531 std::vector<uint64_t> StackIds;
1532 for (
auto IdOrIndex : CallsiteContext) {
1533 auto StackId = getStackId(IdOrIndex);
1534 ContextNode *
Node = getNodeForStackId(StackId);
1537 StackIds.push_back(StackId);
// IR constructor: scans every instruction for !memprof/!callsite metadata,
// creating allocation nodes (with one context per MIB) and recording
// callsite-only calls, then runs the stack-node update and multi-target
// handling passes. Consumed metadata is stripped as it is processed.
1542ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
1545 :
Mod(
M), OREGetter(OREGetter) {
1547 std::vector<CallInfo> CallsWithMetadata;
1548 for (
auto &BB :
F) {
1549 for (
auto &
I : BB) {
1550 if (!isa<CallBase>(
I))
// Allocation with a memprof profile: build its node and contexts.
1552 if (
auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof)) {
1553 CallsWithMetadata.push_back(&
I);
1554 auto *AllocNode = addAllocNode(&
I, &
F);
1555 auto *CallsiteMD =
I.getMetadata(LLVMContext::MD_callsite);
// One MIB operand per profiled context.
1559 for (
auto &MDOp : MemProfMD->operands()) {
1560 auto *MIBMD = cast<const MDNode>(MDOp);
1564 addStackNodesForMIB<MDNode, MDNode::op_iterator>(
1565 AllocNode, StackContext, CallsiteContext,
1568 assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None);
// Memprof and callsite metadata on allocations is consumed here.
1571 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
1572 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
// Plain callsite (no memprof): remember for stack-node matching.
1575 else if (
I.getMetadata(LLVMContext::MD_callsite))
1576 CallsWithMetadata.push_back(&
I);
1579 if (!CallsWithMetadata.empty())
1580 FuncToCallsWithMetadata[&
F] = CallsWithMetadata;
1584 dbgs() <<
"CCG before updating call stack chains:\n";
1589 exportToDot(
"prestackupdate");
1593 handleCallsitesWithMultipleTargets();
// Strip remaining callsite metadata now that matching is complete.
1596 for (
auto &FuncEntry : FuncToCallsWithMetadata)
1597 for (
auto &Call : FuncEntry.second)
1598 Call.call()->setMetadata(LLVMContext::MD_callsite,
nullptr);
// Summary constructor: walks the index, and for each prevailing function
// summary builds allocation nodes from its alloc records (one context per
// MIB) and collects callsite records, then runs the same stack-node update
// and multi-target passes as the IR variant.
1601IndexCallsiteContextGraph::IndexCallsiteContextGraph(
1608 for (
auto &S :
VI.getSummaryList()) {
// Skip non-prevailing copies of the symbol.
1617 !isPrevailing(
VI.getGUID(), S.get()))
1619 auto *
FS = dyn_cast<FunctionSummary>(S.get());
1622 std::vector<CallInfo> CallsWithMetadata;
1623 if (!
FS->allocs().empty()) {
1624 for (
auto &AN :
FS->mutableAllocs()) {
// Allocations with no MIBs carry no context information.
1629 if (AN.MIBs.empty())
1631 CallsWithMetadata.push_back({&AN});
1632 auto *AllocNode = addAllocNode({&AN},
FS);
1639 for (
auto &MIB : AN.MIBs) {
1642 addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>(
1643 AllocNode, StackContext, EmptyContext, MIB.AllocType);
1645 assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None);
// Initialize version 0 (the original) to the single alloc type chosen
// for the node's combined contexts.
1650 AN.Versions[0] = (uint8_t)allocTypeToUse(AllocNode->AllocTypes);
1654 if (!
FS->callsites().empty())
1655 for (
auto &SN :
FS->mutableCallsites())
1656 CallsWithMetadata.push_back({&SN});
1658 if (!CallsWithMetadata.empty())
1659 FuncToCallsWithMetadata[FS] = CallsWithMetadata;
1661 if (!
FS->allocs().empty() || !
FS->callsites().empty())
1667 dbgs() <<
"CCG before updating call stack chains:\n";
1672 exportToDot(
"prestackupdate");
1676 handleCallsitesWithMultipleTargets();
// Reconciles profiled callees with actual callees: for every matched
// callsite node, checks each callee edge against the IR/summary; edges
// whose callees cannot be matched (even via tail-call chains) are removed,
// and fully mismatched nodes are dropped from the callsite map. Nodes
// synthesized for tail calls are then registered as matched callsites.
1679template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1680void CallsiteContextGraph<DerivedCCG, FuncTy,
1681 CallTy>::handleCallsitesWithMultipleTargets() {
1696 for (
auto Entry = NonAllocationCallToContextNodeMap.begin();
1697 Entry != NonAllocationCallToContextNodeMap.end();) {
1698 auto *
Node = Entry->second;
1701 bool Removed =
false;
1703 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();) {
// Callees never matched to a call carry nothing to verify.
1705 if (!Edge->Callee->hasCall()) {
1709 assert(NodeToCallingFunc.count(Edge->Callee));
// calleesMatch may rewrite edges (tail-call synthesis) and advances EI.
1711 if (calleesMatch(Call, EI, TailCallToContextNodeMap))
1713 RemovedEdgesWithMismatchedCallees++;
1717 Entry = NonAllocationCallToContextNodeMap.erase(Entry);
// Register nodes created for tail-call frames as matched callsites.
1728 for (
auto &[Call,
Node] : TailCallToContextNodeMap)
1729 NonAllocationCallToContextNodeMap[
Call] =
Node;
// Fragment of IndexCallsiteContextGraph::getStackId (header elided):
// summaries store indices into the index's stack id table, so translate
// through the index.
1740 return Index.getStackIdAtIndex(IdOrIndex);
1743template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1744bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::calleesMatch(
1745 CallTy Call, EdgeIter &EI,
1748 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee];
1749 const FuncTy *CallerFunc = NodeToCallingFunc[Edge->Caller];
1752 std::vector<std::pair<CallTy, FuncTy *>> FoundCalleeChain;
1753 if (!calleeMatchesFunc(Call, ProfiledCalleeFunc, CallerFunc,
1754 FoundCalleeChain)) {
1760 if (FoundCalleeChain.empty()) {
1765 auto AddEdge = [Edge, &EI](ContextNode *
Caller, ContextNode *
Callee) {
1766 auto *CurEdge =
Callee->findEdgeFromCaller(Caller);
1770 CurEdge->ContextIds.insert(Edge->ContextIds.begin(),
1771 Edge->ContextIds.end());
1772 CurEdge->AllocTypes |= Edge->AllocTypes;
1777 auto NewEdge = std::make_shared<ContextEdge>(
1778 Callee, Caller, Edge->AllocTypes, Edge->ContextIds);
1779 Callee->CallerEdges.push_back(NewEdge);
1780 if (Caller == Edge->Caller) {
1784 EI =
Caller->CalleeEdges.insert(EI, NewEdge);
1787 "Iterator position not restored after insert and increment");
1789 Caller->CalleeEdges.push_back(NewEdge);
1794 auto *CurCalleeNode = Edge->Callee;
1795 for (
auto &[NewCall, Func] : FoundCalleeChain) {
1796 ContextNode *NewNode =
nullptr;
1798 if (TailCallToContextNodeMap.
count(NewCall)) {
1799 NewNode = TailCallToContextNodeMap[NewCall];
1800 NewNode->ContextIds.
insert(Edge->ContextIds.begin(),
1801 Edge->ContextIds.end());
1802 NewNode->AllocTypes |= Edge->AllocTypes;
1804 FuncToCallsWithMetadata[
Func].push_back({NewCall});
1806 NodeOwner.push_back(
1807 std::make_unique<ContextNode>(
false, NewCall));
1808 NewNode = NodeOwner.back().get();
1809 NodeToCallingFunc[NewNode] =
Func;
1810 TailCallToContextNodeMap[NewCall] = NewNode;
1811 NewNode->ContextIds = Edge->ContextIds;
1812 NewNode->AllocTypes = Edge->AllocTypes;
1816 AddEdge(NewNode, CurCalleeNode);
1818 CurCalleeNode = NewNode;
1822 AddEdge(Edge->Caller, CurCalleeNode);
1825 Edge->Callee->eraseCallerEdge(Edge.get());
1826 EI = Edge->Caller->CalleeEdges.erase(EI);
// Recursively searches the IR (Module) for a unique chain of tail calls from
// CurCallee down to ProfiledCallee, recording each discovered callsite in
// FoundCalleeChain. Returns true iff exactly one chain was found; sets
// FoundMultipleCalleeChains when more than one chain exists.
// NOTE(review): extraction fragment — some original lines are elided.
1831bool ModuleCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
1833 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
1834 bool &FoundMultipleCalleeChains) {
1841 FoundCalleeChain.push_back({Callsite,
F});
// Resolve the current callee to a Function, looking through an alias.
1844 auto *CalleeFunc = dyn_cast<Function>(CurCallee);
1846 auto *Alias = dyn_cast<GlobalAlias>(CurCallee);
1848 CalleeFunc = dyn_cast<Function>(Alias->getAliasee());
1856 bool FoundSingleCalleeChain =
false;
// Scan every instruction of the callee for tail calls.
1857 for (
auto &BB : *CalleeFunc) {
1858 for (
auto &
I : BB) {
1859 auto *CB = dyn_cast<CallBase>(&
I);
1860 if (!CB || !CB->isTailCall())
1862 auto *CalledValue = CB->getCalledOperand();
1863 auto *CalledFunction = CB->getCalledFunction();
// Look through casts (and below, aliases) to find the called Function.
1864 if (CalledValue && !CalledFunction) {
1865 CalledValue = CalledValue->stripPointerCasts();
1867 CalledFunction = dyn_cast<Function>(CalledValue);
1871 if (
auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
1872 assert(!CalledFunction &&
1873 "Expected null called function in callsite for alias");
1874 CalledFunction = dyn_cast<Function>(GA->getAliaseeObject());
1876 if (!CalledFunction)
// Direct hit: the tail call targets the profiled callee. A second hit
// makes the chain ambiguous.
1878 if (CalledFunction == ProfiledCallee) {
1879 if (FoundSingleCalleeChain) {
1880 FoundMultipleCalleeChains =
true;
1883 FoundSingleCalleeChain =
true;
// Update tail-call discovery statistics (count, aggregate and max depth).
1884 FoundProfiledCalleeCount++;
1885 FoundProfiledCalleeDepth +=
Depth;
1886 if (
Depth > FoundProfiledCalleeMaxDepth)
1887 FoundProfiledCalleeMaxDepth =
Depth;
1888 SaveCallsiteInfo(&
I, CalleeFunc);
1889 }
// Otherwise recurse one level deeper through this tail call.
else if (findProfiledCalleeThroughTailCalls(
1890 ProfiledCallee, CalledFunction,
Depth + 1,
1891 FoundCalleeChain, FoundMultipleCalleeChains)) {
1892 if (FoundMultipleCalleeChains)
1894 if (FoundSingleCalleeChain) {
1895 FoundMultipleCalleeChains =
true;
1898 FoundSingleCalleeChain =
true;
1899 SaveCallsiteInfo(&
I, CalleeFunc);
1904 return FoundSingleCalleeChain;
// Returns whether the IR call's actual callee matches the profiled function
// Func, either directly, through an alias, or via a unique tail-call chain
// (in which case the chain is returned in FoundCalleeChain).
// NOTE(review): extraction fragment — some original lines are elided.
1907bool ModuleCallsiteContextGraph::calleeMatchesFunc(
1909 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain) {
1910 auto *CB = dyn_cast<CallBase>(Call);
1911 if (!CB->getCalledOperand())
1913 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
// Direct match, or match through a global alias of Func.
1914 auto *CalleeFunc = dyn_cast<Function>(CalleeVal);
1915 if (CalleeFunc == Func)
1917 auto *Alias = dyn_cast<GlobalAlias>(CalleeVal);
1918 if (Alias && Alias->getAliasee() == Func)
// No direct match: try to reach Func through a unique tail-call chain.
1929 bool FoundMultipleCalleeChains =
false;
1930 if (!findProfiledCalleeThroughTailCalls(Func, CalleeVal,
Depth,
1932 FoundMultipleCalleeChains)) {
1933 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: "
1934 <<
Func->getName() <<
" from " << CallerFunc->
getName()
1935 <<
" that actually called " << CalleeVal->getName()
1936 << (FoundMultipleCalleeChains
1937 ?
" (found multiple possible chains)"
// Track how often multiple (ambiguous) chains blocked a match.
1940 if (FoundMultipleCalleeChains)
1941 FoundProfiledCalleeNonUniquelyCount++;
// Summary-index (ThinLTO) counterpart of the tail-call search: walks
// FunctionSummary call edges that have the tail-call flag, looking for a
// unique chain from CurCallee to ProfiledCallee. Synthesized CallsiteInfos
// for discovered links are cached in FunctionCalleesToSynthesizedCallsiteInfos.
// NOTE(review): extraction fragment — some original lines are elided.
1948bool IndexCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
1950 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
1951 bool &FoundMultipleCalleeChains) {
// Create (once) and reuse a synthesized CallsiteInfo for this FS/Callee pair.
1960 if (!FunctionCalleesToSynthesizedCallsiteInfos.count(FS) ||
1961 !FunctionCalleesToSynthesizedCallsiteInfos[
FS].count(Callee))
1964 FunctionCalleesToSynthesizedCallsiteInfos[FS][Callee] =
1967 FunctionCalleesToSynthesizedCallsiteInfos[
FS][
Callee].get();
1968 FoundCalleeChain.push_back({NewCallsiteInfo,
FS});
1975 bool FoundSingleCalleeChain =
false;
// Skip non-prevailing summaries for this GUID.
1978 !isPrevailing(CurCallee.
getGUID(), S.get()))
1980 auto *
FS = dyn_cast<FunctionSummary>(S->getBaseObject());
// If the summary is an alias, record the aliasee ValueInfo for FSToVIMap.
1983 auto FSVI = CurCallee;
1984 auto *AS = dyn_cast<AliasSummary>(S.get());
1986 FSVI = AS->getAliaseeVI();
// Examine only call edges marked as tail calls.
1987 for (
auto &CallEdge :
FS->calls()) {
1988 if (!CallEdge.second.hasTailCall())
1990 if (CallEdge.first == ProfiledCallee) {
// A second chain to the profiled callee makes the result ambiguous.
1991 if (FoundSingleCalleeChain) {
1992 FoundMultipleCalleeChains =
true;
1995 FoundSingleCalleeChain =
true;
// Update tail-call discovery statistics (count, aggregate and max depth).
1996 FoundProfiledCalleeCount++;
1997 FoundProfiledCalleeDepth +=
Depth;
1998 if (
Depth > FoundProfiledCalleeMaxDepth)
1999 FoundProfiledCalleeMaxDepth =
Depth;
2000 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
2002 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
2003 FSToVIMap[
FS] = FSVI;
2004 }
// Otherwise recurse one level deeper through this summary tail call.
else if (findProfiledCalleeThroughTailCalls(
2005 ProfiledCallee, CallEdge.first,
Depth + 1,
2006 FoundCalleeChain, FoundMultipleCalleeChains)) {
2007 if (FoundMultipleCalleeChains)
2009 if (FoundSingleCalleeChain) {
2010 FoundMultipleCalleeChains =
true;
2013 FoundSingleCalleeChain =
true;
2014 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
2016 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
2017 FSToVIMap[
FS] = FSVI;
2022 return FoundSingleCalleeChain;
// Summary-index counterpart of calleeMatchesFunc: compares the callsite's
// recorded Callee ValueInfo against the profiled function's ValueInfo
// (from FSToVIMap), falling back to a unique tail-call chain search.
// NOTE(review): extraction fragment — some original lines are elided.
2025bool IndexCallsiteContextGraph::calleeMatchesFunc(
2028 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain) {
// The callsite record holds the callee ValueInfo directly.
2030 dyn_cast_if_present<CallsiteInfo *>(
Call.getBase())->Callee;
// Look through an alias summary at the head of the callee's summary list.
2034 Callee.getSummaryList().empty()
2036 : dyn_cast<AliasSummary>(
Callee.getSummaryList()[0].get());
2037 assert(FSToVIMap.count(Func));
2038 auto FuncVI = FSToVIMap[
Func];
2039 if (Callee == FuncVI ||
// No direct match: try to reach FuncVI through a unique tail-call chain.
2054 bool FoundMultipleCalleeChains =
false;
2055 if (!findProfiledCalleeThroughTailCalls(
2056 FuncVI, Callee,
Depth, FoundCalleeChain, FoundMultipleCalleeChains)) {
2057 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: " << FuncVI
2058 <<
" from " << FSToVIMap[CallerFunc]
2059 <<
" that actually called " << Callee
2060 << (FoundMultipleCalleeChains
2061 ?
" (found multiple possible chains)"
// Track how often multiple (ambiguous) chains blocked a match.
2064 if (FoundMultipleCalleeChains)
2065 FoundProfiledCalleeNonUniquelyCount++;
// Fragment: tests the NotCold/Cold bits of an AllocTypes bitmask.
// NOTE(review): enclosing function header and branch bodies are elided by
// extraction — presumably this maps the mask to an attribute string and
// bumps the AllocType* statistics; confirm against the full source.
2076 if (AllocTypes & (uint8_t)AllocationType::NotCold)
2078 if (AllocTypes & (uint8_t)AllocationType::Cold)
// Debug dump of a single context node.
// NOTE(review): the body is elided by extraction; only the signature is
// visible here — presumably it forwards to print() on dbgs().
2083template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2084void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump()
// Prints one context node: its address, flags, sorted context ids, callee
// and caller edge lists, and its clone relationships.
// NOTE(review): extraction fragment — some original lines are elided.
2090template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2091void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
2093 OS <<
"Node " <<
this <<
"\n";
2097 OS <<
" (recursive)";
// Context ids are sorted for deterministic output.
2100 OS <<
"\tContextIds:";
2101 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
2102 std::sort(SortedIds.begin(), SortedIds.end());
2103 for (
auto Id : SortedIds)
2106 OS <<
"\tCalleeEdges:\n";
2107 for (
auto &Edge : CalleeEdges)
2108 OS <<
"\t\t" << *Edge <<
"\n";
2109 OS <<
"\tCallerEdges:\n";
2110 for (
auto &Edge : CallerEdges)
2111 OS <<
"\t\t" << *Edge <<
"\n";
// A node lists either its clones or the original node it was cloned from.
2112 if (!Clones.empty()) {
2115 for (
auto *Clone : Clones)
2118 }
else if (CloneOf) {
2119 OS <<
"\tClone of " << CloneOf <<
"\n";
// Debug dump of a single context edge.
// NOTE(review): the body is elided by extraction; only the signature is
// visible here — presumably it forwards to print() on dbgs().
2123template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2124void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::dump()
// Prints one context edge; context ids are sorted for deterministic output.
// NOTE(review): extraction fragment — some original lines are elided.
2130template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2131void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::print(
2135 OS <<
" ContextIds:";
2136 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
2137 std::sort(SortedIds.begin(), SortedIds.end());
2138 for (
auto Id : SortedIds)
// Debug dump of the whole graph.
// NOTE(review): the body is elided by extraction; presumably forwards to
// print() on dbgs().
2142template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2143void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::dump()
const {
// Prints every live node of the graph (removed nodes are skipped), using
// the GraphTraits nodes<> range over the graph pointer.
// NOTE(review): extraction fragment — some original lines are elided.
2147template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2148void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
2150 OS <<
"Callsite Context Graph:\n";
2151 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2152 for (
const auto Node : nodes<GraphType>(
this)) {
// Skip nodes that have been removed from the graph.
2153 if (
Node->isRemoved())
// Verification helper: an edge must carry a non-None alloc type and a
// non-empty context id set.
// NOTE(review): extraction fragment — the function-name line is elided.
2160template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2162 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) {
2165 assert(Edge->AllocTypes != (uint8_t)AllocationType::None);
2166 assert(!Edge->ContextIds.empty());
// Verification helper: checks a node's invariants — its context id set must
// equal the union of its caller edges' ids (with a visible exception clause
// elided here) and the union of its callee edges' ids; optionally checks
// each edge too (CheckEdges).
// NOTE(review): extraction fragment — some original lines are elided.
2169template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2171 bool CheckEdges =
true) {
2172 if (
Node->isRemoved())
// Accumulate the union of caller-edge context ids.
2176 if (
Node->CallerEdges.size()) {
2177 auto EI =
Node->CallerEdges.begin();
2178 auto &FirstEdge = *EI;
2181 for (; EI !=
Node->CallerEdges.end(); EI++) {
2182 const auto &Edge = *EI;
2184 checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
2185 set_union(CallerEdgeContextIds, Edge->ContextIds);
2189 assert(
Node->ContextIds == CallerEdgeContextIds ||
// Accumulate the union of callee-edge context ids.
2192 if (
Node->CalleeEdges.size()) {
2193 auto EI =
Node->CalleeEdges.begin();
2194 auto &FirstEdge = *EI;
2197 for (; EI !=
Node->CalleeEdges.end(); EI++) {
2198 const auto &Edge = *EI;
2200 checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
2201 set_union(CalleeEdgeContextIds, Edge->ContextIds);
2203 assert(
Node->ContextIds == CalleeEdgeContextIds);
// Whole-graph verification: checks every node (without per-node edge checks,
// hence the false argument) and then every caller edge once.
// NOTE(review): extraction fragment — some original lines are elided.
2207template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2208void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check()
const {
2209 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2210 for (
const auto Node : nodes<GraphType>(
this)) {
2211 checkNode<DerivedCCG, FuncTy, CallTy>(
Node,
false);
2212 for (
auto &Edge :
Node->CallerEdges)
2213 checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
// GraphTraits specialization fragment for CallsiteContextGraph, enabling
// generic graph iteration (nodes<>, children) and DOT export.
// NOTE(review): heavily elided by extraction — the struct header, getNode /
// GetCallee helpers and iterator typedef bodies are missing; only these
// type aliases and two statements are visible.
2217template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2219 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2220 using NodeRef =
const ContextNode<DerivedCCG, FuncTy, CallTy> *;
2222 using NodePtrTy = std::unique_ptr<ContextNode<DerivedCCG, FuncTy, CallTy>>;
2227 decltype(&getNode)>;
// Entry node is the first node owned by the graph.
2238 return G->NodeOwner.begin()->get();
2241 using EdgePtrTy = std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>;
2242 static const ContextNode<DerivedCCG, FuncTy, CallTy> *
2250 decltype(&GetCallee)>;
// DOTGraphTraits fragment: builds node labels (orig id, call label,
// recursive/external markers), node/edge attributes (context-id tooltips
// and alloc-type fill colors), hides removed nodes, and formats helper
// strings (context-id lists, colors, node ids).
// NOTE(review): heavily elided by extraction — the struct header and several
// member-function signatures are missing; comments cover visible code only.
2261template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2266 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
// Node label: original stack/alloc id, then the call label or "null call",
// plus recursive/external markers.
2272 std::string LabelString =
2273 (
Twine(
"OrigId: ") + (Node->IsAllocation ?
"Alloc" :
"") +
2274 Twine(Node->OrigStackOrAllocId))
2276 LabelString +=
"\n";
2277 if (Node->hasCall()) {
2278 auto Func =
G->NodeToCallingFunc.find(Node);
2279 assert(Func !=
G->NodeToCallingFunc.end());
2281 G->getLabel(Func->second, Node->Call.call(), Node->Call.cloneNo());
2283 LabelString +=
"null call";
2284 if (Node->Recursive)
2285 LabelString +=
" (recursive)";
2287 LabelString +=
" (external)";
// Node attributes: tooltip carries context ids, fill color the alloc type.
2294 getContextIds(Node->ContextIds) +
"\"")
2297 (
Twine(
",fillcolor=\"") + getColor(Node->AllocTypes) +
"\"").str();
2299 if (Node->CloneOf) {
// Edge attributes mirror the node attributes for the current child edge.
2309 auto &Edge = *(ChildIter.getCurrent());
2310 return (
Twine(
"tooltip=\"") + getContextIds(Edge->ContextIds) +
"\"" +
2311 Twine(
",fillcolor=\"") + getColor(Edge->AllocTypes) +
"\"")
// Removed nodes are hidden from the DOT output.
2318 return Node->isRemoved();
// Context-id list, elided to a count when there are 100 or more ids.
2323 std::string IdString =
"ContextIds:";
2324 if (ContextIds.
size() < 100) {
2325 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
2326 std::sort(SortedIds.begin(), SortedIds.end());
2327 for (
auto Id : SortedIds)
2328 IdString += (
" " +
Twine(Id)).str();
2330 IdString += (
" (" +
Twine(ContextIds.
size()) +
" ids)").str();
// Alloc-type to DOT color mapping (only this fallback color is visible).
2335 static std::string getColor(uint8_t AllocTypes) {
2344 return "mediumorchid1";
// Stable DOT node id derived from the node's address.
2348 static std::string getNodeId(NodeRef
Node) {
2349 std::stringstream SStream;
2350 SStream << std::hex <<
"N0x" << (
unsigned long long)
Node;
2351 std::string
Result = SStream.str();
// Writes the graph to a dot file tagged with Label.
// NOTE(review): the body is elided by extraction; presumably it uses the
// DOTGraphTraits above and the -memprof-dot-file-path-prefix option.
2356template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2357void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot(
2358 std::string Label)
const {
// Creates a brand-new clone of the edge's callee node and moves the edge
// (or the ContextIdsToMove subset) onto it via moveEdgeToExistingCalleeClone
// (the true argument marks the callee as newly cloned).
// NOTE(review): extraction fragment — some original lines are elided.
2363template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2364typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
2365CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
2366 const std::shared_ptr<ContextEdge> &Edge, EdgeIter *CallerEdgeI,
2368 ContextNode *
Node = Edge->Callee;
// The graph owns the clone; it copies the callee's call and allocation flag.
2369 NodeOwner.push_back(
2370 std::make_unique<ContextNode>(
Node->IsAllocation,
Node->Call));
2371 ContextNode *Clone = NodeOwner.back().get();
2372 Node->addClone(Clone);
// The clone lives in the same function as the original callee.
2374 NodeToCallingFunc[Clone] = NodeToCallingFunc[
Node];
2375 moveEdgeToExistingCalleeClone(Edge, Clone, CallerEdgeI,
true,
// Moves a caller edge (entirely, or just the ContextIdsToMove subset) from
// its current callee onto NewCallee, which must be a clone of the same
// original node. Handles merging into an existing caller edge on NewCallee,
// recomputing alloc types, and migrating the moved ids down the old
// callee's own callee edges.
// NOTE(review): extraction fragment — some original lines are elided;
// comments describe only the visible statements.
2380template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2381void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
2382 moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
2383 ContextNode *NewCallee, EdgeIter *CallerEdgeI,
2388 assert(NewCallee->getOrigNode() == Edge->Callee->getOrigNode());
2390 ContextNode *OldCallee = Edge->Callee;
// An edge from the same caller may already exist on the new callee.
2394 auto ExistingEdgeToNewCallee = NewCallee->findEdgeFromCaller(Edge->Caller);
// Empty ContextIdsToMove means: move all of the edge's context ids.
2398 if (ContextIdsToMove.
empty())
2399 ContextIdsToMove = Edge->getContextIds();
// Case 1: the whole edge moves — detach it from the old callee (via the
// iterator when provided so the caller's iteration stays valid).
2403 if (Edge->getContextIds().size() == ContextIdsToMove.
size()) {
2406 *CallerEdgeI = OldCallee->CallerEdges.
erase(*CallerEdgeI);
2408 OldCallee->eraseCallerEdge(Edge.get());
2409 if (ExistingEdgeToNewCallee) {
// Merge into the existing edge and drop the now-redundant moved edge.
2412 ExistingEdgeToNewCallee->getContextIds().insert(ContextIdsToMove.
begin(),
2413 ContextIdsToMove.
end());
2414 ExistingEdgeToNewCallee->AllocTypes |= Edge->AllocTypes;
2415 assert(Edge->ContextIds == ContextIdsToMove);
2416 Edge->ContextIds.clear();
2418 Edge->Caller->eraseCalleeEdge(Edge.get());
// Otherwise simply repoint the edge at the new callee.
2421 Edge->Callee = NewCallee;
2422 NewCallee->CallerEdges.push_back(Edge);
2427 NewCallee->AllocTypes |= Edge->AllocTypes;
// Case 2: only a subset of context ids moves — split the edge.
2433 auto CallerEdgeAllocType = computeAllocType(ContextIdsToMove);
2434 if (ExistingEdgeToNewCallee) {
2437 ExistingEdgeToNewCallee->getContextIds().insert(ContextIdsToMove.
begin(),
2438 ContextIdsToMove.
end());
2439 ExistingEdgeToNewCallee->AllocTypes |= CallerEdgeAllocType;
2442 auto NewEdge = std::make_shared<ContextEdge>(
2443 NewCallee, Edge->Caller, CallerEdgeAllocType, ContextIdsToMove);
2444 Edge->Caller->CalleeEdges.push_back(NewEdge);
2445 NewCallee->CallerEdges.push_back(NewEdge);
2449 NewCallee->AllocTypes |= CallerEdgeAllocType;
// The remaining (unmoved) ids determine the old edge's new alloc type.
2451 Edge->AllocTypes = computeAllocType(Edge->ContextIds);
// Transfer the moved ids to the new callee node and recompute the old
// callee's alloc type from whatever ids remain.
2456 NewCallee->ContextIds.insert(ContextIdsToMove.
begin(),
2457 ContextIdsToMove.
end());
2459 OldCallee->AllocTypes = computeAllocType(OldCallee->ContextIds);
2462 OldCallee->ContextIds.empty());
// Propagate the moved ids down the old callee's callee edges, merging into
// matching edges on the new callee or creating new ones.
2466 for (
auto &OldCalleeEdge : OldCallee->CalleeEdges) {
2471 set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove);
2472 OldCalleeEdge->AllocTypes =
2473 computeAllocType(OldCalleeEdge->getContextIds());
2480 if (
auto *NewCalleeEdge =
2481 NewCallee->findEdgeFromCallee(OldCalleeEdge->Callee)) {
2482 NewCalleeEdge->getContextIds().insert(EdgeContextIdsToMove.
begin(),
2483 EdgeContextIdsToMove.
end());
2484 NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
2488 auto NewEdge = std::make_shared<ContextEdge>(
2489 OldCalleeEdge->Callee, NewCallee,
2490 computeAllocType(EdgeContextIdsToMove), EdgeContextIdsToMove);
2491 NewCallee->CalleeEdges.push_back(NewEdge);
2492 NewEdge->Callee->CallerEdges.push_back(NewEdge);
// Post-move verification of both endpoints and their callees.
2495 checkNode<DerivedCCG, FuncTy, CallTy>(OldCallee,
false);
2496 checkNode<DerivedCCG, FuncTy, CallTy>(NewCallee,
false);
2497 for (
const auto &OldCalleeEdge : OldCallee->CalleeEdges)
2498 checkNode<DerivedCCG, FuncTy, CallTy>(OldCalleeEdge->Callee,
2500 for (
const auto &NewCalleeEdge : NewCallee->CalleeEdges)
2501 checkNode<DerivedCCG, FuncTy, CallTy>(NewCalleeEdge->Callee,
// Removes None-type callee edges from this node, then recurses into its
// clones and callers (guarded elsewhere by the Visited set).
// NOTE(review): extraction fragment — some original lines are elided.
2506template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2507void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
2508 recursivelyRemoveNoneTypeCalleeEdges(
2514 removeNoneTypeCalleeEdges(
Node);
2516 for (
auto *Clone :
Node->Clones)
2517 recursivelyRemoveNoneTypeCalleeEdges(Clone, Visited);
// Iterate over a copy: recursive calls may remove caller edges.
2521 auto CallerEdges =
Node->CallerEdges;
2522 for (
auto &Edge : CallerEdges) {
// Skip edges already detached (both endpoints nulled) by recursion.
2524 if (Edge->Callee ==
nullptr && Edge->Caller ==
nullptr) {
2525 assert(!std::count(
Node->CallerEdges.begin(),
Node->CallerEdges.end(),
2529 recursivelyRemoveNoneTypeCalleeEdges(Edge->Caller, Visited);
// Driver: runs context-based cloning starting from every allocation node,
// then strips any None-type callee edges the cloning left behind.
// NOTE(review): extraction fragment — some original lines are elided.
2533template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2534void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() {
2536 for (
auto &Entry : AllocationCallToContextNodeMap) {
2538 identifyClones(Entry.second, Visited, Entry.second->ContextIds);
2541 for (
auto &Entry : AllocationCallToContextNodeMap)
2542 recursivelyRemoveNoneTypeCalleeEdges(Entry.second, Visited);
// Per-node cloning: first recurses top-down through callers, then decides —
// per caller edge, in alloc-type priority order — whether the node must be
// cloned so each clone sees a single allocation behavior, reusing an
// existing clone when its alloc types match.
// NOTE(review): extraction fragment — some original lines are elided;
// comments describe only the visible statements.
2555template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2556void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
2560 checkNode<DerivedCCG, FuncTy, CallTy>(
Node,
false);
// Nothing to clone for nodes without a call.
2568 if (!
Node->hasCall())
// Recurse into callers first (copy the list: recursion may mutate it).
2583 auto CallerEdges =
Node->CallerEdges;
2584 for (
auto &Edge : CallerEdges) {
// Skip edges already detached (both endpoints nulled) by recursion.
2586 if (Edge->Callee ==
nullptr && Edge->Caller ==
nullptr) {
2591 if (!Visited.
count(Edge->Caller) && !Edge->Caller->CloneOf) {
2592 identifyClones(Edge->Caller, Visited, AllocContextIds);
// Order caller edges by alloc-type cloning priority; ties (and empty-id
// edges) fall back to the smallest context id for determinism.
2615 const unsigned AllocTypeCloningPriority[] = { 3, 4,
2618 std::stable_sort(
Node->CallerEdges.begin(),
Node->CallerEdges.end(),
2619 [&](
const std::shared_ptr<ContextEdge> &
A,
2620 const std::shared_ptr<ContextEdge> &
B) {
2623 if (A->ContextIds.empty())
2629 if (B->ContextIds.empty())
2632 if (A->AllocTypes == B->AllocTypes)
2635 return *A->ContextIds.begin() < *B->ContextIds.begin();
2636 return AllocTypeCloningPriority[A->AllocTypes] <
2637 AllocTypeCloningPriority[B->AllocTypes];
// Walk caller edges; moveEdgeTo* callbacks keep EI valid via &EI.
2647 for (
auto EI =
Node->CallerEdges.begin(); EI !=
Node->CallerEdges.end();) {
2648 auto CallerEdge = *EI;
// Restrict to the context ids relevant to the allocation being processed.
2657 auto CallerEdgeContextsForAlloc =
2659 if (CallerEdgeContextsForAlloc.empty()) {
2663 auto CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
// Per-callee-edge alloc types as seen through this caller edge's contexts.
2667 std::vector<uint8_t> CalleeEdgeAllocTypesForCallerEdge;
2668 CalleeEdgeAllocTypesForCallerEdge.reserve(
Node->CalleeEdges.size());
2669 for (
auto &CalleeEdge :
Node->CalleeEdges)
2670 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
2671 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
// If this caller agrees with the node's behavior, no clone is needed.
2686 if (allocTypeToUse(CallerAllocTypeForAlloc) ==
2687 allocTypeToUse(
Node->AllocTypes) &&
2688 allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
2689 CalleeEdgeAllocTypesForCallerEdge,
Node->CalleeEdges)) {
// Try to reuse an existing clone whose alloc types match this caller.
2696 ContextNode *Clone =
nullptr;
2697 for (
auto *CurClone :
Node->Clones) {
2698 if (allocTypeToUse(CurClone->AllocTypes) !=
2699 allocTypeToUse(CallerAllocTypeForAlloc))
2702 if (!allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
2703 CalleeEdgeAllocTypesForCallerEdge, CurClone->CalleeEdges))
2711 moveEdgeToExistingCalleeClone(CallerEdge, Clone, &EI,
false,
2712 CallerEdgeContextsForAlloc);
// No suitable clone: create a fresh one for this caller edge.
2715 moveEdgeToNewCalleeClone(CallerEdge, &EI, CallerEdgeContextsForAlloc);
2730 checkNode<DerivedCCG, FuncTy, CallTy>(
Node,
false);
// IR backend: tags the allocation call with a "memprof" function attribute
// carrying the chosen alloc-type string, and emits an optimization remark.
// NOTE(review): extraction fragment — some original lines are elided.
2733void ModuleCallsiteContextGraph::updateAllocationCall(
2737 "memprof", AllocTypeString);
2738 cast<CallBase>(
Call.call())->addFnAttr(
A);
// Remark records which allocation (and clone) got which attribute.
2739 OREGetter(
Call.call()->getFunction())
2741 <<
ore::NV(
"AllocationCall",
Call.call()) <<
" in clone "
2743 <<
" marked with memprof allocation attribute "
2744 <<
ore::NV(
"Attribute", AllocTypeString));
// Summary-index backend: records the chosen alloc type in the AllocInfo's
// Versions entry for this clone (most of the body is elided by extraction).
2747void IndexCallsiteContextGraph::updateAllocationCall(
CallInfo &Call,
// The clone number must already have a Versions slot.
2751 assert(AI->Versions.size() >
Call.cloneNo());
// IR backend: redirects the caller's call to the assigned function clone
// (only when a non-zero clone was chosen) and emits an optimization remark.
// NOTE(review): extraction fragment — some original lines are elided.
2755void ModuleCallsiteContextGraph::updateCall(
CallInfo &CallerCall,
2756 FuncInfo CalleeFunc) {
// Clone 0 is the original function; no retargeting needed for it.
2757 if (CalleeFunc.cloneNo() > 0)
2758 cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func());
2759 OREGetter(CallerCall.call()->getFunction())
2761 <<
ore::NV(
"Call", CallerCall.call()) <<
" in clone "
2762 <<
ore::NV(
"Caller", CallerCall.call()->getFunction())
2763 <<
" assigned to call function clone "
2764 <<
ore::NV(
"Callee", CalleeFunc.func()));
// Summary-index backend: records the assigned callee clone number in the
// caller's CallsiteInfo::Clones slot for this caller clone.
// NOTE(review): extraction fragment — some original lines are elided.
2767void IndexCallsiteContextGraph::updateCall(
CallInfo &CallerCall,
2768 FuncInfo CalleeFunc) {
2769 auto *CI = CallerCall.call().dyn_cast<
CallsiteInfo *>();
2771 "Caller cannot be an allocation which should not have profiled calls");
2772 assert(CI->Clones.size() > CallerCall.cloneNo());
2773 CI->Clones[CallerCall.cloneNo()] = CalleeFunc.cloneNo();
// IR backend: materializes function clone number CloneNo, names it, and
// fills CallMap with the mapping from each original call-with-metadata to
// its cloned instruction (via the clone's value map).
// NOTE(review): extraction fragment — some original lines (including the
// actual cloning call) are elided.
2776CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
2778ModuleCallsiteContextGraph::cloneFunctionForCallsite(
2779 FuncInfo &Func,
CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
2780 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
2786 NewFunc->setName(
Name);
// Map every original call (always clone 0) to its counterpart in the clone.
2787 for (
auto &Inst : CallsWithMetadataInFunc) {
2789 assert(Inst.cloneNo() == 0);
2790 CallMap[Inst] = {cast<Instruction>(VMap[Inst.call()]), CloneNo};
2792 OREGetter(
Func.func())
2794 <<
"created clone " <<
ore::NV(
"NewFunction", NewFunc));
2795 return {NewFunc, CloneNo};
// Summary-index backend: "clones" a function purely in summary form — it
// appends a zero-initialized Versions/Clones slot for CloneNo to every
// alloc/callsite record and maps each call to itself at the new clone
// number (no IR is created here).
// NOTE(review): extraction fragment — some original lines are elided.
2799 IndexCall>::FuncInfo
2800IndexCallsiteContextGraph::cloneFunctionForCallsite(
2801 FuncInfo &Func,
CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
2802 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
2817 for (
auto &Inst : CallsWithMetadataInFunc) {
2819 assert(Inst.cloneNo() == 0);
// Allocation record: extend Versions; otherwise extend callsite Clones.
2820 if (
auto *AI = Inst.call().dyn_cast<
AllocInfo *>()) {
2821 assert(AI->Versions.size() == CloneNo);
2824 AI->Versions.push_back(0);
2827 assert(CI && CI->Clones.size() == CloneNo);
2830 CI->Clones.push_back(0);
2832 CallMap[Inst] = {Inst.call(), CloneNo};
2834 return {
Func.func(), CloneNo};
// Assigns every callsite clone in the graph to a function clone, creating
// function clones on demand so that each function clone's callsites agree
// on their callee clones, then walks the graph once more to rewrite the
// actual calls (updateAllocationCall / updateCall).
// NOTE(review): extraction fragment — many original lines are elided;
// comments describe only the visible statements and control flow.
2868template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2869bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
2870 bool Changed =
false;
// Records which function clone a caller callsite has been bound to call.
2878 auto RecordCalleeFuncOfCallsite = [&](ContextNode *
Caller,
2879 const FuncInfo &CalleeFunc) {
2881 CallsiteToCalleeFuncCloneMap[
Caller] = CalleeFunc;
// Process each function and its calls-with-metadata independently.
2886 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
2887 FuncInfo OrigFunc(Func);
// Per function clone: map from original CallInfo to its clone's CallInfo.
2891 std::map<FuncInfo, std::map<CallInfo, CallInfo>> FuncClonesToCallMap;
2892 for (
auto &Call : CallsWithMetadata) {
2893 ContextNode *
Node = getNodeForInst(Call);
2900 "Not having a call should have prevented cloning");
// Which callsite clone (if any) each function clone currently hosts.
2904 std::map<FuncInfo, ContextNode *> FuncCloneToCurNodeCloneMap;
// Binds a callsite clone to a function clone and points the clone at the
// corresponding cloned call instruction.
2908 auto AssignCallsiteCloneToFuncClone = [&](
const FuncInfo &FuncClone,
2910 ContextNode *CallsiteClone,
2913 FuncCloneToCurNodeCloneMap[FuncClone] = CallsiteClone;
2915 assert(FuncClonesToCallMap.count(FuncClone));
2916 std::map<CallInfo, CallInfo> &CallMap = FuncClonesToCallMap[FuncClone];
2918 if (CallMap.count(Call))
2919 CallClone = CallMap[
Call];
2920 CallsiteClone->setCall(CallClone);
// Worklist: the original node (if it still has contexts) plus its clones.
2926 std::deque<ContextNode *> ClonesWorklist;
2928 if (!
Node->ContextIds.empty())
2929 ClonesWorklist.push_back(
Node);
2930 ClonesWorklist.insert(ClonesWorklist.end(),
Node->Clones.begin(),
2931 Node->Clones.end());
2936 unsigned NodeCloneCount = 0;
2937 while (!ClonesWorklist.empty()) {
2938 ContextNode *Clone = ClonesWorklist.front();
2939 ClonesWorklist.pop_front();
2942 checkNode<DerivedCCG, FuncTy, CallTy>(Clone);
// Need more function clones than currently exist for this callsite clone.
2948 if (FuncClonesToCallMap.size() < NodeCloneCount) {
// First clone can use the original function.
2950 if (NodeCloneCount == 1) {
2955 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &E) {
2956 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
2960 FuncClonesToCallMap[OrigFunc] = {};
2961 AssignCallsiteCloneToFuncClone(
2962 OrigFunc, Call, Clone,
2963 AllocationCallToContextNodeMap.count(Call));
2964 for (
auto &CE : Clone->CallerEdges) {
2966 if (!
CE->Caller->hasCall())
2968 RecordCalleeFuncOfCallsite(
CE->Caller, OrigFunc);
// Otherwise a new function clone is required; note any function clone a
// caller was already assigned to.
2978 FuncInfo PreviousAssignedFuncClone;
2980 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &E) {
2981 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
2983 bool CallerAssignedToCloneOfFunc =
false;
2984 if (EI != Clone->CallerEdges.end()) {
2985 const std::shared_ptr<ContextEdge> &Edge = *EI;
2986 PreviousAssignedFuncClone =
2987 CallsiteToCalleeFuncCloneMap[Edge->Caller];
2988 CallerAssignedToCloneOfFunc =
true;
// Materialize the new function clone.
2993 std::map<CallInfo, CallInfo> NewCallMap;
2994 unsigned CloneNo = FuncClonesToCallMap.size();
2995 assert(CloneNo > 0 &&
"Clone 0 is the original function, which "
2996 "should already exist in the map");
2997 FuncInfo NewFuncClone = cloneFunctionForCallsite(
2998 OrigFunc, Call, NewCallMap, CallsWithMetadata, CloneNo);
2999 FuncClonesToCallMap.emplace(NewFuncClone, std::move(NewCallMap));
3000 FunctionClonesAnalysis++;
// Simple case: no caller was bound yet — bind this clone and its callers
// to the new function clone.
3006 if (!CallerAssignedToCloneOfFunc) {
3007 AssignCallsiteCloneToFuncClone(
3008 NewFuncClone, Call, Clone,
3009 AllocationCallToContextNodeMap.count(Call));
3010 for (
auto &CE : Clone->CallerEdges) {
3012 if (!
CE->Caller->hasCall())
3014 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
// Harder case: migrate only the callers not bound to the previously
// assigned function clone over to the new function clone.
3023 for (
auto CE : Clone->CallerEdges) {
3028 if (!
CE->Caller->hasCall())
3031 if (!CallsiteToCalleeFuncCloneMap.
count(
CE->Caller) ||
3035 CallsiteToCalleeFuncCloneMap[
CE->Caller] !=
3036 PreviousAssignedFuncClone)
3039 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
// The migrated caller's other callees must also be cloned into the new
// function clone so its body is internally consistent.
3049 for (
auto CalleeEdge :
CE->Caller->CalleeEdges) {
3054 ContextNode *
Callee = CalleeEdge->Callee;
3058 if (Callee == Clone || !
Callee->hasCall())
3060 ContextNode *NewClone = moveEdgeToNewCalleeClone(CalleeEdge);
3061 removeNoneTypeCalleeEdges(NewClone);
3064 removeNoneTypeCalleeEdges(Callee);
// Propagate the callee's existing function-clone binding to its clone.
3069 if (CallsiteToCalleeFuncCloneMap.
count(Callee))
3070 RecordCalleeFuncOfCallsite(
3071 NewClone, CallsiteToCalleeFuncCloneMap[Callee]);
// Point the callee clone at the matching call in the new function clone.
3080 OrigCall.setCloneNo(0);
3081 std::map<CallInfo, CallInfo> &CallMap =
3082 FuncClonesToCallMap[NewFuncClone];
3083 assert(CallMap.count(OrigCall));
3084 CallInfo NewCall(CallMap[OrigCall]);
3086 NewClone->setCall(NewCall);
// Second phase per clone: reconcile each caller's function-clone choice
// with this callsite clone's assignment, splitting the clone if callers
// disagree.
3108 std::map<FuncInfo, ContextNode *> FuncCloneToNewCallsiteCloneMap;
3109 FuncInfo FuncCloneAssignedToCurCallsiteClone;
3112 for (
auto EI = Clone->CallerEdges.begin();
3113 EI != Clone->CallerEdges.end();) {
3116 if (!Edge->Caller->hasCall()) {
3122 if (CallsiteToCalleeFuncCloneMap.
count(Edge->Caller)) {
3123 FuncInfo FuncCloneCalledByCaller =
3124 CallsiteToCalleeFuncCloneMap[Edge->Caller];
// Conflict: the caller's function clone already maps to a different
// callsite clone, or this clone is bound to a different function clone.
3134 if ((FuncCloneToCurNodeCloneMap.count(FuncCloneCalledByCaller) &&
3135 FuncCloneToCurNodeCloneMap[FuncCloneCalledByCaller] !=
3143 (FuncCloneAssignedToCurCallsiteClone &&
3144 FuncCloneAssignedToCurCallsiteClone !=
3145 FuncCloneCalledByCaller)) {
// Move the edge to (or create) the callsite clone dedicated to the
// caller's function clone.
3160 if (FuncCloneToNewCallsiteCloneMap.count(
3161 FuncCloneCalledByCaller)) {
3162 ContextNode *NewClone =
3163 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller];
3164 moveEdgeToExistingCalleeClone(Edge, NewClone, &EI);
3166 removeNoneTypeCalleeEdges(NewClone);
3169 ContextNode *NewClone = moveEdgeToNewCalleeClone(Edge, &EI);
3170 removeNoneTypeCalleeEdges(NewClone);
3171 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller] =
// Newly created callsite clones must themselves be processed.
3174 ClonesWorklist.push_back(NewClone);
3175 assert(EI == Clone->CallerEdges.end() ||
3181 removeNoneTypeCalleeEdges(Clone);
// No conflict: adopt the caller's function clone for this callsite clone.
3190 if (!FuncCloneAssignedToCurCallsiteClone) {
3191 FuncCloneAssignedToCurCallsiteClone = FuncCloneCalledByCaller;
3193 AssignCallsiteCloneToFuncClone(
3194 FuncCloneCalledByCaller, Call, Clone,
3195 AllocationCallToContextNodeMap.count(Call));
3199 assert(FuncCloneAssignedToCurCallsiteClone ==
3200 FuncCloneCalledByCaller);
// Caller had no binding: pick any function clone not yet hosting a clone
// of this callsite, and record the caller's choice.
3209 if (!FuncCloneAssignedToCurCallsiteClone) {
3214 for (
auto &CF : FuncClonesToCallMap) {
3215 if (!FuncCloneToCurNodeCloneMap.count(CF.first)) {
3216 FuncCloneAssignedToCurCallsiteClone = CF.first;
3220 assert(FuncCloneAssignedToCurCallsiteClone);
3222 AssignCallsiteCloneToFuncClone(
3223 FuncCloneAssignedToCurCallsiteClone, Call, Clone,
3224 AllocationCallToContextNodeMap.count(Call));
3226 assert(FuncCloneToCurNodeCloneMap
3227 [FuncCloneAssignedToCurCallsiteClone] == Clone);
3229 RecordCalleeFuncOfCallsite(Edge->Caller,
3230 FuncCloneAssignedToCurCallsiteClone);
// Verification pass over the node, its neighbors, and all its clones.
3237 checkNode<DerivedCCG, FuncTy, CallTy>(
Node);
3238 for (
const auto &PE :
Node->CalleeEdges)
3239 checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee);
3240 for (
const auto &CE :
Node->CallerEdges)
3241 checkNode<DerivedCCG, FuncTy, CallTy>(
CE->Caller);
3242 for (
auto *Clone :
Node->Clones) {
3243 checkNode<DerivedCCG, FuncTy, CallTy>(Clone);
3244 for (
const auto &PE : Clone->CalleeEdges)
3245 checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee);
3246 for (
const auto &CE : Clone->CallerEdges)
3247 checkNode<DerivedCCG, FuncTy, CallTy>(
CE->Caller);
// Final recursive walk (callers-first) rewriting the actual calls: alloc
// nodes get their attribute, callsites get retargeted to their assigned
// function clone.
3253 auto UpdateCalls = [&](ContextNode *
Node,
3255 auto &&UpdateCalls) {
3260 for (
auto *Clone :
Node->Clones)
3261 UpdateCalls(Clone, Visited, UpdateCalls);
3263 for (
auto &Edge :
Node->CallerEdges)
3264 UpdateCalls(Edge->Caller, Visited, UpdateCalls);
3268 if (!
Node->hasCall() ||
Node->ContextIds.empty())
3271 if (
Node->IsAllocation) {
3272 updateAllocationCall(
Node->Call, allocTypeToUse(
Node->AllocTypes));
3276 if (!CallsiteToCalleeFuncCloneMap.
count(
Node))
3279 auto CalleeFunc = CallsiteToCalleeFuncCloneMap[
Node];
3280 updateCall(
Node->Call, CalleeFunc);
// Kick off the rewrite from every allocation node.
3289 for (
auto &Entry : AllocationCallToContextNodeMap)
3290 UpdateCalls(Entry.second, Visited, UpdateCalls);
// Fragment of the ThinLTO-backend function cloning: creates NumClones-1
// clones of a function (recording value maps), strips memprof/callsite
// metadata from each clone, resolves name clashes with pre-existing
// declarations, and clones any aliases of the function.
// NOTE(review): the enclosing function's header and several lines are
// elided by extraction — presumably this is the backend clone-creation
// helper; confirm against the full source.
3304 FunctionsClonedThinBackend++;
3305 for (
unsigned I = 1;
I < NumClones;
I++) {
// Keep the value map so callers can locate cloned instructions later.
3306 VMaps.
emplace_back(std::make_unique<ValueToValueMapTy>());
3308 FunctionClonesThinBackend++;
// Clones must not carry the profile metadata of the original.
3311 for (
auto &BB : *NewF) {
3312 for (
auto &Inst : BB) {
3313 Inst.setMetadata(LLVMContext::MD_memprof,
nullptr);
3314 Inst.setMetadata(LLVMContext::MD_callsite,
nullptr);
// If a declaration with the target name already exists, take it over.
3318 auto *PrevF = M.getFunction(
Name);
3322 assert(PrevF->isDeclaration());
3323 NewF->takeName(PrevF);
3324 PrevF->replaceAllUsesWith(NewF);
3325 PrevF->eraseFromParent();
3327 NewF->setName(
Name);
3329 <<
"created clone " <<
ore::NV(
"NewFunction", NewF));
// Clone the function's aliases so references through them keep working.
3332 if (!FuncToAliasMap.count(&
F))
3334 for (
auto *
A : FuncToAliasMap[&
F]) {
3336 auto *PrevA = M.getNamedAlias(
Name);
3338 A->getType()->getPointerAddressSpace(),
3339 A->getLinkage(),
Name, NewF);
3340 NewA->copyAttributesFrom(
A);
// As with functions, take over any pre-existing alias declaration.
3344 assert(PrevA->isDeclaration());
3345 NewA->takeName(PrevA);
3346 PrevA->replaceAllUsesWith(NewA);
3347 PrevA->eraseFromParent();
3389bool MemProfContextDisambiguation::applyImport(
Module &M) {
3391 bool Changed =
false;
3393 auto IsMemProfClone = [](
const Function &
F) {
3400 std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
3402 for (
auto &
A :
M.aliases()) {
3403 auto *Aliasee =
A.getAliaseeObject();
3404 if (
auto *
F = dyn_cast<Function>(Aliasee))
3405 FuncToAliasMap[
F].insert(&
A);
3409 if (
F.isDeclaration() || IsMemProfClone(
F))
3415 bool ClonesCreated =
false;
3416 unsigned NumClonesCreated = 0;
3417 auto CloneFuncIfNeeded = [&](
unsigned NumClones) {
3427 if (ClonesCreated) {
3428 assert(NumClonesCreated == NumClones);
3435 ClonesCreated =
true;
3436 NumClonesCreated = NumClones;
3446 assert(!IsMemProfClone(*CalledFunction));
3451 auto CalleeOrigName = CalledFunction->getName();
3452 for (
unsigned J = 0; J < StackNode.
Clones.
size(); J++) {
3455 if (!StackNode.
Clones[J])
3457 auto NewF =
M.getOrInsertFunction(
3459 CalledFunction->getFunctionType());
3465 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
3468 <<
ore::NV(
"Call", CBClone) <<
" in clone "
3470 <<
" assigned to call function clone "
3471 <<
ore::NV(
"Callee", NewF.getCallee()));
3489 auto SrcModuleMD =
F.getMetadata(
"thinlto_src_module");
3491 "enable-import-metadata is needed to emit thinlto_src_module");
3493 dyn_cast<MDString>(SrcModuleMD->getOperand(0))->getString();
3495 if (GVS->modulePath() == SrcModule) {
3496 GVSummary = GVS.get();
3500 assert(GVSummary && GVSummary->modulePath() == SrcModule);
3505 if (isa<AliasSummary>(GVSummary))
3508 auto *
FS = cast<FunctionSummary>(GVSummary->getBaseObject());
3510 if (
FS->allocs().empty() &&
FS->callsites().empty())
3513 auto SI =
FS->callsites().begin();
3514 auto AI =
FS->allocs().begin();
3522 for (
auto CallsiteIt =
FS->callsites().rbegin();
3523 CallsiteIt !=
FS->callsites().rend(); CallsiteIt++) {
3524 auto &Callsite = *CallsiteIt;
3528 if (!Callsite.StackIdIndices.empty())
3530 MapTailCallCalleeVIToCallsite.
insert({Callsite.Callee, Callsite});
3536 for (
auto &BB :
F) {
3537 for (
auto &
I : BB) {
3538 auto *CB = dyn_cast<CallBase>(&
I);
3543 auto *CalledValue = CB->getCalledOperand();
3544 auto *CalledFunction = CB->getCalledFunction();
3545 if (CalledValue && !CalledFunction) {
3546 CalledValue = CalledValue->stripPointerCasts();
3548 CalledFunction = dyn_cast<Function>(CalledValue);
3552 if (
auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
3553 assert(!CalledFunction &&
3554 "Expected null called function in callsite for alias");
3555 CalledFunction = dyn_cast<Function>(GA->getAliaseeObject());
3559 I.getMetadata(LLVMContext::MD_callsite));
3560 auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof);
3564 if (CB->getAttributes().hasFnAttr(
"memprof")) {
3566 CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
3567 ? AllocTypeColdThinBackend++
3568 : AllocTypeNotColdThinBackend++;
3569 OrigAllocsThinBackend++;
3570 AllocVersionsThinBackend++;
3571 if (!MaxAllocVersionsThinBackend)
3572 MaxAllocVersionsThinBackend = 1;
3575 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
3582 auto &AllocNode = *(AI++);
3586 auto MIBIter = AllocNode.MIBs.begin();
3587 for (
auto &MDOp : MemProfMD->operands()) {
3588 assert(MIBIter != AllocNode.MIBs.end());
3590 MIBIter->StackIdIndices.begin();
3591 auto *MIBMD = cast<const MDNode>(MDOp);
3595 auto ContextIterBegin =
3599 (ContextIterBegin != StackContext.
end() &&
3600 *ContextIterBegin == 0)
3603 for (
auto ContextIter = ContextIterBegin;
3604 ContextIter != StackContext.
end(); ++ContextIter) {
3608 if (LastStackContextId == *ContextIter)
3610 LastStackContextId = *ContextIter;
3611 assert(StackIdIndexIter != MIBIter->StackIdIndices.end());
3620 CloneFuncIfNeeded(AllocNode.Versions.size());
3622 OrigAllocsThinBackend++;
3623 AllocVersionsThinBackend += AllocNode.Versions.size();
3624 if (MaxAllocVersionsThinBackend < AllocNode.Versions.size())
3625 MaxAllocVersionsThinBackend = AllocNode.Versions.size();
3632 if (AllocNode.Versions.size() == 1) {
3637 UnclonableAllocsThinBackend++;
3643 return Type == ((uint8_t)AllocationType::NotCold |
3644 (uint8_t)AllocationType::Cold);
3648 for (
unsigned J = 0; J < AllocNode.Versions.size(); J++) {
3654 : AllocTypeNotColdThinBackend++;
3666 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
3669 <<
ore::NV(
"AllocationCall", CBClone) <<
" in clone "
3671 <<
" marked with memprof allocation attribute "
3672 <<
ore::NV(
"Attribute", AllocTypeString));
3674 }
else if (!CallsiteContext.empty()) {
3676 assert(SI !=
FS->callsites().end());
3677 auto &StackNode = *(
SI++);
3683 for (
auto StackId : CallsiteContext) {
3691 CloneCallsite(StackNode, CB, CalledFunction);
3692 }
else if (CB->isTailCall()) {
3697 if (CalleeVI && MapTailCallCalleeVIToCallsite.
count(CalleeVI)) {
3698 auto Callsite = MapTailCallCalleeVIToCallsite.
find(CalleeVI);
3699 assert(Callsite != MapTailCallCalleeVIToCallsite.
end());
3700 CloneCallsite(Callsite->second, CB, CalledFunction);
3704 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
3705 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
3713template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3714bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {
3716 dbgs() <<
"CCG before cloning:\n";
3720 exportToDot(
"postbuild");
3733 dbgs() <<
"CCG after cloning:\n";
3737 exportToDot(
"cloned");
3739 bool Changed = assignFunctions();
3742 dbgs() <<
"CCG after assigning function clones:\n";
3746 exportToDot(
"clonefuncassign");
3751bool MemProfContextDisambiguation::processModule(
3758 return applyImport(M);
3771 ModuleCallsiteContextGraph CCG(M, OREGetter);
3772 return CCG.process();
3777 : ImportSummary(Summary) {
3778 if (ImportSummary) {
3788 auto ReadSummaryFile =
3790 if (!ReadSummaryFile) {
3797 if (!ImportSummaryForTestingOrErr) {
3803 ImportSummaryForTesting = std::move(*ImportSummaryForTestingOrErr);
3804 ImportSummary = ImportSummaryForTesting.get();
3813 if (!processModule(M, OREGetter))
3830 IndexCallsiteContextGraph CCG(
Index, isPrevailing);
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define LLVM_ATTRIBUTE_UNUSED
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This file implements a map that provides insertion order iteration.
static SmallVector< std::unique_ptr< ValueToValueMapTy >, 4 > createFunctionClones(Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE, std::map< const Function *, SmallPtrSet< const GlobalAlias *, 1 > > &FuncToAliasMap)
static cl::opt< unsigned > TailCallSearchDepth("memprof-tail-call-search-depth", cl::init(5), cl::Hidden, cl::desc("Max depth to recursively search for missing " "frames through tail calls."))
static cl::opt< bool > ExportToDot("memprof-export-to-dot", cl::init(false), cl::Hidden, cl::desc("Export graph to dot files."))
static void checkEdge(const std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > &Edge)
bool checkColdOrNotCold(uint8_t AllocType)
static ValueInfo findValueInfoForFunc(const Function &F, const Module &M, const ModuleSummaryIndex *ImportSummary)
static std::string getAllocTypeString(uint8_t AllocTypes)
static cl::opt< bool > VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden, cl::desc("Perform verification checks on CallingContextGraph."))
static void checkNode(const ContextNode< DerivedCCG, FuncTy, CallTy > *Node, bool CheckEdges=true)
static std::string getMemProfFuncName(Twine Base, unsigned CloneNo)
static cl::opt< std::string > MemProfImportSummary("memprof-import-summary", cl::desc("Import summary to use for testing the ThinLTO backend via opt"), cl::Hidden)
static const std::string MemProfCloneSuffix
static cl::opt< std::string > DotFilePathPrefix("memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path prefix of the MemProf dot files."))
static cl::opt< bool > VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden, cl::desc("Perform frequent verification checks on nodes."))
static cl::opt< bool > DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden, cl::desc("Dump CallingContextGraph to stdout after each stage."))
This is the interface to build a ModuleSummaryIndex for a module.
ModuleSummaryIndex.h This file contains the declarations the classes that hold the module index and s...
Module.h This file contains the declarations for the Module class.
FunctionAnalysisManager FAM
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines generic set operations that may be used on set's of different types,...
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
void print(OutputBuffer &OB) const
Alias summary information.
ValueInfo getAliaseeVI() const
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
iterator find(const_arg_type_t< KeyT > Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Implements a dense probed hash-table based set.
Function summary information to aid decisions and implementation of importing.
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Function and variable summary information to aid decisions and implementation of importing.
static bool isLocalLinkage(LinkageTypes Linkage)
GUID getGUID() const
Return a 64-bit global unique ID constructed from global value name (i.e.
std::string getGlobalIdentifier() const
Return the modified name for this global value suitable to be used as the key for a global lookup (e....
@ InternalLinkage
Rename collisions when linking (static functions).
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
const Function * getFunction() const
Return the function this instruction belongs to.
This class implements a map that also provides access to all stored values in a deterministic order.
size_type count(const KeyT &Key) const
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
MemProfContextDisambiguation(const ModuleSummaryIndex *Summary=nullptr)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
Class to hold module path string table and global value map, and encapsulate methods for operating on...
static StringRef getOriginalNameBeforePromote(StringRef Name)
Helper to obtain the unpromoted name for a global value (or the original name if not promoted).
ValueInfo getValueInfo(const GlobalValueSummaryMapTy::value_type &R) const
Return a ValueInfo for the index value_type (convenient when iterating index).
uint64_t getStackIdAtIndex(unsigned Index) const
GlobalValueSummary * findSummaryInModule(ValueInfo VI, StringRef ModuleId) const
Find the summary for ValueInfo VI in module ModuleId, or nullptr if not found.
GlobalValue::GUID getGUIDFromOriginalID(GlobalValue::GUID OriginalID) const
Return the GUID for OriginalId in the OidGuidMap.
A Module instance is used to store all the information related to an LLVM module.
A discriminated union of two or more pointer types, with the discriminator in the low bit of the poin...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM Value Representation.
StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
void swap(DenseSetImpl &RHS)
bool erase(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
An efficient, type-erasing, non-owning reference to a callable.
Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresp...
CallStackIterator end() const
CallStackIterator beginAfterSharedPrefix(CallStack &Other)
This class implements an extremely fast bulk output stream that can only output to a stream.
StringRef AttributeString(unsigned Attribute)
@ C
The default llvm calling convention, compatible with C.
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
AllocationType getMIBAllocType(const MDNode *MIB)
Returns the allocation type from an MIB metadata node.
bool hasSingleAllocType(uint8_t AllocTypes)
True if the AllocTypes bitmask contains just a single type.
std::string getAllocTypeAttributeString(AllocationType Type)
Returns the string to use in attributes with the given type.
MDNode * getMIBStackNode(const MDNode *MIB)
Returns the stack node from an MIB metadata node.
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner={})
Log all errors (if any) in E to OS.
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void set_subtract(S1Ty &S1, const S2Ty &S2)
set_subtract(A, B) - Compute A := A - B
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr)
Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndex(MemoryBufferRef Buffer)
Parse the specified bitcode buffer, returning the module summary index.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
bool set_union(S1Ty &S1, const S2Ty &S2)
set_union(A, B) - Compute A := A u B, return whether A changed.
cl::opt< bool > SupportsHotColdNew
Indicate we are linking with an allocator that supports hot/cold operator new interfaces.
S1Ty set_intersection(const S1Ty &S1, const S2Ty &S2)
set_intersection(A, B) - Return A ^ B
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Expected< T > errorOrToExpected(ErrorOr< T > &&EO)
Convert an ErrorOr<T> to an Expected<T>.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
bool mayHaveMemprofSummary(const CallBase *CB)
Returns true if the instruction could have memprof metadata, used to ensure consistency between summa...
DOTGraphTraits(bool IsSimple=false)
typename GTraits::NodeRef NodeRef
static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter, GraphType)
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
typename GTraits::ChildIteratorType ChildIteratorType
static bool isNodeHidden(NodeRef Node, GraphType)
static std::string getNodeLabel(NodeRef Node, GraphType G)
static std::string getNodeAttributes(NodeRef Node, GraphType)
static NodeRef getNode(const NodePtrTy &P)
static const ContextNode< DerivedCCG, FuncTy, CallTy > * GetCallee(const EdgePtrTy &P)
static ChildIteratorType child_end(NodeRef N)
std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > EdgePtrTy
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
const ContextNode< DerivedCCG, FuncTy, CallTy > * NodeRef
static ChildIteratorType child_begin(NodeRef N)
std::unique_ptr< ContextNode< DerivedCCG, FuncTy, CallTy > > NodePtrTy
static NodeRef getEntryNode(GraphType G)
static nodes_iterator nodes_begin(GraphType G)
static nodes_iterator nodes_end(GraphType G)
Summary of memprof metadata on allocations.
SmallVector< uint8_t > Versions
Summary of memprof callsite metadata.
SmallVector< unsigned > StackIdIndices
SmallVector< unsigned > Clones
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
An information struct used to provide DenseMap with the various necessary components for a given valu...
Used in the streaming interface as the general argument type.
typename GraphType::UnknownGraphTypeError NodeRef
Struct that holds a reference to a particular GUID in a global value summary.
ArrayRef< std::unique_ptr< GlobalValueSummary > > getSummaryList() const
GlobalValue::GUID getGUID() const