52#define DEBUG_TYPE "memprof-context-disambiguation"
55 "Number of function clones created during whole program analysis");
57 "Number of function clones created during ThinLTO backend");
59 "Number of functions that had clones created during ThinLTO backend");
// Counter for allocations that end up with a type other than Cold. In this
// file it is bumped by the `AllocType == AllocationType::Cold ? ... : ...`
// expression that sits next to the forwarding updateAllocationCall call.
60STATISTIC(AllocTypeNotCold,
"Number of not cold static allocations (possibly "
61 "cloned) during whole program analysis");
// Counter for allocations that end up with type Cold. In this file it is
// bumped by the `AllocType == AllocationType::Cold ? ... : ...` expression
// that sits next to the forwarding updateAllocationCall call.
62STATISTIC(AllocTypeCold,
"Number of cold static allocations (possibly cloned) "
63 "during whole program analysis");
65 "Number of not cold static allocations (possibly cloned) during "
// Counter for cold allocations as seen by the ThinLTO backend (per its
// description string). NOTE(review): the increment site is not visible in
// this part of the file — confirm where it is updated.
67STATISTIC(AllocTypeColdThinBackend,
"Number of cold static allocations "
68 "(possibly cloned) during ThinLTO backend");
70 "Number of original (not cloned) allocations with memprof profiles "
71 "during ThinLTO backend");
73 AllocVersionsThinBackend,
74 "Number of allocation versions (including clones) during ThinLTO backend");
76 "Maximum number of allocation versions created for an original "
77 "allocation during ThinLTO backend");
79 "Number of unclonable ambigous allocations during ThinLTO backend");
84 cl::desc(
"Specify the path prefix of the MemProf dot files."));
88 cl::desc(
"Export graph to dot files."));
92 cl::desc(
"Dump CallingContextGraph to stdout after each stage."));
96 cl::desc(
"Perform verification checks on CallingContextGraph."));
100 cl::desc(
"Perform frequent verification checks on nodes."));
103 "memprof-import-summary",
104 cl::desc(
"Import summary to use for testing the ThinLTO backend via opt"),
112 cl::desc(
"Linking with hot/cold operator new interfaces"));
130template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
131class CallsiteContextGraph {
// Default, copy and move construction are all left to the compiler-generated
// implementations.
133 CallsiteContextGraph() =
default;
134 CallsiteContextGraph(
const CallsiteContextGraph &) =
default;
135 CallsiteContextGraph(CallsiteContextGraph &&) =
default;
142 void identifyClones();
149 bool assignFunctions();
155 const CallsiteContextGraph &CCG) {
161 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
163 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
165 void exportToDot(std::string Label)
const;
168 struct FuncInfo final
169 :
public std::pair<FuncTy *, unsigned > {
170 using Base = std::pair<FuncTy *, unsigned>;
171 FuncInfo(
const Base &
B) :
Base(
B) {}
172 FuncInfo(FuncTy *
F =
nullptr,
unsigned CloneNo = 0) :
Base(
F, CloneNo) {}
173 explicit operator bool()
const {
return this->first !=
nullptr; }
174 FuncTy *func()
const {
return this->first; }
175 unsigned cloneNo()
const {
return this->second; }
179 struct CallInfo final :
public std::pair<CallTy, unsigned > {
180 using Base = std::pair<CallTy, unsigned>;
182 CallInfo(CallTy Call =
nullptr,
unsigned CloneNo = 0)
184 explicit operator bool()
const {
return (
bool)this->first; }
185 CallTy call()
const {
return this->first; }
186 unsigned cloneNo()
const {
return this->second; }
187 void setCloneNo(
unsigned N) { this->second =
N; }
189 if (!
operator bool()) {
195 OS <<
"\t(clone " << cloneNo() <<
")";
// Flag set to true for some stack nodes by addStackNodesForMIB; it is checked
// later in assignStackNodesPostOrder and updateStackNodes.
218 bool Recursive =
false;
// Bitwise OR of the AllocationType values accumulated for this node (built up
// with `|=` as contexts are added).
234 uint8_t AllocTypes = 0;
// Edges for which this node is the caller. Each edge object is shared with the
// callee, which holds the same shared_ptr in its CallerEdges.
238 std::vector<std::shared_ptr<ContextEdge>> CalleeEdges;
// Edges for which this node is the callee. Each edge object is shared with the
// caller, which holds the same shared_ptr in its CalleeEdges.
242 std::vector<std::shared_ptr<ContextEdge>> CallerEdges;
// Clones registered through addClone.
248 std::vector<ContextNode *> Clones;
// Node this one was cloned from; assigned by addClone, nullptr by default.
251 ContextNode *CloneOf =
nullptr;
253 ContextNode(
bool IsAllocation) : IsAllocation(IsAllocation),
Call() {}
255 ContextNode(
bool IsAllocation,
CallInfo C)
256 : IsAllocation(IsAllocation),
Call(
C) {}
258 void addClone(ContextNode *Clone) {
260 CloneOf->Clones.push_back(Clone);
261 Clone->CloneOf = CloneOf;
263 Clones.push_back(Clone);
265 Clone->CloneOf =
this;
269 ContextNode *getOrigNode() {
276 unsigned int ContextId);
// Walks CalleeEdges looking for the edge whose Callee is the given node.
// NOTE(review): the no-match result is not visible here — presumably nullptr;
// confirm against the definition.
278 ContextEdge *findEdgeFromCallee(
const ContextNode *Callee);
// Walks CallerEdges looking for the edge whose Caller is the given node.
// NOTE(review): the no-match result is not visible here — presumably nullptr;
// confirm against the definition.
279 ContextEdge *findEdgeFromCaller(
const ContextNode *Caller);
// Removes the given edge (matched by pointer identity) from CalleeEdges;
// asserts that the edge was actually present.
280 void eraseCalleeEdge(
const ContextEdge *Edge);
// Removes the given edge (matched by pointer identity) from CallerEdges;
// asserts that the edge was actually present.
281 void eraseCallerEdge(
const ContextEdge *Edge);
285 bool hasCall()
const {
return (
bool)
Call.call(); }
291 bool isRemoved()
const {
293 (CalleeEdges.empty() && CallerEdges.empty()));
294 return ContextIds.
empty();
314 uint8_t AllocTypes = 0;
319 ContextEdge(ContextNode *Callee, ContextNode *Caller, uint8_t
AllocType,
322 ContextIds(ContextIds) {}
337 void removeNoneTypeCalleeEdges(ContextNode *
Node);
342 template <
class NodeT,
class IteratorT>
343 std::vector<uint64_t>
348 ContextNode *addAllocNode(
CallInfo Call,
const FuncTy *
F);
351 template <
class NodeT,
class IteratorT>
352 void addStackNodesForMIB(ContextNode *AllocNode,
360 void updateStackNodes();
364 void handleCallsitesWithMultipleTargets();
368 std::vector<std::pair<FuncTy *, std::vector<CallInfo>>>
369 FuncToCallsWithMetadata;
372 std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;
375 using EdgeIter =
typename std::vector<std::shared_ptr<ContextEdge>>::iterator;
377 using CallContextInfo = std::tuple<CallTy, std::vector<uint64_t>,
386 void assignStackNodesPostOrder(
398 void propagateDuplicateContextIds(
404 void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode,
411 return static_cast<const DerivedCCG *
>(
this)->getStackId(IdOrIndex);
415 bool calleeMatchesFunc(CallTy Call,
const FuncTy *Func) {
416 return static_cast<DerivedCCG *
>(
this)->calleeMatchesFunc(Call, Func);
421 std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy Call) {
422 return static_cast<DerivedCCG *
>(
this)->getStackIdsWithContextNodesForCall(
427 uint64_t getLastStackId(CallTy Call) {
428 return static_cast<DerivedCCG *
>(
this)->getLastStackId(Call);
433 AllocType == AllocationType::Cold ? AllocTypeCold++ : AllocTypeNotCold++;
434 static_cast<DerivedCCG *
>(
this)->updateAllocationCall(Call,
AllocType);
439 void updateCall(
CallInfo &CallerCall, FuncInfo CalleeFunc) {
440 static_cast<DerivedCCG *
>(
this)->updateCall(CallerCall, CalleeFunc);
446 FuncInfo cloneFunctionForCallsite(
447 FuncInfo &Func,
CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
448 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
449 return static_cast<DerivedCCG *
>(
this)->cloneFunctionForCallsite(
450 Func, Call, CallMap, CallsWithMetadataInFunc, CloneNo);
455 std::string getLabel(
const FuncTy *Func,
const CallTy Call,
456 unsigned CloneNo)
const {
457 return static_cast<const DerivedCCG *
>(
this)->getLabel(Func, Call, CloneNo);
// Finds the node for call C. The definition consults getNodeForAlloc(C) and
// NonAllocationCallToContextNodeMap.lookup(C).
461 ContextNode *getNodeForInst(
const CallInfo &
C);
// Returns AllocationCallToContextNodeMap.lookup(C).
462 ContextNode *getNodeForAlloc(
const CallInfo &
C);
// Returns the node stored for StackId in StackEntryIdToContextNodeMap when an
// entry exists. NOTE(review): the not-found result is outside this view —
// presumably nullptr; confirm.
463 ContextNode *getNodeForStackId(
uint64_t StackId);
// Erases C from AllocationCallToContextNodeMap, or, if nothing was erased
// there, from NonAllocationCallToContextNodeMap. Asserts afterwards that
// neither map still contains C.
466 void unsetNodeForInst(
const CallInfo &
C);
488 moveEdgeToNewCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
489 EdgeIter *CallerEdgeI =
nullptr);
495 void moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
496 ContextNode *NewCallee,
497 EdgeIter *CallerEdgeI =
nullptr,
498 bool NewClone =
false);
503 void identifyClones(ContextNode *
Node,
// Allocation type recorded for every context id. Entries are written when ids
// are minted (addStackNodesForMIB, duplicateContextIds) and read back by
// computeAllocType / intersectAllocTypesImpl.
507 std::map<uint32_t, AllocationType> ContextIdToAllocationType;
// Stack id -> stack node. Populated by addStackNodesForMIB and queried by
// getNodeForStackId.
513 std::map<uint64_t, ContextNode *> StackEntryIdToContextNodeMap;
// Owning storage for the graph's nodes; the raw ContextNode pointers used
// elsewhere are taken from NodeOwner.back().get() right after insertion.
520 std::vector<std::unique_ptr<ContextNode>> NodeOwner;
// Most recently assigned context id; pre-incremented to create a new id.
526 unsigned int LastContextId = 0;
529template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
531 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode;
532template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
534 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge;
535template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
537 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::FuncInfo;
538template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
540 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::CallInfo;
543class ModuleCallsiteContextGraph
544 :
public CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
547 ModuleCallsiteContextGraph(
552 friend CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
558 std::vector<uint64_t> getStackIdsWithContextNodesForCall(
Instruction *Call);
560 void updateCall(
CallInfo &CallerCall, FuncInfo CalleeFunc);
561 CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
563 cloneFunctionForCallsite(FuncInfo &Func,
CallInfo &Call,
564 std::map<CallInfo, CallInfo> &CallMap,
565 std::vector<CallInfo> &CallsWithMetadataInFunc,
568 unsigned CloneNo)
const;
577struct IndexCall :
public PointerUnion<CallsiteInfo *, AllocInfo *> {
579 IndexCall(std::nullptr_t) : IndexCall() {}
584 IndexCall *operator->() {
return this; }
589 if (
auto *AI = llvm::dyn_cast_if_present<AllocInfo *>(getBase())) {
592 auto *CI = llvm::dyn_cast_if_present<CallsiteInfo *>(getBase());
600class IndexCallsiteContextGraph
601 :
public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
604 IndexCallsiteContextGraph(
610 friend CallsiteContextGraph<IndexCallsiteContextGraph,
FunctionSummary,
615 uint64_t getLastStackId(IndexCall &Call);
616 std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &Call);
618 void updateCall(
CallInfo &CallerCall, FuncInfo CalleeFunc);
621 cloneFunctionForCallsite(FuncInfo &Func,
CallInfo &Call,
622 std::map<CallInfo, CallInfo> &CallMap,
623 std::vector<CallInfo> &CallsWithMetadataInFunc,
625 std::string getLabel(
const FunctionSummary *Func,
const IndexCall &Call,
626 unsigned CloneNo)
const;
630 std::map<const FunctionSummary *, ValueInfo> FSToVIMap;
644 :
public DenseMapInfo<std::pair<IndexCall, unsigned>> {};
647 :
public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {};
652struct FieldSeparator {
656 FieldSeparator(
const char *Sep =
", ") : Sep(Sep) {}
673 assert(AllocTypes != (uint8_t)AllocationType::None);
675 ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold))
676 return AllocationType::NotCold;
684template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
686 const std::vector<uint8_t> &InAllocTypes,
687 const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>>
690 InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(),
692 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &r) {
696 if (l == (uint8_t)AllocationType::None ||
697 r->AllocTypes == (uint8_t)AllocationType::None)
699 return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes);
705template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
706typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
707CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst(
709 ContextNode *
Node = getNodeForAlloc(
C);
713 return NonAllocationCallToContextNodeMap.lookup(
C);
716template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
717typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
718CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForAlloc(
720 return AllocationCallToContextNodeMap.lookup(
C);
723template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
724typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
725CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForStackId(
727 auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId);
728 if (StackEntryNode != StackEntryIdToContextNodeMap.end())
729 return StackEntryNode->second;
733template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
734void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::unsetNodeForInst(
736 AllocationCallToContextNodeMap.erase(
C) ||
737 NonAllocationCallToContextNodeMap.erase(
C);
738 assert(!AllocationCallToContextNodeMap.count(
C) &&
739 !NonAllocationCallToContextNodeMap.count(
C));
742template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
743void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
745 unsigned int ContextId) {
746 for (
auto &Edge : CallerEdges) {
747 if (Edge->Caller == Caller) {
749 Edge->getContextIds().insert(ContextId);
753 std::shared_ptr<ContextEdge> Edge = std::make_shared<ContextEdge>(
755 CallerEdges.push_back(Edge);
756 Caller->CalleeEdges.push_back(Edge);
759template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
760void CallsiteContextGraph<
761 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *
Node) {
762 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();) {
764 if (Edge->AllocTypes == (uint8_t)AllocationType::None) {
765 assert(Edge->ContextIds.empty());
766 Edge->Callee->eraseCallerEdge(Edge.get());
767 EI =
Node->CalleeEdges.erase(EI);
773template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
774typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
775CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
776 findEdgeFromCallee(
const ContextNode *Callee) {
777 for (
const auto &Edge : CalleeEdges)
778 if (Edge->Callee == Callee)
783template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
784typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
785CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
786 findEdgeFromCaller(
const ContextNode *Caller) {
787 for (
const auto &Edge : CallerEdges)
788 if (Edge->Caller == Caller)
793template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
794void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
795 eraseCalleeEdge(
const ContextEdge *Edge) {
797 CalleeEdges, [Edge](
const std::shared_ptr<ContextEdge> &CalleeEdge) {
798 return CalleeEdge.get() == Edge;
800 assert(EI != CalleeEdges.end());
801 CalleeEdges.erase(EI);
804template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
805void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
806 eraseCallerEdge(
const ContextEdge *Edge) {
808 CallerEdges, [Edge](
const std::shared_ptr<ContextEdge> &CallerEdge) {
809 return CallerEdge.get() == Edge;
811 assert(EI != CallerEdges.end());
812 CallerEdges.erase(EI);
815template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
816uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType(
819 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
820 uint8_t
AllocType = (uint8_t)AllocationType::None;
821 for (
auto Id : ContextIds) {
822 AllocType |= (uint8_t)ContextIdToAllocationType[Id];
830template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
832CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypesImpl(
835 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
836 uint8_t
AllocType = (uint8_t)AllocationType::None;
837 for (
auto Id : Node1Ids) {
838 if (!Node2Ids.
count(Id))
840 AllocType |= (uint8_t)ContextIdToAllocationType[Id];
848template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
849uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypes(
851 if (Node1Ids.
size() < Node2Ids.
size())
852 return intersectAllocTypesImpl(Node1Ids, Node2Ids);
854 return intersectAllocTypesImpl(Node2Ids, Node1Ids);
857template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
858typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
859CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode(
861 assert(!getNodeForAlloc(Call));
863 std::make_unique<ContextNode>(
true, Call));
864 ContextNode *AllocNode = NodeOwner.back().get();
865 AllocationCallToContextNodeMap[
Call] = AllocNode;
866 NodeToCallingFunc[AllocNode] =
F;
868 AllocNode->OrigStackOrAllocId = LastContextId;
871 AllocNode->AllocTypes = (uint8_t)AllocationType::None;
876template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
877template <
class NodeT,
class IteratorT>
878void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
886 ContextIdToAllocationType[++LastContextId] =
AllocType;
889 AllocNode->AllocTypes |= (uint8_t)
AllocType;
890 AllocNode->ContextIds.insert(LastContextId);
895 ContextNode *PrevNode = AllocNode;
902 ContextIter != StackContext.
end(); ++ContextIter) {
903 auto StackId = getStackId(*ContextIter);
904 ContextNode *StackNode = getNodeForStackId(StackId);
907 std::make_unique<ContextNode>(
false));
908 StackNode = NodeOwner.back().get();
909 StackEntryIdToContextNodeMap[StackId] = StackNode;
910 StackNode->OrigStackOrAllocId = StackId;
914 StackNode->Recursive =
true;
915 StackNode->ContextIds.insert(LastContextId);
916 StackNode->AllocTypes |= (uint8_t)
AllocType;
917 PrevNode->addOrUpdateCallerEdge(StackNode,
AllocType, LastContextId);
918 PrevNode = StackNode;
922template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
924CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds(
928 for (
auto OldId : StackSequenceContextIds) {
929 NewContextIds.
insert(++LastContextId);
930 OldToNewContextIds[OldId].insert(LastContextId);
931 assert(ContextIdToAllocationType.count(OldId));
933 ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId];
935 return NewContextIds;
938template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
939void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
940 propagateDuplicateContextIds(
945 for (
auto Id : ContextIds)
946 if (
auto NewId = OldToNewContextIds.find(Id);
947 NewId != OldToNewContextIds.end())
948 NewIds.
insert(NewId->second.begin(), NewId->second.end());
953 auto UpdateCallers = [&](ContextNode *
Node,
955 auto &&UpdateCallers) ->
void {
956 for (
const auto &Edge :
Node->CallerEdges) {
957 auto Inserted = Visited.insert(Edge.get());
960 ContextNode *NextNode = Edge->Caller;
964 if (!NewIdsToAdd.
empty()) {
965 Edge->getContextIds().insert(NewIdsToAdd.
begin(), NewIdsToAdd.
end());
966 NextNode->ContextIds.insert(NewIdsToAdd.
begin(), NewIdsToAdd.
end());
967 UpdateCallers(NextNode, Visited, UpdateCallers);
973 for (
auto &Entry : AllocationCallToContextNodeMap) {
974 auto *
Node = Entry.second;
979 Node->ContextIds.insert(NewIdsToAdd.
begin(), NewIdsToAdd.
end());
980 UpdateCallers(
Node, Visited, UpdateCallers);
984template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
985void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
986 ContextNode *NewNode, ContextNode *OrigNode,
bool TowardsCallee) {
991 TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges;
993 for (
auto EI = OrigEdges.begin(); EI != OrigEdges.end();) {
999 set_subtract(Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds,
1000 NotFoundContextIds);
1001 RemainingContextIds.
swap(NotFoundContextIds);
1003 if (NewEdgeContextIds.
empty()) {
1007 if (TowardsCallee) {
1008 auto NewEdge = std::make_shared<ContextEdge>(
1009 Edge->Callee, NewNode, computeAllocType(NewEdgeContextIds),
1011 NewNode->CalleeEdges.push_back(NewEdge);
1012 NewEdge->Callee->CallerEdges.push_back(NewEdge);
1014 auto NewEdge = std::make_shared<ContextEdge>(
1015 NewNode, Edge->Caller, computeAllocType(NewEdgeContextIds),
1017 NewNode->CallerEdges.push_back(NewEdge);
1018 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
1021 if (Edge->getContextIds().empty()) {
1022 if (TowardsCallee) {
1023 Edge->Callee->eraseCallerEdge(Edge.get());
1024 EI = OrigNode->CalleeEdges.erase(EI);
1026 Edge->Caller->eraseCalleeEdge(Edge.get());
1027 EI = OrigNode->CallerEdges.erase(EI);
1035template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1036void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1037 assignStackNodesPostOrder(ContextNode *
Node,
1040 &StackIdToMatchingCalls) {
1048 auto CallerEdges =
Node->CallerEdges;
1049 for (
auto &Edge : CallerEdges) {
1053 assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls);
1062 if (
Node->IsAllocation ||
1063 !StackIdToMatchingCalls.count(
Node->OrigStackOrAllocId))
1066 auto &Calls = StackIdToMatchingCalls[
Node->OrigStackOrAllocId];
1070 if (Calls.size() == 1) {
1071 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[0];
1072 if (Ids.size() == 1) {
1073 assert(SavedContextIds.empty());
1076 if (
Node->Recursive)
1078 Node->setCall(Call);
1079 NonAllocationCallToContextNodeMap[
Call] =
Node;
1088 ContextNode *LastNode = getNodeForStackId(LastId);
1092 for (
unsigned I = 0;
I < Calls.size();
I++) {
1093 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
1096 if (SavedContextIds.empty())
1099 assert(LastId == Ids.back());
1101 ContextNode *FirstNode = getNodeForStackId(Ids[0]);
1111 ContextNode *PrevNode =
nullptr;
1112 for (
auto Id : Ids) {
1113 ContextNode *CurNode = getNodeForStackId(Id);
1117 assert(!CurNode->Recursive);
1122 auto *Edge = CurNode->findEdgeFromCallee(PrevNode);
1124 SavedContextIds.clear();
1131 if (SavedContextIds.empty())
1134 if (SavedContextIds.empty())
1138 NodeOwner.push_back(
1139 std::make_unique<ContextNode>(
false, Call));
1140 ContextNode *NewNode = NodeOwner.back().get();
1141 NodeToCallingFunc[NewNode] =
Func;
1142 NonAllocationCallToContextNodeMap[
Call] = NewNode;
1143 NewNode->ContextIds = SavedContextIds;
1144 NewNode->AllocTypes = computeAllocType(NewNode->ContextIds);
1149 connectNewNode(NewNode, FirstNode,
true);
1154 connectNewNode(NewNode, LastNode,
false);
1159 for (
auto Id : Ids) {
1160 ContextNode *CurNode = getNodeForStackId(Id);
1166 set_subtract(CurNode->ContextIds, NewNode->ContextIds);
1168 auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode);
1170 set_subtract(PrevEdge->getContextIds(), NewNode->ContextIds);
1171 if (PrevEdge->getContextIds().empty()) {
1172 PrevNode->eraseCallerEdge(PrevEdge);
1173 CurNode->eraseCalleeEdge(PrevEdge);
1181template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1182void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
1191 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
1192 for (
auto &Call : CallsWithMetadata) {
1194 if (AllocationCallToContextNodeMap.count(Call))
1196 auto StackIdsWithContextNodes =
1197 getStackIdsWithContextNodesForCall(
Call.call());
1200 if (StackIdsWithContextNodes.empty())
1204 StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back(
1205 {
Call.call(), StackIdsWithContextNodes, Func, {}});
1216 for (
auto &It : StackIdToMatchingCalls) {
1217 auto &Calls = It.getSecond();
1219 if (Calls.size() == 1) {
1220 auto &Ids = std::get<1>(Calls[0]);
1221 if (Ids.size() == 1)
1230 std::stable_sort(Calls.begin(), Calls.end(),
1231 [](
const CallContextInfo &
A,
const CallContextInfo &
B) {
1232 auto &IdsA = std::get<1>(A);
1233 auto &IdsB = std::get<1>(B);
1234 return IdsA.size() > IdsB.size() ||
1235 (IdsA.size() == IdsB.size() && IdsA < IdsB);
1242 ContextNode *LastNode = getNodeForStackId(LastId);
1246 if (LastNode->Recursive)
1254 for (
unsigned I = 0;
I < Calls.size();
I++) {
1255 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
1256 assert(SavedContextIds.empty());
1257 assert(LastId == Ids.back());
1265 ContextNode *PrevNode = LastNode;
1266 ContextNode *CurNode = LastNode;
1271 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
1273 CurNode = getNodeForStackId(Id);
1277 if (CurNode->Recursive) {
1282 auto *Edge = CurNode->findEdgeFromCaller(PrevNode);
1300 set_intersect(StackSequenceContextIds, Edge->getContextIds());
1303 if (StackSequenceContextIds.
empty()) {
1316 if (Ids.back() != getLastStackId(Call)) {
1317 for (
const auto &PE : CurNode->CallerEdges) {
1318 set_subtract(StackSequenceContextIds, PE->getContextIds());
1319 if (StackSequenceContextIds.
empty())
1323 if (StackSequenceContextIds.
empty())
1329 bool DuplicateContextIds =
false;
1330 if (
I + 1 < Calls.size()) {
1331 auto NextIds = std::get<1>(Calls[
I + 1]);
1332 DuplicateContextIds = Ids == NextIds;
1341 OldToNewContextIds.
reserve(OldToNewContextIds.
size() +
1342 StackSequenceContextIds.
size());
1345 ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds)
1346 : StackSequenceContextIds;
1347 assert(!SavedContextIds.empty());
1349 if (!DuplicateContextIds) {
1353 set_subtract(LastNodeContextIds, StackSequenceContextIds);
1354 if (LastNodeContextIds.
empty())
1361 propagateDuplicateContextIds(OldToNewContextIds);
1372 for (
auto &Entry : AllocationCallToContextNodeMap)
1373 assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls);
1378 Call->getMetadata(LLVMContext::MD_callsite));
1379 return CallsiteContext.
back();
1382uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &Call) {
1385 CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(
Call.getBase()));
1387 return Index.getStackIdAtIndex(CallsiteContext.
back());
1400std::string ModuleCallsiteContextGraph::getLabel(
const Function *Func,
1402 unsigned CloneNo)
const {
1403 return (
Twine(
Call->getFunction()->getName()) +
" -> " +
1404 cast<CallBase>(Call)->getCalledFunction()->getName())
1408std::string IndexCallsiteContextGraph::getLabel(
const FunctionSummary *Func,
1409 const IndexCall &Call,
1410 unsigned CloneNo)
const {
1411 auto VI = FSToVIMap.find(Func);
1412 assert(VI != FSToVIMap.end());
1413 if (isa<AllocInfo *>(
Call.getBase()))
1414 return (
VI->second.name() +
" -> alloc").str();
1416 auto *Callsite = dyn_cast_if_present<CallsiteInfo *>(
Call.getBase());
1417 return (
VI->second.name() +
" -> " +
1419 Callsite->Clones[CloneNo]))
1424std::vector<uint64_t>
1425ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall(
1428 Call->getMetadata(LLVMContext::MD_callsite));
1429 return getStackIdsWithContextNodes<MDNode, MDNode::op_iterator>(
1433std::vector<uint64_t>
1434IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &Call) {
1437 CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(
Call.getBase()));
1443template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1444template <
class NodeT,
class IteratorT>
1445std::vector<uint64_t>
1446CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes(
1448 std::vector<uint64_t> StackIds;
1449 for (
auto IdOrIndex : CallsiteContext) {
1450 auto StackId = getStackId(IdOrIndex);
1451 ContextNode *
Node = getNodeForStackId(StackId);
1454 StackIds.push_back(StackId);
1459ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
1461 :
Mod(
M), OREGetter(OREGetter) {
1463 std::vector<CallInfo> CallsWithMetadata;
1464 for (
auto &BB :
F) {
1465 for (
auto &
I : BB) {
1466 if (!isa<CallBase>(
I))
1468 if (
auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof)) {
1469 CallsWithMetadata.push_back(&
I);
1470 auto *AllocNode = addAllocNode(&
I, &
F);
1471 auto *CallsiteMD =
I.getMetadata(LLVMContext::MD_callsite);
1475 for (
auto &MDOp : MemProfMD->operands()) {
1476 auto *MIBMD = cast<const MDNode>(MDOp);
1480 addStackNodesForMIB<MDNode, MDNode::op_iterator>(
1481 AllocNode, StackContext, CallsiteContext,
1484 assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None);
1487 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
1488 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
1491 else if (
I.getMetadata(LLVMContext::MD_callsite))
1492 CallsWithMetadata.push_back(&
I);
1495 if (!CallsWithMetadata.empty())
1496 FuncToCallsWithMetadata.push_back({&
F, CallsWithMetadata});
1500 dbgs() <<
"CCG before updating call stack chains:\n";
1505 exportToDot(
"prestackupdate");
1509 handleCallsitesWithMultipleTargets();
1512 for (
auto &FuncEntry : FuncToCallsWithMetadata)
1513 for (
auto &Call : FuncEntry.second)
1514 Call.call()->setMetadata(LLVMContext::MD_callsite,
nullptr);
1517IndexCallsiteContextGraph::IndexCallsiteContextGraph(
1524 for (
auto &S :
VI.getSummaryList()) {
1533 !isPrevailing(
VI.getGUID(), S.get()))
1535 auto *
FS = dyn_cast<FunctionSummary>(S.get());
1538 std::vector<CallInfo> CallsWithMetadata;
1539 if (!
FS->allocs().empty()) {
1540 for (
auto &AN :
FS->mutableAllocs()) {
1545 if (AN.MIBs.empty())
1547 CallsWithMetadata.push_back({&AN});
1548 auto *AllocNode = addAllocNode({&AN},
FS);
1555 for (
auto &MIB : AN.MIBs) {
1558 addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>(
1559 AllocNode, StackContext, EmptyContext, MIB.AllocType);
1561 assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None);
1566 AN.Versions[0] = (uint8_t)allocTypeToUse(AllocNode->AllocTypes);
1570 if (!
FS->callsites().empty())
1571 for (
auto &SN :
FS->mutableCallsites())
1572 CallsWithMetadata.push_back({&SN});
1574 if (!CallsWithMetadata.empty())
1575 FuncToCallsWithMetadata.push_back({FS, CallsWithMetadata});
1577 if (!
FS->allocs().empty() || !
FS->callsites().empty())
1583 dbgs() <<
"CCG before updating call stack chains:\n";
1588 exportToDot(
"prestackupdate");
1592 handleCallsitesWithMultipleTargets();
1595template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1596void CallsiteContextGraph<DerivedCCG, FuncTy,
1597 CallTy>::handleCallsitesWithMultipleTargets() {
1607 for (
auto Entry = NonAllocationCallToContextNodeMap.begin();
1608 Entry != NonAllocationCallToContextNodeMap.end();) {
1609 auto *
Node = Entry->second;
1612 bool Removed =
false;
1614 for (
auto &Edge :
Node->CalleeEdges) {
1615 if (!Edge->Callee->hasCall())
1617 assert(NodeToCallingFunc.count(Edge->Callee));
1619 if (calleeMatchesFunc(Call, NodeToCallingFunc[Edge->Callee]))
1624 Entry = NonAllocationCallToContextNodeMap.erase(Entry);
1642 return Index.getStackIdAtIndex(IdOrIndex);
1645bool ModuleCallsiteContextGraph::calleeMatchesFunc(
Instruction *Call,
1647 auto *CB = dyn_cast<CallBase>(Call);
1648 if (!CB->getCalledOperand())
1650 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
1651 auto *CalleeFunc = dyn_cast<Function>(CalleeVal);
1652 if (CalleeFunc == Func)
1654 auto *Alias = dyn_cast<GlobalAlias>(CalleeVal);
1655 return Alias && Alias->getAliasee() ==
Func;
1658bool IndexCallsiteContextGraph::calleeMatchesFunc(IndexCall &Call,
1661 dyn_cast_if_present<CallsiteInfo *>(
Call.getBase())->Callee;
1665 Callee.getSummaryList().empty()
1667 : dyn_cast<AliasSummary>(
Callee.getSummaryList()[0].get());
1668 assert(FSToVIMap.count(Func));
1680 if (AllocTypes & (uint8_t)AllocationType::NotCold)
1682 if (AllocTypes & (uint8_t)AllocationType::Cold)
1687template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1688void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump()
1694template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1695void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
1697 OS <<
"Node " <<
this <<
"\n";
1701 OS <<
" (recursive)";
1704 OS <<
"\tContextIds:";
1705 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
1706 std::sort(SortedIds.begin(), SortedIds.end());
1707 for (
auto Id : SortedIds)
1710 OS <<
"\tCalleeEdges:\n";
1711 for (
auto &Edge : CalleeEdges)
1712 OS <<
"\t\t" << *Edge <<
"\n";
1713 OS <<
"\tCallerEdges:\n";
1714 for (
auto &Edge : CallerEdges)
1715 OS <<
"\t\t" << *Edge <<
"\n";
1716 if (!Clones.empty()) {
1719 for (
auto *Clone : Clones)
1722 }
else if (CloneOf) {
1723 OS <<
"\tClone of " << CloneOf <<
"\n";
1727template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1728void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::dump()
1734template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1735void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::print(
1739 OS <<
" ContextIds:";
1740 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
1741 std::sort(SortedIds.begin(), SortedIds.end());
1742 for (
auto Id : SortedIds)
1746template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1747void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::dump()
const {
1751template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1752void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
1754 OS <<
"Callsite Context Graph:\n";
1755 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
1756 for (
const auto Node : nodes<GraphType>(
this)) {
1757 if (
Node->isRemoved())
1764template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1766 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) {
1769 assert(Edge->AllocTypes != (uint8_t)AllocationType::None);
1770 assert(!Edge->ContextIds.empty());
1773template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1775 bool CheckEdges =
true) {
1776 if (
Node->isRemoved())
1780 if (
Node->CallerEdges.size()) {
1781 auto EI =
Node->CallerEdges.begin();
1782 auto &FirstEdge = *EI;
1785 for (; EI !=
Node->CallerEdges.end(); EI++) {
1786 const auto &Edge = *EI;
1788 checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
1789 set_union(CallerEdgeContextIds, Edge->ContextIds);
1793 assert(
Node->ContextIds == CallerEdgeContextIds ||
1796 if (
Node->CalleeEdges.size()) {
1797 auto EI =
Node->CalleeEdges.begin();
1798 auto &FirstEdge = *EI;
1801 for (; EI !=
Node->CalleeEdges.end(); EI++) {
1802 const auto &Edge = *EI;
1804 checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
1805 set_union(CalleeEdgeContextIds, Edge->ContextIds);
1807 assert(
Node->ContextIds == CalleeEdgeContextIds);
1811template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1812void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check()
const {
1813 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
1814 for (
const auto Node : nodes<GraphType>(
this)) {
1815 checkNode<DerivedCCG, FuncTy, CallTy>(
Node,
false);
1816 for (
auto &Edge :
Node->CallerEdges)
1817 checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
1821template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1823 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
1824 using NodeRef =
const ContextNode<DerivedCCG, FuncTy, CallTy> *;
1826 using NodePtrTy = std::unique_ptr<ContextNode<DerivedCCG, FuncTy, CallTy>>;
1831 decltype(&getNode)>;
1842 return G->NodeOwner.begin()->get();
1845 using EdgePtrTy = std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>;
1846 static const ContextNode<DerivedCCG, FuncTy, CallTy> *
1854 decltype(&GetCallee)>;
1865template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1870 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
1876 std::string LabelString =
1877 (
Twine(
"OrigId: ") + (Node->IsAllocation ?
"Alloc" :
"") +
1878 Twine(Node->OrigStackOrAllocId))
1880 LabelString +=
"\n";
1881 if (Node->hasCall()) {
1882 auto Func =
G->NodeToCallingFunc.find(Node);
1883 assert(Func !=
G->NodeToCallingFunc.end());
1885 G->getLabel(Func->second, Node->Call.call(), Node->Call.cloneNo());
1887 LabelString +=
"null call";
1888 if (Node->Recursive)
1889 LabelString +=
" (recursive)";
1891 LabelString +=
" (external)";
1898 getContextIds(Node->ContextIds) +
"\"")
1901 (
Twine(
",fillcolor=\"") + getColor(Node->AllocTypes) +
"\"").str();
1903 if (Node->CloneOf) {
1913 auto &Edge = *(ChildIter.getCurrent());
1914 return (
Twine(
"tooltip=\"") + getContextIds(Edge->ContextIds) +
"\"" +
1915 Twine(
",fillcolor=\"") + getColor(Edge->AllocTypes) +
"\"")
1922 return Node->isRemoved();
1927 std::string IdString =
"ContextIds:";
1928 if (ContextIds.
size() < 100) {
1929 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
1930 std::sort(SortedIds.begin(), SortedIds.end());
1931 for (
auto Id : SortedIds)
1932 IdString += (
" " +
Twine(Id)).str();
1934 IdString += (
" (" +
Twine(ContextIds.
size()) +
" ids)").str();
1939 static std::string getColor(uint8_t AllocTypes) {
1948 return "mediumorchid1";
1952 static std::string getNodeId(NodeRef
Node) {
1953 std::stringstream SStream;
1954 SStream << std::hex <<
"N0x" << (
unsigned long long)
Node;
1955 std::string
Result = SStream.str();
1960template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1961void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot(
1962 std::string Label)
const {
1967template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1968typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1969CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
1970 const std::shared_ptr<ContextEdge> &Edge, EdgeIter *CallerEdgeI) {
1971 ContextNode *
Node = Edge->Callee;
1972 NodeOwner.push_back(
1973 std::make_unique<ContextNode>(
Node->IsAllocation,
Node->Call));
1974 ContextNode *Clone = NodeOwner.back().get();
1975 Node->addClone(Clone);
1977 NodeToCallingFunc[Clone] = NodeToCallingFunc[
Node];
1978 moveEdgeToExistingCalleeClone(Edge, Clone, CallerEdgeI,
true);
1982template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1983void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1984 moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
1985 ContextNode *NewCallee, EdgeIter *CallerEdgeI,
1989 assert(NewCallee->getOrigNode() == Edge->Callee->getOrigNode());
1990 auto &EdgeContextIds = Edge->getContextIds();
1991 ContextNode *OldCallee = Edge->Callee;
1993 *CallerEdgeI = OldCallee->CallerEdges.erase(*CallerEdgeI);
1995 OldCallee->eraseCallerEdge(Edge.get());
1996 Edge->Callee = NewCallee;
1997 NewCallee->CallerEdges.push_back(Edge);
2001 NewCallee->ContextIds.insert(EdgeContextIds.begin(), EdgeContextIds.end());
2002 NewCallee->AllocTypes |= Edge->AllocTypes;
2003 OldCallee->AllocTypes = computeAllocType(OldCallee->ContextIds);
2006 OldCallee->ContextIds.empty());
2010 for (
auto &OldCalleeEdge : OldCallee->CalleeEdges) {
2015 set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove);
2016 OldCalleeEdge->AllocTypes =
2017 computeAllocType(OldCalleeEdge->getContextIds());
2024 if (
auto *NewCalleeEdge =
2025 NewCallee->findEdgeFromCallee(OldCalleeEdge->Callee)) {
2026 NewCalleeEdge->getContextIds().insert(EdgeContextIdsToMove.
begin(),
2027 EdgeContextIdsToMove.
end());
2028 NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
2032 auto NewEdge = std::make_shared<ContextEdge>(
2033 OldCalleeEdge->Callee, NewCallee,
2034 computeAllocType(EdgeContextIdsToMove), EdgeContextIdsToMove);
2035 NewCallee->CalleeEdges.push_back(NewEdge);
2036 NewEdge->Callee->CallerEdges.push_back(NewEdge);
2039 checkNode<DerivedCCG, FuncTy, CallTy>(OldCallee,
false);
2040 checkNode<DerivedCCG, FuncTy, CallTy>(NewCallee,
false);
2041 for (
const auto &OldCalleeEdge : OldCallee->CalleeEdges)
2042 checkNode<DerivedCCG, FuncTy, CallTy>(OldCalleeEdge->Callee,
2044 for (
const auto &NewCalleeEdge : NewCallee->CalleeEdges)
2045 checkNode<DerivedCCG, FuncTy, CallTy>(NewCalleeEdge->Callee,
2050template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2051void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() {
2053 for (
auto &Entry : AllocationCallToContextNodeMap)
2054 identifyClones(Entry.second, Visited);
2065template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2066void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
2069 checkNode<DerivedCCG, FuncTy, CallTy>(
Node);
2077 if (!
Node->hasCall())
2092 auto CallerEdges =
Node->CallerEdges;
2093 for (
auto &Edge : CallerEdges) {
2095 if (Edge->Callee ==
nullptr && Edge->Caller ==
nullptr) {
2100 if (!Visited.
count(Edge->Caller) && !Edge->Caller->CloneOf) {
2101 identifyClones(Edge->Caller, Visited);
2124 const unsigned AllocTypeCloningPriority[] = { 3, 4,
2127 std::stable_sort(
Node->CallerEdges.begin(),
Node->CallerEdges.end(),
2128 [&](
const std::shared_ptr<ContextEdge> &
A,
2129 const std::shared_ptr<ContextEdge> &
B) {
2130 assert(checkColdOrNotCold(A->AllocTypes) &&
2131 checkColdOrNotCold(B->AllocTypes));
2133 if (A->AllocTypes == B->AllocTypes)
2136 return *A->ContextIds.begin() < *B->ContextIds.begin();
2137 return AllocTypeCloningPriority[A->AllocTypes] <
2138 AllocTypeCloningPriority[B->AllocTypes];
2148 for (
auto EI =
Node->CallerEdges.begin(); EI !=
Node->CallerEdges.end();) {
2149 auto CallerEdge = *EI;
2158 std::vector<uint8_t> CalleeEdgeAllocTypesForCallerEdge;
2159 CalleeEdgeAllocTypesForCallerEdge.reserve(
Node->CalleeEdges.size());
2160 for (
auto &CalleeEdge :
Node->CalleeEdges)
2161 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
2162 CalleeEdge->getContextIds(), CallerEdge->getContextIds()));
2177 if (allocTypeToUse(CallerEdge->AllocTypes) ==
2178 allocTypeToUse(
Node->AllocTypes) &&
2179 allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
2180 CalleeEdgeAllocTypesForCallerEdge,
Node->CalleeEdges)) {
2187 ContextNode *Clone =
nullptr;
2188 for (
auto *CurClone :
Node->Clones) {
2189 if (allocTypeToUse(CurClone->AllocTypes) !=
2190 allocTypeToUse(CallerEdge->AllocTypes))
2193 if (!allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
2194 CalleeEdgeAllocTypesForCallerEdge, CurClone->CalleeEdges))
2202 moveEdgeToExistingCalleeClone(CallerEdge, Clone, &EI);
2204 Clone = moveEdgeToNewCalleeClone(CallerEdge, &EI);
2211 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &
E) {
2212 return E->AllocTypes == (uint8_t)AllocationType::None;
2218 for (
auto *Clone :
Node->Clones) {
2219 removeNoneTypeCalleeEdges(Clone);
2221 checkNode<DerivedCCG, FuncTy, CallTy>(Clone);
2228 removeNoneTypeCalleeEdges(
Node);
2233 [&](
const std::shared_ptr<ContextEdge> &
E) {
2234 return E->AllocTypes == (uint8_t)AllocationType::None;
2237 [&](
const std::shared_ptr<ContextEdge> &
E) {
2238 return E->AllocTypes == (uint8_t)AllocationType::None;
2242 checkNode<DerivedCCG, FuncTy, CallTy>(
Node);
2245void ModuleCallsiteContextGraph::updateAllocationCall(
2249 "memprof", AllocTypeString);
2250 cast<CallBase>(
Call.call())->addFnAttr(
A);
2251 OREGetter(
Call.call()->getFunction())
2253 <<
ore::NV(
"AllocationCall",
Call.call()) <<
" in clone "
2255 <<
" marked with memprof allocation attribute "
2256 <<
ore::NV(
"Attribute", AllocTypeString));
2259void IndexCallsiteContextGraph::updateAllocationCall(
CallInfo &Call,
2263 assert(AI->Versions.size() >
Call.cloneNo());
2267void ModuleCallsiteContextGraph::updateCall(
CallInfo &CallerCall,
2268 FuncInfo CalleeFunc) {
2269 if (CalleeFunc.cloneNo() > 0)
2270 cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func());
2271 OREGetter(CallerCall.call()->getFunction())
2273 <<
ore::NV(
"Call", CallerCall.call()) <<
" in clone "
2274 <<
ore::NV(
"Caller", CallerCall.call()->getFunction())
2275 <<
" assigned to call function clone "
2276 <<
ore::NV(
"Callee", CalleeFunc.func()));
2279void IndexCallsiteContextGraph::updateCall(
CallInfo &CallerCall,
2280 FuncInfo CalleeFunc) {
2281 auto *CI = CallerCall.call().dyn_cast<
CallsiteInfo *>();
2283 "Caller cannot be an allocation which should not have profiled calls");
2284 assert(CI->Clones.size() > CallerCall.cloneNo());
2285 CI->Clones[CallerCall.cloneNo()] = CalleeFunc.cloneNo();
2288CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
2290ModuleCallsiteContextGraph::cloneFunctionForCallsite(
2291 FuncInfo &Func,
CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
2292 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
2298 NewFunc->setName(
Name);
2299 for (
auto &Inst : CallsWithMetadataInFunc) {
2301 assert(Inst.cloneNo() == 0);
2302 CallMap[Inst] = {cast<Instruction>(VMap[Inst.call()]), CloneNo};
2304 OREGetter(
Func.func())
2306 <<
"created clone " <<
ore::NV(
"NewFunction", NewFunc));
2307 return {NewFunc, CloneNo};
2311 IndexCall>::FuncInfo
2312IndexCallsiteContextGraph::cloneFunctionForCallsite(
2313 FuncInfo &Func,
CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
2314 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
2329 for (
auto &Inst : CallsWithMetadataInFunc) {
2331 assert(Inst.cloneNo() == 0);
2332 if (
auto *AI = Inst.call().dyn_cast<
AllocInfo *>()) {
2333 assert(AI->Versions.size() == CloneNo);
2336 AI->Versions.push_back(0);
2339 assert(CI && CI->Clones.size() == CloneNo);
2342 CI->Clones.push_back(0);
2344 CallMap[Inst] = {Inst.call(), CloneNo};
2346 return {
Func.func(), CloneNo};
2380template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2381bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
2382 bool Changed =
false;
2390 auto RecordCalleeFuncOfCallsite = [&](ContextNode *
Caller,
2391 const FuncInfo &CalleeFunc) {
2393 CallsiteToCalleeFuncCloneMap[
Caller] = CalleeFunc;
2398 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
2399 FuncInfo OrigFunc(Func);
2403 std::map<FuncInfo, std::map<CallInfo, CallInfo>> FuncClonesToCallMap;
2404 for (
auto &Call : CallsWithMetadata) {
2405 ContextNode *
Node = getNodeForInst(Call);
2412 "Not having a call should have prevented cloning");
2416 std::map<FuncInfo, ContextNode *> FuncCloneToCurNodeCloneMap;
2420 auto AssignCallsiteCloneToFuncClone = [&](
const FuncInfo &FuncClone,
2422 ContextNode *CallsiteClone,
2425 FuncCloneToCurNodeCloneMap[FuncClone] = CallsiteClone;
2427 assert(FuncClonesToCallMap.count(FuncClone));
2428 std::map<CallInfo, CallInfo> &CallMap = FuncClonesToCallMap[FuncClone];
2430 if (CallMap.count(Call))
2431 CallClone = CallMap[
Call];
2432 CallsiteClone->setCall(CallClone);
2438 std::deque<ContextNode *> ClonesWorklist;
2440 if (!
Node->ContextIds.empty())
2441 ClonesWorklist.push_back(
Node);
2442 ClonesWorklist.insert(ClonesWorklist.end(),
Node->Clones.begin(),
2443 Node->Clones.end());
2448 unsigned NodeCloneCount = 0;
2449 while (!ClonesWorklist.empty()) {
2450 ContextNode *Clone = ClonesWorklist.front();
2451 ClonesWorklist.pop_front();
2454 checkNode<DerivedCCG, FuncTy, CallTy>(Clone);
2460 if (FuncClonesToCallMap.size() < NodeCloneCount) {
2462 if (NodeCloneCount == 1) {
2467 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &
E) {
2468 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
2472 FuncClonesToCallMap[OrigFunc] = {};
2473 AssignCallsiteCloneToFuncClone(
2474 OrigFunc, Call, Clone,
2475 AllocationCallToContextNodeMap.count(Call));
2476 for (
auto &CE : Clone->CallerEdges) {
2478 if (!
CE->Caller->hasCall())
2480 RecordCalleeFuncOfCallsite(
CE->Caller, OrigFunc);
2490 FuncInfo PreviousAssignedFuncClone;
2492 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &
E) {
2493 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
2495 bool CallerAssignedToCloneOfFunc =
false;
2496 if (EI != Clone->CallerEdges.end()) {
2497 const std::shared_ptr<ContextEdge> &Edge = *EI;
2498 PreviousAssignedFuncClone =
2499 CallsiteToCalleeFuncCloneMap[Edge->Caller];
2500 CallerAssignedToCloneOfFunc =
true;
2505 std::map<CallInfo, CallInfo> NewCallMap;
2506 unsigned CloneNo = FuncClonesToCallMap.size();
2507 assert(CloneNo > 0 &&
"Clone 0 is the original function, which "
2508 "should already exist in the map");
2509 FuncInfo NewFuncClone = cloneFunctionForCallsite(
2510 OrigFunc, Call, NewCallMap, CallsWithMetadata, CloneNo);
2511 FuncClonesToCallMap.emplace(NewFuncClone, std::move(NewCallMap));
2512 FunctionClonesAnalysis++;
2518 if (!CallerAssignedToCloneOfFunc) {
2519 AssignCallsiteCloneToFuncClone(
2520 NewFuncClone, Call, Clone,
2521 AllocationCallToContextNodeMap.count(Call));
2522 for (
auto &CE : Clone->CallerEdges) {
2524 if (!
CE->Caller->hasCall())
2526 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
2535 for (
auto CE : Clone->CallerEdges) {
2537 if (!
CE->Caller->hasCall())
2540 if (!CallsiteToCalleeFuncCloneMap.
count(
CE->Caller) ||
2544 CallsiteToCalleeFuncCloneMap[
CE->Caller] !=
2545 PreviousAssignedFuncClone)
2548 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
2558 for (
auto CalleeEdge :
CE->Caller->CalleeEdges) {
2563 ContextNode *
Callee = CalleeEdge->Callee;
2567 if (Callee == Clone || !
Callee->hasCall())
2569 ContextNode *NewClone = moveEdgeToNewCalleeClone(CalleeEdge);
2570 removeNoneTypeCalleeEdges(NewClone);
2573 removeNoneTypeCalleeEdges(Callee);
2578 if (CallsiteToCalleeFuncCloneMap.
count(Callee))
2579 RecordCalleeFuncOfCallsite(
2580 NewClone, CallsiteToCalleeFuncCloneMap[Callee]);
2589 OrigCall.setCloneNo(0);
2590 std::map<CallInfo, CallInfo> &CallMap =
2591 FuncClonesToCallMap[NewFuncClone];
2592 assert(CallMap.count(OrigCall));
2593 CallInfo NewCall(CallMap[OrigCall]);
2595 NewClone->setCall(NewCall);
2617 std::map<FuncInfo, ContextNode *> FuncCloneToNewCallsiteCloneMap;
2618 FuncInfo FuncCloneAssignedToCurCallsiteClone;
2621 for (
auto EI = Clone->CallerEdges.begin();
2622 EI != Clone->CallerEdges.end();) {
2625 if (!Edge->Caller->hasCall()) {
2631 if (CallsiteToCalleeFuncCloneMap.
count(Edge->Caller)) {
2632 FuncInfo FuncCloneCalledByCaller =
2633 CallsiteToCalleeFuncCloneMap[Edge->Caller];
2643 if ((FuncCloneToCurNodeCloneMap.count(FuncCloneCalledByCaller) &&
2644 FuncCloneToCurNodeCloneMap[FuncCloneCalledByCaller] !=
2652 (FuncCloneAssignedToCurCallsiteClone &&
2653 FuncCloneAssignedToCurCallsiteClone !=
2654 FuncCloneCalledByCaller)) {
2669 if (FuncCloneToNewCallsiteCloneMap.count(
2670 FuncCloneCalledByCaller)) {
2671 ContextNode *NewClone =
2672 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller];
2673 moveEdgeToExistingCalleeClone(Edge, NewClone, &EI);
2675 removeNoneTypeCalleeEdges(NewClone);
2678 ContextNode *NewClone = moveEdgeToNewCalleeClone(Edge, &EI);
2679 removeNoneTypeCalleeEdges(NewClone);
2680 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller] =
2683 ClonesWorklist.push_back(NewClone);
2684 assert(EI == Clone->CallerEdges.end() ||
2690 removeNoneTypeCalleeEdges(Clone);
2699 if (!FuncCloneAssignedToCurCallsiteClone) {
2700 FuncCloneAssignedToCurCallsiteClone = FuncCloneCalledByCaller;
2702 AssignCallsiteCloneToFuncClone(
2703 FuncCloneCalledByCaller, Call, Clone,
2704 AllocationCallToContextNodeMap.count(Call));
2708 assert(FuncCloneAssignedToCurCallsiteClone ==
2709 FuncCloneCalledByCaller);
2718 if (!FuncCloneAssignedToCurCallsiteClone) {
2723 for (
auto &CF : FuncClonesToCallMap) {
2724 if (!FuncCloneToCurNodeCloneMap.count(CF.first)) {
2725 FuncCloneAssignedToCurCallsiteClone = CF.first;
2729 assert(FuncCloneAssignedToCurCallsiteClone);
2731 AssignCallsiteCloneToFuncClone(
2732 FuncCloneAssignedToCurCallsiteClone, Call, Clone,
2733 AllocationCallToContextNodeMap.count(Call));
2735 assert(FuncCloneToCurNodeCloneMap
2736 [FuncCloneAssignedToCurCallsiteClone] == Clone);
2738 RecordCalleeFuncOfCallsite(Edge->Caller,
2739 FuncCloneAssignedToCurCallsiteClone);
2746 checkNode<DerivedCCG, FuncTy, CallTy>(
Node);
2747 for (
const auto &PE :
Node->CalleeEdges)
2748 checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee);
2749 for (
const auto &CE :
Node->CallerEdges)
2750 checkNode<DerivedCCG, FuncTy, CallTy>(
CE->Caller);
2751 for (
auto *Clone :
Node->Clones) {
2752 checkNode<DerivedCCG, FuncTy, CallTy>(Clone);
2753 for (
const auto &PE : Clone->CalleeEdges)
2754 checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee);
2755 for (
const auto &CE : Clone->CallerEdges)
2756 checkNode<DerivedCCG, FuncTy, CallTy>(
CE->Caller);
2762 auto UpdateCalls = [&](ContextNode *
Node,
2764 auto &&UpdateCalls) {
2769 for (
auto *Clone :
Node->Clones)
2770 UpdateCalls(Clone, Visited, UpdateCalls);
2772 for (
auto &Edge :
Node->CallerEdges)
2773 UpdateCalls(Edge->Caller, Visited, UpdateCalls);
2777 if (!
Node->hasCall() ||
Node->ContextIds.empty())
2780 if (
Node->IsAllocation) {
2781 updateAllocationCall(
Node->Call, allocTypeToUse(
Node->AllocTypes));
2785 if (!CallsiteToCalleeFuncCloneMap.
count(
Node))
2788 auto CalleeFunc = CallsiteToCalleeFuncCloneMap[
Node];
2789 updateCall(
Node->Call, CalleeFunc);
2798 for (
auto &Entry : AllocationCallToContextNodeMap)
2799 UpdateCalls(Entry.second, Visited, UpdateCalls);
2813 FunctionsClonedThinBackend++;
2814 for (
unsigned I = 1;
I < NumClones;
I++) {
2815 VMaps.
emplace_back(std::make_unique<ValueToValueMapTy>());
2817 FunctionClonesThinBackend++;
2820 for (
auto &BB : *NewF) {
2821 for (
auto &Inst : BB) {
2822 Inst.setMetadata(LLVMContext::MD_memprof,
nullptr);
2823 Inst.setMetadata(LLVMContext::MD_callsite,
nullptr);
2827 auto *PrevF = M.getFunction(
Name);
2831 assert(PrevF->isDeclaration());
2832 NewF->takeName(PrevF);
2833 PrevF->replaceAllUsesWith(NewF);
2834 PrevF->eraseFromParent();
2836 NewF->setName(
Name);
2838 <<
"created clone " <<
ore::NV(
"NewFunction", NewF));
2841 if (!FuncToAliasMap.count(&
F))
2843 for (
auto *
A : FuncToAliasMap[&
F]) {
2845 auto *PrevA = M.getNamedAlias(
Name);
2847 A->getType()->getPointerAddressSpace(),
2848 A->getLinkage(),
Name, NewF);
2849 NewA->copyAttributesFrom(
A);
2853 assert(PrevA->isDeclaration());
2854 NewA->takeName(PrevA);
2855 PrevA->replaceAllUsesWith(NewA);
2856 PrevA->eraseFromParent();
2898bool MemProfContextDisambiguation::applyImport(
Module &M) {
2900 bool Changed =
false;
2902 auto IsMemProfClone = [](
const Function &
F) {
2909 std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
2911 for (
auto &
A :
M.aliases()) {
2912 auto *Aliasee =
A.getAliaseeObject();
2913 if (
auto *
F = dyn_cast<Function>(Aliasee))
2914 FuncToAliasMap[
F].insert(&
A);
2918 if (
F.isDeclaration() || IsMemProfClone(
F))
2924 bool ClonesCreated =
false;
2925 unsigned NumClonesCreated = 0;
2926 auto CloneFuncIfNeeded = [&](
unsigned NumClones) {
2936 if (ClonesCreated) {
2937 assert(NumClonesCreated == NumClones);
2944 ClonesCreated =
true;
2945 NumClonesCreated = NumClones;
2966 if (isa<AliasSummary>(GVSummary))
2969 auto *
FS = cast<FunctionSummary>(GVSummary->getBaseObject());
2971 if (
FS->allocs().empty() &&
FS->callsites().empty())
2974 auto SI =
FS->callsites().begin();
2975 auto AI =
FS->allocs().begin();
2980 for (
auto &BB :
F) {
2981 for (
auto &
I : BB) {
2982 auto *CB = dyn_cast<CallBase>(&
I);
2987 auto *CalledValue = CB->getCalledOperand();
2988 auto *CalledFunction = CB->getCalledFunction();
2989 if (CalledValue && !CalledFunction) {
2990 CalledValue = CalledValue->stripPointerCasts();
2992 CalledFunction = dyn_cast<Function>(CalledValue);
2996 if (
auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
2997 assert(!CalledFunction &&
2998 "Expected null called function in callsite for alias");
2999 CalledFunction = dyn_cast<Function>(GA->getAliaseeObject());
3003 I.getMetadata(LLVMContext::MD_callsite));
3004 auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof);
3008 if (CB->getAttributes().hasFnAttr(
"memprof")) {
3010 CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
3011 ? AllocTypeColdThinBackend++
3012 : AllocTypeNotColdThinBackend++;
3013 OrigAllocsThinBackend++;
3014 AllocVersionsThinBackend++;
3015 if (!MaxAllocVersionsThinBackend)
3016 MaxAllocVersionsThinBackend = 1;
3019 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
3026 auto &AllocNode = *(AI++);
3030 auto MIBIter = AllocNode.MIBs.begin();
3031 for (
auto &MDOp : MemProfMD->operands()) {
3032 assert(MIBIter != AllocNode.MIBs.end());
3034 MIBIter->StackIdIndices.begin();
3035 auto *MIBMD = cast<const MDNode>(MDOp);
3040 for (
auto ContextIter =
3042 ContextIter != StackContext.
end(); ++ContextIter) {
3046 if (!StackIdsFromMetadata.
empty() &&
3047 StackIdsFromMetadata.
back() == *ContextIter)
3049 assert(StackIdIndexIter != MIBIter->StackIdIndices.end());
3058 CloneFuncIfNeeded(AllocNode.Versions.size());
3060 OrigAllocsThinBackend++;
3061 AllocVersionsThinBackend += AllocNode.Versions.size();
3062 if (MaxAllocVersionsThinBackend < AllocNode.Versions.size())
3063 MaxAllocVersionsThinBackend = AllocNode.Versions.size();
3070 if (AllocNode.Versions.size() == 1) {
3075 UnclonableAllocsThinBackend++;
3081 return Type == ((uint8_t)AllocationType::NotCold |
3082 (uint8_t)AllocationType::Cold);
3086 for (
unsigned J = 0; J < AllocNode.Versions.size(); J++) {
3092 : AllocTypeNotColdThinBackend++;
3104 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
3107 <<
ore::NV(
"AllocationCall", CBClone) <<
" in clone "
3109 <<
" marked with memprof allocation attribute "
3110 <<
ore::NV(
"Attribute", AllocTypeString));
3112 }
else if (!CallsiteContext.empty()) {
3114 assert(SI !=
FS->callsites().end());
3115 auto &StackNode = *(
SI++);
3120 auto StackIdIndexIter = StackNode.StackIdIndices.begin();
3121 for (
auto StackId : CallsiteContext) {
3122 assert(StackIdIndexIter != StackNode.StackIdIndices.end());
3130 CloneFuncIfNeeded(StackNode.Clones.size());
3134 assert(!IsMemProfClone(*CalledFunction));
3139 auto CalleeOrigName = CalledFunction->getName();
3140 for (
unsigned J = 0; J < StackNode.Clones.size(); J++) {
3143 if (!StackNode.Clones[J])
3145 auto NewF =
M.getOrInsertFunction(
3147 CalledFunction->getFunctionType());
3153 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
3156 <<
ore::NV(
"Call", CBClone) <<
" in clone "
3158 <<
" assigned to call function clone "
3159 <<
ore::NV(
"Callee", NewF.getCallee()));
3163 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
3164 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
3172template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3173bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {
3175 dbgs() <<
"CCG before cloning:\n";
3179 exportToDot(
"postbuild");
3192 dbgs() <<
"CCG after cloning:\n";
3196 exportToDot(
"cloned");
3198 bool Changed = assignFunctions();
3201 dbgs() <<
"CCG after assigning function clones:\n";
3205 exportToDot(
"clonefuncassign");
3210bool MemProfContextDisambiguation::processModule(
3217 return applyImport(M);
3230 ModuleCallsiteContextGraph CCG(M, OREGetter);
3231 return CCG.process();
3236 : ImportSummary(Summary) {
3237 if (ImportSummary) {
3247 auto ReadSummaryFile =
3249 if (!ReadSummaryFile) {
3256 if (!ImportSummaryForTestingOrErr) {
3262 ImportSummaryForTesting = std::move(*ImportSummaryForTestingOrErr);
3263 ImportSummary = ImportSummaryForTesting.get();
3272 if (!processModule(M, OREGetter))
3289 IndexCallsiteContextGraph CCG(
Index, isPrevailing);
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_ATTRIBUTE_UNUSED
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This file implements a map that provides insertion order iteration.
static SmallVector< std::unique_ptr< ValueToValueMapTy >, 4 > createFunctionClones(Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE, std::map< const Function *, SmallPtrSet< const GlobalAlias *, 1 > > &FuncToAliasMap)
static cl::opt< bool > ExportToDot("memprof-export-to-dot", cl::init(false), cl::Hidden, cl::desc("Export graph to dot files."))
static void checkEdge(const std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > &Edge)
bool checkColdOrNotCold(uint8_t AllocType)
static ValueInfo findValueInfoForFunc(const Function &F, const Module &M, const ModuleSummaryIndex *ImportSummary)
static std::string getAllocTypeString(uint8_t AllocTypes)
static cl::opt< bool > VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden, cl::desc("Perform verification checks on CallingContextGraph."))
static void checkNode(const ContextNode< DerivedCCG, FuncTy, CallTy > *Node, bool CheckEdges=true)
static std::string getMemProfFuncName(Twine Base, unsigned CloneNo)
static cl::opt< std::string > MemProfImportSummary("memprof-import-summary", cl::desc("Import summary to use for testing the ThinLTO backend via opt"), cl::Hidden)
static const std::string MemProfCloneSuffix
static cl::opt< std::string > DotFilePathPrefix("memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path prefix of the MemProf dot files."))
static cl::opt< bool > VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden, cl::desc("Perform frequent verification checks on nodes."))
static cl::opt< bool > DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden, cl::desc("Dump CallingContextGraph to stdout after each stage."))
This is the interface to build a ModuleSummaryIndex for a module.
ModuleSummaryIndex.h This file contains the declarations the classes that hold the module index and s...
Module.h This file contains the declarations for the Module class.
FunctionAnalysisManager FAM
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines generic set operations that may be used on set's of different types,...
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
void print(OutputBuffer &OB) const
Alias summary information.
ValueInfo getAliaseeVI() const
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Implements a dense probed hash-table based set.
Function summary information to aid decisions and implementation of importing.
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Function and variable summary information to aid decisions and implementation of importing.
static bool isLocalLinkage(LinkageTypes Linkage)
GUID getGUID() const
Return a 64-bit global unique ID constructed from global value name (i.e.
std::string getGlobalIdentifier() const
Return the modified name for this global value suitable to be used as the key for a global lookup (e....
@ InternalLinkage
Rename collisions when linking (static functions).
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
const Function * getFunction() const
Return the function this instruction belongs to.
This class implements a map that also provides access to all stored values in a deterministic order.
MemProfContextDisambiguation(const ModuleSummaryIndex *Summary=nullptr)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
Class to hold module path string table and global value map, and encapsulate methods for operating on...
static StringRef getOriginalNameBeforePromote(StringRef Name)
Helper to obtain the unpromoted name for a global value (or the original name if not promoted).
ValueInfo getValueInfo(const GlobalValueSummaryMapTy::value_type &R) const
Return a ValueInfo for the index value_type (convenient when iterating index).
uint64_t getStackIdAtIndex(unsigned Index) const
GlobalValueSummary * findSummaryInModule(ValueInfo VI, StringRef ModuleId) const
Find the summary for ValueInfo VI in module ModuleId, or nullptr if not found.
GlobalValue::GUID getGUIDFromOriginalID(GlobalValue::GUID OriginalID) const
Return the GUID for OriginalId in the OidGuidMap.
A Module instance is used to store all the information related to an LLVM module.
A discriminated union of two or more pointer types, with the discriminator in the low bit of the poin...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
std::pair< iterator, bool > insert(const ValueT &V)
void swap(DenseSetImpl &RHS)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
An efficient, type-erasing, non-owning reference to a callable.
Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresp...
CallStackIterator end() const
CallStackIterator beginAfterSharedPrefix(CallStack &Other)
This class implements an extremely fast bulk output stream that can only output to a stream.
StringRef AttributeString(unsigned Attribute)
@ C
The default llvm calling convention, compatible with C.
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
AllocationType getMIBAllocType(const MDNode *MIB)
Returns the allocation type from an MIB metadata node.
bool hasSingleAllocType(uint8_t AllocTypes)
True if the AllocTypes bitmask contains just a single type.
std::string getAllocTypeAttributeString(AllocationType Type)
Returns the string to use in attributes with the given type.
MDNode * getMIBStackNode(const MDNode *MIB)
Returns the stack node from an MIB metadata node.
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner={})
Log all errors (if any) in E to OS.
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B. Identical to set_intersection, except that it works on set<>...
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A is a subset of B (every element of A is also in B).
void set_subtract(S1Ty &S1, const S2Ty &S2)
set_subtract(A, B) - Compute A := A - B
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr)
Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndex(MemoryBufferRef Buffer)
Parse the specified bitcode buffer, returning the module summary index.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
bool set_union(S1Ty &S1, const S2Ty &S2)
set_union(A, B) - Compute A := A u B, return whether A changed.
cl::opt< bool > SupportsHotColdNew
Indicate we are linking with an allocator that supports hot/cold operator new interfaces.
S1Ty set_intersection(const S1Ty &S1, const S2Ty &S2)
set_intersection(A, B) - Return A ^ B
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Expected< T > errorOrToExpected(ErrorOr< T > &&EO)
Convert an ErrorOr<T> to an Expected<T>.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
bool mayHaveMemprofSummary(const CallBase *CB)
Returns true if the instruction could have memprof metadata, used to ensure consistency between summa...
DOTGraphTraits(bool IsSimple=false)
typename GTraits::NodeRef NodeRef
static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter, GraphType)
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
typename GTraits::ChildIteratorType ChildIteratorType
static bool isNodeHidden(NodeRef Node, GraphType)
static std::string getNodeLabel(NodeRef Node, GraphType G)
static std::string getNodeAttributes(NodeRef Node, GraphType)
static NodeRef getNode(const NodePtrTy &P)
static const ContextNode< DerivedCCG, FuncTy, CallTy > * GetCallee(const EdgePtrTy &P)
static ChildIteratorType child_end(NodeRef N)
std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > EdgePtrTy
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
const ContextNode< DerivedCCG, FuncTy, CallTy > * NodeRef
static ChildIteratorType child_begin(NodeRef N)
std::unique_ptr< ContextNode< DerivedCCG, FuncTy, CallTy > > NodePtrTy
static NodeRef getEntryNode(GraphType G)
static nodes_iterator nodes_begin(GraphType G)
static nodes_iterator nodes_end(GraphType G)
Summary of memprof metadata on allocations.
SmallVector< uint8_t > Versions
Summary of memprof callsite metadata.
SmallVector< unsigned > Clones
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
An information struct used to provide DenseMap with the various necessary components for a given valu...
Used in the streaming interface as the general argument type.
typename GraphType::UnknownGraphTypeError NodeRef
Struct that holds a reference to a particular GUID in a global value summary.
ArrayRef< std::unique_ptr< GlobalValueSummary > > getSummaryList() const