52#include <unordered_map>
57#define DEBUG_TYPE "memprof-context-disambiguation"
60 "Number of function clones created during whole program analysis");
62 "Number of function clones created during ThinLTO backend");
64 "Number of functions that had clones created during ThinLTO backend");
66 FunctionCloneDuplicatesThinBackend,
67 "Number of function clone duplicates detected during ThinLTO backend");
68STATISTIC(AllocTypeNotCold,
"Number of not cold static allocations (possibly "
69 "cloned) during whole program analysis");
70STATISTIC(AllocTypeCold,
"Number of cold static allocations (possibly cloned) "
71 "during whole program analysis");
73 "Number of not cold static allocations (possibly cloned) during "
75STATISTIC(AllocTypeColdThinBackend,
"Number of cold static allocations "
76 "(possibly cloned) during ThinLTO backend");
78 "Number of original (not cloned) allocations with memprof profiles "
79 "during ThinLTO backend");
81 AllocVersionsThinBackend,
82 "Number of allocation versions (including clones) during ThinLTO backend");
84 "Maximum number of allocation versions created for an original "
85 "allocation during ThinLTO backend");
87 "Number of unclonable ambigous allocations during ThinLTO backend");
89 "Number of edges removed due to mismatched callees (profiled vs IR)");
91 "Number of profiled callees found via tail calls");
93 "Aggregate depth of profiled callees found via tail calls");
95 "Maximum depth of profiled callees found via tail calls");
97 "Number of profiled callees found via multiple tail call chains");
98STATISTIC(DeferredBackedges,
"Number of backedges with deferred cloning");
99STATISTIC(NewMergedNodes,
"Number of new nodes created during merging");
100STATISTIC(NonNewMergedNodes,
"Number of non new nodes used during merging");
102 "Number of missing alloc nodes for context ids");
104 "Number of calls skipped during cloning due to unexpected operand");
106 "Number of callsites assigned to call multiple non-matching clones");
107STATISTIC(TotalMergeInvokes,
"Number of merge invocations for nodes");
108STATISTIC(TotalMergeIters,
"Number of merge iterations for nodes");
109STATISTIC(MaxMergeIters,
"Max merge iterations for nodes");
114 cl::desc(
"Specify the path prefix of the MemProf dot files."));
118 cl::desc(
"Export graph to dot files."));
123 cl::desc(
"Iteratively apply merging on a node to catch new callers"));
133 "memprof-dot-scope",
cl::desc(
"Scope of graph to export to dot"),
138 "Export only nodes with contexts feeding given "
139 "-memprof-dot-alloc-id"),
141 "Export only nodes with given -memprof-dot-context-id")));
145 cl::desc(
"Id of alloc to export if -memprof-dot-scope=alloc "
146 "or to highlight if -memprof-dot-scope=all"));
150 cl::desc(
"Id of context to export if -memprof-dot-scope=context or to "
151 "highlight otherwise"));
155 cl::desc(
"Dump CallingContextGraph to stdout after each stage."));
159 cl::desc(
"Perform verification checks on CallingContextGraph."));
163 cl::desc(
"Perform frequent verification checks on nodes."));
166 "memprof-import-summary",
167 cl::desc(
"Import summary to use for testing the ThinLTO backend via opt"),
173 cl::desc(
"Max depth to recursively search for missing "
174 "frames through tail calls."));
179 cl::desc(
"Allow cloning of callsites involved in recursive cycles"));
183 cl::desc(
"Allow cloning of contexts through recursive cycles"));
190 cl::desc(
"Merge clones before assigning functions"));
199 cl::desc(
"Allow cloning of contexts having recursive cycles"));
205 cl::desc(
"Minimum absolute count for promoted target to be inlinable"));
216 cl::desc(
"Linking with hot/cold operator new interfaces"));
221 "Require target function definition when promoting indirect calls"));
243template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
244class CallsiteContextGraph {
246 CallsiteContextGraph() =
default;
247 CallsiteContextGraph(
const CallsiteContextGraph &) =
default;
248 CallsiteContextGraph(CallsiteContextGraph &&) =
default;
255 void identifyClones();
262 bool assignFunctions();
269 const CallsiteContextGraph &CCG) {
275 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
277 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
279 void exportToDot(std::string Label)
const;
282 struct FuncInfo final
283 :
public std::pair<FuncTy *, unsigned > {
284 using Base = std::pair<FuncTy *, unsigned>;
286 FuncInfo(FuncTy *
F =
nullptr,
unsigned CloneNo = 0) :
Base(
F, CloneNo) {}
287 explicit operator bool()
const {
return this->first !=
nullptr; }
288 FuncTy *
func()
const {
return this->first; }
289 unsigned cloneNo()
const {
return this->second; }
293 struct CallInfo final :
public std::pair<CallTy, unsigned > {
294 using Base = std::pair<CallTy, unsigned>;
296 CallInfo(CallTy
Call =
nullptr,
unsigned CloneNo = 0)
298 explicit operator bool()
const {
return (
bool)this->first; }
299 CallTy call()
const {
return this->first; }
300 unsigned cloneNo()
const {
return this->second; }
301 void setCloneNo(
unsigned N) { this->second =
N; }
303 if (!
operator bool()) {
309 OS <<
"\t(clone " << cloneNo() <<
")";
335 bool Recursive =
false;
362 std::vector<std::shared_ptr<ContextEdge>> CalleeEdges;
366 std::vector<std::shared_ptr<ContextEdge>> CallerEdges;
370 bool useCallerEdgesForContextInfo()
const {
375 assert(!CalleeEdges.empty() || CallerEdges.empty() || IsAllocation ||
393 for (
auto &Edge : CalleeEdges.empty() ? CallerEdges : CalleeEdges)
394 Count += Edge->getContextIds().size();
398 CalleeEdges, useCallerEdgesForContextInfo()
400 : std::vector<std::shared_ptr<ContextEdge>>());
401 for (
const auto &Edge : Edges)
408 uint8_t computeAllocType()
const {
413 CalleeEdges, useCallerEdgesForContextInfo()
415 : std::vector<std::shared_ptr<ContextEdge>>());
416 for (
const auto &Edge : Edges) {
427 bool emptyContextIds()
const {
429 CalleeEdges, useCallerEdgesForContextInfo()
431 : std::vector<std::shared_ptr<ContextEdge>>());
432 for (
const auto &Edge : Edges) {
433 if (!Edge->getContextIds().empty())
440 std::vector<ContextNode *> Clones;
443 ContextNode *CloneOf =
nullptr;
445 ContextNode(
bool IsAllocation) : IsAllocation(IsAllocation),
Call() {}
447 ContextNode(
bool IsAllocation, CallInfo
C)
448 : IsAllocation(IsAllocation),
Call(
C) {}
450 void addClone(ContextNode *Clone) {
452 CloneOf->Clones.push_back(Clone);
453 Clone->CloneOf = CloneOf;
455 Clones.push_back(Clone);
457 Clone->CloneOf =
this;
461 ContextNode *getOrigNode() {
468 unsigned int ContextId);
470 ContextEdge *findEdgeFromCallee(
const ContextNode *Callee);
471 ContextEdge *findEdgeFromCaller(
const ContextNode *Caller);
472 void eraseCalleeEdge(
const ContextEdge *Edge);
473 void eraseCallerEdge(
const ContextEdge *Edge);
475 void setCall(CallInfo
C) {
Call =
C; }
477 bool hasCall()
const {
return (
bool)
Call.call(); }
483 bool isRemoved()
const {
519 bool IsBackedge =
false;
526 : Callee(Callee), Caller(Caller), AllocTypes(
AllocType),
527 ContextIds(std::move(ContextIds)) {}
533 inline void clear() {
543 inline bool isRemoved()
const {
544 if (Callee || Caller)
565 void removeNoneTypeCalleeEdges(ContextNode *
Node);
566 void removeNoneTypeCallerEdges(ContextNode *
Node);
568 recursivelyRemoveNoneTypeCalleeEdges(ContextNode *
Node,
574 template <
class NodeT,
class IteratorT>
575 std::vector<uint64_t>
580 ContextNode *addAllocNode(CallInfo
Call,
const FuncTy *
F);
583 template <
class NodeT,
class IteratorT>
584 void addStackNodesForMIB(ContextNode *AllocNode,
593 void updateStackNodes();
597 void handleCallsitesWithMultipleTargets();
600 void markBackedges();
610 bool partitionCallsByCallee(
612 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode);
619 std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;
626 using EdgeIter =
typename std::vector<std::shared_ptr<ContextEdge>>
::iterator;
631 struct CallContextInfo {
635 std::vector<uint64_t> StackIds;
649 void removeEdgeFromGraph(ContextEdge *Edge, EdgeIter *EI =
nullptr,
650 bool CalleeIter =
true);
658 void assignStackNodesPostOrder(
671 void propagateDuplicateContextIds(
677 void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode,
685 return static_cast<const DerivedCCG *
>(
this)->getStackId(IdOrIndex);
695 calleesMatch(CallTy
Call, EdgeIter &EI,
700 const FuncTy *getCalleeFunc(CallTy
Call) {
701 return static_cast<DerivedCCG *
>(
this)->getCalleeFunc(
Call);
707 bool calleeMatchesFunc(
708 CallTy
Call,
const FuncTy *Func,
const FuncTy *CallerFunc,
709 std::vector<std::pair<CallTy, FuncTy *>> &FoundCalleeChain) {
710 return static_cast<DerivedCCG *
>(
this)->calleeMatchesFunc(
711 Call, Func, CallerFunc, FoundCalleeChain);
715 bool sameCallee(CallTy Call1, CallTy Call2) {
716 return static_cast<DerivedCCG *
>(
this)->sameCallee(Call1, Call2);
721 std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy
Call) {
722 return static_cast<DerivedCCG *
>(
this)->getStackIdsWithContextNodesForCall(
728 return static_cast<DerivedCCG *
>(
this)->getLastStackId(
Call);
734 static_cast<DerivedCCG *
>(
this)->updateAllocationCall(
Call,
AllocType);
739 return static_cast<const DerivedCCG *
>(
this)->getAllocationCallType(
Call);
744 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc) {
745 static_cast<DerivedCCG *
>(
this)->updateCall(CallerCall, CalleeFunc);
751 FuncInfo cloneFunctionForCallsite(
753 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
754 return static_cast<DerivedCCG *
>(
this)->cloneFunctionForCallsite(
755 Func,
Call, CallMap, CallsWithMetadataInFunc, CloneNo);
760 std::string getLabel(
const FuncTy *Func,
const CallTy
Call,
761 unsigned CloneNo)
const {
762 return static_cast<const DerivedCCG *
>(
this)->getLabel(Func,
Call, CloneNo);
766 ContextNode *createNewNode(
bool IsAllocation,
const FuncTy *
F =
nullptr,
767 CallInfo
C = CallInfo()) {
768 NodeOwner.push_back(std::make_unique<ContextNode>(IsAllocation,
C));
769 auto *NewNode = NodeOwner.back().get();
771 NodeToCallingFunc[NewNode] =
F;
772 NewNode->NodeId = NodeOwner.size();
777 ContextNode *getNodeForInst(
const CallInfo &
C);
778 ContextNode *getNodeForAlloc(
const CallInfo &
C);
779 ContextNode *getNodeForStackId(
uint64_t StackId);
801 moveEdgeToNewCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
808 void moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
809 ContextNode *NewCallee,
810 bool NewClone =
false,
818 void moveCalleeEdgeToNewCaller(
const std::shared_ptr<ContextEdge> &Edge,
819 ContextNode *NewCaller);
830 void mergeNodeCalleeClones(
835 void findOtherCallersToShareMerge(
836 ContextNode *
Node, std::vector<std::shared_ptr<ContextEdge>> &CalleeEdges,
867 std::vector<std::unique_ptr<ContextNode>> NodeOwner;
873 unsigned int LastContextId = 0;
876template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
878 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode;
879template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
881 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge;
882template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
884 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::FuncInfo;
885template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
887 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::CallInfo;
890class ModuleCallsiteContextGraph
891 :
public CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
894 ModuleCallsiteContextGraph(
896 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter);
899 friend CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
902 uint64_t getStackId(uint64_t IdOrIndex)
const;
904 bool calleeMatchesFunc(
905 Instruction *
Call,
const Function *Func,
const Function *CallerFunc,
906 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain);
907 bool sameCallee(Instruction *Call1, Instruction *Call2);
908 bool findProfiledCalleeThroughTailCalls(
909 const Function *ProfiledCallee,
Value *CurCallee,
unsigned Depth,
910 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
911 bool &FoundMultipleCalleeChains);
912 uint64_t getLastStackId(Instruction *
Call);
913 std::vector<uint64_t> getStackIdsWithContextNodesForCall(Instruction *
Call);
916 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
917 CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
919 cloneFunctionForCallsite(FuncInfo &Func, CallInfo &
Call,
920 DenseMap<CallInfo, CallInfo> &CallMap,
921 std::vector<CallInfo> &CallsWithMetadataInFunc,
923 std::string getLabel(
const Function *Func,
const Instruction *
Call,
924 unsigned CloneNo)
const;
927 llvm::function_ref<OptimizationRemarkEmitter &(
Function *)> OREGetter;
933struct IndexCall :
public PointerUnion<CallsiteInfo *, AllocInfo *> {
934 IndexCall() : PointerUnion() {}
935 IndexCall(std::nullptr_t) : IndexCall() {}
936 IndexCall(CallsiteInfo *StackNode) : PointerUnion(StackNode) {}
937 IndexCall(AllocInfo *AllocNode) : PointerUnion(AllocNode) {}
938 IndexCall(PointerUnion PT) : PointerUnion(PT) {}
940 IndexCall *operator->() {
return this; }
942 void print(raw_ostream &OS)
const {
943 PointerUnion<CallsiteInfo *, AllocInfo *>
Base = *
this;
968class IndexCallsiteContextGraph
969 :
public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
972 IndexCallsiteContextGraph(
973 ModuleSummaryIndex &Index,
977 ~IndexCallsiteContextGraph() {
982 for (
auto &
I : FunctionCalleesToSynthesizedCallsiteInfos) {
984 for (
auto &Callsite :
I.second)
985 FS->addCallsite(*Callsite.second);
990 friend CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
993 uint64_t getStackId(uint64_t IdOrIndex)
const;
994 const FunctionSummary *getCalleeFunc(IndexCall &
Call);
995 bool calleeMatchesFunc(
996 IndexCall &
Call,
const FunctionSummary *Func,
997 const FunctionSummary *CallerFunc,
998 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain);
999 bool sameCallee(IndexCall &Call1, IndexCall &Call2);
1000 bool findProfiledCalleeThroughTailCalls(
1001 ValueInfo ProfiledCallee, ValueInfo CurCallee,
unsigned Depth,
1002 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
1003 bool &FoundMultipleCalleeChains);
1004 uint64_t getLastStackId(IndexCall &
Call);
1005 std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &
Call);
1008 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
1009 CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
1010 IndexCall>::FuncInfo
1011 cloneFunctionForCallsite(FuncInfo &Func, CallInfo &
Call,
1012 DenseMap<CallInfo, CallInfo> &CallMap,
1013 std::vector<CallInfo> &CallsWithMetadataInFunc,
1015 std::string getLabel(
const FunctionSummary *Func,
const IndexCall &
Call,
1016 unsigned CloneNo)
const;
1020 std::map<const FunctionSummary *, ValueInfo> FSToVIMap;
1022 const ModuleSummaryIndex &Index;
1030 std::unordered_map<FunctionSummary *,
1031 std::map<ValueInfo, std::unique_ptr<CallsiteInfo>>>
1032 FunctionCalleesToSynthesizedCallsiteInfos;
1043 :
public DenseMapInfo<std::pair<IndexCall, unsigned>> {};
1046 :
public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {};
1067template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1068bool allocTypesMatch(
1069 const std::vector<uint8_t> &InAllocTypes,
1070 const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>>
1074 assert(InAllocTypes.size() == Edges.size());
1076 InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(), Edges.end(),
1078 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &r) {
1082 if (l == (uint8_t)AllocationType::None ||
1083 r->AllocTypes == (uint8_t)AllocationType::None)
1085 return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes);
1094template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1095bool allocTypesMatchClone(
1096 const std::vector<uint8_t> &InAllocTypes,
1097 const ContextNode<DerivedCCG, FuncTy, CallTy> *Clone) {
1098 const ContextNode<DerivedCCG, FuncTy, CallTy> *
Node = Clone->CloneOf;
1102 assert(InAllocTypes.size() ==
Node->CalleeEdges.size());
1106 for (
const auto &
E : Clone->CalleeEdges) {
1108 EdgeCalleeMap[
E->Callee] =
E->AllocTypes;
1112 for (
unsigned I = 0;
I <
Node->CalleeEdges.size();
I++) {
1113 auto Iter = EdgeCalleeMap.
find(
Node->CalleeEdges[
I]->Callee);
1115 if (Iter == EdgeCalleeMap.
end())
1123 if (allocTypeToUse(Iter->second) != allocTypeToUse(InAllocTypes[
I]))
// getNodeForInst: map a CallInfo to its graph node. Checks the allocation
// map first (via getNodeForAlloc), then falls back to the non-allocation
// (callsite) map lookup.
// NOTE(review): this view is extraction-garbled and missing interior lines
// (orig 1136-1138, presumably the early return when Node is non-null —
// confirm against upstream). Code below kept byte-identical to the fragment.
1131template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1132typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1133CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst(
1134 const CallInfo &
C) {
1135 ContextNode *
Node = getNodeForAlloc(
C);
1139 return NonAllocationCallToContextNodeMap.lookup(
C);
// getNodeForAlloc: look up the context node created for an allocation call.
// DenseMap::lookup returns nullptr (value-initialized) when absent — callers
// can use the result as a "was this an allocation" test.
// NOTE(review): closing brace elided in this garbled view; code kept
// byte-identical to the visible fragment.
1142template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1143typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1144CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForAlloc(
1145 const CallInfo &
C) {
1146 return AllocationCallToContextNodeMap.lookup(
C);
// getNodeForStackId: find the context node recorded for a stack frame id in
// StackEntryIdToContextNodeMap, if any.
// NOTE(review): the trailing "return nullptr;" / closing brace are elided in
// this garbled view — confirm against upstream. Code kept byte-identical.
1149template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1150typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1151CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForStackId(
1153 auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId);
1154 if (StackEntryNode != StackEntryIdToContextNodeMap.end())
1155 return StackEntryNode->second;
// ContextNode::addOrUpdateCallerEdge: if an edge to the given caller already
// exists, add ContextId to its context-id set; otherwise create a new
// ContextEdge (this -> Caller) seeded with {ContextId} and the given alloc
// type, and append it to CallerEdges.
// NOTE(review): lines updating the existing edge's AllocTypes (orig 1165,
// 1167-1169) are elided in this garbled view. Code kept byte-identical.
1159template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1160void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1162 unsigned int ContextId) {
1163 for (
auto &
Edge : CallerEdges) {
1164 if (
Edge->Caller == Caller) {
1166 Edge->getContextIds().insert(ContextId);
1170 std::shared_ptr<ContextEdge>
Edge = std::make_shared<ContextEdge>(
1171 this, Caller, (uint8_t)
AllocType, DenseSet<uint32_t>({ContextId}));
1172 CallerEdges.push_back(
Edge);
// removeEdgeFromGraph: unlink an edge from both its callee's CallerEdges and
// its caller's CalleeEdges. When an iterator EI is supplied, the erase on the
// side being iterated goes through the iterator (CalleeIter selects which
// side) so the caller's loop stays valid. Asserts verify both vectors shrank.
// NOTE(review): the Callee/Caller local bindings and the EI==nullptr branch
// (orig ~1180-1199) are elided in this garbled view. Code kept byte-identical.
1176template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1177void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::removeEdgeFromGraph(
1178 ContextEdge *
Edge, EdgeIter *EI,
bool CalleeIter) {
1194 auto CalleeCallerCount =
Callee->CallerEdges.size();
1195 auto CallerCalleeCount =
Caller->CalleeEdges.size();
1200 }
else if (CalleeIter) {
1202 *EI =
Caller->CalleeEdges.erase(*EI);
1205 *EI =
Callee->CallerEdges.erase(*EI);
1207 assert(
Callee->CallerEdges.size() < CalleeCallerCount);
1208 assert(
Caller->CalleeEdges.size() < CallerCalleeCount);
// removeNoneTypeCalleeEdges: walk Node's callee edges and remove any whose
// AllocTypes degenerated to None, using the iterator-aware
// removeEdgeFromGraph overload (CalleeIter=true) so iteration stays valid.
// NOTE(review): the Edge binding and the else/increment arm are elided in
// this garbled view. Code kept byte-identical to the visible fragment.
1211template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1212void CallsiteContextGraph<
1213 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *Node) {
1214 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();) {
1216 if (
Edge->AllocTypes == (uint8_t)AllocationType::None) {
1218 removeEdgeFromGraph(
Edge.get(), &EI,
true);
// removeNoneTypeCallerEdges: mirror of removeNoneTypeCalleeEdges for the
// caller side. For each caller edge with AllocTypes == None, detach it from
// the caller's CalleeEdges and erase it from Node's CallerEdges via the loop
// iterator.
// NOTE(review): the Edge binding and else/increment arm are elided in this
// garbled view. Code kept byte-identical to the visible fragment.
1224template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1225void CallsiteContextGraph<
1226 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCallerEdges(ContextNode *Node) {
1227 for (
auto EI =
Node->CallerEdges.begin(); EI !=
Node->CallerEdges.end();) {
1229 if (
Edge->AllocTypes == (uint8_t)AllocationType::None) {
1231 Edge->Caller->eraseCalleeEdge(
Edge.get());
1232 EI =
Node->CallerEdges.erase(EI);
// ContextNode::findEdgeFromCallee: linear scan of CalleeEdges for the edge
// whose Callee matches the given node.
// NOTE(review): the "return Edge.get(); / return nullptr;" tail is elided in
// this garbled view — confirm against upstream. Code kept byte-identical.
1238template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1239typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1240CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1241 findEdgeFromCallee(
const ContextNode *Callee) {
1242 for (
const auto &
Edge : CalleeEdges)
1243 if (
Edge->Callee == Callee)
// ContextNode::findEdgeFromCaller: linear scan of CallerEdges for the edge
// whose Caller matches the given node (mirror of findEdgeFromCallee).
// NOTE(review): the returning tail is elided in this garbled view — confirm
// against upstream. Code kept byte-identical to the visible fragment.
1248template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1249typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1250CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1251 findEdgeFromCaller(
const ContextNode *Caller) {
1252 for (
const auto &
Edge : CallerEdges)
1253 if (
Edge->Caller == Caller)
// ContextNode::eraseCalleeEdge: locate the shared_ptr element of CalleeEdges
// that owns the given raw edge pointer (pointer-identity predicate) and erase
// it; asserts the edge was actually present.
// NOTE(review): the line introducing EI (orig ~1261, presumably a find_if
// over CalleeEdges) is elided in this garbled view. Code kept byte-identical.
1258template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1259void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1260 eraseCalleeEdge(
const ContextEdge *
Edge) {
1262 CalleeEdges, [
Edge](
const std::shared_ptr<ContextEdge> &CalleeEdge) {
1263 return CalleeEdge.get() ==
Edge;
1265 assert(EI != CalleeEdges.end());
1266 CalleeEdges.erase(EI);
// ContextNode::eraseCallerEdge: mirror of eraseCalleeEdge for the caller
// side — find the owning shared_ptr in CallerEdges by pointer identity and
// erase it; asserts the edge was present.
// NOTE(review): the line introducing EI (orig ~1272) is elided in this
// garbled view. Code kept byte-identical to the visible fragment.
1269template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1270void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1271 eraseCallerEdge(
const ContextEdge *
Edge) {
1273 CallerEdges, [
Edge](
const std::shared_ptr<ContextEdge> &CallerEdge) {
1274 return CallerEdge.get() ==
Edge;
1276 assert(EI != CallerEdges.end());
1277 CallerEdges.erase(EI);
// computeAllocType: OR together the AllocationType of every context id in the
// set, starting from None. The Cold|NotCold constant on the first body line
// is presumably a "both bits set" early-exit bound — the declaration it
// initializes is elided in this garbled view (TODO confirm upstream).
// Code kept byte-identical to the visible fragment.
1280template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1281uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType(
1282 DenseSet<uint32_t> &ContextIds)
const {
1284 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
1285 uint8_t
AllocType = (uint8_t)AllocationType::None;
1286 for (
auto Id : ContextIds) {
1287 AllocType |= (uint8_t)ContextIdToAllocationType.at(Id);
// intersectAllocTypesImpl: accumulate the alloc types of context ids present
// in BOTH sets — iterate Node1Ids, skip ids absent from Node2Ids, OR in the
// AllocationType of the rest. Callers pass the smaller set first (see
// intersectAllocTypes).
// NOTE(review): return type line, early-exit on both-bits-set, and return
// are elided in this garbled view. Code kept byte-identical.
1295template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1297CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypesImpl(
1298 const DenseSet<uint32_t> &Node1Ids,
1299 const DenseSet<uint32_t> &Node2Ids)
const {
1301 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
1302 uint8_t
AllocType = (uint8_t)AllocationType::None;
1303 for (
auto Id : Node1Ids) {
1304 if (!Node2Ids.
count(Id))
1306 AllocType |= (uint8_t)ContextIdToAllocationType.at(Id);
// intersectAllocTypes: wrapper that forwards to intersectAllocTypesImpl with
// the smaller set as the iterated (first) argument, so the per-element
// membership test runs over the fewer ids.
1314template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1315uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypes(
1316 const DenseSet<uint32_t> &Node1Ids,
1317 const DenseSet<uint32_t> &Node2Ids)
const {
1318 if (Node1Ids.
size() < Node2Ids.
size())
1319 return intersectAllocTypesImpl(Node1Ids, Node2Ids);
1321 return intersectAllocTypesImpl(Node2Ids, Node1Ids);
// addAllocNode: create a new allocation context node for Call in function F,
// register it in AllocationCallToContextNodeMap, record the current
// LastContextId as its original alloc id, and initialize AllocTypes to None
// (filled in later per-MIB by addStackNodesForMIB).
// NOTE(review): intermediate lines (orig 1328, 1331, 1333-1334) and the
// return are elided in this garbled view. Code kept byte-identical.
1324template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1325typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1326CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode(
1327 CallInfo
Call,
const FuncTy *
F) {
1329 ContextNode *AllocNode = createNewNode(
true,
F,
Call);
1330 AllocationCallToContextNodeMap[
Call] = AllocNode;
1332 AllocNode->OrigStackOrAllocId = LastContextId;
1335 AllocNode->AllocTypes = (uint8_t)AllocationType::None;
// addStackNodesForMIB: for one MIB (memory-info-block) context, assign a
// fresh context id (++LastContextId) with its AllocationType, record any
// context size info, OR the type into the alloc node, then walk the stack
// frames from the allocation outward: get-or-create a ContextNode per stack
// id, mark repeats within this context as Recursive (via StackIdSet —
// presumably; the membership check lines are elided), OR in the alloc type,
// and chain PrevNode -> StackNode caller edges carrying this context id.
// NOTE(review): many interior lines (orig 1355-1361, 1366-1369, 1371-1374,
// 1376-1381, 1385, 1389-1394, 1396) are elided in this garbled view.
// Code kept byte-identical to the visible fragment.
1351template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1352template <
class NodeT,
class IteratorT>
1353void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
1354 ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext,
1362 ContextIdToAllocationType[++LastContextId] =
AllocType;
1364 if (!ContextSizeInfo.
empty()) {
1365 auto &
Entry = ContextIdToContextSizeInfos[LastContextId];
1370 AllocNode->AllocTypes |= (uint8_t)
AllocType;
1375 ContextNode *PrevNode = AllocNode;
1379 SmallSet<uint64_t, 8> StackIdSet;
1382 ContextIter != StackContext.
end(); ++ContextIter) {
1383 auto StackId = getStackId(*ContextIter);
1384 ContextNode *StackNode = getNodeForStackId(StackId);
1386 StackNode = createNewNode(
false);
1387 StackEntryIdToContextNodeMap[StackId] = StackNode;
1388 StackNode->OrigStackOrAllocId = StackId;
1395 StackNode->Recursive =
true;
1397 StackNode->AllocTypes |= (uint8_t)
AllocType;
1398 PrevNode->addOrUpdateCallerEdge(StackNode,
AllocType, LastContextId);
1399 PrevNode = StackNode;
// duplicateContextIds: for each old context id in StackSequenceContextIds,
// mint a new id (++LastContextId), record the old->new mapping, copy the
// old id's AllocationType to the new id, and keep DotAllocContextIds
// membership in sync (so dot-export highlighting follows duplicates).
// Returns the set of newly created ids.
// NOTE(review): return-type line and closing brace are elided in this
// garbled view. Code kept byte-identical to the visible fragment.
1403template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1405CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds(
1406 const DenseSet<uint32_t> &StackSequenceContextIds,
1407 DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
1408 DenseSet<uint32_t> NewContextIds;
1409 for (
auto OldId : StackSequenceContextIds) {
1410 NewContextIds.
insert(++LastContextId);
1411 OldToNewContextIds[OldId].insert(LastContextId);
1412 assert(ContextIdToAllocationType.count(OldId));
1414 ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId];
1415 if (DotAllocContextIds.
contains(OldId))
1416 DotAllocContextIds.
insert(LastContextId);
1418 return NewContextIds;
1421template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1422void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1423 propagateDuplicateContextIds(
1424 const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
1426 auto GetNewIds = [&OldToNewContextIds](
const DenseSet<uint32_t> &ContextIds) {
1427 DenseSet<uint32_t> NewIds;
1428 for (
auto Id : ContextIds)
1429 if (
auto NewId = OldToNewContextIds.find(Id);
1430 NewId != OldToNewContextIds.end())
1436 auto UpdateCallers = [&](ContextNode *
Node,
1437 DenseSet<const ContextEdge *> &Visited,
1438 auto &&UpdateCallers) ->
void {
1439 for (
const auto &
Edge :
Node->CallerEdges) {
1443 ContextNode *NextNode =
Edge->Caller;
1444 DenseSet<uint32_t> NewIdsToAdd = GetNewIds(
Edge->getContextIds());
1447 if (!NewIdsToAdd.
empty()) {
1448 Edge->getContextIds().insert_range(NewIdsToAdd);
1449 UpdateCallers(NextNode, Visited, UpdateCallers);
1454 DenseSet<const ContextEdge *> Visited;
1455 for (
auto &Entry : AllocationCallToContextNodeMap) {
1457 UpdateCallers(Node, Visited, UpdateCallers);
1461template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1462void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
1463 ContextNode *NewNode, ContextNode *OrigNode,
bool TowardsCallee,
1466 DenseSet<uint32_t> RemainingContextIds) {
1468 TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges;
1469 DenseSet<uint32_t> RecursiveContextIds;
1470 DenseSet<uint32_t> AllCallerContextIds;
1475 for (
auto &CE : OrigEdges) {
1476 AllCallerContextIds.
reserve(
CE->getContextIds().size());
1477 for (
auto Id :
CE->getContextIds())
1478 if (!AllCallerContextIds.
insert(Id).second)
1479 RecursiveContextIds.
insert(Id);
1483 for (
auto EI = OrigEdges.begin(); EI != OrigEdges.end();) {
1485 DenseSet<uint32_t> NewEdgeContextIds;
1486 DenseSet<uint32_t> NotFoundContextIds;
1490 set_subtract(
Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds,
1491 NotFoundContextIds);
1494 if (RecursiveContextIds.
empty()) {
1497 RemainingContextIds.
swap(NotFoundContextIds);
1507 DenseSet<uint32_t> NonRecursiveRemainingCurEdgeIds =
1509 set_subtract(RemainingContextIds, NonRecursiveRemainingCurEdgeIds);
1512 if (NewEdgeContextIds.
empty()) {
1516 if (TowardsCallee) {
1517 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1518 auto NewEdge = std::make_shared<ContextEdge>(
1519 Edge->Callee, NewNode, NewAllocType, std::move(NewEdgeContextIds));
1520 NewNode->CalleeEdges.push_back(NewEdge);
1521 NewEdge->Callee->CallerEdges.push_back(NewEdge);
1523 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1524 auto NewEdge = std::make_shared<ContextEdge>(
1525 NewNode,
Edge->Caller, NewAllocType, std::move(NewEdgeContextIds));
1526 NewNode->CallerEdges.push_back(NewEdge);
1527 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
1530 if (
Edge->getContextIds().empty()) {
1531 removeEdgeFromGraph(
Edge.get(), &EI, TowardsCallee);
1538template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1540 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) {
1544 assert(!Edge->ContextIds.empty());
1547template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1549 bool CheckEdges =
true) {
1550 if (
Node->isRemoved())
1554 auto NodeContextIds =
Node->getContextIds();
1558 if (
Node->CallerEdges.size()) {
1560 Node->CallerEdges.front()->ContextIds);
1564 set_union(CallerEdgeContextIds, Edge->ContextIds);
1571 NodeContextIds == CallerEdgeContextIds ||
1574 if (
Node->CalleeEdges.size()) {
1576 Node->CalleeEdges.front()->ContextIds);
1580 set_union(CalleeEdgeContextIds, Edge->getContextIds());
1586 NodeContextIds == CalleeEdgeContextIds);
1595 for (
const auto &
E :
Node->CalleeEdges)
1601template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1602void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1603 assignStackNodesPostOrder(
1604 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
1605 DenseMap<uint64_t, std::vector<CallContextInfo>>
1606 &StackIdToMatchingCalls,
1607 DenseMap<CallInfo, CallInfo> &CallToMatchingCall) {
1615 auto CallerEdges =
Node->CallerEdges;
1616 for (
auto &
Edge : CallerEdges) {
1618 if (
Edge->isRemoved()) {
1622 assignStackNodesPostOrder(
Edge->Caller, Visited, StackIdToMatchingCalls,
1623 CallToMatchingCall);
1632 if (
Node->IsAllocation ||
1633 !StackIdToMatchingCalls.count(
Node->OrigStackOrAllocId))
1636 auto &Calls = StackIdToMatchingCalls[
Node->OrigStackOrAllocId];
1640 if (Calls.size() == 1) {
1641 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[0];
1642 if (Ids.size() == 1) {
1643 assert(SavedContextIds.empty());
1645 assert(Node == getNodeForStackId(Ids[0]));
1646 if (
Node->Recursive)
1649 NonAllocationCallToContextNodeMap[
Call] =
Node;
1658 uint64_t LastId =
Node->OrigStackOrAllocId;
1659 ContextNode *LastNode = getNodeForStackId(LastId);
1662 assert(LastNode == Node);
1664 ContextNode *LastNode =
Node;
1669 DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
1671 [[maybe_unused]]
bool PrevIterCreatedNode =
false;
1672 bool CreatedNode =
false;
1673 for (
unsigned I = 0;
I < Calls.size();
1674 I++, PrevIterCreatedNode = CreatedNode) {
1675 CreatedNode =
false;
1676 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
1679 if (SavedContextIds.empty()) {
1686 auto MatchingCall = CallToMatchingCall[
Call];
1687 if (!NonAllocationCallToContextNodeMap.contains(MatchingCall)) {
1691 assert(
I > 0 && !PrevIterCreatedNode);
1694 NonAllocationCallToContextNodeMap[MatchingCall]->MatchingCalls.push_back(
1699 assert(LastId == Ids.back());
1708 ContextNode *PrevNode = LastNode;
1712 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
1714 ContextNode *CurNode = getNodeForStackId(Id);
1718 assert(!CurNode->Recursive);
1720 auto *
Edge = CurNode->findEdgeFromCaller(PrevNode);
1732 if (SavedContextIds.empty()) {
1741 ContextNode *NewNode = createNewNode(
false, Func,
Call);
1742 NonAllocationCallToContextNodeMap[
Call] = NewNode;
1744 NewNode->AllocTypes = computeAllocType(SavedContextIds);
1746 ContextNode *FirstNode = getNodeForStackId(Ids[0]);
1752 connectNewNode(NewNode, FirstNode,
true, SavedContextIds);
1757 connectNewNode(NewNode, LastNode,
false, SavedContextIds);
1762 for (
auto Id : Ids) {
1763 ContextNode *CurNode = getNodeForStackId(Id);
1770 auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode);
1777 set_subtract(PrevEdge->getContextIds(), SavedContextIds);
1778 if (PrevEdge->getContextIds().empty())
1779 removeEdgeFromGraph(PrevEdge);
1784 CurNode->AllocTypes = CurNode->CalleeEdges.empty()
1785 ? (uint8_t)AllocationType::None
1786 : CurNode->computeAllocType();
1791 for (
auto Id : Ids) {
1792 ContextNode *CurNode = getNodeForStackId(Id);
1801template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1802void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
1810 DenseMap<uint64_t, std::vector<CallContextInfo>> StackIdToMatchingCalls;
1811 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
1812 for (
auto &
Call : CallsWithMetadata) {
1814 if (AllocationCallToContextNodeMap.count(
Call))
1816 auto StackIdsWithContextNodes =
1817 getStackIdsWithContextNodesForCall(
Call.call());
1820 if (StackIdsWithContextNodes.empty())
1824 StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back(
1825 {
Call.call(), StackIdsWithContextNodes,
Func, {}});
1835 DenseMap<uint32_t, DenseSet<uint32_t>> OldToNewContextIds;
1839 DenseMap<CallInfo, CallInfo> CallToMatchingCall;
1840 for (
auto &It : StackIdToMatchingCalls) {
1841 auto &Calls = It.getSecond();
1843 if (Calls.size() == 1) {
1844 auto &Ids = Calls[0].StackIds;
1845 if (Ids.size() == 1)
1858 DenseMap<const FuncTy *, unsigned> FuncToIndex;
1859 for (
const auto &[Idx, CallCtxInfo] :
enumerate(Calls))
1860 FuncToIndex.
insert({CallCtxInfo.Func, Idx});
1863 [&FuncToIndex](
const CallContextInfo &
A,
const CallContextInfo &
B) {
1864 return A.StackIds.size() >
B.StackIds.size() ||
1865 (
A.StackIds.size() ==
B.StackIds.size() &&
1866 (
A.StackIds <
B.StackIds ||
1867 (
A.StackIds ==
B.StackIds &&
1868 FuncToIndex[
A.Func] < FuncToIndex[
B.Func])));
1874 uint64_t LastId = It.getFirst();
1875 ContextNode *LastNode = getNodeForStackId(LastId);
1879 if (LastNode->Recursive)
1884 DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
1892 DenseSet<const FuncTy *> MatchingIdsFuncSet;
1895 for (
unsigned I = 0;
I < Calls.size();
I++) {
1896 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
1897 assert(SavedContextIds.empty());
1898 assert(LastId == Ids.back());
1903 if (
I > 0 && Ids != Calls[
I - 1].StackIds)
1904 MatchingIdsFuncSet.
clear();
1911 DenseSet<uint32_t> StackSequenceContextIds = LastNodeContextIds;
1913 ContextNode *PrevNode = LastNode;
1914 ContextNode *CurNode = LastNode;
1919 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
1921 CurNode = getNodeForStackId(Id);
1925 if (CurNode->Recursive) {
1930 auto *
Edge = CurNode->findEdgeFromCaller(PrevNode);
1951 if (StackSequenceContextIds.
empty()) {
1964 if (Ids.back() != getLastStackId(
Call)) {
1965 for (
const auto &PE : LastNode->CallerEdges) {
1966 set_subtract(StackSequenceContextIds, PE->getContextIds());
1967 if (StackSequenceContextIds.
empty())
1971 if (StackSequenceContextIds.
empty())
1983 MatchingIdsFuncSet.
insert(Func);
1990 bool DuplicateContextIds =
false;
1991 for (
unsigned J =
I + 1; J < Calls.size(); J++) {
1992 auto &CallCtxInfo = Calls[J];
1993 auto &NextIds = CallCtxInfo.StackIds;
1996 auto *NextFunc = CallCtxInfo.Func;
1997 if (NextFunc != Func) {
2000 DuplicateContextIds =
true;
2003 auto &NextCall = CallCtxInfo.Call;
2004 CallToMatchingCall[NextCall] =
Call;
2015 OldToNewContextIds.
reserve(OldToNewContextIds.
size() +
2016 StackSequenceContextIds.
size());
2019 ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds)
2020 : StackSequenceContextIds;
2021 assert(!SavedContextIds.empty());
2023 if (!DuplicateContextIds) {
2027 set_subtract(LastNodeContextIds, StackSequenceContextIds);
2028 if (LastNodeContextIds.
empty())
2035 propagateDuplicateContextIds(OldToNewContextIds);
2045 DenseSet<const ContextNode *> Visited;
2046 for (
auto &Entry : AllocationCallToContextNodeMap)
2047 assignStackNodesPostOrder(
Entry.second, Visited, StackIdToMatchingCalls,
2048 CallToMatchingCall);
2053uint64_t ModuleCallsiteContextGraph::getLastStackId(Instruction *
Call) {
2054 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
2056 return CallsiteContext.
back();
2059uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &
Call) {
2061 CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
2064 return Index.getStackIdAtIndex(CallsiteContext.
back());
2086 auto Pos =
F.getName().find_last_of(
'.');
2089 bool Err =
F.getName().drop_front(Pos + 1).getAsInteger(10, CloneNo);
2095std::string ModuleCallsiteContextGraph::getLabel(
const Function *Func,
2096 const Instruction *
Call,
2097 unsigned CloneNo)
const {
2103std::string IndexCallsiteContextGraph::getLabel(
const FunctionSummary *Func,
2104 const IndexCall &
Call,
2105 unsigned CloneNo)
const {
2106 auto VI = FSToVIMap.find(Func);
2107 assert(VI != FSToVIMap.end());
2110 return CallerName +
" -> alloc";
2113 return CallerName +
" -> " +
2115 Callsite->Clones[CloneNo]);
2119std::vector<uint64_t>
2120ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall(
2121 Instruction *
Call) {
2122 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
2124 return getStackIdsWithContextNodes<MDNode, MDNode::op_iterator>(
2128std::vector<uint64_t>
2129IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &
Call) {
2131 CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
2133 return getStackIdsWithContextNodes<CallsiteInfo,
2134 SmallVector<unsigned>::const_iterator>(
2138template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2139template <
class NodeT,
class IteratorT>
2140std::vector<uint64_t>
2141CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes(
2142 CallStack<NodeT, IteratorT> &CallsiteContext) {
2143 std::vector<uint64_t> StackIds;
2144 for (
auto IdOrIndex : CallsiteContext) {
2145 auto StackId = getStackId(IdOrIndex);
2146 ContextNode *
Node = getNodeForStackId(StackId);
2149 StackIds.push_back(StackId);
2154ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
2156 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter)
2157 :
Mod(
M), OREGetter(OREGetter) {
2159 std::vector<CallInfo> CallsWithMetadata;
2160 for (
auto &BB :
F) {
2161 for (
auto &
I : BB) {
2164 if (
auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof)) {
2165 CallsWithMetadata.push_back(&
I);
2166 auto *AllocNode = addAllocNode(&
I, &
F);
2167 auto *CallsiteMD =
I.getMetadata(LLVMContext::MD_callsite);
2171 for (
auto &MDOp : MemProfMD->operands()) {
2173 std::vector<ContextTotalSize> ContextSizeInfo;
2175 if (MIBMD->getNumOperands() > 2) {
2176 for (
unsigned I = 2;
I < MIBMD->getNumOperands();
I++) {
2177 MDNode *ContextSizePair =
2186 ContextSizeInfo.push_back({FullStackId, TotalSize});
2192 addStackNodesForMIB<MDNode, MDNode::op_iterator>(
2193 AllocNode, StackContext, CallsiteContext,
2199 DotAllocContextIds = AllocNode->getContextIds();
2203 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
2204 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
2207 else if (
I.getMetadata(LLVMContext::MD_callsite)) {
2208 CallsWithMetadata.push_back(&
I);
2212 if (!CallsWithMetadata.empty())
2213 FuncToCallsWithMetadata[&
F] = CallsWithMetadata;
2217 dbgs() <<
"CCG before updating call stack chains:\n";
2222 exportToDot(
"prestackupdate");
2227 exportToDot(
"poststackupdate");
2229 handleCallsitesWithMultipleTargets();
2234 for (
auto &FuncEntry : FuncToCallsWithMetadata)
2235 for (
auto &
Call : FuncEntry.second)
2236 Call.call()->setMetadata(LLVMContext::MD_callsite,
nullptr);
2239IndexCallsiteContextGraph::IndexCallsiteContextGraph(
2243 : Index(Index), isPrevailing(isPrevailing) {
2244 for (
auto &
I : Index) {
2245 auto VI = Index.getValueInfo(
I);
2246 for (
auto &S : VI.getSummaryList()) {
2255 !isPrevailing(VI.getGUID(), S.get()))
2260 std::vector<CallInfo> CallsWithMetadata;
2261 if (!
FS->allocs().empty()) {
2262 for (
auto &AN :
FS->mutableAllocs()) {
2267 if (AN.MIBs.empty())
2269 IndexCall AllocCall(&AN);
2270 CallsWithMetadata.push_back(AllocCall);
2271 auto *AllocNode = addAllocNode(AllocCall, FS);
2279 AN.ContextSizeInfos.size() == AN.MIBs.size());
2281 for (
auto &MIB : AN.MIBs) {
2284 std::vector<ContextTotalSize> ContextSizeInfo;
2285 if (!AN.ContextSizeInfos.empty()) {
2286 for (
auto [FullStackId, TotalSize] : AN.ContextSizeInfos[
I])
2287 ContextSizeInfo.push_back({FullStackId, TotalSize});
2289 addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>(
2290 AllocNode, StackContext, EmptyContext, MIB.AllocType,
2297 DotAllocContextIds = AllocNode->getContextIds();
2303 AN.Versions[0] = (
uint8_t)allocTypeToUse(AllocNode->AllocTypes);
2307 if (!
FS->callsites().empty())
2308 for (
auto &SN :
FS->mutableCallsites()) {
2309 IndexCall StackNodeCall(&SN);
2310 CallsWithMetadata.push_back(StackNodeCall);
2313 if (!CallsWithMetadata.empty())
2314 FuncToCallsWithMetadata[
FS] = CallsWithMetadata;
2316 if (!
FS->allocs().empty() || !
FS->callsites().empty())
2322 dbgs() <<
"CCG before updating call stack chains:\n";
2327 exportToDot(
"prestackupdate");
2332 exportToDot(
"poststackupdate");
2334 handleCallsitesWithMultipleTargets();
2339template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2340void CallsiteContextGraph<DerivedCCG, FuncTy,
2341 CallTy>::handleCallsitesWithMultipleTargets() {
2356 std::vector<std::pair<CallInfo, ContextNode *>> NewCallToNode;
2357 for (
auto &Entry : NonAllocationCallToContextNodeMap) {
2358 auto *
Node = Entry.second;
2367 std::vector<CallInfo> AllCalls;
2368 AllCalls.reserve(
Node->MatchingCalls.size() + 1);
2369 AllCalls.push_back(
Node->Call);
2383 if (partitionCallsByCallee(
Node, AllCalls, NewCallToNode))
2386 auto It = AllCalls.begin();
2388 for (; It != AllCalls.end(); ++It) {
2391 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();
2394 if (!Edge->Callee->hasCall())
2396 assert(NodeToCallingFunc.count(Edge->Callee));
2398 if (!calleesMatch(
ThisCall.call(), EI, TailCallToContextNodeMap)) {
2407 if (
Node->Call != ThisCall) {
2408 Node->setCall(ThisCall);
2419 Node->MatchingCalls.clear();
2422 if (It == AllCalls.end()) {
2423 RemovedEdgesWithMismatchedCallees++;
2427 Node->setCall(CallInfo());
2432 for (++It; It != AllCalls.end(); ++It) {
2436 Node->MatchingCalls.push_back(ThisCall);
2445 NonAllocationCallToContextNodeMap.remove_if([](
const auto &it) {
2446 return !it.second->hasCall() || it.second->Call != it.first;
2450 for (
auto &[
Call,
Node] : NewCallToNode)
2451 NonAllocationCallToContextNodeMap[
Call] =
Node;
2455 for (
auto &[
Call,
Node] : TailCallToContextNodeMap)
2456 NonAllocationCallToContextNodeMap[
Call] =
Node;
2459template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2460bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::partitionCallsByCallee(
2462 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode) {
2466 struct CallsWithSameCallee {
2467 std::vector<CallInfo> Calls;
2468 ContextNode *
Node =
nullptr;
2474 for (
auto ThisCall : AllCalls) {
2475 auto *
F = getCalleeFunc(
ThisCall.call());
2477 CalleeFuncToCallInfo[
F].Calls.push_back(ThisCall);
2486 for (
const auto &Edge :
Node->CalleeEdges) {
2487 if (!Edge->Callee->hasCall())
2489 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee];
2490 if (CalleeFuncToCallInfo.
contains(ProfiledCalleeFunc))
2491 CalleeNodeToCallInfo[Edge->Callee] =
2492 &CalleeFuncToCallInfo[ProfiledCalleeFunc];
2498 if (CalleeNodeToCallInfo.
empty())
2510 ContextNode *UnmatchedCalleesNode =
nullptr;
2512 bool UsedOrigNode =
false;
2517 auto CalleeEdges =
Node->CalleeEdges;
2518 for (
auto &Edge : CalleeEdges) {
2519 if (!Edge->Callee->hasCall())
2524 ContextNode *CallerNodeToUse =
nullptr;
2528 if (!CalleeNodeToCallInfo.
contains(Edge->Callee)) {
2529 if (!UnmatchedCalleesNode)
2530 UnmatchedCalleesNode =
2531 createNewNode(
false, NodeToCallingFunc[
Node]);
2532 CallerNodeToUse = UnmatchedCalleesNode;
2536 auto *
Info = CalleeNodeToCallInfo[Edge->Callee];
2539 if (!UsedOrigNode) {
2542 Node->MatchingCalls.clear();
2543 UsedOrigNode =
true;
2546 createNewNode(
false, NodeToCallingFunc[
Node]);
2550 Info->Node->setCall(
Info->Calls.front());
2556 NewCallToNode.push_back({
Info->Node->Call,
Info->Node});
2558 CallerNodeToUse =
Info->Node;
2562 if (CallerNodeToUse ==
Node)
2565 moveCalleeEdgeToNewCaller(Edge, CallerNodeToUse);
2572 for (
auto &
I : CalleeNodeToCallInfo)
2573 removeNoneTypeCallerEdges(
I.second->Node);
2574 if (UnmatchedCalleesNode)
2575 removeNoneTypeCallerEdges(UnmatchedCalleesNode);
2576 removeNoneTypeCallerEdges(
Node);
2586uint64_t IndexCallsiteContextGraph::getStackId(uint64_t IdOrIndex)
const {
2589 return Index.getStackIdAtIndex(IdOrIndex);
2592template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2593bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::calleesMatch(
2594 CallTy
Call, EdgeIter &EI,
2595 MapVector<CallInfo, ContextNode *> &TailCallToContextNodeMap) {
2597 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[
Edge->Callee];
2598 const FuncTy *CallerFunc = NodeToCallingFunc[
Edge->Caller];
2601 std::vector<std::pair<CallTy, FuncTy *>> FoundCalleeChain;
2602 if (!calleeMatchesFunc(
Call, ProfiledCalleeFunc, CallerFunc,
2607 if (FoundCalleeChain.empty())
2611 auto *CurEdge =
Callee->findEdgeFromCaller(Caller);
2615 CurEdge->ContextIds.insert_range(
Edge->ContextIds);
2616 CurEdge->AllocTypes |=
Edge->AllocTypes;
2621 auto NewEdge = std::make_shared<ContextEdge>(
2622 Callee, Caller,
Edge->AllocTypes,
Edge->ContextIds);
2623 Callee->CallerEdges.push_back(NewEdge);
2624 if (Caller ==
Edge->Caller) {
2628 EI =
Caller->CalleeEdges.insert(EI, NewEdge);
2631 "Iterator position not restored after insert and increment");
2633 Caller->CalleeEdges.push_back(NewEdge);
2638 auto *CurCalleeNode =
Edge->Callee;
2639 for (
auto &[NewCall, Func] : FoundCalleeChain) {
2640 ContextNode *NewNode =
nullptr;
2642 if (TailCallToContextNodeMap.
count(NewCall)) {
2643 NewNode = TailCallToContextNodeMap[NewCall];
2644 NewNode->AllocTypes |=
Edge->AllocTypes;
2646 FuncToCallsWithMetadata[
Func].push_back({NewCall});
2648 NewNode = createNewNode(
false, Func, NewCall);
2649 TailCallToContextNodeMap[NewCall] = NewNode;
2650 NewNode->AllocTypes =
Edge->AllocTypes;
2654 AddEdge(NewNode, CurCalleeNode);
2656 CurCalleeNode = NewNode;
2660 AddEdge(
Edge->Caller, CurCalleeNode);
2668 removeEdgeFromGraph(
Edge.get(), &EI,
true);
2680bool ModuleCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
2681 const Function *ProfiledCallee,
Value *CurCallee,
unsigned Depth,
2682 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
2683 bool &FoundMultipleCalleeChains) {
2690 FoundCalleeChain.push_back({Callsite,
F});
2705 bool FoundSingleCalleeChain =
false;
2706 for (
auto &BB : *CalleeFunc) {
2707 for (
auto &
I : BB) {
2709 if (!CB || !CB->isTailCall())
2711 auto *CalledValue = CB->getCalledOperand();
2712 auto *CalledFunction = CB->getCalledFunction();
2713 if (CalledValue && !CalledFunction) {
2714 CalledValue = CalledValue->stripPointerCasts();
2721 assert(!CalledFunction &&
2722 "Expected null called function in callsite for alias");
2725 if (!CalledFunction)
2727 if (CalledFunction == ProfiledCallee) {
2728 if (FoundSingleCalleeChain) {
2729 FoundMultipleCalleeChains =
true;
2732 FoundSingleCalleeChain =
true;
2733 FoundProfiledCalleeCount++;
2734 FoundProfiledCalleeDepth +=
Depth;
2735 if (
Depth > FoundProfiledCalleeMaxDepth)
2736 FoundProfiledCalleeMaxDepth =
Depth;
2737 SaveCallsiteInfo(&
I, CalleeFunc);
2738 }
else if (findProfiledCalleeThroughTailCalls(
2739 ProfiledCallee, CalledFunction,
Depth + 1,
2740 FoundCalleeChain, FoundMultipleCalleeChains)) {
2743 assert(!FoundMultipleCalleeChains);
2744 if (FoundSingleCalleeChain) {
2745 FoundMultipleCalleeChains =
true;
2748 FoundSingleCalleeChain =
true;
2749 SaveCallsiteInfo(&
I, CalleeFunc);
2750 }
else if (FoundMultipleCalleeChains)
2755 return FoundSingleCalleeChain;
2758const Function *ModuleCallsiteContextGraph::getCalleeFunc(Instruction *
Call) {
2760 if (!CB->getCalledOperand() || CB->isIndirectCall())
2762 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
2769bool ModuleCallsiteContextGraph::calleeMatchesFunc(
2770 Instruction *
Call,
const Function *Func,
const Function *CallerFunc,
2771 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain) {
2773 if (!CB->getCalledOperand() || CB->isIndirectCall())
2775 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
2777 if (CalleeFunc == Func)
2780 if (Alias && Alias->getAliasee() == Func)
2791 bool FoundMultipleCalleeChains =
false;
2792 if (!findProfiledCalleeThroughTailCalls(Func, CalleeVal,
Depth,
2794 FoundMultipleCalleeChains)) {
2795 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: "
2796 <<
Func->getName() <<
" from " << CallerFunc->
getName()
2797 <<
" that actually called " << CalleeVal->getName()
2798 << (FoundMultipleCalleeChains
2799 ?
" (found multiple possible chains)"
2802 if (FoundMultipleCalleeChains)
2803 FoundProfiledCalleeNonUniquelyCount++;
2810bool ModuleCallsiteContextGraph::sameCallee(Instruction *Call1,
2811 Instruction *Call2) {
2813 if (!CB1->getCalledOperand() || CB1->isIndirectCall())
2815 auto *CalleeVal1 = CB1->getCalledOperand()->stripPointerCasts();
2818 if (!CB2->getCalledOperand() || CB2->isIndirectCall())
2820 auto *CalleeVal2 = CB2->getCalledOperand()->stripPointerCasts();
2822 return CalleeFunc1 == CalleeFunc2;
2825bool IndexCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
2826 ValueInfo ProfiledCallee, ValueInfo CurCallee,
unsigned Depth,
2827 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
2828 bool &FoundMultipleCalleeChains) {
2834 auto CreateAndSaveCallsiteInfo = [&](ValueInfo
Callee, FunctionSummary *
FS) {
2837 if (!FunctionCalleesToSynthesizedCallsiteInfos.count(FS) ||
2838 !FunctionCalleesToSynthesizedCallsiteInfos[FS].count(Callee))
2841 FunctionCalleesToSynthesizedCallsiteInfos[
FS][
Callee] =
2842 std::make_unique<CallsiteInfo>(Callee, SmallVector<unsigned>());
2843 CallsiteInfo *NewCallsiteInfo =
2844 FunctionCalleesToSynthesizedCallsiteInfos[
FS][
Callee].get();
2845 FoundCalleeChain.push_back({NewCallsiteInfo,
FS});
2852 bool FoundSingleCalleeChain =
false;
2855 !isPrevailing(CurCallee.
getGUID(), S.get()))
2860 auto FSVI = CurCallee;
2863 FSVI = AS->getAliaseeVI();
2864 for (
auto &CallEdge :
FS->calls()) {
2865 if (!CallEdge.second.hasTailCall())
2867 if (CallEdge.first == ProfiledCallee) {
2868 if (FoundSingleCalleeChain) {
2869 FoundMultipleCalleeChains =
true;
2872 FoundSingleCalleeChain =
true;
2873 FoundProfiledCalleeCount++;
2874 FoundProfiledCalleeDepth +=
Depth;
2875 if (
Depth > FoundProfiledCalleeMaxDepth)
2876 FoundProfiledCalleeMaxDepth =
Depth;
2877 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
2879 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
2880 FSToVIMap[
FS] = FSVI;
2881 }
else if (findProfiledCalleeThroughTailCalls(
2882 ProfiledCallee, CallEdge.first,
Depth + 1,
2883 FoundCalleeChain, FoundMultipleCalleeChains)) {
2886 assert(!FoundMultipleCalleeChains);
2887 if (FoundSingleCalleeChain) {
2888 FoundMultipleCalleeChains =
true;
2891 FoundSingleCalleeChain =
true;
2892 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
2894 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
2895 FSToVIMap[
FS] = FSVI;
2896 }
else if (FoundMultipleCalleeChains)
2901 return FoundSingleCalleeChain;
2904const FunctionSummary *
2905IndexCallsiteContextGraph::getCalleeFunc(IndexCall &
Call) {
2907 if (
Callee.getSummaryList().empty())
2912bool IndexCallsiteContextGraph::calleeMatchesFunc(
2913 IndexCall &
Call,
const FunctionSummary *Func,
2914 const FunctionSummary *CallerFunc,
2915 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain) {
2919 AliasSummary *Alias =
2920 Callee.getSummaryList().empty()
2923 assert(FSToVIMap.count(Func));
2924 auto FuncVI = FSToVIMap[
Func];
2925 if (Callee == FuncVI ||
2940 bool FoundMultipleCalleeChains =
false;
2941 if (!findProfiledCalleeThroughTailCalls(
2942 FuncVI, Callee,
Depth, FoundCalleeChain, FoundMultipleCalleeChains)) {
2943 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: " << FuncVI
2944 <<
" from " << FSToVIMap[CallerFunc]
2945 <<
" that actually called " << Callee
2946 << (FoundMultipleCalleeChains
2947 ?
" (found multiple possible chains)"
2950 if (FoundMultipleCalleeChains)
2951 FoundProfiledCalleeNonUniquelyCount++;
2958bool IndexCallsiteContextGraph::sameCallee(IndexCall &Call1, IndexCall &Call2) {
2961 return Callee1 == Callee2;
2964template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2965void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump()
2971template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2972void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
2973 raw_ostream &OS)
const {
2974 OS <<
"Node " <<
this <<
"\n";
2978 OS <<
" (recursive)";
2980 if (!MatchingCalls.empty()) {
2981 OS <<
"\tMatchingCalls:\n";
2982 for (
auto &MatchingCall : MatchingCalls) {
2984 MatchingCall.print(OS);
2988 OS <<
"\tNodeId: " <<
NodeId <<
"\n";
2990 OS <<
"\tContextIds:";
2992 auto ContextIds = getContextIds();
2993 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
2994 std::sort(SortedIds.begin(), SortedIds.end());
2995 for (
auto Id : SortedIds)
2998 OS <<
"\tCalleeEdges:\n";
2999 for (
auto &
Edge : CalleeEdges)
3000 OS <<
"\t\t" << *
Edge <<
" (Callee NodeId: " <<
Edge->Callee->NodeId
3002 OS <<
"\tCallerEdges:\n";
3003 for (
auto &
Edge : CallerEdges)
3004 OS <<
"\t\t" << *
Edge <<
" (Caller NodeId: " <<
Edge->Caller->NodeId
3006 if (!Clones.empty()) {
3009 for (
auto *
C : Clones) {
3013 OS <<
C <<
" NodeId: " <<
C->NodeId;
3016 }
else if (CloneOf) {
3017 OS <<
"\tClone of " << CloneOf <<
" NodeId: " << CloneOf->NodeId <<
"\n";
3021template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3022void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::dump()
3028template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3029void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::print(
3030 raw_ostream &OS)
const {
3031 OS <<
"Edge from Callee " <<
Callee <<
" to Caller: " <<
Caller
3032 << (IsBackedge ?
" (BE)" :
"")
3034 OS <<
" ContextIds:";
3035 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
3036 std::sort(SortedIds.begin(), SortedIds.end());
3037 for (
auto Id : SortedIds)
3041template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3042void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::dump()
const {
3046template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3047void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
3048 raw_ostream &OS)
const {
3049 OS <<
"Callsite Context Graph:\n";
3050 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3052 if (
Node->isRemoved())
3059template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3060void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::printTotalSizes(
3061 raw_ostream &OS)
const {
3062 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3064 if (
Node->isRemoved())
3066 if (!
Node->IsAllocation)
3068 DenseSet<uint32_t> ContextIds =
Node->getContextIds();
3069 auto AllocTypeFromCall = getAllocationCallType(
Node->Call);
3070 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
3071 std::sort(SortedIds.begin(), SortedIds.end());
3072 for (
auto Id : SortedIds) {
3073 auto TypeI = ContextIdToAllocationType.find(Id);
3074 assert(TypeI != ContextIdToAllocationType.end());
3075 auto CSI = ContextIdToContextSizeInfos.find(Id);
3076 if (CSI != ContextIdToContextSizeInfos.end()) {
3077 for (
auto &
Info : CSI->second) {
3078 OS <<
"MemProf hinting: "
3080 <<
" full allocation context " <<
Info.FullStackId
3081 <<
" with total size " <<
Info.TotalSize <<
" is "
3083 if (allocTypeToUse(
Node->AllocTypes) != AllocTypeFromCall)
3085 <<
" due to cold byte percent";
3087 OS <<
" (context id " <<
Id <<
")";
3095template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3096void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check()
const {
3097 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3100 for (
auto &
Edge :
Node->CallerEdges)
3105template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3107 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3108 using NodeRef =
const ContextNode<DerivedCCG, FuncTy, CallTy> *;
3110 using NodePtrTy = std::unique_ptr<ContextNode<DerivedCCG, FuncTy, CallTy>>;
3126 return G->NodeOwner.begin()->get();
3129 using EdgePtrTy = std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>;
3130 static const ContextNode<DerivedCCG, FuncTy, CallTy> *
3149template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3163 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3169 std::string LabelString =
3170 (
Twine(
"OrigId: ") + (
Node->IsAllocation ?
"Alloc" :
"") +
3173 LabelString +=
"\n";
3174 if (
Node->hasCall()) {
3175 auto Func =
G->NodeToCallingFunc.find(
Node);
3176 assert(Func !=
G->NodeToCallingFunc.end());
3178 G->getLabel(Func->second,
Node->Call.call(),
Node->Call.cloneNo());
3180 LabelString +=
"null call";
3181 if (
Node->Recursive)
3182 LabelString +=
" (recursive)";
3184 LabelString +=
" (external)";
3190 auto ContextIds =
Node->getContextIds();
3194 bool Highlight =
false;
3203 std::string AttributeString = (
Twine(
"tooltip=\"") + getNodeId(
Node) +
" " +
3204 getContextIds(ContextIds) +
"\"")
3208 AttributeString +=
",fontsize=\"30\"";
3210 (
Twine(
",fillcolor=\"") + getColor(
Node->AllocTypes, Highlight) +
"\"")
3212 if (
Node->CloneOf) {
3213 AttributeString +=
",color=\"blue\"";
3214 AttributeString +=
",style=\"filled,bold,dashed\"";
3216 AttributeString +=
",style=\"filled\"";
3217 return AttributeString;
3222 auto &Edge = *(ChildIter.getCurrent());
3227 bool Highlight =
false;
3236 auto Color = getColor(Edge->AllocTypes, Highlight);
3237 std::string AttributeString =
3238 (
Twine(
"tooltip=\"") + getContextIds(Edge->ContextIds) +
"\"" +
3240 Twine(
",fillcolor=\"") + Color +
"\"" +
Twine(
",color=\"") + Color +
3243 if (Edge->IsBackedge)
3244 AttributeString +=
",style=\"dotted\"";
3247 AttributeString +=
",penwidth=\"2.0\",weight=\"2\"";
3248 return AttributeString;
3254 if (
Node->isRemoved())
3267 std::string IdString =
"ContextIds:";
3268 if (ContextIds.
size() < 100) {
3269 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
3270 std::sort(SortedIds.begin(), SortedIds.end());
3271 for (
auto Id : SortedIds)
3272 IdString += (
" " +
Twine(Id)).str();
3274 IdString += (
" (" + Twine(ContextIds.
size()) +
" ids)").str();
3279 static std::string getColor(uint8_t AllocTypes,
bool Highlight) {
3285 if (AllocTypes == (uint8_t)AllocationType::NotCold)
3287 return !
DoHighlight || Highlight ?
"brown1" :
"lightpink";
3288 if (AllocTypes == (uint8_t)AllocationType::Cold)
3289 return !
DoHighlight || Highlight ?
"cyan" :
"lightskyblue";
3291 ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold))
3292 return Highlight ?
"magenta" :
"mediumorchid1";
3296 static std::string getNodeId(NodeRef Node) {
3297 std::stringstream SStream;
3298 SStream << std::hex <<
"N0x" << (
unsigned long long)Node;
3299 std::string
Result = SStream.str();
3308template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3313template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3314void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot(
3315 std::string Label)
const {
3320template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3321typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
3322CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
3323 const std::shared_ptr<ContextEdge> &
Edge,
3324 DenseSet<uint32_t> ContextIdsToMove) {
3326 assert(NodeToCallingFunc.count(Node));
3327 ContextNode *Clone =
3328 createNewNode(
Node->IsAllocation, NodeToCallingFunc[Node],
Node->Call);
3329 Node->addClone(Clone);
3330 Clone->MatchingCalls =
Node->MatchingCalls;
3331 moveEdgeToExistingCalleeClone(
Edge, Clone,
true,
3336template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3337void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3338 moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &
Edge,
3339 ContextNode *NewCallee,
bool NewClone,
3340 DenseSet<uint32_t> ContextIdsToMove) {
3343 assert(NewCallee->getOrigNode() ==
Edge->Callee->getOrigNode());
3345 bool EdgeIsRecursive =
Edge->Callee ==
Edge->Caller;
3347 ContextNode *OldCallee =
Edge->Callee;
3351 auto ExistingEdgeToNewCallee = NewCallee->findEdgeFromCaller(
Edge->Caller);
3355 if (ContextIdsToMove.
empty())
3356 ContextIdsToMove =
Edge->getContextIds();
3360 if (
Edge->getContextIds().size() == ContextIdsToMove.
size()) {
3363 NewCallee->AllocTypes |=
Edge->AllocTypes;
3365 if (ExistingEdgeToNewCallee) {
3368 ExistingEdgeToNewCallee->getContextIds().insert_range(ContextIdsToMove);
3369 ExistingEdgeToNewCallee->AllocTypes |=
Edge->AllocTypes;
3370 assert(
Edge->ContextIds == ContextIdsToMove);
3371 removeEdgeFromGraph(
Edge.get());
3374 Edge->Callee = NewCallee;
3375 NewCallee->CallerEdges.push_back(
Edge);
3377 OldCallee->eraseCallerEdge(
Edge.get());
3384 auto CallerEdgeAllocType = computeAllocType(ContextIdsToMove);
3385 if (ExistingEdgeToNewCallee) {
3388 ExistingEdgeToNewCallee->getContextIds().insert_range(ContextIdsToMove);
3389 ExistingEdgeToNewCallee->AllocTypes |= CallerEdgeAllocType;
3392 auto NewEdge = std::make_shared<ContextEdge>(
3393 NewCallee,
Edge->Caller, CallerEdgeAllocType, ContextIdsToMove);
3394 Edge->Caller->CalleeEdges.push_back(NewEdge);
3395 NewCallee->CallerEdges.push_back(NewEdge);
3399 NewCallee->AllocTypes |= CallerEdgeAllocType;
3401 Edge->AllocTypes = computeAllocType(
Edge->ContextIds);
3406 for (
auto &OldCalleeEdge : OldCallee->CalleeEdges) {
3407 ContextNode *CalleeToUse = OldCalleeEdge->Callee;
3411 if (CalleeToUse == OldCallee) {
3415 if (EdgeIsRecursive) {
3419 CalleeToUse = NewCallee;
3423 DenseSet<uint32_t> EdgeContextIdsToMove =
3425 set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove);
3426 OldCalleeEdge->AllocTypes =
3427 computeAllocType(OldCalleeEdge->getContextIds());
3434 if (
auto *NewCalleeEdge = NewCallee->findEdgeFromCallee(CalleeToUse)) {
3435 NewCalleeEdge->getContextIds().insert_range(EdgeContextIdsToMove);
3436 NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
3440 auto NewEdge = std::make_shared<ContextEdge>(
3441 CalleeToUse, NewCallee, computeAllocType(EdgeContextIdsToMove),
3442 EdgeContextIdsToMove);
3443 NewCallee->CalleeEdges.push_back(NewEdge);
3444 NewEdge->Callee->CallerEdges.push_back(NewEdge);
3448 OldCallee->AllocTypes = OldCallee->computeAllocType();
3450 assert((OldCallee->AllocTypes == (uint8_t)AllocationType::None) ==
3451 OldCallee->emptyContextIds());
3455 for (
const auto &OldCalleeEdge : OldCallee->CalleeEdges)
3458 for (
const auto &NewCalleeEdge : NewCallee->CalleeEdges)
3464template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3465void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3466 moveCalleeEdgeToNewCaller(
const std::shared_ptr<ContextEdge> &
Edge,
3467 ContextNode *NewCaller) {
3468 auto *OldCallee =
Edge->Callee;
3469 auto *NewCallee = OldCallee;
3472 bool Recursive =
Edge->Caller ==
Edge->Callee;
3474 NewCallee = NewCaller;
3476 ContextNode *OldCaller =
Edge->Caller;
3477 OldCaller->eraseCalleeEdge(
Edge.get());
3481 auto ExistingEdgeToNewCaller = NewCaller->findEdgeFromCallee(NewCallee);
3483 if (ExistingEdgeToNewCaller) {
3486 ExistingEdgeToNewCaller->getContextIds().insert_range(
3487 Edge->getContextIds());
3488 ExistingEdgeToNewCaller->AllocTypes |=
Edge->AllocTypes;
3489 Edge->ContextIds.clear();
3490 Edge->AllocTypes = (uint8_t)AllocationType::None;
3491 OldCallee->eraseCallerEdge(
Edge.get());
3494 Edge->Caller = NewCaller;
3495 NewCaller->CalleeEdges.push_back(
Edge);
3497 assert(NewCallee == NewCaller);
3500 Edge->Callee = NewCallee;
3501 NewCallee->CallerEdges.push_back(
Edge);
3502 OldCallee->eraseCallerEdge(
Edge.get());
3508 NewCaller->AllocTypes |=
Edge->AllocTypes;
3515 bool IsNewNode = NewCaller->CallerEdges.empty();
3524 for (
auto &OldCallerEdge : OldCaller->CallerEdges) {
3525 auto OldCallerCaller = OldCallerEdge->Caller;
3529 OldCallerEdge->getContextIds(),
Edge->getContextIds());
3530 if (OldCaller == OldCallerCaller) {
3531 OldCallerCaller = NewCaller;
3537 set_subtract(OldCallerEdge->getContextIds(), EdgeContextIdsToMove);
3538 OldCallerEdge->AllocTypes =
3539 computeAllocType(OldCallerEdge->getContextIds());
3544 auto *ExistingCallerEdge = NewCaller->findEdgeFromCaller(OldCallerCaller);
3548 if (ExistingCallerEdge) {
3549 ExistingCallerEdge->getContextIds().insert_range(EdgeContextIdsToMove);
3550 ExistingCallerEdge->AllocTypes |=
3551 computeAllocType(EdgeContextIdsToMove);
3554 auto NewEdge = std::make_shared<ContextEdge>(
3555 NewCaller, OldCallerCaller, computeAllocType(EdgeContextIdsToMove),
3556 EdgeContextIdsToMove);
3557 NewCaller->CallerEdges.push_back(NewEdge);
3558 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
3563 OldCaller->AllocTypes = OldCaller->computeAllocType();
3565 assert((OldCaller->AllocTypes == (uint8_t)AllocationType::None) ==
3566 OldCaller->emptyContextIds());
3570 for (
const auto &OldCallerEdge : OldCaller->CallerEdges)
3573 for (
const auto &NewCallerEdge : NewCaller->CallerEdges)
3579template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3580void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3581 recursivelyRemoveNoneTypeCalleeEdges(
3582 ContextNode *Node, DenseSet<const ContextNode *> &Visited) {
3587 removeNoneTypeCalleeEdges(Node);
3589 for (
auto *Clone :
Node->Clones)
3590 recursivelyRemoveNoneTypeCalleeEdges(Clone, Visited);
3594 auto CallerEdges =
Node->CallerEdges;
3595 for (
auto &
Edge : CallerEdges) {
3597 if (
Edge->isRemoved()) {
3601 recursivelyRemoveNoneTypeCalleeEdges(
Edge->Caller, Visited);
3606template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3607void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::markBackedges() {
3612 DenseSet<const ContextNode *> Visited;
3613 DenseSet<const ContextNode *> CurrentStack;
3614 for (
auto &Entry : NonAllocationCallToContextNodeMap) {
3616 if (
Node->isRemoved())
3619 if (!
Node->CallerEdges.empty())
3621 markBackedges(Node, Visited, CurrentStack);
3627template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3628void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::markBackedges(
3629 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
3630 DenseSet<const ContextNode *> &CurrentStack) {
3631 auto I = Visited.
insert(Node);
3635 for (
auto &CalleeEdge :
Node->CalleeEdges) {
3636 auto *
Callee = CalleeEdge->Callee;
3637 if (Visited.
count(Callee)) {
3640 if (CurrentStack.
count(Callee))
3641 CalleeEdge->IsBackedge =
true;
3644 CurrentStack.
insert(Callee);
3645 markBackedges(Callee, Visited, CurrentStack);
3646 CurrentStack.
erase(Callee);
3650template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3651void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() {
3652 DenseSet<const ContextNode *> Visited;
3653 for (
auto &Entry : AllocationCallToContextNodeMap) {
3655 identifyClones(
Entry.second, Visited,
Entry.second->getContextIds());
3658 for (
auto &Entry : AllocationCallToContextNodeMap)
3659 recursivelyRemoveNoneTypeCalleeEdges(
Entry.second, Visited);
3672template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3673void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
3674 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
3675 const DenseSet<uint32_t> &AllocContextIds) {
3685 if (!
Node->hasCall())
3704 auto CallerEdges =
Node->CallerEdges;
3705 for (
auto &
Edge : CallerEdges) {
3707 if (
Edge->isRemoved()) {
3713 if (
Edge->IsBackedge) {
3720 if (!Visited.
count(
Edge->Caller) && !
Edge->Caller->CloneOf) {
3721 identifyClones(
Edge->Caller, Visited, AllocContextIds);
3744 const unsigned AllocTypeCloningPriority[] = { 3, 4,
3748 [&](
const std::shared_ptr<ContextEdge> &
A,
3749 const std::shared_ptr<ContextEdge> &
B) {
3752 if (A->ContextIds.empty())
3758 if (B->ContextIds.empty())
3761 if (A->AllocTypes == B->AllocTypes)
3764 return *A->ContextIds.begin() < *B->ContextIds.begin();
3765 return AllocTypeCloningPriority[A->AllocTypes] <
3766 AllocTypeCloningPriority[B->AllocTypes];
3769 assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
3771 DenseSet<uint32_t> RecursiveContextIds;
3776 DenseSet<uint32_t> AllCallerContextIds;
3777 for (
auto &CE :
Node->CallerEdges) {
3780 AllCallerContextIds.
reserve(
CE->getContextIds().size());
3781 for (
auto Id :
CE->getContextIds())
3782 if (!AllCallerContextIds.
insert(Id).second)
3783 RecursiveContextIds.
insert(Id);
3793 auto CallerEdges =
Node->CallerEdges;
3794 for (
auto &CallerEdge : CallerEdges) {
3796 if (CallerEdge->isRemoved()) {
3800 assert(CallerEdge->Callee == Node);
3809 if (!CallerEdge->Caller->hasCall())
3814 auto CallerEdgeContextsForAlloc =
3816 if (!RecursiveContextIds.
empty())
3817 CallerEdgeContextsForAlloc =
3819 if (CallerEdgeContextsForAlloc.empty())
3822 auto CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
3826 std::vector<uint8_t> CalleeEdgeAllocTypesForCallerEdge;
3827 CalleeEdgeAllocTypesForCallerEdge.reserve(
Node->CalleeEdges.size());
3828 for (
auto &CalleeEdge :
Node->CalleeEdges)
3829 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
3830 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
3846 assert(CallerEdge->AllocTypes != (uint8_t)AllocationType::None);
3847 assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
3848 if (!CallerEdge->IsBackedge &&
3849 allocTypeToUse(CallerAllocTypeForAlloc) ==
3850 allocTypeToUse(
Node->AllocTypes) &&
3851 allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
3852 CalleeEdgeAllocTypesForCallerEdge,
Node->CalleeEdges)) {
3856 if (CallerEdge->IsBackedge) {
3860 DeferredBackedges++;
3873 if (CallerEdge->IsBackedge && !CallerEdge->Caller->CloneOf &&
3874 !Visited.
count(CallerEdge->Caller)) {
3875 const auto OrigIdCount = CallerEdge->getContextIds().size();
3878 identifyClones(CallerEdge->Caller, Visited, CallerEdgeContextsForAlloc);
3879 removeNoneTypeCalleeEdges(CallerEdge->Caller);
3883 bool UpdatedEdge =
false;
3884 if (OrigIdCount > CallerEdge->getContextIds().size()) {
3885 for (
auto E :
Node->CallerEdges) {
3887 if (
E->Caller->CloneOf != CallerEdge->Caller)
3891 auto CallerEdgeContextsForAllocNew =
3893 if (CallerEdgeContextsForAllocNew.empty())
3903 CallerEdgeContextsForAlloc.swap(CallerEdgeContextsForAllocNew);
3913 if (CallerEdge->isRemoved())
3923 CallerEdgeContextsForAlloc, CallerEdge->getContextIds());
3924 if (CallerEdgeContextsForAlloc.empty())
3929 CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
3930 CalleeEdgeAllocTypesForCallerEdge.clear();
3931 for (
auto &CalleeEdge :
Node->CalleeEdges) {
3932 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
3933 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
3939 ContextNode *Clone =
nullptr;
3940 for (
auto *CurClone :
Node->Clones) {
3941 if (allocTypeToUse(CurClone->AllocTypes) !=
3942 allocTypeToUse(CallerAllocTypeForAlloc))
3949 assert(!BothSingleAlloc ||
3950 CurClone->AllocTypes == CallerAllocTypeForAlloc);
3956 if (BothSingleAlloc || allocTypesMatchClone<DerivedCCG, FuncTy, CallTy>(
3957 CalleeEdgeAllocTypesForCallerEdge, CurClone)) {
3965 moveEdgeToExistingCalleeClone(CallerEdge, Clone,
false,
3966 CallerEdgeContextsForAlloc);
3968 Clone = moveEdgeToNewCalleeClone(CallerEdge, CallerEdgeContextsForAlloc);
3971 assert(Clone->AllocTypes != (uint8_t)AllocationType::None);
3978 assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
3984void ModuleCallsiteContextGraph::updateAllocationCall(
3989 "memprof", AllocTypeString);
3992 .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofAttribute",
Call.call())
3993 <<
ore::NV(
"AllocationCall",
Call.call()) <<
" in clone "
3995 <<
" marked with memprof allocation attribute "
3996 <<
ore::NV(
"Attribute", AllocTypeString));
3999void IndexCallsiteContextGraph::updateAllocationCall(CallInfo &
Call,
4003 assert(AI->Versions.size() >
Call.cloneNo());
4008ModuleCallsiteContextGraph::getAllocationCallType(
const CallInfo &
Call)
const {
4010 if (!CB->getAttributes().hasFnAttr(
"memprof"))
4011 return AllocationType::None;
4012 return CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
4013 ? AllocationType::Cold
4014 : AllocationType::NotCold;
4018IndexCallsiteContextGraph::getAllocationCallType(
const CallInfo &
Call)
const {
4020 assert(AI->Versions.size() >
Call.cloneNo());
4024void ModuleCallsiteContextGraph::updateCall(CallInfo &CallerCall,
4025 FuncInfo CalleeFunc) {
4026 auto *CurF = getCalleeFunc(CallerCall.call());
4027 auto NewCalleeCloneNo = CalleeFunc.cloneNo();
4034 if (CurCalleeCloneNo != NewCalleeCloneNo) {
4036 << CurCalleeCloneNo <<
" now " << NewCalleeCloneNo
4038 MismatchedCloneAssignments++;
4041 if (NewCalleeCloneNo > 0)
4042 cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func());
4043 OREGetter(CallerCall.call()->getFunction())
4044 .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofCall", CallerCall.call())
4045 <<
ore::NV(
"Call", CallerCall.call()) <<
" in clone "
4046 <<
ore::NV(
"Caller", CallerCall.call()->getFunction())
4047 <<
" assigned to call function clone "
4048 <<
ore::NV(
"Callee", CalleeFunc.func()));
4051void IndexCallsiteContextGraph::updateCall(CallInfo &CallerCall,
4052 FuncInfo CalleeFunc) {
4055 "Caller cannot be an allocation which should not have profiled calls");
4056 assert(CI->Clones.size() > CallerCall.cloneNo());
4057 auto NewCalleeCloneNo = CalleeFunc.cloneNo();
4058 auto &CurCalleeCloneNo = CI->Clones[CallerCall.cloneNo()];
4063 if (CurCalleeCloneNo != 0 && CurCalleeCloneNo != NewCalleeCloneNo) {
4065 << CurCalleeCloneNo <<
" now " << NewCalleeCloneNo
4067 MismatchedCloneAssignments++;
4069 CurCalleeCloneNo = NewCalleeCloneNo;
4081 SP->replaceLinkageName(MDName);
4085 TempDISubprogram NewDecl = Decl->
clone();
4086 NewDecl->replaceLinkageName(MDName);
4090CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
4092ModuleCallsiteContextGraph::cloneFunctionForCallsite(
4093 FuncInfo &Func, CallInfo &
Call, DenseMap<CallInfo, CallInfo> &CallMap,
4094 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
4099 assert(!
Func.func()->getParent()->getFunction(Name));
4100 NewFunc->setName(Name);
4102 for (
auto &Inst : CallsWithMetadataInFunc) {
4104 assert(Inst.cloneNo() == 0);
4107 OREGetter(
Func.func())
4108 .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofClone",
Func.func())
4109 <<
"created clone " <<
ore::NV(
"NewFunction", NewFunc));
4110 return {NewFunc, CloneNo};
4113CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
4114 IndexCall>::FuncInfo
4115IndexCallsiteContextGraph::cloneFunctionForCallsite(
4116 FuncInfo &Func, CallInfo &
Call, DenseMap<CallInfo, CallInfo> &CallMap,
4117 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
4131 for (
auto &Inst : CallsWithMetadataInFunc) {
4133 assert(Inst.cloneNo() == 0);
4135 assert(AI->Versions.size() == CloneNo);
4138 AI->Versions.push_back(0);
4141 assert(CI && CI->Clones.size() == CloneNo);
4144 CI->Clones.push_back(0);
4146 CallMap[Inst] = {Inst.call(), CloneNo};
4148 return {
Func.func(), CloneNo};
4165template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4166void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones() {
4172 DenseMap<uint32_t, ContextNode *> ContextIdToAllocationNode;
4173 for (
auto &Entry : AllocationCallToContextNodeMap) {
4175 for (
auto Id :
Node->getContextIds())
4176 ContextIdToAllocationNode[
Id] =
Node->getOrigNode();
4177 for (
auto *Clone :
Node->Clones) {
4178 for (
auto Id : Clone->getContextIds())
4179 ContextIdToAllocationNode[
Id] = Clone->getOrigNode();
4186 DenseSet<const ContextNode *> Visited;
4187 for (
auto &Entry : AllocationCallToContextNodeMap) {
4190 mergeClones(Node, Visited, ContextIdToAllocationNode);
4196 auto Clones =
Node->Clones;
4197 for (
auto *Clone : Clones)
4198 mergeClones(Clone, Visited, ContextIdToAllocationNode);
4202 dbgs() <<
"CCG after merging:\n";
4206 exportToDot(
"aftermerge");
4214template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4215void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones(
4216 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
4217 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {
4227 bool FoundUnvisited =
true;
4229 while (FoundUnvisited) {
4231 FoundUnvisited =
false;
4234 auto CallerEdges =
Node->CallerEdges;
4235 for (
auto CallerEdge : CallerEdges) {
4237 if (CallerEdge->Callee != Node)
4242 FoundUnvisited =
true;
4243 mergeClones(CallerEdge->Caller, Visited, ContextIdToAllocationNode);
4247 TotalMergeInvokes++;
4248 TotalMergeIters += Iters;
4249 if (Iters > MaxMergeIters)
4250 MaxMergeIters = Iters;
4253 mergeNodeCalleeClones(Node, Visited, ContextIdToAllocationNode);
4256template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4257void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeNodeCalleeClones(
4258 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
4259 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {
4261 if (
Node->emptyContextIds())
4266 MapVector<ContextNode *, std::vector<std::shared_ptr<ContextEdge>>>
4267 OrigNodeToCloneEdges;
4268 for (
const auto &
E :
Node->CalleeEdges) {
4273 OrigNodeToCloneEdges[
Base].push_back(
E);
4279 auto CalleeCallerEdgeLessThan = [](
const std::shared_ptr<ContextEdge> &
A,
4280 const std::shared_ptr<ContextEdge> &
B) {
4281 if (
A->Callee->CallerEdges.size() !=
B->Callee->CallerEdges.size())
4282 return A->Callee->CallerEdges.size() <
B->Callee->CallerEdges.size();
4283 if (
A->Callee->CloneOf && !
B->Callee->CloneOf)
4285 else if (!
A->Callee->CloneOf &&
B->Callee->CloneOf)
4289 return *
A->ContextIds.begin() < *
B->ContextIds.begin();
4294 for (
auto Entry : OrigNodeToCloneEdges) {
4297 auto &CalleeEdges =
Entry.second;
4298 auto NumCalleeClones = CalleeEdges.size();
4300 if (NumCalleeClones == 1)
4311 DenseSet<ContextNode *> OtherCallersToShareMerge;
4312 findOtherCallersToShareMerge(Node, CalleeEdges, ContextIdToAllocationNode,
4313 OtherCallersToShareMerge);
4318 ContextNode *MergeNode =
nullptr;
4319 DenseMap<ContextNode *, unsigned> CallerToMoveCount;
4320 for (
auto CalleeEdge : CalleeEdges) {
4321 auto *OrigCallee = CalleeEdge->Callee;
4327 if (CalleeEdge->Callee->CallerEdges.size() == 1) {
4328 MergeNode = OrigCallee;
4329 NonNewMergedNodes++;
4336 if (!OtherCallersToShareMerge.
empty()) {
4337 bool MoveAllCallerEdges =
true;
4338 for (
auto CalleeCallerE : OrigCallee->CallerEdges) {
4339 if (CalleeCallerE == CalleeEdge)
4341 if (!OtherCallersToShareMerge.
contains(CalleeCallerE->Caller)) {
4342 MoveAllCallerEdges =
false;
4348 if (MoveAllCallerEdges) {
4349 MergeNode = OrigCallee;
4350 NonNewMergedNodes++;
4357 assert(MergeNode != OrigCallee);
4358 moveEdgeToExistingCalleeClone(CalleeEdge, MergeNode,
4361 MergeNode = moveEdgeToNewCalleeClone(CalleeEdge);
4366 if (!OtherCallersToShareMerge.
empty()) {
4370 auto OrigCalleeCallerEdges = OrigCallee->CallerEdges;
4371 for (
auto &CalleeCallerE : OrigCalleeCallerEdges) {
4372 if (CalleeCallerE == CalleeEdge)
4374 if (!OtherCallersToShareMerge.
contains(CalleeCallerE->Caller))
4376 CallerToMoveCount[CalleeCallerE->Caller]++;
4377 moveEdgeToExistingCalleeClone(CalleeCallerE, MergeNode,
4381 removeNoneTypeCalleeEdges(OrigCallee);
4382 removeNoneTypeCalleeEdges(MergeNode);
4400template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4401void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
4402 findOtherCallersToShareMerge(
4404 std::vector<std::shared_ptr<ContextEdge>> &CalleeEdges,
4405 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode,
4406 DenseSet<ContextNode *> &OtherCallersToShareMerge) {
4407 auto NumCalleeClones = CalleeEdges.size();
4410 DenseMap<ContextNode *, unsigned> OtherCallersToSharedCalleeEdgeCount;
4413 unsigned PossibleOtherCallerNodes = 0;
4417 if (CalleeEdges[0]->
Callee->CallerEdges.size() < 2)
4423 DenseMap<ContextEdge *, DenseSet<ContextNode *>> CalleeEdgeToAllocNodes;
4424 for (
auto CalleeEdge : CalleeEdges) {
4425 assert(CalleeEdge->Callee->CallerEdges.size() > 1);
4428 for (
auto CalleeCallerEdges : CalleeEdge->Callee->CallerEdges) {
4429 if (CalleeCallerEdges->Caller == Node) {
4430 assert(CalleeCallerEdges == CalleeEdge);
4433 OtherCallersToSharedCalleeEdgeCount[CalleeCallerEdges->Caller]++;
4436 if (OtherCallersToSharedCalleeEdgeCount[CalleeCallerEdges->Caller] ==
4438 PossibleOtherCallerNodes++;
4442 for (
auto Id : CalleeEdge->getContextIds()) {
4443 auto *
Alloc = ContextIdToAllocationNode.
lookup(Id);
4447 MissingAllocForContextId++;
4450 CalleeEdgeToAllocNodes[CalleeEdge.get()].
insert(
Alloc);
4457 for (
auto CalleeEdge : CalleeEdges) {
4458 assert(CalleeEdge->Callee->CallerEdges.size() > 1);
4460 if (!PossibleOtherCallerNodes)
4462 auto &CurCalleeAllocNodes = CalleeEdgeToAllocNodes[CalleeEdge.get()];
4464 for (
auto &CalleeCallerE : CalleeEdge->Callee->CallerEdges) {
4466 if (CalleeCallerE == CalleeEdge)
4470 if (OtherCallersToSharedCalleeEdgeCount[CalleeCallerE->Caller] !=
4475 for (
auto Id : CalleeCallerE->getContextIds()) {
4476 auto *
Alloc = ContextIdToAllocationNode.
lookup(Id);
4481 if (!CurCalleeAllocNodes.contains(
Alloc)) {
4482 OtherCallersToSharedCalleeEdgeCount[CalleeCallerE->Caller] = 0;
4483 PossibleOtherCallerNodes--;
4490 if (!PossibleOtherCallerNodes)
4495 for (
auto &[OtherCaller,
Count] : OtherCallersToSharedCalleeEdgeCount) {
4496 if (
Count != NumCalleeClones)
4498 OtherCallersToShareMerge.
insert(OtherCaller);
4543template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4544bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
4551 DenseMap<ContextNode *, FuncInfo> CallsiteToCalleeFuncCloneMap;
4555 auto RecordCalleeFuncOfCallsite = [&](ContextNode *
Caller,
4556 const FuncInfo &CalleeFunc) {
4558 CallsiteToCalleeFuncCloneMap[
Caller] = CalleeFunc;
4562 struct FuncCloneInfo {
4567 DenseMap<CallInfo, CallInfo> CallMap;
4595 DenseMap<const ContextNode *, std::map<unsigned, SmallVector<CallInfo, 0>>>
4596 UnassignedCallClones;
4600 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
4601 FuncInfo OrigFunc(Func);
4606 std::vector<FuncCloneInfo> FuncCloneInfos;
4607 for (
auto &
Call : CallsWithMetadata) {
4608 ContextNode *
Node = getNodeForInst(
Call);
4612 if (!Node ||
Node->Clones.empty())
4615 "Not having a call should have prevented cloning");
4619 std::map<FuncInfo, ContextNode *> FuncCloneToCurNodeCloneMap;
4623 auto AssignCallsiteCloneToFuncClone = [&](
const FuncInfo &FuncClone,
4625 ContextNode *CallsiteClone,
4628 FuncCloneToCurNodeCloneMap[FuncClone] = CallsiteClone;
4630 assert(FuncCloneInfos.size() > FuncClone.cloneNo());
4631 DenseMap<CallInfo, CallInfo> &CallMap =
4632 FuncCloneInfos[FuncClone.cloneNo()].CallMap;
4633 CallInfo CallClone(
Call);
4634 if (
auto It = CallMap.
find(
Call); It != CallMap.
end())
4635 CallClone = It->second;
4636 CallsiteClone->setCall(CallClone);
4638 for (
auto &MatchingCall :
Node->MatchingCalls) {
4639 CallInfo CallClone(MatchingCall);
4640 if (
auto It = CallMap.
find(MatchingCall); It != CallMap.
end())
4641 CallClone = It->second;
4643 MatchingCall = CallClone;
4651 auto MoveEdgeToNewCalleeCloneAndSetUp =
4652 [&](
const std::shared_ptr<ContextEdge> &
Edge) {
4653 ContextNode *OrigCallee =
Edge->Callee;
4654 ContextNode *NewClone = moveEdgeToNewCalleeClone(
Edge);
4655 removeNoneTypeCalleeEdges(NewClone);
4656 assert(NewClone->AllocTypes != (uint8_t)AllocationType::None);
4660 if (CallsiteToCalleeFuncCloneMap.
count(OrigCallee))
4661 RecordCalleeFuncOfCallsite(
4662 NewClone, CallsiteToCalleeFuncCloneMap[OrigCallee]);
4669 std::deque<ContextNode *> ClonesWorklist;
4671 if (!
Node->emptyContextIds())
4672 ClonesWorklist.push_back(Node);
4678 unsigned NodeCloneCount = 0;
4679 while (!ClonesWorklist.empty()) {
4680 ContextNode *Clone = ClonesWorklist.front();
4681 ClonesWorklist.pop_front();
4690 if (FuncCloneInfos.size() < NodeCloneCount) {
4692 if (NodeCloneCount == 1) {
4697 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &
E) {
4698 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
4702 FuncCloneInfos.push_back(
4703 {OrigFunc, DenseMap<CallInfo, CallInfo>()});
4704 AssignCallsiteCloneToFuncClone(
4705 OrigFunc,
Call, Clone,
4706 AllocationCallToContextNodeMap.count(
Call));
4707 for (
auto &CE : Clone->CallerEdges) {
4709 if (!
CE->Caller->hasCall())
4711 RecordCalleeFuncOfCallsite(
CE->Caller, OrigFunc);
4721 FuncInfo PreviousAssignedFuncClone;
4723 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &
E) {
4724 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
4726 bool CallerAssignedToCloneOfFunc =
false;
4727 if (EI != Clone->CallerEdges.end()) {
4728 const std::shared_ptr<ContextEdge> &
Edge = *EI;
4729 PreviousAssignedFuncClone =
4730 CallsiteToCalleeFuncCloneMap[
Edge->Caller];
4731 CallerAssignedToCloneOfFunc =
true;
4736 DenseMap<CallInfo, CallInfo> NewCallMap;
4737 unsigned CloneNo = FuncCloneInfos.size();
4738 assert(CloneNo > 0 &&
"Clone 0 is the original function, which "
4739 "should already exist in the map");
4740 FuncInfo NewFuncClone = cloneFunctionForCallsite(
4741 OrigFunc,
Call, NewCallMap, CallsWithMetadata, CloneNo);
4742 FuncCloneInfos.push_back({NewFuncClone, std::move(NewCallMap)});
4743 FunctionClonesAnalysis++;
4749 if (!CallerAssignedToCloneOfFunc) {
4750 AssignCallsiteCloneToFuncClone(
4751 NewFuncClone,
Call, Clone,
4752 AllocationCallToContextNodeMap.count(
Call));
4753 for (
auto &CE : Clone->CallerEdges) {
4755 if (!
CE->Caller->hasCall())
4757 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
4769 auto CallerEdges = Clone->CallerEdges;
4770 for (
auto CE : CallerEdges) {
4772 if (
CE->isRemoved()) {
4778 if (!
CE->Caller->hasCall())
4781 if (!CallsiteToCalleeFuncCloneMap.
count(
CE->Caller) ||
4785 CallsiteToCalleeFuncCloneMap[
CE->Caller] !=
4786 PreviousAssignedFuncClone)
4789 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
4802 auto CalleeEdges =
CE->Caller->CalleeEdges;
4803 for (
auto CalleeEdge : CalleeEdges) {
4806 if (CalleeEdge->isRemoved()) {
4811 ContextNode *
Callee = CalleeEdge->Callee;
4815 if (Callee == Clone || !
Callee->hasCall())
4820 if (Callee == CalleeEdge->Caller)
4822 ContextNode *NewClone =
4823 MoveEdgeToNewCalleeCloneAndSetUp(CalleeEdge);
4826 removeNoneTypeCalleeEdges(Callee);
4834 CallInfo OrigCall(
Callee->getOrigNode()->Call);
4835 OrigCall.setCloneNo(0);
4836 DenseMap<CallInfo, CallInfo> &CallMap =
4837 FuncCloneInfos[NewFuncClone.cloneNo()].CallMap;
4839 CallInfo NewCall(CallMap[OrigCall]);
4841 NewClone->setCall(NewCall);
4843 for (
auto &MatchingCall : NewClone->MatchingCalls) {
4844 CallInfo OrigMatchingCall(MatchingCall);
4845 OrigMatchingCall.setCloneNo(0);
4847 CallInfo NewCall(CallMap[OrigMatchingCall]);
4850 MatchingCall = NewCall;
4859 auto FindFirstAvailFuncClone = [&]() {
4864 for (
auto &CF : FuncCloneInfos) {
4865 if (!FuncCloneToCurNodeCloneMap.count(CF.FuncClone))
4866 return CF.FuncClone;
4869 "Expected an available func clone for this callsite clone");
4886 std::map<FuncInfo, ContextNode *> FuncCloneToNewCallsiteCloneMap;
4887 FuncInfo FuncCloneAssignedToCurCallsiteClone;
4891 auto CloneCallerEdges = Clone->CallerEdges;
4892 for (
auto &
Edge : CloneCallerEdges) {
4896 if (
Edge->isRemoved())
4899 if (!
Edge->Caller->hasCall())
4903 if (CallsiteToCalleeFuncCloneMap.
count(
Edge->Caller)) {
4904 FuncInfo FuncCloneCalledByCaller =
4905 CallsiteToCalleeFuncCloneMap[
Edge->Caller];
4915 if ((FuncCloneToCurNodeCloneMap.count(FuncCloneCalledByCaller) &&
4916 FuncCloneToCurNodeCloneMap[FuncCloneCalledByCaller] !=
4924 (FuncCloneAssignedToCurCallsiteClone &&
4925 FuncCloneAssignedToCurCallsiteClone !=
4926 FuncCloneCalledByCaller)) {
4941 if (FuncCloneToNewCallsiteCloneMap.count(
4942 FuncCloneCalledByCaller)) {
4943 ContextNode *NewClone =
4944 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller];
4945 moveEdgeToExistingCalleeClone(
Edge, NewClone);
4947 removeNoneTypeCalleeEdges(NewClone);
4950 ContextNode *NewClone = MoveEdgeToNewCalleeCloneAndSetUp(
Edge);
4951 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller] =
4954 ClonesWorklist.push_back(NewClone);
4958 removeNoneTypeCalleeEdges(Clone);
4966 if (!FuncCloneAssignedToCurCallsiteClone) {
4967 FuncCloneAssignedToCurCallsiteClone = FuncCloneCalledByCaller;
4969 AssignCallsiteCloneToFuncClone(
4970 FuncCloneCalledByCaller,
Call, Clone,
4971 AllocationCallToContextNodeMap.count(
Call));
4975 assert(FuncCloneAssignedToCurCallsiteClone ==
4976 FuncCloneCalledByCaller);
4985 if (!FuncCloneAssignedToCurCallsiteClone) {
4986 FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
4987 assert(FuncCloneAssignedToCurCallsiteClone);
4989 AssignCallsiteCloneToFuncClone(
4990 FuncCloneAssignedToCurCallsiteClone,
Call, Clone,
4991 AllocationCallToContextNodeMap.count(
Call));
4993 assert(FuncCloneToCurNodeCloneMap
4994 [FuncCloneAssignedToCurCallsiteClone] == Clone);
4996 RecordCalleeFuncOfCallsite(
Edge->Caller,
4997 FuncCloneAssignedToCurCallsiteClone);
5017 if (!FuncCloneAssignedToCurCallsiteClone) {
5018 FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
5019 assert(FuncCloneAssignedToCurCallsiteClone &&
5020 "No available func clone for this callsite clone");
5021 AssignCallsiteCloneToFuncClone(
5022 FuncCloneAssignedToCurCallsiteClone,
Call, Clone,
5023 AllocationCallToContextNodeMap.contains(
Call));
5028 for (
const auto &PE :
Node->CalleeEdges)
5030 for (
const auto &CE :
Node->CallerEdges)
5032 for (
auto *Clone :
Node->Clones) {
5034 for (
const auto &PE : Clone->CalleeEdges)
5036 for (
const auto &CE : Clone->CallerEdges)
5042 if (FuncCloneInfos.size() < 2)
5048 for (
auto &
Call : CallsWithMetadata) {
5049 ContextNode *
Node = getNodeForInst(
Call);
5050 if (!Node || !
Node->hasCall() ||
Node->emptyContextIds())
5056 if (
Node->Clones.size() + 1 >= FuncCloneInfos.size())
5060 DenseSet<unsigned> NodeCallClones;
5061 for (
auto *
C :
Node->Clones)
5062 NodeCallClones.
insert(
C->Call.cloneNo());
5065 for (
auto &FC : FuncCloneInfos) {
5070 if (++
I == 1 || NodeCallClones.
contains(
I)) {
5075 auto &CallVector = UnassignedCallClones[
Node][
I];
5076 DenseMap<CallInfo, CallInfo> &CallMap =
FC.CallMap;
5077 if (
auto It = CallMap.
find(
Call); It != CallMap.
end()) {
5078 CallInfo CallClone = It->second;
5079 CallVector.push_back(CallClone);
5083 assert(
false &&
"Expected to find call in CallMap");
5086 for (
auto &MatchingCall :
Node->MatchingCalls) {
5087 if (
auto It = CallMap.
find(MatchingCall); It != CallMap.
end()) {
5088 CallInfo CallClone = It->second;
5089 CallVector.push_back(CallClone);
5093 assert(
false &&
"Expected to find call in CallMap");
5101 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
5103 auto UpdateCalls = [&](ContextNode *
Node,
5104 DenseSet<const ContextNode *> &Visited,
5105 auto &&UpdateCalls) {
5106 auto Inserted = Visited.insert(Node);
5110 for (
auto *Clone :
Node->Clones)
5111 UpdateCalls(Clone, Visited, UpdateCalls);
5113 for (
auto &
Edge :
Node->CallerEdges)
5114 UpdateCalls(
Edge->Caller, Visited, UpdateCalls);
5118 if (!
Node->hasCall() ||
Node->emptyContextIds())
5121 if (
Node->IsAllocation) {
5122 auto AT = allocTypeToUse(
Node->AllocTypes);
5128 !ContextIdToContextSizeInfos.empty()) {
5129 uint64_t TotalCold = 0;
5131 for (
auto Id :
Node->getContextIds()) {
5132 auto TypeI = ContextIdToAllocationType.find(Id);
5133 assert(TypeI != ContextIdToAllocationType.end());
5134 auto CSI = ContextIdToContextSizeInfos.find(Id);
5135 if (CSI != ContextIdToContextSizeInfos.end()) {
5136 for (
auto &
Info : CSI->second) {
5138 if (TypeI->second == AllocationType::Cold)
5139 TotalCold +=
Info.TotalSize;
5144 AT = AllocationType::Cold;
5146 updateAllocationCall(
Node->Call, AT);
5151 if (!CallsiteToCalleeFuncCloneMap.
count(Node))
5154 auto CalleeFunc = CallsiteToCalleeFuncCloneMap[
Node];
5155 updateCall(
Node->Call, CalleeFunc);
5157 for (
auto &
Call :
Node->MatchingCalls)
5158 updateCall(
Call, CalleeFunc);
5162 if (!UnassignedCallClones.
contains(Node))
5164 DenseSet<unsigned> NodeCallClones;
5165 for (
auto *
C :
Node->Clones)
5166 NodeCallClones.
insert(
C->Call.cloneNo());
5168 auto &ClonedCalls = UnassignedCallClones[
Node];
5169 for (
auto &[CloneNo, CallVector] : ClonedCalls) {
5173 if (NodeCallClones.
contains(CloneNo))
5176 for (
auto &
Call : CallVector)
5177 updateCall(
Call, CalleeFunc);
5186 DenseSet<const ContextNode *> Visited;
5187 for (
auto &Entry : AllocationCallToContextNodeMap)
5188 UpdateCalls(
Entry.second, Visited, UpdateCalls);
5199 for (
auto &SN : FS->callsites()) {
5204 SN.Clones.size() >
I &&
5205 "Callsite summary has fewer entries than other summaries in function");
5206 if (SN.Clones.size() <=
I || !SN.Clones[
I])
5213 for (
auto &AN : FS->allocs()) {
5217 assert(AN.Versions.size() >
I &&
5218 "Alloc summary has fewer entries than other summaries in function");
5219 if (AN.Versions.size() <=
I ||
5236 NewGV->takeName(DeclGV);
5243 auto CloneFuncAliases = [&](
Function *NewF,
unsigned I) {
5244 if (!FuncToAliasMap.count(&
F))
5246 for (
auto *
A : FuncToAliasMap[&
F]) {
5248 auto *PrevA = M.getNamedAlias(AliasName);
5250 A->getType()->getPointerAddressSpace(),
5251 A->getLinkage(), AliasName, NewF);
5252 NewA->copyAttributesFrom(
A);
5254 TakeDeclNameAndReplace(PrevA, NewA);
5263 FunctionsClonedThinBackend++;
5280 for (
unsigned I = 1;
I < NumClones;
I++) {
5281 VMaps.
emplace_back(std::make_unique<ValueToValueMapTy>());
5288 FunctionCloneDuplicatesThinBackend++;
5289 auto *Func = HashToFunc[Hash];
5290 if (Func->hasAvailableExternallyLinkage()) {
5296 auto Decl = M.getOrInsertFunction(Name, Func->getFunctionType());
5298 <<
"created clone decl " <<
ore::NV(
"Decl", Decl.getCallee()));
5301 auto *PrevF = M.getFunction(Name);
5304 TakeDeclNameAndReplace(PrevF, Alias);
5306 <<
"created clone alias " <<
ore::NV(
"Alias", Alias));
5309 CloneFuncAliases(Func,
I);
5313 HashToFunc[Hash] = NewF;
5314 FunctionClonesThinBackend++;
5317 for (
auto &BB : *NewF) {
5318 for (
auto &Inst : BB) {
5319 Inst.setMetadata(LLVMContext::MD_memprof,
nullptr);
5320 Inst.setMetadata(LLVMContext::MD_callsite,
nullptr);
5325 TakeDeclNameAndReplace(PrevF, NewF);
5327 NewF->setName(Name);
5330 <<
"created clone " <<
ore::NV(
"NewFunction", NewF));
5333 CloneFuncAliases(NewF,
I);
5342 const Function *CallingFunc =
nullptr) {
5361 auto SrcFileMD =
F.getMetadata(
"thinlto_src_file");
5367 if (!SrcFileMD &&
F.isDeclaration()) {
5371 SrcFileMD = CallingFunc->getMetadata(
"thinlto_src_file");
5376 assert(SrcFileMD || OrigName ==
F.getName());
5378 StringRef SrcFile = M.getSourceFileName();
5390 if (!TheFnVI && OrigName ==
F.getName() &&
F.hasLocalLinkage() &&
5391 F.getName().contains(
'.')) {
5392 OrigName =
F.getName().rsplit(
'.').first;
5401 assert(TheFnVI ||
F.isDeclaration());
5405bool MemProfContextDisambiguation::initializeIndirectCallPromotionInfo(
5407 ICallAnalysis = std::make_unique<ICallPromotionAnalysis>();
5408 Symtab = std::make_unique<InstrProfSymtab>();
5419 if (
Error E = Symtab->create(M,
true,
false)) {
5420 std::string SymtabFailure =
toString(std::move(
E));
5421 M.getContext().emitError(
"Failed to create symtab: " + SymtabFailure);
5434 auto MIBIter = AllocNode.
MIBs.begin();
5435 for (
auto &MDOp : MemProfMD->
operands()) {
5437 auto StackIdIndexIter = MIBIter->StackIdIndices.begin();
5442 auto ContextIterBegin =
5446 (ContextIterBegin != StackContext.
end() && *ContextIterBegin == 0) ? 1
5448 for (
auto ContextIter = ContextIterBegin; ContextIter != StackContext.
end();
5453 if (LastStackContextId == *ContextIter)
5455 LastStackContextId = *ContextIter;
5456 assert(StackIdIndexIter != MIBIter->StackIdIndices.end());
5466bool MemProfContextDisambiguation::applyImport(
Module &M) {
5473 std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
5475 for (
auto &
A :
M.aliases()) {
5476 auto *Aliasee =
A.getAliaseeObject();
5478 FuncToAliasMap[
F].insert(&
A);
5481 if (!initializeIndirectCallPromotionInfo(M))
5488 OptimizationRemarkEmitter ORE(&
F);
5491 bool ClonesCreated =
false;
5492 unsigned NumClonesCreated = 0;
5493 auto CloneFuncIfNeeded = [&](
unsigned NumClones, FunctionSummary *
FS) {
5503 if (ClonesCreated) {
5504 assert(NumClonesCreated == NumClones);
5511 ClonesCreated =
true;
5512 NumClonesCreated = NumClones;
5515 auto CloneCallsite = [&](
const CallsiteInfo &StackNode, CallBase *CB,
5516 Function *CalledFunction, FunctionSummary *
FS) {
5518 CloneFuncIfNeeded(StackNode.
Clones.
size(), FS);
5530 if (CalledFunction != CB->getCalledOperand() &&
5531 (!GA || CalledFunction != GA->getAliaseeObject())) {
5532 SkippedCallsCloning++;
5538 auto CalleeOrigName = CalledFunction->getName();
5539 for (
unsigned J = 0; J < StackNode.
Clones.
size(); J++) {
5542 if (J > 0 && VMaps[J - 1]->
empty())
5546 if (!StackNode.
Clones[J])
5548 auto NewF =
M.getOrInsertFunction(
5550 CalledFunction->getFunctionType());
5564 ORE.emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofCall", CBClone)
5565 <<
ore::NV(
"Call", CBClone) <<
" in clone "
5567 <<
" assigned to call function clone "
5568 <<
ore::NV(
"Callee", NewF.getCallee()));
5582 ImportSummary->findSummaryInModule(TheFnVI,
M.getModuleIdentifier());
5586 auto SrcModuleMD =
F.getMetadata(
"thinlto_src_module");
5588 "enable-import-metadata is needed to emit thinlto_src_module");
5589 StringRef SrcModule =
5592 if (GVS->modulePath() == SrcModule) {
5593 GVSummary = GVS.get();
5597 assert(GVSummary && GVSummary->modulePath() == SrcModule);
5607 if (
FS->allocs().empty() &&
FS->callsites().empty())
5610 auto SI =
FS->callsites().begin();
5611 auto AI =
FS->allocs().begin();
5616 DenseMap<ValueInfo, CallsiteInfo> MapTailCallCalleeVIToCallsite;
5619 for (
auto CallsiteIt =
FS->callsites().rbegin();
5620 CallsiteIt !=
FS->callsites().rend(); CallsiteIt++) {
5621 auto &Callsite = *CallsiteIt;
5625 if (!Callsite.StackIdIndices.empty())
5627 MapTailCallCalleeVIToCallsite.
insert({Callsite.Callee, Callsite});
5636 for (
auto &BB :
F) {
5637 for (
auto &
I : BB) {
5643 auto *CalledValue = CB->getCalledOperand();
5644 auto *CalledFunction = CB->getCalledFunction();
5645 if (CalledValue && !CalledFunction) {
5646 CalledValue = CalledValue->stripPointerCasts();
5653 assert(!CalledFunction &&
5654 "Expected null called function in callsite for alias");
5658 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
5659 I.getMetadata(LLVMContext::MD_callsite));
5660 auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof);
5666 if (CB->getAttributes().hasFnAttr(
"memprof") && !MemProfMD) {
5667 CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
5668 ? AllocTypeColdThinBackend++
5669 : AllocTypeNotColdThinBackend++;
5670 OrigAllocsThinBackend++;
5671 AllocVersionsThinBackend++;
5672 if (!MaxAllocVersionsThinBackend)
5673 MaxAllocVersionsThinBackend = 1;
5680 auto &AllocNode = *(AI++);
5688 CloneFuncIfNeeded(AllocNode.Versions.size(), FS);
5690 OrigAllocsThinBackend++;
5691 AllocVersionsThinBackend += AllocNode.Versions.size();
5692 if (MaxAllocVersionsThinBackend < AllocNode.Versions.size())
5693 MaxAllocVersionsThinBackend = AllocNode.Versions.size();
5703 if (AllocNode.Versions.size() == 1 &&
5706 AllocationType::NotCold ||
5708 AllocationType::None);
5709 UnclonableAllocsThinBackend++;
5715 return Type == ((uint8_t)AllocationType::NotCold |
5716 (uint8_t)AllocationType::Cold);
5720 for (
unsigned J = 0; J < AllocNode.Versions.size(); J++) {
5723 if (J > 0 && VMaps[J - 1]->
empty())
5726 if (AllocNode.Versions[J] == (uint8_t)AllocationType::None)
5729 AllocTy == AllocationType::Cold ? AllocTypeColdThinBackend++
5730 : AllocTypeNotColdThinBackend++;
5745 ORE.emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofAttribute", CBClone)
5746 <<
ore::NV(
"AllocationCall", CBClone) <<
" in clone "
5748 <<
" marked with memprof allocation attribute "
5749 <<
ore::NV(
"Attribute", AllocTypeString));
5751 }
else if (!CallsiteContext.empty()) {
5752 if (!CalledFunction) {
5756 assert(!CI || !CI->isInlineAsm());
5766 recordICPInfo(CB,
FS->callsites(), SI, ICallAnalysisInfo);
5772 CloneFuncIfNeeded(NumClones, FS);
5777 assert(SI !=
FS->callsites().end());
5778 auto &StackNode = *(
SI++);
5784 for (
auto StackId : CallsiteContext) {
5786 assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) ==
5792 CloneCallsite(StackNode, CB, CalledFunction, FS);
5794 }
else if (CB->isTailCall() && CalledFunction) {
5797 ValueInfo CalleeVI =
5799 if (CalleeVI && MapTailCallCalleeVIToCallsite.
count(CalleeVI)) {
5800 auto Callsite = MapTailCallCalleeVIToCallsite.
find(CalleeVI);
5801 assert(Callsite != MapTailCallCalleeVIToCallsite.
end());
5802 CloneCallsite(Callsite->second, CB, CalledFunction, FS);
5809 performICP(M,
FS->callsites(), VMaps, ICallAnalysisInfo, ORE);
5819 for (
auto &BB :
F) {
5820 for (
auto &
I : BB) {
5823 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
5824 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
5832unsigned MemProfContextDisambiguation::recordICPInfo(
5837 uint32_t NumCandidates;
5838 uint64_t TotalCount;
5839 auto CandidateProfileData =
5840 ICallAnalysis->getPromotionCandidatesForInstruction(CB, TotalCount,
5842 if (CandidateProfileData.empty())
5848 bool ICPNeeded =
false;
5849 unsigned NumClones = 0;
5850 size_t CallsiteInfoStartIndex = std::distance(AllCallsites.
begin(), SI);
5851 for (
const auto &Candidate : CandidateProfileData) {
5853 auto CalleeValueInfo =
5855 ImportSummary->getValueInfo(Candidate.Value);
5858 assert(!CalleeValueInfo ||
SI->Callee == CalleeValueInfo);
5860 auto &StackNode = *(
SI++);
5865 [](
unsigned CloneNo) { return CloneNo != 0; });
5875 ICallAnalysisInfo.
push_back({CB, CandidateProfileData.vec(), NumCandidates,
5876 TotalCount, CallsiteInfoStartIndex});
5880void MemProfContextDisambiguation::performICP(
5882 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
5884 OptimizationRemarkEmitter &ORE) {
5891 for (
auto &
Info : ICallAnalysisInfo) {
5894 auto TotalCount =
Info.TotalCount;
5895 unsigned NumPromoted = 0;
5896 unsigned NumClones = 0;
5898 for (
auto &Candidate :
Info.CandidateProfileData) {
5909 Function *TargetFunction = Symtab->getFunction(Candidate.Value);
5910 if (TargetFunction ==
nullptr ||
5918 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnableToFindTarget", CB)
5919 <<
"Memprof cannot promote indirect call: target with md5sum "
5920 <<
ore::NV(
"target md5sum", Candidate.Value) <<
" not found";
5929 const char *Reason =
nullptr;
5932 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnableToPromote", CB)
5933 <<
"Memprof cannot promote indirect call to "
5934 <<
ore::NV(
"TargetFunction", TargetFunction)
5935 <<
" with count of " <<
ore::NV(
"TotalCount", TotalCount)
5946 CallBase *CBClone = CB;
5947 for (
unsigned J = 0; J < NumClones; J++) {
5950 if (J > 0 && VMaps[J - 1]->
empty())
5960 TotalCount, isSamplePGO, &ORE);
5961 auto *TargetToUse = TargetFunction;
5964 if (StackNode.
Clones[J]) {
5983 <<
ore::NV(
"Call", CBClone) <<
" in clone "
5985 <<
" promoted and assigned to call function clone "
5986 <<
ore::NV(
"Callee", TargetToUse));
5990 TotalCount -= Candidate.Count;
5995 CallBase *CBClone = CB;
5996 for (
unsigned J = 0; J < NumClones; J++) {
5999 if (J > 0 && VMaps[J - 1]->
empty())
6005 CBClone->
setMetadata(LLVMContext::MD_prof,
nullptr);
6008 if (TotalCount != 0)
6010 M, *CBClone,
ArrayRef(
Info.CandidateProfileData).slice(NumPromoted),
6011 TotalCount, IPVK_IndirectCallTarget,
Info.NumCandidates);
6016template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
6017bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {
6019 dbgs() <<
"CCG before cloning:\n";
6023 exportToDot(
"postbuild");
6036 dbgs() <<
"CCG after cloning:\n";
6040 exportToDot(
"cloned");
6042 bool Changed = assignFunctions();
6045 dbgs() <<
"CCG after assigning function clones:\n";
6049 exportToDot(
"clonefuncassign");
6052 printTotalSizes(
errs());
6057bool MemProfContextDisambiguation::processModule(
6059 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) {
6064 return applyImport(M);
6077 ModuleCallsiteContextGraph CCG(M, OREGetter);
6078 return CCG.process();
6083 : ImportSummary(Summary), isSamplePGO(isSamplePGO) {
6088 "-memprof-dot-scope=alloc requires -memprof-dot-alloc-id");
6092 "-memprof-dot-scope=context requires -memprof-dot-context-id");
6096 "-memprof-dot-scope=all can't have both -memprof-dot-alloc-id and "
6097 "-memprof-dot-context-id");
6098 if (ImportSummary) {
6108 auto ReadSummaryFile =
6110 if (!ReadSummaryFile) {
6117 if (!ImportSummaryForTestingOrErr) {
6123 ImportSummaryForTesting = std::move(*ImportSummaryForTestingOrErr);
6124 ImportSummary = ImportSummaryForTesting.get();
6133 if (!processModule(M, OREGetter))
6150 IndexCallsiteContextGraph CCG(Index, isPrevailing);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Prepare AGPR Alloc
Unify divergent function exit nodes
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
Module.h This file contains the declarations for the Module class.
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
static cl::opt< unsigned > TailCallSearchDepth("memprof-tail-call-search-depth", cl::init(5), cl::Hidden, cl::desc("Max depth to recursively search for missing " "frames through tail calls."))
uint64_t ComputeHash(const FunctionSummary *FS, unsigned I)
static cl::opt< DotScope > DotGraphScope("memprof-dot-scope", cl::desc("Scope of graph to export to dot"), cl::Hidden, cl::init(DotScope::All), cl::values(clEnumValN(DotScope::All, "all", "Export full callsite graph"), clEnumValN(DotScope::Alloc, "alloc", "Export only nodes with contexts feeding given " "-memprof-dot-alloc-id"), clEnumValN(DotScope::Context, "context", "Export only nodes with given -memprof-dot-context-id")))
static cl::opt< bool > DoMergeIteration("memprof-merge-iteration", cl::init(true), cl::Hidden, cl::desc("Iteratively apply merging on a node to catch new callers"))
static bool isMemProfClone(const Function &F)
static cl::opt< unsigned > AllocIdForDot("memprof-dot-alloc-id", cl::init(0), cl::Hidden, cl::desc("Id of alloc to export if -memprof-dot-scope=alloc " "or to highlight if -memprof-dot-scope=all"))
static cl::opt< unsigned > ContextIdForDot("memprof-dot-context-id", cl::init(0), cl::Hidden, cl::desc("Id of context to export if -memprof-dot-scope=context or to " "highlight otherwise"))
static cl::opt< bool > ExportToDot("memprof-export-to-dot", cl::init(false), cl::Hidden, cl::desc("Export graph to dot files."))
static void checkEdge(const std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > &Edge)
static cl::opt< bool > AllowRecursiveCallsites("memprof-allow-recursive-callsites", cl::init(true), cl::Hidden, cl::desc("Allow cloning of callsites involved in recursive cycles"))
bool checkColdOrNotCold(uint8_t AllocType)
static ValueInfo findValueInfoForFunc(const Function &F, const Module &M, const ModuleSummaryIndex *ImportSummary, const Function *CallingFunc=nullptr)
static cl::opt< bool > CloneRecursiveContexts("memprof-clone-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts through recursive cycles"))
static std::string getAllocTypeString(uint8_t AllocTypes)
static cl::opt< unsigned > MemProfICPNoInlineThreshold("memprof-icp-noinline-threshold", cl::init(2), cl::Hidden, cl::desc("Minimum absolute count for promoted target to be inlinable"))
bool DOTGraphTraits< constCallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * >::DoHighlight
static unsigned getMemProfCloneNum(const Function &F)
static SmallVector< std::unique_ptr< ValueToValueMapTy >, 4 > createFunctionClones(Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE, std::map< const Function *, SmallPtrSet< const GlobalAlias *, 1 > > &FuncToAliasMap, FunctionSummary *FS)
static cl::opt< bool > VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden, cl::desc("Perform verification checks on CallingContextGraph."))
static void checkNode(const ContextNode< DerivedCCG, FuncTy, CallTy > *Node, bool CheckEdges=true)
static cl::opt< bool > MergeClones("memprof-merge-clones", cl::init(true), cl::Hidden, cl::desc("Merge clones before assigning functions"))
static std::string getMemProfFuncName(Twine Base, unsigned CloneNo)
static cl::opt< std::string > MemProfImportSummary("memprof-import-summary", cl::desc("Import summary to use for testing the ThinLTO backend via opt"), cl::Hidden)
static const std::string MemProfCloneSuffix
static void updateSubprogramLinkageName(Function *NewFunc, StringRef Name)
static cl::opt< bool > AllowRecursiveContexts("memprof-allow-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts having recursive cycles"))
static cl::opt< std::string > DotFilePathPrefix("memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path prefix of the MemProf dot files."))
static cl::opt< bool > VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden, cl::desc("Perform frequent verification checks on nodes."))
static void checkAllocContextIds(const AllocInfo &AllocNode, const MDNode *MemProfMD, const CallStack< MDNode, MDNode::op_iterator > &CallsiteContext, const ModuleSummaryIndex *ImportSummary)
static cl::opt< bool > DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden, cl::desc("Dump CallingContextGraph to stdout after each stage."))
This is the interface to build a ModuleSummaryIndex for a module.
ModuleSummaryIndex.h This file contains the declarations the classes that hold the module index and s...
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
FunctionAnalysisManager FAM
std::pair< BasicBlock *, BasicBlock * > Edge
This file defines generic set operations that may be used on set's of different types,...
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
void print(OutputBuffer &OB) const
ValueInfo getAliaseeVI() const
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool empty() const
empty - Check if the array is empty.
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
void setCalledOperand(Value *V)
Subprogram description. Uses SubclassData1.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Implements a dense probed hash-table based set.
Function summary information to aid decisions and implementation of importing.
FunctionType * getFunctionType() const
Returns the FunctionType for me.
DISubprogram * getSubprogram() const
Get the attached subprogram.
const Function & getFunction() const
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
static LLVM_ABI GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Function and variable summary information to aid decisions and implementation of importing.
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
static bool isLocalLinkage(LinkageTypes Linkage)
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
uint64_t GUID
Declare a type to represent a global unique identifier for a global value.
Module * getParent()
Get the module that this global value is contained inside of...
LLVM_ABI void eraseFromParent()
This method unlinks 'this' from the containing module and deletes it.
static LLVM_ABI std::string getGlobalIdentifier(StringRef Name, GlobalValue::LinkageTypes Linkage, StringRef FileName)
Return the modified name for a global value suitable to be used as the key for a global lookup (e....
@ InternalLinkage
Rename collisions when linking (static functions).
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
LLVM_ABI TempMDNode clone() const
Create a (temporary) clone of this.
static std::enable_if_t< std::is_base_of< MDNode, T >::value, T * > replaceWithUniqued(std::unique_ptr< T, TempMDNodeDeleter > N)
Replace a temporary node with a uniqued one.
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
size_type count(const KeyT &Key) const
MemProfContextDisambiguation(const ModuleSummaryIndex *Summary=nullptr, bool isSamplePGO=false)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
Class to hold module path string table and global value map, and encapsulate methods for operating on...
static StringRef getOriginalNameBeforePromote(StringRef Name)
Helper to obtain the unpromoted name for a global value (or the original name if not promoted).
ValueInfo getValueInfo(const GlobalValueSummaryMapTy::value_type &R) const
Return a ValueInfo for the index value_type (convenient when iterating index).
uint64_t getStackIdAtIndex(unsigned Index) const
A Module instance is used to store all the information related to an LLVM module.
LLVMContext & getContext() const
Get the global data context.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
A discriminated union of two or more pointer types, with the discriminator in the low bit of the poin...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
A class that wrap the SHA1 algorithm.
LLVM_ABI void update(ArrayRef< uint8_t > Data)
Digest more data.
LLVM_ABI std::array< uint8_t, 20 > result()
Return the current raw 160-bits SHA1 for the digested data since the last call to init().
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
void insert_range(Range &&R)
void swap(DenseSetImpl &RHS)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
bool erase(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
An efficient, type-erasing, non-owning reference to a callable.
Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresp...
CallStackIterator beginAfterSharedPrefix(const CallStack &Other)
CallStackIterator end() const
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
@ CE
Windows NT (Windows on ARM)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
LLVM_ABI AllocationType getMIBAllocType(const MDNode *MIB)
Returns the allocation type from an MIB metadata node.
LLVM_ABI bool metadataMayIncludeContextSizeInfo()
Whether the alloc memprof metadata may include context size info for some MIBs (but possibly not all)...
LLVM_ABI bool hasSingleAllocType(uint8_t AllocTypes)
True if the AllocTypes bitmask contains just a single type.
LLVM_ABI std::string getAllocTypeAttributeString(AllocationType Type)
Returns the string to use in attributes with the given type.
LLVM_ABI MDNode * getMIBStackNode(const MDNode *MIB)
Returns the stack node from an MIB metadata node.
LLVM_ABI void removeAnyExistingAmbiguousAttribute(CallBase *CB)
Removes any existing "ambiguous" memprof attribute.
DiagnosticInfoOptimizationBase::Argument NV
LLVM_ABI CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< NodeBase * > Node
NodeAddr< FuncNode * > Func
friend class Instruction
Iterator for Instructions in a `BasicBlock.
uint64_t read64le(const void *P)
void write32le(void *P, uint32_t V)
This is an optimization pass for GlobalISel generic memory operations.
cl::opt< unsigned > MinClonedColdBytePercent("memprof-cloning-cold-threshold", cl::init(100), cl::Hidden, cl::desc("Min percent of cold bytes to hint alloc cold during cloning"))
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
LLVM_ABI void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner={})
Log all errors (if any) in E to OS.
FunctionAddr VTableAddr Value
void stable_sort(R &&Range)
cl::opt< bool > MemProfReportHintedSizes("memprof-report-hinted-sizes", cl::init(false), cl::Hidden, cl::desc("Report total allocation sizes of hinted allocations"))
LLVM_ABI bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI bool mayHaveMemprofSummary(const CallBase *CB)
Returns true if the instruction could have memprof metadata, used to ensure consistency between summa...
static cl::opt< bool > MemProfRequireDefinitionForPromotion("memprof-require-definition-for-promotion", cl::init(false), cl::Hidden, cl::desc("Require target function definition when promoting indirect calls"))
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
void set_subtract(S1Ty &S1, const S2Ty &S2)
set_subtract(A, B) - Compute A := A - B
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
bool set_intersects(const S1Ty &S1, const S2Ty &S2)
set_intersects(A, B) - Return true iff A ^ B is non empty
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
LLVM_ABI Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndex(MemoryBufferRef Buffer)
Parse the specified bitcode buffer, returning the module summary index.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
FunctionAddr VTableAddr Count
bool set_union(S1Ty &S1, const S2Ty &S2)
set_union(A, B) - Compute A := A u B, return whether A changed.
cl::opt< bool > SupportsHotColdNew
Indicate we are linking with an allocator that supports hot/cold operator new interfaces.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
S1Ty set_intersection(const S1Ty &S1, const S2Ty &S2)
set_intersection(A, B) - Return A ^ B
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
S1Ty set_difference(const S1Ty &S1, const S2Ty &S2)
set_difference(A, B) - Return A - B
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Expected< T > errorOrToExpected(ErrorOr< T > &&EO)
Convert an ErrorOr<T> to an Expected<T>.
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
LLVM_ABI Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
DOTGraphTraits(bool IsSimple=false)
typename GTraits::NodeRef NodeRef
static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter, GraphType G)
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
typename GTraits::ChildIteratorType ChildIteratorType
static std::string getNodeAttributes(NodeRef Node, GraphType G)
static bool isNodeHidden(NodeRef Node, GraphType G)
static std::string getNodeLabel(NodeRef Node, GraphType G)
GraphTraits< GraphType > GTraits
static NodeRef getNode(const NodePtrTy &P)
static const ContextNode< DerivedCCG, FuncTy, CallTy > * GetCallee(const EdgePtrTy &P)
static ChildIteratorType child_end(NodeRef N)
std::unique_ptr< ContextNode< DerivedCCG, FuncTy, CallTy > > NodePtrTy
mapped_iterator< typename std::vector< std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > >::const_iterator, decltype(&GetCallee)> ChildIteratorType
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
const ContextNode< DerivedCCG, FuncTy, CallTy > * NodeRef
mapped_iterator< typename std::vector< NodePtrTy >::const_iterator, decltype(&getNode)> nodes_iterator
static ChildIteratorType child_begin(NodeRef N)
static NodeRef getEntryNode(GraphType G)
static nodes_iterator nodes_begin(GraphType G)
static nodes_iterator nodes_end(GraphType G)
std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > EdgePtrTy
Summary of memprof metadata on allocations.
std::vector< MIBInfo > MIBs
SmallVector< unsigned > StackIdIndices
SmallVector< unsigned > Clones
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits(bool simple=false)
An information struct used to provide DenseMap with the various necessary components for a given valu...
typename GraphType::UnknownGraphTypeError NodeRef
Struct that holds a reference to a particular GUID in a global value summary.
ArrayRef< std::unique_ptr< GlobalValueSummary > > getSummaryList() const
GlobalValue::GUID getGUID() const
PointerUnion< CallsiteInfo *, AllocInfo * > SimpleType
static SimpleType getSimplifiedValue(IndexCall &Val)
const PointerUnion< CallsiteInfo *, AllocInfo * > SimpleType
static SimpleType getSimplifiedValue(const IndexCall &Val)
Define a template that can be specialized by smart pointers to reflect the fact that they are automat...