50#include <unordered_map>
55#define DEBUG_TYPE "memprof-context-disambiguation"
58 "Number of function clones created during whole program analysis");
60 "Number of function clones created during ThinLTO backend");
62 "Number of functions that had clones created during ThinLTO backend");
63STATISTIC(AllocTypeNotCold,
"Number of not cold static allocations (possibly "
64 "cloned) during whole program analysis");
65STATISTIC(AllocTypeCold,
"Number of cold static allocations (possibly cloned) "
66 "during whole program analysis");
68 "Number of not cold static allocations (possibly cloned) during "
70STATISTIC(AllocTypeColdThinBackend,
"Number of cold static allocations "
71 "(possibly cloned) during ThinLTO backend");
73 "Number of original (not cloned) allocations with memprof profiles "
74 "during ThinLTO backend");
76 AllocVersionsThinBackend,
77 "Number of allocation versions (including clones) during ThinLTO backend");
79 "Maximum number of allocation versions created for an original "
80 "allocation during ThinLTO backend");
82 "Number of unclonable ambigous allocations during ThinLTO backend");
84 "Number of edges removed due to mismatched callees (profiled vs IR)");
86 "Number of profiled callees found via tail calls");
88 "Aggregate depth of profiled callees found via tail calls");
90 "Maximum depth of profiled callees found via tail calls");
92 "Number of profiled callees found via multiple tail call chains");
93STATISTIC(DeferredBackedges,
"Number of backedges with deferred cloning");
94STATISTIC(NewMergedNodes,
"Number of new nodes created during merging");
95STATISTIC(NonNewMergedNodes,
"Number of non new nodes used during merging");
97 "Number of missing alloc nodes for context ids");
99 "Number of calls skipped during cloning due to unexpected operand");
101 "Number of callsites assigned to call multiple non-matching clones");
102STATISTIC(TotalMergeInvokes,
"Number of merge invocations for nodes");
103STATISTIC(TotalMergeIters,
"Number of merge iterations for nodes");
104STATISTIC(MaxMergeIters,
"Max merge iterations for nodes");
109 cl::desc(
"Specify the path prefix of the MemProf dot files."));
113 cl::desc(
"Export graph to dot files."));
118 cl::desc(
"Iteratively apply merging on a node to catch new callers"));
128 "memprof-dot-scope",
cl::desc(
"Scope of graph to export to dot"),
133 "Export only nodes with contexts feeding given "
134 "-memprof-dot-alloc-id"),
136 "Export only nodes with given -memprof-dot-context-id")));
140 cl::desc(
"Id of alloc to export if -memprof-dot-scope=alloc "
141 "or to highlight if -memprof-dot-scope=all"));
145 cl::desc(
"Id of context to export if -memprof-dot-scope=context or to "
146 "highlight otherwise"));
150 cl::desc(
"Dump CallingContextGraph to stdout after each stage."));
154 cl::desc(
"Perform verification checks on CallingContextGraph."));
158 cl::desc(
"Perform frequent verification checks on nodes."));
161 "memprof-import-summary",
162 cl::desc(
"Import summary to use for testing the ThinLTO backend via opt"),
168 cl::desc(
"Max depth to recursively search for missing "
169 "frames through tail calls."));
174 cl::desc(
"Allow cloning of callsites involved in recursive cycles"));
178 cl::desc(
"Allow cloning of contexts through recursive cycles"));
185 cl::desc(
"Merge clones before assigning functions"));
194 cl::desc(
"Allow cloning of contexts having recursive cycles"));
200 cl::desc(
"Minimum absolute count for promoted target to be inlinable"));
211 cl::desc(
"Linking with hot/cold operator new interfaces"));
216 "Require target function definition when promoting indirect calls"));
238template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
239class CallsiteContextGraph {
241 CallsiteContextGraph() =
default;
242 CallsiteContextGraph(
const CallsiteContextGraph &) =
default;
243 CallsiteContextGraph(CallsiteContextGraph &&) =
default;
250 void identifyClones();
257 bool assignFunctions();
264 const CallsiteContextGraph &CCG) {
270 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
272 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
274 void exportToDot(std::string Label)
const;
277 struct FuncInfo final
278 :
public std::pair<FuncTy *, unsigned > {
279 using Base = std::pair<FuncTy *, unsigned>;
281 FuncInfo(FuncTy *
F =
nullptr,
unsigned CloneNo = 0) :
Base(
F, CloneNo) {}
282 explicit operator bool()
const {
return this->first !=
nullptr; }
283 FuncTy *
func()
const {
return this->first; }
284 unsigned cloneNo()
const {
return this->second; }
288 struct CallInfo final :
public std::pair<CallTy, unsigned > {
289 using Base = std::pair<CallTy, unsigned>;
291 CallInfo(CallTy
Call =
nullptr,
unsigned CloneNo = 0)
293 explicit operator bool()
const {
return (
bool)this->first; }
294 CallTy call()
const {
return this->first; }
295 unsigned cloneNo()
const {
return this->second; }
296 void setCloneNo(
unsigned N) { this->second =
N; }
298 if (!
operator bool()) {
304 OS <<
"\t(clone " << cloneNo() <<
")";
330 bool Recursive =
false;
357 std::vector<std::shared_ptr<ContextEdge>> CalleeEdges;
361 std::vector<std::shared_ptr<ContextEdge>> CallerEdges;
365 bool useCallerEdgesForContextInfo()
const {
370 assert(!CalleeEdges.empty() || CallerEdges.empty() || IsAllocation ||
388 for (
auto &Edge : CalleeEdges.empty() ? CallerEdges : CalleeEdges)
389 Count += Edge->getContextIds().size();
393 CalleeEdges, useCallerEdgesForContextInfo()
395 : std::vector<std::shared_ptr<ContextEdge>>());
396 for (
const auto &Edge : Edges)
403 uint8_t computeAllocType()
const {
408 CalleeEdges, useCallerEdgesForContextInfo()
410 : std::vector<std::shared_ptr<ContextEdge>>());
411 for (
const auto &Edge : Edges) {
422 bool emptyContextIds()
const {
424 CalleeEdges, useCallerEdgesForContextInfo()
426 : std::vector<std::shared_ptr<ContextEdge>>());
427 for (
const auto &Edge : Edges) {
428 if (!Edge->getContextIds().empty())
435 std::vector<ContextNode *> Clones;
438 ContextNode *CloneOf =
nullptr;
440 ContextNode(
bool IsAllocation) : IsAllocation(IsAllocation),
Call() {}
442 ContextNode(
bool IsAllocation, CallInfo
C)
443 : IsAllocation(IsAllocation),
Call(
C) {}
445 void addClone(ContextNode *Clone) {
447 CloneOf->Clones.push_back(Clone);
448 Clone->CloneOf = CloneOf;
450 Clones.push_back(Clone);
452 Clone->CloneOf =
this;
456 ContextNode *getOrigNode() {
463 unsigned int ContextId);
465 ContextEdge *findEdgeFromCallee(
const ContextNode *Callee);
466 ContextEdge *findEdgeFromCaller(
const ContextNode *Caller);
467 void eraseCalleeEdge(
const ContextEdge *Edge);
468 void eraseCallerEdge(
const ContextEdge *Edge);
470 void setCall(CallInfo
C) {
Call =
C; }
472 bool hasCall()
const {
return (
bool)
Call.call(); }
478 bool isRemoved()
const {
514 bool IsBackedge =
false;
521 : Callee(Callee), Caller(Caller), AllocTypes(
AllocType),
522 ContextIds(std::move(ContextIds)) {}
528 inline void clear() {
538 inline bool isRemoved()
const {
539 if (Callee || Caller)
560 void removeNoneTypeCalleeEdges(ContextNode *
Node);
561 void removeNoneTypeCallerEdges(ContextNode *
Node);
563 recursivelyRemoveNoneTypeCalleeEdges(ContextNode *
Node,
569 template <
class NodeT,
class IteratorT>
570 std::vector<uint64_t>
575 ContextNode *addAllocNode(CallInfo
Call,
const FuncTy *
F);
578 template <
class NodeT,
class IteratorT>
579 void addStackNodesForMIB(ContextNode *AllocNode,
588 void updateStackNodes();
592 void handleCallsitesWithMultipleTargets();
595 void markBackedges();
605 bool partitionCallsByCallee(
607 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode);
614 std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;
621 using EdgeIter =
typename std::vector<std::shared_ptr<ContextEdge>>
::iterator;
626 struct CallContextInfo {
630 std::vector<uint64_t> StackIds;
644 void removeEdgeFromGraph(ContextEdge *Edge, EdgeIter *EI =
nullptr,
645 bool CalleeIter =
true);
653 void assignStackNodesPostOrder(
666 void propagateDuplicateContextIds(
672 void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode,
680 return static_cast<const DerivedCCG *
>(
this)->getStackId(IdOrIndex);
690 calleesMatch(CallTy
Call, EdgeIter &EI,
695 const FuncTy *getCalleeFunc(CallTy
Call) {
696 return static_cast<DerivedCCG *
>(
this)->getCalleeFunc(
Call);
702 bool calleeMatchesFunc(
703 CallTy
Call,
const FuncTy *Func,
const FuncTy *CallerFunc,
704 std::vector<std::pair<CallTy, FuncTy *>> &FoundCalleeChain) {
705 return static_cast<DerivedCCG *
>(
this)->calleeMatchesFunc(
706 Call, Func, CallerFunc, FoundCalleeChain);
710 bool sameCallee(CallTy Call1, CallTy Call2) {
711 return static_cast<DerivedCCG *
>(
this)->sameCallee(Call1, Call2);
716 std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy
Call) {
717 return static_cast<DerivedCCG *
>(
this)->getStackIdsWithContextNodesForCall(
723 return static_cast<DerivedCCG *
>(
this)->getLastStackId(
Call);
729 static_cast<DerivedCCG *
>(
this)->updateAllocationCall(
Call,
AllocType);
734 return static_cast<const DerivedCCG *
>(
this)->getAllocationCallType(
Call);
739 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc) {
740 static_cast<DerivedCCG *
>(
this)->updateCall(CallerCall, CalleeFunc);
746 FuncInfo cloneFunctionForCallsite(
748 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
749 return static_cast<DerivedCCG *
>(
this)->cloneFunctionForCallsite(
750 Func,
Call, CallMap, CallsWithMetadataInFunc, CloneNo);
755 std::string getLabel(
const FuncTy *Func,
const CallTy
Call,
756 unsigned CloneNo)
const {
757 return static_cast<const DerivedCCG *
>(
this)->getLabel(Func,
Call, CloneNo);
761 ContextNode *createNewNode(
bool IsAllocation,
const FuncTy *
F =
nullptr,
762 CallInfo
C = CallInfo()) {
763 NodeOwner.push_back(std::make_unique<ContextNode>(IsAllocation,
C));
764 auto *NewNode = NodeOwner.back().get();
766 NodeToCallingFunc[NewNode] =
F;
767 NewNode->NodeId = NodeOwner.size();
772 ContextNode *getNodeForInst(
const CallInfo &
C);
773 ContextNode *getNodeForAlloc(
const CallInfo &
C);
774 ContextNode *getNodeForStackId(
uint64_t StackId);
796 moveEdgeToNewCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
803 void moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
804 ContextNode *NewCallee,
805 bool NewClone =
false,
813 void moveCalleeEdgeToNewCaller(
const std::shared_ptr<ContextEdge> &Edge,
814 ContextNode *NewCaller);
825 void mergeNodeCalleeClones(
830 void findOtherCallersToShareMerge(
831 ContextNode *
Node, std::vector<std::shared_ptr<ContextEdge>> &CalleeEdges,
862 std::vector<std::unique_ptr<ContextNode>> NodeOwner;
868 unsigned int LastContextId = 0;
871template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
873 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode;
874template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
876 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge;
877template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
879 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::FuncInfo;
880template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
882 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::CallInfo;
885class ModuleCallsiteContextGraph
886 :
public CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
889 ModuleCallsiteContextGraph(
891 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter);
894 friend CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
897 uint64_t getStackId(uint64_t IdOrIndex)
const;
899 bool calleeMatchesFunc(
900 Instruction *
Call,
const Function *Func,
const Function *CallerFunc,
901 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain);
902 bool sameCallee(Instruction *Call1, Instruction *Call2);
903 bool findProfiledCalleeThroughTailCalls(
904 const Function *ProfiledCallee,
Value *CurCallee,
unsigned Depth,
905 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
906 bool &FoundMultipleCalleeChains);
907 uint64_t getLastStackId(Instruction *
Call);
908 std::vector<uint64_t> getStackIdsWithContextNodesForCall(Instruction *
Call);
911 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
912 CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
914 cloneFunctionForCallsite(FuncInfo &Func, CallInfo &
Call,
915 DenseMap<CallInfo, CallInfo> &CallMap,
916 std::vector<CallInfo> &CallsWithMetadataInFunc,
918 std::string getLabel(
const Function *Func,
const Instruction *
Call,
919 unsigned CloneNo)
const;
922 llvm::function_ref<OptimizationRemarkEmitter &(
Function *)> OREGetter;
928struct IndexCall :
public PointerUnion<CallsiteInfo *, AllocInfo *> {
929 IndexCall() : PointerUnion() {}
930 IndexCall(std::nullptr_t) : IndexCall() {}
931 IndexCall(CallsiteInfo *StackNode) : PointerUnion(StackNode) {}
932 IndexCall(AllocInfo *AllocNode) : PointerUnion(AllocNode) {}
933 IndexCall(PointerUnion PT) : PointerUnion(PT) {}
935 IndexCall *operator->() {
return this; }
937 void print(raw_ostream &OS)
const {
938 PointerUnion<CallsiteInfo *, AllocInfo *>
Base = *
this;
963class IndexCallsiteContextGraph
964 :
public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
967 IndexCallsiteContextGraph(
968 ModuleSummaryIndex &Index,
972 ~IndexCallsiteContextGraph() {
977 for (
auto &
I : FunctionCalleesToSynthesizedCallsiteInfos) {
979 for (
auto &Callsite :
I.second)
980 FS->addCallsite(*Callsite.second);
985 friend CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
988 uint64_t getStackId(uint64_t IdOrIndex)
const;
989 const FunctionSummary *getCalleeFunc(IndexCall &
Call);
990 bool calleeMatchesFunc(
991 IndexCall &
Call,
const FunctionSummary *Func,
992 const FunctionSummary *CallerFunc,
993 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain);
994 bool sameCallee(IndexCall &Call1, IndexCall &Call2);
995 bool findProfiledCalleeThroughTailCalls(
996 ValueInfo ProfiledCallee, ValueInfo CurCallee,
unsigned Depth,
997 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
998 bool &FoundMultipleCalleeChains);
999 uint64_t getLastStackId(IndexCall &
Call);
1000 std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &
Call);
1003 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
1004 CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
1005 IndexCall>::FuncInfo
1006 cloneFunctionForCallsite(FuncInfo &Func, CallInfo &
Call,
1007 DenseMap<CallInfo, CallInfo> &CallMap,
1008 std::vector<CallInfo> &CallsWithMetadataInFunc,
1010 std::string getLabel(
const FunctionSummary *Func,
const IndexCall &
Call,
1011 unsigned CloneNo)
const;
1015 std::map<const FunctionSummary *, ValueInfo> FSToVIMap;
1017 const ModuleSummaryIndex &Index;
1025 std::unordered_map<FunctionSummary *,
1026 std::map<ValueInfo, std::unique_ptr<CallsiteInfo>>>
1027 FunctionCalleesToSynthesizedCallsiteInfos;
1039 :
public DenseMapInfo<std::pair<IndexCall, unsigned>> {};
1042 :
public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {};
1064template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1065bool allocTypesMatch(
1066 const std::vector<uint8_t> &InAllocTypes,
1067 const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>>
1071 assert(InAllocTypes.size() == Edges.size());
1073 InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(), Edges.end(),
1075 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &r) {
1079 if (l == (uint8_t)AllocationType::None ||
1080 r->AllocTypes == (uint8_t)AllocationType::None)
1082 return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes);
1091template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1092bool allocTypesMatchClone(
1093 const std::vector<uint8_t> &InAllocTypes,
1094 const ContextNode<DerivedCCG, FuncTy, CallTy> *Clone) {
1095 const ContextNode<DerivedCCG, FuncTy, CallTy> *
Node = Clone->CloneOf;
1099 assert(InAllocTypes.size() ==
Node->CalleeEdges.size());
1103 for (
const auto &
E : Clone->CalleeEdges) {
1105 EdgeCalleeMap[
E->Callee] =
E->AllocTypes;
1109 for (
unsigned I = 0;
I <
Node->CalleeEdges.size();
I++) {
1110 auto Iter = EdgeCalleeMap.
find(
Node->CalleeEdges[
I]->Callee);
1112 if (Iter == EdgeCalleeMap.
end())
1120 if (allocTypeToUse(Iter->second) != allocTypeToUse(InAllocTypes[
I]))
1128template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1129typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1130CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst(
1131 const CallInfo &
C) {
1132 ContextNode *
Node = getNodeForAlloc(
C);
1136 return NonAllocationCallToContextNodeMap.lookup(
C);
1139template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1140typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1141CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForAlloc(
1142 const CallInfo &
C) {
1143 return AllocationCallToContextNodeMap.lookup(
C);
1146template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1147typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1148CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForStackId(
1150 auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId);
1151 if (StackEntryNode != StackEntryIdToContextNodeMap.end())
1152 return StackEntryNode->second;
1156template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1157void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1159 unsigned int ContextId) {
1160 for (
auto &
Edge : CallerEdges) {
1161 if (
Edge->Caller == Caller) {
1163 Edge->getContextIds().insert(ContextId);
1167 std::shared_ptr<ContextEdge>
Edge = std::make_shared<ContextEdge>(
1168 this, Caller, (uint8_t)
AllocType, DenseSet<uint32_t>({ContextId}));
1169 CallerEdges.push_back(
Edge);
1173template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1174void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::removeEdgeFromGraph(
1175 ContextEdge *
Edge, EdgeIter *EI,
bool CalleeIter) {
1191 auto CalleeCallerCount =
Callee->CallerEdges.size();
1192 auto CallerCalleeCount =
Caller->CalleeEdges.size();
1197 }
else if (CalleeIter) {
1199 *EI =
Caller->CalleeEdges.erase(*EI);
1202 *EI =
Callee->CallerEdges.erase(*EI);
1204 assert(
Callee->CallerEdges.size() < CalleeCallerCount);
1205 assert(
Caller->CalleeEdges.size() < CallerCalleeCount);
1208template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1209void CallsiteContextGraph<
1210 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *Node) {
1211 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();) {
1213 if (
Edge->AllocTypes == (uint8_t)AllocationType::None) {
1215 removeEdgeFromGraph(
Edge.get(), &EI,
true);
1221template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1222void CallsiteContextGraph<
1223 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCallerEdges(ContextNode *Node) {
1224 for (
auto EI =
Node->CallerEdges.begin(); EI !=
Node->CallerEdges.end();) {
1226 if (
Edge->AllocTypes == (uint8_t)AllocationType::None) {
1228 Edge->Caller->eraseCalleeEdge(
Edge.get());
1229 EI =
Node->CallerEdges.erase(EI);
1235template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1236typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1237CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1238 findEdgeFromCallee(
const ContextNode *Callee) {
1239 for (
const auto &
Edge : CalleeEdges)
1240 if (
Edge->Callee == Callee)
1245template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1246typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1247CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1248 findEdgeFromCaller(
const ContextNode *Caller) {
1249 for (
const auto &
Edge : CallerEdges)
1250 if (
Edge->Caller == Caller)
1255template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1256void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1257 eraseCalleeEdge(
const ContextEdge *
Edge) {
1259 CalleeEdges, [
Edge](
const std::shared_ptr<ContextEdge> &CalleeEdge) {
1260 return CalleeEdge.get() ==
Edge;
1262 assert(EI != CalleeEdges.end());
1263 CalleeEdges.erase(EI);
1266template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1267void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1268 eraseCallerEdge(
const ContextEdge *
Edge) {
1270 CallerEdges, [
Edge](
const std::shared_ptr<ContextEdge> &CallerEdge) {
1271 return CallerEdge.get() ==
Edge;
1273 assert(EI != CallerEdges.end());
1274 CallerEdges.erase(EI);
1277template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1278uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType(
1279 DenseSet<uint32_t> &ContextIds)
const {
1281 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
1282 uint8_t
AllocType = (uint8_t)AllocationType::None;
1283 for (
auto Id : ContextIds) {
1284 AllocType |= (uint8_t)ContextIdToAllocationType.at(Id);
1292template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1294CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypesImpl(
1295 const DenseSet<uint32_t> &Node1Ids,
1296 const DenseSet<uint32_t> &Node2Ids)
const {
1298 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
1299 uint8_t
AllocType = (uint8_t)AllocationType::None;
1300 for (
auto Id : Node1Ids) {
1301 if (!Node2Ids.
count(Id))
1303 AllocType |= (uint8_t)ContextIdToAllocationType.at(Id);
1311template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1312uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypes(
1313 const DenseSet<uint32_t> &Node1Ids,
1314 const DenseSet<uint32_t> &Node2Ids)
const {
1315 if (Node1Ids.
size() < Node2Ids.
size())
1316 return intersectAllocTypesImpl(Node1Ids, Node2Ids);
1318 return intersectAllocTypesImpl(Node2Ids, Node1Ids);
1321template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1322typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1323CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode(
1324 CallInfo
Call,
const FuncTy *
F) {
1326 ContextNode *AllocNode = createNewNode(
true,
F,
Call);
1327 AllocationCallToContextNodeMap[
Call] = AllocNode;
1329 AllocNode->OrigStackOrAllocId = LastContextId;
1332 AllocNode->AllocTypes = (uint8_t)AllocationType::None;
1348template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1349template <
class NodeT,
class IteratorT>
1350void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
1351 ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext,
1359 ContextIdToAllocationType[++LastContextId] =
AllocType;
1361 if (!ContextSizeInfo.
empty()) {
1362 auto &
Entry = ContextIdToContextSizeInfos[LastContextId];
1367 AllocNode->AllocTypes |= (uint8_t)
AllocType;
1372 ContextNode *PrevNode = AllocNode;
1376 SmallSet<uint64_t, 8> StackIdSet;
1379 ContextIter != StackContext.
end(); ++ContextIter) {
1380 auto StackId = getStackId(*ContextIter);
1381 ContextNode *StackNode = getNodeForStackId(StackId);
1383 StackNode = createNewNode(
false);
1384 StackEntryIdToContextNodeMap[StackId] = StackNode;
1385 StackNode->OrigStackOrAllocId = StackId;
1392 StackNode->Recursive =
true;
1394 StackNode->AllocTypes |= (uint8_t)
AllocType;
1395 PrevNode->addOrUpdateCallerEdge(StackNode,
AllocType, LastContextId);
1396 PrevNode = StackNode;
1400template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1402CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds(
1403 const DenseSet<uint32_t> &StackSequenceContextIds,
1404 DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
1405 DenseSet<uint32_t> NewContextIds;
1406 for (
auto OldId : StackSequenceContextIds) {
1407 NewContextIds.
insert(++LastContextId);
1408 OldToNewContextIds[OldId].insert(LastContextId);
1409 assert(ContextIdToAllocationType.count(OldId));
1411 ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId];
1412 if (DotAllocContextIds.
contains(OldId))
1413 DotAllocContextIds.
insert(LastContextId);
1415 return NewContextIds;
1418template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1419void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1420 propagateDuplicateContextIds(
1421 const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
1423 auto GetNewIds = [&OldToNewContextIds](
const DenseSet<uint32_t> &ContextIds) {
1424 DenseSet<uint32_t> NewIds;
1425 for (
auto Id : ContextIds)
1426 if (
auto NewId = OldToNewContextIds.find(Id);
1427 NewId != OldToNewContextIds.end())
1433 auto UpdateCallers = [&](ContextNode *
Node,
1434 DenseSet<const ContextEdge *> &Visited,
1435 auto &&UpdateCallers) ->
void {
1436 for (
const auto &
Edge :
Node->CallerEdges) {
1440 ContextNode *NextNode =
Edge->Caller;
1441 DenseSet<uint32_t> NewIdsToAdd = GetNewIds(
Edge->getContextIds());
1444 if (!NewIdsToAdd.
empty()) {
1445 Edge->getContextIds().insert_range(NewIdsToAdd);
1446 UpdateCallers(NextNode, Visited, UpdateCallers);
1451 DenseSet<const ContextEdge *> Visited;
1452 for (
auto &Entry : AllocationCallToContextNodeMap) {
1454 UpdateCallers(Node, Visited, UpdateCallers);
1458template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1459void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
1460 ContextNode *NewNode, ContextNode *OrigNode,
bool TowardsCallee,
1463 DenseSet<uint32_t> RemainingContextIds) {
1465 TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges;
1466 DenseSet<uint32_t> RecursiveContextIds;
1467 DenseSet<uint32_t> AllCallerContextIds;
1472 for (
auto &CE : OrigEdges) {
1473 AllCallerContextIds.
reserve(
CE->getContextIds().size());
1474 for (
auto Id :
CE->getContextIds())
1475 if (!AllCallerContextIds.
insert(Id).second)
1476 RecursiveContextIds.
insert(Id);
1480 for (
auto EI = OrigEdges.begin(); EI != OrigEdges.end();) {
1482 DenseSet<uint32_t> NewEdgeContextIds;
1483 DenseSet<uint32_t> NotFoundContextIds;
1487 set_subtract(
Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds,
1488 NotFoundContextIds);
1491 if (RecursiveContextIds.
empty()) {
1494 RemainingContextIds.
swap(NotFoundContextIds);
1504 DenseSet<uint32_t> NonRecursiveRemainingCurEdgeIds =
1506 set_subtract(RemainingContextIds, NonRecursiveRemainingCurEdgeIds);
1509 if (NewEdgeContextIds.
empty()) {
1513 if (TowardsCallee) {
1514 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1515 auto NewEdge = std::make_shared<ContextEdge>(
1516 Edge->Callee, NewNode, NewAllocType, std::move(NewEdgeContextIds));
1517 NewNode->CalleeEdges.push_back(NewEdge);
1518 NewEdge->Callee->CallerEdges.push_back(NewEdge);
1520 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1521 auto NewEdge = std::make_shared<ContextEdge>(
1522 NewNode,
Edge->Caller, NewAllocType, std::move(NewEdgeContextIds));
1523 NewNode->CallerEdges.push_back(NewEdge);
1524 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
1527 if (
Edge->getContextIds().empty()) {
1528 removeEdgeFromGraph(
Edge.get(), &EI, TowardsCallee);
1535template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1537 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) {
1541 assert(!Edge->ContextIds.empty());
1544template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1546 bool CheckEdges =
true) {
1547 if (
Node->isRemoved())
1551 auto NodeContextIds =
Node->getContextIds();
1555 if (
Node->CallerEdges.size()) {
1557 Node->CallerEdges.front()->ContextIds);
1561 set_union(CallerEdgeContextIds, Edge->ContextIds);
1568 NodeContextIds == CallerEdgeContextIds ||
1571 if (
Node->CalleeEdges.size()) {
1573 Node->CalleeEdges.front()->ContextIds);
1577 set_union(CalleeEdgeContextIds, Edge->getContextIds());
1583 NodeContextIds == CalleeEdgeContextIds);
1592 for (
const auto &
E :
Node->CalleeEdges)
1598template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1599void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1600 assignStackNodesPostOrder(
1601 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
1602 DenseMap<uint64_t, std::vector<CallContextInfo>>
1603 &StackIdToMatchingCalls,
1604 DenseMap<CallInfo, CallInfo> &CallToMatchingCall) {
1612 auto CallerEdges =
Node->CallerEdges;
1613 for (
auto &
Edge : CallerEdges) {
1615 if (
Edge->isRemoved()) {
1619 assignStackNodesPostOrder(
Edge->Caller, Visited, StackIdToMatchingCalls,
1620 CallToMatchingCall);
1629 if (
Node->IsAllocation ||
1630 !StackIdToMatchingCalls.count(
Node->OrigStackOrAllocId))
1633 auto &Calls = StackIdToMatchingCalls[
Node->OrigStackOrAllocId];
1637 if (Calls.size() == 1) {
1638 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[0];
1639 if (Ids.size() == 1) {
1640 assert(SavedContextIds.empty());
1642 assert(Node == getNodeForStackId(Ids[0]));
1643 if (
Node->Recursive)
1646 NonAllocationCallToContextNodeMap[
Call] =
Node;
1655 uint64_t LastId =
Node->OrigStackOrAllocId;
1656 ContextNode *LastNode = getNodeForStackId(LastId);
1659 assert(LastNode == Node);
1661 ContextNode *LastNode =
Node;
1666 DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
1668 [[maybe_unused]]
bool PrevIterCreatedNode =
false;
1669 bool CreatedNode =
false;
1670 for (
unsigned I = 0;
I < Calls.size();
1671 I++, PrevIterCreatedNode = CreatedNode) {
1672 CreatedNode =
false;
1673 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
1676 if (SavedContextIds.empty()) {
1683 auto MatchingCall = CallToMatchingCall[
Call];
1684 if (!NonAllocationCallToContextNodeMap.contains(MatchingCall)) {
1688 assert(
I > 0 && !PrevIterCreatedNode);
1691 NonAllocationCallToContextNodeMap[MatchingCall]->MatchingCalls.push_back(
1696 assert(LastId == Ids.back());
1705 ContextNode *PrevNode = LastNode;
1709 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
1711 ContextNode *CurNode = getNodeForStackId(Id);
1715 assert(!CurNode->Recursive);
1717 auto *
Edge = CurNode->findEdgeFromCaller(PrevNode);
1729 if (SavedContextIds.empty()) {
1738 ContextNode *NewNode = createNewNode(
false, Func,
Call);
1739 NonAllocationCallToContextNodeMap[
Call] = NewNode;
1741 NewNode->AllocTypes = computeAllocType(SavedContextIds);
1743 ContextNode *FirstNode = getNodeForStackId(Ids[0]);
1749 connectNewNode(NewNode, FirstNode,
true, SavedContextIds);
1754 connectNewNode(NewNode, LastNode,
false, SavedContextIds);
1759 for (
auto Id : Ids) {
1760 ContextNode *CurNode = getNodeForStackId(Id);
1767 auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode);
1774 set_subtract(PrevEdge->getContextIds(), SavedContextIds);
1775 if (PrevEdge->getContextIds().empty())
1776 removeEdgeFromGraph(PrevEdge);
1781 CurNode->AllocTypes = CurNode->CalleeEdges.empty()
1782 ? (uint8_t)AllocationType::None
1783 : CurNode->computeAllocType();
1788 for (
auto Id : Ids) {
1789 ContextNode *CurNode = getNodeForStackId(Id);
// Matches each non-allocation callsite (collected in FuncToCallsWithMetadata)
// to the chain of stack-context nodes it corresponds to, creating/assigning
// context ids per call. Calls sharing the same trailing stack id are grouped
// in StackIdToMatchingCalls, sorted (longest stack-id sequence first, ties
// broken by sequence content then original function order via FuncToIndex),
// then context ids are computed by walking nodes from the last stack id
// backwards. Identical sequences in different functions get duplicated
// context ids; finally assignStackNodesPostOrder applies the assignments.
// NOTE(review): this listing omits interleaved source lines; consult the
// original file for the full body.
1798template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1799void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
1807 DenseMap<uint64_t, std::vector<CallContextInfo>> StackIdToMatchingCalls;
1808 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
1809 for (
auto &
Call : CallsWithMetadata) {
1811 if (AllocationCallToContextNodeMap.count(
Call))
1813 auto StackIdsWithContextNodes =
1814 getStackIdsWithContextNodesForCall(
Call.call());
1817 if (StackIdsWithContextNodes.empty())
1821 StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back(
1822 {
Call.call(), StackIdsWithContextNodes,
Func, {}});
// Maps old context ids to the set of new (duplicated) ids, propagated at end.
1832 DenseMap<uint32_t, DenseSet<uint32_t>> OldToNewContextIds;
1836 DenseMap<CallInfo, CallInfo> CallToMatchingCall;
1837 for (
auto &It : StackIdToMatchingCalls) {
1838 auto &Calls = It.getSecond();
1840 if (Calls.size() == 1) {
1841 auto &Ids = Calls[0].StackIds;
1842 if (Ids.size() == 1)
1855 DenseMap<const FuncTy *, unsigned> FuncToIndex;
1856 for (
const auto &[Idx, CallCtxInfo] :
enumerate(Calls))
1857 FuncToIndex.
insert({CallCtxInfo.Func, Idx});
1860 [&FuncToIndex](
const CallContextInfo &
A,
const CallContextInfo &
B) {
1861 return A.StackIds.size() >
B.StackIds.size() ||
1862 (
A.StackIds.size() ==
B.StackIds.size() &&
1863 (
A.StackIds <
B.StackIds ||
1864 (
A.StackIds ==
B.StackIds &&
1865 FuncToIndex[
A.Func] < FuncToIndex[
B.Func])));
1871 uint64_t LastId = It.getFirst();
1872 ContextNode *LastNode = getNodeForStackId(LastId);
1876 if (LastNode->Recursive)
1881 DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
1889 DenseSet<const FuncTy *> MatchingIdsFuncSet;
1892 for (
unsigned I = 0;
I < Calls.size();
I++) {
1893 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
1894 assert(SavedContextIds.empty());
1895 assert(LastId == Ids.back());
1900 if (
I > 0 && Ids != Calls[
I - 1].StackIds)
1901 MatchingIdsFuncSet.
clear();
// Narrow the candidate context-id set by intersecting along the node
// chain for this call's stack-id sequence (walked in reverse).
1908 DenseSet<uint32_t> StackSequenceContextIds = LastNodeContextIds;
1910 ContextNode *PrevNode = LastNode;
1911 ContextNode *CurNode = LastNode;
1916 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
1918 CurNode = getNodeForStackId(Id);
1922 if (CurNode->Recursive) {
1927 auto *
Edge = CurNode->findEdgeFromCaller(PrevNode);
1948 if (StackSequenceContextIds.
empty()) {
1961 if (Ids.back() != getLastStackId(
Call)) {
1962 for (
const auto &PE : LastNode->CallerEdges) {
1963 set_subtract(StackSequenceContextIds, PE->getContextIds());
1964 if (StackSequenceContextIds.
empty())
1968 if (StackSequenceContextIds.
empty())
1980 MatchingIdsFuncSet.
insert(Func);
// If later calls in other functions share this exact sequence, the ids
// must be duplicated so each function's call gets its own set.
1987 bool DuplicateContextIds =
false;
1988 for (
unsigned J =
I + 1; J < Calls.size(); J++) {
1989 auto &CallCtxInfo = Calls[J];
1990 auto &NextIds = CallCtxInfo.StackIds;
1993 auto *NextFunc = CallCtxInfo.Func;
1994 if (NextFunc != Func) {
1997 DuplicateContextIds =
true;
2000 auto &NextCall = CallCtxInfo.Call;
2001 CallToMatchingCall[NextCall] =
Call;
2012 OldToNewContextIds.
reserve(OldToNewContextIds.
size() +
2013 StackSequenceContextIds.
size());
2016 ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds)
2017 : StackSequenceContextIds;
2018 assert(!SavedContextIds.empty());
2020 if (!DuplicateContextIds) {
2024 set_subtract(LastNodeContextIds, StackSequenceContextIds);
2025 if (LastNodeContextIds.
empty())
2032 propagateDuplicateContextIds(OldToNewContextIds);
// Assign the saved context ids in post order starting from each allocation.
2042 DenseSet<const ContextNode *> Visited;
2043 for (
auto &Entry : AllocationCallToContextNodeMap)
2044 assignStackNodesPostOrder(
Entry.second, Visited, StackIdToMatchingCalls,
2045 CallToMatchingCall);
// Returns the last (innermost-to-outermost ordering per CallStack) stack id
// recorded in the !callsite metadata attached to this instruction.
2050uint64_t ModuleCallsiteContextGraph::getLastStackId(Instruction *
Call) {
2051 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
2053 return CallsiteContext.
back();
// Summary-index variant: the callsite records stack-id *indices*, so the last
// index is translated to the actual stack id via Index.getStackIdAtIndex.
2056uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &
Call) {
2058 CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
2061 return Index.getStackIdAtIndex(CallsiteContext.
back());
// Fragment: parses the numeric clone suffix following the last '.' in the
// function's name into CloneNo via getAsInteger (base 10).
// NOTE(review): the enclosing function's signature is not visible in this
// listing — confirm against the original file.
2083 auto Pos =
F.getName().find_last_of(
'.');
2086 bool Err =
F.getName().drop_front(Pos + 1).getAsInteger(10, CloneNo);
// Builds a human-readable label for a callsite/allocation (used for dot graph
// and debug printing). Body elided in this listing.
2092std::string ModuleCallsiteContextGraph::getLabel(
const Function *Func,
2093 const Instruction *
Call,
2094 unsigned CloneNo)
const {
// Summary-index label: looks up the function's ValueInfo in FSToVIMap and
// renders either "<caller> -> alloc" for allocations or
// "<caller> -> <callee clone>" for callsites.
2100std::string IndexCallsiteContextGraph::getLabel(
const FunctionSummary *Func,
2101 const IndexCall &
Call,
2102 unsigned CloneNo)
const {
2103 auto VI = FSToVIMap.find(Func);
2104 assert(VI != FSToVIMap.end());
2107 return CallerName +
" -> alloc";
2110 return CallerName +
" -> " +
2112 Callsite->Clones[CloneNo]);
// IR variant: wraps the instruction's !callsite metadata in a CallStack and
// delegates to the templated getStackIdsWithContextNodes.
2116std::vector<uint64_t>
2117ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall(
2118 Instruction *
Call) {
2119 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
2121 return getStackIdsWithContextNodes<MDNode, MDNode::op_iterator>(
// Summary-index variant: same delegation, instantiated over CallsiteInfo and
// its stack-id-index iterator.
2125std::vector<uint64_t>
2126IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &
Call) {
2128 CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
2130 return getStackIdsWithContextNodes<CallsiteInfo,
2131 SmallVector<unsigned>::const_iterator>(
// Walks the call's stack context and collects each stack id that already has
// a ContextNode in the graph; stops at the first id without a node (the
// remainder of the context cannot be matched). Returns the collected ids.
2135template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2136template <
class NodeT,
class IteratorT>
2137std::vector<uint64_t>
2138CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes(
2139 CallStack<NodeT, IteratorT> &CallsiteContext) {
2140 std::vector<uint64_t> StackIds;
2141 for (
auto IdOrIndex : CallsiteContext) {
2142 auto StackId = getStackId(IdOrIndex);
2143 ContextNode *
Node = getNodeForStackId(StackId);
2146 StackIds.push_back(StackId);
// Builds the callsite context graph from IR: for every instruction carrying
// !memprof metadata an allocation node is created and stack nodes are added
// per MIB (including optional context-size operands beyond the first two);
// instructions with only !callsite metadata are recorded as callsites. The
// consumed !memprof/!callsite metadata is stripped as it is ingested, the
// graph is dumped/exported around updateStackNodes, callsites with multiple
// targets are handled, and finally all remaining !callsite metadata is
// cleared.
// NOTE(review): this listing omits interleaved source lines; consult the
// original file for the full body.
2151ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
2153 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter)
2154 :
Mod(
M), OREGetter(OREGetter) {
2156 std::vector<CallInfo> CallsWithMetadata;
2157 for (
auto &BB :
F) {
2158 for (
auto &
I : BB) {
2161 if (
auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof)) {
2162 CallsWithMetadata.push_back(&
I);
2163 auto *AllocNode = addAllocNode(&
I, &
F);
2164 auto *CallsiteMD =
I.getMetadata(LLVMContext::MD_callsite);
2168 for (
auto &MDOp : MemProfMD->operands()) {
2170 std::vector<ContextTotalSize> ContextSizeInfo;
2172 if (MIBMD->getNumOperands() > 2) {
2173 for (
unsigned I = 2;
I < MIBMD->getNumOperands();
I++) {
2174 MDNode *ContextSizePair =
2183 ContextSizeInfo.push_back({FullStackId, TotalSize});
2189 addStackNodesForMIB<MDNode, MDNode::op_iterator>(
2190 AllocNode, StackContext, CallsiteContext,
2196 DotAllocContextIds = AllocNode->getContextIds();
2200 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
2201 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
2204 else if (
I.getMetadata(LLVMContext::MD_callsite)) {
2205 CallsWithMetadata.push_back(&
I);
2209 if (!CallsWithMetadata.empty())
2210 FuncToCallsWithMetadata[&
F] = CallsWithMetadata;
2214 dbgs() <<
"CCG before updating call stack chains:\n";
2219 exportToDot(
"prestackupdate");
2224 exportToDot(
"poststackupdate");
2226 handleCallsitesWithMultipleTargets();
2231 for (
auto &FuncEntry : FuncToCallsWithMetadata)
2232 for (
auto &
Call : FuncEntry.second)
2233 Call.call()->setMetadata(LLVMContext::MD_callsite,
nullptr);
// ThinLTO variant of the graph constructor: iterates the module summary
// index, skipping non-prevailing copies, and builds allocation nodes from
// each summary's alloc records (per-MIB stack nodes plus optional
// ContextSizeInfos) and callsite nodes from its callsite records. A single
// version's alloc type is pre-resolved into AN.Versions[0]. The same
// dump/export + updateStackNodes + handleCallsitesWithMultipleTargets
// sequence as the module constructor follows.
// NOTE(review): this listing omits interleaved source lines; consult the
// original file for the full body.
2236IndexCallsiteContextGraph::IndexCallsiteContextGraph(
2240 : Index(Index), isPrevailing(isPrevailing) {
2241 for (
auto &
I : Index) {
2242 auto VI = Index.getValueInfo(
I);
2243 for (
auto &S : VI.getSummaryList()) {
2252 !isPrevailing(VI.getGUID(), S.get()))
2257 std::vector<CallInfo> CallsWithMetadata;
2258 if (!
FS->allocs().empty()) {
2259 for (
auto &AN :
FS->mutableAllocs()) {
2264 if (AN.MIBs.empty())
2266 IndexCall AllocCall(&AN);
2267 CallsWithMetadata.push_back(AllocCall);
2268 auto *AllocNode = addAllocNode(AllocCall, FS);
2276 AN.ContextSizeInfos.size() == AN.MIBs.size());
2278 for (
auto &MIB : AN.MIBs) {
2281 std::vector<ContextTotalSize> ContextSizeInfo;
2282 if (!AN.ContextSizeInfos.empty()) {
2283 for (
auto [FullStackId, TotalSize] : AN.ContextSizeInfos[
I])
2284 ContextSizeInfo.push_back({FullStackId, TotalSize});
2286 addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>(
2287 AllocNode, StackContext, EmptyContext, MIB.AllocType,
2294 DotAllocContextIds = AllocNode->getContextIds();
2300 AN.Versions[0] = (
uint8_t)allocTypeToUse(AllocNode->AllocTypes);
2304 if (!
FS->callsites().empty())
2305 for (
auto &SN :
FS->mutableCallsites()) {
2306 IndexCall StackNodeCall(&SN);
2307 CallsWithMetadata.push_back(StackNodeCall);
2310 if (!CallsWithMetadata.empty())
2311 FuncToCallsWithMetadata[
FS] = CallsWithMetadata;
2313 if (!
FS->allocs().empty() || !
FS->callsites().empty())
2319 dbgs() <<
"CCG before updating call stack chains:\n";
2324 exportToDot(
"prestackupdate");
2329 exportToDot(
"poststackupdate");
2331 handleCallsitesWithMultipleTargets();
// Reconciles graph nodes whose profiled callee does not match the IR/summary
// callee. For each non-allocation node, all calls matched to it (Call plus
// MatchingCalls) are optionally partitioned by callee; otherwise the first
// call whose callees match all callee edges (via calleesMatch, which may
// synthesize tail-call chain nodes) becomes the node's call. If none match,
// the node's callee edges are removed and its call cleared
// (RemovedEdgesWithMismatchedCallees). Stale map entries are pruned and new
// nodes from partitioning/tail-call discovery are registered.
// NOTE(review): this listing omits interleaved source lines; consult the
// original file for the full body.
2336template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2337void CallsiteContextGraph<DerivedCCG, FuncTy,
2338 CallTy>::handleCallsitesWithMultipleTargets() {
2353 std::vector<std::pair<CallInfo, ContextNode *>> NewCallToNode;
2354 for (
auto &Entry : NonAllocationCallToContextNodeMap) {
2355 auto *
Node = Entry.second;
2364 std::vector<CallInfo> AllCalls;
2365 AllCalls.reserve(
Node->MatchingCalls.size() + 1);
2366 AllCalls.push_back(
Node->Call);
2380 if (partitionCallsByCallee(
Node, AllCalls, NewCallToNode))
2383 auto It = AllCalls.begin();
2385 for (; It != AllCalls.end(); ++It) {
2388 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();
2391 if (!Edge->Callee->hasCall())
2393 assert(NodeToCallingFunc.count(Edge->Callee));
2395 if (!calleesMatch(
ThisCall.call(), EI, TailCallToContextNodeMap)) {
2404 if (
Node->Call != ThisCall) {
2405 Node->setCall(ThisCall);
2416 Node->MatchingCalls.clear();
2419 if (It == AllCalls.end()) {
2420 RemovedEdgesWithMismatchedCallees++;
2424 Node->setCall(CallInfo());
2429 for (++It; It != AllCalls.end(); ++It) {
2433 Node->MatchingCalls.push_back(ThisCall);
// Drop map entries whose node lost its call or now maps a different call.
2442 NonAllocationCallToContextNodeMap.remove_if([](
const auto &it) {
2443 return !it.second->hasCall() || it.second->Call != it.first;
2447 for (
auto &[
Call,
Node] : NewCallToNode)
2448 NonAllocationCallToContextNodeMap[
Call] =
Node;
2452 for (
auto &[
Call,
Node] : TailCallToContextNodeMap)
2453 NonAllocationCallToContextNodeMap[
Call] =
Node;
// When a node's matched calls target different callees, splits the node:
// calls are grouped by their IR/summary callee (CalleeFuncToCallInfo), callee
// edges are mapped to the group matching their profiled callee
// (CalleeNodeToCallInfo), and edges are moved to per-group nodes — the
// original node is reused for the first group, new nodes are created for the
// rest and for edges with no matching group (UnmatchedCalleesNode). Returns
// false (no partitioning) if no callee edge matched any group. None-type
// caller edges are cleaned up on all touched nodes.
// NOTE(review): this listing omits interleaved source lines; consult the
// original file for the full body.
2456template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2457bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::partitionCallsByCallee(
2459 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode) {
2463 struct CallsWithSameCallee {
2464 std::vector<CallInfo> Calls;
2465 ContextNode *
Node =
nullptr;
2471 for (
auto ThisCall : AllCalls) {
2472 auto *
F = getCalleeFunc(
ThisCall.call());
2474 CalleeFuncToCallInfo[
F].Calls.push_back(ThisCall);
2483 for (
const auto &Edge :
Node->CalleeEdges) {
2484 if (!Edge->Callee->hasCall())
2486 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee];
2487 if (CalleeFuncToCallInfo.
contains(ProfiledCalleeFunc))
2488 CalleeNodeToCallInfo[Edge->Callee] =
2489 &CalleeFuncToCallInfo[ProfiledCalleeFunc];
2495 if (CalleeNodeToCallInfo.
empty())
2507 ContextNode *UnmatchedCalleesNode =
nullptr;
2509 bool UsedOrigNode =
false;
// Iterate over a copy since moveCalleeEdgeToNewCaller mutates the edge list.
2514 auto CalleeEdges =
Node->CalleeEdges;
2515 for (
auto &Edge : CalleeEdges) {
2516 if (!Edge->Callee->hasCall())
2521 ContextNode *CallerNodeToUse =
nullptr;
2525 if (!CalleeNodeToCallInfo.
contains(Edge->Callee)) {
2526 if (!UnmatchedCalleesNode)
2527 UnmatchedCalleesNode =
2528 createNewNode(
false, NodeToCallingFunc[
Node]);
2529 CallerNodeToUse = UnmatchedCalleesNode;
2533 auto *
Info = CalleeNodeToCallInfo[Edge->Callee];
2536 if (!UsedOrigNode) {
2539 Node->MatchingCalls.clear();
2540 UsedOrigNode =
true;
2543 createNewNode(
false, NodeToCallingFunc[
Node]);
2547 Info->Node->setCall(
Info->Calls.front());
2553 NewCallToNode.push_back({
Info->Node->Call,
Info->Node});
2555 CallerNodeToUse =
Info->Node;
2559 if (CallerNodeToUse ==
Node)
2562 moveCalleeEdgeToNewCaller(Edge, CallerNodeToUse);
2569 for (
auto &
I : CalleeNodeToCallInfo)
2570 removeNoneTypeCallerEdges(
I.second->Node);
2571 if (UnmatchedCalleesNode)
2572 removeNoneTypeCallerEdges(UnmatchedCalleesNode);
2573 removeNoneTypeCallerEdges(
Node);
// In the summary index the graph stores stack-id *indices*; translate to the
// actual stack id.
2583uint64_t IndexCallsiteContextGraph::getStackId(uint64_t IdOrIndex)
const {
2586 return Index.getStackIdAtIndex(IdOrIndex);
// Checks whether the IR/summary callee of Call matches the profiled callee of
// the edge at EI (via calleeMatchesFunc), possibly through a chain of tail
// calls. On a tail-call match, synthesizes ContextNodes for the discovered
// chain (reusing TailCallToContextNodeMap entries), threads edges through
// them (merging into existing edges where present, carefully restoring EI
// after insertion into the caller's edge list), and removes the original
// direct edge. Returns false when no (unique) match is found.
// NOTE(review): this listing omits interleaved source lines; consult the
// original file for the full body.
2589template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2590bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::calleesMatch(
2591 CallTy
Call, EdgeIter &EI,
2592 MapVector<CallInfo, ContextNode *> &TailCallToContextNodeMap) {
2594 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[
Edge->Callee];
2595 const FuncTy *CallerFunc = NodeToCallingFunc[
Edge->Caller];
2598 std::vector<std::pair<CallTy, FuncTy *>> FoundCalleeChain;
2599 if (!calleeMatchesFunc(
Call, ProfiledCalleeFunc, CallerFunc,
2604 if (FoundCalleeChain.empty())
2608 auto *CurEdge =
Callee->findEdgeFromCaller(Caller);
2612 CurEdge->ContextIds.insert_range(
Edge->ContextIds);
2613 CurEdge->AllocTypes |=
Edge->AllocTypes;
2618 auto NewEdge = std::make_shared<ContextEdge>(
2619 Callee, Caller,
Edge->AllocTypes,
Edge->ContextIds);
2620 Callee->CallerEdges.push_back(NewEdge);
2621 if (Caller ==
Edge->Caller) {
2625 EI =
Caller->CalleeEdges.insert(EI, NewEdge);
2628 "Iterator position not restored after insert and increment");
2630 Caller->CalleeEdges.push_back(NewEdge);
// Thread new nodes along the discovered tail-call chain, callee-first.
2635 auto *CurCalleeNode =
Edge->Callee;
2636 for (
auto &[NewCall, Func] : FoundCalleeChain) {
2637 ContextNode *NewNode =
nullptr;
2639 if (TailCallToContextNodeMap.
count(NewCall)) {
2640 NewNode = TailCallToContextNodeMap[NewCall];
2641 NewNode->AllocTypes |=
Edge->AllocTypes;
2643 FuncToCallsWithMetadata[
Func].push_back({NewCall});
2645 NewNode = createNewNode(
false, Func, NewCall);
2646 TailCallToContextNodeMap[NewCall] = NewNode;
2647 NewNode->AllocTypes =
Edge->AllocTypes;
2651 AddEdge(NewNode, CurCalleeNode);
2653 CurCalleeNode = NewNode;
2657 AddEdge(
Edge->Caller, CurCalleeNode);
2665 removeEdgeFromGraph(
Edge.get(), &EI,
true);
// Recursive DFS (depth-limited by caller) over tail calls in CurCallee's body
// looking for a unique chain ending at ProfiledCallee. Records the chain in
// FoundCalleeChain and updates the FoundProfiledCallee* statistics; sets
// FoundMultipleCalleeChains (and returns false) when more than one chain is
// found. Aliases are stripped via stripPointerCasts before comparison.
// NOTE(review): this listing omits interleaved source lines; consult the
// original file for the full body.
2677bool ModuleCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
2678 const Function *ProfiledCallee,
Value *CurCallee,
unsigned Depth,
2679 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
2680 bool &FoundMultipleCalleeChains) {
2687 FoundCalleeChain.push_back({Callsite,
F});
2702 bool FoundSingleCalleeChain =
false;
2703 for (
auto &BB : *CalleeFunc) {
2704 for (
auto &
I : BB) {
2706 if (!CB || !CB->isTailCall())
2708 auto *CalledValue = CB->getCalledOperand();
2709 auto *CalledFunction = CB->getCalledFunction();
2710 if (CalledValue && !CalledFunction) {
2711 CalledValue = CalledValue->stripPointerCasts();
2718 assert(!CalledFunction &&
2719 "Expected null called function in callsite for alias");
2722 if (!CalledFunction)
2724 if (CalledFunction == ProfiledCallee) {
2725 if (FoundSingleCalleeChain) {
2726 FoundMultipleCalleeChains =
true;
2729 FoundSingleCalleeChain =
true;
2730 FoundProfiledCalleeCount++;
2731 FoundProfiledCalleeDepth +=
Depth;
2732 if (
Depth > FoundProfiledCalleeMaxDepth)
2733 FoundProfiledCalleeMaxDepth =
Depth;
2734 SaveCallsiteInfo(&
I, CalleeFunc);
2735 }
else if (findProfiledCalleeThroughTailCalls(
2736 ProfiledCallee, CalledFunction,
Depth + 1,
2737 FoundCalleeChain, FoundMultipleCalleeChains)) {
2740 assert(!FoundMultipleCalleeChains);
2741 if (FoundSingleCalleeChain) {
2742 FoundMultipleCalleeChains =
true;
2745 FoundSingleCalleeChain =
true;
2746 SaveCallsiteInfo(&
I, CalleeFunc);
2747 }
else if (FoundMultipleCalleeChains)
2752 return FoundSingleCalleeChain;
// Returns the direct callee Function of a call instruction (nullptr path for
// indirect/missing callees; result of stripPointerCasts is used).
2755const Function *ModuleCallsiteContextGraph::getCalleeFunc(Instruction *
Call) {
2757 if (!CB->getCalledOperand() || CB->isIndirectCall())
2759 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
// True if Call's IR callee is Func (directly or via an alias); otherwise
// tries to connect them through a unique tail-call chain, emitting a debug
// message and bumping FoundProfiledCalleeNonUniquelyCount when multiple
// possible chains exist.
2766bool ModuleCallsiteContextGraph::calleeMatchesFunc(
2767 Instruction *
Call,
const Function *Func,
const Function *CallerFunc,
2768 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain) {
2770 if (!CB->getCalledOperand() || CB->isIndirectCall())
2772 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
2774 if (CalleeFunc == Func)
2777 if (Alias && Alias->getAliasee() == Func)
2788 bool FoundMultipleCalleeChains =
false;
2789 if (!findProfiledCalleeThroughTailCalls(Func, CalleeVal,
Depth,
2791 FoundMultipleCalleeChains)) {
2792 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: "
2793 <<
Func->getName() <<
" from " << CallerFunc->
getName()
2794 <<
" that actually called " << CalleeVal->getName()
2795 << (FoundMultipleCalleeChains
2796 ?
" (found multiple possible chains)"
2799 if (FoundMultipleCalleeChains)
2800 FoundProfiledCalleeNonUniquelyCount++;
// True if both calls resolve (after stripPointerCasts, excluding indirect
// calls) to the same callee Function.
2807bool ModuleCallsiteContextGraph::sameCallee(Instruction *Call1,
2808 Instruction *Call2) {
2810 if (!CB1->getCalledOperand() || CB1->isIndirectCall())
2812 auto *CalleeVal1 = CB1->getCalledOperand()->stripPointerCasts();
2815 if (!CB2->getCalledOperand() || CB2->isIndirectCall())
2817 auto *CalleeVal2 = CB2->getCalledOperand()->stripPointerCasts();
2819 return CalleeFunc1 == CalleeFunc2;
// Summary-index counterpart of the tail-call DFS: walks tail-call edges in
// the summary call graph looking for a unique chain to ProfiledCallee.
// Synthesized CallsiteInfos are cached in
// FunctionCalleesToSynthesizedCallsiteInfos so repeated queries reuse the
// same object, and FSToVIMap is kept consistent (aliases resolved to the
// aliasee ValueInfo). Non-prevailing summaries are skipped.
// NOTE(review): this listing omits interleaved source lines; consult the
// original file for the full body.
2822bool IndexCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
2823 ValueInfo ProfiledCallee, ValueInfo CurCallee,
unsigned Depth,
2824 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
2825 bool &FoundMultipleCalleeChains) {
2831 auto CreateAndSaveCallsiteInfo = [&](ValueInfo
Callee, FunctionSummary *
FS) {
2834 if (!FunctionCalleesToSynthesizedCallsiteInfos.count(FS) ||
2835 !FunctionCalleesToSynthesizedCallsiteInfos[FS].count(Callee))
2838 FunctionCalleesToSynthesizedCallsiteInfos[
FS][
Callee] =
2839 std::make_unique<CallsiteInfo>(Callee, SmallVector<unsigned>());
2840 CallsiteInfo *NewCallsiteInfo =
2841 FunctionCalleesToSynthesizedCallsiteInfos[
FS][
Callee].get();
2842 FoundCalleeChain.push_back({NewCallsiteInfo,
FS});
2849 bool FoundSingleCalleeChain =
false;
2852 !isPrevailing(CurCallee.
getGUID(), S.get()))
2857 auto FSVI = CurCallee;
2860 FSVI = AS->getAliaseeVI();
2861 for (
auto &CallEdge :
FS->calls()) {
2862 if (!CallEdge.second.hasTailCall())
2864 if (CallEdge.first == ProfiledCallee) {
2865 if (FoundSingleCalleeChain) {
2866 FoundMultipleCalleeChains =
true;
2869 FoundSingleCalleeChain =
true;
2870 FoundProfiledCalleeCount++;
2871 FoundProfiledCalleeDepth +=
Depth;
2872 if (
Depth > FoundProfiledCalleeMaxDepth)
2873 FoundProfiledCalleeMaxDepth =
Depth;
2874 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
2876 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
2877 FSToVIMap[
FS] = FSVI;
2878 }
else if (findProfiledCalleeThroughTailCalls(
2879 ProfiledCallee, CallEdge.first,
Depth + 1,
2880 FoundCalleeChain, FoundMultipleCalleeChains)) {
2883 assert(!FoundMultipleCalleeChains);
2884 if (FoundSingleCalleeChain) {
2885 FoundMultipleCalleeChains =
true;
2888 FoundSingleCalleeChain =
true;
2889 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
2891 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
2892 FSToVIMap[
FS] = FSVI;
2893 }
else if (FoundMultipleCalleeChains)
2898 return FoundSingleCalleeChain;
// Returns the callee's FunctionSummary for a summary callsite, bailing out
// when the callee ValueInfo has no summaries.
2901const FunctionSummary *
2902IndexCallsiteContextGraph::getCalleeFunc(IndexCall &
Call) {
2904 if (
Callee.getSummaryList().empty())
// Summary-index callee match: compares the callsite's callee ValueInfo
// against Func's ValueInfo (from FSToVIMap), accepting alias summaries, and
// otherwise falls back to the unique-tail-call-chain search with the same
// debug message / statistic as the module variant.
2909bool IndexCallsiteContextGraph::calleeMatchesFunc(
2910 IndexCall &
Call,
const FunctionSummary *Func,
2911 const FunctionSummary *CallerFunc,
2912 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain) {
2916 AliasSummary *Alias =
2917 Callee.getSummaryList().empty()
2920 assert(FSToVIMap.count(Func));
2921 auto FuncVI = FSToVIMap[
Func];
2922 if (Callee == FuncVI ||
2937 bool FoundMultipleCalleeChains =
false;
2938 if (!findProfiledCalleeThroughTailCalls(
2939 FuncVI, Callee,
Depth, FoundCalleeChain, FoundMultipleCalleeChains)) {
2940 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: " << FuncVI
2941 <<
" from " << FSToVIMap[CallerFunc]
2942 <<
" that actually called " << Callee
2943 << (FoundMultipleCalleeChains
2944 ?
" (found multiple possible chains)"
2947 if (FoundMultipleCalleeChains)
2948 FoundProfiledCalleeNonUniquelyCount++;
// Summary-index variant: two callsites match when their callee ValueInfos
// are equal.
2955bool IndexCallsiteContextGraph::sameCallee(IndexCall &Call1, IndexCall &Call2) {
2958 return Callee1 == Callee2;
// Debug helper: dumps this node (body elided in this listing; presumably
// delegates to print — confirm against the original file).
2961template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2962void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump()
// Prints a node: address, recursive flag, matching calls, NodeId, sorted
// context ids, callee/caller edges with their peer NodeIds, and clone
// relationships (either the list of clones or the CloneOf original).
// NOTE(review): this listing omits interleaved source lines.
2968template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2969void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
2970 raw_ostream &OS)
const {
2971 OS <<
"Node " <<
this <<
"\n";
2975 OS <<
" (recursive)";
2977 if (!MatchingCalls.empty()) {
2978 OS <<
"\tMatchingCalls:\n";
2979 for (
auto &MatchingCall : MatchingCalls) {
2981 MatchingCall.print(OS);
2985 OS <<
"\tNodeId: " <<
NodeId <<
"\n";
2987 OS <<
"\tContextIds:";
2989 auto ContextIds = getContextIds();
2990 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
2991 std::sort(SortedIds.begin(), SortedIds.end());
2992 for (
auto Id : SortedIds)
2995 OS <<
"\tCalleeEdges:\n";
2996 for (
auto &
Edge : CalleeEdges)
2997 OS <<
"\t\t" << *
Edge <<
" (Callee NodeId: " <<
Edge->Callee->NodeId
2999 OS <<
"\tCallerEdges:\n";
3000 for (
auto &
Edge : CallerEdges)
3001 OS <<
"\t\t" << *
Edge <<
" (Caller NodeId: " <<
Edge->Caller->NodeId
3003 if (!Clones.empty()) {
3006 for (
auto *
C : Clones) {
3010 OS <<
C <<
" NodeId: " <<
C->NodeId;
3013 }
else if (CloneOf) {
3014 OS <<
"\tClone of " << CloneOf <<
" NodeId: " << CloneOf->NodeId <<
"\n";
// Debug helper: dumps this edge (body elided in this listing; presumably
// delegates to print — confirm against the original file).
3018template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3019void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::dump()
// Prints an edge: callee/caller endpoints, a "(BE)" marker for backedges, and
// its sorted context ids.
3025template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3026void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::print(
3027 raw_ostream &OS)
const {
3028 OS <<
"Edge from Callee " <<
Callee <<
" to Caller: " <<
Caller
3029 << (IsBackedge ?
" (BE)" :
"")
3031 OS <<
" ContextIds:";
3032 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
3033 std::sort(SortedIds.begin(), SortedIds.end());
3034 for (
auto Id : SortedIds)
// Debug helper: dumps the whole graph (body elided in this listing).
3038template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3039void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::dump()
const {
// Prints every live node of the graph (removed nodes are skipped) using the
// GraphTraits node iteration.
3043template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3044void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
3045 raw_ostream &OS)
const {
3046 OS <<
"Callsite Context Graph:\n";
3047 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3049 if (
Node->isRemoved())
// For every live allocation node, prints a "MemProf hinting:" line per
// recorded context-size entry: full stack id, total size, the allocation
// type from ContextIdToAllocationType, a note when the hinted type differs
// from the call's current attribute (cold byte percent), and the context id.
// NOTE(review): this listing omits interleaved source lines.
3057template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3057void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::printTotalSizes(
3058 raw_ostream &OS)
const {
3059 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3061 if (
Node->isRemoved())
3063 if (!
Node->IsAllocation)
3065 DenseSet<uint32_t> ContextIds =
Node->getContextIds();
3066 auto AllocTypeFromCall = getAllocationCallType(
Node->Call);
3067 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
3068 std::sort(SortedIds.begin(), SortedIds.end());
3069 for (
auto Id : SortedIds) {
3070 auto TypeI = ContextIdToAllocationType.find(Id);
3071 assert(TypeI != ContextIdToAllocationType.end());
3072 auto CSI = ContextIdToContextSizeInfos.find(Id);
3073 if (CSI != ContextIdToContextSizeInfos.end()) {
3074 for (
auto &
Info : CSI->second) {
3075 OS <<
"MemProf hinting: "
3077 <<
" full allocation context " <<
Info.FullStackId
3078 <<
" with total size " <<
Info.TotalSize <<
" is "
3080 if (allocTypeToUse(
Node->AllocTypes) != AllocTypeFromCall)
3082 <<
" due to cold byte percent";
3084 OS <<
" (context id " <<
Id <<
")";
// Graph invariant checker: iterates nodes and their caller edges (body mostly
// elided in this listing; assertions presumably validate edge/alloc-type
// consistency — confirm against the original file).
3093template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3093void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check()
const {
3094 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3097 for (
auto &
Edge :
Node->CallerEdges)
// GraphTraits specialization fragment enabling generic graph iteration over
// the CallsiteContextGraph (entry node is the first owned node; children are
// reached through the shared ContextEdge pointers). Heavily elided in this
// listing — consult the original file.
3102template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3104 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3105 using NodeRef =
const ContextNode<DerivedCCG, FuncTy, CallTy> *;
3107 using NodePtrTy = std::unique_ptr<ContextNode<DerivedCCG, FuncTy, CallTy>>;
3123 return G->NodeOwner.begin()->get();
3126 using EdgePtrTy = std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>;
3127 static const ContextNode<DerivedCCG, FuncTy, CallTy> *
// DOTGraphTraits specialization fragment for exportToDot: builds node labels
// (orig id, alloc marker, call label or "null call", recursive/external
// notes), node/edge attribute strings (tooltips with context ids, fill
// colors by alloc type, blue dashed style for clones, dotted for backedges,
// highlight styling), filters removed nodes, renders context-id lists
// (abbreviated past 100 ids), maps alloc types to colors, and formats node
// ids as hex addresses. Heavily elided in this listing — consult the
// original file.
3146template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3160 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3166 std::string LabelString =
3167 (
Twine(
"OrigId: ") + (
Node->IsAllocation ?
"Alloc" :
"") +
3170 LabelString +=
"\n";
3171 if (
Node->hasCall()) {
3172 auto Func =
G->NodeToCallingFunc.find(
Node);
3173 assert(Func !=
G->NodeToCallingFunc.end());
3175 G->getLabel(Func->second,
Node->Call.call(),
Node->Call.cloneNo());
3177 LabelString +=
"null call";
3178 if (
Node->Recursive)
3179 LabelString +=
" (recursive)";
3181 LabelString +=
" (external)";
3187 auto ContextIds =
Node->getContextIds();
3191 bool Highlight =
false;
3200 std::string AttributeString = (
Twine(
"tooltip=\"") + getNodeId(
Node) +
" " +
3201 getContextIds(ContextIds) +
"\"")
3205 AttributeString +=
",fontsize=\"30\"";
3207 (
Twine(
",fillcolor=\"") + getColor(
Node->AllocTypes, Highlight) +
"\"")
3209 if (
Node->CloneOf) {
3210 AttributeString +=
",color=\"blue\"";
3211 AttributeString +=
",style=\"filled,bold,dashed\"";
3213 AttributeString +=
",style=\"filled\"";
3214 return AttributeString;
3219 auto &Edge = *(ChildIter.getCurrent());
3224 bool Highlight =
false;
3233 auto Color = getColor(Edge->AllocTypes, Highlight);
3234 std::string AttributeString =
3235 (
Twine(
"tooltip=\"") + getContextIds(Edge->ContextIds) +
"\"" +
3237 Twine(
",fillcolor=\"") + Color +
"\"" +
Twine(
",color=\"") + Color +
3240 if (Edge->IsBackedge)
3241 AttributeString +=
",style=\"dotted\"";
3244 AttributeString +=
",penwidth=\"2.0\",weight=\"2\"";
3245 return AttributeString;
3251 if (
Node->isRemoved())
3264 std::string IdString =
"ContextIds:";
3265 if (ContextIds.
size() < 100) {
3266 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
3267 std::sort(SortedIds.begin(), SortedIds.end());
3268 for (
auto Id : SortedIds)
3269 IdString += (
" " +
Twine(Id)).str();
3271 IdString += (
" (" + Twine(ContextIds.
size()) +
" ids)").str();
3276 static std::string getColor(uint8_t AllocTypes,
bool Highlight) {
3282 if (AllocTypes == (uint8_t)AllocationType::NotCold)
3284 return !
DoHighlight || Highlight ?
"brown1" :
"lightpink";
3285 if (AllocTypes == (uint8_t)AllocationType::Cold)
3286 return !
DoHighlight || Highlight ?
"cyan" :
"lightskyblue";
3288 ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold))
3289 return Highlight ?
"magenta" :
"mediumorchid1";
3293 static std::string getNodeId(NodeRef Node) {
3294 std::stringstream SStream;
3295 SStream << std::hex <<
"N0x" << (
unsigned long long)Node;
3296 std::string
Result = SStream.str();
// Writes the graph to a dot file tagged with Label (body elided). The first
// template header below belongs to an adjacent definition whose body is not
// visible in this listing.
3305template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3310template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3311void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot(
3312 std::string Label)
const {
// Creates a fresh clone of the edge's callee node (same allocation flag,
// calling function, call, and MatchingCalls), registers it via addClone, and
// moves the edge (or the given subset of its context ids) onto the clone via
// moveEdgeToExistingCalleeClone with NewClone=true.
3317template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3318typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
3319CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
3320 const std::shared_ptr<ContextEdge> &
Edge,
3321 DenseSet<uint32_t> ContextIdsToMove) {
3323 assert(NodeToCallingFunc.count(Node));
3324 ContextNode *Clone =
3325 createNewNode(
Node->IsAllocation, NodeToCallingFunc[Node],
Node->Call);
3326 Node->addClone(Clone);
3327 Clone->MatchingCalls =
Node->MatchingCalls;
3328 moveEdgeToExistingCalleeClone(
Edge, Clone,
true,
// Moves a caller edge (entirely, or the ContextIdsToMove subset) from its
// current callee onto NewCallee (a clone with the same orig node). Whole-edge
// moves either merge into an existing caller edge on the clone or re-point
// the edge; partial moves split off a new edge with recomputed alloc types.
// Afterwards the moved context ids are peeled off the old callee's callee
// edges and added to matching (or new) callee edges on the clone, with
// recursive self-edges redirected when the moved edge was recursive; the old
// callee's alloc types are recomputed and invariants asserted.
// NOTE(review): this listing omits interleaved source lines; consult the
// original file for the full body.
3333template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3334void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3335 moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &
Edge,
3336 ContextNode *NewCallee,
bool NewClone,
3337 DenseSet<uint32_t> ContextIdsToMove) {
3340 assert(NewCallee->getOrigNode() ==
Edge->Callee->getOrigNode());
3342 bool EdgeIsRecursive =
Edge->Callee ==
Edge->Caller;
3344 ContextNode *OldCallee =
Edge->Callee;
3348 auto ExistingEdgeToNewCallee = NewCallee->findEdgeFromCaller(
Edge->Caller);
// Empty ContextIdsToMove means "move the whole edge".
3352 if (ContextIdsToMove.
empty())
3353 ContextIdsToMove =
Edge->getContextIds();
3357 if (
Edge->getContextIds().size() == ContextIdsToMove.
size()) {
3360 NewCallee->AllocTypes |=
Edge->AllocTypes;
3362 if (ExistingEdgeToNewCallee) {
3365 ExistingEdgeToNewCallee->getContextIds().insert_range(ContextIdsToMove);
3366 ExistingEdgeToNewCallee->AllocTypes |=
Edge->AllocTypes;
3367 assert(
Edge->ContextIds == ContextIdsToMove);
3368 removeEdgeFromGraph(
Edge.get());
3371 Edge->Callee = NewCallee;
3372 NewCallee->CallerEdges.push_back(
Edge);
3374 OldCallee->eraseCallerEdge(
Edge.get());
3381 auto CallerEdgeAllocType = computeAllocType(ContextIdsToMove);
3382 if (ExistingEdgeToNewCallee) {
3385 ExistingEdgeToNewCallee->getContextIds().insert_range(ContextIdsToMove);
3386 ExistingEdgeToNewCallee->AllocTypes |= CallerEdgeAllocType;
3389 auto NewEdge = std::make_shared<ContextEdge>(
3390 NewCallee,
Edge->Caller, CallerEdgeAllocType, ContextIdsToMove);
3391 Edge->Caller->CalleeEdges.push_back(NewEdge);
3392 NewCallee->CallerEdges.push_back(NewEdge);
3396 NewCallee->AllocTypes |= CallerEdgeAllocType;
3398 Edge->AllocTypes = computeAllocType(
Edge->ContextIds);
3403 for (
auto &OldCalleeEdge : OldCallee->CalleeEdges) {
3404 ContextNode *CalleeToUse = OldCalleeEdge->Callee;
3408 if (CalleeToUse == OldCallee) {
3412 if (EdgeIsRecursive) {
3416 CalleeToUse = NewCallee;
3420 DenseSet<uint32_t> EdgeContextIdsToMove =
3422 set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove);
3423 OldCalleeEdge->AllocTypes =
3424 computeAllocType(OldCalleeEdge->getContextIds());
3431 if (
auto *NewCalleeEdge = NewCallee->findEdgeFromCallee(CalleeToUse)) {
3432 NewCalleeEdge->getContextIds().insert_range(EdgeContextIdsToMove);
3433 NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
3437 auto NewEdge = std::make_shared<ContextEdge>(
3438 CalleeToUse, NewCallee, computeAllocType(EdgeContextIdsToMove),
3439 EdgeContextIdsToMove);
3440 NewCallee->CalleeEdges.push_back(NewEdge);
3441 NewEdge->Callee->CallerEdges.push_back(NewEdge);
3445 OldCallee->AllocTypes = OldCallee->computeAllocType();
3447 assert((OldCallee->AllocTypes == (uint8_t)AllocationType::None) ==
3448 OldCallee->emptyContextIds());
3452 for (
const auto &OldCalleeEdge : OldCallee->CalleeEdges)
3455 for (
const auto &NewCalleeEdge : NewCallee->CalleeEdges)
// Re-parents a callee edge under NewCaller: merges into an existing edge on
// the new caller (clearing and detaching the old edge) or re-points the edge,
// handling recursive (caller==callee) edges by redirecting the callee side
// too. The moved edge's context ids are then peeled off the old caller's
// caller edges and attached to matching (or new) caller edges on NewCaller,
// alloc types are recomputed on the old caller, and edge invariants are
// asserted for both nodes.
// NOTE(review): this listing omits interleaved source lines; consult the
// original file for the full body.
3461template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3462void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3463 moveCalleeEdgeToNewCaller(
const std::shared_ptr<ContextEdge> &
Edge,
3464 ContextNode *NewCaller) {
3465 auto *OldCallee =
Edge->Callee;
3466 auto *NewCallee = OldCallee;
3469 bool Recursive =
Edge->Caller ==
Edge->Callee;
3471 NewCallee = NewCaller;
3473 ContextNode *OldCaller =
Edge->Caller;
3474 OldCaller->eraseCalleeEdge(
Edge.get());
3478 auto ExistingEdgeToNewCaller = NewCaller->findEdgeFromCallee(NewCallee);
3480 if (ExistingEdgeToNewCaller) {
3483 ExistingEdgeToNewCaller->getContextIds().insert_range(
3484 Edge->getContextIds());
3485 ExistingEdgeToNewCaller->AllocTypes |=
Edge->AllocTypes;
3486 Edge->ContextIds.clear();
3487 Edge->AllocTypes = (uint8_t)AllocationType::None;
3488 OldCallee->eraseCallerEdge(
Edge.get());
3491 Edge->Caller = NewCaller;
3492 NewCaller->CalleeEdges.push_back(
Edge);
3494 assert(NewCallee == NewCaller);
3497 Edge->Callee = NewCallee;
3498 NewCallee->CallerEdges.push_back(
Edge);
3499 OldCallee->eraseCallerEdge(
Edge.get());
3505 NewCaller->AllocTypes |=
Edge->AllocTypes;
3512 bool IsNewNode = NewCaller->CallerEdges.empty();
3521 for (
auto &OldCallerEdge : OldCaller->CallerEdges) {
3522 auto OldCallerCaller = OldCallerEdge->Caller;
3526 OldCallerEdge->getContextIds(),
Edge->getContextIds());
3527 if (OldCaller == OldCallerCaller) {
3528 OldCallerCaller = NewCaller;
3534 set_subtract(OldCallerEdge->getContextIds(), EdgeContextIdsToMove);
3535 OldCallerEdge->AllocTypes =
3536 computeAllocType(OldCallerEdge->getContextIds());
3541 auto *ExistingCallerEdge = NewCaller->findEdgeFromCaller(OldCallerCaller);
3545 if (ExistingCallerEdge) {
3546 ExistingCallerEdge->getContextIds().insert_range(EdgeContextIdsToMove);
3547 ExistingCallerEdge->AllocTypes |=
3548 computeAllocType(EdgeContextIdsToMove);
3551 auto NewEdge = std::make_shared<ContextEdge>(
3552 NewCaller, OldCallerCaller, computeAllocType(EdgeContextIdsToMove),
3553 EdgeContextIdsToMove);
3554 NewCaller->CallerEdges.push_back(NewEdge);
3555 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
3560 OldCaller->AllocTypes = OldCaller->computeAllocType();
3562 assert((OldCaller->AllocTypes == (uint8_t)AllocationType::None) ==
3563 OldCaller->emptyContextIds());
3567 for (
const auto &OldCallerEdge : OldCaller->CallerEdges)
3570 for (
const auto &NewCallerEdge : NewCaller->CallerEdges)
// Recursively strips callee edges whose allocation type is None, starting at
// Node, then visiting each of Node's clones and each caller reachable through
// Node's caller edges.
// NOTE(review): this extraction elides interleaved lines (the embedded source
// line numbers skip); braces/continue statements are missing — confirm the
// exact control flow against the full file.
3576template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3577void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3578    recursivelyRemoveNoneTypeCalleeEdges(
3579        ContextNode *Node, DenseSet<const ContextNode *> &Visited) {
// Clean this node's own callee edges first.
3584  removeNoneTypeCalleeEdges(Node);
// Recurse into every clone of this node.
3586  for (
auto *Clone :
Node->Clones)
3587    recursivelyRemoveNoneTypeCalleeEdges(Clone, Visited);
// Iterate over a copy of the caller edges, since the recursion may mutate the
// original caller edge list.
3591  auto CallerEdges =
Node->CallerEdges;
3592  for (
auto &
Edge : CallerEdges) {
// Skip edges that were already removed by an earlier iteration.
3594    if (
Edge->isRemoved()) {
3598    recursivelyRemoveNoneTypeCalleeEdges(
Edge->Caller, Visited);
// Driver that marks backedges in the context graph: seeds a DFS (the 3-arg
// markBackedges overload) from nodes in NonAllocationCallToContextNodeMap,
// using Visited and CurrentStack to detect cycles.
3603template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3604void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::markBackedges() {
3609  DenseSet<const ContextNode *> Visited;
// CurrentStack tracks the nodes on the active DFS path.
3610  DenseSet<const ContextNode *> CurrentStack;
3611  for (
auto &Entry : NonAllocationCallToContextNodeMap) {
// Skip nodes already removed from the graph.
// NOTE(review): the line binding Node from Entry is elided in this
// extraction — confirm against the full file.
3613    if (
Node->isRemoved())
// NOTE(review): intervening lines are elided; confirm whether a non-empty
// caller-edge list skips this node (continue) before the DFS call below.
3616    if (!
Node->CallerEdges.empty())
3618    markBackedges(Node, Visited, CurrentStack);
// DFS helper: walks Node's callee edges and sets IsBackedge on any edge whose
// callee is currently on the DFS stack (CurrentStack), i.e. part of a cycle.
3624template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3625void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::markBackedges(
3626    ContextNode *Node, DenseSet<const ContextNode *> &Visited,
3627    DenseSet<const ContextNode *> &CurrentStack) {
// Mark Node visited up front (insertion result presumably guards against
// re-processing — the use of I is in elided lines; confirm).
3628  auto I = Visited.
insert(Node);
3632  for (
auto &CalleeEdge :
Node->CalleeEdges) {
3633    auto *
Callee = CalleeEdge->Callee;
// Already-visited callee: only check whether it is on the active path.
3634    if (Visited.
count(Callee)) {
// Callee is on the current DFS stack — this edge closes a cycle.
3637      if (CurrentStack.
count(Callee))
3638        CalleeEdge->IsBackedge =
true;
// Push the callee on the DFS stack, recurse, then pop.
3641    CurrentStack.
insert(Callee);
3642    markBackedges(Callee, Visited, CurrentStack);
3643    CurrentStack.
erase(Callee);
// Driver for clone identification: invokes the per-node identifyClones
// overload from every allocation node (seeded with that allocation's context
// ids), then removes None-type callee edges reachable from each allocation.
3647template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3648void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() {
3649  DenseSet<const ContextNode *> Visited;
3650  for (
auto &Entry : AllocationCallToContextNodeMap) {
3652    identifyClones(
Entry.second, Visited,
Entry.second->getContextIds());
// Second pass: prune edges that ended up with AllocationType::None after
// cloning.
3655  for (
auto &Entry : AllocationCallToContextNodeMap)
3656    recursivelyRemoveNoneTypeCalleeEdges(
Entry.second, Visited);
// Per-node clone identification. Walks up from Node through its caller edges
// (callers first, post-order), then decides per caller edge whether the edge
// can stay on Node or must be moved to an existing or new clone of Node so
// that each (clone, caller) pairing has a single consistent allocation type.
// NOTE(review): this extraction elides many interior lines (braces, continue
// statements, helper calls such as the context-id intersection at 3811/3814);
// comments below are limited to what the visible lines establish.
3669template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3670void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
3671    ContextNode *Node, DenseSet<const ContextNode *> &Visited,
3672    const DenseSet<uint32_t> &AllocContextIds) {
// Nothing to do for nodes without an associated call.
3682  if (!
Node->hasCall())
// Iterate over a copy of the caller edges: the recursion below can mutate
// the original list.
3701  auto CallerEdges =
Node->CallerEdges;
3702  for (
auto &
Edge : CallerEdges) {
// Skip edges removed by earlier processing.
3704    if (
Edge->isRemoved()) {
// Backedges are handled separately (deferred), not recursed into here.
3710    if (
Edge->IsBackedge) {
// Recurse into callers not yet visited and not themselves clones.
3717    if (!Visited.
count(
Edge->Caller) && !
Edge->Caller->CloneOf) {
3718      identifyClones(
Edge->Caller, Visited, AllocContextIds);
// Priority table indexed by AllocTypes used by the comparator below to order
// caller edges for cloning.
// NOTE(review): remaining initializer entries are elided — confirm.
3741  const unsigned AllocTypeCloningPriority[] = { 3, 4,
// Comparator (presumably for a stable sort of caller edges — the sort call
// itself is in elided lines): empty-context edges first/last per the elided
// returns, ties on AllocTypes broken by smallest context id.
3745      [&](
const std::shared_ptr<ContextEdge> &
A,
3746          const std::shared_ptr<ContextEdge> &
B) {
3749        if (A->ContextIds.empty())
3755        if (B->ContextIds.empty())
3758        if (A->AllocTypes == B->AllocTypes)
3761          return *A->ContextIds.begin() < *B->ContextIds.begin();
3762        return AllocTypeCloningPriority[A->AllocTypes] <
3763               AllocTypeCloningPriority[B->AllocTypes];
3766  assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
// Collect context ids that appear on more than one caller edge: such ids
// correspond to recursive contexts re-entering this node.
3768  DenseSet<uint32_t> RecursiveContextIds;
3773  DenseSet<uint32_t> AllCallerContextIds;
3774  for (
auto &CE :
Node->CallerEdges) {
3777    AllCallerContextIds.
reserve(
CE->getContextIds().size());
3778    for (
auto Id :
CE->getContextIds())
// A failed insert means the id was already seen on another edge → recursive.
3779      if (!AllCallerContextIds.
insert(Id).second)
3780        RecursiveContextIds.
insert(Id);
// Main cloning loop over a fresh copy of the caller edges.
3790  auto CallerEdges =
Node->CallerEdges;
3791  for (
auto &CallerEdge : CallerEdges) {
3793    if (CallerEdge->isRemoved()) {
3797    assert(CallerEdge->Callee == Node);
// Callers without a call can't be cloned against — skip.
3806    if (!CallerEdge->Caller->hasCall())
// Narrow this edge's context ids to those relevant for the current
// allocation (initializer elided), excluding recursive ids when present.
3811    auto CallerEdgeContextsForAlloc =
3813    if (!RecursiveContextIds.
empty())
3814      CallerEdgeContextsForAlloc =
3816    if (CallerEdgeContextsForAlloc.empty())
3819    auto CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
// For each callee edge of Node, record the alloc type restricted to the
// contexts flowing through this caller edge.
3823    std::vector<uint8_t> CalleeEdgeAllocTypesForCallerEdge;
3824    CalleeEdgeAllocTypesForCallerEdge.reserve(
Node->CalleeEdges.size());
3825    for (
auto &CalleeEdge :
Node->CalleeEdges)
3826      CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
3827          CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
3843    assert(CallerEdge->AllocTypes != (uint8_t)AllocationType::None);
3844    assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
// If this (non-backedge) caller agrees with Node's own alloc type and every
// callee edge's restricted type matches, no clone is needed for this edge
// (the skip itself is in elided lines).
3845    if (!CallerEdge->IsBackedge &&
3846        allocTypeToUse(CallerAllocTypeForAlloc) ==
3847            allocTypeToUse(
Node->AllocTypes) &&
3848        allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
3849            CalleeEdgeAllocTypesForCallerEdge,
Node->CalleeEdges)) {
// Backedge cloning is deferred; count it.
3853    if (CallerEdge->IsBackedge) {
3857      DeferredBackedges++;
// Deferred backedge whose caller hasn't been cloned or visited yet: clone
// the caller first, then re-derive this edge's context set.
3870    if (CallerEdge->IsBackedge && !CallerEdge->Caller->CloneOf &&
3871        !Visited.
count(CallerEdge->Caller)) {
3872      const auto OrigIdCount = CallerEdge->getContextIds().size();
3875      identifyClones(CallerEdge->Caller, Visited, CallerEdgeContextsForAlloc);
3876      removeNoneTypeCalleeEdges(CallerEdge->Caller);
3880    bool UpdatedEdge =
false;
// The caller's cloning moved some context ids off this edge: find the edge
// from a clone of the original caller that now carries them.
3881    if (OrigIdCount > CallerEdge->getContextIds().size()) {
3882      for (
auto E :
Node->CallerEdges) {
3884        if (
E->Caller->CloneOf != CallerEdge->Caller)
// Recompute the relevant context set against the clone's edge
// (initializer elided).
3888        auto CallerEdgeContextsForAllocNew =
3890        if (CallerEdgeContextsForAllocNew.empty())
3900        CallerEdgeContextsForAlloc.swap(CallerEdgeContextsForAllocNew);
// The edge may have been removed entirely by the recursive cloning.
3910    if (CallerEdge->isRemoved())
// Re-intersect with the edge's surviving context ids (call elided at 3919).
3920        CallerEdgeContextsForAlloc, CallerEdge->getContextIds());
3921    if (CallerEdgeContextsForAlloc.empty())
// Recompute per-callee-edge alloc types after the context set changed.
3926    CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
3927    CalleeEdgeAllocTypesForCallerEdge.clear();
3928    for (
auto &CalleeEdge :
Node->CalleeEdges) {
3929      CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
3930          CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
// Look for an existing clone whose alloc type (and callee-edge types) match
// this caller edge; otherwise create a new clone.
3936    ContextNode *Clone =
nullptr;
3937    for (
auto *CurClone :
Node->Clones) {
3938      if (allocTypeToUse(CurClone->AllocTypes) !=
3939          allocTypeToUse(CallerAllocTypeForAlloc))
// NOTE(review): BothSingleAlloc is computed in elided lines — confirm.
3946      assert(!BothSingleAlloc ||
3947             CurClone->AllocTypes == CallerAllocTypeForAlloc);
3953      if (BothSingleAlloc || allocTypesMatchClone<DerivedCCG, FuncTy, CallTy>(
3954                                 CalleeEdgeAllocTypesForCallerEdge, CurClone)) {
// Move the edge to the matching existing clone, or create a new one.
3962      moveEdgeToExistingCalleeClone(CallerEdge, Clone,
false,
3963                                    CallerEdgeContextsForAlloc);
3965      Clone = moveEdgeToNewCalleeClone(CallerEdge, CallerEdgeContextsForAlloc);
3968    assert(Clone->AllocTypes != (uint8_t)AllocationType::None);
3975  assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
// IR (module) flavor: tags the allocation call with the "memprof" attribute
// (attribute construction elided at ~3983-3986) and emits a "MemprofAttribute"
// optimization remark naming the call and the attribute value.
3981void ModuleCallsiteContextGraph::updateAllocationCall(
3986                       "memprof", AllocTypeString);
// Remark emission via the ORE getter (receiver expression elided at ~3988).
3989      .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofAttribute",
Call.call())
3990            <<
ore::NV(
"AllocationCall",
Call.call()) <<
" in clone "
3992            <<
" marked with memprof allocation attribute "
3993            <<
ore::NV(
"Attribute", AllocTypeString));
// Summary-index flavor: records the allocation type in the AllocInfo's
// Versions array slot for this clone (the store itself is in elided lines);
// the assert guards that the clone number indexes within Versions.
3996void IndexCallsiteContextGraph::updateAllocationCall(CallInfo &
Call,
4000  assert(AI->Versions.size() >
Call.cloneNo());
// IR (module) flavor: derives the allocation type from the call's "memprof"
// function attribute — None when absent, Cold when the attribute value is
// "cold", NotCold otherwise.
4005ModuleCallsiteContextGraph::getAllocationCallType(
const CallInfo &
Call)
const {
// No attribute means the allocation was not annotated.
4007  if (!CB->getAttributes().hasFnAttr(
"memprof"))
4008    return AllocationType::None;
4009  return CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
4010             ? AllocationType::Cold
4011             : AllocationType::NotCold;
// Summary-index flavor: reads the allocation type recorded for this clone in
// the AllocInfo's Versions array (the read/return is in elided lines); the
// assert guards the clone-number index.
4015IndexCallsiteContextGraph::getAllocationCallType(
const CallInfo &
Call)
const {
4017  assert(AI->Versions.size() >
Call.cloneNo());
// IR (module) flavor: redirects CallerCall to the assigned callee function
// clone. Counts a MismatchedCloneAssignments stat when the call was already
// pointing at a different clone, rewrites the called function for non-zero
// clone numbers, and emits a "MemprofCall" remark describing the assignment.
4021void ModuleCallsiteContextGraph::updateCall(CallInfo &CallerCall,
4022                                            FuncInfo CalleeFunc) {
4023  auto *CurF = getCalleeFunc(CallerCall.call());
4024  auto NewCalleeCloneNo = CalleeFunc.cloneNo();
// Detect a conflicting earlier assignment (CurCalleeCloneNo is derived in
// elided lines, presumably from CurF's name — confirm).
4031    if (CurCalleeCloneNo != NewCalleeCloneNo) {
4033                        << CurCalleeCloneNo <<
" now " << NewCalleeCloneNo
4035      MismatchedCloneAssignments++;
// Clone 0 is the original function — only rewrite for real clones.
4038  if (NewCalleeCloneNo > 0)
4039    cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func());
4040  OREGetter(CallerCall.call()->getFunction())
4041      .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofCall", CallerCall.call())
4042            <<
ore::NV(
"Call", CallerCall.call()) <<
" in clone "
4043            <<
ore::NV(
"Caller", CallerCall.call()->getFunction())
4044            <<
" assigned to call function clone "
4045            <<
ore::NV(
"Callee", CalleeFunc.func()));
// Summary-index flavor: records the assigned callee clone number in the
// CallsiteInfo's Clones slot for this caller clone. A mismatch stat is only
// counted when overwriting a previously-assigned non-zero clone number.
4048void IndexCallsiteContextGraph::updateCall(CallInfo &CallerCall,
4049                                           FuncInfo CalleeFunc) {
4052         "Caller cannot be an allocation which should not have profiled calls");
4053  assert(CI->Clones.size() > CallerCall.cloneNo());
4054  auto NewCalleeCloneNo = CalleeFunc.cloneNo();
4055  auto &CurCalleeCloneNo = CI->Clones[CallerCall.cloneNo()];
// Slot value 0 (unassigned / original) does not count as a conflict.
4060  if (CurCalleeCloneNo != 0 && CurCalleeCloneNo != NewCalleeCloneNo) {
4062                      << CurCalleeCloneNo <<
" now " << NewCalleeCloneNo
4064    MismatchedCloneAssignments++;
4066  CurCalleeCloneNo = NewCalleeCloneNo;
4078 SP->replaceLinkageName(MDName);
4082 TempDISubprogram NewDecl = Decl->
clone();
4083 NewDecl->replaceLinkageName(MDName);
// IR (module) flavor: creates function clone number CloneNo of Func for a
// callsite, gives it a unique name (asserted not to collide in the module),
// maps each call-with-metadata to its cloned counterpart (mapping loop body
// largely elided), emits a "MemprofClone" remark, and returns the new clone.
4087CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
4089ModuleCallsiteContextGraph::cloneFunctionForCallsite(
4090    FuncInfo &Func, CallInfo &
Call, DenseMap<CallInfo, CallInfo> &CallMap,
4091    std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
// The chosen clone name must not already exist in the module.
4096  assert(!
Func.func()->getParent()->getFunction(Name));
4097  NewFunc->setName(Name);
4099  for (
auto &Inst : CallsWithMetadataInFunc) {
// Only original (clone 0) calls are expected as inputs here.
4101    assert(Inst.cloneNo() == 0);
4104  OREGetter(
Func.func())
4105      .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofClone",
Func.func())
4106            <<
"created clone " <<
ore::NV(
"NewFunction", NewFunc));
4107  return {NewFunc, CloneNo};
// Summary-index flavor: no IR is cloned; instead each allocation's Versions
// and each callsite's Clones arrays grow a zero-initialized slot for the new
// clone number, and CallMap records the (call, CloneNo) identity mapping.
// Returns the original function paired with the new clone number.
4110CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
4111                     IndexCall>::FuncInfo
4112IndexCallsiteContextGraph::cloneFunctionForCallsite(
4113    FuncInfo &Func, CallInfo &
Call, DenseMap<CallInfo, CallInfo> &CallMap,
4114    std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
4128  for (
auto &Inst : CallsWithMetadataInFunc) {
// Inputs must be original (clone 0) calls.
4130    assert(Inst.cloneNo() == 0);
// Clones are created in order, so the array size equals the new clone no.
4132      assert(AI->Versions.size() == CloneNo);
4135      AI->Versions.push_back(0);
4138      assert(CI && CI->Clones.size() == CloneNo);
4141      CI->Clones.push_back(0);
4143    CallMap[Inst] = {Inst.call(), CloneNo};
4145  return {
Func.func(), CloneNo};
// Driver for clone merging: first builds a map from every context id to its
// owning allocation node (original node for both the node's own ids and its
// clones' ids), then walks up from each allocation node (and its clones)
// performing the actual merging, and finally dumps/exports the graph
// (guarded by options in elided lines).
4162template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4163void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones() {
4169  DenseMap<uint32_t, ContextNode *> ContextIdToAllocationNode;
4170  for (
auto &Entry : AllocationCallToContextNodeMap) {
// Map the node's own context ids to its original node.
4172    for (
auto Id :
Node->getContextIds())
4173      ContextIdToAllocationNode[
Id] =
Node->getOrigNode();
// Clones' context ids also map back to the original node.
4174    for (
auto *Clone :
Node->Clones) {
4175      for (
auto Id : Clone->getContextIds())
4176        ContextIdToAllocationNode[
Id] = Clone->getOrigNode();
4183  DenseSet<const ContextNode *> Visited;
4184  for (
auto &Entry : AllocationCallToContextNodeMap) {
4187    mergeClones(Node, Visited, ContextIdToAllocationNode);
// Iterate over a copy of the clone list since merging may mutate it.
4193    auto Clones =
Node->Clones;
4194    for (
auto *Clone : Clones)
4195      mergeClones(Clone, Visited, ContextIdToAllocationNode);
// Debug output (option guards elided).
4199    dbgs() <<
"CCG after merging:\n";
4203    exportToDot(
"aftermerge");
// Per-node merge walk: repeatedly scans Node's caller edges, recursing into
// any caller not yet visited (the Visited check is in elided lines), until a
// full scan finds no unvisited caller. Records iteration statistics, then
// merges Node's callee clones.
4211template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4212void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones(
4213    ContextNode *Node, DenseSet<const ContextNode *> &Visited,
4214    DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {
4224  bool FoundUnvisited =
true;
// Re-scan until a pass completes without finding an unvisited caller (the
// caller edge list can change while recursing).
4226  while (FoundUnvisited) {
4228    FoundUnvisited =
false;
// Copy the caller edges as recursion may mutate the original list.
4231    auto CallerEdges =
Node->CallerEdges;
4232    for (
auto CallerEdge : CallerEdges) {
// The edge may have been re-pointed away from Node by earlier merging.
4234      if (CallerEdge->Callee != Node)
4239      FoundUnvisited =
true;
4240      mergeClones(CallerEdge->Caller, Visited, ContextIdToAllocationNode);
// Statistics on merge-loop effort (Iters is maintained in elided lines).
4244  TotalMergeInvokes++;
4245  TotalMergeIters += Iters;
4246  if (Iters > MaxMergeIters)
4247    MaxMergeIters = Iters;
4250  mergeNodeCalleeClones(Node, Visited, ContextIdToAllocationNode);
// Merges the callee clones of Node: groups Node's callee edges by the
// callees' original (pre-clone) node, and for each group with more than one
// clone, funnels all of the group's edges into a single merge node — reusing
// an existing callee node when possible (NonNewMergedNodes) or creating a new
// clone otherwise. Callers identified by findOtherCallersToShareMerge may
// have their matching edges redirected into the same merge node.
// NOTE(review): many interior lines (sort call, continue statements, remark
// plumbing) are elided in this extraction.
4253template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4254void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeNodeCalleeClones(
4255    ContextNode *Node, DenseSet<const ContextNode *> &Visited,
4256    DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {
// Nothing to merge for nodes with no remaining contexts.
4258  if (
Node->emptyContextIds())
// Group callee edges by the callee's original node (Base is computed in
// elided lines, presumably via getOrigNode — confirm).
4263  MapVector<ContextNode *, std::vector<std::shared_ptr<ContextEdge>>>
4264      OrigNodeToCloneEdges;
4265  for (
const auto &
E :
Node->CalleeEdges) {
4270    OrigNodeToCloneEdges[
Base].push_back(
E);
// Ordering for a group's edges: fewest callee caller-edges first, clones
// before originals, ties broken by smallest context id.
4276  auto CalleeCallerEdgeLessThan = [](
const std::shared_ptr<ContextEdge> &
A,
4277                                     const std::shared_ptr<ContextEdge> &
B) {
4278    if (
A->Callee->CallerEdges.size() !=
B->Callee->CallerEdges.size())
4279      return A->Callee->CallerEdges.size() <
B->Callee->CallerEdges.size();
4280    if (
A->Callee->CloneOf && !
B->Callee->CloneOf)
4282    else if (!
A->Callee->CloneOf &&
B->Callee->CloneOf)
4286    return *
A->ContextIds.begin() < *
B->ContextIds.begin();
4291  for (
auto Entry : OrigNodeToCloneEdges) {
4294    auto &CalleeEdges =
Entry.second;
4295    auto NumCalleeClones = CalleeEdges.size();
// A single clone in the group needs no merging.
4297    if (NumCalleeClones == 1)
// Identify other callers of these same callee clones that can share the
// merged node.
4308    DenseSet<ContextNode *> OtherCallersToShareMerge;
4309    findOtherCallersToShareMerge(Node, CalleeEdges, ContextIdToAllocationNode,
4310                                 OtherCallersToShareMerge);
4315    ContextNode *MergeNode =
nullptr;
4316    DenseMap<ContextNode *, unsigned> CallerToMoveCount;
4317    for (
auto CalleeEdge : CalleeEdges) {
4318      auto *OrigCallee = CalleeEdge->Callee;
// A callee with a single caller edge can itself serve as the merge node.
4324      if (CalleeEdge->Callee->CallerEdges.size() == 1) {
4325        MergeNode = OrigCallee;
4326        NonNewMergedNodes++;
// If every other caller edge of this callee belongs to a sharing caller,
// the callee can also be reused as the merge node directly.
4333      if (!OtherCallersToShareMerge.
empty()) {
4334        bool MoveAllCallerEdges =
true;
4335        for (
auto CalleeCallerE : OrigCallee->CallerEdges) {
4336          if (CalleeCallerE == CalleeEdge)
4338          if (!OtherCallersToShareMerge.
contains(CalleeCallerE->Caller)) {
4339            MoveAllCallerEdges =
false;
4345        if (MoveAllCallerEdges) {
4346          MergeNode = OrigCallee;
4347          NonNewMergedNodes++;
// Otherwise move this edge into the existing merge node, or create a new
// merge clone from it (NewMergedNodes counting is in elided lines).
4354        assert(MergeNode != OrigCallee);
4355        moveEdgeToExistingCalleeClone(CalleeEdge, MergeNode,
4358        MergeNode = moveEdgeToNewCalleeClone(CalleeEdge);
// Redirect the sharing callers' edges into the merge node as well.
4363      if (!OtherCallersToShareMerge.
empty()) {
// Copy since the move below mutates the caller edge list.
4367        auto OrigCalleeCallerEdges = OrigCallee->CallerEdges;
4368        for (
auto &CalleeCallerE : OrigCalleeCallerEdges) {
4369          if (CalleeCallerE == CalleeEdge)
4371          if (!OtherCallersToShareMerge.
contains(CalleeCallerE->Caller))
4373          CallerToMoveCount[CalleeCallerE->Caller]++;
4374          moveEdgeToExistingCalleeClone(CalleeCallerE, MergeNode,
// Clean up None-type edges produced by the moves.
4378      removeNoneTypeCalleeEdges(OrigCallee);
4379      removeNoneTypeCalleeEdges(MergeNode);
// Determines which other callers of the callee clones in CalleeEdges can
// share the merged node Node is about to create. A caller qualifies only if
// it has an edge to every one of the NumCalleeClones callee clones, and each
// of those edges reaches only allocation nodes already reached by Node's own
// corresponding edge (checked via ContextIdToAllocationNode). Qualifying
// callers are returned in OtherCallersToShareMerge.
4397template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4398void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
4399    findOtherCallersToShareMerge(
4401        std::vector<std::shared_ptr<ContextEdge>> &CalleeEdges,
4402        DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode,
4403        DenseSet<ContextNode *> &OtherCallersToShareMerge) {
4404  auto NumCalleeClones = CalleeEdges.size();
// Per-caller count of how many of the callee clones it also calls.
4407  DenseMap<ContextNode *, unsigned> OtherCallersToSharedCalleeEdgeCount;
4410  unsigned PossibleOtherCallerNodes = 0;
// If the first callee clone has no other caller, no sharing is possible
// (early return is in elided lines).
4414  if (CalleeEdges[0]->
Callee->CallerEdges.size() < 2)
// First pass: count shared callers and record, per callee edge of Node, the
// set of allocation nodes its context ids reach.
4420  DenseMap<ContextEdge *, DenseSet<ContextNode *>> CalleeEdgeToAllocNodes;
4421  for (
auto CalleeEdge : CalleeEdges) {
4422    assert(CalleeEdge->Callee->CallerEdges.size() > 1);
4425    for (
auto CalleeCallerEdges : CalleeEdge->Callee->CallerEdges) {
// Skip Node's own edge to this callee.
4426      if (CalleeCallerEdges->Caller == Node) {
4427        assert(CalleeCallerEdges == CalleeEdge);
4430      OtherCallersToSharedCalleeEdgeCount[CalleeCallerEdges->Caller]++;
// A caller becomes a candidate once it calls all the callee clones.
// NOTE(review): the comparison RHS at 4434 is elided — presumably
// NumCalleeClones; confirm.
4433      if (OtherCallersToSharedCalleeEdgeCount[CalleeCallerEdges->Caller] ==
4435        PossibleOtherCallerNodes++;
// Record every allocation node reachable from this edge's context ids.
4439    for (
auto Id : CalleeEdge->getContextIds()) {
4440      auto *
Alloc = ContextIdToAllocationNode.
lookup(Id);
// Ids with no recorded allocation node are counted and skipped.
4444        MissingAllocForContextId++;
4447      CalleeEdgeToAllocNodes[CalleeEdge.get()].
insert(
Alloc);
// Second pass: disqualify candidates whose edges reach an allocation node
// that Node's corresponding edge does not reach.
4454  for (
auto CalleeEdge : CalleeEdges) {
4455    assert(CalleeEdge->Callee->CallerEdges.size() > 1);
4457    if (!PossibleOtherCallerNodes)
4459    auto &CurCalleeAllocNodes = CalleeEdgeToAllocNodes[CalleeEdge.get()];
4461    for (
auto &CalleeCallerE : CalleeEdge->Callee->CallerEdges) {
4463      if (CalleeCallerE == CalleeEdge)
// Only candidates that call all clones are checked further.
4467      if (OtherCallersToSharedCalleeEdgeCount[CalleeCallerE->Caller] !=
4472      for (
auto Id : CalleeCallerE->getContextIds()) {
4473        auto *
Alloc = ContextIdToAllocationNode.
lookup(Id);
// Reaching a foreign allocation node disqualifies this caller.
4478        if (!CurCalleeAllocNodes.contains(
Alloc)) {
4479          OtherCallersToSharedCalleeEdgeCount[CalleeCallerE->Caller] = 0;
4480          PossibleOtherCallerNodes--;
4487  if (!PossibleOtherCallerNodes)
// Emit the surviving candidates.
4492  for (
auto &[OtherCaller,
Count] : OtherCallersToSharedCalleeEdgeCount) {
4493    if (
Count != NumCalleeClones)
4495    OtherCallersToShareMerge.
insert(OtherCaller);
4540template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4541bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
4548 DenseMap<ContextNode *, FuncInfo> CallsiteToCalleeFuncCloneMap;
4552 auto RecordCalleeFuncOfCallsite = [&](ContextNode *
Caller,
4553 const FuncInfo &CalleeFunc) {
4555 CallsiteToCalleeFuncCloneMap[
Caller] = CalleeFunc;
4559 struct FuncCloneInfo {
4564 DenseMap<CallInfo, CallInfo> CallMap;
4592 DenseMap<const ContextNode *, std::map<unsigned, SmallVector<CallInfo, 0>>>
4593 UnassignedCallClones;
4597 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
4598 FuncInfo OrigFunc(Func);
4603 std::vector<FuncCloneInfo> FuncCloneInfos;
4604 for (
auto &
Call : CallsWithMetadata) {
4605 ContextNode *
Node = getNodeForInst(
Call);
4609 if (!Node ||
Node->Clones.empty())
4612 "Not having a call should have prevented cloning");
4616 std::map<FuncInfo, ContextNode *> FuncCloneToCurNodeCloneMap;
4620 auto AssignCallsiteCloneToFuncClone = [&](
const FuncInfo &FuncClone,
4622 ContextNode *CallsiteClone,
4625 FuncCloneToCurNodeCloneMap[FuncClone] = CallsiteClone;
4627 assert(FuncCloneInfos.size() > FuncClone.cloneNo());
4628 DenseMap<CallInfo, CallInfo> &CallMap =
4629 FuncCloneInfos[FuncClone.cloneNo()].CallMap;
4630 CallInfo CallClone(
Call);
4631 if (
auto It = CallMap.
find(
Call); It != CallMap.
end())
4632 CallClone = It->second;
4633 CallsiteClone->setCall(CallClone);
4635 for (
auto &MatchingCall :
Node->MatchingCalls) {
4636 CallInfo CallClone(MatchingCall);
4637 if (
auto It = CallMap.
find(MatchingCall); It != CallMap.
end())
4638 CallClone = It->second;
4640 MatchingCall = CallClone;
4648 auto MoveEdgeToNewCalleeCloneAndSetUp =
4649 [&](
const std::shared_ptr<ContextEdge> &
Edge) {
4650 ContextNode *OrigCallee =
Edge->Callee;
4651 ContextNode *NewClone = moveEdgeToNewCalleeClone(
Edge);
4652 removeNoneTypeCalleeEdges(NewClone);
4653 assert(NewClone->AllocTypes != (uint8_t)AllocationType::None);
4657 if (CallsiteToCalleeFuncCloneMap.
count(OrigCallee))
4658 RecordCalleeFuncOfCallsite(
4659 NewClone, CallsiteToCalleeFuncCloneMap[OrigCallee]);
4666 std::deque<ContextNode *> ClonesWorklist;
4668 if (!
Node->emptyContextIds())
4669 ClonesWorklist.push_back(Node);
4675 unsigned NodeCloneCount = 0;
4676 while (!ClonesWorklist.empty()) {
4677 ContextNode *Clone = ClonesWorklist.front();
4678 ClonesWorklist.pop_front();
4687 if (FuncCloneInfos.size() < NodeCloneCount) {
4689 if (NodeCloneCount == 1) {
4694 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &
E) {
4695 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
4699 FuncCloneInfos.push_back(
4700 {OrigFunc, DenseMap<CallInfo, CallInfo>()});
4701 AssignCallsiteCloneToFuncClone(
4702 OrigFunc,
Call, Clone,
4703 AllocationCallToContextNodeMap.count(
Call));
4704 for (
auto &CE : Clone->CallerEdges) {
4706 if (!
CE->Caller->hasCall())
4708 RecordCalleeFuncOfCallsite(
CE->Caller, OrigFunc);
4718 FuncInfo PreviousAssignedFuncClone;
4720 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &
E) {
4721 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
4723 bool CallerAssignedToCloneOfFunc =
false;
4724 if (EI != Clone->CallerEdges.end()) {
4725 const std::shared_ptr<ContextEdge> &
Edge = *EI;
4726 PreviousAssignedFuncClone =
4727 CallsiteToCalleeFuncCloneMap[
Edge->Caller];
4728 CallerAssignedToCloneOfFunc =
true;
4733 DenseMap<CallInfo, CallInfo> NewCallMap;
4734 unsigned CloneNo = FuncCloneInfos.size();
4735 assert(CloneNo > 0 &&
"Clone 0 is the original function, which "
4736 "should already exist in the map");
4737 FuncInfo NewFuncClone = cloneFunctionForCallsite(
4738 OrigFunc,
Call, NewCallMap, CallsWithMetadata, CloneNo);
4739 FuncCloneInfos.push_back({NewFuncClone, std::move(NewCallMap)});
4740 FunctionClonesAnalysis++;
4746 if (!CallerAssignedToCloneOfFunc) {
4747 AssignCallsiteCloneToFuncClone(
4748 NewFuncClone,
Call, Clone,
4749 AllocationCallToContextNodeMap.count(
Call));
4750 for (
auto &CE : Clone->CallerEdges) {
4752 if (!
CE->Caller->hasCall())
4754 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
4766 auto CallerEdges = Clone->CallerEdges;
4767 for (
auto CE : CallerEdges) {
4769 if (
CE->isRemoved()) {
4775 if (!
CE->Caller->hasCall())
4778 if (!CallsiteToCalleeFuncCloneMap.
count(
CE->Caller) ||
4782 CallsiteToCalleeFuncCloneMap[
CE->Caller] !=
4783 PreviousAssignedFuncClone)
4786 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
4799 auto CalleeEdges =
CE->Caller->CalleeEdges;
4800 for (
auto CalleeEdge : CalleeEdges) {
4803 if (CalleeEdge->isRemoved()) {
4808 ContextNode *
Callee = CalleeEdge->Callee;
4812 if (Callee == Clone || !
Callee->hasCall())
4817 if (Callee == CalleeEdge->Caller)
4819 ContextNode *NewClone =
4820 MoveEdgeToNewCalleeCloneAndSetUp(CalleeEdge);
4823 removeNoneTypeCalleeEdges(Callee);
4831 CallInfo OrigCall(
Callee->getOrigNode()->Call);
4832 OrigCall.setCloneNo(0);
4833 DenseMap<CallInfo, CallInfo> &CallMap =
4834 FuncCloneInfos[NewFuncClone.cloneNo()].CallMap;
4836 CallInfo NewCall(CallMap[OrigCall]);
4838 NewClone->setCall(NewCall);
4840 for (
auto &MatchingCall : NewClone->MatchingCalls) {
4841 CallInfo OrigMatchingCall(MatchingCall);
4842 OrigMatchingCall.setCloneNo(0);
4844 CallInfo NewCall(CallMap[OrigMatchingCall]);
4847 MatchingCall = NewCall;
4856 auto FindFirstAvailFuncClone = [&]() {
4861 for (
auto &CF : FuncCloneInfos) {
4862 if (!FuncCloneToCurNodeCloneMap.count(CF.FuncClone))
4863 return CF.FuncClone;
4866 "Expected an available func clone for this callsite clone");
4883 std::map<FuncInfo, ContextNode *> FuncCloneToNewCallsiteCloneMap;
4884 FuncInfo FuncCloneAssignedToCurCallsiteClone;
4888 auto CloneCallerEdges = Clone->CallerEdges;
4889 for (
auto &
Edge : CloneCallerEdges) {
4893 if (
Edge->isRemoved())
4896 if (!
Edge->Caller->hasCall())
4900 if (CallsiteToCalleeFuncCloneMap.
count(
Edge->Caller)) {
4901 FuncInfo FuncCloneCalledByCaller =
4902 CallsiteToCalleeFuncCloneMap[
Edge->Caller];
4912 if ((FuncCloneToCurNodeCloneMap.count(FuncCloneCalledByCaller) &&
4913 FuncCloneToCurNodeCloneMap[FuncCloneCalledByCaller] !=
4921 (FuncCloneAssignedToCurCallsiteClone &&
4922 FuncCloneAssignedToCurCallsiteClone !=
4923 FuncCloneCalledByCaller)) {
4938 if (FuncCloneToNewCallsiteCloneMap.count(
4939 FuncCloneCalledByCaller)) {
4940 ContextNode *NewClone =
4941 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller];
4942 moveEdgeToExistingCalleeClone(
Edge, NewClone);
4944 removeNoneTypeCalleeEdges(NewClone);
4947 ContextNode *NewClone = MoveEdgeToNewCalleeCloneAndSetUp(
Edge);
4948 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller] =
4951 ClonesWorklist.push_back(NewClone);
4955 removeNoneTypeCalleeEdges(Clone);
4963 if (!FuncCloneAssignedToCurCallsiteClone) {
4964 FuncCloneAssignedToCurCallsiteClone = FuncCloneCalledByCaller;
4966 AssignCallsiteCloneToFuncClone(
4967 FuncCloneCalledByCaller,
Call, Clone,
4968 AllocationCallToContextNodeMap.count(
Call));
4972 assert(FuncCloneAssignedToCurCallsiteClone ==
4973 FuncCloneCalledByCaller);
4982 if (!FuncCloneAssignedToCurCallsiteClone) {
4983 FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
4984 assert(FuncCloneAssignedToCurCallsiteClone);
4986 AssignCallsiteCloneToFuncClone(
4987 FuncCloneAssignedToCurCallsiteClone,
Call, Clone,
4988 AllocationCallToContextNodeMap.count(
Call));
4990 assert(FuncCloneToCurNodeCloneMap
4991 [FuncCloneAssignedToCurCallsiteClone] == Clone);
4993 RecordCalleeFuncOfCallsite(
Edge->Caller,
4994 FuncCloneAssignedToCurCallsiteClone);
5014 if (!FuncCloneAssignedToCurCallsiteClone) {
5015 FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
5016 assert(FuncCloneAssignedToCurCallsiteClone &&
5017 "No available func clone for this callsite clone");
5018 AssignCallsiteCloneToFuncClone(
5019 FuncCloneAssignedToCurCallsiteClone,
Call, Clone,
5020 AllocationCallToContextNodeMap.contains(
Call));
5025 for (
const auto &PE :
Node->CalleeEdges)
5027 for (
const auto &CE :
Node->CallerEdges)
5029 for (
auto *Clone :
Node->Clones) {
5031 for (
const auto &PE : Clone->CalleeEdges)
5033 for (
const auto &CE : Clone->CallerEdges)
5039 if (FuncCloneInfos.size() < 2)
5045 for (
auto &
Call : CallsWithMetadata) {
5046 ContextNode *
Node = getNodeForInst(
Call);
5047 if (!Node || !
Node->hasCall() ||
Node->emptyContextIds())
5053 if (
Node->Clones.size() + 1 >= FuncCloneInfos.size())
5057 DenseSet<unsigned> NodeCallClones;
5058 for (
auto *
C :
Node->Clones)
5059 NodeCallClones.
insert(
C->Call.cloneNo());
5062 for (
auto &FC : FuncCloneInfos) {
5067 if (++
I == 1 || NodeCallClones.
contains(
I)) {
5072 auto &CallVector = UnassignedCallClones[
Node][
I];
5073 DenseMap<CallInfo, CallInfo> &CallMap =
FC.CallMap;
5074 if (
auto It = CallMap.
find(
Call); It != CallMap.
end()) {
5075 CallInfo CallClone = It->second;
5076 CallVector.push_back(CallClone);
5080 assert(
false &&
"Expected to find call in CallMap");
5083 for (
auto &MatchingCall :
Node->MatchingCalls) {
5084 if (
auto It = CallMap.
find(MatchingCall); It != CallMap.
end()) {
5085 CallInfo CallClone = It->second;
5086 CallVector.push_back(CallClone);
5090 assert(
false &&
"Expected to find call in CallMap");
5098 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
5100 auto UpdateCalls = [&](ContextNode *
Node,
5101 DenseSet<const ContextNode *> &Visited,
5102 auto &&UpdateCalls) {
5103 auto Inserted = Visited.insert(Node);
5107 for (
auto *Clone :
Node->Clones)
5108 UpdateCalls(Clone, Visited, UpdateCalls);
5110 for (
auto &
Edge :
Node->CallerEdges)
5111 UpdateCalls(
Edge->Caller, Visited, UpdateCalls);
5115 if (!
Node->hasCall() ||
Node->emptyContextIds())
5118 if (
Node->IsAllocation) {
5119 auto AT = allocTypeToUse(
Node->AllocTypes);
5125 !ContextIdToContextSizeInfos.empty()) {
5126 uint64_t TotalCold = 0;
5128 for (
auto Id :
Node->getContextIds()) {
5129 auto TypeI = ContextIdToAllocationType.find(Id);
5130 assert(TypeI != ContextIdToAllocationType.end());
5131 auto CSI = ContextIdToContextSizeInfos.find(Id);
5132 if (CSI != ContextIdToContextSizeInfos.end()) {
5133 for (
auto &
Info : CSI->second) {
5135 if (TypeI->second == AllocationType::Cold)
5136 TotalCold +=
Info.TotalSize;
5141 AT = AllocationType::Cold;
5143 updateAllocationCall(
Node->Call, AT);
5148 if (!CallsiteToCalleeFuncCloneMap.
count(Node))
5151 auto CalleeFunc = CallsiteToCalleeFuncCloneMap[
Node];
5152 updateCall(
Node->Call, CalleeFunc);
5154 for (
auto &
Call :
Node->MatchingCalls)
5155 updateCall(
Call, CalleeFunc);
5159 if (!UnassignedCallClones.
contains(Node))
5161 DenseSet<unsigned> NodeCallClones;
5162 for (
auto *
C :
Node->Clones)
5163 NodeCallClones.
insert(
C->Call.cloneNo());
5165 auto &ClonedCalls = UnassignedCallClones[
Node];
5166 for (
auto &[CloneNo, CallVector] : ClonedCalls) {
5170 if (NodeCallClones.
contains(CloneNo))
5173 for (
auto &
Call : CallVector)
5174 updateCall(
Call, CalleeFunc);
5183 DenseSet<const ContextNode *> Visited;
5184 for (
auto &Entry : AllocationCallToContextNodeMap)
5185 UpdateCalls(
Entry.second, Visited, UpdateCalls);
5199 FunctionsClonedThinBackend++;
5200 for (
unsigned I = 1;
I < NumClones;
I++) {
5201 VMaps.
emplace_back(std::make_unique<ValueToValueMapTy>());
5203 FunctionClonesThinBackend++;
5206 for (
auto &BB : *NewF) {
5207 for (
auto &Inst : BB) {
5208 Inst.setMetadata(LLVMContext::MD_memprof,
nullptr);
5209 Inst.setMetadata(LLVMContext::MD_callsite,
nullptr);
5213 auto *PrevF = M.getFunction(Name);
5217 assert(PrevF->isDeclaration());
5218 NewF->takeName(PrevF);
5219 PrevF->replaceAllUsesWith(NewF);
5220 PrevF->eraseFromParent();
5222 NewF->setName(Name);
5225 <<
"created clone " <<
ore::NV(
"NewFunction", NewF));
5228 if (!FuncToAliasMap.count(&
F))
5230 for (
auto *
A : FuncToAliasMap[&
F]) {
5232 auto *PrevA = M.getNamedAlias(Name);
5234 A->getType()->getPointerAddressSpace(),
5235 A->getLinkage(), Name, NewF);
5236 NewA->copyAttributesFrom(
A);
5240 assert(PrevA->isDeclaration());
5241 NewA->takeName(PrevA);
5242 PrevA->replaceAllUsesWith(NewA);
5243 PrevA->eraseFromParent();
5254 const Function *CallingFunc =
nullptr) {
5273 auto SrcFileMD =
F.getMetadata(
"thinlto_src_file");
5279 if (!SrcFileMD &&
F.isDeclaration()) {
5283 SrcFileMD = CallingFunc->getMetadata(
"thinlto_src_file");
5288 assert(SrcFileMD || OrigName ==
F.getName());
5290 StringRef SrcFile = M.getSourceFileName();
5302 if (!TheFnVI && OrigName ==
F.getName() &&
F.hasLocalLinkage() &&
5303 F.getName().contains(
'.')) {
5304 OrigName =
F.getName().rsplit(
'.').first;
5313 assert(TheFnVI ||
F.isDeclaration());
// Sets up the state needed for indirect-call promotion during the ThinLTO
// backend: creates the ICallPromotionAnalysis and the InstrProfSymtab. On
// symtab creation failure, emits a module-context error (return value path
// is in elided lines).
5317bool MemProfContextDisambiguation::initializeIndirectCallPromotionInfo(
5319  ICallAnalysis = std::make_unique<ICallPromotionAnalysis>();
5320  Symtab = std::make_unique<InstrProfSymtab>();
// NOTE(review): the two boolean arguments to Symtab->create are flags whose
// meaning isn't visible here — confirm against the InstrProfSymtab API.
5331  if (
Error E = Symtab->create(M,
true,
false)) {
5332    std::string SymtabFailure =
toString(std::move(
E));
5333    M.getContext().emitError(
"Failed to create symtab: " + SymtabFailure);
5346 auto MIBIter = AllocNode.
MIBs.begin();
5347 for (
auto &MDOp : MemProfMD->
operands()) {
5349 auto StackIdIndexIter = MIBIter->StackIdIndices.begin();
5354 auto ContextIterBegin =
5358 (ContextIterBegin != StackContext.
end() && *ContextIterBegin == 0) ? 1
5360 for (
auto ContextIter = ContextIterBegin; ContextIter != StackContext.
end();
5365 if (LastStackContextId == *ContextIter)
5367 LastStackContextId = *ContextIter;
5368 assert(StackIdIndexIter != MIBIter->StackIdIndices.end());
5378bool MemProfContextDisambiguation::applyImport(
Module &M) {
5385 std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
5387 for (
auto &
A :
M.aliases()) {
5388 auto *Aliasee =
A.getAliaseeObject();
5390 FuncToAliasMap[
F].insert(&
A);
5393 if (!initializeIndirectCallPromotionInfo(M))
5400 OptimizationRemarkEmitter ORE(&
F);
5403 bool ClonesCreated =
false;
5404 unsigned NumClonesCreated = 0;
5405 auto CloneFuncIfNeeded = [&](
unsigned NumClones) {
5415 if (ClonesCreated) {
5416 assert(NumClonesCreated == NumClones);
5423 ClonesCreated =
true;
5424 NumClonesCreated = NumClones;
5427 auto CloneCallsite = [&](
const CallsiteInfo &StackNode, CallBase *CB,
5442 if (CalledFunction != CB->getCalledOperand() &&
5443 (!GA || CalledFunction != GA->getAliaseeObject())) {
5444 SkippedCallsCloning++;
5450 auto CalleeOrigName = CalledFunction->getName();
5451 for (
unsigned J = 0; J < StackNode.
Clones.
size(); J++) {
5454 if (!StackNode.
Clones[J])
5456 auto NewF =
M.getOrInsertFunction(
5458 CalledFunction->getFunctionType());
5472 ORE.emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofCall", CBClone)
5473 <<
ore::NV(
"Call", CBClone) <<
" in clone "
5475 <<
" assigned to call function clone "
5476 <<
ore::NV(
"Callee", NewF.getCallee()));
5490 ImportSummary->findSummaryInModule(TheFnVI,
M.getModuleIdentifier());
5494 auto SrcModuleMD =
F.getMetadata(
"thinlto_src_module");
5496 "enable-import-metadata is needed to emit thinlto_src_module");
5497 StringRef SrcModule =
5500 if (GVS->modulePath() == SrcModule) {
5501 GVSummary = GVS.get();
5505 assert(GVSummary && GVSummary->modulePath() == SrcModule);
5515 if (
FS->allocs().empty() &&
FS->callsites().empty())
5518 auto SI =
FS->callsites().begin();
5519 auto AI =
FS->allocs().begin();
5524 DenseMap<ValueInfo, CallsiteInfo> MapTailCallCalleeVIToCallsite;
5527 for (
auto CallsiteIt =
FS->callsites().rbegin();
5528 CallsiteIt !=
FS->callsites().rend(); CallsiteIt++) {
5529 auto &Callsite = *CallsiteIt;
5533 if (!Callsite.StackIdIndices.empty())
5535 MapTailCallCalleeVIToCallsite.
insert({Callsite.Callee, Callsite});
5544 for (
auto &BB :
F) {
5545 for (
auto &
I : BB) {
5551 auto *CalledValue = CB->getCalledOperand();
5552 auto *CalledFunction = CB->getCalledFunction();
5553 if (CalledValue && !CalledFunction) {
5554 CalledValue = CalledValue->stripPointerCasts();
5561 assert(!CalledFunction &&
5562 "Expected null called function in callsite for alias");
5566 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
5567 I.getMetadata(LLVMContext::MD_callsite));
5568 auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof);
5572 if (CB->getAttributes().hasFnAttr(
"memprof")) {
5574 CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
5575 ? AllocTypeColdThinBackend++
5576 : AllocTypeNotColdThinBackend++;
5577 OrigAllocsThinBackend++;
5578 AllocVersionsThinBackend++;
5579 if (!MaxAllocVersionsThinBackend)
5580 MaxAllocVersionsThinBackend = 1;
5587 auto &AllocNode = *(AI++);
5595 CloneFuncIfNeeded(AllocNode.Versions.size());
5597 OrigAllocsThinBackend++;
5598 AllocVersionsThinBackend += AllocNode.Versions.size();
5599 if (MaxAllocVersionsThinBackend < AllocNode.Versions.size())
5600 MaxAllocVersionsThinBackend = AllocNode.Versions.size();
5610 if (AllocNode.Versions.size() == 1 &&
5613 AllocationType::NotCold ||
5615 AllocationType::None);
5616 UnclonableAllocsThinBackend++;
5622 return Type == ((uint8_t)AllocationType::NotCold |
5623 (uint8_t)AllocationType::Cold);
5627 for (
unsigned J = 0; J < AllocNode.Versions.size(); J++) {
5629 if (AllocNode.Versions[J] == (uint8_t)AllocationType::None)
5632 AllocTy == AllocationType::Cold ? AllocTypeColdThinBackend++
5633 : AllocTypeNotColdThinBackend++;
5648 ORE.emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofAttribute", CBClone)
5649 <<
ore::NV(
"AllocationCall", CBClone) <<
" in clone "
5651 <<
" marked with memprof allocation attribute "
5652 <<
ore::NV(
"Attribute", AllocTypeString));
5654 }
else if (!CallsiteContext.empty()) {
5655 if (!CalledFunction) {
5659 assert(!CI || !CI->isInlineAsm());
5669 recordICPInfo(CB,
FS->callsites(), SI, ICallAnalysisInfo);
5675 CloneFuncIfNeeded(NumClones);
5680 assert(SI !=
FS->callsites().end());
5681 auto &StackNode = *(
SI++);
5687 for (
auto StackId : CallsiteContext) {
5689 assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) ==
5695 CloneCallsite(StackNode, CB, CalledFunction);
5697 }
else if (CB->isTailCall() && CalledFunction) {
5700 ValueInfo CalleeVI =
5702 if (CalleeVI && MapTailCallCalleeVIToCallsite.
count(CalleeVI)) {
5703 auto Callsite = MapTailCallCalleeVIToCallsite.
find(CalleeVI);
5704 assert(Callsite != MapTailCallCalleeVIToCallsite.
end());
5705 CloneCallsite(Callsite->second, CB, CalledFunction);
5712 performICP(M,
FS->callsites(), VMaps, ICallAnalysisInfo, ORE);
5722 for (
auto &BB :
F) {
5723 for (
auto &
I : BB) {
5726 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
5727 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
5735unsigned MemProfContextDisambiguation::recordICPInfo(
5740 uint32_t NumCandidates;
5741 uint64_t TotalCount;
5742 auto CandidateProfileData =
5743 ICallAnalysis->getPromotionCandidatesForInstruction(CB, TotalCount,
5745 if (CandidateProfileData.empty())
5751 bool ICPNeeded =
false;
5752 unsigned NumClones = 0;
5753 size_t CallsiteInfoStartIndex = std::distance(AllCallsites.
begin(), SI);
5754 for (
const auto &Candidate : CandidateProfileData) {
5756 auto CalleeValueInfo =
5758 ImportSummary->getValueInfo(Candidate.Value);
5761 assert(!CalleeValueInfo ||
SI->Callee == CalleeValueInfo);
5763 auto &StackNode = *(
SI++);
5768 [](
unsigned CloneNo) { return CloneNo != 0; });
5778 ICallAnalysisInfo.
push_back({CB, CandidateProfileData.vec(), NumCandidates,
5779 TotalCount, CallsiteInfoStartIndex});
5783void MemProfContextDisambiguation::performICP(
5785 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
5787 OptimizationRemarkEmitter &ORE) {
5794 for (
auto &
Info : ICallAnalysisInfo) {
5797 auto TotalCount =
Info.TotalCount;
5798 unsigned NumPromoted = 0;
5799 unsigned NumClones = 0;
5801 for (
auto &Candidate :
Info.CandidateProfileData) {
5812 Function *TargetFunction = Symtab->getFunction(Candidate.Value);
5813 if (TargetFunction ==
nullptr ||
5821 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnableToFindTarget", CB)
5822 <<
"Memprof cannot promote indirect call: target with md5sum "
5823 <<
ore::NV(
"target md5sum", Candidate.Value) <<
" not found";
5832 const char *Reason =
nullptr;
5835 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnableToPromote", CB)
5836 <<
"Memprof cannot promote indirect call to "
5837 <<
ore::NV(
"TargetFunction", TargetFunction)
5838 <<
" with count of " <<
ore::NV(
"TotalCount", TotalCount)
5849 CallBase *CBClone = CB;
5850 for (
unsigned J = 0; J < NumClones; J++) {
5859 TotalCount, isSamplePGO, &ORE);
5860 auto *TargetToUse = TargetFunction;
5863 if (StackNode.
Clones[J]) {
5882 <<
ore::NV(
"Call", CBClone) <<
" in clone "
5884 <<
" promoted and assigned to call function clone "
5885 <<
ore::NV(
"Callee", TargetToUse));
5889 TotalCount -= Candidate.Count;
5894 CallBase *CBClone = CB;
5895 for (
unsigned J = 0; J < NumClones; J++) {
5900 CBClone->
setMetadata(LLVMContext::MD_prof,
nullptr);
5903 if (TotalCount != 0)
5905 M, *CBClone,
ArrayRef(
Info.CandidateProfileData).slice(NumPromoted),
5906 TotalCount, IPVK_IndirectCallTarget,
Info.NumCandidates);
5911template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
5912bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {
5914 dbgs() <<
"CCG before cloning:\n";
5918 exportToDot(
"postbuild");
5931 dbgs() <<
"CCG after cloning:\n";
5935 exportToDot(
"cloned");
5937 bool Changed = assignFunctions();
5940 dbgs() <<
"CCG after assigning function clones:\n";
5944 exportToDot(
"clonefuncassign");
5947 printTotalSizes(
errs());
5952bool MemProfContextDisambiguation::processModule(
5954 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) {
5959 return applyImport(M);
5972 ModuleCallsiteContextGraph CCG(M, OREGetter);
5973 return CCG.process();
5978 : ImportSummary(Summary), isSamplePGO(isSamplePGO) {
5983 "-memprof-dot-scope=alloc requires -memprof-dot-alloc-id");
5987 "-memprof-dot-scope=context requires -memprof-dot-context-id");
5991 "-memprof-dot-scope=all can't have both -memprof-dot-alloc-id and "
5992 "-memprof-dot-context-id");
5993 if (ImportSummary) {
6003 auto ReadSummaryFile =
6005 if (!ReadSummaryFile) {
6012 if (!ImportSummaryForTestingOrErr) {
6018 ImportSummaryForTesting = std::move(*ImportSummaryForTestingOrErr);
6019 ImportSummary = ImportSummaryForTesting.get();
6028 if (!processModule(M, OREGetter))
6045 IndexCallsiteContextGraph CCG(Index, isPrevailing);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Prepare AGPR Alloc
Unify divergent function exit nodes
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
Module.h This file contains the declarations for the Module class.
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
static SmallVector< std::unique_ptr< ValueToValueMapTy >, 4 > createFunctionClones(Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE, std::map< const Function *, SmallPtrSet< const GlobalAlias *, 1 > > &FuncToAliasMap)
static cl::opt< unsigned > TailCallSearchDepth("memprof-tail-call-search-depth", cl::init(5), cl::Hidden, cl::desc("Max depth to recursively search for missing " "frames through tail calls."))
static cl::opt< DotScope > DotGraphScope("memprof-dot-scope", cl::desc("Scope of graph to export to dot"), cl::Hidden, cl::init(DotScope::All), cl::values(clEnumValN(DotScope::All, "all", "Export full callsite graph"), clEnumValN(DotScope::Alloc, "alloc", "Export only nodes with contexts feeding given " "-memprof-dot-alloc-id"), clEnumValN(DotScope::Context, "context", "Export only nodes with given -memprof-dot-context-id")))
static cl::opt< bool > DoMergeIteration("memprof-merge-iteration", cl::init(true), cl::Hidden, cl::desc("Iteratively apply merging on a node to catch new callers"))
static bool isMemProfClone(const Function &F)
static cl::opt< unsigned > AllocIdForDot("memprof-dot-alloc-id", cl::init(0), cl::Hidden, cl::desc("Id of alloc to export if -memprof-dot-scope=alloc " "or to highlight if -memprof-dot-scope=all"))
static cl::opt< unsigned > ContextIdForDot("memprof-dot-context-id", cl::init(0), cl::Hidden, cl::desc("Id of context to export if -memprof-dot-scope=context or to " "highlight otherwise"))
static cl::opt< bool > ExportToDot("memprof-export-to-dot", cl::init(false), cl::Hidden, cl::desc("Export graph to dot files."))
static void checkEdge(const std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > &Edge)
static cl::opt< bool > AllowRecursiveCallsites("memprof-allow-recursive-callsites", cl::init(true), cl::Hidden, cl::desc("Allow cloning of callsites involved in recursive cycles"))
bool checkColdOrNotCold(uint8_t AllocType)
static ValueInfo findValueInfoForFunc(const Function &F, const Module &M, const ModuleSummaryIndex *ImportSummary, const Function *CallingFunc=nullptr)
static cl::opt< bool > CloneRecursiveContexts("memprof-clone-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts through recursive cycles"))
static std::string getAllocTypeString(uint8_t AllocTypes)
static cl::opt< unsigned > MemProfICPNoInlineThreshold("memprof-icp-noinline-threshold", cl::init(2), cl::Hidden, cl::desc("Minimum absolute count for promoted target to be inlinable"))
bool DOTGraphTraits< constCallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * >::DoHighlight
static unsigned getMemProfCloneNum(const Function &F)
static cl::opt< bool > VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden, cl::desc("Perform verification checks on CallingContextGraph."))
static void checkNode(const ContextNode< DerivedCCG, FuncTy, CallTy > *Node, bool CheckEdges=true)
static cl::opt< bool > MergeClones("memprof-merge-clones", cl::init(true), cl::Hidden, cl::desc("Merge clones before assigning functions"))
static std::string getMemProfFuncName(Twine Base, unsigned CloneNo)
static cl::opt< std::string > MemProfImportSummary("memprof-import-summary", cl::desc("Import summary to use for testing the ThinLTO backend via opt"), cl::Hidden)
static const std::string MemProfCloneSuffix
static void updateSubprogramLinkageName(Function *NewFunc, StringRef Name)
static cl::opt< bool > AllowRecursiveContexts("memprof-allow-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts having recursive cycles"))
static cl::opt< std::string > DotFilePathPrefix("memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path prefix of the MemProf dot files."))
static cl::opt< bool > VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden, cl::desc("Perform frequent verification checks on nodes."))
static void checkAllocContextIds(const AllocInfo &AllocNode, const MDNode *MemProfMD, const CallStack< MDNode, MDNode::op_iterator > &CallsiteContext, const ModuleSummaryIndex *ImportSummary)
static cl::opt< bool > DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden, cl::desc("Dump CallingContextGraph to stdout after each stage."))
This is the interface to build a ModuleSummaryIndex for a module.
ModuleSummaryIndex.h This file contains the declarations the classes that hold the module index and s...
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
FunctionAnalysisManager FAM
std::pair< BasicBlock *, BasicBlock * > Edge
This file defines generic set operations that may be used on set's of different types,...
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
void print(OutputBuffer &OB) const
ValueInfo getAliaseeVI() const
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool empty() const
empty - Check if the array is empty.
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
void setCalledOperand(Value *V)
Subprogram description. Uses SubclassData1.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Implements a dense probed hash-table based set.
Function summary information to aid decisions and implementation of importing.
FunctionType * getFunctionType() const
Returns the FunctionType for me.
DISubprogram * getSubprogram() const
Get the attached subprogram.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
static LLVM_ABI GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Function and variable summary information to aid decisions and implementation of importing.
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
static bool isLocalLinkage(LinkageTypes Linkage)
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
uint64_t GUID
Declare a type to represent a global unique identifier for a global value.
Module * getParent()
Get the module that this global value is contained inside of...
static LLVM_ABI std::string getGlobalIdentifier(StringRef Name, GlobalValue::LinkageTypes Linkage, StringRef FileName)
Return the modified name for a global value suitable to be used as the key for a global lookup (e....
@ InternalLinkage
Rename collisions when linking (static functions).
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
LLVM_ABI TempMDNode clone() const
Create a (temporary) clone of this.
static std::enable_if_t< std::is_base_of< MDNode, T >::value, T * > replaceWithUniqued(std::unique_ptr< T, TempMDNodeDeleter > N)
Replace a temporary node with a uniqued one.
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
size_type count(const KeyT &Key) const
MemProfContextDisambiguation(const ModuleSummaryIndex *Summary=nullptr, bool isSamplePGO=false)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
Class to hold module path string table and global value map, and encapsulate methods for operating on...
static StringRef getOriginalNameBeforePromote(StringRef Name)
Helper to obtain the unpromoted name for a global value (or the original name if not promoted).
ValueInfo getValueInfo(const GlobalValueSummaryMapTy::value_type &R) const
Return a ValueInfo for the index value_type (convenient when iterating index).
uint64_t getStackIdAtIndex(unsigned Index) const
A Module instance is used to store all the information related to an LLVM module.
LLVMContext & getContext() const
Get the global data context.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
A discriminated union of two or more pointer types, with the discriminator in the low bit of the poin...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
void insert_range(Range &&R)
void swap(DenseSetImpl &RHS)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
bool erase(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
An efficient, type-erasing, non-owning reference to a callable.
Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresp...
CallStackIterator beginAfterSharedPrefix(const CallStack &Other)
CallStackIterator end() const
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
@ CE
Windows NT (Windows on ARM)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
LLVM_ABI AllocationType getMIBAllocType(const MDNode *MIB)
Returns the allocation type from an MIB metadata node.
LLVM_ABI bool metadataMayIncludeContextSizeInfo()
Whether the alloc memprof metadata may include context size info for some MIBs (but possibly not all)...
LLVM_ABI bool hasSingleAllocType(uint8_t AllocTypes)
True if the AllocTypes bitmask contains just a single type.
LLVM_ABI std::string getAllocTypeAttributeString(AllocationType Type)
Returns the string to use in attributes with the given type.
LLVM_ABI MDNode * getMIBStackNode(const MDNode *MIB)
Returns the stack node from an MIB metadata node.
LLVM_ABI void removeAnyExistingAmbiguousAttribute(CallBase *CB)
Removes any existing "ambiguous" memprof attribute.
DiagnosticInfoOptimizationBase::Argument NV
LLVM_ABI CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< NodeBase * > Node
NodeAddr< FuncNode * > Func
friend class Instruction
Iterator for Instructions in a `BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
cl::opt< unsigned > MinClonedColdBytePercent("memprof-cloning-cold-threshold", cl::init(100), cl::Hidden, cl::desc("Min percent of cold bytes to hint alloc cold during cloning"))
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
LLVM_ABI void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner={})
Log all errors (if any) in E to OS.
FunctionAddr VTableAddr Value
void stable_sort(R &&Range)
cl::opt< bool > MemProfReportHintedSizes("memprof-report-hinted-sizes", cl::init(false), cl::Hidden, cl::desc("Report total allocation sizes of hinted allocations"))
LLVM_ABI bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI bool mayHaveMemprofSummary(const CallBase *CB)
Returns true if the instruction could have memprof metadata, used to ensure consistency between summa...
static cl::opt< bool > MemProfRequireDefinitionForPromotion("memprof-require-definition-for-promotion", cl::init(false), cl::Hidden, cl::desc("Require target function definition when promoting indirect calls"))
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
void set_subtract(S1Ty &S1, const S2Ty &S2)
set_subtract(A, B) - Compute A := A - B
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
bool set_intersects(const S1Ty &S1, const S2Ty &S2)
set_intersects(A, B) - Return true iff A ^ B is non empty
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
LLVM_ABI Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndex(MemoryBufferRef Buffer)
Parse the specified bitcode buffer, returning the module summary index.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
FunctionAddr VTableAddr Count
bool set_union(S1Ty &S1, const S2Ty &S2)
set_union(A, B) - Compute A := A u B, return whether A changed.
cl::opt< bool > SupportsHotColdNew
Indicate we are linking with an allocator that supports hot/cold operator new interfaces.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
S1Ty set_intersection(const S1Ty &S1, const S2Ty &S2)
set_intersection(A, B) - Return A ^ B
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
S1Ty set_difference(const S1Ty &S1, const S2Ty &S2)
set_difference(A, B) - Return A - B
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Expected< T > errorOrToExpected(ErrorOr< T > &&EO)
Convert an ErrorOr<T> to an Expected<T>.
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
LLVM_ABI Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
DOTGraphTraits(bool IsSimple=false)
typename GTraits::NodeRef NodeRef
static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter, GraphType G)
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
typename GTraits::ChildIteratorType ChildIteratorType
static std::string getNodeAttributes(NodeRef Node, GraphType G)
static bool isNodeHidden(NodeRef Node, GraphType G)
static std::string getNodeLabel(NodeRef Node, GraphType G)
GraphTraits< GraphType > GTraits
static NodeRef getNode(const NodePtrTy &P)
static const ContextNode< DerivedCCG, FuncTy, CallTy > * GetCallee(const EdgePtrTy &P)
static ChildIteratorType child_end(NodeRef N)
std::unique_ptr< ContextNode< DerivedCCG, FuncTy, CallTy > > NodePtrTy
mapped_iterator< typename std::vector< std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > >::const_iterator, decltype(&GetCallee)> ChildIteratorType
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
const ContextNode< DerivedCCG, FuncTy, CallTy > * NodeRef
mapped_iterator< typename std::vector< NodePtrTy >::const_iterator, decltype(&getNode)> nodes_iterator
static ChildIteratorType child_begin(NodeRef N)
static NodeRef getEntryNode(GraphType G)
static nodes_iterator nodes_begin(GraphType G)
static nodes_iterator nodes_end(GraphType G)
std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > EdgePtrTy
Summary of memprof metadata on allocations.
std::vector< MIBInfo > MIBs
SmallVector< unsigned > StackIdIndices
SmallVector< unsigned > Clones
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits(bool simple=false)
An information struct used to provide DenseMap with the various necessary components for a given valu...
typename GraphType::UnknownGraphTypeError NodeRef
Struct that holds a reference to a particular GUID in a global value summary.
ArrayRef< std::unique_ptr< GlobalValueSummary > > getSummaryList() const
GlobalValue::GUID getGUID() const
PointerUnion< CallsiteInfo *, AllocInfo * > SimpleType
static SimpleType getSimplifiedValue(IndexCall &Val)
const PointerUnion< CallsiteInfo *, AllocInfo * > SimpleType
static SimpleType getSimplifiedValue(const IndexCall &Val)
Define a template that can be specialized by smart pointers to reflect the fact that they are automat...