LLVM  14.0.0git
Profile.cpp
Go to the documentation of this file.
1 //===- Profile.cpp - XRay Profile Abstraction -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Defines the XRay Profile class representing the latency profile generated by
10 // XRay's profiling mode.
11 //
12 //===----------------------------------------------------------------------===//
13 #include "llvm/XRay/Profile.h"
14 
16 #include "llvm/Support/Error.h"
18 #include "llvm/XRay/Trace.h"
19 #include <deque>
20 #include <memory>
21 
22 namespace llvm {
23 namespace xray {
24 
26  // We need to re-create all the tries from the original (O), into the current
27  // Profile being initialized, through the Block instances we see.
28  for (const auto &Block : O) {
29  Blocks.push_back({Block.Thread, {}});
30  auto &B = Blocks.back();
31  for (const auto &PathData : Block.PathData)
32  B.PathData.push_back({internPath(cantFail(O.expandPath(PathData.first))),
33  PathData.second});
34  }
35 }
36 
38  Profile P = O;
39  *this = std::move(P);
40  return *this;
41 }
42 
43 namespace {
44 
45 struct BlockHeader {
49 };
50 
51 static Expected<BlockHeader> readBlockHeader(DataExtractor &Extractor,
52  uint64_t &Offset) {
53  BlockHeader H;
54  uint64_t CurrentOffset = Offset;
55  H.Size = Extractor.getU32(&Offset);
56  if (Offset == CurrentOffset)
57  return make_error<StringError>(
58  Twine("Error parsing block header size at offset '") +
59  Twine(CurrentOffset) + "'",
60  std::make_error_code(std::errc::invalid_argument));
61  CurrentOffset = Offset;
62  H.Number = Extractor.getU32(&Offset);
63  if (Offset == CurrentOffset)
64  return make_error<StringError>(
65  Twine("Error parsing block header number at offset '") +
66  Twine(CurrentOffset) + "'",
67  std::make_error_code(std::errc::invalid_argument));
68  CurrentOffset = Offset;
69  H.Thread = Extractor.getU64(&Offset);
70  if (Offset == CurrentOffset)
71  return make_error<StringError>(
72  Twine("Error parsing block header thread id at offset '") +
73  Twine(CurrentOffset) + "'",
74  std::make_error_code(std::errc::invalid_argument));
75  return H;
76 }
77 
78 static Expected<std::vector<Profile::FuncID>> readPath(DataExtractor &Extractor,
79  uint64_t &Offset) {
80  // We're reading a sequence of int32_t's until we find a 0.
81  std::vector<Profile::FuncID> Path;
82  auto CurrentOffset = Offset;
83  int32_t FuncId;
84  do {
85  FuncId = Extractor.getSigned(&Offset, 4);
86  if (CurrentOffset == Offset)
87  return make_error<StringError>(
88  Twine("Error parsing path at offset '") + Twine(CurrentOffset) + "'",
89  std::make_error_code(std::errc::invalid_argument));
90  CurrentOffset = Offset;
91  Path.push_back(FuncId);
92  } while (FuncId != 0);
93  return std::move(Path);
94 }
95 
96 static Expected<Profile::Data> readData(DataExtractor &Extractor,
97  uint64_t &Offset) {
98  // We expect a certain number of elements for Data:
99  // - A 64-bit CallCount
100  // - A 64-bit CumulativeLocalTime counter
102  auto CurrentOffset = Offset;
103  D.CallCount = Extractor.getU64(&Offset);
104  if (CurrentOffset == Offset)
105  return make_error<StringError>(
106  Twine("Error parsing call counts at offset '") + Twine(CurrentOffset) +
107  "'",
108  std::make_error_code(std::errc::invalid_argument));
109  CurrentOffset = Offset;
110  D.CumulativeLocalTime = Extractor.getU64(&Offset);
111  if (CurrentOffset == Offset)
112  return make_error<StringError>(
113  Twine("Error parsing cumulative local time at offset '") +
114  Twine(CurrentOffset) + "'",
115  std::make_error_code(std::errc::invalid_argument));
116  return D;
117 }
118 
119 } // namespace
120 
122  if (B.PathData.empty())
123  return make_error<StringError>(
124  "Block may not have empty path data.",
125  std::make_error_code(std::errc::invalid_argument));
126 
127  Blocks.emplace_back(std::move(B));
128  return Error::success();
129 }
130 
132  auto It = PathIDMap.find(P);
133  if (It == PathIDMap.end())
134  return make_error<StringError>(
135  Twine("PathID not found: ") + Twine(P),
136  std::make_error_code(std::errc::invalid_argument));
137  std::vector<Profile::FuncID> Path;
138  for (auto Node = It->second; Node; Node = Node->Caller)
139  Path.push_back(Node->Func);
140  return std::move(Path);
141 }
142 
144  if (P.empty())
145  return 0;
146 
147  auto RootToLeafPath = reverse(P);
148 
149  // Find the root.
150  auto It = RootToLeafPath.begin();
151  auto PathRoot = *It++;
152  auto RootIt =
153  find_if(Roots, [PathRoot](TrieNode *N) { return N->Func == PathRoot; });
154 
155  // If we've not seen this root before, remember it.
156  TrieNode *Node = nullptr;
157  if (RootIt == Roots.end()) {
158  NodeStorage.emplace_back();
159  Node = &NodeStorage.back();
160  Node->Func = PathRoot;
161  Roots.push_back(Node);
162  } else {
163  Node = *RootIt;
164  }
165 
166  // Now traverse the path, re-creating if necessary.
167  while (It != RootToLeafPath.end()) {
168  auto NodeFuncID = *It++;
169  auto CalleeIt = find_if(Node->Callees, [NodeFuncID](TrieNode *N) {
170  return N->Func == NodeFuncID;
171  });
172  if (CalleeIt == Node->Callees.end()) {
173  NodeStorage.emplace_back();
174  auto NewNode = &NodeStorage.back();
175  NewNode->Func = NodeFuncID;
176  NewNode->Caller = Node;
177  Node->Callees.push_back(NewNode);
178  Node = NewNode;
179  } else {
180  Node = *CalleeIt;
181  }
182  }
183 
184  // At this point, Node *must* be pointing at the leaf.
185  assert(Node->Func == P.front());
186  if (Node->ID == 0) {
187  Node->ID = NextID++;
188  PathIDMap.insert({Node->ID, Node});
189  }
190  return Node->ID;
191 }
192 
194  Profile Merged;
195  using PathDataMap = DenseMap<Profile::PathID, Profile::Data>;
196  using PathDataMapPtr = std::unique_ptr<PathDataMap>;
197  using PathDataVector = decltype(Profile::Block::PathData);
198  using ThreadProfileIndexMap = DenseMap<Profile::ThreadID, PathDataMapPtr>;
199  ThreadProfileIndexMap ThreadProfileIndex;
200 
201  for (const auto &P : {std::ref(L), std::ref(R)})
202  for (const auto &Block : P.get()) {
203  ThreadProfileIndexMap::iterator It;
204  std::tie(It, std::ignore) = ThreadProfileIndex.insert(
205  {Block.Thread, PathDataMapPtr{new PathDataMap()}});
206  for (const auto &PathAndData : Block.PathData) {
207  auto &PathID = PathAndData.first;
208  auto &Data = PathAndData.second;
209  auto NewPathID =
210  Merged.internPath(cantFail(P.get().expandPath(PathID)));
211  PathDataMap::iterator PathDataIt;
212  bool Inserted;
213  std::tie(PathDataIt, Inserted) = It->second->insert({NewPathID, Data});
214  if (!Inserted) {
215  auto &ExistingData = PathDataIt->second;
216  ExistingData.CallCount += Data.CallCount;
217  ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime;
218  }
219  }
220  }
221 
222  for (const auto &IndexedThreadBlock : ThreadProfileIndex) {
223  PathDataVector PathAndData;
224  PathAndData.reserve(IndexedThreadBlock.second->size());
225  copy(*IndexedThreadBlock.second, std::back_inserter(PathAndData));
226  cantFail(
227  Merged.addBlock({IndexedThreadBlock.first, std::move(PathAndData)}));
228  }
229  return Merged;
230 }
231 
233  Profile Merged;
234  using PathDataMap = DenseMap<Profile::PathID, Profile::Data>;
235  PathDataMap PathData;
236  using PathDataVector = decltype(Profile::Block::PathData);
237  for (const auto &P : {std::ref(L), std::ref(R)})
238  for (const auto &Block : P.get())
239  for (const auto &PathAndData : Block.PathData) {
240  auto &PathId = PathAndData.first;
241  auto &Data = PathAndData.second;
242  auto NewPathID =
243  Merged.internPath(cantFail(P.get().expandPath(PathId)));
244  PathDataMap::iterator PathDataIt;
245  bool Inserted;
246  std::tie(PathDataIt, Inserted) = PathData.insert({NewPathID, Data});
247  if (!Inserted) {
248  auto &ExistingData = PathDataIt->second;
249  ExistingData.CallCount += Data.CallCount;
250  ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime;
251  }
252  }
253 
254  // In the end there's a single Block, for thread 0.
255  PathDataVector Block;
256  Block.reserve(PathData.size());
257  copy(PathData, std::back_inserter(Block));
258  cantFail(Merged.addBlock({0, std::move(Block)}));
259  return Merged;
260 }
261 
264  if (!FdOrErr)
265  return FdOrErr.takeError();
266 
267  uint64_t FileSize;
268  if (auto EC = sys::fs::file_size(Filename, FileSize))
269  return make_error<StringError>(
270  Twine("Cannot get filesize of '") + Filename + "'", EC);
271 
272  std::error_code EC;
273  sys::fs::mapped_file_region MappedFile(
274  *FdOrErr, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0,
275  EC);
276  sys::fs::closeFile(*FdOrErr);
277  if (EC)
278  return make_error<StringError>(
279  Twine("Cannot mmap profile '") + Filename + "'", EC);
280  StringRef Data(MappedFile.data(), MappedFile.size());
281 
282  Profile P;
283  uint64_t Offset = 0;
284  DataExtractor Extractor(Data, true, 8);
285 
286  // For each block we get from the file:
287  while (Offset != MappedFile.size()) {
288  auto HeaderOrError = readBlockHeader(Extractor, Offset);
289  if (!HeaderOrError)
290  return HeaderOrError.takeError();
291 
292  // TODO: Maybe store this header information for each block, even just for
293  // debugging?
294  const auto &Header = HeaderOrError.get();
295 
296  // Read in the path data.
297  auto PathOrError = readPath(Extractor, Offset);
298  if (!PathOrError)
299  return PathOrError.takeError();
300  const auto &Path = PathOrError.get();
301 
302  // For each path we encounter, we should intern it to get a PathID.
303  auto DataOrError = readData(Extractor, Offset);
304  if (!DataOrError)
305  return DataOrError.takeError();
306  auto &Data = DataOrError.get();
307 
308  if (auto E =
309  P.addBlock(Profile::Block{Profile::ThreadID{Header.Thread},
310  {{P.internPath(Path), std::move(Data)}}}))
311  return std::move(E);
312  }
313 
314  return P;
315 }
316 
317 namespace {
318 
319 struct StackEntry {
321  Profile::FuncID FuncId;
322 };
323 
324 } // namespace
325 
327  Profile P;
328 
329  // The implementation of the algorithm re-creates the execution of
330  // the functions based on the trace data. To do this, we set up a number of
331  // data structures to track the execution context of every thread in the
332  // Trace.
335  ThreadPathData;
336 
337  // We then do a pass through the Trace to account data on a per-thread-basis.
338  for (const auto &E : T) {
339  auto &TSD = ThreadStacks[E.TId];
340  switch (E.Type) {
341  case RecordTypes::ENTER:
342  case RecordTypes::ENTER_ARG:
343 
344  // Push entries into the function call stack.
345  TSD.push_back({E.TSC, E.FuncId});
346  break;
347 
348  case RecordTypes::EXIT:
349  case RecordTypes::TAIL_EXIT:
350 
351  // Exits cause some accounting to happen, based on the state of the stack.
352  // For each function we pop off the stack, we take note of the path and
353  // record the cumulative state for this path. As we're doing this, we
354  // intern the path into the Profile.
355  while (!TSD.empty()) {
356  auto Top = TSD.back();
357  auto FunctionLocalTime = AbsoluteDifference(Top.Timestamp, E.TSC);
359  transform(reverse(TSD), std::back_inserter(Path),
360  std::mem_fn(&StackEntry::FuncId));
361  auto InternedPath = P.internPath(Path);
362  auto &TPD = ThreadPathData[E.TId][InternedPath];
363  ++TPD.CallCount;
364  TPD.CumulativeLocalTime += FunctionLocalTime;
365  TSD.pop_back();
366 
367  // If we've matched the corresponding entry event for this function,
368  // then we exit the loop.
369  if (Top.FuncId == E.FuncId)
370  break;
371 
372  // FIXME: Consider the intermediate times and the cumulative tree time
373  // as well.
374  }
375 
376  break;
377 
378  case RecordTypes::CUSTOM_EVENT:
379  case RecordTypes::TYPED_EVENT:
380  // TODO: Support an extension point to allow handling of custom and typed
381  // events in profiles.
382  break;
383  }
384  }
385 
386  // Once we've gone through the Trace, we now create one Block per thread in
387  // the Profile.
388  for (const auto &ThreadPaths : ThreadPathData) {
389  const auto &TID = ThreadPaths.first;
390  const auto &PathsData = ThreadPaths.second;
391  if (auto E = P.addBlock({
392  TID,
393  std::vector<std::pair<Profile::PathID, Profile::Data>>(
394  PathsData.begin(), PathsData.end()),
395  }))
396  return std::move(E);
397  }
398 
399  return P;
400 }
401 
402 } // namespace xray
403 } // namespace llvm
llvm
This file implements support for optimizing divisions by a constant.
Definition: AllocatorList.h:23
FileSystem.h
llvm::xray::loadProfile
Expected< Profile > loadProfile(StringRef Filename)
This function will attempt to load an XRay Profiling Mode profile from the provided |Filename|.
Definition: Profile.cpp:262
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
Thread
uint64_t Thread
Definition: Profile.cpp:48
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Timestamp
uint64_t Timestamp
Definition: Profile.cpp:320
llvm::xray::Profile::operator=
Profile & operator=(Profile &&O) noexcept
Definition: Profile.h:93
llvm::Error::success
static ErrorSuccess success()
Create a success value.
Definition: Error.h:331
Error.h
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:333
llvm::copy
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:1602
T
#define T
Definition: Mips16ISelLowering.cpp:341
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
llvm::Expected
Tagged union holding either a T or a Error.
Definition: APFloat.h:42
llvm::Data
@ Data
Definition: SIMachineScheduler.h:55
llvm::xray::profileFromTrace
Expected< Profile > profileFromTrace(const Trace &T)
This function takes a Trace and creates a Profile instance from it.
Definition: Profile.cpp:326
llvm::xray::Profile::Profile
Profile()=default
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::xray::mergeProfilesByStack
Profile mergeProfilesByStack(const Profile &L, const Profile &R)
This algorithm will merge two Profile instances into a single Profile instance, aggregating blocks by...
Definition: Profile.cpp:232
llvm::xray::Profile::Block
Definition: Profile.h:62
llvm::sys::fs::openNativeFileForRead
Expected< file_t > openNativeFileForRead(const Twine &Name, OpenFlags Flags=OF_None, SmallVectorImpl< char > *RealPath=nullptr)
Opens the file with the given name in a read-only mode, returning its open file descriptor.
llvm::xray::Profile::addBlock
Error addBlock(Block &&B)
Appends a fully-formed Block instance into the Profile.
Definition: Profile.cpp:121
Profile.h
readonly
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from where P can be anything The alignment inference code cannot handle loads from globals in static non mode because it doesn t look through the extra dyld stub load If you try vec_align ll without relocation you ll see what I mean We should lower which eliminates a constant pool load For float z nounwind readonly
Definition: README-SSE.txt:421
llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:197
uint64_t
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::DenseMap
Definition: DenseMap.h:714
FuncId
Profile::FuncID FuncId
Definition: Profile.cpp:321
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Size
uint32_t Size
Definition: Profile.cpp:46
llvm::sys::fs::closeFile
std::error_code closeFile(file_t &F)
Close the file object.
llvm::xray::Profile::internPath
PathID internPath(ArrayRef< FuncID > P)
The stack represented in |P| must be in stack order (leaf to root).
Definition: Profile.cpp:143
llvm::xray::Profile
Profile instances are thread-compatible.
Definition: Profile.h:51
llvm::xray::Profile::PathID
unsigned PathID
Definition: Profile.h:54
llvm::sys::fs::mapped_file_region::data
char * data() const
Definition: Path.cpp:1158
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::cantFail
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
Definition: Error.h:737
Number
uint32_t Number
Definition: Profile.cpp:47
uint32_t
Trace.h
Node
Definition: ItaniumDemangle.h:235
llvm::DenseMapBase::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
llvm::xray::Profile::Block::PathData
std::vector< std::pair< PathID, Data > > PathData
Definition: Profile.h:64
llvm::make_error_code
std::error_code make_error_code(BitcodeError E)
Definition: BitcodeReader.h:270
llvm::find_if
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1578
llvm::xray::Profile::Block::Thread
ThreadID Thread
Definition: Profile.h:63
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
H
#define H(x, y, z)
Definition: MD5.cpp:58
llvm::DenseMapBase::end
iterator end()
Definition: DenseMap.h:83
llvm::Error
Lightweight error class with error context and mandatory checking.
Definition: Error.h:157
llvm::xray::mergeProfilesByThread
Profile mergeProfilesByThread(const Profile &L, const Profile &R)
This algorithm will merge two Profile instances into a single Profile instance, aggregating blocks by...
Definition: Profile.cpp:193
DataExtractor.h
llvm::sys::fs::mapped_file_region::size
size_t size() const
Definition: Path.cpp:1153
transform
instcombine should handle this transform
Definition: README.txt:262
llvm::Expected::takeError
Error takeError()
Take ownership of the stored error.
Definition: Error.h:589
llvm::sys::fs::mapped_file_region
This class represents a memory mapped file.
Definition: FileSystem.h:1244
llvm::sys::fs::file_size
std::error_code file_size(const Twine &Path, uint64_t &Result)
Get file size.
Definition: FileSystem.h:691
llvm::DataExtractor
Definition: DataExtractor.h:41
N
#define N
llvm::xray::Trace
A Trace object represents the records that have been loaded from XRay log files generated by instrume...
Definition: Trace.h:46
llvm::xray::Profile::expandPath
Expected< std::vector< FuncID > > expandPath(PathID P) const
Provides a sequence of function IDs from a previously interned PathID.
Definition: Profile.cpp:131
llvm::AbsoluteDifference
std::enable_if_t< std::is_unsigned< T >::value, T > AbsoluteDifference(T X, T Y)
Subtract two unsigned integers, X and Y, of type T and return the absolute value of the result.
Definition: MathExtras.h:794