LLVM 19.0.0git
TimeProfiler.cpp
Go to the documentation of this file.
1//===-- TimeProfiler.cpp - Hierarchical Time Profiler ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a hierarchical time profiler.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/STLExtras.h"
16#include "llvm/ADT/StringMap.h"
17#include "llvm/Support/JSON.h"
18#include "llvm/Support/Path.h"
21#include <algorithm>
22#include <cassert>
23#include <chrono>
24#include <memory>
25#include <mutex>
26#include <string>
27#include <vector>
28
29using namespace llvm;
30
31namespace {
32
33using std::chrono::duration;
34using std::chrono::duration_cast;
35using std::chrono::microseconds;
36using std::chrono::steady_clock;
38using std::chrono::time_point;
39using std::chrono::time_point_cast;
40
41struct TimeTraceProfilerInstances {
42 std::mutex Lock;
43 std::vector<TimeTraceProfiler *> List;
44};
45
46TimeTraceProfilerInstances &getTimeTraceProfilerInstances() {
47 static TimeTraceProfilerInstances Instances;
48 return Instances;
49}
50
51} // anonymous namespace
52
53// Per Thread instance
55
58}
59
60namespace {
61
62using ClockType = steady_clock;
63using TimePointType = time_point<ClockType>;
64using DurationType = duration<ClockType::rep, ClockType::period>;
65using CountAndDurationType = std::pair<size_t, DurationType>;
66using NameAndCountAndDurationType =
67 std::pair<std::string, CountAndDurationType>;
68
69} // anonymous namespace
70
71/// Represents an open or completed time section entry to be captured.
73 const TimePointType Start;
74 TimePointType End;
75 const std::string Name;
76 const std::string Detail;
77 const bool AsyncEvent = false;
78 TimeTraceProfilerEntry(TimePointType &&S, TimePointType &&E, std::string &&N,
79 std::string &&Dt, bool Ae)
80 : Start(std::move(S)), End(std::move(E)), Name(std::move(N)),
81 Detail(std::move(Dt)), AsyncEvent(Ae) {}
82
83 // Calculate timings for FlameGraph. Cast time points to microsecond precision
84 // rather than casting duration. This avoids truncation issues causing inner
85 // scopes overruning outer scopes.
86 ClockType::rep getFlameGraphStartUs(TimePointType StartTime) const {
87 return (time_point_cast<microseconds>(Start) -
88 time_point_cast<microseconds>(StartTime))
89 .count();
90 }
91
92 ClockType::rep getFlameGraphDurUs() const {
93 return (time_point_cast<microseconds>(End) -
94 time_point_cast<microseconds>(Start))
95 .count();
96 }
97};
98
101 : BeginningOfTime(system_clock::now()), StartTime(ClockType::now()),
102 ProcName(ProcName), Pid(sys::Process::getProcessId()),
105 }
106
108 llvm::function_ref<std::string()> Detail,
109 bool AsyncEvent = false) {
110 Stack.emplace_back(std::make_unique<TimeTraceProfilerEntry>(
111 ClockType::now(), TimePointType(), std::move(Name), Detail(),
112 AsyncEvent));
113 return Stack.back().get();
114 }
115
116 void end() {
117 assert(!Stack.empty() && "Must call begin() first");
118 end(*Stack.back().get());
119 }
120
122 assert(!Stack.empty() && "Must call begin() first");
123 E.End = ClockType::now();
124
125 // Calculate duration at full precision for overall counts.
126 DurationType Duration = E.End - E.Start;
127
128 // Only include sections lasting at least TimeTraceGranularity microseconds.
129 if (duration_cast<microseconds>(Duration).count() >= TimeTraceGranularity)
130 Entries.emplace_back(E);
131
132 // Track total time taken by each "name", but only the topmost levels of
133 // them; e.g. if there's a template instantiation that instantiates other
134 // templates from within, we only want to add the topmost one. "topmost"
135 // happens to be the ones that don't have any currently open entries above
136 // itself.
138 [&](const std::unique_ptr<TimeTraceProfilerEntry> &Val) {
139 return Val->Name == E.Name;
140 })) {
141 auto &CountAndTotal = CountAndTotalPerName[E.Name];
142 CountAndTotal.first++;
143 CountAndTotal.second += Duration;
144 };
145
147 [&](const std::unique_ptr<TimeTraceProfilerEntry> &Val) {
148 return Val.get() == &E;
149 });
150 }
151
152 // Write events from this TimeTraceProfilerInstance and
153 // ThreadTimeTraceProfilerInstances.
155 // Acquire the mutex because ThreadTimeTraceProfilerInstances is read below.
156 auto &Instances = getTimeTraceProfilerInstances();
157 std::lock_guard<std::mutex> Lock(Instances.Lock);
158 assert(Stack.empty() &&
159 "All profiler sections should be ended when calling write");
160 assert(llvm::all_of(Instances.List,
161 [](const auto &TTP) { return TTP->Stack.empty(); }) &&
162 "All profiler sections should be ended when calling write");
163
164 json::OStream J(OS);
165 J.objectBegin();
166 J.attributeBegin("traceEvents");
167 J.arrayBegin();
168
169 // Emit all events for the main flame graph.
170 auto writeEvent = [&](const auto &E, uint64_t Tid) {
171 auto StartUs = E.getFlameGraphStartUs(StartTime);
172 auto DurUs = E.getFlameGraphDurUs();
173
174 J.object([&] {
175 J.attribute("pid", Pid);
176 J.attribute("tid", int64_t(Tid));
177 J.attribute("ts", StartUs);
178 if (E.AsyncEvent) {
179 J.attribute("cat", E.Name);
180 J.attribute("ph", "b");
181 J.attribute("id", 0);
182 } else {
183 J.attribute("ph", "X");
184 J.attribute("dur", DurUs);
185 }
186 J.attribute("name", E.Name);
187 if (!E.Detail.empty()) {
188 J.attributeObject("args", [&] { J.attribute("detail", E.Detail); });
189 }
190 });
191
192 if (E.AsyncEvent) {
193 J.object([&] {
194 J.attribute("pid", Pid);
195 J.attribute("tid", int64_t(Tid));
196 J.attribute("ts", StartUs + DurUs);
197 J.attribute("cat", E.Name);
198 J.attribute("ph", "e");
199 J.attribute("id", 0);
200 J.attribute("name", E.Name);
201 });
202 }
203 };
204 for (const TimeTraceProfilerEntry &E : Entries)
205 writeEvent(E, this->Tid);
206 for (const TimeTraceProfiler *TTP : Instances.List)
207 for (const TimeTraceProfilerEntry &E : TTP->Entries)
208 writeEvent(E, TTP->Tid);
209
210 // Emit totals by section name as additional "thread" events, sorted from
211 // longest one.
212 // Find highest used thread id.
213 uint64_t MaxTid = this->Tid;
214 for (const TimeTraceProfiler *TTP : Instances.List)
215 MaxTid = std::max(MaxTid, TTP->Tid);
216
217 // Combine all CountAndTotalPerName from threads into one.
218 StringMap<CountAndDurationType> AllCountAndTotalPerName;
219 auto combineStat = [&](const auto &Stat) {
220 StringRef Key = Stat.getKey();
221 auto Value = Stat.getValue();
222 auto &CountAndTotal = AllCountAndTotalPerName[Key];
223 CountAndTotal.first += Value.first;
224 CountAndTotal.second += Value.second;
225 };
226 for (const auto &Stat : CountAndTotalPerName)
227 combineStat(Stat);
228 for (const TimeTraceProfiler *TTP : Instances.List)
229 for (const auto &Stat : TTP->CountAndTotalPerName)
230 combineStat(Stat);
231
232 std::vector<NameAndCountAndDurationType> SortedTotals;
233 SortedTotals.reserve(AllCountAndTotalPerName.size());
234 for (const auto &Total : AllCountAndTotalPerName)
235 SortedTotals.emplace_back(std::string(Total.getKey()), Total.getValue());
236
237 llvm::sort(SortedTotals, [](const NameAndCountAndDurationType &A,
238 const NameAndCountAndDurationType &B) {
239 return A.second.second > B.second.second;
240 });
241
242 // Report totals on separate threads of tracing file.
243 uint64_t TotalTid = MaxTid + 1;
244 for (const NameAndCountAndDurationType &Total : SortedTotals) {
245 auto DurUs = duration_cast<microseconds>(Total.second.second).count();
246 auto Count = AllCountAndTotalPerName[Total.first].first;
247
248 J.object([&] {
249 J.attribute("pid", Pid);
250 J.attribute("tid", int64_t(TotalTid));
251 J.attribute("ph", "X");
252 J.attribute("ts", 0);
253 J.attribute("dur", DurUs);
254 J.attribute("name", "Total " + Total.first);
255 J.attributeObject("args", [&] {
256 J.attribute("count", int64_t(Count));
257 J.attribute("avg ms", int64_t(DurUs / Count / 1000));
258 });
259 });
260
261 ++TotalTid;
262 }
263
264 auto writeMetadataEvent = [&](const char *Name, uint64_t Tid,
265 StringRef arg) {
266 J.object([&] {
267 J.attribute("cat", "");
268 J.attribute("pid", Pid);
269 J.attribute("tid", int64_t(Tid));
270 J.attribute("ts", 0);
271 J.attribute("ph", "M");
272 J.attribute("name", Name);
273 J.attributeObject("args", [&] { J.attribute("name", arg); });
274 });
275 };
276
277 writeMetadataEvent("process_name", Tid, ProcName);
278 writeMetadataEvent("thread_name", Tid, ThreadName);
279 for (const TimeTraceProfiler *TTP : Instances.List)
280 writeMetadataEvent("thread_name", TTP->Tid, TTP->ThreadName);
281
282 J.arrayEnd();
283 J.attributeEnd();
284
285 // Emit the absolute time when this TimeProfiler started.
286 // This can be used to combine the profiling data from
287 // multiple processes and preserve actual time intervals.
288 J.attribute("beginningOfTime",
289 time_point_cast<microseconds>(BeginningOfTime)
290 .time_since_epoch()
291 .count());
292
293 J.objectEnd();
294 }
295
299 // System clock time when the session was begun.
300 const time_point<system_clock> BeginningOfTime;
301 // Profiling clock time when the session was begun.
302 const TimePointType StartTime;
303 const std::string ProcName;
307
308 // Minimum time granularity (in microseconds)
309 const unsigned TimeTraceGranularity;
310};
311
312void llvm::timeTraceProfilerInitialize(unsigned TimeTraceGranularity,
313 StringRef ProcName) {
315 "Profiler should not be initialized");
317 TimeTraceGranularity, llvm::sys::path::filename(ProcName));
318}
319
320// Removes all TimeTraceProfilerInstances.
321// Called from main thread.
325
326 auto &Instances = getTimeTraceProfilerInstances();
327 std::lock_guard<std::mutex> Lock(Instances.Lock);
328 for (auto *TTP : Instances.List)
329 delete TTP;
330 Instances.List.clear();
331}
332
333// Finish TimeTraceProfilerInstance on a worker thread.
334// This doesn't remove the instance, just moves the pointer to global vector.
336 auto &Instances = getTimeTraceProfilerInstances();
337 std::lock_guard<std::mutex> Lock(Instances.Lock);
338 Instances.List.push_back(TimeTraceProfilerInstance);
340}
341
344 "Profiler object can't be null");
346}
347
349 StringRef FallbackFileName) {
351 "Profiler object can't be null");
352
353 std::string Path = PreferredFileName.str();
354 if (Path.empty()) {
355 Path = FallbackFileName == "-" ? "out" : FallbackFileName.str();
356 Path += ".time-trace";
357 }
358
359 std::error_code EC;
361 if (EC)
362 return createStringError(EC, "Could not open " + Path);
363
365 return Error::success();
366}
367
369 StringRef Detail) {
370 if (TimeTraceProfilerInstance != nullptr)
372 std::string(Name), [&]() { return std::string(Detail); }, false);
373 return nullptr;
374}
375
378 llvm::function_ref<std::string()> Detail) {
379 if (TimeTraceProfilerInstance != nullptr)
380 return TimeTraceProfilerInstance->begin(std::string(Name), Detail, false);
381 return nullptr;
382}
383
385 StringRef Detail) {
386 if (TimeTraceProfilerInstance != nullptr)
388 std::string(Name), [&]() { return std::string(Detail); }, true);
389 return nullptr;
390}
391
393 if (TimeTraceProfilerInstance != nullptr)
395}
396
398 if (TimeTraceProfilerInstance != nullptr)
400}
This file defines the StringMap class.
static sys::TimePoint< std::chrono::seconds > now(bool Deterministic)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define LLVM_THREAD_LOCAL
\macro LLVM_THREAD_LOCAL A thread-local storage specifier which can be used with globals,...
Definition: Compiler.h:569
std::string Name
This file supports working with JSON data.
Provides a library for accessing information about this process and other processes on the operating ...
const NodeList & List
Definition: RDFGraph.cpp:201
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
static LLVM_THREAD_LOCAL TimeTraceProfiler * TimeTraceProfilerInstance
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
static ErrorSuccess success()
Create a success value.
Definition: Error.h:334
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
unsigned size() const
Definition: StringMap.h:104
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
Definition: StringMap.h:128
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:222
LLVM Value Representation.
Definition: Value.h:74
An efficient, type-erasing, non-owning reference to a callable.
json::OStream allows writing well-formed JSON without materializing all structures as json::Value ahe...
Definition: JSON.h:977
void object(Block Contents)
Emit an object whose elements are emitted in the provided Block.
Definition: JSON.h:1007
void attributeObject(llvm::StringRef Key, Block Contents)
Emit an attribute whose value is an object with attributes from the Block.
Definition: JSON.h:1040
void attributeBegin(llvm::StringRef Key)
Definition: JSON.cpp:882
void attribute(llvm::StringRef Key, const Value &Contents)
Emit an attribute whose value is self-contained (number, vector<int> etc).
Definition: JSON.h:1032
void arrayBegin()
Definition: JSON.cpp:844
void objectBegin()
Definition: JSON.cpp:863
void attributeEnd()
Definition: JSON.cpp:902
void objectEnd()
Definition: JSON.cpp:871
A raw_ostream that writes to a file descriptor.
Definition: raw_ostream.h:470
An abstract base class for streams implementations that also support a pwrite operation.
Definition: raw_ostream.h:444
A collection of legacy interfaces for querying information about the current executing process.
Definition: Process.h:43
@ OF_TextWithCRLF
The file should be opened in text mode and use a carriage linefeed '\r\n'.
Definition: FileSystem.h:768
StringRef filename(StringRef path, Style style=Style::native)
Get filename.
Definition: Path.cpp:578
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1731
void timeTraceProfilerInitialize(unsigned TimeTraceGranularity, StringRef ProcName)
Initialize the time trace profiler.
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition: Error.h:1258
TimeTraceProfiler * getTimeTraceProfilerInstance()
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
void timeTraceProfilerFinishThread()
Finish a time trace profiler running on a worker thread.
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1656
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1745
void timeTraceProfilerEnd()
Manually end the last time section.
void get_thread_name(SmallVectorImpl< char > &Name)
Get the name of the current thread.
Definition: Threading.cpp:39
TimeTraceProfilerEntry * timeTraceAsyncProfilerBegin(StringRef Name, StringRef Detail)
Manually begin a time section, with the given Name and Detail.
uint64_t get_threadid()
Return the current thread id, as used in various OS system calls.
Definition: Threading.cpp:33
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1923
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1858
void timeTraceProfilerCleanup()
Cleanup the time trace profiler, if it was initialized.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2060
void timeTraceProfilerWrite(raw_pwrite_stream &OS)
Write profiling data to output stream.
TimeTraceProfilerEntry * timeTraceProfilerBegin(StringRef Name, StringRef Detail)
Manually begin a time section, with the given Name and Detail.
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
#define N
Represents an open or completed time section entry to be captured.
const TimePointType Start
ClockType::rep getFlameGraphDurUs() const
TimeTraceProfilerEntry(TimePointType &&S, TimePointType &&E, std::string &&N, std::string &&Dt, bool Ae)
ClockType::rep getFlameGraphStartUs(TimePointType StartTime) const
const sys::Process::Pid Pid
void write(raw_pwrite_stream &OS)
StringMap< CountAndDurationType > CountAndTotalPerName
const unsigned TimeTraceGranularity
TimeTraceProfiler(unsigned TimeTraceGranularity=0, StringRef ProcName="")
TimeTraceProfilerEntry * begin(std::string Name, llvm::function_ref< std::string()> Detail, bool AsyncEvent=false)
const time_point< system_clock > BeginningOfTime
SmallVector< std::unique_ptr< TimeTraceProfilerEntry >, 16 > Stack
SmallString< 0 > ThreadName
const std::string ProcName
SmallVector< TimeTraceProfilerEntry, 128 > Entries
const TimePointType StartTime
void end(TimeTraceProfilerEntry &E)