LLVM 20.0.0git
CoverageMappingWriter.cpp
Go to the documentation of this file.
1//===- CoverageMappingWriter.cpp - Code coverage mapping writer -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains support for writing coverage mapping data for
10// instrumentation based coverage.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/ArrayRef.h"
20#include "llvm/Support/LEB128.h"
22#include <algorithm>
23#include <cassert>
24#include <limits>
25#include <vector>
26
27using namespace llvm;
28using namespace coverage;
29
31 ArrayRef<std::string> Filenames)
32 : Filenames(Filenames) {
// Debug-only sanity check: every entry of Filenames must be unique. Each name
// is inserted into a scratch StringSet, and the assert fails as soon as an
// insertion reports that the name was already present. The whole check is
// excluded from the build when NDEBUG is defined.
33#ifndef NDEBUG
34 StringSet<> NameSet;
35 for (StringRef Name : Filenames)
36 assert(NameSet.insert(Name).second && "Duplicate filename");
37#endif
38}
39
// Step 1: serialize every filename into FilenamesStr as a ULEB128 length
// followed by the raw bytes of the name.
41 std::string FilenamesStr;
42 {
// The stream lives in its own scope, so it is destroyed before FilenamesStr is
// read below.
43 raw_string_ostream FilenamesOS{FilenamesStr};
44 for (const auto &Filename : Filenames) {
45 encodeULEB128(Filename.size(), FilenamesOS);
46 FilenamesOS << Filename;
47 }
48 }
49
// Step 2: optionally zlib-compress the serialized names into CompressedStr.
// NOTE(review): the tail of the doCompression condition and the remaining
// arguments of compress() are not visible in this listing - confirm against
// the upstream file before relying on the exact condition.
50 SmallVector<uint8_t, 128> CompressedStr;
51 bool doCompression = Compress && compression::zlib::isAvailable() &&
53 if (doCompression)
54 compression::zlib::compress(arrayRefFromStringRef(FilenamesStr),
55 CompressedStr,
57
// Step 3: emit the header fields followed by the payload.
58 // ::= <num-filenames>
59 // <uncompressed-len>
60 // <compressed-len-or-zero>
61 // (<compressed-filenames> | <uncompressed-filenames>)
62 encodeULEB128(Filenames.size(), OS);
63 encodeULEB128(FilenamesStr.size(), OS);
// A compressed length of 0 is written when the payload is stored uncompressed.
64 encodeULEB128(doCompression ? CompressedStr.size() : 0U, OS);
65 OS << (doCompression ? toStringRef(CompressedStr) : StringRef(FilenamesStr));
66}
67
68namespace {
69
70/// Gather only the expressions that are used by the mapping
71/// regions in this function.
72class CounterExpressionsMinimizer {
75 std::vector<unsigned> AdjustedExpressionIDs;
76
77public:
78 CounterExpressionsMinimizer(ArrayRef<CounterExpression> Expressions,
79 ArrayRef<CounterMappingRegion> MappingRegions)
80 : Expressions(Expressions) {
81 AdjustedExpressionIDs.resize(Expressions.size(), 0);
82 for (const auto &I : MappingRegions) {
83 mark(I.Count);
84 mark(I.FalseCount);
85 }
86 for (const auto &I : MappingRegions) {
87 gatherUsed(I.Count);
88 gatherUsed(I.FalseCount);
89 }
90 }
91
92 void mark(Counter C) {
93 if (!C.isExpression())
94 return;
95 unsigned ID = C.getExpressionID();
96 AdjustedExpressionIDs[ID] = 1;
97 mark(Expressions[ID].LHS);
98 mark(Expressions[ID].RHS);
99 }
100
101 void gatherUsed(Counter C) {
102 if (!C.isExpression() || !AdjustedExpressionIDs[C.getExpressionID()])
103 return;
104 AdjustedExpressionIDs[C.getExpressionID()] = UsedExpressions.size();
105 const auto &E = Expressions[C.getExpressionID()];
106 UsedExpressions.push_back(E);
107 gatherUsed(E.LHS);
108 gatherUsed(E.RHS);
109 }
110
111 ArrayRef<CounterExpression> getExpressions() const { return UsedExpressions; }
112
113 /// Adjust the given counter to correctly transition from the old
114 /// expression ids to the new expression ids.
115 Counter adjust(Counter C) const {
116 if (C.isExpression())
117 C = Counter::getExpression(AdjustedExpressionIDs[C.getExpressionID()]);
118 return C;
119 }
120};
121
122} // end anonymous namespace
123
124/// Encode the counter.
125///
126/// The encoding uses the following format:
127/// Low 2 bits - Tag:
128/// Counter::Zero(0) - A Counter with kind Counter::Zero
129/// Counter::CounterValueReference(1) - A counter with kind
130/// Counter::CounterValueReference
131/// Counter::Expression(2) + CounterExpression::Subtract(0) -
132/// A counter with kind Counter::Expression and an expression
133/// with kind CounterExpression::Subtract
134/// Counter::Expression(2) + CounterExpression::Add(1) -
135/// A counter with kind Counter::Expression and an expression
136/// with kind CounterExpression::Add
137/// Remaining bits - Counter/Expression ID.
138static unsigned encodeCounter(ArrayRef<CounterExpression> Expressions,
139 Counter C) {
140 unsigned Tag = unsigned(C.getKind());
141 if (C.isExpression())
142 Tag += Expressions[C.getExpressionID()].Kind;
143 unsigned ID = C.getCounterID();
144 assert(ID <=
145 (std::numeric_limits<unsigned>::max() >> Counter::EncodingTagBits));
146 return Tag | (ID << Counter::EncodingTagBits);
147}
148
150 raw_ostream &OS) {
// Emit the packed tag/ID value computed by encodeCounter() as a ULEB128.
151 encodeULEB128(encodeCounter(Expressions, C), OS);
152}
153
155 // Check that we don't have any bogus regions.
156 assert(all_of(MappingRegions,
157 [](const CounterMappingRegion &CMR) {
158 return CMR.startLoc() <= CMR.endLoc();
159 }) &&
160 "Source region does not begin before it ends");
161
162 // Sort the regions in an ascending order by the file id and the starting
163 // location. Sort by region kinds to ensure stable order for tests.
164 llvm::stable_sort(MappingRegions, [](const CounterMappingRegion &LHS,
165 const CounterMappingRegion &RHS) {
166 if (LHS.FileID != RHS.FileID)
167 return LHS.FileID < RHS.FileID;
168 if (LHS.startLoc() != RHS.startLoc())
169 return LHS.startLoc() < RHS.startLoc();
170
171 // Put `Decision` before `Expansion`.
// Every kind maps to the even key 2 * Kind, except MCDCDecisionRegion, which
// gets the odd key directly below ExpansionRegion's key.
172 auto getKindKey = [](CounterMappingRegion::RegionKind Kind) {
173 return (Kind == CounterMappingRegion::MCDCDecisionRegion
174 ? 2 * CounterMappingRegion::ExpansionRegion - 1
175 : 2 * Kind);
176 };
177
178 return getKindKey(LHS.Kind) < getKindKey(RHS.Kind);
179 });
180
181 // Write out the fileid -> filename mapping.
// The entry count comes first, then each entry as a ULEB128.
182 encodeULEB128(VirtualFileMapping.size(), OS);
183 for (const auto &FileID : VirtualFileMapping)
184 encodeULEB128(FileID, OS);
185
186 // Write out the expressions.
// Only the expressions kept by the minimizer are written. Their operands are
// passed through Minimizer.adjust() so they use the minimized numbering.
187 CounterExpressionsMinimizer Minimizer(Expressions, MappingRegions);
188 auto MinExpressions = Minimizer.getExpressions();
189 encodeULEB128(MinExpressions.size(), OS);
190 for (const auto &E : MinExpressions) {
191 writeCounter(MinExpressions, Minimizer.adjust(E.LHS), OS);
192 writeCounter(MinExpressions, Minimizer.adjust(E.RHS), OS);
193 }
194
195 // Write out the mapping regions.
196 // Split the regions into subarrays where each region in a
197 // subarray has a fileID which is the index of that subarray.
// CurrentFileID starts at ~0U, so the first region's FileID never matches it
// and the "new sub-array" branch below always runs for the first region.
198 unsigned PrevLineStart = 0;
199 unsigned CurrentFileID = ~0U;
200 for (auto I = MappingRegions.begin(), E = MappingRegions.end(); I != E; ++I) {
201 if (I->FileID != CurrentFileID) {
202 // Ensure that all file ids have at least one mapping region.
203 assert(I->FileID == (CurrentFileID + 1));
204 // Find the number of regions with this file id.
205 unsigned RegionCount = 1;
206 for (auto J = I + 1; J != E && I->FileID == J->FileID; ++J)
207 ++RegionCount;
208 // Start a new region sub-array.
209 encodeULEB128(RegionCount, OS);
210
211 CurrentFileID = I->FileID;
// Start lines are delta-encoded against the previous region in the same file,
// so the baseline is reset to 0 whenever a new file begins.
212 PrevLineStart = 0;
213 }
214 Counter Count = Minimizer.adjust(I->Count);
215 Counter FalseCount = Minimizer.adjust(I->FalseCount);
// Cleared only by the branches below that call getBranchParams() or
// getDecisionParams(); checked by the assert after the line/column fields.
216 bool ParamsShouldBeNull = true;
// NOTE(review): the case labels of this switch and the continuation lines of
// several expressions inside it are not visible in this listing - consult the
// upstream file for the exact kind handled by each branch.
217 switch (I->Kind) {
220 writeCounter(MinExpressions, Count, OS);
221 break;
223 assert(Count.isZero());
224 assert(I->ExpandedFileID <=
225 (std::numeric_limits<unsigned>::max() >>
227 // Mark an expansion region with a set bit that follows the counter tag,
228 // and pack the expanded file id into the remaining bits.
229 unsigned EncodedTagExpandedFileID =
231 (I->ExpandedFileID
233 encodeULEB128(EncodedTagExpandedFileID, OS);
234 break;
235 }
237 assert(Count.isZero());
238 encodeULEB128(unsigned(I->Kind)
240 OS);
241 break;
243 encodeULEB128(unsigned(I->Kind)
245 OS);
246 writeCounter(MinExpressions, Count, OS);
247 writeCounter(MinExpressions, FalseCount, OS);
248 break;
250 encodeULEB128(unsigned(I->Kind)
252 OS);
253 writeCounter(MinExpressions, Count, OS);
254 writeCounter(MinExpressions, FalseCount, OS);
255 {
256 // They are written as internal values plus 1.
257 const auto &BranchParams = I->getBranchParams();
258 ParamsShouldBeNull = false;
259 unsigned ID1 = BranchParams.ID + 1;
260 unsigned TID1 = BranchParams.Conds[true] + 1;
261 unsigned FID1 = BranchParams.Conds[false] + 1;
262 encodeULEB128(ID1, OS);
263 encodeULEB128(TID1, OS);
264 encodeULEB128(FID1, OS);
265 }
266 break;
268 encodeULEB128(unsigned(I->Kind)
270 OS);
271 {
272 const auto &DecisionParams = I->getDecisionParams();
273 ParamsShouldBeNull = false;
274 encodeULEB128(static_cast<unsigned>(DecisionParams.BitmapIdx), OS);
275 encodeULEB128(static_cast<unsigned>(DecisionParams.NumConditions), OS);
276 }
277 break;
278 }
// Source range: the start line as a delta from the previous region's start
// line, the start column, the line count as a delta from the start line, and
// the end column.
279 assert(I->LineStart >= PrevLineStart);
280 encodeULEB128(I->LineStart - PrevLineStart, OS);
281 encodeULEB128(I->ColumnStart, OS);
282 assert(I->LineEnd >= I->LineStart);
283 encodeULEB128(I->LineEnd - I->LineStart, OS);
284 encodeULEB128(I->ColumnEnd, OS);
285 PrevLineStart = I->LineStart;
// When no branch above cleared ParamsShouldBeNull, MCDCParams must hold its
// first alternative (index 0).
286 assert((!ParamsShouldBeNull || std::get_if<0>(&I->MCDCParams)) &&
287 "MCDCParams should be empty");
// Keeps the variable referenced when the assert above is compiled out.
288 (void)ParamsShouldBeNull;
289 }
290 // Ensure that all file ids have at least one mapping region.
291 assert(CurrentFileID == (VirtualFileMapping.size() - 1));
292}
293
// Converts a 64-bit value to little-endian byte order so the raw header
// fields below are written in little-endian form.
295 auto ByteSwap = [](uint64_t N) {
296 return support::endian::byte_swap<uint64_t, llvm::endianness::little>(N);
297 };
298
299 // Output a 64bit magic number.
300 auto Magic = ByteSwap(TestingFormatMagic);
301 OS.write(reinterpret_cast<char *>(&Magic), sizeof(Magic));
302
303 // Output a 64bit version field.
304 auto VersionLittle = ByteSwap(uint64_t(Version));
305 OS.write(reinterpret_cast<char *>(&VersionLittle), sizeof(VersionLittle));
306
307 // Output the ProfileNames data.
// Written as ULEB128 size, ULEB128 address, then the raw bytes.
308 encodeULEB128(ProfileNamesData.size(), OS);
309 encodeULEB128(ProfileNamesAddr, OS);
310 OS << ProfileNamesData;
311
312 // Version2 adds an extra field to indicate the size of the
313 // CoverageMappingData.
// NOTE(review): the statement that guards this write (original line 314) is
// not visible in this listing; per the comment above it is presumably
// conditional on the format version - confirm against the upstream file.
315 encodeULEB128(CoverageMappingData.size(), OS);
316
317 // Coverage mapping data is expected to have an alignment of 8.
// Zero bytes are emitted until the stream offset is a multiple of 8.
318 for (unsigned Pad = offsetToAlignment(OS.tell(), Align(8)); Pad; --Pad)
319 OS.write(uint8_t(0));
320 OS << CoverageMappingData;
321
322 // Coverage records data is expected to have an alignment of 8.
// Same zero padding as above, applied before the records payload.
323 for (unsigned Pad = offsetToAlignment(OS.tell(), Align(8)); Pad; --Pad)
324 OS.write(uint8_t(0));
325 OS << CoverageRecordsData;
326}
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static void writeCounter(ArrayRef< CounterExpression > Expressions, Counter C, raw_ostream &OS)
static unsigned encodeCounter(ArrayRef< CounterExpression > Expressions, Counter C)
Encode the counter.
std::string Name
#define I(x, y, z)
Definition: MD5.cpp:58
if(VerifyEach)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file defines the SmallVector class.
This file contains some functions that are useful when dealing with strings.
Value * RHS
Value * LHS
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
size_t size() const
Definition: SmallVector.h:92
void push_back(const T &Elt)
Definition: SmallVector.h:427
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1210
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:23
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition: StringSet.h:38
void write(raw_ostream &OS, bool Compress=true)
Write encoded filenames to the given output stream.
CoverageFilenamesSectionWriter(ArrayRef< std::string > Filenames)
void write(raw_ostream &OS)
Write encoded coverage mapping data to the given output stream.
void write(raw_ostream &OS, TestingFormatVersion Version=TestingFormatVersion::CurrentVersion)
Encode to the given output stream.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
uint64_t tell() const
tell - Return the current offset with the file.
Definition: raw_ostream.h:147
raw_ostream & write(unsigned char C)
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:661
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
void compress(ArrayRef< uint8_t > Input, SmallVectorImpl< uint8_t > &CompressedBuffer, int Level=DefaultCompression)
constexpr int BestSizeCompression
Definition: Compression.h:39
constexpr uint64_t TestingFormatMagic
StringRef toStringRef(const std::optional< DWARFFormValue > &V, StringRef Default={})
Take an optional DWARFFormValue and try to extract a string value from it.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void stable_sort(R &&Range)
Definition: STLExtras.h:1995
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
cl::opt< bool > DoInstrProfNameCompression
uint64_t offsetToAlignment(uint64_t Value, Align Alignment)
Returns the offset to the next integer (mod 2**64) that is greater than or equal to Value and is a mu...
Definition: Alignment.h:197
unsigned encodeULEB128(uint64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a ULEB128 value to an output stream.
Definition: LEB128.h:80
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
A Counter mapping region associates a source range with a specific counter.
@ ExpansionRegion
An ExpansionRegion represents a file expansion region that associates a source range with the expansi...
@ MCDCDecisionRegion
A DecisionRegion represents a top-level boolean expression and is associated with a variable length b...
@ MCDCBranchRegion
A Branch Region can be extended to include IDs to facilitate MC/DC.
@ SkippedRegion
A SkippedRegion represents a source range with code that was skipped by a preprocessor or similar mea...
@ GapRegion
A GapRegion is like a CodeRegion, but its count is only set as the line execution count when its the ...
@ BranchRegion
A BranchRegion represents leaf-level boolean expressions and is associated with two counters,...
@ CodeRegion
A CodeRegion associates some code with a counter.
A Counter is an abstract value that describes how to compute the execution count for a region of code...
static const unsigned EncodingTagBits
static const unsigned EncodingCounterTagAndExpansionRegionTagBits
static Counter getExpression(unsigned ExpressionId)
Return the counter that corresponds to a specific addition counter expression.