LLVM  12.0.0git
TypeStreamMerger.cpp
Go to the documentation of this file.
1 //===-- TypeStreamMerger.cpp ------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include "llvm/ADT/SmallString.h"
11 #include "llvm/ADT/StringExtras.h"
19 #include "llvm/Support/Error.h"
20 
21 using namespace llvm;
22 using namespace llvm::codeview;
23 
24 static inline size_t slotForIndex(TypeIndex Idx) {
25  assert(!Idx.isSimple() && "simple type indices have no slots");
27 }
28 
29 namespace {
30 
31 /// Implementation of CodeView type stream merging.
32 ///
33 /// A CodeView type stream is a series of records that reference each other
34 /// through type indices. A type index is either "simple", meaning it is less
35 /// than 0x1000 and refers to a builtin type, or it is complex, meaning it
36 /// refers to a prior type record in the current stream. The type index of a
37 /// record is equal to the number of records before it in the stream plus
38 /// 0x1000.
39 ///
40 /// Type records are only allowed to use type indices smaller than their own, so
41 /// a type stream is effectively a topologically sorted DAG. Cycles occuring in
42 /// the type graph of the source program are resolved with forward declarations
43 /// of composite types. This class implements the following type stream merging
44 /// algorithm, which relies on this DAG structure:
45 ///
46 /// - Begin with a new empty stream, and a new empty hash table that maps from
47 /// type record contents to new type index.
48 /// - For each new type stream, maintain a map from source type index to
49 /// destination type index.
50 /// - For each record, copy it and rewrite its type indices to be valid in the
51 /// destination type stream.
52 /// - If the new type record is not already present in the destination stream
53 /// hash table, append it to the destination type stream, assign it the next
54 /// type index, and update the two hash tables.
55 /// - If the type record already exists in the destination stream, discard it
56 /// and update the type index map to forward the source type index to the
57 /// existing destination type index.
58 ///
59 /// As an additional complication, type stream merging actually produces two
60 /// streams: an item (or IPI) stream and a type stream, as this is what is
61 /// actually stored in the final PDB. We choose which records go where by
62 /// looking at the record kind.
63 class TypeStreamMerger {
64 public:
65  explicit TypeStreamMerger(SmallVectorImpl<TypeIndex> &SourceToDest)
66  : IndexMap(SourceToDest) {
67  // When dealing with precompiled headers objects, all data in SourceToDest
68  // belongs to the precompiled headers object, and is assumed to be already
69  // remapped to the target PDB. Any forthcoming type that will be merged in
70  // might potentially back-reference this data. We also don't want to resolve
71  // twice the types in the precompiled object.
72  CurIndex += SourceToDest.size();
73  }
74 
75  static const TypeIndex Untranslated;
76 
77  // Local hashing entry points
78  Error mergeTypesAndIds(MergingTypeTableBuilder &DestIds,
79  MergingTypeTableBuilder &DestTypes,
80  const CVTypeArray &IdsAndTypes, Optional<uint32_t> &S);
82  ArrayRef<TypeIndex> TypeSourceToDest,
83  const CVTypeArray &Ids);
85  const CVTypeArray &Types);
86 
87  // Global hashing entry points
88  Error mergeTypesAndIds(GlobalTypeTableBuilder &DestIds,
89  GlobalTypeTableBuilder &DestTypes,
90  const CVTypeArray &IdsAndTypes,
94  ArrayRef<TypeIndex> TypeSourceToDest,
95  const CVTypeArray &Ids,
100 
101 private:
102  Error doit(const CVTypeArray &Types);
103 
104  Error remapAllTypes(const CVTypeArray &Types);
105 
106  Error remapType(const CVType &Type);
107 
108  void addMapping(TypeIndex Idx);
109 
110  inline bool remapTypeIndex(TypeIndex &Idx) {
111  // If we're mapping a pure index stream, then IndexMap only contains
112  // mappings from OldIdStream -> NewIdStream, in which case we will need to
113  // use the special mapping from OldTypeStream -> NewTypeStream which was
114  // computed externally. Regardless, we use this special map if and only if
115  // we are doing an id-only mapping.
116  if (!hasTypeStream())
117  return remapIndex(Idx, TypeLookup);
118 
119  assert(TypeLookup.empty());
120  return remapIndex(Idx, IndexMap);
121  }
122  inline bool remapItemIndex(TypeIndex &Idx) {
123  assert(hasIdStream());
124  return remapIndex(Idx, IndexMap);
125  }
126 
127  bool hasTypeStream() const {
128  return (UseGlobalHashes) ? (!!DestGlobalTypeStream) : (!!DestTypeStream);
129  }
130 
131  bool hasIdStream() const {
132  return (UseGlobalHashes) ? (!!DestGlobalIdStream) : (!!DestIdStream);
133  }
134 
135  ArrayRef<uint8_t> remapIndices(const CVType &OriginalType,
136  MutableArrayRef<uint8_t> Storage);
137 
138  inline bool remapIndex(TypeIndex &Idx, ArrayRef<TypeIndex> Map) {
139  if (LLVM_LIKELY(remapIndexSimple(Idx, Map)))
140  return true;
141 
142  return remapIndexFallback(Idx, Map);
143  }
144 
145  inline bool remapIndexSimple(TypeIndex &Idx, ArrayRef<TypeIndex> Map) const {
146  // Simple types are unchanged.
147  if (Idx.isSimple())
148  return true;
149 
150  // Check if this type index refers to a record we've already translated
151  // successfully. If it refers to a type later in the stream or a record we
152  // had to defer, defer it until later pass.
153  unsigned MapPos = slotForIndex(Idx);
154  if (LLVM_UNLIKELY(MapPos >= Map.size() || Map[MapPos] == Untranslated))
155  return false;
156 
157  Idx = Map[MapPos];
158  return true;
159  }
160 
161  bool remapIndexFallback(TypeIndex &Idx, ArrayRef<TypeIndex> Map);
162 
163  Error errorCorruptRecord() const {
164  return llvm::make_error<CodeViewError>(cv_error_code::corrupt_record);
165  }
166 
167  Expected<bool> shouldRemapType(const CVType &Type);
168 
169  Optional<Error> LastError;
170 
171  bool UseGlobalHashes = false;
172 
173  bool IsSecondPass = false;
174 
175  unsigned NumBadIndices = 0;
176 
178 
179  MergingTypeTableBuilder *DestIdStream = nullptr;
180  MergingTypeTableBuilder *DestTypeStream = nullptr;
181 
182  GlobalTypeTableBuilder *DestGlobalIdStream = nullptr;
183  GlobalTypeTableBuilder *DestGlobalTypeStream = nullptr;
184 
185  ArrayRef<GloballyHashedType> GlobalHashes;
186 
187  // If we're only mapping id records, this array contains the mapping for
188  // type records.
189  ArrayRef<TypeIndex> TypeLookup;
190 
191  /// Map from source type index to destination type index. Indexed by source
192  /// type index minus 0x1000.
193  SmallVectorImpl<TypeIndex> &IndexMap;
194 
195  /// Temporary storage that we use to copy a record's data while re-writing
196  /// its type indices.
197  SmallVector<uint8_t, 256> RemapStorage;
198 
199  Optional<uint32_t> PCHSignature;
200 };
201 
202 } // end anonymous namespace
203 
204 const TypeIndex TypeStreamMerger::Untranslated(SimpleTypeKind::NotTranslated);
205 
206 void TypeStreamMerger::addMapping(TypeIndex Idx) {
207  if (!IsSecondPass) {
208  assert(IndexMap.size() == slotForIndex(CurIndex) &&
209  "visitKnownRecord should add one index map entry");
210  IndexMap.push_back(Idx);
211  } else {
212  assert(slotForIndex(CurIndex) < IndexMap.size());
213  IndexMap[slotForIndex(CurIndex)] = Idx;
214  }
215 }
216 
217 bool TypeStreamMerger::remapIndexFallback(TypeIndex &Idx,
218  ArrayRef<TypeIndex> Map) {
219  size_t MapPos = slotForIndex(Idx);
220 
221  // If this is the second pass and this index isn't in the map, then it points
222  // outside the current type stream, and this is a corrupt record.
223  if (IsSecondPass && MapPos >= Map.size()) {
224  // FIXME: Print a more useful error. We can give the current record and the
225  // index that we think its pointing to.
226  if (LastError)
227  LastError = joinErrors(std::move(*LastError), errorCorruptRecord());
228  else
229  LastError = errorCorruptRecord();
230  }
231 
232  ++NumBadIndices;
233 
234  // This type index is invalid. Remap this to "not translated by cvpack",
235  // and return failure.
236  Idx = Untranslated;
237  return false;
238 }
239 
240 // Local hashing entry points
242  const CVTypeArray &Types) {
243  DestTypeStream = &Dest;
244  UseGlobalHashes = false;
245 
246  return doit(Types);
247 }
248 
250  ArrayRef<TypeIndex> TypeSourceToDest,
251  const CVTypeArray &Ids) {
252  DestIdStream = &Dest;
253  TypeLookup = TypeSourceToDest;
254  UseGlobalHashes = false;
255 
256  return doit(Ids);
257 }
258 
259 Error TypeStreamMerger::mergeTypesAndIds(MergingTypeTableBuilder &DestIds,
260  MergingTypeTableBuilder &DestTypes,
261  const CVTypeArray &IdsAndTypes,
262  Optional<uint32_t> &S) {
263  DestIdStream = &DestIds;
264  DestTypeStream = &DestTypes;
265  UseGlobalHashes = false;
266  auto Err = doit(IdsAndTypes);
267  S = PCHSignature;
268  return Err;
269 }
270 
271 // Global hashing entry points
273  const CVTypeArray &Types,
275  Optional<uint32_t> &S) {
276  DestGlobalTypeStream = &Dest;
277  UseGlobalHashes = true;
278  GlobalHashes = Hashes;
279  auto Err = doit(Types);
280  S = PCHSignature;
281  return Err;
282 }
283 
285  ArrayRef<TypeIndex> TypeSourceToDest,
286  const CVTypeArray &Ids,
288  DestGlobalIdStream = &Dest;
289  TypeLookup = TypeSourceToDest;
290  UseGlobalHashes = true;
291  GlobalHashes = Hashes;
292 
293  return doit(Ids);
294 }
295 
296 Error TypeStreamMerger::mergeTypesAndIds(GlobalTypeTableBuilder &DestIds,
297  GlobalTypeTableBuilder &DestTypes,
298  const CVTypeArray &IdsAndTypes,
300  Optional<uint32_t> &S) {
301  DestGlobalIdStream = &DestIds;
302  DestGlobalTypeStream = &DestTypes;
303  UseGlobalHashes = true;
304  GlobalHashes = Hashes;
305  auto Err = doit(IdsAndTypes);
306  S = PCHSignature;
307  return Err;
308 }
309 
310 Error TypeStreamMerger::doit(const CVTypeArray &Types) {
311  if (auto EC = remapAllTypes(Types))
312  return EC;
313 
314  // If we found bad indices but no other errors, try doing another pass and see
315  // if we can resolve the indices that weren't in the map on the first pass.
316  // This may require multiple passes, but we should always make progress. MASM
317  // is the only known CodeView producer that makes type streams that aren't
318  // topologically sorted. The standard library contains MASM-produced objects,
319  // so this is important to handle correctly, but we don't have to be too
320  // efficient. MASM type streams are usually very small.
321  while (!LastError && NumBadIndices > 0) {
322  unsigned BadIndicesRemaining = NumBadIndices;
323  IsSecondPass = true;
324  NumBadIndices = 0;
326 
327  if (auto EC = remapAllTypes(Types))
328  return EC;
329 
330  assert(NumBadIndices <= BadIndicesRemaining &&
331  "second pass found more bad indices");
332  if (!LastError && NumBadIndices == BadIndicesRemaining) {
333  return llvm::make_error<CodeViewError>(
334  cv_error_code::corrupt_record, "Input type graph contains cycles");
335  }
336  }
337 
338  if (LastError)
339  return std::move(*LastError);
340  return Error::success();
341 }
342 
343 Error TypeStreamMerger::remapAllTypes(const CVTypeArray &Types) {
344  BinaryStreamRef Stream = Types.getUnderlyingStream();
345  ArrayRef<uint8_t> Buffer;
346  cantFail(Stream.readBytes(0, Stream.getLength(), Buffer));
347 
348  return forEachCodeViewRecord<CVType>(
349  Buffer, [this](const CVType &T) { return remapType(T); });
350 }
351 
352 Error TypeStreamMerger::remapType(const CVType &Type) {
353  auto R = shouldRemapType(Type);
354  if (!R)
355  return R.takeError();
356 
357  TypeIndex DestIdx = Untranslated;
358  if (*R) {
359  auto DoSerialize =
360  [this, Type](MutableArrayRef<uint8_t> Storage) -> ArrayRef<uint8_t> {
361  return remapIndices(Type, Storage);
362  };
363  unsigned AlignedSize = alignTo(Type.RecordData.size(), 4);
364 
365  if (LLVM_LIKELY(UseGlobalHashes)) {
366  GlobalTypeTableBuilder &Dest =
367  isIdRecord(Type.kind()) ? *DestGlobalIdStream : *DestGlobalTypeStream;
368  GloballyHashedType H = GlobalHashes[CurIndex.toArrayIndex()];
369  DestIdx = Dest.insertRecordAs(H, AlignedSize, DoSerialize);
370  } else {
372  isIdRecord(Type.kind()) ? *DestIdStream : *DestTypeStream;
373 
374  RemapStorage.resize(AlignedSize);
375  ArrayRef<uint8_t> Result = DoSerialize(RemapStorage);
376  if (!Result.empty())
377  DestIdx = Dest.insertRecordBytes(Result);
378  }
379  }
380  addMapping(DestIdx);
381 
382  ++CurIndex;
383  assert((IsSecondPass || IndexMap.size() == slotForIndex(CurIndex)) &&
384  "visitKnownRecord should add one index map entry");
385  return Error::success();
386 }
387 
389 TypeStreamMerger::remapIndices(const CVType &OriginalType,
390  MutableArrayRef<uint8_t> Storage) {
391  unsigned Align = OriginalType.RecordData.size() & 3;
392  assert(Storage.size() == alignTo(OriginalType.RecordData.size(), 4) &&
393  "The storage buffer size is not a multiple of 4 bytes which will "
394  "cause misalignment in the output TPI stream!");
395 
397  discoverTypeIndices(OriginalType.RecordData, Refs);
398  if (Refs.empty() && Align == 0)
399  return OriginalType.RecordData;
400 
401  ::memcpy(Storage.data(), OriginalType.RecordData.data(),
402  OriginalType.RecordData.size());
403 
404  uint8_t *DestContent = Storage.data() + sizeof(RecordPrefix);
405 
406  for (auto &Ref : Refs) {
407  TypeIndex *DestTIs =
408  reinterpret_cast<TypeIndex *>(DestContent + Ref.Offset);
409 
410  for (size_t I = 0; I < Ref.Count; ++I) {
411  TypeIndex &TI = DestTIs[I];
412  bool Success = (Ref.Kind == TiRefKind::IndexRef) ? remapItemIndex(TI)
413  : remapTypeIndex(TI);
414  if (LLVM_UNLIKELY(!Success))
415  return {};
416  }
417  }
418 
419  if (Align > 0) {
420  RecordPrefix *StorageHeader =
421  reinterpret_cast<RecordPrefix *>(Storage.data());
422  StorageHeader->RecordLen += 4 - Align;
423 
424  DestContent = Storage.data() + OriginalType.RecordData.size();
425  for (; Align < 4; ++Align)
426  *DestContent++ = LF_PAD4 - Align;
427  }
428  return Storage;
429 }
430 
432  SmallVectorImpl<TypeIndex> &SourceToDest,
433  const CVTypeArray &Types) {
434  TypeStreamMerger M(SourceToDest);
435  return M.mergeTypeRecords(Dest, Types);
436 }
437 
439  ArrayRef<TypeIndex> TypeSourceToDest,
440  SmallVectorImpl<TypeIndex> &SourceToDest,
441  const CVTypeArray &Ids) {
442  TypeStreamMerger M(SourceToDest);
443  return M.mergeIdRecords(Dest, TypeSourceToDest, Ids);
444 }
445 
448  SmallVectorImpl<TypeIndex> &SourceToDest, const CVTypeArray &IdsAndTypes,
449  Optional<uint32_t> &PCHSignature) {
450  TypeStreamMerger M(SourceToDest);
451  return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes, PCHSignature);
452 }
453 
455  GlobalTypeTableBuilder &DestIds, GlobalTypeTableBuilder &DestTypes,
456  SmallVectorImpl<TypeIndex> &SourceToDest, const CVTypeArray &IdsAndTypes,
457  ArrayRef<GloballyHashedType> Hashes, Optional<uint32_t> &PCHSignature) {
458  TypeStreamMerger M(SourceToDest);
459  return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes, Hashes,
460  PCHSignature);
461 }
462 
464  SmallVectorImpl<TypeIndex> &SourceToDest,
465  const CVTypeArray &Types,
467  Optional<uint32_t> &PCHSignature) {
468  TypeStreamMerger M(SourceToDest);
469  return M.mergeTypeRecords(Dest, Types, Hashes, PCHSignature);
470 }
471 
473  ArrayRef<TypeIndex> Types,
474  SmallVectorImpl<TypeIndex> &SourceToDest,
475  const CVTypeArray &Ids,
477  TypeStreamMerger M(SourceToDest);
478  return M.mergeIdRecords(Dest, Types, Ids, Hashes);
479 }
480 
481 Expected<bool> TypeStreamMerger::shouldRemapType(const CVType &Type) {
482  // For object files containing precompiled types, we need to extract the
483  // signature, through EndPrecompRecord. This is done here for performance
484  // reasons, to avoid re-parsing the Types stream.
485  if (Type.kind() == LF_ENDPRECOMP) {
486  EndPrecompRecord EP;
487  if (auto EC = TypeDeserializer::deserializeAs(const_cast<CVType &>(Type),
488  EP))
489  return joinErrors(std::move(EC), errorCorruptRecord());
490  if (PCHSignature.hasValue())
491  return errorCorruptRecord();
492  PCHSignature.emplace(EP.getSignature());
493  return false;
494  }
495  return true;
496 }
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
Definition: Error.h:708
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
void discoverTypeIndices(ArrayRef< uint8_t > RecordData, SmallVectorImpl< TiReference > &Refs)
Kind kind() const
Definition: CVRecord.h:43
This class represents lattice values for constants.
Definition: AllocatorList.h:23
#define LLVM_UNLIKELY(EXPR)
Definition: Compiler.h:219
#define LLVM_LIKELY(EXPR)
Definition: Compiler.h:218
BinaryStreamRef getUnderlyingStream() const
Error mergeTypeRecords(MergingTypeTableBuilder &Dest, SmallVectorImpl< TypeIndex > &SourceToDest, const CVTypeArray &Types)
Merge one set of type records into another.
Error mergeTypeAndIdRecords(MergingTypeTableBuilder &DestIds, MergingTypeTableBuilder &DestTypes, SmallVectorImpl< TypeIndex > &SourceToDest, const CVTypeArray &IdsAndTypes, Optional< uint32_t > &PCHSignature)
Merge a unified set of type and id records, splitting them into separate output streams.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:43
The access may reference the value stored in memory.
Tagged union holding either a T or a Error.
Definition: APFloat.h:42
static const uint32_t FirstNonSimpleIndex
Definition: TypeIndex.h:97
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
A 32-bit type reference.
Definition: TypeIndex.h:95
A globally hashed type represents a hash value that is sufficient to uniquely identify a record acros...
Definition: TypeHashing.h:77
Error readBytes(uint32_t Offset, uint32_t Size, ArrayRef< uint8_t > &Buffer) const
Given an Offset into this StreamRef and a Size, return a reference to a buffer owned by the stream...
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:156
#define H(x, y, z)
Definition: MD5.cpp:58
static Error deserializeAs(CVType &CVT, T &Record)
const T * data() const
Definition: ArrayRef.h:153
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint32_t getIndex() const
Definition: TypeIndex.h:110
uint32_t getLength() const
static ErrorSuccess success()
Create a success value.
Definition: Error.h:332
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:883
BinaryStreamRef is to BinaryStream what ArrayRef is to an Array.
static size_t slotForIndex(TypeIndex Idx)
#define Success
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:158
ArrayRef< uint8_t > RecordData
Definition: CVRecord.h:61
Error joinErrors(Error E1, Error E2)
Concatenate errors.
Definition: Error.h:429
TypeIndex insertRecordBytes(ArrayRef< uint8_t > &Record)
#define I(x, y, z)
Definition: MD5.cpp:59
Error mergeIdRecords(MergingTypeTableBuilder &Dest, ArrayRef< TypeIndex > Types, SmallVectorImpl< TypeIndex > &SourceToDest, const CVTypeArray &Ids)
Merge one set of id records into another.
T * data() const
Definition: ArrayRef.h:336
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
TypeIndex insertRecordAs(GloballyHashedType Hash, size_t RecordSize, CreateFunc Create)
Lightweight error class with error context and mandatory checking.
Definition: Error.h:157
bool isIdRecord(TypeLeafKind K)
Return true if this record should be in the IPI stream of a PDB.
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:151