LLVM 23.0.0git
OnDiskTrieRawHashMap.cpp
Go to the documentation of this file.
1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file Implements OnDiskTrieRawHashMap.
10///
11//===----------------------------------------------------------------------===//
12
14#include "DatabaseFile.h"
19#include "llvm/Config/llvm-config.h"
23
24using namespace llvm;
25using namespace llvm::cas;
26using namespace llvm::cas::ondisk;
27
28#if LLVM_ENABLE_ONDISK_CAS
29
30//===----------------------------------------------------------------------===//
31// TrieRawHashMap data structures.
32//===----------------------------------------------------------------------===//
33
34namespace {
35
36class SubtrieHandle;
37class TrieRawHashMapHandle;
38class TrieVisitor;
39
40/// A value stored in the slots inside a SubTrie. A stored value can either be a
41/// subtrie (encoded after negation) which is the file offset to another
42/// subtrie, or it can be a fileset to a DataRecord.
43class SubtrieSlotValue {
44public:
45 explicit operator bool() const { return !isEmpty(); }
46 bool isEmpty() const { return !Offset; }
47 bool isData() const { return Offset > 0; }
48 bool isSubtrie() const { return Offset < 0; }
49 uint64_t asData() const {
50 assert(isData());
51 return Offset;
52 }
53 uint64_t asSubtrie() const {
54 assert(isSubtrie());
55 return -Offset;
56 }
57
58 FileOffset asSubtrieFileOffset() const { return FileOffset(asSubtrie()); }
59
60 FileOffset asDataFileOffset() const { return FileOffset(asData()); }
61
62 int64_t getRawOffset() const { return Offset; }
63
64 static SubtrieSlotValue getDataOffset(int64_t Offset) {
65 return SubtrieSlotValue(Offset);
66 }
67
68 static SubtrieSlotValue getSubtrieOffset(int64_t Offset) {
69 return SubtrieSlotValue(-Offset);
70 }
71
72 static SubtrieSlotValue getDataOffset(FileOffset Offset) {
73 return getDataOffset(Offset.get());
74 }
75
76 static SubtrieSlotValue getSubtrieOffset(FileOffset Offset) {
77 return getDataOffset(Offset.get());
78 }
79
80 static SubtrieSlotValue getFromSlot(std::atomic<int64_t> &Slot) {
81 return SubtrieSlotValue(Slot.load());
82 }
83
84 SubtrieSlotValue() = default;
85
86private:
87 friend class SubtrieHandle;
88 explicit SubtrieSlotValue(int64_t Offset) : Offset(Offset) {}
89 int64_t Offset = 0;
90};
91
92/// Subtrie layout:
93/// - 2-bytes: StartBit
94/// - 1-bytes: NumBits=lg(num-slots)
95/// - 5-bytes: 0-pad
96/// - <slots>
97class SubtrieHandle {
98public:
99 struct Header {
100 /// The bit this subtrie starts on.
101 uint16_t StartBit;
102
103 /// The number of bits this subtrie handles. It has 2^NumBits slots.
104 uint8_t NumBits;
105
106 /// 0-pad to 8B.
107 uint8_t ZeroPad1B;
108 uint32_t ZeroPad4B;
109 };
110
111 /// Slot storage:
112 /// - zero: Empty
113 /// - positive: RecordOffset
114 /// - negative: SubtrieOffset
115 using SlotT = std::atomic<int64_t>;
116
117 static int64_t getSlotsSize(uint32_t NumBits) {
118 return sizeof(int64_t) * (1ull << NumBits);
119 }
120
121 static int64_t getSize(uint32_t NumBits) {
122 return sizeof(SubtrieHandle::Header) + getSlotsSize(NumBits);
123 }
124
125 int64_t getSize() const { return getSize(H->NumBits); }
126 size_t getNumSlots() const { return Slots.size(); }
127
128 SubtrieSlotValue load(size_t I) const {
129 return SubtrieSlotValue(Slots[I].load());
130 }
131 void store(size_t I, SubtrieSlotValue V) {
132 return Slots[I].store(V.getRawOffset());
133 }
134
135 void printHash(raw_ostream &OS, ArrayRef<uint8_t> Bytes) const;
136
137 /// Return None on success, or the existing offset on failure.
138 bool compare_exchange_strong(size_t I, SubtrieSlotValue &Expected,
139 SubtrieSlotValue New) {
140 SubtrieSlotValue SaveExpected(Expected);
141 bool Result = Slots[I].compare_exchange_strong(Expected.Offset, New.Offset);
142 if (Logger)
143 Logger->logSubtrieHandleCmpXchg(Region->data(), getOffset().Offset, I,
144 SaveExpected.Offset, New.Offset,
145 Expected.Offset);
146 return Result;
147 }
148
149 /// Sink \p V from \p I in this subtrie down to \p NewI in a new subtrie with
150 /// \p NumSubtrieBits.
151 ///
152 /// \p UnusedSubtrie maintains a 1-item "free" list of unused subtries. If a
153 /// new subtrie is created that isn't used because of a lost race, then it If
154 /// it's already valid, it should be used instead of allocating a new one.
155 /// should be returned as an out parameter to be passed back in the future.
156 /// If it's already valid, it should be used instead of allocating a new one.
157 ///
158 /// Returns the subtrie that now lives at \p I.
159 Expected<SubtrieHandle> sink(size_t I, SubtrieSlotValue V,
160 MappedFileRegionArena &Alloc,
161 size_t NumSubtrieBits,
162 SubtrieHandle &UnusedSubtrie, size_t NewI);
163
164 /// Only safe if the subtrie is empty.
165 void reinitialize(uint32_t StartBit, uint32_t NumBits);
166
167 SubtrieSlotValue getOffset() const {
168 return SubtrieSlotValue::getSubtrieOffset(
169 reinterpret_cast<const char *>(H) - Region->data());
170 }
171
172 FileOffset getFileOffset() const { return getOffset().asSubtrieFileOffset(); }
173
174 explicit operator bool() const { return H; }
175
176 Header &getHeader() const { return *H; }
177 uint32_t getStartBit() const { return H->StartBit; }
178 uint32_t getNumBits() const { return H->NumBits; }
179
180 static Expected<SubtrieHandle> create(MappedFileRegionArena &Alloc,
181 uint32_t StartBit, uint32_t NumBits,
182 OnDiskCASLogger *Logger);
183
184 static SubtrieHandle getFromFileOffset(MappedFileRegion &Region,
185 FileOffset Offset,
186 OnDiskCASLogger *Logger) {
187 return SubtrieHandle(Region, SubtrieSlotValue::getSubtrieOffset(Offset),
188 Logger);
189 }
190
191 SubtrieHandle() = default;
192 SubtrieHandle(MappedFileRegion &Region, Header &H, OnDiskCASLogger *Logger)
193 : Region(&Region), H(&H), Slots(getSlots(H)), Logger(Logger) {}
194 SubtrieHandle(MappedFileRegion &Region, SubtrieSlotValue Offset,
195 OnDiskCASLogger *Logger)
196 : SubtrieHandle(
197 Region,
198 *reinterpret_cast<Header *>(Region.data() + Offset.asSubtrie()),
199 Logger) {}
200
201private:
202 MappedFileRegion *Region = nullptr;
203 Header *H = nullptr;
205 OnDiskCASLogger *Logger = nullptr;
206
207 static MutableArrayRef<SlotT> getSlots(Header &H) {
208 return MutableArrayRef(reinterpret_cast<SlotT *>(&H + 1),
209 1ull << H.NumBits);
210 }
211};
212
213/// Handle for a TrieRawHashMap table.
214///
215/// TrieRawHashMap table layout:
216/// - [8-bytes: Generic table header]
217/// - 1-byte: NumSubtrieBits
218/// - 1-byte: Flags (not used yet)
219/// - 2-bytes: NumHashBits
220/// - 4-bytes: RecordDataSize (in bytes)
221/// - 8-bytes: RootTrieOffset
222/// - 8-bytes: AllocatorOffset (reserved for implementing free lists)
223/// - <name> '\0'
224///
225/// Record layout:
226/// - <hash>
227/// - <data>
228class TrieRawHashMapHandle {
229public:
230 static constexpr TableHandle::TableKind Kind =
231 TableHandle::TableKind::TrieRawHashMap;
232
233 struct Header {
234 TableHandle::Header GenericHeader;
235 uint8_t NumSubtrieBits;
236 uint8_t Flags; ///< None used yet.
237 uint16_t NumHashBits;
238 uint32_t RecordDataSize;
239 std::atomic<int64_t> RootTrieOffset;
240 std::atomic<int64_t> AllocatorOffset;
241 };
242
243 operator TableHandle() const {
244 if (!H)
245 return TableHandle();
246 return TableHandle(*Region, H->GenericHeader);
247 }
248
249 struct RecordData {
250 OnDiskTrieRawHashMap::ValueProxy Proxy;
251 SubtrieSlotValue Offset;
252 FileOffset getFileOffset() const { return Offset.asDataFileOffset(); }
253 };
254
255 enum Limits : size_t {
256 /// Seems like 65528 hash bits ought to be enough.
257 MaxNumHashBytes = UINT16_MAX >> 3,
258 MaxNumHashBits = MaxNumHashBytes << 3,
259
260 /// 2^16 bits in a trie is 65536 slots. This restricts us to a 16-bit
261 /// index. This many slots is suspicously large anyway.
262 MaxNumRootBits = 16,
263
264 /// 2^10 bits in a trie is 1024 slots. This many slots seems suspiciously
265 /// large for subtries.
266 MaxNumSubtrieBits = 10,
267 };
268
269 static constexpr size_t getNumHashBytes(size_t NumHashBits) {
270 assert(NumHashBits % 8 == 0);
271 return NumHashBits / 8;
272 }
273 static constexpr size_t getRecordSize(size_t RecordDataSize,
274 size_t NumHashBits) {
275 return RecordDataSize + getNumHashBytes(NumHashBits);
276 }
277
278 RecordData getRecord(SubtrieSlotValue Offset);
279 Expected<RecordData> createRecord(MappedFileRegionArena &Alloc,
280 ArrayRef<uint8_t> Hash);
281
282 explicit operator bool() const { return H; }
283 const Header &getHeader() const { return *H; }
284 SubtrieHandle getRoot() const;
285 int64_t getRootTrieOffset() const { return H->RootTrieOffset; }
286 Expected<SubtrieHandle> getOrCreateRoot(MappedFileRegionArena &Alloc);
287 MappedFileRegion &getRegion() const { return *Region; }
288
289 size_t getFlags() const { return H->Flags; }
290 size_t getNumSubtrieBits() const { return H->NumSubtrieBits; }
291 size_t getNumHashBits() const { return H->NumHashBits; }
292 size_t getNumHashBytes() const { return getNumHashBytes(H->NumHashBits); }
293 size_t getRecordDataSize() const { return H->RecordDataSize; }
294 size_t getRecordSize() const {
295 return getRecordSize(H->RecordDataSize, H->NumHashBits);
296 }
297
298 TrieHashIndexGenerator getIndexGen(SubtrieHandle Root,
299 ArrayRef<uint8_t> Hash) {
300 assert(Root.getStartBit() == 0);
301 assert(getNumHashBytes() == Hash.size());
302 assert(getNumHashBits() == Hash.size() * 8);
303 return TrieHashIndexGenerator{Root.getNumBits(), getNumSubtrieBits(), Hash};
304 }
305
306 static Expected<TrieRawHashMapHandle>
307 create(MappedFileRegionArena &Alloc, StringRef Name,
308 std::optional<uint64_t> NumRootBits, uint64_t NumSubtrieBits,
309 uint64_t NumHashBits, uint64_t RecordDataSize,
310 std::shared_ptr<OnDiskCASLogger> Logger);
311
312 void
313 print(raw_ostream &OS,
314 function_ref<void(ArrayRef<char>)> PrintRecordData = nullptr) const;
315
317 function_ref<Error(FileOffset, OnDiskTrieRawHashMap::ConstValueProxy)>
318 RecordVerifier) const;
319 TrieRawHashMapHandle() = default;
320 TrieRawHashMapHandle(MappedFileRegion &Region, Header &H,
321 std::shared_ptr<OnDiskCASLogger> Logger = nullptr)
322 : Region(&Region), H(&H), Logger(std::move(Logger)) {}
323 TrieRawHashMapHandle(MappedFileRegion &Region, intptr_t HeaderOffset,
324 std::shared_ptr<OnDiskCASLogger> Logger = nullptr)
325 : TrieRawHashMapHandle(
326 Region, *reinterpret_cast<Header *>(Region.data() + HeaderOffset),
327 std::move(Logger)) {}
328
329 OnDiskCASLogger *getLogger() const { return Logger.get(); }
330 void setLogger(std::shared_ptr<OnDiskCASLogger> Logger) {
331 this->Logger = std::move(Logger);
332 }
333
334private:
335 MappedFileRegion *Region = nullptr;
336 Header *H = nullptr;
337 std::shared_ptr<OnDiskCASLogger> Logger;
338};
339
340} // end anonymous namespace
341
343 DatabaseFile File;
344 TrieRawHashMapHandle Trie;
345};
346
348 uint32_t StartBit,
349 uint32_t NumBits,
351 assert(StartBit <= TrieRawHashMapHandle::MaxNumHashBits);
352 assert(NumBits <= UINT8_MAX);
353 assert(NumBits <= TrieRawHashMapHandle::MaxNumRootBits);
354
355 auto Mem = Alloc.allocate(getSize(NumBits));
356 if (LLVM_UNLIKELY(!Mem))
357 return Mem.takeError();
358 auto *H =
359 new (*Mem) SubtrieHandle::Header{(uint16_t)StartBit, (uint8_t)NumBits,
360 /*ZeroPad1B=*/0, /*ZeroPad4B=*/0};
361 SubtrieHandle S(Alloc.getRegion(), *H, Logger);
362 for (auto I = S.Slots.begin(), E = S.Slots.end(); I != E; ++I)
363 new (I) SlotT(0);
364
365 if (Logger)
366 Logger->logSubtrieHandleCreate(Alloc.data(), S.getOffset().Offset, StartBit,
367 NumBits);
368 return S;
369}
370
371SubtrieHandle TrieRawHashMapHandle::getRoot() const {
372 if (int64_t Root = H->RootTrieOffset)
373 return SubtrieHandle(getRegion(), SubtrieSlotValue::getSubtrieOffset(Root),
374 Logger.get());
375 return SubtrieHandle();
376}
377
379TrieRawHashMapHandle::getOrCreateRoot(MappedFileRegionArena &Alloc) {
380 assert(&Alloc.getRegion() == &getRegion());
381 if (SubtrieHandle Root = getRoot())
382 return Root;
383
384 int64_t Race = 0;
385 auto LazyRoot =
386 SubtrieHandle::create(Alloc, 0, H->NumSubtrieBits, Logger.get());
387 if (LLVM_UNLIKELY(!LazyRoot))
388 return LazyRoot.takeError();
389
390 if (H->RootTrieOffset.compare_exchange_strong(
391 Race, LazyRoot->getOffset().asSubtrie()))
392 return *LazyRoot;
393
394 // There was a race. Return the other root.
395 //
396 // TODO: Avoid leaking the lazy root by storing it in an allocator.
397 return SubtrieHandle(getRegion(), SubtrieSlotValue::getSubtrieOffset(Race),
398 Logger.get());
399}
400
402TrieRawHashMapHandle::create(MappedFileRegionArena &Alloc, StringRef Name,
403 std::optional<uint64_t> NumRootBits,
404 uint64_t NumSubtrieBits, uint64_t NumHashBits,
405 uint64_t RecordDataSize,
406 std::shared_ptr<OnDiskCASLogger> Logger) {
407 // Allocate.
408 auto Offset = Alloc.allocateOffset(sizeof(Header) + Name.size() + 1);
409 if (LLVM_UNLIKELY(!Offset))
410 return Offset.takeError();
411
412 // Construct the header and the name.
413 assert(Name.size() <= UINT16_MAX && "Expected smaller table name");
414 assert(NumSubtrieBits <= UINT8_MAX && "Expected valid subtrie bits");
415 assert(NumHashBits <= UINT16_MAX && "Expected valid hash size");
416 assert(RecordDataSize <= UINT32_MAX && "Expected smaller table name");
417 auto *H = new (Alloc.getRegion().data() + *Offset)
419 (uint32_t)sizeof(Header)},
420 (uint8_t)NumSubtrieBits,
421 /*Flags=*/0,
422 (uint16_t)NumHashBits,
423 (uint32_t)RecordDataSize,
424 /*RootTrieOffset=*/{0},
425 /*AllocatorOffset=*/{0}};
426 char *NameStorage = reinterpret_cast<char *>(H + 1);
427 llvm::copy(Name, NameStorage);
428 NameStorage[Name.size()] = 0;
429
430 // Construct a root trie, if requested.
431 TrieRawHashMapHandle Trie(Alloc.getRegion(), *H, Logger);
432 auto Sub = SubtrieHandle::create(Alloc, 0, *NumRootBits, Logger.get());
433 if (LLVM_UNLIKELY(!Sub))
434 return Sub.takeError();
435 if (NumRootBits)
436 H->RootTrieOffset = Sub->getOffset().asSubtrie();
437 return Trie;
438}
439
440TrieRawHashMapHandle::RecordData
441TrieRawHashMapHandle::getRecord(SubtrieSlotValue Offset) {
442 char *Begin = Region->data() + Offset.asData();
444 Proxy.Data = MutableArrayRef(Begin, getRecordDataSize());
445 Proxy.Hash = ArrayRef(reinterpret_cast<const uint8_t *>(Proxy.Data.end()),
446 getNumHashBytes());
447 return RecordData{Proxy, Offset};
448}
449
451TrieRawHashMapHandle::createRecord(MappedFileRegionArena &Alloc,
452 ArrayRef<uint8_t> Hash) {
453 assert(&Alloc.getRegion() == Region);
454 assert(Hash.size() == getNumHashBytes());
455 auto Offset = Alloc.allocateOffset(getRecordSize());
456 if (LLVM_UNLIKELY(!Offset))
457 return Offset.takeError();
458
459 RecordData Record = getRecord(SubtrieSlotValue::getDataOffset(*Offset));
460 llvm::copy(Hash, const_cast<uint8_t *>(Record.Proxy.Hash.begin()));
461
462 if (Logger)
463 Logger->logHashMappedTrieHandleCreateRecord(
464 Alloc.data(), Record.Offset.getRawOffset(), Hash);
465
466 return Record;
467}
468
471 // Check alignment.
473 return createStringError(make_error_code(std::errc::protocol_error),
474 "unaligned file offset at 0x" +
475 utohexstr(Offset.get(), /*LowerCase=*/true));
476
477 // Check bounds.
478 //
479 // Note: There's no potential overflow when using \c uint64_t because Offset
480 // is in valid offset range and the record size is in \c [0,UINT32_MAX].
481 if (!validOffset(Offset) ||
482 Offset.get() + Impl->Trie.getRecordSize() > Impl->File.getAlloc().size())
483 return createStringError(make_error_code(std::errc::protocol_error),
484 "file offset too large: 0x" +
485 utohexstr(Offset.get(), /*LowerCase=*/true));
486
487 // Looks okay...
488 TrieRawHashMapHandle::RecordData D =
489 Impl->Trie.getRecord(SubtrieSlotValue::getDataOffset(Offset));
490 return ConstOnDiskPtr(D.Proxy, D.getFileOffset());
491}
492
494OnDiskTrieRawHashMap::find(ArrayRef<uint8_t> Hash) const {
495 TrieRawHashMapHandle Trie = Impl->Trie;
496 assert(Hash.size() == Trie.getNumHashBytes() && "Invalid hash");
497
498 SubtrieHandle S = Trie.getRoot();
499 if (!S)
500 return ConstOnDiskPtr();
501
502 TrieHashIndexGenerator IndexGen = Trie.getIndexGen(S, Hash);
503 size_t Index = IndexGen.next();
504 for (;;) {
505 // Try to set the content.
506 SubtrieSlotValue V = S.load(Index);
507 if (!V)
508 return ConstOnDiskPtr();
509
510 // Check for an exact match.
511 if (V.isData()) {
512 TrieRawHashMapHandle::RecordData D = Trie.getRecord(V);
513 return D.Proxy.Hash == Hash ? ConstOnDiskPtr(D.Proxy, D.getFileOffset())
514 : ConstOnDiskPtr();
515 }
516
517 Index = IndexGen.next();
518 S = SubtrieHandle(Trie.getRegion(), V, Trie.getLogger());
519 }
520}
521
522/// Only safe if the subtrie is empty.
523void SubtrieHandle::reinitialize(uint32_t StartBit, uint32_t NumBits) {
524 assert(StartBit > H->StartBit);
525 assert(NumBits <= H->NumBits);
526 // Ideally would also assert that all slots are empty, but that's expensive.
527
528 H->StartBit = StartBit;
529 H->NumBits = NumBits;
530}
531
532Expected<OnDiskTrieRawHashMap::OnDiskPtr>
533OnDiskTrieRawHashMap::insertLazy(ArrayRef<uint8_t> Hash,
534 LazyInsertOnConstructCB OnConstruct,
535 LazyInsertOnLeakCB OnLeak) {
536 TrieRawHashMapHandle Trie = Impl->Trie;
537 assert(Hash.size() == Trie.getNumHashBytes() && "Invalid hash");
538
539 MappedFileRegionArena &Alloc = Impl->File.getAlloc();
540 std::optional<SubtrieHandle> S;
541 auto Err = Trie.getOrCreateRoot(Alloc).moveInto(S);
542 if (LLVM_UNLIKELY(Err))
543 return std::move(Err);
544
545 TrieHashIndexGenerator IndexGen = Trie.getIndexGen(*S, Hash);
546 size_t Index = IndexGen.next();
547
548 // Walk through the hash bytes and insert into correct trie position.
549 std::optional<TrieRawHashMapHandle::RecordData> NewRecord;
550 SubtrieHandle UnusedSubtrie;
551 for (;;) {
552 SubtrieSlotValue Existing = S->load(Index);
553
554 // Try to set it, if it's empty.
555 if (!Existing) {
556 if (!NewRecord) {
557 auto Err = Trie.createRecord(Alloc, Hash).moveInto(NewRecord);
558 if (LLVM_UNLIKELY(Err))
559 return std::move(Err);
560 if (OnConstruct)
561 OnConstruct(NewRecord->Offset.asDataFileOffset(), NewRecord->Proxy);
562 }
563
564 if (S->compare_exchange_strong(Index, Existing, NewRecord->Offset))
565 return OnDiskPtr(NewRecord->Proxy,
566 NewRecord->Offset.asDataFileOffset());
567
568 // Race means that Existing is no longer empty; fall through...
569 }
570
571 if (Existing.isSubtrie()) {
572 S = SubtrieHandle(Trie.getRegion(), Existing, Trie.getLogger());
573 Index = IndexGen.next();
574 continue;
575 }
576
577 // Check for an exact match.
578 TrieRawHashMapHandle::RecordData ExistingRecord = Trie.getRecord(Existing);
579 if (ExistingRecord.Proxy.Hash == Hash) {
580 if (NewRecord && OnLeak)
581 OnLeak(NewRecord->Offset.asDataFileOffset(), NewRecord->Proxy,
582 ExistingRecord.Offset.asDataFileOffset(), ExistingRecord.Proxy);
583 return OnDiskPtr(ExistingRecord.Proxy,
584 ExistingRecord.Offset.asDataFileOffset());
585 }
586
587 // Sink the existing content as long as the indexes match.
588 for (;;) {
589 size_t NextIndex = IndexGen.next();
590 size_t NewIndexForExistingContent =
591 IndexGen.getCollidingBits(ExistingRecord.Proxy.Hash);
592
593 auto Err = S->sink(Index, Existing, Alloc, IndexGen.getNumBits(),
594 UnusedSubtrie, NewIndexForExistingContent)
595 .moveInto(S);
596 if (LLVM_UNLIKELY(Err))
597 return std::move(Err);
598 Index = NextIndex;
599
600 // Found the difference.
601 if (NextIndex != NewIndexForExistingContent)
602 break;
603 }
604 }
605}
606
607Expected<SubtrieHandle> SubtrieHandle::sink(size_t I, SubtrieSlotValue V,
609 size_t NumSubtrieBits,
610 SubtrieHandle &UnusedSubtrie,
611 size_t NewI) {
612 std::optional<SubtrieHandle> NewS;
613 if (UnusedSubtrie) {
614 // Steal UnusedSubtrie and initialize it.
615 NewS.emplace();
616 std::swap(*NewS, UnusedSubtrie);
617 NewS->reinitialize(getStartBit() + getNumBits(), NumSubtrieBits);
618 } else {
619 // Allocate a new, empty subtrie.
620 auto Err = SubtrieHandle::create(Alloc, getStartBit() + getNumBits(),
621 NumSubtrieBits, Logger)
622 .moveInto(NewS);
623 if (LLVM_UNLIKELY(Err))
624 return std::move(Err);
625 }
626
627 NewS->store(NewI, V);
628 if (compare_exchange_strong(I, V, NewS->getOffset()))
629 return *NewS; // Success!
630
631 // Raced.
632 assert(V.isSubtrie() && "Expected racing sink() to add a subtrie");
633
634 // Wipe out the new slot so NewS can be reused and set the out parameter.
635 NewS->store(NewI, SubtrieSlotValue());
636 UnusedSubtrie = *NewS;
637
638 // Return the subtrie added by the concurrent sink() call.
639 return SubtrieHandle(Alloc.getRegion(), V, Logger);
640}
641
643 raw_ostream &OS, function_ref<void(ArrayRef<char>)> PrintRecordData) const {
644 Impl->Trie.print(OS, PrintRecordData);
645}
646
648 function_ref<Error(FileOffset, ConstValueProxy)> RecordVerifier) const {
649 uint64_t BumpPtr = Impl->File.getAlloc().size();
651 return createStringError(make_error_code(std::errc::protocol_error),
652 "arena bump pointer is not aligned: 0x" +
653 utohexstr(BumpPtr, /*LowerCase=*/true));
654
655 return Impl->Trie.validate(RecordVerifier);
656}
657
658// Helper function that prints hexdigit and have a sub-byte starting position.
659static void printHexDigits(raw_ostream &OS, ArrayRef<uint8_t> Bytes,
660 size_t StartBit, size_t NumBits) {
661 assert(StartBit % 4 == 0);
662 assert(NumBits % 4 == 0);
663 for (size_t I = StartBit, E = StartBit + NumBits; I != E; I += 4) {
664 uint8_t HexPair = Bytes[I / 8];
665 uint8_t HexDigit = I % 8 == 0 ? HexPair >> 4 : HexPair & 0xf;
666 OS << hexdigit(HexDigit, /*LowerCase=*/true);
667 }
668}
669
670static void printBits(raw_ostream &OS, ArrayRef<uint8_t> Bytes, size_t StartBit,
671 size_t NumBits) {
672 assert(StartBit + NumBits <= Bytes.size() * 8u);
673 for (size_t I = StartBit, E = StartBit + NumBits; I != E; ++I) {
674 uint8_t Byte = Bytes[I / 8];
675 size_t ByteOffset = I % 8;
676 if (size_t ByteShift = 8 - ByteOffset - 1)
677 Byte >>= ByteShift;
678 OS << (Byte & 0x1 ? '1' : '0');
679 }
680}
681
682void SubtrieHandle::printHash(raw_ostream &OS, ArrayRef<uint8_t> Bytes) const {
683 // afb[1c:00*01110*0]def
684 size_t EndBit = getStartBit() + getNumBits();
685 size_t HashEndBit = Bytes.size() * 8u;
686
687 size_t FirstBinaryBit = getStartBit() & ~0x3u;
688 printHexDigits(OS, Bytes, 0, FirstBinaryBit);
689
690 size_t LastBinaryBit = (EndBit + 3u) & ~0x3u;
691 OS << "[";
692 printBits(OS, Bytes, FirstBinaryBit, LastBinaryBit - FirstBinaryBit);
693 OS << "]";
694
695 printHexDigits(OS, Bytes, LastBinaryBit, HashEndBit - LastBinaryBit);
696}
697
698static void appendIndexBits(std::string &Prefix, size_t Index,
699 size_t NumSlots) {
700 std::string Bits;
701 for (size_t NumBits = 1u; NumBits < NumSlots; NumBits <<= 1) {
702 Bits.push_back('0' + (Index & 0x1));
703 Index >>= 1;
704 }
705 for (char Ch : llvm::reverse(Bits))
706 Prefix += Ch;
707}
708
709static void printPrefix(raw_ostream &OS, StringRef Prefix) {
710 while (Prefix.size() >= 4) {
711 uint8_t Digit;
712 bool ErrorParsingBinary = Prefix.take_front(4).getAsInteger(2, Digit);
713 assert(!ErrorParsingBinary);
714 (void)ErrorParsingBinary;
715 OS << hexdigit(Digit, /*LowerCase=*/true);
716 Prefix = Prefix.drop_front(4);
717 }
718 if (!Prefix.empty())
719 OS << "[" << Prefix << "]";
720}
721
723
724static Expected<size_t> checkParameter(StringRef Label, size_t Max,
725 std::optional<size_t> Value,
726 std::optional<size_t> Default,
727 StringRef Path, StringRef TableName) {
728 assert(Value || Default);
729 assert(!Default || *Default <= Max);
730 if (!Value)
731 return *Default;
732
733 if (*Value <= Max)
734 return *Value;
736 std::errc::argument_out_of_domain, Path, TableName,
737 "invalid " + Label + ": " + Twine(*Value) + " (max: " + Twine(Max) + ")");
738}
739
740size_t OnDiskTrieRawHashMap::size() const { return Impl->File.size(); }
741size_t OnDiskTrieRawHashMap::capacity() const {
742 return Impl->File.getRegion().size();
743}
744
745Expected<OnDiskTrieRawHashMap>
746OnDiskTrieRawHashMap::create(const Twine &PathTwine, const Twine &TrieNameTwine,
747 size_t NumHashBits, uint64_t DataSize,
748 uint64_t MaxFileSize,
749 std::optional<uint64_t> NewFileInitialSize,
750 std::shared_ptr<OnDiskCASLogger> Logger,
751 std::optional<size_t> NewTableNumRootBits,
752 std::optional<size_t> NewTableNumSubtrieBits) {
753 SmallString<128> PathStorage;
754 StringRef Path = PathTwine.toStringRef(PathStorage);
755 SmallString<128> TrieNameStorage;
756 StringRef TrieName = TrieNameTwine.toStringRef(TrieNameStorage);
757
758 if (MaxFileSize == 0)
759 return createTableConfigError(std::errc::invalid_argument, Path, TrieName,
760 "invalid size");
761
762 constexpr size_t DefaultNumRootBits = 10;
763 constexpr size_t DefaultNumSubtrieBits = 6;
764
765 size_t NumRootBits;
766 if (Error E = checkParameter(
767 "root bits", TrieRawHashMapHandle::MaxNumRootBits,
768 NewTableNumRootBits, DefaultNumRootBits, Path, TrieName)
769 .moveInto(NumRootBits))
770 return std::move(E);
771
772 size_t NumSubtrieBits;
773 if (Error E = checkParameter("subtrie bits",
774 TrieRawHashMapHandle::MaxNumSubtrieBits,
775 NewTableNumSubtrieBits, DefaultNumSubtrieBits,
776 Path, TrieName)
777 .moveInto(NumSubtrieBits))
778 return std::move(E);
779
780 size_t NumHashBytes = NumHashBits >> 3;
781 if (Error E =
782 checkParameter("hash size", TrieRawHashMapHandle::MaxNumHashBits,
783 NumHashBits, std::nullopt, Path, TrieName)
784 .takeError())
785 return std::move(E);
786 assert(NumHashBits == NumHashBytes << 3 &&
787 "Expected hash size to be byte-aligned");
788 if (NumHashBits != NumHashBytes << 3)
790 std::errc::argument_out_of_domain, Path, TrieName,
791 "invalid hash size: " + Twine(NumHashBits) + " (not byte-aligned)");
792
793 // Constructor for if the file doesn't exist.
794 auto NewDBConstructor = [&](DatabaseFile &DB) -> Error {
795 auto Trie = TrieRawHashMapHandle::create(DB.getAlloc(), TrieName,
796 NumRootBits, NumSubtrieBits,
797 NumHashBits, DataSize, Logger);
798 if (LLVM_UNLIKELY(!Trie))
799 return Trie.takeError();
800
801 return DB.addTable(*Trie);
802 };
803
804 // Get or create the file.
805 Expected<DatabaseFile> File =
806 DatabaseFile::create(Path, MaxFileSize, Logger, NewDBConstructor);
807 if (!File)
808 return File.takeError();
809
810 // Find the trie and validate it.
811 std::optional<TableHandle> Table = File->findTable(TrieName);
812 if (!Table)
813 return createTableConfigError(std::errc::argument_out_of_domain, Path,
814 TrieName, "table not found");
815 if (Error E = checkTable("table kind", (size_t)TrieRawHashMapHandle::Kind,
816 (size_t)Table->getHeader().Kind, Path, TrieName))
817 return std::move(E);
818 auto Trie = Table->cast<TrieRawHashMapHandle>();
819 Trie.setLogger(Logger);
820 assert(Trie && "Already checked the kind");
821
822 // Check the hash and data size.
823 if (Error E = checkTable("hash size", NumHashBits, Trie.getNumHashBits(),
824 Path, TrieName))
825 return std::move(E);
826 if (Error E = checkTable("data size", DataSize, Trie.getRecordDataSize(),
827 Path, TrieName))
828 return std::move(E);
829
830 // No flags supported right now. Either corrupt, or coming from a future
831 // writer.
832 if (size_t Flags = Trie.getFlags())
833 return createTableConfigError(std::errc::invalid_argument, Path, TrieName,
834 "unsupported flags: " + Twine(Flags));
835
836 // Success.
837 OnDiskTrieRawHashMap::ImplType Impl{DatabaseFile(std::move(*File)), Trie};
838 return OnDiskTrieRawHashMap(std::make_unique<ImplType>(std::move(Impl)));
839}
840
841static Error createInvalidTrieError(uint64_t Offset, const Twine &Msg) {
842 return createStringError(make_error_code(std::errc::protocol_error),
843 "invalid trie at 0x" +
844 utohexstr(Offset, /*LowerCase=*/true) + ": " +
845 Msg);
846}
847
848//===----------------------------------------------------------------------===//
849// TrieVisitor data structures.
850//===----------------------------------------------------------------------===//
851
852namespace {
853/// A multi-threaded vistior to traverse the Trie.
854///
855/// TODO: add more sanity checks that isn't just plain data corruption. For
856/// example, some ill-formed data can be constructed to form a cycle using
857/// Sub-Tries and it can lead to inifinite loop when visiting (or inserting
858/// data).
859class TrieVisitor {
860public:
861 TrieVisitor(TrieRawHashMapHandle Trie, unsigned ThreadCount = 0,
862 unsigned ErrorLimit = 50)
863 : Trie(Trie), ErrorLimit(ErrorLimit),
865 virtual ~TrieVisitor() = default;
866 Error visit();
867
868private:
869 // Virtual method to implement the action when visiting a sub-trie.
870 virtual Error visitSubTrie(StringRef Prefix, SubtrieHandle SubTrie) {
871 return Error::success();
872 }
873
874 // Virtual method to implement the action when visiting a slot in a trie node.
875 virtual Error visitSlot(unsigned I, SubtrieHandle Subtrie, StringRef Prefix,
876 SubtrieSlotValue Slot) {
877 return Error::success();
878 }
879
880protected:
881 TrieRawHashMapHandle Trie;
882
883private:
884 Error traverseTrieNode(SubtrieHandle Node, StringRef Prefix);
885
886 Error validateSubTrie(SubtrieHandle Node, bool IsRoot);
887
888 Error validateSubtrieHeader(uint64_t Offset, bool IsRoot);
889
890 // Helper function to capture errors when visiting the trie nodes.
891 void addError(Error NewError) {
892 assert(NewError && "not an error");
893 std::lock_guard<std::mutex> ErrorLock(Lock);
894 if (NumError >= ErrorLimit) {
895 // Too many errors.
896 consumeError(std::move(NewError));
897 return;
898 }
899
900 if (Err)
901 Err = joinErrors(std::move(*Err), std::move(NewError));
902 else
903 Err = std::move(NewError);
904 NumError++;
905 }
906
907 bool tooManyErrors() {
908 std::lock_guard<std::mutex> ErrorLock(Lock);
909 return (bool)Err && NumError >= ErrorLimit;
910 }
911
912 const unsigned ErrorLimit;
913 std::optional<Error> Err;
914 unsigned NumError = 0;
915 std::mutex Lock;
916 DefaultThreadPool Threads;
917};
918
919/// A visitor that traverse and print the Trie.
920class TriePrinter : public TrieVisitor {
921public:
922 TriePrinter(TrieRawHashMapHandle Trie, raw_ostream &OS,
923 function_ref<void(ArrayRef<char>)> PrintRecordData)
924 : TrieVisitor(Trie, /*ThreadCount=*/1), OS(OS),
925 PrintRecordData(PrintRecordData) {}
926
927 Error printRecords() {
928 if (Records.empty())
929 return Error::success();
930
931 OS << "records\n";
932 llvm::sort(Records);
933 for (int64_t Offset : Records) {
934 TrieRawHashMapHandle::RecordData Record =
935 Trie.getRecord(SubtrieSlotValue::getDataOffset(Offset));
936 if (auto Err = printRecord(Record))
937 return Err;
938 }
939 return Error::success();
940 }
941
942 Error printRecord(TrieRawHashMapHandle::RecordData &Record) {
943 OS << "- addr=" << (void *)Record.getFileOffset().get() << " ";
944 if (PrintRecordData) {
945 PrintRecordData(Record.Proxy.Data);
946 } else {
947 OS << "bytes=";
948 ArrayRef<uint8_t> Data(
949 reinterpret_cast<const uint8_t *>(Record.Proxy.Data.data()),
950 Record.Proxy.Data.size());
951 printHexDigits(OS, Data, 0, Data.size() * 8);
952 }
953 OS << "\n";
954 return Error::success();
955 }
956
957 Error visitSubTrie(StringRef Prefix, SubtrieHandle SubTrie) override {
958 if (Prefix.empty()) {
959 OS << "root";
960 } else {
961 OS << "subtrie=";
962 printPrefix(OS, Prefix);
963 }
964
965 OS << " addr="
966 << (void *)(reinterpret_cast<const char *>(&SubTrie.getHeader()) -
967 Trie.getRegion().data());
968 OS << " num-slots=" << SubTrie.getNumSlots() << "\n";
969 return Error::success();
970 }
971
972 Error visitSlot(unsigned I, SubtrieHandle Subtrie, StringRef Prefix,
973 SubtrieSlotValue Slot) override {
974 OS << "- index=";
975 for (size_t Pad : {10, 100, 1000})
976 if (I < Pad && Subtrie.getNumSlots() >= Pad)
977 OS << "0";
978 OS << I << " ";
979 if (Slot.isSubtrie()) {
980 OS << "addr=" << (void *)Slot.asSubtrie();
981 OS << " subtrie=";
982 printPrefix(OS, Prefix);
983 OS << "\n";
984 return Error::success();
985 }
986 TrieRawHashMapHandle::RecordData Record = Trie.getRecord(Slot);
987 OS << "addr=" << (void *)Record.getFileOffset().get();
988 OS << " content=";
989 Subtrie.printHash(OS, Record.Proxy.Hash);
990 OS << "\n";
991 Records.push_back(Slot.asData());
992 return Error::success();
993 }
994
995private:
996 raw_ostream &OS;
997 function_ref<void(ArrayRef<char>)> PrintRecordData;
999};
1000
1001/// TrieVerifier that adds additional verification on top of the basic visitor.
1002class TrieVerifier : public TrieVisitor {
1003public:
1004 TrieVerifier(
1005 TrieRawHashMapHandle Trie,
1006 function_ref<Error(FileOffset, OnDiskTrieRawHashMap::ConstValueProxy)>
1007 RecordVerifier)
1008 : TrieVisitor(Trie), RecordVerifier(RecordVerifier) {}
1009
1010private:
1011 Error visitSubTrie(StringRef Prefix, SubtrieHandle SubTrie) final {
1012 return Error::success();
1013 }
1014
1015 Error visitSlot(unsigned I, SubtrieHandle Subtrie, StringRef Prefix,
1016 SubtrieSlotValue Slot) final {
1017 if (RecordVerifier && Slot.isData()) {
1019 return createInvalidTrieError(Slot.asData(), "mis-aligned data entry");
1020
1021 uint64_t DataOffset = Slot.asData();
1022 uint64_t RecordEnd = DataOffset + Trie.getRecordSize();
1023 if (RecordEnd > (uint64_t)Trie.getRegion().size())
1024 return createInvalidTrieError(DataOffset,
1025 "data entry extends past end of file");
1026
1027 TrieRawHashMapHandle::RecordData Record =
1028 Trie.getRecord(SubtrieSlotValue::getDataOffset(Slot.asData()));
1029 return RecordVerifier(Slot.asDataFileOffset(),
1030 OnDiskTrieRawHashMap::ConstValueProxy{
1031 Record.Proxy.Hash, Record.Proxy.Data});
1032 }
1033 return Error::success();
1034 }
1035
1036 function_ref<Error(FileOffset, OnDiskTrieRawHashMap::ConstValueProxy)>
1037 RecordVerifier;
1038};
1039} // namespace
1040
1041Error TrieVisitor::visit() {
1042 if (int64_t RootOffset = Trie.getRootTrieOffset()) {
1043 if (auto Err = validateSubtrieHeader(RootOffset, /*IsRoot=*/true))
1044 return Err;
1045 }
1046
1047 auto Root = Trie.getRoot();
1048 if (!Root)
1049 return Error::success();
1050
1051 if (auto Err = validateSubTrie(Root, /*IsRoot=*/true))
1052 return Err;
1053
1054 if (auto Err = visitSubTrie("", Root))
1055 return Err;
1056
1058 SmallVector<std::string> Prefixes;
1059 const size_t NumSlots = Root.getNumSlots();
1060 for (size_t I = 0, E = NumSlots; I != E; ++I) {
1061 SubtrieSlotValue Slot = Root.load(I);
1062 if (!Slot)
1063 continue;
1064 uint64_t Offset = Slot.isSubtrie() ? Slot.asSubtrie() : Slot.asData();
1065 if (Offset >= (uint64_t)Trie.getRegion().size())
1066 return createInvalidTrieError(Offset, "slot points out of bound");
1067 std::string SubtriePrefix;
1068 appendIndexBits(SubtriePrefix, I, NumSlots);
1069 if (Slot.isSubtrie()) {
1070 if (auto Err = validateSubtrieHeader(Slot.asSubtrie(), /*IsRoot=*/false))
1071 return Err;
1072 SubtrieHandle S(Trie.getRegion(), Slot, Trie.getLogger());
1073 Subs.push_back(S);
1074 Prefixes.push_back(SubtriePrefix);
1075 }
1076 if (auto Err = visitSlot(I, Root, SubtriePrefix, Slot))
1077 return Err;
1078 }
1079
1080 for (size_t I = 0, E = Subs.size(); I != E; ++I) {
1081 Threads.async(
1082 [&](unsigned Idx) {
1083 // Don't run if there is an error already.
1084 if (tooManyErrors())
1085 return;
1086 if (auto Err = traverseTrieNode(Subs[Idx], Prefixes[Idx]))
1087 addError(std::move(Err));
1088 },
1089 I);
1090 }
1091
1092 Threads.wait();
1093 if (Err)
1094 return std::move(*Err);
1095 return Error::success();
1096}
1097
1098Error TrieVisitor::validateSubTrie(SubtrieHandle Node, bool IsRoot) {
1099 char *Addr = reinterpret_cast<char *>(&Node.getHeader());
1100 const int64_t Offset = Node.getFileOffset().get();
1101 if (Addr + Node.getSize() >=
1102 Trie.getRegion().data() + Trie.getRegion().size())
1103 return createInvalidTrieError(Offset, "subtrie node spans out of bound");
1104
1105 if (!IsRoot &&
1106 Node.getStartBit() + Node.getNumBits() > Trie.getNumHashBits()) {
1107 return createInvalidTrieError(Offset,
1108 "subtrie represents too many hash bits");
1109 }
1110
1111 if (IsRoot) {
1112 if (Node.getStartBit() != 0)
1113 return createInvalidTrieError(Offset,
1114 "root node doesn't start at 0 index");
1115
1116 return Error::success();
1117 }
1118
1119 if (Node.getNumBits() > Trie.getNumSubtrieBits())
1120 return createInvalidTrieError(Offset, "subtrie has wrong number of slots");
1121
1122 return Error::success();
1123}
1124
1125Error TrieVisitor::validateSubtrieHeader(uint64_t Offset, bool IsRoot) {
1126 uint64_t RegionSize = Trie.getRegion().size();
1127 if (Offset + sizeof(SubtrieHandle::Header) > RegionSize)
1128 return createInvalidTrieError(Offset, "subtrie header out of bound");
1129
1130 auto *H = reinterpret_cast<const SubtrieHandle::Header *>(
1131 Trie.getRegion().data() + Offset);
1132 if (H->NumBits == 0)
1133 return createInvalidTrieError(Offset, "invalid subtrie NumBits");
1134
1135 if (!IsRoot && H->NumBits > Trie.getNumSubtrieBits())
1136 return createInvalidTrieError(Offset, "subtrie has corrupt NumBits");
1137
1138 if (Offset + static_cast<uint64_t>(SubtrieHandle::getSize(H->NumBits)) >
1139 RegionSize)
1140 return createInvalidTrieError(Offset, "subtrie node spans out of bound");
1141
1142 return Error::success();
1143}
1144
1145Error TrieVisitor::traverseTrieNode(SubtrieHandle Node, StringRef Prefix) {
1146 if (auto Err = validateSubTrie(Node, /*IsRoot=*/false))
1147 return Err;
1148
1149 if (auto Err = visitSubTrie(Prefix, Node))
1150 return Err;
1151
1153 SmallVector<std::string> Prefixes;
1154 const size_t NumSlots = Node.getNumSlots();
1155 for (size_t I = 0, E = NumSlots; I != E; ++I) {
1156 SubtrieSlotValue Slot = Node.load(I);
1157 if (!Slot)
1158 continue;
1159 uint64_t Offset = Slot.isSubtrie() ? Slot.asSubtrie() : Slot.asData();
1160 if (Offset >= (uint64_t)Trie.getRegion().size())
1161 return createInvalidTrieError(Offset, "slot points out of bound");
1162 std::string SubtriePrefix = Prefix.str();
1163 appendIndexBits(SubtriePrefix, I, NumSlots);
1164 if (Slot.isSubtrie()) {
1165 if (auto Err = validateSubtrieHeader(Slot.asSubtrie(), /*IsRoot=*/false))
1166 return Err;
1167 SubtrieHandle S(Trie.getRegion(), Slot, Trie.getLogger());
1168 Subs.push_back(S);
1169 Prefixes.push_back(SubtriePrefix);
1170 }
1171 if (auto Err = visitSlot(I, Node, SubtriePrefix, Slot))
1172 return Err;
1173 }
1174 for (size_t I = 0, E = Subs.size(); I != E; ++I)
1175 if (auto Err = traverseTrieNode(Subs[I], Prefixes[I]))
1176 return Err;
1177
1178 return Error::success();
1179}
1180
1181void TrieRawHashMapHandle::print(
1182 raw_ostream &OS, function_ref<void(ArrayRef<char>)> PrintRecordData) const {
1183 OS << "hash-num-bits=" << getNumHashBits()
1184 << " hash-size=" << getNumHashBytes()
1185 << " record-data-size=" << getRecordDataSize() << "\n";
1186
1187 TriePrinter Printer(*this, OS, PrintRecordData);
1188 if (auto Err = Printer.visit())
1189 OS << "error: " << toString(std::move(Err)) << "\n";
1190
1191 if (auto Err = Printer.printRecords())
1192 OS << "error: " << toString(std::move(Err)) << "\n";
1193}
1194
1195Error TrieRawHashMapHandle::validate(
1197 RecordVerifier) const {
1198 // Use the base TrieVisitor to identify the errors inside trie first.
1199 TrieVisitor BasicVerifier(*this);
1200 if (auto Err = BasicVerifier.visit())
1201 return Err;
1202
1203 // If the trie data structure is sound, do a second pass to verify data and
1204 // verifier function can assume the index is correct. However, there can be
1205 // newly added bad entries that can still produce error.
1206 TrieVerifier Verifier(*this, RecordVerifier);
1207 return Verifier.visit();
1208}
1209
1210#else // !LLVM_ENABLE_ONDISK_CAS
1211
1213
1215OnDiskTrieRawHashMap::create(const Twine &PathTwine, const Twine &TrieNameTwine,
1216 size_t NumHashBits, uint64_t DataSize,
1217 uint64_t MaxFileSize,
1218 std::optional<uint64_t> NewFileInitialSize,
1219 std::shared_ptr<OnDiskCASLogger> Logger,
1220 std::optional<size_t> NewTableNumRootBits,
1221 std::optional<size_t> NewTableNumSubtrieBits) {
1222 return createStringError(make_error_code(std::errc::not_supported),
1223 "OnDiskTrieRawHashMap is not supported");
1224}
1225
1228 LazyInsertOnConstructCB OnConstruct,
1229 LazyInsertOnLeakCB OnLeak) {
1230 return createStringError(make_error_code(std::errc::not_supported),
1231 "OnDiskTrieRawHashMap is not supported");
1232}
1233
1236 return createStringError(make_error_code(std::errc::not_supported),
1237 "OnDiskTrieRawHashMap is not supported");
1238}
1239
1244
1246 raw_ostream &OS, function_ref<void(ArrayRef<char>)> PrintRecordData) const {
1247}
1248
1251 RecordVerifier) const {
1252 return createStringError(make_error_code(std::errc::not_supported),
1253 "OnDiskTrieRawHashMap is not supported");
1254}
1255
1256size_t OnDiskTrieRawHashMap::size() const { return 0; }
1257size_t OnDiskTrieRawHashMap::capacity() const { return 0; }
1258
1259#endif // LLVM_ENABLE_ONDISK_CAS
1260
1261OnDiskTrieRawHashMap::OnDiskTrieRawHashMap(std::unique_ptr<ImplType> Impl)
1262 : Impl(std::move(Impl)) {}
1264 default;
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Mark last scratch load
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_UNLIKELY(EXPR)
Definition Compiler.h:336
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
dxil pretty DXIL Metadata Pretty Printer
This file declares the common interface for a DatabaseFile that is used to implement OnDiskCAS.
static DeltaTreeNode * getRoot(void *Root)
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU, OptimizationRemarkEmitter *ORE)
When an instruction is found to only be used outside of the loop, this function moves it to the exit ...
Definition LICM.cpp:1575
#define I(x, y, z)
Definition MD5.cpp:57
#define H(x, y, z)
Definition MD5.cpp:56
This file declares interface for MappedFileRegionArena, a bump pointer allocator, backed by a memory-...
This file declares interface for OnDiskCASLogger, an interface that can be used to log CAS events to ...
This file declares interface for OnDiskTrieRawHashMap, a thread-safe and (mostly) lock-free hash map ...
static void visit(BasicBlock &Start, std::function< bool(BasicBlock *)> op)
verify safepoint Safepoint IR Verifier
static Split data
This file contains some functions that are useful when dealing with strings.
static RecordT createRecord(const CVSymbol &sym)
static uint32_t getFlags(const Symbol *Sym)
Definition TapiFile.cpp:26
static cl::opt< int > ThreadCount("threads", cl::init(0))
static unsigned getSize(unsigned Kind)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static ErrorSuccess success()
Create a success value.
Definition Error.h:336
Tagged union holding either a T or a Error.
Definition Error.h:485
Logging utility - given an ordered specification of features, and assuming a scalar reward,...
iterator end() const
Definition ArrayRef.h:343
void push_back(const T &Elt)
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
StringRef toStringRef(SmallVectorImpl< char > &Out) const
This returns the twine as a single StringRef if it can be represented as such.
Definition Twine.h:461
FileOffset is a wrapper around uint64_t to represent the offset of data from the beginning of the fil...
Definition FileOffset.h:24
Allocator for an owned mapped file region that supports thread-safe and process-safe bump pointer all...
static constexpr Align getAlign()
Minimum alignment for allocations, currently hardcoded to 8B.
OnDiskTrieRawHashMap is a persistent trie data structure used as hash maps.
LLVM_ABI_FOR_TEST OnDiskTrieRawHashMap(OnDiskTrieRawHashMap &&RHS)
function_ref< void(FileOffset TentativeOffset, ValueProxy TentativeValue)> LazyInsertOnConstructCB
LLVM_ABI_FOR_TEST Error validate(function_ref< Error(FileOffset, ConstValueProxy)> RecordVerifier) const
Validate the trie data structure.
LLVM_ABI_FOR_TEST ConstOnDiskPtr find(ArrayRef< uint8_t > Hash) const
Find the value from hash.
LLVM_DUMP_METHOD void dump() const
static LLVM_ABI_FOR_TEST Expected< OnDiskTrieRawHashMap > create(const Twine &Path, const Twine &TrieName, size_t NumHashBits, uint64_t DataSize, uint64_t MaxFileSize, std::optional< uint64_t > NewFileInitialSize, std::shared_ptr< ondisk::OnDiskCASLogger > Logger=nullptr, std::optional< size_t > NewTableNumRootBits=std::nullopt, std::optional< size_t > NewTableNumSubtrieBits=std::nullopt)
Gets or creates a file at Path with a hash-mapped trie named TrieName.
LLVM_ABI_FOR_TEST size_t size() const
static bool validOffset(FileOffset Offset)
Check the valid range of file offset for OnDiskTrieRawHashMap.
LLVM_ABI_FOR_TEST Expected< OnDiskPtr > insertLazy(ArrayRef< uint8_t > Hash, LazyInsertOnConstructCB OnConstruct=nullptr, LazyInsertOnLeakCB OnLeak=nullptr)
Insert lazily.
LLVM_ABI_FOR_TEST size_t capacity() const
LLVM_ABI_FOR_TEST Expected< ConstOnDiskPtr > recoverFromFileOffset(FileOffset Offset) const
Helper function to recover a pointer into the trie from file offset.
LLVM_ABI_FOR_TEST OnDiskTrieRawHashMap & operator=(OnDiskTrieRawHashMap &&RHS)
void print(raw_ostream &OS, function_ref< void(ArrayRef< char >)> PrintRecordData=nullptr) const
function_ref< void(FileOffset TentativeOffset, ValueProxy TentativeValue, FileOffset FinalOffset, ValueProxy FinalValue)> LazyInsertOnLeakCB
LLVM_ABI_FOR_TEST ~OnDiskTrieRawHashMap()
static Expected< DatabaseFile > create(const Twine &Path, uint64_t Capacity, std::shared_ptr< OnDiskCASLogger > Logger, function_ref< Error(DatabaseFile &)> NewDBConstructor)
Create the DatabaseFile at Path with Capacity.
Interface for logging low-level on-disk cas operations.
An efficient, type-erasing, non-owning reference to a callable.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
llvm::SmallVector< std::shared_ptr< RecordsSlice >, 4 > Records
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
Error createTableConfigError(std::errc ErrC, StringRef Path, StringRef TableName, const Twine &Msg)
MappedFileRegionArena::RegionT MappedFileRegion
Error checkTable(StringRef Label, size_t Expected, size_t Observed, StringRef Path, StringRef TrieName)
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
ThreadPoolStrategy hardware_concurrency(unsigned ThreadCount=0)
Returns a default thread strategy where all available hardware resources are to be used,...
Definition Threading.h:190
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
std::error_code make_error_code(BitcodeError E)
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:134
FunctionAddr VTableAddr uintptr_t uintptr_t DataSize
Definition InstrProf.h:299
std::string utohexstr(uint64_t X, bool LowerCase=false, unsigned Width=0)
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition Error.h:1328
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
Error joinErrors(Error E1, Error E2)
Concatenate errors.
Definition Error.h:442
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1636
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
char hexdigit(unsigned X, bool LowerCase=false)
hexdigit - Return the hexadecimal character for the given number X (which should be less than 16).
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:221
@ Sub
Subtraction of integers.
SingleThreadExecutor DefaultThreadPool
Definition ThreadPool.h:262
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
OutputIt copy(R &&Range, OutputIt Out)
Definition STLExtras.h:1885
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1917
void consumeError(Error Err)
Consume a Error without doing anything.
Definition Error.h:1106
@ Default
The result value is uniform if and only if all operands are uniform.
Definition Uniformity.h:20
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:870
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
size_t getCollidingBits(ArrayRef< uint8_t > CollidingBits) const
Const value proxy to access the records stored in TrieRawHashMap.
Value proxy to access the records stored in TrieRawHashMap.