LLVM 23.0.0git
OnDiskGraphDB.cpp
Go to the documentation of this file.
1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file implements OnDiskGraphDB, an on-disk CAS nodes database,
11/// independent of a particular hashing algorithm. It only needs to be
12/// configured for the hash size and controls the schema of the storage.
13///
14/// OnDiskGraphDB defines:
15///
16/// - How the data is stored inside database, either as a standalone file, or
17/// allocated inside a datapool.
18/// - How references to other objects inside the same database is stored. They
19/// are stored as internal references, instead of full hash value to save
20/// space.
21/// - How to chain databases together and import objects from upstream
22/// databases.
23///
24/// Here's a top-level description of the current layout:
25///
26/// - db/index.<version>: a file for the "index" table, named by \a
27/// IndexTableName and managed by \a TrieRawHashMap. The contents are 8B
28/// that are accessed atomically, describing the object kind and where/how
29/// it's stored (including an optional file offset). See \a TrieRecord for
30/// more details.
31/// - db/data.<version>: a file for the "data" table, named by \a
32/// DataPoolTableName and managed by \a DataStore. New objects within
33/// TrieRecord::MaxEmbeddedSize are inserted here as \a
34/// TrieRecord::StorageKind::DataPool.
35/// - db/obj.<offset>.<version>: a file storing an object outside the main
36/// "data" table, named by its offset into the "index" table, with the
37/// format of \a TrieRecord::StorageKind::Standalone.
38/// - db/leaf.<offset>.<version>: a file storing a leaf node outside the
39/// main "data" table, named by its offset into the "index" table, with
40/// the format of \a TrieRecord::StorageKind::StandaloneLeaf.
41/// - db/leaf+0.<offset>.<version>: a file storing a null-terminated leaf object
42/// outside the main "data" table, named by its offset into the "index" table,
43/// with the format of \a TrieRecord::StorageKind::StandaloneLeaf0.
44//
45//===----------------------------------------------------------------------===//
46
48#include "OnDiskCommon.h"
49#include "llvm/ADT/DenseMap.h"
50#include "llvm/ADT/ScopeExit.h"
56#include "llvm/Support/Errc.h"
57#include "llvm/Support/Error.h"
62#include "llvm/Support/Path.h"
64#include <atomic>
65#include <mutex>
66#include <optional>
67
68#define DEBUG_TYPE "on-disk-cas"
69
70using namespace llvm;
71using namespace llvm::cas;
72using namespace llvm::cas::ondisk;
73
// Table names embedded in the on-disk index and data-pool files.
static constexpr StringLiteral IndexTableName = "llvm.cas.index";
static constexpr StringLiteral DataPoolTableName = "llvm.cas.data";

// File-name prefixes for the main database files ("index.<version>" and
// "data.<version>").
static constexpr StringLiteral IndexFilePrefix = "index.";
static constexpr StringLiteral DataPoolFilePrefix = "data.";

// File-name prefixes for standalone object files ("obj.<offset>.<version>",
// "leaf.<offset>.<version>", "leaf+0.<offset>.<version>"); see the file
// header comment for the storage layout.
static constexpr StringLiteral FilePrefixObject = "obj.";
static constexpr StringLiteral FilePrefixLeaf = "leaf.";
static constexpr StringLiteral FilePrefixLeaf0 = "leaf+0.";
83
85 if (!ID)
86 return ID.takeError();
87
89 "corrupt object '" + toHex(*ID) + "'");
90}
91
92namespace {
93
/// Trie record data: 8 bytes, atomic<uint64_t>
/// - 1-byte: StorageKind
/// - 7-bytes: DataStoreOffset (offset into referenced file)
class TrieRecord {
public:
  /// Where and how an object's content is stored.
  enum class StorageKind : uint8_t {
    /// Unknown object.
    Unknown = 0,

    /// data.vX: main pool, full DataStore record.
    DataPool = 1,

    /// obj.<TrieRecordOffset>.vX: standalone, with a full DataStore record.
    Standalone = 10,

    /// leaf.<TrieRecordOffset>.vX: standalone, just the data. File contents
    /// exactly the data content and file size matches the data size. No refs.
    StandaloneLeaf = 11,

    /// leaf+0.<TrieRecordOffset>.vX: standalone, just the data plus an
    /// extra null character ('\0'). File size is 1 bigger than the data size.
    /// No refs.
    StandaloneLeaf0 = 12,
  };

  /// Map a standalone storage kind to its file-name prefix ("obj.", "leaf.",
  /// or "leaf+0."). Must only be called with a standalone kind.
  static StringRef getStandaloneFilePrefix(StorageKind SK) {
    switch (SK) {
    default:
      llvm_unreachable("Expected standalone storage kind");
    case TrieRecord::StorageKind::Standalone:
      return FilePrefixObject;
    case TrieRecord::StorageKind::StandaloneLeaf:
      return FilePrefixLeaf;
    case TrieRecord::StorageKind::StandaloneLeaf0:
      return FilePrefixLeaf0;
    }
  }

  enum Limits : int64_t {
    /// Saves files bigger than 64KB standalone instead of embedding them.
    MaxEmbeddedSize = 64LL * 1024LL - 1,
  };

  /// Unpacked view of a single 8-byte trie record.
  struct Data {
    StorageKind SK = StorageKind::Unknown;
    FileOffset Offset; // Offset into the file selected by SK.
  };

  /// Pack StorageKind and Offset from Data into 8 byte TrieRecord.
  /// The offset must fit in the low 56 bits; a default (Unknown) record
  /// packs to 0.
  static uint64_t pack(Data D) {
    assert(D.Offset.get() < (int64_t)(1ULL << 56));
    uint64_t Packed = uint64_t(D.SK) << 56 | D.Offset.get();
    assert(D.SK != StorageKind::Unknown || Packed == 0);
#ifndef NDEBUG
    // Verify the encoding round-trips in debug builds.
    Data RoundTrip = unpack(Packed);
    assert(D.SK == RoundTrip.SK);
    assert(D.Offset.get() == RoundTrip.Offset.get());
#endif
    return Packed;
  }

  // Unpack TrieRecord into Data. A packed value of zero yields a default
  // (Unknown) record.
  static Data unpack(uint64_t Packed) {
    Data D;
    if (!Packed)
      return D;
    D.SK = (StorageKind)(Packed >> 56);                // Top byte: kind.
    D.Offset = FileOffset(Packed & (UINT64_MAX >> 8)); // Low 56 bits: offset.
    return D;
  }

  TrieRecord() : Storage(0) {}

  /// Atomically load and unpack the current record value.
  Data load() const { return unpack(Storage); }

  /// Atomically replace \p Existing with \p New; on failure, \p Existing is
  /// updated to the currently stored value. Defined out-of-line below.
  bool compare_exchange_strong(Data &Existing, Data New);

private:
  std::atomic<uint64_t> Storage;
};
173
/// DataStore record data: 4B + size? + refs? + data + 0
/// - 4-bytes: Header
/// - {0,4,8}-bytes: DataSize (may be packed in Header)
/// - {0,4,8}-bytes: NumRefs (may be packed in Header)
/// - NumRefs*{4,8}-bytes: Refs[] (end-ptr is 8-byte aligned)
/// - <data>
/// - 1-byte: 0-term
struct DataRecordHandle {
  /// NumRefs storage: 4B, 2B, 1B, or 0B (no refs). Or, 8B, for alignment
  /// convenience to avoid computing padding later.
  enum class NumRefsFlags : uint8_t {
    Uses0B = 0U,
    Uses1B = 1U,
    Uses2B = 2U,
    Uses4B = 3U,
    Uses8B = 4U,
    Max = Uses8B,
  };

  /// DataSize storage: 8B, 4B, 2B, or 1B.
  enum class DataSizeFlags {
    Uses1B = 0U,
    Uses2B = 1U,
    Uses4B = 2U,
    Uses8B = 3U,
    Max = Uses8B,
  };

  /// Kind of ref stored in Refs[]: InternalRef or InternalRef4B.
  enum class RefKindFlags {
    InternalRef = 0U,
    InternalRef4B = 1U,
    Max = InternalRef4B,
  };

  // Bit positions and widths of the three flag fields inside LayoutFlags.
  enum Counts : int {
    NumRefsShift = 0,
    NumRefsBits = 3,
    DataSizeShift = NumRefsShift + NumRefsBits,
    DataSizeBits = 2,
    RefKindShift = DataSizeShift + DataSizeBits,
    RefKindBits = 1,
  };
  // Each enum's Max value must fit in its allotted bit width.
  static_assert(((UINT32_MAX << NumRefsBits) & (uint32_t)NumRefsFlags::Max) ==
                    0,
                "Not enough bits");
  static_assert(((UINT32_MAX << DataSizeBits) & (uint32_t)DataSizeFlags::Max) ==
                    0,
                "Not enough bits");
  static_assert(((UINT32_MAX << RefKindBits) & (uint32_t)RefKindFlags::Max) ==
                    0,
                "Not enough bits");

  /// Layout of the DataRecordHandle and how to decode it.
  struct LayoutFlags {
    NumRefsFlags NumRefs;
    DataSizeFlags DataSize;
    RefKindFlags RefKind;

    /// Pack the three flags into a single byte (round-trip asserted in
    /// debug builds).
    static uint64_t pack(LayoutFlags LF) {
      unsigned Packed = ((unsigned)LF.NumRefs << NumRefsShift) |
                        ((unsigned)LF.DataSize << DataSizeShift) |
                        ((unsigned)LF.RefKind << RefKindShift);
#ifndef NDEBUG
      LayoutFlags RoundTrip = unpack(Packed);
      assert(LF.NumRefs == RoundTrip.NumRefs);
      assert(LF.DataSize == RoundTrip.DataSize);
      assert(LF.RefKind == RoundTrip.RefKind);
#endif
      return Packed;
    }
    /// Decode the flag fields from the low byte of \p Storage.
    static LayoutFlags unpack(uint64_t Storage) {
      assert(Storage <= UINT8_MAX && "Expect storage to fit in a byte");
      LayoutFlags LF;
      LF.NumRefs =
          (NumRefsFlags)((Storage >> NumRefsShift) & ((1U << NumRefsBits) - 1));
      LF.DataSize = (DataSizeFlags)((Storage >> DataSizeShift) &
                                    ((1U << DataSizeBits) - 1));
      LF.RefKind =
          (RefKindFlags)((Storage >> RefKindShift) & ((1U << RefKindBits) - 1));
      return LF;
    }
  };

  /// Header layout:
  /// - 1-byte: LayoutFlags
  /// - 1-byte: 1B size field
  /// - {0,2}-bytes: 2B size field
  struct Header {
    using PackTy = uint32_t;
    PackTy Packed;

    // LayoutFlags occupy the top byte of Packed.
    static constexpr unsigned LayoutFlagsShift =
        (sizeof(PackTy) - 1) * CHAR_BIT;
  };

  /// The refs and data a new record is constructed from.
  struct Input {
    InternalRefArrayRef Refs;
    ArrayRef<char> Data;
  };

  LayoutFlags getLayoutFlags() const {
    return LayoutFlags::unpack(H->Packed >> Header::LayoutFlagsShift);
  }

  uint64_t getDataSize() const;
  void skipDataSize(LayoutFlags LF, int64_t &RelOffset) const;
  uint32_t getNumRefs() const;
  void skipNumRefs(LayoutFlags LF, int64_t &RelOffset) const;
  int64_t getRefsRelOffset() const;
  int64_t getDataRelOffset() const;

  /// Total record size in bytes, including header, size fields, refs, data,
  /// and the trailing 0-term.
  static uint64_t getTotalSize(uint64_t DataRelOffset, uint64_t DataSize) {
    return DataRelOffset + DataSize + 1;
  }
  uint64_t getTotalSize() const {
    return getDataRelOffset() + getDataSize() + 1;
  }

  /// Describe the layout of data stored and how to decode from
  /// DataRecordHandle.
  struct Layout {
    explicit Layout(const Input &I);

    LayoutFlags Flags;
    uint64_t DataSize = 0;
    uint32_t NumRefs = 0;
    int64_t RefsRelOffset = 0;
    int64_t DataRelOffset = 0;
    uint64_t getTotalSize() const {
      return DataRecordHandle::getTotalSize(DataRelOffset, DataSize);
    }
  };

  /// View of the stored refs, in whichever width RefKind selects.
  InternalRefArrayRef getRefs() const {
    assert(H && "Expected valid handle");
    auto *BeginByte = reinterpret_cast<const char *>(H) + getRefsRelOffset();
    size_t Size = getNumRefs();
    if (!Size)
      return InternalRefArrayRef();
    if (getLayoutFlags().RefKind == RefKindFlags::InternalRef4B)
      return ArrayRef(reinterpret_cast<const InternalRef4B *>(BeginByte), Size);
    return ArrayRef(reinterpret_cast<const InternalRef *>(BeginByte), Size);
  }

  /// View of the stored data bytes (excluding the trailing 0-term).
  ArrayRef<char> getData() const {
    assert(H && "Expected valid handle");
    return ArrayRef(reinterpret_cast<const char *>(H) + getDataRelOffset(),
                    getDataSize());
  }

  static DataRecordHandle create(function_ref<char *(size_t Size)> Alloc,
                                 const Input &I);
  static Expected<DataRecordHandle>
  createWithError(function_ref<Expected<char *>(size_t Size)> Alloc,
                  const Input &I);
  static DataRecordHandle construct(char *Mem, const Input &I);

  /// Reinterpret \p Mem as a record header; performs no validation.
  static DataRecordHandle get(const char *Mem) {
    return DataRecordHandle(
        *reinterpret_cast<const DataRecordHandle::Header *>(Mem));
  }
  static Expected<DataRecordHandle>
  getFromDataPool(const OnDiskDataAllocator &Pool, FileOffset Offset);

  explicit operator bool() const { return H; }
  const Header &getHeader() const { return *H; }

  DataRecordHandle() = default;
  explicit DataRecordHandle(const Header &H) : H(&H) {}

private:
  static DataRecordHandle constructImpl(char *Mem, const Input &I,
                                        const Layout &L);
  // Non-owning pointer into the mapped record storage.
  const Header *H = nullptr;
};
350
/// Proxy for any on-disk object or raw data.
struct OnDiskContent {
  /// Set when the content is a full data record (refs + data).
  std::optional<DataRecordHandle> Record;
  /// Set when the content is raw bytes (leaf objects without refs).
  std::optional<ArrayRef<char>> Bytes;
};
356
357/// Data loaded inside the memory from standalone file.
358class StandaloneDataInMemory {
359public:
360 OnDiskContent getContent() const;
361
362 StandaloneDataInMemory(std::unique_ptr<sys::fs::mapped_file_region> Region,
363 TrieRecord::StorageKind SK)
364 : Region(std::move(Region)), SK(SK) {
365#ifndef NDEBUG
366 bool IsStandalone = false;
367 switch (SK) {
368 case TrieRecord::StorageKind::Standalone:
369 case TrieRecord::StorageKind::StandaloneLeaf:
370 case TrieRecord::StorageKind::StandaloneLeaf0:
371 IsStandalone = true;
372 break;
373 default:
374 break;
375 }
376 assert(IsStandalone);
377#endif
378 }
379
380private:
381 std::unique_ptr<sys::fs::mapped_file_region> Region;
382 TrieRecord::StorageKind SK;
383};
384
/// Container to lookup loaded standalone objects.
///
/// Sharded by the first byte of the hash to reduce mutex contention.
template <size_t NumShards> class StandaloneDataMap {
  static_assert(isPowerOf2_64(NumShards), "Expected power of 2");

public:
  /// Insert a mapped region for \p Hash, or keep the existing entry if one is
  /// already present. Returns the stable address of the stored entry.
  uintptr_t insert(ArrayRef<uint8_t> Hash, TrieRecord::StorageKind SK,
                   std::unique_ptr<sys::fs::mapped_file_region> Region);

  /// Find a previously-inserted entry; returns nullptr if not present.
  const StandaloneDataInMemory *lookup(ArrayRef<uint8_t> Hash) const;
  bool count(ArrayRef<uint8_t> Hash) const { return bool(lookup(Hash)); }

private:
  struct Shard {
    /// Needs to store a std::unique_ptr for a stable address identity.
    DenseMap<const uint8_t *, std::unique_ptr<StandaloneDataInMemory>> Map;
    mutable std::mutex Mutex;
  };
  // Non-const overload forwards to the const one to avoid duplicating the
  // shard-selection logic.
  Shard &getShard(ArrayRef<uint8_t> Hash) {
    return const_cast<Shard &>(
        const_cast<const StandaloneDataMap *>(this)->getShard(Hash));
  }
  const Shard &getShard(ArrayRef<uint8_t> Hash) const {
    static_assert(NumShards <= 256, "Expected only 8 bits of shard");
    return Shards[Hash[0] % NumShards];
  }

  Shard Shards[NumShards];
};
413
/// Shard count used for the database's standalone-object map.
using StandaloneDataMapTy = StandaloneDataMap<16>;
415
/// A vector of internal node references.
///
/// Starts out storing compact 4-byte refs and transparently upgrades every
/// stored ref to full width the first time a ref that cannot be shrunk is
/// appended.
class InternalRefVector {
public:
  // Append a ref; on the first ref that does not fit in 4 bytes, migrate all
  // previously-stored small refs into the full-width vector.
  void push_back(InternalRef Ref) {
    if (NeedsFull)
      return FullRefs.push_back(Ref);
    if (std::optional<InternalRef4B> Small = InternalRef4B::tryToShrink(Ref))
      return SmallRefs.push_back(*Small);
    NeedsFull = true;
    assert(FullRefs.empty());
    FullRefs.reserve(SmallRefs.size() + 1);
    for (InternalRef4B Small : SmallRefs)
      FullRefs.push_back(Small);
    FullRefs.push_back(Ref);
    SmallRefs.clear();
  }

  // View as an InternalRefArrayRef; at most one backing vector is non-empty.
  operator InternalRefArrayRef() const {
    assert(SmallRefs.empty() || FullRefs.empty());
    return NeedsFull ? InternalRefArrayRef(FullRefs)
                     : InternalRefArrayRef(SmallRefs);
  }

private:
  bool NeedsFull = false;
  // NOTE(review): the declarations of the SmallRefs/FullRefs members used
  // above are not visible in this excerpt — confirm against the full source.
};
444
445} // namespace
446
447Expected<DataRecordHandle> DataRecordHandle::createWithError(
448 function_ref<Expected<char *>(size_t Size)> Alloc, const Input &I) {
449 Layout L(I);
450 if (Expected<char *> Mem = Alloc(L.getTotalSize()))
451 return constructImpl(*Mem, I, L);
452 else
453 return Mem.takeError();
454}
455
457 // Store the file offset as it is.
458 assert(!(Offset.get() & 0x1));
459 return ObjectHandle(Offset.get());
460}
461
463 // Store the pointer from memory with lowest bit set.
464 assert(!(Ptr & 0x1));
465 return ObjectHandle(Ptr | 1);
466}
467
468/// Proxy for an on-disk index record.
474
475template <size_t N>
476uintptr_t StandaloneDataMap<N>::insert(
477 ArrayRef<uint8_t> Hash, TrieRecord::StorageKind SK,
478 std::unique_ptr<sys::fs::mapped_file_region> Region) {
479 auto &S = getShard(Hash);
480 std::lock_guard<std::mutex> Lock(S.Mutex);
481 auto &V = S.Map[Hash.data()];
482 if (!V)
483 V = std::make_unique<StandaloneDataInMemory>(std::move(Region), SK);
484 return reinterpret_cast<uintptr_t>(V.get());
485}
486
487template <size_t N>
488const StandaloneDataInMemory *
489StandaloneDataMap<N>::lookup(ArrayRef<uint8_t> Hash) const {
490 auto &S = getShard(Hash);
491 std::lock_guard<std::mutex> Lock(S.Mutex);
492 auto I = S.Map.find(Hash.data());
493 if (I == S.Map.end())
494 return nullptr;
495 return &*I->second;
496}
497
498namespace {
499
500/// Copy of \a sys::fs::TempFile that skips RemoveOnSignal, which is too
501/// expensive to register/unregister at this rate.
502///
503/// FIXME: Add a TempFileManager that maintains a thread-safe list of open temp
/// files and has a signal handler registered that removes them all.
class TempFile {
  bool Done = false; // Set once keep() or discard() has run.
  TempFile(StringRef Name, int FD) : TmpName(std::string(Name)), FD(FD) {}

public:
  /// This creates a temporary file with createUniqueFile.
  static Expected<TempFile> create(const Twine &Model);
  TempFile(TempFile &&Other) { *this = std::move(Other); }
  TempFile &operator=(TempFile &&Other) {
    TmpName = std::move(Other.TmpName);
    FD = Other.FD;
    // Disarm the moved-from object so its destructor does not remove the
    // file out from under us.
    Other.Done = true;
    Other.FD = -1;
    return *this;
  }

  // Name of the temporary file.
  std::string TmpName;

  // The open file descriptor.
  int FD = -1;

  // Keep this with the given name.
  Error keep(const Twine &Name);
  Error discard();

  // This checks that keep or delete was called.
  ~TempFile() { consumeError(discard()); }
};
534
/// A TempFile paired with a memory mapping of its contents. Both keep() and
/// discard() unmap the region before acting on the underlying file.
class MappedTempFile {
public:
  char *data() const { return Map.data(); }
  size_t size() const { return Map.size(); }

  /// Unmap the region and remove the temporary file.
  Error discard() {
    assert(Map && "Map already destroyed");
    Map.unmap();
    return Temp.discard();
  }

  /// Unmap the region and rename the temporary file to \p Name.
  Error keep(const Twine &Name) {
    assert(Map && "Map already destroyed");
    Map.unmap();
    return Temp.keep(Name);
  }

  MappedTempFile(TempFile Temp, sys::fs::mapped_file_region Map)
      : Temp(std::move(Temp)), Map(std::move(Map)) {}

private:
  // Map is declared after Temp so it is destroyed first: the region is
  // unmapped before the TempFile destructor cleans up the file.
  TempFile Temp;
  sys::fs::mapped_file_region Map;
};
559} // namespace
560
562 Done = true;
563 if (FD != -1) {
565 if (std::error_code EC = sys::fs::closeFile(File))
566 return errorCodeToError(EC);
567 }
568 FD = -1;
569
570 // Always try to close and remove.
571 std::error_code RemoveEC;
572 if (!TmpName.empty()) {
573 std::error_code EC = sys::fs::remove(TmpName);
574 if (EC)
575 return errorCodeToError(EC);
576 }
577 TmpName = "";
578
579 return Error::success();
580}
581
583 assert(!Done);
584 Done = true;
585 // Always try to close and rename.
586 std::error_code RenameEC = sys::fs::rename(TmpName, Name);
587
588 if (!RenameEC)
589 TmpName = "";
590
592 if (std::error_code EC = sys::fs::closeFile(File))
593 return errorCodeToError(EC);
594 FD = -1;
595
596 return errorCodeToError(RenameEC);
597}
598
600 int FD;
601 SmallString<128> ResultPath;
602 if (std::error_code EC = sys::fs::createUniqueFile(Model, FD, ResultPath))
603 return errorCodeToError(EC);
604
605 TempFile Ret(ResultPath, FD);
606 return std::move(Ret);
607}
608
609bool TrieRecord::compare_exchange_strong(Data &Existing, Data New) {
610 uint64_t ExistingPacked = pack(Existing);
611 uint64_t NewPacked = pack(New);
612 if (Storage.compare_exchange_strong(ExistingPacked, NewPacked))
613 return true;
614 Existing = unpack(ExistingPacked);
615 return false;
616}
617
619DataRecordHandle::getFromDataPool(const OnDiskDataAllocator &Pool,
621 auto HeaderData = Pool.get(Offset, sizeof(DataRecordHandle::Header));
622 if (!HeaderData)
623 return HeaderData.takeError();
624
625 auto Record = DataRecordHandle::get(HeaderData->data());
626 if (Record.getTotalSize() + Offset.get() > Pool.size())
627 return createStringError(
628 make_error_code(std::errc::illegal_byte_sequence),
629 "data record span passed the end of the data pool");
630
631 return Record;
632}
633
/// Serialize \p I into pre-allocated memory \p Mem according to the
/// precomputed layout \p L, returning a handle over the new record.
DataRecordHandle DataRecordHandle::constructImpl(char *Mem, const Input &I,
                                                 const Layout &L) {
  char *Next = Mem + sizeof(Header);

  // Fill in Packed and set other data, then come back to construct the header.
  Header::PackTy Packed = 0;
  Packed |= LayoutFlags::pack(L.Flags) << Header::LayoutFlagsShift;

  // Construct DataSize. The 1B/2B forms are packed into the header's shared
  // size slots; the 4B/8B forms are written out-of-line after the header.
  switch (L.Flags.DataSize) {
  case DataSizeFlags::Uses1B:
    assert(I.Data.size() <= UINT8_MAX);
    Packed |= (Header::PackTy)I.Data.size()
              << ((sizeof(Packed) - 2) * CHAR_BIT);
    break;
  case DataSizeFlags::Uses2B:
    assert(I.Data.size() <= UINT16_MAX);
    Packed |= (Header::PackTy)I.Data.size()
              << ((sizeof(Packed) - 4) * CHAR_BIT);
    break;
  case DataSizeFlags::Uses4B:
    support::endian::write32le(Next, I.Data.size());
    Next += 4;
    break;
  case DataSizeFlags::Uses8B:
    support::endian::write64le(Next, I.Data.size());
    Next += 8;
    break;
  }

  // Construct NumRefs.
  //
  // NOTE: May be writing NumRefs even if there are zero refs in order to fix
  // alignment.
  switch (L.Flags.NumRefs) {
  case NumRefsFlags::Uses0B:
    break;
  case NumRefsFlags::Uses1B:
    assert(I.Refs.size() <= UINT8_MAX);
    Packed |= (Header::PackTy)I.Refs.size()
              << ((sizeof(Packed) - 2) * CHAR_BIT);
    break;
  case NumRefsFlags::Uses2B:
    assert(I.Refs.size() <= UINT16_MAX);
    Packed |= (Header::PackTy)I.Refs.size()
              << ((sizeof(Packed) - 4) * CHAR_BIT);
    break;
  case NumRefsFlags::Uses4B:
    support::endian::write32le(Next, I.Refs.size());
    Next += 4;
    break;
  case NumRefsFlags::Uses8B:
    support::endian::write64le(Next, I.Refs.size());
    Next += 8;
    break;
  }

  // Construct Refs[]. The stored width must match what the layout promised.
  if (!I.Refs.empty()) {
    assert((L.Flags.RefKind == RefKindFlags::InternalRef4B) == I.Refs.is4B());
    ArrayRef<uint8_t> RefsBuffer = I.Refs.getBuffer();
    llvm::copy(RefsBuffer, Next);
    Next += RefsBuffer.size();
  }

  // Construct Data and the trailing null.
  llvm::copy(I.Data, Next);
  Next[I.Data.size()] = 0;

  // Construct the header itself and return. Placement-new makes the header
  // the record's first live object; the asserts verify the record decodes
  // back to exactly what was written.
  Header *H = new (Mem) Header{Packed};
  DataRecordHandle Record(*H);
  assert(Record.getData() == I.Data);
  assert(Record.getNumRefs() == I.Refs.size());
  assert(Record.getRefs() == I.Refs);
  assert(Record.getLayoutFlags().DataSize == L.Flags.DataSize);
  assert(Record.getLayoutFlags().NumRefs == L.Flags.NumRefs);
  assert(Record.getLayoutFlags().RefKind == L.Flags.RefKind);
  return Record;
}
715
/// Choose the most compact encoding for a record holding \p I.
///
/// The header has one shared 1-byte slot and one shared 2-byte slot that can
/// each hold either DataSize or NumRefs (see constructImpl's shift values);
/// Has1B/Has2B below track which slots are still free.
DataRecordHandle::Layout::Layout(const Input &I) {
  // Start initial relative offsets right after the Header.
  uint64_t RelOffset = sizeof(Header);

  // Initialize the easy stuff.
  DataSize = I.Data.size();
  NumRefs = I.Refs.size();

  // Check refs size.
  Flags.RefKind =
      I.Refs.is4B() ? RefKindFlags::InternalRef4B : RefKindFlags::InternalRef;

  // Find the smallest slot available for DataSize.
  bool Has1B = true;
  bool Has2B = true;
  if (DataSize <= UINT8_MAX && Has1B) {
    Flags.DataSize = DataSizeFlags::Uses1B;
    Has1B = false;
  } else if (DataSize <= UINT16_MAX && Has2B) {
    Flags.DataSize = DataSizeFlags::Uses2B;
    Has2B = false;
  } else if (DataSize <= UINT32_MAX) {
    Flags.DataSize = DataSizeFlags::Uses4B;
    RelOffset += 4;
  } else {
    Flags.DataSize = DataSizeFlags::Uses8B;
    RelOffset += 8;
  }

  // Find the smallest slot available for NumRefs. Never sets NumRefs8B here.
  if (!NumRefs) {
    Flags.NumRefs = NumRefsFlags::Uses0B;
  } else if (NumRefs <= UINT8_MAX && Has1B) {
    Flags.NumRefs = NumRefsFlags::Uses1B;
    Has1B = false;
  } else if (NumRefs <= UINT16_MAX && Has2B) {
    Flags.NumRefs = NumRefsFlags::Uses2B;
    Has2B = false;
  } else {
    Flags.NumRefs = NumRefsFlags::Uses4B;
    RelOffset += 4;
  }

  // Helper to "upgrade" either DataSize or NumRefs by 4B to avoid complicated
  // padding rules when reading and writing. This also bumps RelOffset.
  //
  // The value for NumRefs is strictly limited to UINT32_MAX, but it can be
  // stored as 8B. This means we can *always* find a size to grow.
  //
  // NOTE: Only call this once.
  auto GrowSizeFieldsBy4B = [&]() {
    assert(isAligned(Align(4), RelOffset));
    RelOffset += 4;

    assert(Flags.NumRefs != NumRefsFlags::Uses8B &&
           "Expected to be able to grow NumRefs8B");

    // First try to grow DataSize. NumRefs will not (yet) be 8B, and if
    // DataSize is upgraded to 8B it'll already be aligned.
    //
    // Failing that, grow NumRefs.
    if (Flags.DataSize < DataSizeFlags::Uses4B)
      Flags.DataSize = DataSizeFlags::Uses4B; // DataSize: Packed => 4B.
    else if (Flags.DataSize < DataSizeFlags::Uses8B)
      Flags.DataSize = DataSizeFlags::Uses8B; // DataSize: 4B => 8B.
    else if (Flags.NumRefs < NumRefsFlags::Uses4B)
      Flags.NumRefs = NumRefsFlags::Uses4B; // NumRefs: Packed => 4B.
    else
      Flags.NumRefs = NumRefsFlags::Uses8B; // NumRefs: 4B => 8B.
  };

  assert(isAligned(Align(4), RelOffset));
  if (Flags.RefKind == RefKindFlags::InternalRef) {
    // List of 8B refs should be 8B-aligned. Grow one of the sizes to get this
    // without padding.
    if (!isAligned(Align(8), RelOffset))
      GrowSizeFieldsBy4B();

    assert(isAligned(Align(8), RelOffset));
    RefsRelOffset = RelOffset;
    RelOffset += 8 * NumRefs;
  } else {
    // The array of 4B refs doesn't need 8B alignment, but the data will need
    // to be 8B-aligned. Detect this now, and, if necessary, shift everything
    // by 4B by growing one of the sizes.
    // If we remove the need for 8B-alignment for data there is <1% savings in
    // disk storage for a clang build using MCCAS but the 8B-alignment may be
    // useful in the future so keep it for now.
    uint64_t RefListSize = 4 * NumRefs;
    if (!isAligned(Align(8), RelOffset + RefListSize))
      GrowSizeFieldsBy4B();
    RefsRelOffset = RelOffset;
    RelOffset += RefListSize;
  }

  assert(isAligned(Align(8), RelOffset));
  DataRelOffset = RelOffset;
}
814
815uint64_t DataRecordHandle::getDataSize() const {
816 int64_t RelOffset = sizeof(Header);
817 auto *DataSizePtr = reinterpret_cast<const char *>(H) + RelOffset;
818 switch (getLayoutFlags().DataSize) {
819 case DataSizeFlags::Uses1B:
820 return (H->Packed >> ((sizeof(Header::PackTy) - 2) * CHAR_BIT)) & UINT8_MAX;
821 case DataSizeFlags::Uses2B:
822 return (H->Packed >> ((sizeof(Header::PackTy) - 4) * CHAR_BIT)) &
823 UINT16_MAX;
824 case DataSizeFlags::Uses4B:
825 return support::endian::read32le(DataSizePtr);
826 case DataSizeFlags::Uses8B:
827 return support::endian::read64le(DataSizePtr);
828 }
829 llvm_unreachable("Unknown DataSizeFlags enum");
830}
831
832void DataRecordHandle::skipDataSize(LayoutFlags LF, int64_t &RelOffset) const {
833 if (LF.DataSize >= DataSizeFlags::Uses4B)
834 RelOffset += 4;
835 if (LF.DataSize >= DataSizeFlags::Uses8B)
836 RelOffset += 4;
837}
838
839uint32_t DataRecordHandle::getNumRefs() const {
840 LayoutFlags LF = getLayoutFlags();
841 int64_t RelOffset = sizeof(Header);
842 skipDataSize(LF, RelOffset);
843 auto *NumRefsPtr = reinterpret_cast<const char *>(H) + RelOffset;
844 switch (LF.NumRefs) {
845 case NumRefsFlags::Uses0B:
846 return 0;
847 case NumRefsFlags::Uses1B:
848 return (H->Packed >> ((sizeof(Header::PackTy) - 2) * CHAR_BIT)) & UINT8_MAX;
849 case NumRefsFlags::Uses2B:
850 return (H->Packed >> ((sizeof(Header::PackTy) - 4) * CHAR_BIT)) &
851 UINT16_MAX;
852 case NumRefsFlags::Uses4B:
853 return support::endian::read32le(NumRefsPtr);
854 case NumRefsFlags::Uses8B:
855 return support::endian::read64le(NumRefsPtr);
856 }
857 llvm_unreachable("Unknown NumRefsFlags enum");
858}
859
860void DataRecordHandle::skipNumRefs(LayoutFlags LF, int64_t &RelOffset) const {
861 if (LF.NumRefs >= NumRefsFlags::Uses4B)
862 RelOffset += 4;
863 if (LF.NumRefs >= NumRefsFlags::Uses8B)
864 RelOffset += 4;
865}
866
867int64_t DataRecordHandle::getRefsRelOffset() const {
868 LayoutFlags LF = getLayoutFlags();
869 int64_t RelOffset = sizeof(Header);
870 skipDataSize(LF, RelOffset);
871 skipNumRefs(LF, RelOffset);
872 return RelOffset;
873}
874
875int64_t DataRecordHandle::getDataRelOffset() const {
876 LayoutFlags LF = getLayoutFlags();
877 int64_t RelOffset = sizeof(Header);
878 skipDataSize(LF, RelOffset);
879 skipNumRefs(LF, RelOffset);
880 uint32_t RefSize = LF.RefKind == RefKindFlags::InternalRef4B ? 4 : 8;
881 RelOffset += RefSize * getNumRefs();
882 return RelOffset;
883}
884
886 if (UpstreamDB) {
887 if (auto E = UpstreamDB->validate(Deep, Hasher))
888 return E;
889 }
890 return Index.validate([&](FileOffset Offset,
892 -> Error {
893 auto formatError = [&](Twine Msg) {
894 return createStringError(
896 "bad record at 0x" +
897 utohexstr((unsigned)Offset.get(), /*LowerCase=*/true) + ": " +
898 Msg.str());
899 };
900
901 if (Record.Data.size() != sizeof(TrieRecord))
902 return formatError("wrong data record size");
903 if (!isAligned(Align::Of<TrieRecord>(), Record.Data.size()))
904 return formatError("wrong data record alignment");
905
906 auto *R = reinterpret_cast<const TrieRecord *>(Record.Data.data());
907 TrieRecord::Data D = R->load();
908 std::unique_ptr<MemoryBuffer> FileBuffer;
909 if ((uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::Unknown &&
910 (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::DataPool &&
911 (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::Standalone &&
912 (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::StandaloneLeaf &&
913 (uint8_t)D.SK != (uint8_t)TrieRecord::StorageKind::StandaloneLeaf0)
914 return formatError("invalid record kind value");
915
917 auto I = getIndexProxyFromRef(Ref);
918 if (!I)
919 return I.takeError();
920
921 switch (D.SK) {
922 case TrieRecord::StorageKind::Unknown:
923 // This could be an abandoned entry due to a termination before updating
924 // the record. It can be reused by later insertion so just skip this entry
925 // for now.
926 return Error::success();
927 case TrieRecord::StorageKind::DataPool:
928 // Check offset is a postive value, and large enough to hold the
929 // header for the data record.
930 if (D.Offset.get() <= 0 ||
931 D.Offset.get() + sizeof(DataRecordHandle::Header) >= DataPool.size())
932 return formatError("datapool record out of bound");
933 break;
934 case TrieRecord::StorageKind::Standalone:
935 case TrieRecord::StorageKind::StandaloneLeaf:
936 case TrieRecord::StorageKind::StandaloneLeaf0:
937 SmallString<256> Path;
938 getStandalonePath(TrieRecord::getStandaloneFilePrefix(D.SK), *I, Path);
939 // If need to validate the content of the file later, just load the
940 // buffer here. Otherwise, just check the existance of the file.
941 if (Deep) {
942 auto File = MemoryBuffer::getFile(Path, /*IsText=*/false,
943 /*RequiresNullTerminator=*/false);
944 if (!File || !*File)
945 return formatError("record file \'" + Path + "\' does not exist");
946
947 FileBuffer = std::move(*File);
948 } else if (!llvm::sys::fs::exists(Path))
949 return formatError("record file \'" + Path + "\' does not exist");
950 }
951
952 if (!Deep)
953 return Error::success();
954
955 auto dataError = [&](Twine Msg) {
957 "bad data for digest \'" + toHex(I->Hash) +
958 "\': " + Msg.str());
959 };
961 ArrayRef<char> StoredData;
962
963 switch (D.SK) {
964 case TrieRecord::StorageKind::Unknown:
965 llvm_unreachable("already handled");
966 case TrieRecord::StorageKind::DataPool: {
967 auto DataRecord = DataRecordHandle::getFromDataPool(DataPool, D.Offset);
968 if (!DataRecord)
969 return dataError(toString(DataRecord.takeError()));
970
971 for (auto InternRef : DataRecord->getRefs()) {
972 auto Index = getIndexProxyFromRef(InternRef);
973 if (!Index)
974 return Index.takeError();
975 Refs.push_back(Index->Hash);
976 }
977 StoredData = DataRecord->getData();
978 break;
979 }
980 case TrieRecord::StorageKind::Standalone: {
981 if (FileBuffer->getBufferSize() < sizeof(DataRecordHandle::Header))
982 return dataError("data record is not big enough to read the header");
983 auto DataRecord = DataRecordHandle::get(FileBuffer->getBufferStart());
984 if (DataRecord.getTotalSize() < FileBuffer->getBufferSize())
985 return dataError(
986 "data record span passed the end of the standalone file");
987 for (auto InternRef : DataRecord.getRefs()) {
988 auto Index = getIndexProxyFromRef(InternRef);
989 if (!Index)
990 return Index.takeError();
991 Refs.push_back(Index->Hash);
992 }
993 StoredData = DataRecord.getData();
994 break;
995 }
996 case TrieRecord::StorageKind::StandaloneLeaf:
997 case TrieRecord::StorageKind::StandaloneLeaf0: {
998 StoredData = arrayRefFromStringRef<char>(FileBuffer->getBuffer());
999 if (D.SK == TrieRecord::StorageKind::StandaloneLeaf0) {
1000 if (!FileBuffer->getBuffer().ends_with('\0'))
1001 return dataError("standalone file is not zero terminated");
1002 StoredData = StoredData.drop_back(1);
1003 }
1004 break;
1005 }
1006 }
1007
1008 SmallVector<uint8_t> ComputedHash;
1009 Hasher(Refs, StoredData, ComputedHash);
1010 if (I->Hash != ArrayRef(ComputedHash))
1011 return dataError("hash mismatch, got \'" + toHex(ComputedHash) +
1012 "\' instead");
1013
1014 return Error::success();
1015 });
1016}
1017
1019 auto formatError = [&](Twine Msg) {
1020 return createStringError(
1022 "bad ref=0x" +
1023 utohexstr(ExternalRef.getOpaqueData(), /*LowerCase=*/true) + ": " +
1024 Msg.str());
1025 };
1026
1027 if (ExternalRef.getOpaqueData() == 0)
1028 return formatError("zero is not a valid ref");
1029
1030 InternalRef InternalRef = getInternalRef(ExternalRef);
1031 auto I = getIndexProxyFromRef(InternalRef);
1032 if (!I)
1033 return formatError(llvm::toString(I.takeError()));
1034 auto Hash = getDigest(*I);
1035
1036 OnDiskTrieRawHashMap::ConstOnDiskPtr P = Index.find(Hash);
1037 if (!P)
1038 return formatError("not found using hash " + toHex(Hash));
1039 IndexProxy OtherI = getIndexProxyFromPointer(P);
1040 ObjectID OtherRef = getExternalReference(makeInternalRef(OtherI.Offset));
1041 if (OtherRef != ExternalRef)
1042 return formatError("ref does not match indexed offset " +
1043 utohexstr(OtherRef.getOpaqueData(), /*LowerCase=*/true) +
1044 " for hash " + toHex(Hash));
1045 return Error::success();
1046}
1047
1049 OS << "on-disk-root-path: " << RootPath << "\n";
1050
1051 struct PoolInfo {
1053 };
1055
1056 OS << "\n";
1057 OS << "index:\n";
1058 Index.print(OS, [&](ArrayRef<char> Data) {
1059 assert(Data.size() == sizeof(TrieRecord));
1061 auto *R = reinterpret_cast<const TrieRecord *>(Data.data());
1062 TrieRecord::Data D = R->load();
1063 OS << " SK=";
1064 switch (D.SK) {
1065 case TrieRecord::StorageKind::Unknown:
1066 OS << "unknown ";
1067 break;
1068 case TrieRecord::StorageKind::DataPool:
1069 OS << "datapool ";
1070 Pool.push_back({D.Offset.get()});
1071 break;
1072 case TrieRecord::StorageKind::Standalone:
1073 OS << "standalone-data ";
1074 break;
1075 case TrieRecord::StorageKind::StandaloneLeaf:
1076 OS << "standalone-leaf ";
1077 break;
1078 case TrieRecord::StorageKind::StandaloneLeaf0:
1079 OS << "standalone-leaf+0";
1080 break;
1081 }
1082 OS << " Offset=" << (void *)D.Offset.get();
1083 });
1084 if (Pool.empty())
1085 return;
1086
1087 OS << "\n";
1088 OS << "pool:\n";
1089 llvm::sort(
1090 Pool, [](PoolInfo LHS, PoolInfo RHS) { return LHS.Offset < RHS.Offset; });
1091 for (PoolInfo PI : Pool) {
1092 OS << "- addr=" << (void *)PI.Offset << " ";
1093 auto D = DataRecordHandle::getFromDataPool(DataPool, FileOffset(PI.Offset));
1094 if (!D) {
1095 OS << "error: " << toString(D.takeError());
1096 return;
1097 }
1098
1099 OS << "record refs=" << D->getNumRefs() << " data=" << D->getDataSize()
1100 << " size=" << D->getTotalSize()
1101 << " end=" << (void *)(PI.Offset + D->getTotalSize()) << "\n";
1102 }
1103}
1104
1106OnDiskGraphDB::indexHash(ArrayRef<uint8_t> Hash) {
1107 auto P = Index.insertLazy(
1108 Hash, [](FileOffset TentativeOffset,
1109 OnDiskTrieRawHashMap::ValueProxy TentativeValue) {
1110 assert(TentativeValue.Data.size() == sizeof(TrieRecord));
1111 assert(
1112 isAddrAligned(Align::Of<TrieRecord>(), TentativeValue.Data.data()));
1113 new (TentativeValue.Data.data()) TrieRecord();
1114 });
1115 if (LLVM_UNLIKELY(!P))
1116 return P.takeError();
1117
1118 assert(*P && "Expected insertion");
1119 return getIndexProxyFromPointer(*P);
1120}
1121
1122OnDiskGraphDB::IndexProxy OnDiskGraphDB::getIndexProxyFromPointer(
1124 assert(P);
1125 assert(P.getOffset());
1126 return IndexProxy{P.getOffset(), P->Hash,
1127 *const_cast<TrieRecord *>(
1128 reinterpret_cast<const TrieRecord *>(P->Data.data()))};
1129}
1130
1132 auto I = indexHash(Hash);
1133 if (LLVM_UNLIKELY(!I))
1134 return I.takeError();
1135 return getExternalReference(*I);
1136}
1137
1138ObjectID OnDiskGraphDB::getExternalReference(const IndexProxy &I) {
1139 return getExternalReference(makeInternalRef(I.Offset));
1140}
1141
1142std::optional<ObjectID>
1144 bool CheckUpstream) {
1145 auto tryUpstream =
1146 [&](std::optional<IndexProxy> I) -> std::optional<ObjectID> {
1147 if (!CheckUpstream || !UpstreamDB)
1148 return std::nullopt;
1149 std::optional<ObjectID> UpstreamID =
1150 UpstreamDB->getExistingReference(Digest);
1151 if (LLVM_UNLIKELY(!UpstreamID))
1152 return std::nullopt;
1153 auto Ref = expectedToOptional(indexHash(Digest));
1154 if (!Ref)
1155 return std::nullopt;
1156 if (!I)
1157 I.emplace(*Ref);
1158 return getExternalReference(*I);
1159 };
1160
1161 OnDiskTrieRawHashMap::ConstOnDiskPtr P = Index.find(Digest);
1162 if (!P)
1163 return tryUpstream(std::nullopt);
1164 IndexProxy I = getIndexProxyFromPointer(P);
1165 TrieRecord::Data Obj = I.Ref.load();
1166 if (Obj.SK == TrieRecord::StorageKind::Unknown)
1167 return tryUpstream(I);
1168 return getExternalReference(makeInternalRef(I.Offset));
1169}
1170
1172OnDiskGraphDB::getIndexProxyFromRef(InternalRef Ref) const {
1173 auto P = Index.recoverFromFileOffset(Ref.getFileOffset());
1174 if (LLVM_UNLIKELY(!P))
1175 return P.takeError();
1176 return getIndexProxyFromPointer(*P);
1177}
1178
1180 auto I = getIndexProxyFromRef(Ref);
1181 if (!I)
1182 return I.takeError();
1183 return I->Hash;
1184}
1185
/// Return the hash bytes recorded in index entry \p I.
ArrayRef<uint8_t> OnDiskGraphDB::getDigest(const IndexProxy &I) const {
  return I.Hash;
}
1189
1190static OnDiskContent getContentFromHandle(const OnDiskDataAllocator &DataPool,
1191 ObjectHandle OH) {
1192 // Decode ObjectHandle to locate the stored content.
1193 uint64_t Data = OH.getOpaqueData();
1194 if (Data & 1) {
1195 const auto *SDIM =
1196 reinterpret_cast<const StandaloneDataInMemory *>(Data & (-1ULL << 1));
1197 return SDIM->getContent();
1198 }
1199
1200 auto DataHandle =
1201 cantFail(DataRecordHandle::getFromDataPool(DataPool, FileOffset(Data)));
1202 assert(DataHandle.getData().end()[0] == 0 && "Null termination");
1203 return OnDiskContent{DataHandle, std::nullopt};
1204}
1205
1207 OnDiskContent Content = getContentFromHandle(DataPool, Node);
1208 if (Content.Bytes)
1209 return *Content.Bytes;
1210 assert(Content.Record && "Expected record or bytes");
1211 return Content.Record->getData();
1212}
1213
1214InternalRefArrayRef OnDiskGraphDB::getInternalRefs(ObjectHandle Node) const {
1215 if (std::optional<DataRecordHandle> Record =
1216 getContentFromHandle(DataPool, Node).Record)
1217 return Record->getRefs();
1218 return std::nullopt;
1219}
1220
1223 InternalRef Ref = getInternalRef(ExternalRef);
1224 auto I = getIndexProxyFromRef(Ref);
1225 if (!I)
1226 return I.takeError();
1227 TrieRecord::Data Object = I->Ref.load();
1228
1229 if (Object.SK == TrieRecord::StorageKind::Unknown)
1230 return faultInFromUpstream(ExternalRef);
1231
1232 if (Object.SK == TrieRecord::StorageKind::DataPool)
1233 return ObjectHandle::fromFileOffset(Object.Offset);
1234
1235 // Only TrieRecord::StorageKind::Standalone (and variants) need to be
1236 // explicitly loaded.
1237 //
1238 // There's corruption if standalone objects have offsets, or if we get here
1239 // for something that isn't standalone.
1240 if (Object.Offset)
1242 switch (Object.SK) {
1243 case TrieRecord::StorageKind::Unknown:
1244 case TrieRecord::StorageKind::DataPool:
1245 llvm_unreachable("unexpected storage kind");
1246 case TrieRecord::StorageKind::Standalone:
1247 case TrieRecord::StorageKind::StandaloneLeaf0:
1248 case TrieRecord::StorageKind::StandaloneLeaf:
1249 break;
1250 }
1251
1252 // Load it from disk.
1253 //
1254 // Note: Creation logic guarantees that data that needs null-termination is
1255 // suitably 0-padded. Requiring null-termination here would be too expensive
1256 // for extremely large objects that happen to be page-aligned.
1257 SmallString<256> Path;
1258 getStandalonePath(TrieRecord::getStandaloneFilePrefix(Object.SK), *I, Path);
1259
1260 auto BypassSandbox = sys::sandbox::scopedDisable();
1261
1262 auto File = sys::fs::openNativeFileForRead(Path);
1263 if (!File)
1264 return createFileError(Path, File.takeError());
1265
1266 llvm::scope_exit CloseFile([&]() { sys::fs::closeFile(*File); });
1267
1269 if (std::error_code EC = sys::fs::status(*File, Status))
1271
1272 std::error_code EC;
1273 auto Region = std::make_unique<sys::fs::mapped_file_region>(
1274 *File, sys::fs::mapped_file_region::readonly, Status.getSize(), 0, EC);
1275 if (EC)
1277
1279 static_cast<StandaloneDataMapTy *>(StandaloneData)
1280 ->insert(I->Hash, Object.SK, std::move(Region)));
1281}
1282
1284 auto Presence = getObjectPresence(Ref, /*CheckUpstream=*/true);
1285 if (!Presence)
1286 return Presence.takeError();
1287
1288 switch (*Presence) {
1289 case ObjectPresence::Missing:
1290 return false;
1291 case ObjectPresence::InPrimaryDB:
1292 return true;
1293 case ObjectPresence::OnlyInUpstreamDB:
1294 if (auto FaultInResult = faultInFromUpstream(Ref); !FaultInResult)
1295 return FaultInResult.takeError();
1296 return true;
1297 }
1298 llvm_unreachable("Unknown ObjectPresence enum");
1299}
1300
1302OnDiskGraphDB::getObjectPresence(ObjectID ExternalRef,
1303 bool CheckUpstream) const {
1304 InternalRef Ref = getInternalRef(ExternalRef);
1305 auto I = getIndexProxyFromRef(Ref);
1306 if (!I)
1307 return I.takeError();
1308
1309 TrieRecord::Data Object = I->Ref.load();
1310 if (Object.SK != TrieRecord::StorageKind::Unknown)
1311 return ObjectPresence::InPrimaryDB;
1312
1313 if (!CheckUpstream || !UpstreamDB)
1314 return ObjectPresence::Missing;
1315
1316 std::optional<ObjectID> UpstreamID =
1317 UpstreamDB->getExistingReference(getDigest(*I));
1318 return UpstreamID.has_value() ? ObjectPresence::OnlyInUpstreamDB
1319 : ObjectPresence::Missing;
1320}
1321
/// Build the internal reference that names the index entry at \p IndexOffset.
InternalRef OnDiskGraphDB::makeInternalRef(FileOffset IndexOffset) {
  return InternalRef::getFromOffset(IndexOffset);
}
1325
/// Compute into \p Path the location of a standalone object file:
/// "<RootPath>/<Prefix><index-offset>.<CASFormatVersion>" (see the file-level
/// comment describing db/obj.<offset>.<version> and friends).
void OnDiskGraphDB::getStandalonePath(StringRef Prefix, const IndexProxy &I,
                                      SmallVectorImpl<char> &Path) const {
  Path.assign(RootPath.begin(), RootPath.end());
  sys::path::append(Path,
                    Prefix + Twine(I.Offset.get()) + "." + CASFormatVersion);
}
1332
1333OnDiskContent StandaloneDataInMemory::getContent() const {
1334 bool Leaf0 = false;
1335 bool Leaf = false;
1336 switch (SK) {
1337 default:
1338 llvm_unreachable("Storage kind must be standalone");
1339 case TrieRecord::StorageKind::Standalone:
1340 break;
1341 case TrieRecord::StorageKind::StandaloneLeaf0:
1342 Leaf = Leaf0 = true;
1343 break;
1344 case TrieRecord::StorageKind::StandaloneLeaf:
1345 Leaf = true;
1346 break;
1347 }
1348
1349 if (Leaf) {
1350 StringRef Data(Region->data(), Region->size());
1351 assert(Data.drop_back(Leaf0).end()[0] == 0 &&
1352 "Standalone node data missing null termination");
1353 return OnDiskContent{std::nullopt,
1354 arrayRefFromStringRef<char>(Data.drop_back(Leaf0))};
1355 }
1356
1357 DataRecordHandle Record = DataRecordHandle::get(Region->data());
1358 assert(Record.getData().end()[0] == 0 &&
1359 "Standalone object record missing null termination for data");
1360 return OnDiskContent{Record, std::nullopt};
1361}
1362
1364 uint64_t Size) {
1365 auto BypassSandbox = sys::sandbox::scopedDisable();
1366
1367 assert(Size && "Unexpected request for an empty temp file");
1368 Expected<TempFile> File = TempFile::create(FinalPath + ".%%%%%%");
1369 if (!File)
1370 return File.takeError();
1371
1372 if (Error E = preallocateFileTail(File->FD, 0, Size).takeError())
1373 return createFileError(File->TmpName, std::move(E));
1374
1375 if (auto EC = sys::fs::resize_file_before_mapping_readwrite(File->FD, Size))
1376 return createFileError(File->TmpName, EC);
1377
1378 std::error_code EC;
1381 0, EC);
1382 if (EC)
1383 return createFileError(File->TmpName, EC);
1384 return MappedTempFile(std::move(*File), std::move(Map));
1385}
1386
1387static size_t getPageSize() {
1389 return PageSize;
1390}
1391
/// Store a large leaf object (no refs, bigger than MaxEmbeddedSize) as its own
/// file next to the database, then publish it in index entry \p I.
///
/// Data whose size is an exact multiple of the page size gets the "Leaf0"
/// layout with one extra '\0' byte appended, so readers can rely on
/// null-terminated content without an explicit check (see the note in load()
/// about 0-padding versus page-aligned objects).
Error OnDiskGraphDB::createStandaloneLeaf(IndexProxy &I, ArrayRef<char> Data) {
  assert(Data.size() > TrieRecord::MaxEmbeddedSize &&
         "Expected a bigger file for external content...");

  bool Leaf0 = isAligned(Align(getPageSize()), Data.size());
  TrieRecord::StorageKind SK = Leaf0 ? TrieRecord::StorageKind::StandaloneLeaf0
                                     : TrieRecord::StorageKind::StandaloneLeaf;

  SmallString<256> Path;
  // Leaf0 adds one byte for the explicit null terminator.
  int64_t FileSize = Data.size() + Leaf0;
  getStandalonePath(TrieRecord::getStandaloneFilePrefix(SK), I, Path);

  auto BypassSandbox = sys::sandbox::scopedDisable();

  // Write the file. Don't reuse this mapped_file_region, which is read/write.
  // Let load() pull up one that's read-only.
  Expected<MappedTempFile> File = createTempFile(Path, FileSize);
  if (!File)
    return File.takeError();
  assert(File->size() == (uint64_t)FileSize);
  llvm::copy(Data, File->data());
  if (Leaf0)
    File->data()[Data.size()] = 0;
  // For the non-Leaf0 case this reads one byte past FileSize; presumably the
  // mapping's page-granular zero padding guarantees it — asserts only.
  assert(File->data()[Data.size()] == 0);
  if (Error E = File->keep(Path))
    return E;

  // Store the object reference.
  TrieRecord::Data Existing;
  {
    TrieRecord::Data Leaf{SK, FileOffset()};
    if (I.Ref.compare_exchange_strong(Existing, Leaf)) {
      recordStandaloneSizeIncrease(FileSize);
      return Error::success();
    }
  }

  // If there was a race, confirm that the new value has valid storage.
  if (Existing.SK == TrieRecord::StorageKind::Unknown)
    return createCorruptObjectError(getDigest(I));

  return Error::success();
}
1435
1438 auto I = getIndexProxyFromRef(getInternalRef(ID));
1439 if (LLVM_UNLIKELY(!I))
1440 return I.takeError();
1441
1442 // Early return in case the node exists.
1443 {
1444 TrieRecord::Data Existing = I->Ref.load();
1445 if (Existing.SK != TrieRecord::StorageKind::Unknown)
1446 return Error::success();
1447 }
1448
1449 // Big leaf nodes.
1450 if (Refs.empty() && Data.size() > TrieRecord::MaxEmbeddedSize)
1451 return createStandaloneLeaf(*I, Data);
1452
1453 // TODO: Check whether it's worth checking the index for an already existing
1454 // object (like storeTreeImpl() does) before building up the
1455 // InternalRefVector.
1456 InternalRefVector InternalRefs;
1457 for (ObjectID Ref : Refs)
1458 InternalRefs.push_back(getInternalRef(Ref));
1459
1460 // Create the object.
1461
1462 DataRecordHandle::Input Input{InternalRefs, Data};
1463
1464 // Compute the storage kind, allocate it, and create the record.
1465 TrieRecord::StorageKind SK = TrieRecord::StorageKind::Unknown;
1466 FileOffset PoolOffset;
1467 SmallString<256> Path;
1468 std::optional<MappedTempFile> File;
1469 std::optional<uint64_t> FileSize;
1470 auto AllocStandaloneFile = [&](size_t Size) -> Expected<char *> {
1471 getStandalonePath(TrieRecord::getStandaloneFilePrefix(
1472 TrieRecord::StorageKind::Standalone),
1473 *I, Path);
1474 if (Error E = createTempFile(Path, Size).moveInto(File))
1475 return std::move(E);
1476 assert(File->size() == Size);
1477 FileSize = Size;
1478 SK = TrieRecord::StorageKind::Standalone;
1479 return File->data();
1480 };
1481 auto Alloc = [&](size_t Size) -> Expected<char *> {
1482 if (Size <= TrieRecord::MaxEmbeddedSize) {
1483 SK = TrieRecord::StorageKind::DataPool;
1484 auto P = DataPool.allocate(Size);
1485 if (LLVM_UNLIKELY(!P)) {
1486 char *NewAlloc = nullptr;
1487 auto NewE = handleErrors(
1488 P.takeError(), [&](std::unique_ptr<StringError> E) -> Error {
1489 if (E->convertToErrorCode() == std::errc::not_enough_memory)
1490 return AllocStandaloneFile(Size).moveInto(NewAlloc);
1491 return Error(std::move(E));
1492 });
1493 if (!NewE)
1494 return NewAlloc;
1495 return std::move(NewE);
1496 }
1497 PoolOffset = P->getOffset();
1498 LLVM_DEBUG({
1499 dbgs() << "pool-alloc addr=" << (void *)PoolOffset.get()
1500 << " size=" << Size
1501 << " end=" << (void *)(PoolOffset.get() + Size) << "\n";
1502 });
1503 return (*P)->data();
1504 }
1505 return AllocStandaloneFile(Size);
1506 };
1507
1508 DataRecordHandle Record;
1509 if (Error E =
1510 DataRecordHandle::createWithError(Alloc, Input).moveInto(Record))
1511 return E;
1512 assert(Record.getData().end()[0] == 0 && "Expected null-termination");
1513 assert(Record.getData() == Input.Data && "Expected initialization");
1514 assert(SK != TrieRecord::StorageKind::Unknown);
1515 assert(bool(File) != bool(PoolOffset) &&
1516 "Expected either a mapped file or a pooled offset");
1517
1518 // Check for a race before calling MappedTempFile::keep().
1519 //
1520 // Then decide what to do with the file. Better to discard than overwrite if
1521 // another thread/process has already added this.
1522 TrieRecord::Data Existing = I->Ref.load();
1523 {
1524 TrieRecord::Data NewObject{SK, PoolOffset};
1525 if (File) {
1526 if (Existing.SK == TrieRecord::StorageKind::Unknown) {
1527 // Keep the file!
1528 if (Error E = File->keep(Path))
1529 return E;
1530 } else {
1531 File.reset();
1532 }
1533 }
1534
1535 // If we didn't already see a racing/existing write, then try storing the
1536 // new object. If that races, confirm that the new value has valid storage.
1537 //
1538 // TODO: Find a way to reuse the storage from the new-but-abandoned record
1539 // handle.
1540 if (Existing.SK == TrieRecord::StorageKind::Unknown) {
1541 if (I->Ref.compare_exchange_strong(Existing, NewObject)) {
1542 if (FileSize)
1543 recordStandaloneSizeIncrease(*FileSize);
1544 return Error::success();
1545 }
1546 }
1547 }
1548
1549 if (Existing.SK == TrieRecord::StorageKind::Unknown)
1551
1552 // Load existing object.
1553 return Error::success();
1554}
1555
1556void OnDiskGraphDB::recordStandaloneSizeIncrease(size_t SizeIncrease) {
1557 standaloneStorageSize().fetch_add(SizeIncrease, std::memory_order_relaxed);
1558}
1559
1560std::atomic<uint64_t> &OnDiskGraphDB::standaloneStorageSize() const {
1561 MutableArrayRef<uint8_t> UserHeader = DataPool.getUserHeader();
1562 assert(UserHeader.size() == sizeof(std::atomic<uint64_t>));
1563 assert(isAddrAligned(Align(8), UserHeader.data()));
1564 return *reinterpret_cast<std::atomic<uint64_t> *>(UserHeader.data());
1565}
1566
/// Snapshot the standalone-storage byte count (relaxed load; may be stale
/// under concurrent updates).
uint64_t OnDiskGraphDB::getStandaloneStorageSize() const {
  return standaloneStorageSize().load(std::memory_order_relaxed);
}
1570
1572 return Index.size() + DataPool.size() + getStandaloneStorageSize();
1573}
1574
1576 unsigned IndexPercent = Index.size() * 100ULL / Index.capacity();
1577 unsigned DataPercent = DataPool.size() * 100ULL / DataPool.capacity();
1578 return std::max(IndexPercent, DataPercent);
1579}
1580
1583 unsigned HashByteSize, OnDiskGraphDB *UpstreamDB,
1584 FaultInPolicy Policy) {
1585 if (std::error_code EC = sys::fs::create_directories(AbsPath))
1586 return createFileError(AbsPath, EC);
1587
1588 constexpr uint64_t MB = 1024ull * 1024ull;
1589 constexpr uint64_t GB = 1024ull * 1024ull * 1024ull;
1590
1591 uint64_t MaxIndexSize = 12 * GB;
1592 uint64_t MaxDataPoolSize = 24 * GB;
1593
1594 if (useSmallMappingSize(AbsPath)) {
1595 MaxIndexSize = 1 * GB;
1596 MaxDataPoolSize = 2 * GB;
1597 }
1598
1599 auto CustomSize = getOverriddenMaxMappingSize();
1600 if (!CustomSize)
1601 return CustomSize.takeError();
1602 if (*CustomSize)
1603 MaxIndexSize = MaxDataPoolSize = **CustomSize;
1604
1605 SmallString<256> IndexPath(AbsPath);
1607 std::optional<OnDiskTrieRawHashMap> Index;
1609 IndexPath, IndexTableName + "[" + HashName + "]",
1610 HashByteSize * CHAR_BIT,
1611 /*DataSize=*/sizeof(TrieRecord), MaxIndexSize,
1612 /*MinFileSize=*/MB)
1613 .moveInto(Index))
1614 return std::move(E);
1615
1616 uint32_t UserHeaderSize = sizeof(std::atomic<uint64_t>);
1617
1618 SmallString<256> DataPoolPath(AbsPath);
1620 std::optional<OnDiskDataAllocator> DataPool;
1621 StringRef PolicyName =
1622 Policy == FaultInPolicy::SingleNode ? "single" : "full";
1624 DataPoolPath,
1625 DataPoolTableName + "[" + HashName + "]" + PolicyName,
1626 MaxDataPoolSize, /*MinFileSize=*/MB, UserHeaderSize,
1627 [](void *UserHeaderPtr) {
1628 new (UserHeaderPtr) std::atomic<uint64_t>(0);
1629 })
1630 .moveInto(DataPool))
1631 return std::move(E);
1632 if (DataPool->getUserHeader().size() != UserHeaderSize)
1634 "unexpected user header in '" + DataPoolPath +
1635 "'");
1636
1637 return std::unique_ptr<OnDiskGraphDB>(new OnDiskGraphDB(
1638 AbsPath, std::move(*Index), std::move(*DataPool), UpstreamDB, Policy));
1639}
1640
/// Construct the database from already-opened index and data-pool mappings.
/// \p UpstreamDB (may be null) is borrowed, not owned; \p Policy selects how
/// objects are faulted in from it.
OnDiskGraphDB::OnDiskGraphDB(StringRef RootPath, OnDiskTrieRawHashMap Index,
                             OnDiskDataAllocator DataPool,
                             OnDiskGraphDB *UpstreamDB, FaultInPolicy Policy)
    : Index(std::move(Index)), DataPool(std::move(DataPool)),
      RootPath(RootPath.str()), UpstreamDB(UpstreamDB), FIPolicy(Policy) {
  /// Lifetime for "big" objects not in DataPool.
  ///
  /// NOTE: Could use ThreadSafeTrieRawHashMap here. For now, doing something
  /// simpler on the assumption there won't be much contention since most data
  /// is not big. If there is contention, and we've already fixed ObjectProxy
  /// object handles to be cheap enough to use consistently, the fix might be
  /// to use better use of them rather than optimizing this map.
  ///
  /// FIXME: Figure out the right number of shards, if any.
  StandaloneData = new StandaloneDataMapTy();
}
1657
1659 delete static_cast<StandaloneDataMapTy *>(StandaloneData);
1660}
1661
1662Error OnDiskGraphDB::importFullTree(ObjectID PrimaryID,
1663 ObjectHandle UpstreamNode) {
1664 // Copies the full CAS tree from upstream. Uses depth-first copying to protect
1665 // against the process dying during importing and leaving the database with an
1666 // incomplete tree. Note that if the upstream has missing nodes then the tree
1667 // will be copied with missing nodes as well, it won't be considered an error.
1668 struct UpstreamCursor {
1670 size_t RefsCount;
1673 };
1674 /// Keeps track of the state of visitation for current node and all of its
1675 /// parents.
1677 /// Keeps track of the currently visited nodes as they are imported into
1678 /// primary database, from current node and its parents. When a node is
1679 /// entered for visitation it appends its own ID, then appends referenced IDs
1680 /// as they get imported. When a node is fully imported it removes the
1681 /// referenced IDs from the bottom of the stack which leaves its own ID at the
1682 /// bottom, adding to the list of referenced IDs for the parent node.
1683 SmallVector<ObjectID, 128> PrimaryNodesStack;
1684
1685 auto enqueueNode = [&](ObjectID PrimaryID, std::optional<ObjectHandle> Node) {
1686 PrimaryNodesStack.push_back(PrimaryID);
1687 if (!Node)
1688 return;
1689 auto Refs = UpstreamDB->getObjectRefs(*Node);
1690 CursorStack.push_back(
1691 {*Node, (size_t)llvm::size(Refs), Refs.begin(), Refs.end()});
1692 };
1693
1694 enqueueNode(PrimaryID, UpstreamNode);
1695
1696 while (!CursorStack.empty()) {
1697 UpstreamCursor &Cur = CursorStack.back();
1698 if (Cur.RefI == Cur.RefE) {
1699 // Copy the node data into the primary store.
1700 // FIXME: Use hard-link or cloning if the file-system supports it and data
1701 // is stored into a separate file.
1702
1703 // The bottom of \p PrimaryNodesStack contains the primary ID for the
1704 // current node plus the list of imported referenced IDs.
1705 assert(PrimaryNodesStack.size() >= Cur.RefsCount + 1);
1706 ObjectID PrimaryID = *(PrimaryNodesStack.end() - Cur.RefsCount - 1);
1707 auto PrimaryRefs = ArrayRef(PrimaryNodesStack)
1708 .slice(PrimaryNodesStack.size() - Cur.RefsCount);
1709 auto Data = UpstreamDB->getObjectData(Cur.Node);
1710 if (Error E = store(PrimaryID, PrimaryRefs, Data))
1711 return E;
1712 // Remove the current node and its IDs from the stack.
1713 PrimaryNodesStack.truncate(PrimaryNodesStack.size() - Cur.RefsCount);
1714 CursorStack.pop_back();
1715 continue;
1716 }
1717
1718 ObjectID UpstreamID = *(Cur.RefI++);
1719 auto PrimaryID = getReference(UpstreamDB->getDigest(UpstreamID));
1720 if (LLVM_UNLIKELY(!PrimaryID))
1721 return PrimaryID.takeError();
1722 if (containsObject(*PrimaryID, /*CheckUpstream=*/false)) {
1723 // This \p ObjectID already exists in the primary. Either it was imported
1724 // via \p importFullTree or the client created it, in which case the
1725 // client takes responsibility for how it was formed.
1726 enqueueNode(*PrimaryID, std::nullopt);
1727 continue;
1728 }
1729 Expected<std::optional<ObjectHandle>> UpstreamNode =
1730 UpstreamDB->load(UpstreamID);
1731 if (!UpstreamNode)
1732 return UpstreamNode.takeError();
1733 enqueueNode(*PrimaryID, *UpstreamNode);
1734 }
1735
1736 assert(PrimaryNodesStack.size() == 1);
1737 assert(PrimaryNodesStack.front() == PrimaryID);
1738 return Error::success();
1739}
1740
1741Error OnDiskGraphDB::importSingleNode(ObjectID PrimaryID,
1742 ObjectHandle UpstreamNode) {
1743 // Copies only a single node, it doesn't copy the referenced nodes.
1744
1745 // Copy the node data into the primary store.
1746 // FIXME: Use hard-link or cloning if the file-system supports it and data is
1747 // stored into a separate file.
1748 auto Data = UpstreamDB->getObjectData(UpstreamNode);
1749 auto UpstreamRefs = UpstreamDB->getObjectRefs(UpstreamNode);
1751 Refs.reserve(llvm::size(UpstreamRefs));
1752 for (ObjectID UpstreamRef : UpstreamRefs) {
1753 auto Ref = getReference(UpstreamDB->getDigest(UpstreamRef));
1754 if (LLVM_UNLIKELY(!Ref))
1755 return Ref.takeError();
1756 Refs.push_back(*Ref);
1757 }
1758
1759 return store(PrimaryID, Refs, Data);
1760}
1761
1762Expected<std::optional<ObjectHandle>>
1763OnDiskGraphDB::faultInFromUpstream(ObjectID PrimaryID) {
1764 if (!UpstreamDB)
1765 return std::nullopt;
1766
1767 auto UpstreamID = UpstreamDB->getReference(getDigest(PrimaryID));
1768 if (LLVM_UNLIKELY(!UpstreamID))
1769 return UpstreamID.takeError();
1770
1771 Expected<std::optional<ObjectHandle>> UpstreamNode =
1772 UpstreamDB->load(*UpstreamID);
1773 if (!UpstreamNode)
1774 return UpstreamNode.takeError();
1775 if (!*UpstreamNode)
1776 return std::nullopt;
1777
1778 if (Error E = FIPolicy == FaultInPolicy::SingleNode
1779 ? importSingleNode(PrimaryID, **UpstreamNode)
1780 : importFullTree(PrimaryID, **UpstreamNode))
1781 return std::move(E);
1782 return load(PrimaryID);
1783}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Mark last scratch load
AMDGPU Prepare AGPR Alloc
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_UNLIKELY(EXPR)
Definition Compiler.h:336
This file defines the DenseMap class.
static cl::opt< int > PageSize("imp-null-check-page-size", cl::desc("The page size of the target in bytes"), cl::init(4096), cl::Hidden)
static bool lookup(const GsymReader &GR, DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr, uint64_t Addr, SourceLocations &SrcLocs, llvm::Error &Err)
A Lookup helper functions.
#define I(x, y, z)
Definition MD5.cpp:57
#define H(x, y, z)
Definition MD5.cpp:56
This file declares interface for OnDiskDataAllocator, a file backed data pool can be used to allocate...
static constexpr StringLiteral FilePrefixLeaf0
static constexpr StringLiteral DataPoolTableName
static constexpr StringLiteral FilePrefixObject
static constexpr StringLiteral FilePrefixLeaf
static constexpr StringLiteral IndexFilePrefix
static OnDiskContent getContentFromHandle(const OnDiskDataAllocator &DataPool, ObjectHandle OH)
static constexpr StringLiteral DataPoolFilePrefix
static Error createCorruptObjectError(Expected< ArrayRef< uint8_t > > ID)
static size_t getPageSize()
static Expected< MappedTempFile > createTempFile(StringRef FinalPath, uint64_t Size)
static constexpr StringLiteral IndexTableName
This declares OnDiskGraphDB, an ondisk CAS database with a fixed length hash.
This file declares interface for OnDiskTrieRawHashMap, a thread-safe and (mostly) lock-free hash map ...
#define P(N)
Provides a library for accessing information about this process and other processes on the operating ...
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
static Split data
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:114
The Input class is used to parse a yaml document into in-memory structs and vectors.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
ArrayRef< T > drop_back(size_t N=1) const
Drop the last N elements of the array.
Definition ArrayRef.h:201
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
const T * data() const
Definition ArrayRef.h:139
Lightweight error class with error context and mandatory checking.
Definition Error.h:160
static ErrorSuccess success()
Create a success value.
Definition Error.h:337
Tagged union holding either a T or a Error.
Definition Error.h:486
Error takeError()
Take ownership of the stored error.
Definition Error.h:613
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void reserve(size_type N)
void truncate(size_type N)
Like resize, but requires that N is less than size().
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:864
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
FileOffset is a wrapper around uint64_t to represent the offset of data from the beginning of the fil...
Definition FileOffset.h:24
uint64_t get() const
Definition FileOffset.h:26
Handle to a loaded object in a ObjectStore instance.
LLVM_ABI_FOR_TEST Expected< ArrayRef< char > > get(FileOffset Offset, size_t Size) const
Get the data of Size stored at the given Offset.
static LLVM_ABI_FOR_TEST Expected< OnDiskDataAllocator > create(const Twine &Path, const Twine &TableName, uint64_t MaxFileSize, std::optional< uint64_t > NewFileInitialSize, uint32_t UserHeaderSize=0, function_ref< void(void *)> UserHeaderInit=nullptr)
LLVM_ABI_FOR_TEST size_t size() const
OnDiskTrieRawHashMap is a persistent trie data structure used as hash maps.
static LLVM_ABI_FOR_TEST Expected< OnDiskTrieRawHashMap > create(const Twine &Path, const Twine &TrieName, size_t NumHashBits, uint64_t DataSize, uint64_t MaxFileSize, std::optional< uint64_t > NewFileInitialSize, std::optional< size_t > NewTableNumRootBits=std::nullopt, std::optional< size_t > NewTableNumSubtrieBits=std::nullopt)
Gets or creates a file at Path with a hash-mapped trie named TrieName.
static std::optional< InternalRef4B > tryToShrink(InternalRef Ref)
Shrink to 4B reference.
Array of internal node references.
Standard 8 byte reference inside OnDiskGraphDB.
static InternalRef getFromOffset(FileOffset Offset)
Handle for a loaded node object.
static ObjectHandle fromFileOffset(FileOffset Offset)
static ObjectHandle fromMemory(uintptr_t Ptr)
Reference to a node.
uint64_t getOpaqueData() const
On-disk CAS nodes database, independent of a particular hashing algorithm.
FaultInPolicy
How to fault-in nodes if an upstream database is used.
@ SingleNode
Copy only the requested node.
void print(raw_ostream &OS) const
LLVM_ABI_FOR_TEST Expected< std::optional< ObjectHandle > > load(ObjectID Ref)
Expected< bool > isMaterialized(ObjectID Ref)
Check whether the object associated with Ref is stored in the CAS.
Error validate(bool Deep, HashingFuncT Hasher) const
Validate the OnDiskGraphDB.
object_refs_range getObjectRefs(ObjectHandle Node) const
unsigned getHardStorageLimitUtilization() const
LLVM_ABI_FOR_TEST Error validateObjectID(ObjectID ID)
Checks that ID exists in the index.
LLVM_ABI_FOR_TEST Error store(ObjectID ID, ArrayRef< ObjectID > Refs, ArrayRef< char > Data)
Associate data & references with a particular object ID.
ArrayRef< uint8_t > getDigest(ObjectID Ref) const
static LLVM_ABI_FOR_TEST Expected< std::unique_ptr< OnDiskGraphDB > > open(StringRef Path, StringRef HashName, unsigned HashByteSize, OnDiskGraphDB *UpstreamDB=nullptr, FaultInPolicy Policy=FaultInPolicy::FullTree)
Open the on-disk store from a directory.
LLVM_ABI_FOR_TEST size_t getStorageSize() const
bool containsObject(ObjectID Ref, bool CheckUpstream=true) const
Check whether the object associated with Ref is stored in the CAS.
LLVM_ABI_FOR_TEST Expected< ObjectID > getReference(ArrayRef< uint8_t > Hash)
Form a reference for the provided hash.
function_ref< void( ArrayRef< ArrayRef< uint8_t > >, ArrayRef< char >, SmallVectorImpl< uint8_t > &)> HashingFuncT
Hashing function type for validation.
LLVM_ABI_FOR_TEST ArrayRef< char > getObjectData(ObjectHandle Node) const
LLVM_ABI_FOR_TEST std::optional< ObjectID > getExistingReference(ArrayRef< uint8_t > Digest, bool CheckUpstream=true)
Get an existing reference to the object Digest.
An efficient, type-erasing, non-owning reference to a callable.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
static unsigned getPageSizeEstimate()
Get the process's estimated page size.
Definition Process.h:62
LLVM_ABI Error keep(const Twine &Name)
static LLVM_ABI Expected< TempFile > create(const Twine &Model, unsigned Mode=all_read|all_write, OpenFlags ExtraFlags=OF_None)
This creates a temporary file with createUniqueFile and schedules it for deletion with sys::RemoveFileOnSignal.
Represents the result of a call to sys::fs::status().
Definition FileSystem.h:222
This class represents a memory mapped file.
LLVM_ABI size_t size() const
Definition Path.cpp:1182
@ readonly
May only access map via const_data as read only.
@ readwrite
May access map via data and modify it. Written to path.
LLVM_ABI char * data() const
Definition Path.cpp:1187
#define UINT64_MAX
Definition DataTypes.h:77
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
constexpr StringLiteral CASFormatVersion
The version for all the ondisk database files.
Expected< std::optional< uint64_t > > getOverriddenMaxMappingSize()
Retrieves an overridden maximum mapping size for CAS files, if any, specified by LLVM_CAS_MAX_MAPPING_SIZE.
Expected< size_t > preallocateFileTail(int FD, size_t CurrentSize, size_t NewSize)
Allocate space for the file FD on disk, if the filesystem supports it.
bool useSmallMappingSize(const Twine &Path)
Whether to use a small file mapping for ondisk databases created in Path.
uint64_t getDataSize(const FuncRecordTy *Record)
Return the coverage map data size for the function.
uint64_t read64le(const void *P)
Definition Endian.h:435
void write64le(void *P, uint64_t V)
Definition Endian.h:478
void write32le(void *P, uint32_t V)
Definition Endian.h:475
uint32_t read32le(const void *P)
Definition Endian.h:432
LLVM_ABI std::error_code closeFile(file_t &F)
Close the file object.
LLVM_ABI std::error_code rename(const Twine &from, const Twine &to)
Rename from to to.
std::error_code resize_file_before_mapping_readwrite(int FD, uint64_t Size)
Resize FD to Size before mapping mapped_file_region::readwrite.
Definition FileSystem.h:410
LLVM_ABI bool exists(const basic_file_status &status)
Does file exist?
Definition Path.cpp:1090
LLVM_ABI std::error_code createUniqueFile(const Twine &Model, int &ResultFD, SmallVectorImpl< char > &ResultPath, OpenFlags Flags=OF_None, unsigned Mode=all_read|all_write)
Create a uniquely named file.
Definition Path.cpp:874
LLVM_ABI std::error_code remove(const Twine &path, bool IgnoreNonExisting=true)
Remove path.
LLVM_ABI Expected< file_t > openNativeFileForRead(const Twine &Name, OpenFlags Flags=OF_None, SmallVectorImpl< char > *RealPath=nullptr)
Opens the file with the given name in a read-only mode, returning its open file descriptor.
LLVM_ABI std::error_code create_directories(const Twine &path, bool IgnoreExisting=true, perms Perms=owner_all|group_all)
Create all the non-existent directories in path.
Definition Path.cpp:976
LLVM_ABI file_t convertFDToNativeFile(int FD)
Converts from a Posix file descriptor number to a native file handle.
Definition FileSystem.h:991
LLVM_ABI std::error_code status(const Twine &path, file_status &result, bool follow=true)
Get file status as if by POSIX stat().
LLVM_ABI void append(SmallVectorImpl< char > &path, const Twine &a, const Twine &b="", const Twine &c="", const Twine &d="")
Append to path.
Definition Path.cpp:457
ScopedSetting scopedDisable()
Definition IOSandbox.h:36
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Offset
Definition DWP.cpp:532
Error createFileError(const Twine &F, Error E)
Concatenate a source file path and/or name with an Error.
Definition Error.h:1415
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1667
ArrayRef< CharT > arrayRefFromStringRef(StringRef Input)
Construct a string ref from an array ref of unsigned chars.
std::error_code make_error_code(BitcodeError E)
@ Done
Definition Threading.h:60
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:134
Error handleErrors(Error E, HandlerTs &&... Hs)
Pass the ErrorInfo(s) contained in E to their respective handlers.
Definition Error.h:968
FunctionAddr VTableAddr uintptr_t uintptr_t DataSize
Definition InstrProf.h:267
std::string utohexstr(uint64_t X, bool LowerCase=false, unsigned Width=0)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition Error.h:1306
@ argument_out_of_domain
Definition Errc.h:37
@ illegal_byte_sequence
Definition Errc.h:52
@ invalid_argument
Definition Errc.h:56
std::optional< T > expectedToOptional(Expected< T > &&E)
Convert an Expected to an Optional without doing anything.
Definition Error.h:1095
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1634
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
@ Ref
The access may reference the value stored in memory.
Definition ModRef.h:32
@ Other
Any other memory.
Definition ModRef.h:68
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
Definition Error.h:770
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:189
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the given range.
Definition STLExtras.h:2002
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
OutputIt copy(R &&Range, OutputIt Out)
Definition STLExtras.h:1883
void toHex(ArrayRef< uint8_t > Input, bool LowerCase, SmallVectorImpl< char > &Output)
Convert buffer Input to its hexadecimal representation. The returned string is double the size of Input.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1915
LLVM_ABI Error errorCodeToError(std::error_code EC)
Helper for converting an std::error_code to a Error.
Definition Error.cpp:107
void consumeError(Error Err)
Consume a Error without doing anything.
Definition Error.h:1084
bool isAddrAligned(Align Lhs, const void *Addr)
Checks that Addr is a multiple of the alignment.
Definition Alignment.h:139
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:870
Proxy for an on-disk index record.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
static constexpr Align Of()
Allow constructions of constexpr Align from types.
Definition Alignment.h:94
Const value proxy to access the records stored in TrieRawHashMap.
Value proxy to access the records stored in TrieRawHashMap.