LLVM 23.0.0git
MappedFileRegionArena.cpp
Go to the documentation of this file.
1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file Implements MappedFileRegionArena.
9///
10/// A bump pointer allocator, backed by a memory-mapped file.
11///
12/// The effect we want is:
13///
14/// Step 1. If it doesn't exist, create the file with an initial size.
15/// Step 2. Reserve virtual memory large enough for the max file size.
16/// Step 3. Map the file into memory in the reserved region.
17/// Step 4. Increase the file size and update the mapping when necessary.
18///
19/// However, updating the mapping is challenging when it needs to work portably,
20/// and across multiple processes without locking for every read. Our current
21/// implementation handles the steps above in the following ways:
22///
23/// Step 1. Use \ref sys::fs::resize_file_sparse to grow the file to its max
24/// size (typically several GB). If the file system doesn't support
25/// sparse files, this may return a fully allocated file.
26/// Step 2. Call \ref sys::fs::mapped_file_region to map the entire file.
27/// Step 3. [Automatic as part of step 2.]
28/// Step 4. If supported, use \c fallocate or similar APIs to reserve file
29/// system storage for the sparse file so we won't end up with a partial
30/// file if the disk is out of space.
31///
32/// Additionally, we attempt to resize the file to its actual data size when
33/// closing the mapping, if this is the only concurrent instance. This is done
34/// using file locks. Shrinking the file mitigates problems with having large
35/// files: on filesystems without sparse files it avoids unnecessary space use;
36/// it also avoids allocating the full size if another process copies the file,
37/// which typically loses sparseness. These mitigations only work while the file
38/// is not in use.
39///
40/// The capacity and the header offset are determined by the first user of the
41/// MappedFileRegionArena instance. A later mismatched header offset results in
42/// an error on creation; a mismatched capacity is replaced by the existing one.
43///
44/// To support resizing, we use two separate file locks:
45/// 1. We use a shared reader lock on a ".shared" file until destruction.
46/// 2. We use a lock on the main file during initialization - shared to check
47/// the status, upgraded to exclusive to resize/initialize the file.
48///
49/// Then during destruction we attempt to get exclusive access on (1), which
50/// requires no concurrent readers. If so, we shrink the file. Using two
51/// separate locks simplifies the implementation and enables it to work on
52/// platforms (e.g. Windows) where a shared/reader lock prevents writing.
53//===----------------------------------------------------------------------===//
54
56#include "OnDiskCommon.h"
59
60#if LLVM_ON_UNIX
61#include <sys/stat.h>
62#if __has_include(<sys/param.h>)
63#include <sys/param.h>
64#endif
65#ifdef DEV_BSIZE
66#define MAPPED_FILE_BSIZE DEV_BSIZE
67#elif __linux__
68#define MAPPED_FILE_BSIZE 512
69#endif
70#endif
71
72using namespace llvm;
73using namespace llvm::cas;
74using namespace llvm::cas::ondisk;
75
76namespace {
77struct FileWithLock {
78 std::string Path;
79 int FD = -1;
80 std::optional<sys::fs::LockKind> Locked;
81
82private:
83 FileWithLock(std::string PathStr, Error &E) : Path(std::move(PathStr)) {
84 ErrorAsOutParameter EOP(&E);
85 if (std::error_code EC = sys::fs::openFileForReadWrite(
87 E = createFileError(Path, EC);
88 }
89
90public:
91 FileWithLock(FileWithLock &) = delete;
92 FileWithLock(FileWithLock &&Other) {
93 Path = std::move(Other.Path);
94 FD = Other.FD;
95 Other.FD = -1;
96 Locked = Other.Locked;
97 Other.Locked = std::nullopt;
98 }
99
100 ~FileWithLock() { consumeError(unlock()); }
101
102 static Expected<FileWithLock> open(StringRef Path) {
104 FileWithLock Result(Path.str(), E);
105 if (E)
106 return std::move(E);
107 return std::move(Result);
108 }
109
110 Error lock(sys::fs::LockKind LK) {
111 assert(!Locked && "already locked");
112 if (std::error_code EC = lockFileThreadSafe(FD, LK))
113 return createFileError(Path, EC);
114 Locked = LK;
115 return Error::success();
116 }
117
118 Error switchLock(sys::fs::LockKind LK) {
119 assert(Locked && "not locked");
120 if (auto E = unlock())
121 return E;
122
123 return lock(LK);
124 }
125
126 Error unlock() {
127 if (Locked) {
128 Locked = std::nullopt;
129 if (std::error_code EC = unlockFileThreadSafe(FD))
130 return createFileError(Path, EC);
131 }
132 return Error::success();
133 }
134
135 // Return true if the file was successfully locked exclusively.
136 bool tryLockExclusive() {
137 assert(!Locked && "can only try to lock if not locked");
138 if (tryLockFileThreadSafe(FD) == std::error_code()) {
139 Locked = sys::fs::LockKind::Exclusive;
140 return true;
141 }
142
143 return false;
144 }
145
146 // Release the lock so it will not be unlocked on destruction.
147 void release() {
148 Locked = std::nullopt;
149 FD = -1;
150 }
151};
152
/// Pair of sizes describing a file, as produced by FileSizeInfo::get().
153struct FileSizeInfo {
 /// Logical size of the file in bytes, as reported by the file status query.
154 uint64_t Size;
 /// Number of bytes treated as already allocated on disk. The fallback path
 /// of get() reports this as equal to Size.
155 uint64_t AllocatedSize;
156
 /// Query both sizes for \p File. Returns the error code of the underlying
 /// status call on failure.
157 static ErrorOr<FileSizeInfo> get(sys::fs::file_t File);
158};
159} // end anonymous namespace
160
162 const Twine &Path, uint64_t Capacity, uint64_t HeaderOffset,
163 std::shared_ptr<ondisk::OnDiskCASLogger> Logger,
164 function_ref<Error(MappedFileRegionArena &)> NewFileConstructor) {
165 uint64_t MinCapacity = HeaderOffset + sizeof(Header);
166 if (Capacity < MinCapacity)
167 return createStringError(
168 std::make_error_code(std::errc::invalid_argument),
169 "capacity is too small to hold MappedFileRegionArena");
170
172 Result.Path = Path.str();
173 Result.Logger = std::move(Logger);
174
175 // Open the support file. See file comment for details of locking scheme.
176 SmallString<128> SharedFilePath(Result.Path);
177 SharedFilePath.append(".shared");
178
179 auto SharedFileLock = FileWithLock::open(SharedFilePath);
180 if (!SharedFileLock)
181 return SharedFileLock.takeError();
182 Result.SharedLockFD = SharedFileLock->FD;
183
184 // Take shared/reader lock that will be held until destroyImpl if construction
185 // is successful.
186 if (auto E = SharedFileLock->lock(sys::fs::LockKind::Shared))
187 return std::move(E);
188
189 // Take shared/reader lock for initialization.
190 auto MainFile = FileWithLock::open(Result.Path);
191 if (!MainFile)
192 return MainFile.takeError();
193 if (Error E = MainFile->lock(sys::fs::LockKind::Shared))
194 return std::move(E);
195 Result.FD = MainFile->FD;
196
198 auto FileSize = FileSizeInfo::get(File);
199 if (!FileSize)
200 return createFileError(Result.Path, FileSize.getError());
201
202 // If the size is smaller than the capacity, we need to initialize the file.
203 // It may be empty, or may have been shrunk during a previous close.
204 if (FileSize->Size < Capacity) {
205 // Lock the file exclusively so only one process will do the initialization.
206 if (Error E = MainFile->switchLock(sys::fs::LockKind::Exclusive))
207 return std::move(E);
208 // Retrieve the current size now that we have exclusive access.
209 FileSize = FileSizeInfo::get(File);
210 if (!FileSize)
211 return createFileError(Result.Path, FileSize.getError());
212 }
213
214 if (FileSize->Size >= MinCapacity) {
215 // File is initialized. Read out the header to check for capacity and
216 // offset.
217 SmallVector<char, sizeof(Header)> HeaderContent(sizeof(Header));
218 auto Size = sys::fs::readNativeFileSlice(File, HeaderContent, HeaderOffset);
219 if (!Size)
220 return Size.takeError();
221
222 Header H;
223 memcpy(&H, HeaderContent.data(), sizeof(H));
224 if (H.HeaderOffset != HeaderOffset)
225 return createStringError(
226 std::make_error_code(std::errc::invalid_argument),
227 "specified header offset (" + utostr(HeaderOffset) +
228 ") does not match existing config (" + utostr(H.HeaderOffset) +
229 ")");
230
231 // If the capacity doesn't match, use the existing capacity instead.
232 if (H.Capacity != Capacity)
233 Capacity = H.Capacity;
234 }
235
236 // If the size is smaller than capacity, we need to resize the file.
237 if (FileSize->Size < Capacity) {
238 assert(MainFile->Locked == sys::fs::LockKind::Exclusive);
239 if (std::error_code EC =
240 sys::fs::resize_file_sparse(MainFile->FD, Capacity))
241 return createFileError(Result.Path, EC);
242 if (Result.Logger)
243 Result.Logger->logMappedFileRegionArenaResizeFile(
244 Result.Path, FileSize->Size, Capacity);
245 }
246
247 // Create the mapped region.
248 {
249 std::error_code EC;
251 File, sys::fs::mapped_file_region::readwrite, Capacity, 0, EC);
252 if (EC)
253 return createFileError(Result.Path, EC);
254 Result.Region = std::move(Map);
255 }
256
257 // Initialize the header.
258 Result.initializeHeader(HeaderOffset);
259
260 if (FileSize->Size < MinCapacity) {
261 assert(MainFile->Locked == sys::fs::LockKind::Exclusive);
262 // If we need to fully initialize the file, call NewFileConstructor.
263 if (Error E = NewFileConstructor(Result))
264 return std::move(E);
265
266 Result.H->HeaderOffset.exchange(HeaderOffset);
267 Result.H->Capacity.exchange(Capacity);
268 }
269
270 if (MainFile->Locked == sys::fs::LockKind::Exclusive) {
271 // If holding an exclusive lock, we might have resized the file and
272 // performed some read/write to the file. Query the file size again to make
273 // sure everything is up-to-date. Otherwise, FileSize info is already
274 // up-to-date.
275 FileSize = FileSizeInfo::get(File);
276 if (!FileSize)
277 return createFileError(Result.Path, FileSize.getError());
278 Result.H->AllocatedSize.exchange(FileSize->AllocatedSize);
279 }
280
281 // Release the shared lock so it can be closed in destroyImpl().
282 SharedFileLock->release();
283 return std::move(Result);
284}
285
286void MappedFileRegionArena::destroyImpl() {
287 if (!FD)
288 return;
289
290 // Drop the shared lock indicating we are no longer accessing the file.
291 if (SharedLockFD)
292 (void)unlockFileThreadSafe(*SharedLockFD);
293
294 // Attempt to truncate the file if we can get exclusive access. Ignore any
295 // errors.
296 if (H) {
297 assert(SharedLockFD && "Must have shared lock file open");
298 if (tryLockFileThreadSafe(*SharedLockFD) == std::error_code()) {
299 size_t Size = size();
300 size_t Capacity = capacity();
301 // sync to file system to make sure all contents are up-to-date.
302 (void)Region.sync();
303 // unmap the file before resizing since that is the requirement for
304 // some platforms.
305 Region.unmap();
306 (void)sys::fs::resize_file(*FD, Size);
307 (void)unlockFileThreadSafe(*SharedLockFD);
308 if (Logger)
309 Logger->logMappedFileRegionArenaResizeFile(Path, Capacity, Size);
310 }
311 }
312
313 auto Close = [](std::optional<int> &FD) {
314 if (FD) {
316 sys::fs::closeFile(File);
317 FD = std::nullopt;
318 }
319 };
320
321 // Close the file and shared lock.
322 Close(FD);
323 Close(SharedLockFD);
324
325 if (Logger)
326 Logger->logMappedFileRegionArenaClose(Path);
327}
328
/// Point \c H at the header located \p HeaderOffset bytes into the mapped
/// data and make sure the bump pointer starts no earlier than the end of the
/// header.
///
/// \param HeaderOffset byte offset of the header from data(); asserted to be
/// suitably aligned for the header type and to leave room for the header
/// within capacity().
329void MappedFileRegionArena::initializeHeader(uint64_t HeaderOffset) {
330 assert(capacity() < (uint64_t)INT64_MAX && "capacity must fit in int64_t");
 // First byte after the header; this is the initial bump pointer value.
331 uint64_t HeaderEndOffset = HeaderOffset + sizeof(decltype(*H));
332 assert(HeaderEndOffset <= capacity() &&
333 "Expected end offset to be pre-allocated");
 // NOTE(review): the message below says "end offset", but the value being
 // checked for alignment is HeaderOffset.
334 assert(isAligned(Align::Of<decltype(*H)>(), HeaderOffset) &&
335 "Expected end offset to be aligned");
336 H = reinterpret_cast<decltype(H)>(data() + HeaderOffset);
337
 // Only set the bump pointer if it is still 0. If the exchange fails,
 // ExistingValue holds the current value, which is only sanity-checked (in
 // assert-enabled builds) and otherwise left untouched.
338 uint64_t ExistingValue = 0;
339 if (!H->BumpPtr.compare_exchange_strong(ExistingValue, HeaderEndOffset))
340 assert(ExistingValue >= HeaderEndOffset &&
341 "Expected 0, or past the end of the header itself");
 // Logging is optional; a null Logger disables it.
342 if (Logger)
343 Logger->logMappedFileRegionArenaCreate(Path, *FD, data(), capacity(),
344 size());
345}
346
348 return createStringError(std::make_error_code(std::errc::not_enough_memory),
349 "memory mapped file allocator is out of space");
350}
351
353 AllocSize = alignTo(AllocSize, getAlign());
354 uint64_t OldEnd = H->BumpPtr.fetch_add(AllocSize);
355 uint64_t NewEnd = OldEnd + AllocSize;
356 if (LLVM_UNLIKELY(NewEnd > capacity())) {
357 // Return the allocation. If the start was already past the end, some other
358 // concurrent allocations have already consumed all the capacity and there
359 // is no need to restore the original value. If the start was not past the
360 // end, the current allocation certainly bumped it past the end, so all
361 // later allocations must have failed and the current allocation is in
362 // charge of returning the bump pointer to a valid value.
363 if (OldEnd <= capacity())
364 (void)H->BumpPtr.exchange(OldEnd);
365
366 if (Logger)
367 Logger->logMappedFileRegionArenaOom(Path, capacity(), OldEnd, AllocSize);
368
370 }
371
372 uint64_t DiskSize = H->AllocatedSize;
373 if (LLVM_UNLIKELY(NewEnd > DiskSize)) {
374 uint64_t NewSize;
375 // The minimum increment is a page, but allocate more to amortize the cost.
376 constexpr uint64_t Increment = 1 * 1024 * 1024; // 1 MB
377 if (Error E = preallocateFileTail(*FD, DiskSize, DiskSize + Increment)
378 .moveInto(NewSize))
379 return std::move(E);
380 assert(NewSize >= DiskSize + Increment);
381 // FIXME: on Darwin this can under-count the size if there is a race to
382 // preallocate disk, because the semantics of F_PREALLOCATE are to add bytes
383 // to the end of the file, not to allocate up to a fixed size.
384 // Any discrepancy will be resolved the next time the file is truncated and
385 // then reopened.
386 while (DiskSize < NewSize)
387 H->AllocatedSize.compare_exchange_strong(DiskSize, NewSize);
388 }
389
390 if (Logger)
391 Logger->logMappedFileRegionArenaAllocate(data(), OldEnd, AllocSize);
392
393 return OldEnd;
394}
395
/// Query the logical size and the on-disk allocated size of \p File.
///
/// When LLVM_ON_UNIX and MAPPED_FILE_BSIZE are both defined, the sizes come
/// from fstat(): Size from st_size and AllocatedSize from the allocated block
/// count. Otherwise the file is assumed to be fully allocated and both fields
/// are set to the reported file size.
///
/// \returns the size information, or the error code of the failed status call.
396ErrorOr<FileSizeInfo> FileSizeInfo::get(sys::fs::file_t File) {
397#if LLVM_ON_UNIX && defined(MAPPED_FILE_BSIZE)
398 struct stat Status;
399 int StatRet = ::fstat(File, &Status);
400 if (StatRet)
401 return errnoAsErrorCode();
 // st_blocks is the number of blocks actually allocated to the file, counted
 // in MAPPED_FILE_BSIZE-byte units (DEV_BSIZE, or 512 on Linux). st_blksize
 // is only the preferred I/O block size and says nothing about how much of a
 // sparse file is backed by storage.
402 uint64_t AllocatedSize = uint64_t(Status.st_blocks) * MAPPED_FILE_BSIZE;
403 return FileSizeInfo{uint64_t(Status.st_size), AllocatedSize};
404#else
405 // Fallback: assume the file is fully allocated. Note: this may result in
406 // data loss on out-of-space.
 // NOTE(review): this declaration is absent from the listing and is restored
 // from its uses below (sys::fs::status() and getSize()); confirm against the
 // original file.
407 sys::fs::file_status Status;
408 if (std::error_code EC = sys::fs::status(File, Status))
409 return EC;
410 return FileSizeInfo{Status.getSize(), Status.getSize()};
411#endif
412}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_UNLIKELY(EXPR)
Definition Compiler.h:336
#define H(x, y, z)
Definition MD5.cpp:56
static Error createAllocatorOutOfSpaceError()
This file declares interface for MappedFileRegionArena, a bump pointer allocator, backed by a memory-...
This file declares interface for OnDiskCASLogger, an interface that can be used to log CAS events to ...
This file contains some functions that are useful when dealing with strings.
Represents either an error or a value T.
Definition ErrorOr.h:56
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static ErrorSuccess success()
Create a success value.
Definition Error.h:336
Tagged union holding either a T or a Error.
Definition Error.h:485
Logging utility - given an ordered specification of features, and assuming a scalar reward,...
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition SmallString.h:68
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static LLVM_ABI_FOR_TEST Expected< MappedFileRegionArena > create(const Twine &Path, uint64_t Capacity, uint64_t HeaderOffset, std::shared_ptr< ondisk::OnDiskCASLogger > Logger, function_ref< Error(MappedFileRegionArena &)> NewFileConstructor)
Create a MappedFileRegionArena.
LLVM_ABI_FOR_TEST Expected< int64_t > allocateOffset(uint64_t AllocSize)
Allocate, returning the offset from data() instead of a pointer.
static constexpr Align getAlign()
Minimum alignment for allocations, currently hardcoded to 8B.
An efficient, type-erasing, non-owning reference to a callable.
Represents the result of a call to sys::fs::status().
Definition FileSystem.h:222
This class represents a memory mapped file.
@ readwrite
May access map via data and modify it. Written to path.
#define INT64_MAX
Definition DataTypes.h:71
std::error_code lockFileThreadSafe(int FD, llvm::sys::fs::LockKind Kind)
Thread-safe alternative to sys::fs::lockFile.
std::error_code unlockFileThreadSafe(int FD)
Thread-safe alternative to sys::fs::unlockFile.
std::error_code tryLockFileThreadSafe(int FD, std::chrono::milliseconds Timeout=std::chrono::milliseconds(0), llvm::sys::fs::LockKind Kind=llvm::sys::fs::LockKind::Exclusive)
Thread-safe alternative to sys::fs::tryLockFile.
Expected< size_t > preallocateFileTail(int FD, size_t CurrentSize, size_t NewSize)
Allocate space for the file FD on disk, if the filesystem supports it.
LLVM_ABI std::error_code closeFile(file_t &F)
Close the file object.
std::error_code openFileForReadWrite(const Twine &Name, int &ResultFD, CreationDisposition Disp, OpenFlags Flags, unsigned Mode=0666)
Opens the file with the given name in a write-only or read-write mode, returning its open file descri...
@ CD_OpenAlways
CD_OpenAlways - When opening a file:
Definition FileSystem.h:742
LLVM_ABI Expected< size_t > readNativeFileSlice(file_t FileHandle, MutableArrayRef< char > Buf, uint64_t Offset)
Reads Buf.size() bytes from FileHandle at offset Offset into Buf.
LLVM_ABI std::error_code resize_file_sparse(int FD, uint64_t Size)
Resize path to size with sparse files explicitly enabled.
LockKind
An enumeration for the lock kind.
LLVM_ABI std::error_code resize_file(int FD, uint64_t Size)
Resize path to size.
LLVM_ABI file_t convertFDToNativeFile(int FD)
Converts from a Posix file descriptor number to a native file handle.
Definition FileSystem.h:991
LLVM_ABI std::error_code status(const Twine &path, file_status &result, bool follow=true)
Get file status as if by POSIX stat().
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
Error createFileError(const Twine &F, Error E)
Concatenate a source file path and/or name with an Error.
Definition Error.h:1399
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:134
std::string utostr(uint64_t X, bool isNeg=false)
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition Error.h:1305
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
@ Other
Any other memory.
Definition ModRef.h:68
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1915
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
std::error_code errnoAsErrorCode()
Helper to get errno as an std::error_code.
Definition Error.h:1240
void consumeError(Error Err)
Consume a Error without doing anything.
Definition Error.h:1083
static constexpr Align Of()
Allow constructions of constexpr Align from types.
Definition Alignment.h:94
Header for MappedFileRegionArena.