LLVM 22.0.0git
MappedFileRegionArena.cpp
Go to the documentation of this file.
1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file Implements MappedFileRegionArena.
9///
10/// A bump pointer allocator, backed by a memory-mapped file.
11///
12/// The effect we want is:
13///
14/// Step 1. If it doesn't exist, create the file with an initial size.
15/// Step 2. Reserve virtual memory large enough for the max file size.
16/// Step 3. Map the file into memory in the reserved region.
17/// Step 4. Increase the file size and update the mapping when necessary.
18///
19/// However, updating the mapping is challenging when it needs to work portably,
20/// and across multiple processes without locking for every read. Our current
21/// implementation handles the steps above in the following ways:
22///
23/// Step 1. Use \ref sys::fs::resize_file_sparse to grow the file to its max
24/// size (typically several GB). If the file system doesn't support
25/// sparse file, this may return a fully allocated file.
26/// Step 2. Call \ref sys::fs::mapped_file_region to map the entire file.
27/// Step 3. [Automatic as part of step 2.]
28/// Step 4. If supported, use \c fallocate or similar APIs to reserve file
29/// system storage for the sparse file so we won't end up with a partial
30/// file if the disk is out of space.
31///
32/// Additionally, we attempt to resize the file to its actual data size when
33/// closing the mapping, if this is the only concurrent instance. This is done
34/// using file locks. Shrinking the file mitigates problems with having large
35/// files: on filesystems without sparse files it avoids unnecessary space use;
36/// it also avoids allocating the full size if another process copies the file,
37/// which typically loses sparseness. These mitigations only work while the file
38/// is not in use.
39///
40/// The capacity and the header offset are determined by the first user of the
41/// MappedFileRegionArena instance. A later mismatched header offset is an
42/// error on creation; a mismatched capacity is replaced by the existing one.
43///
44/// To support resizing, we use two separate file locks:
45/// 1. We use a shared reader lock on a ".shared" file until destruction.
46/// 2. We use a lock on the main file during initialization - shared to check
47/// the status, upgraded to exclusive to resize/initialize the file.
48///
49/// Then during destruction we attempt to get exclusive access on (1), which
50/// requires no concurrent readers. If so, we shrink the file. Using two
51/// separate locks simplifies the implementation and enables it to work on
52/// platforms (e.g. Windows) where a shared/reader lock prevents writing.
53//===----------------------------------------------------------------------===//
54
56#include "OnDiskCommon.h"
58
// Define MAPPED_FILE_BSIZE, the multiplier FileSizeInfo::get uses to convert
// an fstat() block field into bytes. DEV_BSIZE is used when it is defined
// (presumably by <sys/param.h> -- TODO confirm); otherwise 512 is used on
// Linux. On any other configuration the macro stays undefined and
// FileSizeInfo::get takes its fallback branch.
59#if LLVM_ON_UNIX
60#include <sys/stat.h>
61#if __has_include(<sys/param.h>)
62#include <sys/param.h>
63#endif
64#ifdef DEV_BSIZE
65#define MAPPED_FILE_BSIZE DEV_BSIZE
66#elif __linux__
67#define MAPPED_FILE_BSIZE 512
68#endif
69#endif
70
71using namespace llvm;
72using namespace llvm::cas;
73using namespace llvm::cas::ondisk;
74
75namespace {
// Pairs a file descriptor with the path it was opened from and the kind of
// lock currently held through it (if any). The destructor releases a held
// lock; nothing in this struct closes FD.
//
// NOTE(review): this listing jumps from line 84 to 86 (the argument list of
// openFileForReadWrite is missing) and from 101 to 103 (the declaration of E
// in open() is missing). Confirm against the real file before editing.
76struct FileWithLock {
// Path the file was opened from; also used as context in error messages.
77 std::string Path;
// File descriptor, or -1 when this object holds none.
78 int FD = -1;
// Kind of lock currently held through FD; std::nullopt when not locked.
79 std::optional<sys::fs::LockKind> Locked;
80
81private:
// Opens the file and reports failure through the out-parameter E rather than
// a return value; callers go through open() below.
82 FileWithLock(std::string PathStr, Error &E) : Path(std::move(PathStr)) {
83 ErrorAsOutParameter EOP(&E);
84 if (std::error_code EC = sys::fs::openFileForReadWrite(
86 E = createFileError(Path, EC);
87 }
88
89public:
90 FileWithLock(FileWithLock &) = delete;
// Move: take over the descriptor and lock state, and reset Other so that its
// destructor does not unlock the descriptor that now belongs to this object.
91 FileWithLock(FileWithLock &&Other) {
92 Path = std::move(Other.Path);
93 FD = Other.FD;
94 Other.FD = -1;
95 Locked = Other.Locked;
96 Other.Locked = std::nullopt;
97 }
98
// Releases the lock if one is held; any unlock error is discarded.
99 ~FileWithLock() { consumeError(unlock()); }
100
// Factory: returns the opened file, or the error produced by the constructor.
101 static Expected<FileWithLock> open(StringRef Path) {
103 FileWithLock Result(Path.str(), E);
104 if (E)
105 return std::move(E);
106 return std::move(Result);
107 }
108
// Acquires a lock of kind LK on FD. Must not already be locked. Locked is
// only updated when the lock call succeeds.
109 Error lock(sys::fs::LockKind LK) {
110 assert(!Locked && "already locked");
111 if (std::error_code EC = lockFileThreadSafe(FD, LK))
112 return createFileError(Path, EC);
113 Locked = LK;
114 return Error::success();
115 }
116
// Changes the held lock to kind LK by unlocking and then locking again, so
// there is a window between the two calls in which no lock is held.
117 Error switchLock(sys::fs::LockKind LK) {
118 assert(Locked && "not locked");
119 if (auto E = unlock())
120 return E;
121
122 return lock(LK);
123 }
124
// Releases the lock if one is held. Locked is cleared before the unlock call,
// so the object reports "not locked" even if unlocking fails.
125 Error unlock() {
126 if (Locked) {
127 Locked = std::nullopt;
128 if (std::error_code EC = unlockFileThreadSafe(FD))
129 return createFileError(Path, EC);
130 }
131 return Error::success();
132 }
133
134 // Return true if succeed to lock the file exclusively.
// Uses tryLockFileThreadSafe with its default arguments; any error code is
// reported as false.
135 bool tryLockExclusive() {
136 assert(!Locked && "can only try to lock if not locked");
137 if (tryLockFileThreadSafe(FD) == std::error_code()) {
138 Locked = sys::fs::LockKind::Exclusive;
139 return true;
140 }
141
142 return false;
143 }
144
145 // Release the lock so it will not be unlocked on destruction.
// The descriptor is forgotten as well; whoever copied FD beforehand is
// responsible for unlocking and closing it.
146 void release() {
147 Locked = std::nullopt;
148 FD = -1;
149 }
150};
151
// Result of querying a file's sizes; filled in by FileSizeInfo::get.
152struct FileSizeInfo {
// File size in bytes as reported by the file status query.
153 uint64_t Size;
// Bytes of storage attributed to the file. On configurations where
// FileSizeInfo::get uses its fallback path this equals Size.
154 uint64_t AllocatedSize;
155
// Queries both sizes for File; returns the error code if the query fails.
156 static ErrorOr<FileSizeInfo> get(sys::fs::file_t File);
157};
158} // end anonymous namespace
159
// Body of MappedFileRegionArena::create: opens (creating if needed) the arena
// file at Path plus a companion ".shared" lock file, grows the file to
// Capacity, maps it, and initializes or validates the header at HeaderOffset.
// NewFileConstructor is invoked only when the file was too small to already
// contain a header.
//
// NOTE(review): this listing is missing lines 160 (the function signature),
// 169 (declaration of Result), 194 (declaration of File) and 243 (start of
// the mapped_file_region construction for Map). Confirm against the real
// file before editing.
161 const Twine &Path, uint64_t Capacity, uint64_t HeaderOffset,
162 function_ref<Error(MappedFileRegionArena &)> NewFileConstructor) {
// The file must at least be able to hold the header at its offset.
163 uint64_t MinCapacity = HeaderOffset + sizeof(Header);
164 if (Capacity < MinCapacity)
165 return createStringError(
166 std::make_error_code(std::errc::invalid_argument),
167 "capacity is too small to hold MappedFileRegionArena");
168
170 Result.Path = Path.str();
171
172 // Open the shared lock file. See file comment for details of locking scheme.
173 SmallString<128> SharedFilePath(Result.Path);
174 SharedFilePath.append(".shared");
175
176 auto SharedFileLock = FileWithLock::open(SharedFilePath);
177 if (!SharedFileLock)
178 return SharedFileLock.takeError();
179 Result.SharedLockFD = SharedFileLock->FD;
180
181 // Take shared/reader lock that will be held until destroyImpl if construction
182 // is successful.
183 if (auto E = SharedFileLock->lock(sys::fs::LockKind::Shared))
184 return std::move(E);
185
186 // Take shared/reader lock for initialization.
187 auto MainFile = FileWithLock::open(Result.Path);
188 if (!MainFile)
189 return MainFile.takeError();
190 if (Error E = MainFile->lock(sys::fs::LockKind::Shared))
191 return std::move(E);
192 Result.FD = MainFile->FD;
193
195 auto FileSize = FileSizeInfo::get(File);
196 if (!FileSize)
197 return createFileError(Result.Path, FileSize.getError());
198
199 // If the size is smaller than the capacity, we need to initialize the file.
200 // It may be empty, or may have been shrunk during a previous close.
201 if (FileSize->Size < Capacity) {
202 // Lock the file exclusively so only one process will do the initialization.
203 if (Error E = MainFile->switchLock(sys::fs::LockKind::Exclusive))
204 return std::move(E);
205 // Retrieve the current size now that we have exclusive access.
206 FileSize = FileSizeInfo::get(File);
207 if (!FileSize)
208 return createFileError(Result.Path, FileSize.getError());
209 }
210
211 if (FileSize->Size >= MinCapacity) {
212 // File is initialized. Read out the header to check for capacity and
213 // offset.
214 SmallVector<char, sizeof(Header)> HeaderContent(sizeof(Header));
215 auto Size = sys::fs::readNativeFileSlice(File, HeaderContent, HeaderOffset);
216 if (!Size)
217 return Size.takeError();
218
219 Header *H = reinterpret_cast<Header *>(HeaderContent.data());
220 if (H->HeaderOffset != HeaderOffset)
221 return createStringError(
222 std::make_error_code(std::errc::invalid_argument),
223 "specified header offset (" + utostr(HeaderOffset) +
224 ") does not match existing config (" + utostr(H->HeaderOffset) +
225 ")");
226
227 // If the capacity doesn't match, use the existing capacity instead.
228 if (H->Capacity != Capacity)
229 Capacity = H->Capacity;
230 }
231
232 // If the size is smaller than capacity, we need to resize the file.
233 if (FileSize->Size < Capacity) {
234 assert(MainFile->Locked == sys::fs::LockKind::Exclusive);
235 if (std::error_code EC =
236 sys::fs::resize_file_sparse(MainFile->FD, Capacity))
237 return createFileError(Result.Path, EC);
238 }
239
240 // Create the mapped region.
241 {
242 std::error_code EC;
244 File, sys::fs::mapped_file_region::readwrite, Capacity, 0, EC);
245 if (EC)
246 return createFileError(Result.Path, EC);
247 Result.Region = std::move(Map);
248 }
249
250 // Initialize the header.
251 Result.initializeHeader(HeaderOffset);
// FileSize still holds the size measured before the resize above, so this
// tests whether the file was too small to contain a header when opened.
252 if (FileSize->Size < MinCapacity) {
253 assert(MainFile->Locked == sys::fs::LockKind::Exclusive);
254 // If we need to fully initialize the file, call NewFileConstructor.
255 if (Error E = NewFileConstructor(Result))
256 return std::move(E);
257
258 Result.H->HeaderOffset.exchange(HeaderOffset);
259 Result.H->Capacity.exchange(Capacity);
260 }
261
262 if (MainFile->Locked == sys::fs::LockKind::Exclusive) {
263 // If holding an exclusive lock, we might have resized the file and
264 // performed some read/write to the file. Query the file size again to make
265 // sure everything is up-to-date. Otherwise, FileSize info is already
266 // up-to-date.
267 FileSize = FileSizeInfo::get(File);
268 if (!FileSize)
269 return createFileError(Result.Path, FileSize.getError());
270 Result.H->AllocatedSize.exchange(FileSize->AllocatedSize);
271 }
272
273 // Release the shared lock so it can be closed in destroyImpl().
// MainFile is not released, so its destructor drops the main-file lock when
// this function returns.
274 SharedFileLock->release();
275 return std::move(Result);
276}
277
// Tears down the arena: drops the shared lock, shrinks the file to size() if
// an exclusive lock on the shared lock file can be taken, then closes both
// descriptors. Does nothing when FD is not set. All errors are ignored.
//
// NOTE(review): this listing is missing line 304 (the declaration of File
// passed to closeFile inside the Close lambda). Confirm against the real
// file before editing.
278void MappedFileRegionArena::destroyImpl() {
279 if (!FD)
280 return;
281
282 // Drop the shared lock indicating we are no longer accessing the file.
283 if (SharedLockFD)
284 (void)unlockFileThreadSafe(*SharedLockFD);
285
286 // Attempt to truncate the file if we can get exclusive access. Ignore any
287 // errors.
288 if (H) {
289 assert(SharedLockFD && "Must have shared lock file open");
290 if (tryLockFileThreadSafe(*SharedLockFD) == std::error_code()) {
// size() is read before the region is unmapped below.
291 size_t Size = size();
292 // sync to file system to make sure all contents are up-to-date.
293 (void)Region.sync();
294 // unmap the file before resizing since that is the requirement for
295 // some platforms.
296 Region.unmap();
297 (void)sys::fs::resize_file(*FD, Size);
298 (void)unlockFileThreadSafe(*SharedLockFD);
299 }
300 }
301
// Closes the descriptor if set and resets the optional to empty.
302 auto Close = [](std::optional<int> &FD) {
303 if (FD) {
305 sys::fs::closeFile(File);
306 FD = std::nullopt;
307 }
308 };
309
310 // Close the file and shared lock.
311 Close(FD);
312 Close(SharedLockFD);
313}
314
315void MappedFileRegionArena::initializeHeader(uint64_t HeaderOffset) {
316 assert(capacity() < (uint64_t)INT64_MAX && "capacity must fit in int64_t");
317 uint64_t HeaderEndOffset = HeaderOffset + sizeof(decltype(*H));
318 assert(HeaderEndOffset <= capacity() &&
319 "Expected end offset to be pre-allocated");
320 assert(isAligned(Align::Of<decltype(*H)>(), HeaderOffset) &&
321 "Expected end offset to be aligned");
322 H = reinterpret_cast<decltype(H)>(data() + HeaderOffset);
323
324 uint64_t ExistingValue = 0;
325 if (!H->BumpPtr.compare_exchange_strong(ExistingValue, HeaderEndOffset))
326 assert(ExistingValue >= HeaderEndOffset &&
327 "Expected 0, or past the end of the header itself");
328}
329
// Body of createAllocatorOutOfSpaceError: builds the not_enough_memory error
// used when the arena runs out of space.
// NOTE(review): the signature line (listing line 330) is missing here; the
// symbol index at the end of this listing gives it as
// "static Error createAllocatorOutOfSpaceError()".
331 return createStringError(std::make_error_code(std::errc::not_enough_memory),
332 "memory mapped file allocator is out of space");
333}
334
// Body of MappedFileRegionArena::allocateOffset: reserves AllocSize bytes
// (rounded up to getAlign()) by bumping the pointer stored in the mapped
// header, and returns the offset of the reserved range.
//
// NOTE(review): this listing is missing line 335 (the signature; the symbol
// index at the end gives "Expected< int64_t > allocateOffset(uint64_t
// AllocSize)") and line 349 (the statement that ends the out-of-capacity
// branch, presumably returning the out-of-space error -- TODO confirm).
336 AllocSize = alignTo(AllocSize, getAlign());
// Atomically claim the range [OldEnd, NewEnd).
337 uint64_t OldEnd = H->BumpPtr.fetch_add(AllocSize);
338 uint64_t NewEnd = OldEnd + AllocSize;
339 if (LLVM_UNLIKELY(NewEnd > capacity())) {
340 // Return the allocation. If the start already passed the end, that means
341 // some other concurrent allocations already consumed all the capacity.
342 // There is no need to return the original value. If the start was not
343 // past the end, the current allocation certainly bumped it past the end.
344 // All other allocations afterwards must have failed and the current
345 // allocation is in charge of returning the pointer to a valid value.
346 if (OldEnd <= capacity())
347 (void)H->BumpPtr.exchange(OldEnd);
348
350 }
351
// Make sure the newly claimed range has storage reserved for it.
352 uint64_t DiskSize = H->AllocatedSize;
353 if (LLVM_UNLIKELY(NewEnd > DiskSize)) {
354 uint64_t NewSize;
355 // The minimum increment is a page, but allocate more to amortize the cost.
// NOTE(review): the request extends DiskSize by a fixed Increment and does
// not take NewEnd into account; if NewEnd exceeds DiskSize + Increment the
// tail of this allocation may not be covered by this call -- confirm.
356 constexpr uint64_t Increment = 1 * 1024 * 1024; // 1 MB
357 if (Error E = preallocateFileTail(*FD, DiskSize, DiskSize + Increment)
358 .moveInto(NewSize))
359 return std::move(E);
360 assert(NewSize >= DiskSize + Increment);
361 // FIXME: on Darwin this can under-count the size if there is a race to
362 // preallocate disk, because the semantics of F_PREALLOCATE are to add bytes
363 // to the end of the file, not to allocate up to a fixed size.
364 // Any discrepancy will be resolved the next time the file is truncated and
365 // then reopened.
// Publish the larger size. A failed compare-exchange reloads DiskSize, so the
// loop stops once the stored value is at least NewSize.
366 while (DiskSize < NewSize)
367 H->AllocatedSize.compare_exchange_strong(DiskSize, NewSize);
368 }
369 return OldEnd;
370}
371
372ErrorOr<FileSizeInfo> FileSizeInfo::get(sys::fs::file_t File) {
373#if LLVM_ON_UNIX && defined(MAPPED_FILE_BSIZE)
374 struct stat Status;
375 int StatRet = ::fstat(File, &Status);
376 if (StatRet)
377 return errnoAsErrorCode();
378 uint64_t AllocatedSize = uint64_t(Status.st_blksize) * MAPPED_FILE_BSIZE;
379 return FileSizeInfo{uint64_t(Status.st_size), AllocatedSize};
380#else
381 // Fallback: assume the file is fully allocated. Note: this may result in
382 // data loss on out-of-space.
384 if (std::error_code EC = sys::fs::status(File, Status))
385 return EC;
386 return FileSizeInfo{Status.getSize(), Status.getSize()};
387#endif
388}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_UNLIKELY(EXPR)
Definition Compiler.h:336
#define H(x, y, z)
Definition MD5.cpp:57
static Error createAllocatorOutOfSpaceError()
This file declares interface for MappedFileRegionArena, a bump pointer allocator, backed by a memory-...
This file contains some functions that are useful when dealing with strings.
Represents either an error or a value T.
Definition ErrorOr.h:56
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static ErrorSuccess success()
Create a success value.
Definition Error.h:336
Tagged union holding either a T or a Error.
Definition Error.h:485
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition SmallString.h:68
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
Expected< int64_t > allocateOffset(uint64_t AllocSize)
Allocate, returning the offset from data() instead of a pointer.
static Expected< MappedFileRegionArena > create(const Twine &Path, uint64_t Capacity, uint64_t HeaderOffset, function_ref< Error(MappedFileRegionArena &)> NewFileConstructor)
Create a MappedFileRegionArena.
static constexpr Align getAlign()
Minimum alignment for allocations, currently hardcoded to 8B.
An efficient, type-erasing, non-owning reference to a callable.
Represents the result of a call to sys::fs::status().
Definition FileSystem.h:222
This class represents a memory mapped file.
@ readwrite
May access map via data and modify it. Written to path.
#define INT64_MAX
Definition DataTypes.h:71
std::error_code lockFileThreadSafe(int FD, llvm::sys::fs::LockKind Kind)
Thread-safe alternative to sys::fs::lockFile.
std::error_code unlockFileThreadSafe(int FD)
Thread-safe alternative to sys::fs::unlockFile.
std::error_code tryLockFileThreadSafe(int FD, std::chrono::milliseconds Timeout=std::chrono::milliseconds(0), llvm::sys::fs::LockKind Kind=llvm::sys::fs::LockKind::Exclusive)
Thread-safe alternative to sys::fs::tryLockFile.
Expected< size_t > preallocateFileTail(int FD, size_t CurrentSize, size_t NewSize)
Allocate space for the file FD on disk, if the filesystem supports it.
LLVM_ABI std::error_code closeFile(file_t &F)
Close the file object.
std::error_code openFileForReadWrite(const Twine &Name, int &ResultFD, CreationDisposition Disp, OpenFlags Flags, unsigned Mode=0666)
Opens the file with the given name in a write-only or read-write mode, returning its open file descri...
@ CD_OpenAlways
CD_OpenAlways - When opening a file:
Definition FileSystem.h:754
LLVM_ABI Expected< size_t > readNativeFileSlice(file_t FileHandle, MutableArrayRef< char > Buf, uint64_t Offset)
Reads Buf.size() bytes from FileHandle at offset Offset into Buf.
LLVM_ABI std::error_code resize_file_sparse(int FD, uint64_t Size)
Resize path to size with sparse files explicitly enabled.
LockKind
An enumeration for the lock kind.
LLVM_ABI std::error_code resize_file(int FD, uint64_t Size)
Resize path to size.
LLVM_ABI file_t convertFDToNativeFile(int FD)
Converts from a Posix file descriptor number to a native file handle.
LLVM_ABI std::error_code status(const Twine &path, file_status &result, bool follow=true)
Get file status as if by POSIX stat().
This is an optimization pass for GlobalISel generic memory operations.
Error createFileError(const Twine &F, Error E)
Concatenate a source file path and/or name with an Error.
Definition Error.h:1399
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:145
std::string utostr(uint64_t X, bool isNeg=false)
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition Error.h:1305
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
@ Other
Any other memory.
Definition ModRef.h:68
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:155
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1849
std::error_code errnoAsErrorCode()
Helper to get errno as an std::error_code.
Definition Error.h:1240
void consumeError(Error Err)
Consume a Error without doing anything.
Definition Error.h:1083
static constexpr Align Of()
Allow constructions of constexpr Align from types.
Definition Alignment.h:102
Header for MappedFileRegionArena.