LLVM 23.0.0git
DTLTO.cpp
Go to the documentation of this file.
1//===- DTLTO.cpp - Distributed ThinLTO implementation --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// \file
10// This file implements support functions for Distributed ThinLTO, focusing on
11// archive file handling.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/DTLTO/DTLTO.h"
16
19#include "llvm/ADT/StringRef.h"
21#include "llvm/LTO/LTO.h"
22#include "llvm/Object/Archive.h"
25#include "llvm/Support/Path.h"
30
31#include <string>
32
33using namespace llvm;
34
35namespace {
36
37// Writes the content of a memory buffer into a file.
38llvm::Error saveBuffer(StringRef FileBuffer, StringRef FilePath) {
39 std::error_code EC;
41 if (EC) {
43 "Failed to create file %s: %s", FilePath.data(),
44 EC.message().c_str());
45 }
46 OS.write(FileBuffer.data(), FileBuffer.size());
47 if (OS.has_error()) {
49 "Failed writing to file %s", FilePath.data());
50 }
51 return Error::success();
52}
53
54// Compute the file path for a thin archive member.
55//
56// For thin archives, an archive member name is typically a file path relative
57// to the archive file's directory. This function resolves that path.
58SmallString<64> computeThinArchiveMemberPath(const StringRef ArchivePath,
59 const StringRef MemberName) {
60 assert(!ArchivePath.empty() && "An archive file path must be non empty.");
61 SmallString<64> MemberPath;
62 if (sys::path::is_relative(MemberName)) {
63 MemberPath = sys::path::parent_path(ArchivePath);
64 sys::path::append(MemberPath, MemberName);
65 } else
66 MemberPath = MemberName;
67 sys::path::remove_dots(MemberPath, /*remove_dot_dot=*/true);
68 return MemberPath;
69}
70
71} // namespace
72
73// Determines if a file at the given path is a thin archive file.
74//
75// This function uses a cache to avoid repeatedly reading the same file.
76// It reads only the header portion (magic bytes) of the file to identify
77// the archive type.
78Expected<bool> lto::DTLTO::isThinArchive(const StringRef ArchivePath) {
79 // Return cached result if available.
80 auto Cached = ArchiveFiles.find(ArchivePath);
81 if (Cached != ArchiveFiles.end())
82 return Cached->second;
83
84 uint64_t FileSize = -1;
85 bool IsThin = false;
86 std::error_code EC = sys::fs::file_size(ArchivePath, FileSize);
87 if (EC)
89 "Failed to get file size from archive %s: %s",
90 ArchivePath.data(), EC.message().c_str());
91 if (FileSize < sizeof(object::ThinArchiveMagic))
93 "Archive file size is too small %s",
94 ArchivePath.data());
95
96 // Read only the first few bytes containing the magic signature.
97 ErrorOr<std::unique_ptr<MemoryBuffer>> MemBufferOrError =
99 0);
100
101 if ((EC = MemBufferOrError.getError()))
103 "Failed to read from archive %s: %s",
104 ArchivePath.data(), EC.message().c_str());
105
106 StringRef MemBuf = (*MemBufferOrError.get()).getBuffer();
107 if (file_magic::archive != identify_magic(MemBuf))
109 "Unknown format for archive %s",
110 ArchivePath.data());
111
112 IsThin = MemBuf.starts_with(object::ThinArchiveMagic);
113
114 // Cache the result
115 ArchiveFiles[ArchivePath] = IsThin;
116 return IsThin;
117}
118
119// This function performs the following tasks:
120// 1. Adds the input file to the LTO object's list of input files.
121// 2. For thin archive members, generates a new module ID which is a path to a
122// thin archive member file.
123// 3. For regular archive members, generates a new unique module ID.
124// 4. Updates the bitcode module's identifier.
125Expected<std::shared_ptr<lto::InputFile>>
126lto::DTLTO::addInput(std::unique_ptr<lto::InputFile> InputPtr) {
127 TimeTraceScope TimeScope("Add input for DTLTO");
128
129 // Add the input file to the LTO object.
130 InputFiles.emplace_back(InputPtr.release());
131 std::shared_ptr<lto::InputFile> &Input = InputFiles.back();
132
133 StringRef ModuleId = Input->getName();
134 StringRef ArchivePath = Input->getArchivePath();
135
136 // In most cases, the module ID already points to an individual bitcode file
137 // on disk, so no further preparation for distribution is required.
138 if (ArchivePath.empty() && !Input->isFatLTOObject())
139 return Input;
140
141 SmallString<64> NewModuleId;
142 BitcodeModule &BM = Input->getPrimaryBitcodeModule();
143
144 // For a member of a thin archive that is not a FatLTO object, there is an
145 // existing file on disk that can be used, so we can avoid having to
146 // materialize.
147 Expected<bool> UseThinMember =
148 Input->isFatLTOObject() ? false : isThinArchive(ArchivePath);
149 if (!UseThinMember)
150 return UseThinMember.takeError();
151
152 if (*UseThinMember) {
153 // For thin archives, use the path to the actual file.
154 NewModuleId =
155 computeThinArchiveMemberPath(ArchivePath, Input->getMemberName());
156 } else {
157 // For regular archives and FatLTO objects, generate a unique name.
158 Input->setSerializeForDistribution(true);
159
160 // Create unique identifier using process ID and sequence number.
161 std::string PID = utohexstr(sys::Process::getProcessId());
162 std::string Seq = std::to_string(InputFiles.size());
163
164 NewModuleId = sys::path::parent_path(LinkerOutputFile);
165 sys::path::append(NewModuleId, sys::path::filename(ModuleId) + "." + Seq +
166 "." + PID + ".o");
167 }
168
169 // Update the module identifier and save it.
170 BM.setModuleIdentifier(Saver.save(NewModuleId.str()));
171
172 return Input;
173}
174
175// Write the archive member content to a file named after the module ID.
176// If a file with that name already exists, it's likely a leftover from a
177// previously terminated linker process and can be safely overwritten.
178Error lto::DTLTO::saveInputArchiveMember(lto::InputFile *Input) {
179 StringRef ModuleId = Input->getName();
180 if (Input->getSerializeForDistribution()) {
181 TimeTraceScope TimeScope("Serialize bitcode input for DTLTO", ModuleId);
182 // Cleanup this file on abnormal process exit.
183 if (!SaveTemps)
185 MemoryBufferRef MemoryBufferRef = Input->getFileBuffer();
186 if (Error EC = saveBuffer(MemoryBufferRef.getBuffer(), ModuleId))
187 return EC;
188 }
189 return Error::success();
190}
191
192// Iterates through all ThinLTO-enabled input files and saves their content
193// to separate files if they are regular archive members.
194Error lto::DTLTO::saveInputArchiveMembers() {
195 for (auto &Input : InputFiles) {
196 if (!Input->isThinLTO())
197 continue;
198 if (Error EC = saveInputArchiveMember(Input.get()))
199 return EC;
200 }
201 return Error::success();
202}
203
204// Entry point for DTLTO archives support.
205//
206// Sets up the temporary file remover and processes archive members.
207// Must be called after all inputs are added but before optimization begins.
209
210 // Process and save archive members to separate files if needed.
211 if (Error EC = saveInputArchiveMembers())
212 return EC;
213 return Error::success();
214}
215
216// Remove temporary archive member files created to enable distribution.
218 if (!SaveTemps) {
219 TimeTraceScope TimeScope("Remove temporary inputs for DTLTO");
220 for (auto &Input : InputFiles) {
221 if (!Input->getSerializeForDistribution())
222 continue;
223 std::error_code EC =
224 sys::fs::remove(Input->getName(), /*IgnoreNonExisting=*/true);
225 if (EC &&
226 EC != std::make_error_code(std::errc::no_such_file_or_directory))
227 errs() << "warning: could not remove temporary DTLTO input file '"
228 << Input->getName() << "': " << EC.message() << "\n";
229 }
230 }
232}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Provides a library for accessing information about this process and other processes on the operating system.
This file defines the SmallString class.
This file contains some functions that are useful when dealing with strings.
The Input class is used to parse a yaml document into in-memory structs and vectors.
Represents a module in a bitcode file.
void setModuleIdentifier(llvm::StringRef ModuleId)
reference get()
Definition ErrorOr.h:149
std::error_code getError() const
Definition ErrorOr.h:152
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static ErrorSuccess success()
Create a success value.
Definition Error.h:336
Tagged union holding either a T or a Error.
Definition Error.h:485
Error takeError()
Take ownership of the stored error.
Definition Error.h:612
StringRef getBuffer() const
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFileSlice(const Twine &Filename, uint64_t MapSize, uint64_t Offset, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Map a subrange of the specified file as a MemoryBuffer.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better as a string (e.g. operator+ etc).
Definition SmallString.h:26
StringRef str() const
Explicit conversion to StringRef.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition StringRef.h:55
std::string str() const
str - Get the contents as an std::string.
Definition StringRef.h:225
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:261
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:143
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:140
The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.
LLVM_ABI void cleanup() override
Definition DTLTO.cpp:217
LLVM_ABI llvm::Error handleArchiveInputs() override
Definition DTLTO.cpp:208
LLVM_ABI Expected< std::shared_ptr< InputFile > > addInput(std::unique_ptr< InputFile > InputPtr) override
Definition DTLTO.cpp:126
An input file.
Definition LTO.h:114
virtual void cleanup()
Definition LTO.h:471
A raw_ostream that writes to a file descriptor.
static LLVM_ABI Pid getProcessId()
Get the process's identifier.
const char ThinArchiveMagic[]
Definition Archive.h:35
LLVM_ABI std::error_code remove(const Twine &path, bool IgnoreNonExisting=true)
Remove path.
std::error_code file_size(const Twine &Path, uint64_t &Result)
Get file size.
Definition FileSystem.h:684
LLVM_ABI bool remove_dots(SmallVectorImpl< char > &path, bool remove_dot_dot=false, Style style=Style::native)
In-place remove any './' and optionally '../' components from a path.
Definition Path.cpp:765
LLVM_ABI StringRef parent_path(StringRef path LLVM_LIFETIME_BOUND, Style style=Style::native)
Get parent path.
Definition Path.cpp:468
LLVM_ABI bool is_relative(const Twine &path, Style style=Style::native)
Is path relative?
Definition Path.cpp:700
LLVM_ABI StringRef filename(StringRef path LLVM_LIFETIME_BOUND, Style style=Style::native)
Get filename.
Definition Path.cpp:578
LLVM_ABI void append(SmallVectorImpl< char > &path, const Twine &a, const Twine &b="", const Twine &c="", const Twine &d="")
Append to path.
Definition Path.cpp:457
LLVM_ABI bool RemoveFileOnSignal(StringRef Filename, std::string *ErrMsg=nullptr)
This function registers signal handlers to ensure that if a signal gets delivered that the named file is removed.
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
LLVM_ABI file_magic identify_magic(StringRef magic)
Identify the type of a binary file based on how magical it is.
Definition Magic.cpp:33
LLVM_ABI std::error_code inconvertibleErrorCode()
The value returned by this function can be returned from convertToErrorCode for Error values where no sensible translation to std::error_code exists.
Definition Error.cpp:94
std::string utohexstr(uint64_t X, bool LowerCase=false, unsigned Width=0)
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition Error.h:1305
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ archive
ar style archive file
Definition Magic.h:26