LLVM 22.0.0git
OffloadBundle.cpp
Go to the documentation of this file.
1//===- OffloadBundle.cpp - Utilities for offload bundles---*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------===//
8
11#include "llvm/IR/Module.h"
14#include "llvm/Object/Archive.h"
15#include "llvm/Object/Binary.h"
16#include "llvm/Object/COFF.h"
18#include "llvm/Object/Error.h"
23#include "llvm/Support/Timer.h"
24
25using namespace llvm;
26using namespace llvm::object;
27
29 OffloadBundlerTimerGroup("Offload Bundler Timer Group",
30 "Timer group for offload bundler");
31
32// Extract an Offload bundle (usually an Offload Bundle) from a fat_bin
33// section
35 StringRef FileName,
37
38 size_t Offset = 0;
39 size_t NextbundleStart = 0;
40
41 // There could be multiple offloading bundles stored at this section.
42 while (NextbundleStart != StringRef::npos) {
43 std::unique_ptr<MemoryBuffer> Buffer =
45 /*RequiresNullTerminator=*/false);
46
47 // Create the FatBinBundle object. This will also create the Bundle Entry
48 // list info.
49 auto FatBundleOrErr =
50 OffloadBundleFatBin::create(*Buffer, SectionOffset + Offset, FileName);
51 if (!FatBundleOrErr)
52 return FatBundleOrErr.takeError();
53
54 // Add current Bundle to list.
55 Bundles.emplace_back(std::move(**FatBundleOrErr));
56
57 // Find the next bundle by searching for the magic string
58 StringRef Str = Buffer->getBuffer();
59 NextbundleStart = Str.find(StringRef("__CLANG_OFFLOAD_BUNDLE__"), 24);
60
61 if (NextbundleStart != StringRef::npos)
62 Offset += NextbundleStart;
63 }
64
65 return Error::success();
66}
67
69 uint64_t SectionOffset) {
70 uint64_t NumOfEntries = 0;
71
73
74 // Read the Magic String first.
75 StringRef Magic;
76 if (auto EC = Reader.readFixedString(Magic, 24))
78
79 // Read the number of Code Objects (Entries) in the current Bundle.
80 if (auto EC = Reader.readInteger(NumOfEntries))
82
83 NumberOfEntries = NumOfEntries;
84
85 // For each Bundle Entry (code object)
86 for (uint64_t I = 0; I < NumOfEntries; I++) {
87 uint64_t EntrySize;
88 uint64_t EntryOffset;
89 uint64_t EntryIDSize;
90 StringRef EntryID;
91
92 if (Error Err = Reader.readInteger(EntryOffset))
93 return Err;
94
95 if (Error Err = Reader.readInteger(EntrySize))
96 return Err;
97
98 if (Error Err = Reader.readInteger(EntryIDSize))
99 return Err;
100
101 if (Error Err = Reader.readFixedString(EntryID, EntryIDSize))
102 return Err;
103
104 auto Entry = std::make_unique<OffloadBundleEntry>(
105 EntryOffset + SectionOffset, EntrySize, EntryIDSize, EntryID);
106
107 Entries.push_back(*Entry);
108 }
109
110 return Error::success();
111}
112
115 StringRef FileName) {
116 if (Buf.getBufferSize() < 24)
118
119 // Check for magic bytes.
122
123 std::unique_ptr<OffloadBundleFatBin> TheBundle(
124 new OffloadBundleFatBin(Buf, FileName));
125
126 // Read the Bundle Entries
127 Error Err = TheBundle->readEntries(Buf.getBuffer(), SectionOffset);
128 if (Err)
129 return Err;
130
131 return std::move(TheBundle);
132}
133
135 // This will extract all entries in the Bundle
136 for (OffloadBundleEntry &Entry : Entries) {
137
138 if (Entry.Size == 0)
139 continue;
140
141 // Create the output file name, which should be
142 // <fileName>-offset<Offset>-size<Size>.co
143 std::string Str = getFileName().str() + "-offset" + itostr(Entry.Offset) +
144 "-size" + itostr(Entry.Size) + ".co";
145 if (Error Err = object::extractCodeObject(Source, Entry.Offset, Entry.Size,
146 StringRef(Str)))
147 return Err;
148 }
149
150 return Error::success();
151}
152
155 assert((Obj.isELF() || Obj.isCOFF()) && "Invalid file type");
156
157 // Iterate through Sections until we find an offload_bundle section.
158 for (SectionRef Sec : Obj.sections()) {
159 Expected<StringRef> Buffer = Sec.getContents();
160 if (!Buffer)
161 return Buffer.takeError();
162
163 // If it does not start with the reserved magic bytes, just skip this section.
165 (llvm::identify_magic(*Buffer) ==
167
168 uint64_t SectionOffset = 0;
169 if (Obj.isELF()) {
170 SectionOffset = ELFSectionRef(Sec).getOffset();
171 } else if (Obj.isCOFF()) // TODO: add COFF Support
173 "COFF object files not supported.\n");
174
175 MemoryBufferRef Contents(*Buffer, Obj.getFileName());
176
177 if (llvm::identify_magic(*Buffer) ==
179 // Decompress the input if necessary.
180 Expected<std::unique_ptr<MemoryBuffer>> DecompressedBufferOrErr =
182
183 if (!DecompressedBufferOrErr)
184 return createStringError(
186 "Failed to decompress input: " +
187 llvm::toString(DecompressedBufferOrErr.takeError()));
188
189 MemoryBuffer &DecompressedInput = **DecompressedBufferOrErr;
190 if (Error Err = extractOffloadBundle(DecompressedInput, SectionOffset,
191 Obj.getFileName(), Bundles))
192 return Err;
193 } else {
194 if (Error Err = extractOffloadBundle(Contents, SectionOffset,
195 Obj.getFileName(), Bundles))
196 return Err;
197 }
198 }
199 }
200 return Error::success();
201}
202
204 int64_t Size, StringRef OutputFileName) {
206 FileOutputBuffer::create(OutputFileName, Size);
207
208 if (!BufferOrErr)
209 return BufferOrErr.takeError();
210
211 Expected<MemoryBufferRef> InputBuffOrErr = Source.getMemoryBufferRef();
212 if (Error Err = InputBuffOrErr.takeError())
213 return Err;
214
215 std::unique_ptr<FileOutputBuffer> Buf = std::move(*BufferOrErr);
216 std::copy(InputBuffOrErr->getBufferStart() + Offset,
217 InputBuffOrErr->getBufferStart() + Offset + Size,
218 Buf->getBufferStart());
219 if (Error E = Buf->commit())
220 return E;
221
222 return Error::success();
223}
224
225// Given a URI that encodes a file name, offset, and size, extract the data
226// into the code object file <SourceFile>-offset<Offset>-size<Size>.co
228 // create a URI object
231 if (!UriOrErr)
232 return UriOrErr.takeError();
233
234 OffloadBundleURI &Uri = **UriOrErr;
235 std::string OutputFile = Uri.FileName.str();
236 OutputFile +=
237 "-offset" + itostr(Uri.Offset) + "-size" + itostr(Uri.Size) + ".co";
238
239 // Create an ObjectFile object from uri.file_uri
240 auto ObjOrErr = ObjectFile::createObjectFile(Uri.FileName);
241 if (!ObjOrErr)
242 return ObjOrErr.takeError();
243
244 auto Obj = ObjOrErr->getBinary();
245 if (Error Err =
246 object::extractCodeObject(*Obj, Uri.Offset, Uri.Size, OutputFile))
247 return Err;
248
249 return Error::success();
250}
251
252// Utility function to format numbers with commas
/// Format \p Value as a decimal string with a comma between every group of
/// three digits, counted from the right (e.g. 1234567 -> "1,234,567").
///
/// \param Value the number to format.
/// \returns the decimal representation of \p Value; values with three or
/// fewer digits are returned without any separator.
static std::string formatWithCommas(unsigned long long Value) {
  std::string Num = std::to_string(Value);
  // Walk leftwards from the end of the string in steps of three digits. The
  // bound is tested before subtracting, so the unsigned position can never
  // wrap around for strings shorter than three characters.
  for (size_t InsertPosition = Num.length(); InsertPosition > 3;) {
    InsertPosition -= 3;
    Num.insert(InsertPosition, ",");
  }
  return Num;
}
262
263llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
265 bool Verbose) {
266 StringRef Blob = Input.getBuffer();
267
268 if (Blob.size() < V1HeaderSize)
270
271 if (llvm::identify_magic(Blob) !=
273 if (Verbose)
274 llvm::errs() << "Uncompressed bundle.\n";
276 }
277
278 size_t CurrentOffset = MagicSize;
279
280 uint16_t ThisVersion;
281 memcpy(&ThisVersion, Blob.data() + CurrentOffset, sizeof(uint16_t));
282 CurrentOffset += VersionFieldSize;
283
284 uint16_t CompressionMethod;
285 memcpy(&CompressionMethod, Blob.data() + CurrentOffset, sizeof(uint16_t));
286 CurrentOffset += MethodFieldSize;
287
288 uint32_t TotalFileSize;
289 if (ThisVersion >= 2) {
290 if (Blob.size() < V2HeaderSize)
292 "Compressed bundle header size too small");
293 memcpy(&TotalFileSize, Blob.data() + CurrentOffset, sizeof(uint32_t));
294 CurrentOffset += FileSizeFieldSize;
295 }
296
297 uint32_t UncompressedSize;
298 memcpy(&UncompressedSize, Blob.data() + CurrentOffset, sizeof(uint32_t));
299 CurrentOffset += UncompressedSizeFieldSize;
300
301 uint64_t StoredHash;
302 memcpy(&StoredHash, Blob.data() + CurrentOffset, sizeof(uint64_t));
303 CurrentOffset += HashFieldSize;
304
305 llvm::compression::Format CompressionFormat;
306 if (CompressionMethod ==
308 CompressionFormat = llvm::compression::Format::Zlib;
309 else if (CompressionMethod ==
311 CompressionFormat = llvm::compression::Format::Zstd;
312 else
314 "Unknown compressing method");
315
316 llvm::Timer DecompressTimer("Decompression Timer", "Decompression time",
318 if (Verbose)
319 DecompressTimer.startTimer();
320
321 SmallVector<uint8_t, 0> DecompressedData;
322 StringRef CompressedData = Blob.substr(CurrentOffset);
323 if (llvm::Error DecompressionError = llvm::compression::decompress(
324 CompressionFormat, llvm::arrayRefFromStringRef(CompressedData),
325 DecompressedData, UncompressedSize))
327 "Could not decompress embedded file contents: " +
328 llvm::toString(std::move(DecompressionError)));
329
330 if (Verbose) {
331 DecompressTimer.stopTimer();
332
333 double DecompressionTimeSeconds =
334 DecompressTimer.getTotalTime().getWallTime();
335
336 // Recalculate MD5 hash for integrity check.
337 llvm::Timer HashRecalcTimer("Hash Recalculation Timer",
338 "Hash recalculation time",
340 HashRecalcTimer.startTimer();
341 llvm::MD5 Hash;
343 Hash.update(llvm::ArrayRef<uint8_t>(DecompressedData));
344 Hash.final(Result);
345 uint64_t RecalculatedHash = Result.low();
346 HashRecalcTimer.stopTimer();
347 bool HashMatch = (StoredHash == RecalculatedHash);
348
349 double CompressionRate =
350 static_cast<double>(UncompressedSize) / CompressedData.size();
351 double DecompressionSpeedMBs =
352 (UncompressedSize / (1024.0 * 1024.0)) / DecompressionTimeSeconds;
353
354 llvm::errs() << "Compressed bundle format version: " << ThisVersion << "\n";
355 if (ThisVersion >= 2)
356 llvm::errs() << "Total file size (from header): "
357 << formatWithCommas(TotalFileSize) << " bytes\n";
358 llvm::errs() << "Decompression method: "
359 << (CompressionFormat == llvm::compression::Format::Zlib
360 ? "zlib"
361 : "zstd")
362 << "\n"
363 << "Size before decompression: "
364 << formatWithCommas(CompressedData.size()) << " bytes\n"
365 << "Size after decompression: "
366 << formatWithCommas(UncompressedSize) << " bytes\n"
367 << "Compression rate: "
368 << llvm::format("%.2lf", CompressionRate) << "\n"
369 << "Compression ratio: "
370 << llvm::format("%.2lf%%", 100.0 / CompressionRate) << "\n"
371 << "Decompression speed: "
372 << llvm::format("%.2lf MB/s", DecompressionSpeedMBs) << "\n"
373 << "Stored hash: " << llvm::format_hex(StoredHash, 16) << "\n"
374 << "Recalculated hash: "
375 << llvm::format_hex(RecalculatedHash, 16) << "\n"
376 << "Hashes match: " << (HashMatch ? "Yes" : "No") << "\n";
377 }
378
380 llvm::toStringRef(DecompressedData));
381}
382
386 bool Verbose) {
390 "Compression not supported");
391
392 llvm::Timer HashTimer("Hash Calculation Timer", "Hash calculation time",
394 if (Verbose)
395 HashTimer.startTimer();
396 llvm::MD5 Hash;
398 Hash.update(Input.getBuffer());
399 Hash.final(Result);
400 uint64_t TruncatedHash = Result.low();
401 if (Verbose)
402 HashTimer.stopTimer();
403
404 SmallVector<uint8_t, 0> CompressedBuffer;
405 auto BufferUint8 = llvm::ArrayRef<uint8_t>(
406 reinterpret_cast<const uint8_t *>(Input.getBuffer().data()),
407 Input.getBuffer().size());
408
409 llvm::Timer CompressTimer("Compression Timer", "Compression time",
411 if (Verbose)
412 CompressTimer.startTimer();
413 llvm::compression::compress(P, BufferUint8, CompressedBuffer);
414 if (Verbose)
415 CompressTimer.stopTimer();
416
417 uint16_t CompressionMethod = static_cast<uint16_t>(P.format);
418 uint32_t UncompressedSize = Input.getBuffer().size();
419 uint32_t TotalFileSize = MagicNumber.size() + sizeof(TotalFileSize) +
420 sizeof(Version) + sizeof(CompressionMethod) +
421 sizeof(UncompressedSize) + sizeof(TruncatedHash) +
422 CompressedBuffer.size();
423
424 SmallVector<char, 0> FinalBuffer;
425 llvm::raw_svector_ostream OS(FinalBuffer);
426 OS << MagicNumber;
427 OS.write(reinterpret_cast<const char *>(&Version), sizeof(Version));
428 OS.write(reinterpret_cast<const char *>(&CompressionMethod),
429 sizeof(CompressionMethod));
430 OS.write(reinterpret_cast<const char *>(&TotalFileSize),
431 sizeof(TotalFileSize));
432 OS.write(reinterpret_cast<const char *>(&UncompressedSize),
433 sizeof(UncompressedSize));
434 OS.write(reinterpret_cast<const char *>(&TruncatedHash),
435 sizeof(TruncatedHash));
436 OS.write(reinterpret_cast<const char *>(CompressedBuffer.data()),
437 CompressedBuffer.size());
438
439 if (Verbose) {
440 auto MethodUsed =
441 P.format == llvm::compression::Format::Zstd ? "zstd" : "zlib";
442 double CompressionRate =
443 static_cast<double>(UncompressedSize) / CompressedBuffer.size();
444 double CompressionTimeSeconds = CompressTimer.getTotalTime().getWallTime();
445 double CompressionSpeedMBs =
446 (UncompressedSize / (1024.0 * 1024.0)) / CompressionTimeSeconds;
447
448 llvm::errs() << "Compressed bundle format version: " << Version << "\n"
449 << "Total file size (including headers): "
450 << formatWithCommas(TotalFileSize) << " bytes\n"
451 << "Compression method used: " << MethodUsed << "\n"
452 << "Compression level: " << P.level << "\n"
453 << "Binary size before compression: "
454 << formatWithCommas(UncompressedSize) << " bytes\n"
455 << "Binary size after compression: "
456 << formatWithCommas(CompressedBuffer.size()) << " bytes\n"
457 << "Compression rate: "
458 << llvm::format("%.2lf", CompressionRate) << "\n"
459 << "Compression ratio: "
460 << llvm::format("%.2lf%%", 100.0 / CompressionRate) << "\n"
461 << "Compression speed: "
462 << llvm::format("%.2lf MB/s", CompressionSpeedMBs) << "\n"
463 << "Truncated MD5 hash: "
464 << llvm::format_hex(TruncatedHash, 16) << "\n";
465 }
467 llvm::StringRef(FinalBuffer.data(), FinalBuffer.size()));
468}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Module.h This file contains the declarations for the Module class.
#define I(x, y, z)
Definition MD5.cpp:58
Error extractOffloadBundle(MemoryBufferRef Contents, uint64_t SectionOffset, StringRef FileName, SmallVectorImpl< OffloadBundleFatBin > &Bundles)
static llvm::TimerGroup OffloadBundlerTimerGroup("Offload Bundler Timer Group", "Timer group for offload bundler")
static std::string formatWithCommas(unsigned long long Value)
#define P(N)
The Input class is used to parse a yaml document into in-memory structs and vectors.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
Provides read only access to a subclass of BinaryStream.
Error readInteger(T &Dest)
Read an integer of the specified endianness into Dest and update the stream's offset.
LLVM_ABI Error readFixedString(StringRef &Dest, uint32_t Length)
Read a Length byte string into Dest.
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static ErrorSuccess success()
Create a success value.
Definition Error.h:336
Tagged union holding either a T or a Error.
Definition Error.h:485
Error takeError()
Take ownership of the stored error.
Definition Error.h:612
static LLVM_ABI Expected< std::unique_ptr< FileOutputBuffer > > create(StringRef FilePath, size_t Size, unsigned Flags=0)
Factory method to create an OutputBuffer object which manages a read/write buffer of the specified si...
LLVM_ABI void update(ArrayRef< uint8_t > Data)
Updates the hash for the byte stream provided.
Definition MD5.cpp:189
LLVM_ABI void final(MD5Result &Result)
Finishes off the hash and puts the result in result.
Definition MD5.cpp:234
size_t getBufferSize() const
StringRef getBuffer() const
This interface provides simple read-only access to a block of memory, and provides simple methods for...
static std::unique_ptr< MemoryBuffer > getMemBuffer(StringRef InputData, StringRef BufferName="", bool RequiresNullTerminator=true)
Open the specified memory range as a MemoryBuffer.
static std::unique_ptr< MemoryBuffer > getMemBufferCopy(StringRef InputData, const Twine &BufferName="")
Open the specified memory range as a MemoryBuffer, copying the contents and taking ownership of it.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
pointer data()
Return a pointer to the vector's buffer, even if empty().
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::string str() const
str - Get the contents as an std::string.
Definition StringRef.h:225
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:573
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:611
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:140
static constexpr size_t npos
Definition StringRef.h:57
double getWallTime() const
Definition Timer.h:46
The TimerGroup class is used to group together related timers into a single report that is printed wh...
Definition Timer.h:186
This class is used to track the amount of time spent between invocations of its startTimer()/stopTime...
Definition Timer.h:82
LLVM_ABI void stopTimer()
Stop the timer.
Definition Timer.cpp:159
LLVM_ABI void startTimer()
Start the timer running.
Definition Timer.cpp:150
TimeRecord getTotalTime() const
Return the duration for which this timer has been running.
Definition Timer.h:140
LLVM Value Representation.
Definition Value.h:75
bool isCOFF() const
Definition Binary.h:133
StringRef getFileName() const
Definition Binary.cpp:41
bool isELF() const
Definition Binary.h:125
static LLVM_ABI llvm::Expected< std::unique_ptr< llvm::MemoryBuffer > > compress(llvm::compression::Params P, const llvm::MemoryBuffer &Input, bool Verbose=false)
static LLVM_ABI llvm::Expected< std::unique_ptr< llvm::MemoryBuffer > > decompress(llvm::MemoryBufferRef &Input, bool Verbose=false)
This class is the base class for all object file types.
Definition ObjectFile.h:231
section_iterator_range sections() const
Definition ObjectFile.h:331
static Expected< OwningBinary< ObjectFile > > createObjectFile(StringRef ObjectPath)
static LLVM_ABI Expected< std::unique_ptr< OffloadBundleFatBin > > create(MemoryBufferRef, uint64_t SectionOffset, StringRef FileName)
LLVM_ABI Error readEntries(StringRef Section, uint64_t SectionOffset)
OffloadBundleFatBin(MemoryBufferRef Source, StringRef File)
LLVM_ABI Error extractBundle(const ObjectFile &Source)
This is a value type class that represents a single section in the list of sections in the object fil...
Definition ObjectFile.h:83
raw_ostream & write(unsigned char C)
A raw_ostream that writes to an SmallVector or SmallString.
LLVM_ABI bool isAvailable()
LLVM_ABI bool isAvailable()
LLVM_ABI Error decompress(DebugCompressionType T, ArrayRef< uint8_t > Input, uint8_t *Output, size_t UncompressedSize)
LLVM_ABI void compress(Params P, ArrayRef< uint8_t > Input, SmallVectorImpl< uint8_t > &Output)
LLVM_ABI Error extractCodeObject(const ObjectFile &Source, int64_t Offset, int64_t Size, StringRef OutputFileName)
Extract code object memory from the given Source object file at Offset and of Size,...
LLVM_ABI Error extractOffloadBundleByURI(StringRef URIstr)
Extracts an Offload Bundle Entry given by URI.
LLVM_ABI Error extractOffloadBundleFatBinary(const ObjectFile &Obj, SmallVectorImpl< OffloadBundleFatBin > &Bundles)
Extracts fat binary in binary clang-offload-bundler format from object Obj and return it in Bundles.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI file_magic identify_magic(StringRef magic)
Identify the type of a binary file based on how magical it is.
Definition Magic.cpp:33
@ Offset
Definition DWP.cpp:477
ArrayRef< CharT > arrayRefFromStringRef(StringRef Input)
Construct an array ref of unsigned chars from a string ref.
LLVM_ABI std::error_code inconvertibleErrorCode()
The value returned by this function can be returned from convertToErrorCode for Error values where no...
Definition Error.cpp:98
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition Error.h:1305
FormattedNumber format_hex(uint64_t N, unsigned Width, bool Upper=false)
format_hex - Output N as a fixed width hexadecimal.
Definition Format.h:180
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition Format.h:118
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
LLVM_ABI Error errorCodeToError(std::error_code EC)
Helper for converting an std::error_code to a Error.
Definition Error.cpp:111
StringRef toStringRef(bool B)
Construct a string ref from a boolean.
std::string itostr(int64_t X)
@ offload_bundle
Clang offload bundle file.
Definition Magic.h:60
@ offload_bundle_compressed
Compressed clang offload bundle file.
Definition Magic.h:61
Bundle entry in binary clang-offload-bundler format.
static Expected< std::unique_ptr< OffloadBundleURI > > createOffloadBundleURI(StringRef Str, UriTypeT Type)