LLVM 22.0.0git
OffloadWrapper.cpp
Go to the documentation of this file.
1//===- OffloadWrapper.cpp ---------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "llvm/ADT/ArrayRef.h"
12#include "llvm/ADT/StringRef.h"
13#include "llvm/ADT/Twine.h"
16#include "llvm/IR/Constants.h"
19#include "llvm/IR/IRBuilder.h"
20#include "llvm/IR/LLVMContext.h"
21#include "llvm/IR/Module.h"
22#include "llvm/IR/Type.h"
24#include "llvm/Support/Error.h"
30
31#include <memory>
32#include <utility>
33
34using namespace llvm;
35using namespace llvm::object;
36using namespace llvm::offloading;
37
38namespace {
39/// Magic number that begins the section containing the CUDA fatbinary.
40constexpr unsigned CudaFatMagic = 0x466243b1;
/// Magic number written into the fatbinary wrapper instead of CudaFatMagic
/// when a HIP image is wrapped. Its bytes 0x48 0x49 0x50 0x46 spell "HIPF"
/// in ASCII.
41constexpr unsigned HIPFatMagic = 0x48495046;
42
44 return M.getDataLayout().getIntPtrType(M.getContext());
45}
46
47// struct __tgt_device_image {
48// void *ImageStart;
49// void *ImageEnd;
50// __tgt_offload_entry *EntriesBegin;
51// __tgt_offload_entry *EntriesEnd;
52// };
// Returns the named struct type "__tgt_device_image". The type is looked up
// by name in the module's LLVMContext first and is only created when that
// lookup finds nothing.
53StructType *getDeviceImageTy(Module &M) {
54 LLVMContext &C = M.getContext();
55 StructType *ImageTy = StructType::getTypeByName(C, "__tgt_device_image");
56 if (!ImageTy)
57 ImageTy =
58 StructType::create("__tgt_device_image", PointerType::getUnqual(C),
// NOTE(review): the listing jumps from line 58 to line 61, so the remaining
// StructType::create arguments are not visible here.
61 return ImageTy;
62}
63
64PointerType *getDeviceImagePtrTy(Module &M) {
65 return PointerType::getUnqual(M.getContext());
66}
67
68// struct __tgt_bin_desc {
69// int32_t NumDeviceImages;
70// __tgt_device_image *DeviceImages;
71// __tgt_offload_entry *HostEntriesBegin;
72// __tgt_offload_entry *HostEntriesEnd;
73// };
// Returns the named struct type "__tgt_bin_desc". The type is looked up by
// name in the module's LLVMContext first and is only created when that
// lookup finds nothing.
74StructType *getBinDescTy(Module &M) {
75 LLVMContext &C = M.getContext();
76 StructType *DescTy = StructType::getTypeByName(C, "__tgt_bin_desc");
77 if (!DescTy)
78 DescTy = StructType::create(
79 "__tgt_bin_desc", Type::getInt32Ty(C), getDeviceImagePtrTy(M),
// NOTE(review): the listing jumps from line 79 to line 81, so the trailing
// StructType::create arguments are not visible here.
81 return DescTy;
82}
83
84PointerType *getBinDescPtrTy(Module &M) {
85 return PointerType::getUnqual(M.getContext());
86}
87
88/// Creates binary descriptor for the given device images. Binary descriptor
89/// is an object that is passed to the offloading runtime at program startup
90/// and it describes all device images available in the executable or shared
91/// library. It is defined as follows
92///
93/// __attribute__((visibility("hidden")))
94/// extern __tgt_offload_entry *__start_omp_offloading_entries;
95/// __attribute__((visibility("hidden")))
96/// extern __tgt_offload_entry *__stop_omp_offloading_entries;
97///
98/// static const char Image0[] = { <Bufs.front() contents> };
99/// ...
100/// static const char ImageN[] = { <Bufs.back() contents> };
101///
102/// static const __tgt_device_image Images[] = {
103/// {
104/// Image0, /*ImageStart*/
105/// Image0 + sizeof(Image0), /*ImageEnd*/
106/// __start_omp_offloading_entries, /*EntriesBegin*/
107/// __stop_omp_offloading_entries /*EntriesEnd*/
108/// },
109/// ...
110/// {
111/// ImageN, /*ImageStart*/
112/// ImageN + sizeof(ImageN), /*ImageEnd*/
113/// __start_omp_offloading_entries, /*EntriesBegin*/
114/// __stop_omp_offloading_entries /*EntriesEnd*/
115/// }
116/// };
117///
118/// static const __tgt_bin_desc BinDesc = {
119/// sizeof(Images) / sizeof(Images[0]), /*NumDeviceImages*/
120/// Images, /*DeviceImages*/
121/// __start_omp_offloading_entries, /*HostEntriesBegin*/
122/// __stop_omp_offloading_entries /*HostEntriesEnd*/
123/// };
124///
125/// Global variable that represents BinDesc is returned.
// NOTE(review): the line numbers embedded at the start of each line are not
// contiguous (142, 144, 147, 158, 189 and 203 are absent), so a few
// statements below are shown without some of their arguments.
126GlobalVariable *createBinDesc(Module &M, ArrayRef<ArrayRef<char>> Bufs,
127 EntryArrayTy EntryArray, StringRef Suffix,
128 bool Relocatable) {
129 LLVMContext &C = M.getContext();
130 auto [EntriesB, EntriesE] = EntryArray;
131
// {0, 0} index pair reused for GEPs that address the first element of a
// global array.
132 auto *Zero = ConstantInt::get(getSizeTTy(M), 0u);
133 Constant *ZeroZero[] = {Zero, Zero};
134
135 // Create initializer for the images array.
136 SmallVector<Constant *, 4u> ImagesInits;
137 ImagesInits.reserve(Bufs.size());
138 for (ArrayRef<char> Buf : Bufs) {
139 // We embed the full offloading entry so the binary utilities can parse it.
140 auto *Data = ConstantDataArray::get(C, Buf);
141 auto *Image = new GlobalVariable(M, Data->getType(), /*isConstant=*/true,
143 ".omp_offloading.device_image" + Suffix);
// The section name depends on whether the image is relocatable.
145 Image->setSection(Relocatable ? ".llvm.offloading.relocatable"
146 : ".llvm.offloading");
148
149 StringRef Binary(Buf.data(), Buf.size());
150
// By default the device image spans the whole buffer.
151 uint64_t BeginOffset = 0;
152 uint64_t EndOffset = Binary.size();
153
154 // Optionally use an offload binary for its offload dumping support.
155 // The device image struct contains the pointer to the beginning and end of
156 // the image stored inside of the offload binary. There should only be one
157 // of these for each buffer so we parse it out manually.
// NOTE(review): line 158 is absent from this listing; presumably it opens
// the conditional that the closing brace on line 167 ends - confirm against
// the original source.
159 const auto *Header =
160 reinterpret_cast<const object::OffloadBinary::Header *>(
161 Binary.bytes_begin());
162 const auto *Entry =
163 reinterpret_cast<const object::OffloadBinary::Entry *>(
164 Binary.bytes_begin() + Header->EntryOffset);
165 BeginOffset = Entry->ImageOffset;
166 EndOffset = Entry->ImageOffset + Entry->ImageSize;
167 }
168
// Despite its name, `Size` holds the end offset; it is used as the GEP index
// that produces the one-past-the-end image pointer.
169 auto *Begin = ConstantInt::get(getSizeTTy(M), BeginOffset);
170 auto *Size = ConstantInt::get(getSizeTTy(M), EndOffset);
171 Constant *ZeroBegin[] = {Zero, Begin};
172 Constant *ZeroSize[] = {Zero, Size};
173
174 auto *ImageB =
175 ConstantExpr::getGetElementPtr(Image->getValueType(), Image, ZeroBegin);
176 auto *ImageE =
177 ConstantExpr::getGetElementPtr(Image->getValueType(), Image, ZeroSize);
178
// Every image record shares the same entry range taken from EntryArray.
179 ImagesInits.push_back(ConstantStruct::get(getDeviceImageTy(M), ImageB,
180 ImageE, EntriesB, EntriesE));
181 }
182
183 // Then create images array.
184 auto *ImagesData = ConstantArray::get(
185 ArrayType::get(getDeviceImageTy(M), ImagesInits.size()), ImagesInits);
186
187 auto *Images =
188 new GlobalVariable(M, ImagesData->getType(), /*isConstant*/ true,
190 ".omp_offloading.device_images" + Suffix);
191 Images->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
192
193 auto *ImagesB =
194 ConstantExpr::getGetElementPtr(Images->getValueType(), Images, ZeroZero);
195
196 // And finally create the binary descriptor object.
197 auto *DescInit = ConstantStruct::get(
198 getBinDescTy(M),
199 ConstantInt::get(Type::getInt32Ty(C), ImagesInits.size()), ImagesB,
200 EntriesB, EntriesE);
201
202 return new GlobalVariable(M, DescInit->getType(), /*isConstant=*/true,
204 ".omp_offloading.descriptor" + Suffix);
205}
206
// Builds a function named ".omp_offloading.descriptor_unreg" + Suffix whose
// body calls __tgt_unregister_lib(BinDesc) and returns. The new function is
// placed in the ".text.startup" section and returned to the caller.
207Function *createUnregisterFunction(Module &M, GlobalVariable *BinDesc,
208 StringRef Suffix) {
209 LLVMContext &C = M.getContext();
210 auto *FuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
211 auto *Func =
// NOTE(review): line 212 is absent from this listing, so the start of the
// expression that creates `Func` is not visible.
213 ".omp_offloading.descriptor_unreg" + Suffix, &M);
214 Func->setSection(".text.startup");
215
216 // Get __tgt_unregister_lib function declaration.
217 auto *UnRegFuncTy = FunctionType::get(Type::getVoidTy(C), getBinDescPtrTy(M),
218 /*isVarArg*/ false);
219 FunctionCallee UnRegFuncC =
220 M.getOrInsertFunction("__tgt_unregister_lib", UnRegFuncTy);
221
222 // Construct function body
223 IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func));
224 Builder.CreateCall(UnRegFuncC, BinDesc);
225 Builder.CreateRetVoid();
226
227 return Func;
228}
229
// Builds ".omp_offloading.descriptor_reg" + Suffix, a function that calls
// __tgt_register_lib(BinDesc) and then registers the matching unregister
// function with atexit. The function is appended to the module's global
// constructors with priority 101.
230void createRegisterFunction(Module &M, GlobalVariable *BinDesc,
231 StringRef Suffix) {
232 LLVMContext &C = M.getContext();
233 auto *FuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
// NOTE(review): line 234 is absent from this listing; presumably it declares
// `Func`, which the following lines use - confirm against the original.
235 ".omp_offloading.descriptor_reg" + Suffix, &M);
236 Func->setSection(".text.startup");
237
238 // Get __tgt_register_lib function declaration.
239 auto *RegFuncTy = FunctionType::get(Type::getVoidTy(C), getBinDescPtrTy(M),
240 /*isVarArg*/ false);
241 FunctionCallee RegFuncC =
242 M.getOrInsertFunction("__tgt_register_lib", RegFuncTy);
243
// atexit is declared here as taking one pointer and returning i32.
244 auto *AtExitTy = FunctionType::get(
245 Type::getInt32Ty(C), PointerType::getUnqual(C), /*isVarArg=*/false);
246 FunctionCallee AtExit = M.getOrInsertFunction("atexit", AtExitTy);
247
248 Function *UnregFunc = createUnregisterFunction(M, BinDesc, Suffix);
249
250 // Construct function body
251 IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func));
252
253 Builder.CreateCall(RegFuncC, BinDesc);
254
255 // Register the destructors with 'atexit'. This is expected by the CUDA
256 // runtime and ensures that we clean up before dynamic objects are destroyed.
257 // This needs to be done after plugin initialization to ensure that it is
258 // called before the plugin runtime is destroyed.
259 Builder.CreateCall(AtExit, UnregFunc);
260 Builder.CreateRetVoid();
261
262 // Add this function to constructors.
263 appendToGlobalCtors(M, Func, /*Priority=*/101);
264}
265
266// struct fatbin_wrapper {
267// int32_t magic;
268// int32_t version;
269// void *image;
270// void *reserved;
271//};
// Returns the named struct type "fatbin_wrapper". The type is looked up by
// name in the module's LLVMContext first and is only created when that
// lookup finds nothing.
272StructType *getFatbinWrapperTy(Module &M) {
273 LLVMContext &C = M.getContext();
274 StructType *FatbinTy = StructType::getTypeByName(C, "fatbin_wrapper");
275 if (!FatbinTy)
276 FatbinTy = StructType::create(
277 "fatbin_wrapper", Type::getInt32Ty(C), Type::getInt32Ty(C),
// NOTE(review): the listing jumps from line 277 to line 279, so the trailing
// StructType::create arguments are not visible here.
279 return FatbinTy;
280}
281
282/// Embed the image \p Image into the module \p M so it can be found by the
283/// runtime.
// NOTE(review): listing lines 296, 307 and 308 are absent, so the GlobalVariable
// construction for the image and the tail of the wrapper initializer are
// shown without some of their arguments.
284GlobalVariable *createFatbinDesc(Module &M, ArrayRef<char> Image, bool IsHIP,
285 StringRef Suffix) {
286 LLVMContext &C = M.getContext();
// NOTE(review): Int8PtrTy is not referenced in any visible line of this
// function; presumably it is used on one of the missing lines.
287 llvm::Type *Int8PtrTy = PointerType::getUnqual(C);
288 const llvm::Triple &Triple = M.getTargetTriple();
289
290 // Create the global string containing the fatbinary.
// Section choice: HIP uses ".hip_fatbin"; CUDA uses a Mach-O style
// "segment,section" name on macOS and ".nv_fatbin" otherwise.
291 StringRef FatbinConstantSection =
292 IsHIP ? ".hip_fatbin"
293 : (Triple.isMacOSX() ? "__NV_CUDA,__nv_fatbin" : ".nv_fatbin");
294 auto *Data = ConstantDataArray::get(C, Image);
295 auto *Fatbin = new GlobalVariable(M, Data->getType(), /*isConstant*/ true,
297 ".fatbin_image" + Suffix);
298 Fatbin->setSection(FatbinConstantSection);
299
300 // Create the fatbinary wrapper
301 StringRef FatbinWrapperSection = IsHIP ? ".hipFatBinSegment"
302 : Triple.isMacOSX() ? "__NV_CUDA,__fatbin"
303 : ".nvFatBinSegment";
// First two wrapper fields: the magic number for the chosen runtime and the
// constant 1 (the `version` field of the layout comment preceding
// getFatbinWrapperTy).
304 Constant *FatbinWrapper[] = {
305 ConstantInt::get(Type::getInt32Ty(C), IsHIP ? HIPFatMagic : CudaFatMagic),
306 ConstantInt::get(Type::getInt32Ty(C), 1),
309
310 Constant *FatbinInitializer =
311 ConstantStruct::get(getFatbinWrapperTy(M), FatbinWrapper);
312
// The wrapper is an internal, constant global placed in the wrapper section
// and aligned to 8 bytes.
313 auto *FatbinDesc =
314 new GlobalVariable(M, getFatbinWrapperTy(M),
315 /*isConstant*/ true, GlobalValue::InternalLinkage,
316 FatbinInitializer, ".fatbin_wrapper" + Suffix);
317 FatbinDesc->setSection(FatbinWrapperSection);
318 FatbinDesc->setAlignment(Align(8));
319
320 return FatbinDesc;
321}
322
323/// Create the register globals function. We will iterate all of the offloading
324/// entries stored at the begin / end symbols and register them according to
325/// their type. This creates the following function in IR:
326///
327/// extern struct __tgt_offload_entry __start_cuda_offloading_entries;
328/// extern struct __tgt_offload_entry __stop_cuda_offloading_entries;
329///
330/// extern void __cudaRegisterFunction(void **, void *, void *, void *, int,
331/// void *, void *, void *, void *, int *);
332/// extern void __cudaRegisterVar(void **, void *, void *, void *, int32_t,
333/// int64_t, int32_t, int32_t);
334///
335/// void __cudaRegisterTest(void **fatbinHandle) {
336/// for (struct __tgt_offload_entry *entry = &__start_cuda_offloading_entries;
337/// entry != &__stop_cuda_offloading_entries; ++entry) {
338/// if (entry->Kind != OFK_CUDA)
339/// continue;
340///
341/// if (!entry->size)
342/// __cudaRegisterFunction(fatbinHandle, entry->addr, entry->name,
343/// entry->name, -1, 0, 0, 0, 0, 0);
344/// else
345/// __cudaRegisterVar(fatbinHandle, entry->addr, entry->name, entry->name,
346/// 0, entry->size, 0, 0);
347/// }
348/// }
// NOTE(review): the line numbers embedded at the start of each line are not
// contiguous, so several statements below (function-type construction, flag
// masks, the kind and function-entry comparisons, the loop-bound GEPs) are
// shown without some of their source lines.
349Function *createRegisterGlobalsFunction(Module &M, bool IsHIP,
350 EntryArrayTy EntryArray,
351 StringRef Suffix,
352 bool EmitSurfacesAndTextures) {
353 LLVMContext &C = M.getContext();
354 auto [EntriesB, EntriesE] = EntryArray;
355
356 // Get the __cudaRegisterFunction function declaration.
// All three aliases below are the same address-space-0 pointer type; the
// distinct names only document the intended pointee.
357 PointerType *Int8PtrTy = PointerType::get(C, 0);
358 PointerType *Int8PtrPtrTy = PointerType::get(C, 0);
359 PointerType *Int32PtrTy = PointerType::get(C, 0);
360 auto *RegFuncTy = FunctionType::get(
362 {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Type::getInt32Ty(C),
363 Int8PtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Int32PtrTy},
364 /*isVarArg*/ false);
365 FunctionCallee RegFunc = M.getOrInsertFunction(
366 IsHIP ? "__hipRegisterFunction" : "__cudaRegisterFunction", RegFuncTy);
367
368 // Get the __cudaRegisterVar function declaration.
369 auto *RegVarTy = FunctionType::get(
371 {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Type::getInt32Ty(C),
373 /*isVarArg*/ false);
374 FunctionCallee RegVar = M.getOrInsertFunction(
375 IsHIP ? "__hipRegisterVar" : "__cudaRegisterVar", RegVarTy);
376
377 // Get the __cudaRegisterManagedVar function declaration.
378 FunctionType *RegManagedVarTy =
380 {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy,
382 /*isVarArg=*/false);
383 FunctionCallee RegManagedVar = M.getOrInsertFunction(
384 IsHIP ? "__hipRegisterManagedVar" : "__cudaRegisterManagedVar",
385 RegManagedVarTy);
386
387 // Get the __cudaRegisterSurface function declaration.
388 FunctionType *RegSurfaceTy =
390 {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy,
392 /*isVarArg=*/false);
393 FunctionCallee RegSurface = M.getOrInsertFunction(
394 IsHIP ? "__hipRegisterSurface" : "__cudaRegisterSurface", RegSurfaceTy);
395
396 // Get the __cudaRegisterTexture function declaration.
397 FunctionType *RegTextureTy = FunctionType::get(
399 {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Type::getInt32Ty(C),
401 /*isVarArg=*/false);
402 FunctionCallee RegTexture = M.getOrInsertFunction(
403 IsHIP ? "__hipRegisterTexture" : "__cudaRegisterTexture", RegTextureTy);
404
// The generated function takes a single pointer argument (the fatbinary
// handle forwarded to every registration call below) and returns void.
405 auto *RegGlobalsTy = FunctionType::get(Type::getVoidTy(C), Int8PtrPtrTy,
406 /*isVarArg*/ false);
407 auto *RegGlobalsFn =
409 IsHIP ? ".hip.globals_reg" : ".cuda.globals_reg", &M);
410 RegGlobalsFn->setSection(".text.startup");
411
412 // Create the loop to register all the entries.
413 IRBuilder<> Builder(BasicBlock::Create(C, "entry", RegGlobalsFn));
414 auto *EntryBB = BasicBlock::Create(C, "while.entry", RegGlobalsFn);
415 auto *IfKindBB = BasicBlock::Create(C, "if.kind", RegGlobalsFn);
416 auto *IfThenBB = BasicBlock::Create(C, "if.then", RegGlobalsFn);
417 auto *IfElseBB = BasicBlock::Create(C, "if.else", RegGlobalsFn);
418 auto *SwGlobalBB = BasicBlock::Create(C, "sw.global", RegGlobalsFn);
419 auto *SwManagedBB = BasicBlock::Create(C, "sw.managed", RegGlobalsFn);
420 auto *SwSurfaceBB = BasicBlock::Create(C, "sw.surface", RegGlobalsFn);
421 auto *SwTextureBB = BasicBlock::Create(C, "sw.texture", RegGlobalsFn);
422 auto *IfEndBB = BasicBlock::Create(C, "if.end", RegGlobalsFn);
423 auto *ExitBB = BasicBlock::Create(C, "while.end", RegGlobalsFn);
424
// Skip the loop entirely when the begin and end entry pointers are equal.
425 auto *EntryCmp = Builder.CreateICmpNE(EntriesB, EntriesE);
426 Builder.CreateCondBr(EntryCmp, EntryBB, ExitBB);
427 Builder.SetInsertPoint(EntryBB);
// PHI holding the current entry pointer; its two incoming values are added
// near the end of this function.
428 auto *Entry = Builder.CreatePHI(PointerType::getUnqual(C), 2, "entry");
// Load the fields of the current entry. The struct field index used by each
// GEP is the second constant: 4 (addr), 8 (aux_addr), 2 (kind), 5 (name),
// 6 (size), 3 (flags) and 7 (data).
429 auto *AddrPtr =
430 Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry,
431 {ConstantInt::get(Type::getInt32Ty(C), 0),
432 ConstantInt::get(Type::getInt32Ty(C), 4)});
433 auto *Addr = Builder.CreateLoad(Int8PtrTy, AddrPtr, "addr");
434 auto *AuxAddrPtr =
435 Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry,
436 {ConstantInt::get(Type::getInt32Ty(C), 0),
437 ConstantInt::get(Type::getInt32Ty(C), 8)});
438 auto *AuxAddr = Builder.CreateLoad(Int8PtrTy, AuxAddrPtr, "aux_addr");
439 auto *KindPtr =
440 Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry,
441 {ConstantInt::get(Type::getInt32Ty(C), 0),
442 ConstantInt::get(Type::getInt32Ty(C), 2)});
443 auto *Kind = Builder.CreateLoad(Type::getInt16Ty(C), KindPtr, "kind");
444 auto *NamePtr =
445 Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry,
446 {ConstantInt::get(Type::getInt32Ty(C), 0),
447 ConstantInt::get(Type::getInt32Ty(C), 5)});
448 auto *Name = Builder.CreateLoad(Int8PtrTy, NamePtr, "name");
449 auto *SizePtr =
450 Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry,
451 {ConstantInt::get(Type::getInt32Ty(C), 0),
452 ConstantInt::get(Type::getInt32Ty(C), 6)});
453 auto *Size = Builder.CreateLoad(Type::getInt64Ty(C), SizePtr, "size");
454 auto *FlagsPtr =
455 Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry,
456 {ConstantInt::get(Type::getInt32Ty(C), 0),
457 ConstantInt::get(Type::getInt32Ty(C), 3)});
458 auto *Flags = Builder.CreateLoad(Type::getInt32Ty(C), FlagsPtr, "flags");
459 auto *DataPtr =
460 Builder.CreateInBoundsGEP(offloading::getEntryTy(M), Entry,
461 {ConstantInt::get(Type::getInt32Ty(C), 0),
462 ConstantInt::get(Type::getInt32Ty(C), 7)});
// The data field is loaded as i64 and then truncated; the target type is on
// a line missing from this listing.
463 auto *Data = Builder.CreateTrunc(
464 Builder.CreateLoad(Type::getInt64Ty(C), DataPtr, "data"),
// The low three bits of the flags (mask 0x7) select the entry type that the
// switch further down dispatches on.
466 auto *Type = Builder.CreateAnd(
467 Flags, ConstantInt::get(Type::getInt32Ty(C), 0x7), "type");
468
469 // Extract the flags stored in the bit-field and convert them to C booleans.
// Each masked flag is shifted right (by 3, 4 and 5 respectively); the mask
// operands themselves are on lines missing from this listing.
470 auto *ExternBit = Builder.CreateAnd(
471 Flags, ConstantInt::get(Type::getInt32Ty(C),
473 auto *Extern = Builder.CreateLShr(
474 ExternBit, ConstantInt::get(Type::getInt32Ty(C), 3), "extern");
475 auto *ConstantBit = Builder.CreateAnd(
476 Flags, ConstantInt::get(Type::getInt32Ty(C),
478 auto *Const = Builder.CreateLShr(
479 ConstantBit, ConstantInt::get(Type::getInt32Ty(C), 4), "constant");
480 auto *NormalizedBit = Builder.CreateAnd(
481 Flags, ConstantInt::get(Type::getInt32Ty(C),
483 auto *Normalized = Builder.CreateLShr(
484 NormalizedBit, ConstantInt::get(Type::getInt32Ty(C), 5), "normalized");
// Entries whose kind does not match are skipped by branching straight to
// if.end; the value compared against is not visible in this listing.
485 auto *KindCond = Builder.CreateICmpEQ(
486 Kind, ConstantInt::get(Type::getInt16Ty(C),
489 Builder.CreateCondBr(KindCond, IfKindBB, IfEndBB);
490 Builder.SetInsertPoint(IfKindBB);
491 auto *FnCond = Builder.CreateICmpEQ(
493 Builder.CreateCondBr(FnCond, IfThenBB, IfElseBB);
494
495 // Create kernel registration code.
496 Builder.SetInsertPoint(IfThenBB);
497 Builder.CreateCall(RegFunc, {RegGlobalsFn->arg_begin(), Addr, Name, Name,
498 ConstantInt::get(Type::getInt32Ty(C), -1),
499 ConstantPointerNull::get(Int8PtrTy),
500 ConstantPointerNull::get(Int8PtrTy),
501 ConstantPointerNull::get(Int8PtrTy),
502 ConstantPointerNull::get(Int8PtrTy),
503 ConstantPointerNull::get(Int32PtrTy)});
504 Builder.CreateBr(IfEndBB);
505 Builder.SetInsertPoint(IfElseBB);
506
// Entry types without a case below fall through to if.end (the switch
// default) and are not registered.
507 auto *Switch = Builder.CreateSwitch(Type, IfEndBB);
508 // Create global variable registration code.
509 Builder.SetInsertPoint(SwGlobalBB);
510 Builder.CreateCall(RegVar,
511 {RegGlobalsFn->arg_begin(), Addr, Name, Name, Extern, Size,
512 Const, ConstantInt::get(Type::getInt32Ty(C), 0)});
513 Builder.CreateBr(IfEndBB);
514 Switch->addCase(Builder.getInt32(llvm::offloading::OffloadGlobalEntry),
515 SwGlobalBB);
516
517 // Create managed variable registration code.
518 Builder.SetInsertPoint(SwManagedBB);
519 Builder.CreateCall(RegManagedVar, {RegGlobalsFn->arg_begin(), AuxAddr, Addr,
520 Name, Size, Data});
521 Builder.CreateBr(IfEndBB);
522 Switch->addCase(Builder.getInt32(llvm::offloading::OffloadGlobalManagedEntry),
523 SwManagedBB);
524 // Create surface variable registration code.
// When EmitSurfacesAndTextures is false this block only branches to if.end,
// so surface entries are skipped.
525 Builder.SetInsertPoint(SwSurfaceBB);
526 if (EmitSurfacesAndTextures)
527 Builder.CreateCall(RegSurface, {RegGlobalsFn->arg_begin(), Addr, Name, Name,
528 Data, Extern});
529 Builder.CreateBr(IfEndBB);
530 Switch->addCase(Builder.getInt32(llvm::offloading::OffloadGlobalSurfaceEntry),
531 SwSurfaceBB);
532
533 // Create texture variable registration code.
// As for surfaces, the call is only emitted when EmitSurfacesAndTextures is
// true.
534 Builder.SetInsertPoint(SwTextureBB);
535 if (EmitSurfacesAndTextures)
536 Builder.CreateCall(RegTexture, {RegGlobalsFn->arg_begin(), Addr, Name, Name,
537 Data, Normalized, Extern});
538 Builder.CreateBr(IfEndBB);
539 Switch->addCase(Builder.getInt32(llvm::offloading::OffloadGlobalTextureEntry),
540 SwTextureBB);
541
// Loop latch: advance to the next entry, compare it against the end pointer,
// and wire both incoming values into the PHI created above.
542 Builder.SetInsertPoint(IfEndBB);
543 auto *NewEntry = Builder.CreateInBoundsGEP(
544 offloading::getEntryTy(M), Entry, ConstantInt::get(getSizeTTy(M), 1));
545 auto *Cmp = Builder.CreateICmpEQ(
546 NewEntry,
548 ArrayType::get(offloading::getEntryTy(M), 0), EntriesE,
549 ArrayRef<Constant *>({ConstantInt::get(getSizeTTy(M), 0),
550 ConstantInt::get(getSizeTTy(M), 0)})));
551 Entry->addIncoming(
553 ArrayType::get(offloading::getEntryTy(M), 0), EntriesB,
554 ArrayRef<Constant *>({ConstantInt::get(getSizeTTy(M), 0),
555 ConstantInt::get(getSizeTTy(M), 0)})),
556 &RegGlobalsFn->getEntryBlock());
557 Entry->addIncoming(NewEntry, IfEndBB);
558 Builder.CreateCondBr(Cmp, ExitBB, EntryBB);
559 Builder.SetInsertPoint(ExitBB);
560 Builder.CreateRetVoid();
561
562 return RegGlobalsFn;
563}
564
565// Create the constructor and destructor to register the fatbinary with the CUDA
566// runtime.
// NOTE(review): listing lines 574, 580, 608 and 615 are absent, so the
// Function::Create calls, the handle global's initializer and the argument
// of the register call are shown incompletely.
567void createRegisterFatbinFunction(Module &M, GlobalVariable *FatbinDesc,
568 bool IsHIP, EntryArrayTy EntryArray,
569 StringRef Suffix,
570 bool EmitSurfacesAndTextures) {
571 LLVMContext &C = M.getContext();
572 auto *CtorFuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
573 auto *CtorFunc = Function::Create(
575 (IsHIP ? ".hip.fatbin_reg" : ".cuda.fatbin_reg") + Suffix, &M);
576 CtorFunc->setSection(".text.startup");
577
578 auto *DtorFuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
579 auto *DtorFunc = Function::Create(
581 (IsHIP ? ".hip.fatbin_unreg" : ".cuda.fatbin_unreg") + Suffix, &M);
582 DtorFunc->setSection(".text.startup");
583
584 auto *PtrTy = PointerType::getUnqual(C);
585
586 // Get the __cudaRegisterFatBinary function declaration.
587 auto *RegFatTy = FunctionType::get(PtrTy, PtrTy, /*isVarArg=*/false);
588 FunctionCallee RegFatbin = M.getOrInsertFunction(
589 IsHIP ? "__hipRegisterFatBinary" : "__cudaRegisterFatBinary", RegFatTy);
590 // Get the __cudaRegisterFatBinaryEnd function declaration.
// This callee has no HIP-named variant here; it is only called when !IsHIP
// (see the constructor body below).
591 auto *RegFatEndTy =
592 FunctionType::get(Type::getVoidTy(C), PtrTy, /*isVarArg=*/false);
593 FunctionCallee RegFatbinEnd =
594 M.getOrInsertFunction("__cudaRegisterFatBinaryEnd", RegFatEndTy);
595 // Get the __cudaUnregisterFatBinary function declaration.
596 auto *UnregFatTy =
597 FunctionType::get(Type::getVoidTy(C), PtrTy, /*isVarArg=*/false);
598 FunctionCallee UnregFatbin = M.getOrInsertFunction(
599 IsHIP ? "__hipUnregisterFatBinary" : "__cudaUnregisterFatBinary",
600 UnregFatTy);
601
602 auto *AtExitTy =
603 FunctionType::get(Type::getInt32Ty(C), PtrTy, /*isVarArg=*/false);
604 FunctionCallee AtExit = M.getOrInsertFunction("atexit", AtExitTy);
605
// Internal, non-constant global that carries the handle returned by the
// register call from the constructor to the destructor.
606 auto *BinaryHandleGlobal = new llvm::GlobalVariable(
607 M, PtrTy, false, llvm::GlobalValue::InternalLinkage,
609 (IsHIP ? ".hip.binary_handle" : ".cuda.binary_handle") + Suffix);
610
611 // Create the constructor to register this image with the runtime.
// Constructor sequence: register the fatbinary, store the handle, call the
// generated globals-registration function, call the "End" hook for CUDA
// only, then schedule the destructor with atexit.
612 IRBuilder<> CtorBuilder(BasicBlock::Create(C, "entry", CtorFunc));
613 CallInst *Handle = CtorBuilder.CreateCall(
614 RegFatbin,
616 CtorBuilder.CreateAlignedStore(
617 Handle, BinaryHandleGlobal,
618 Align(M.getDataLayout().getPointerTypeSize(PtrTy)));
619 CtorBuilder.CreateCall(createRegisterGlobalsFunction(M, IsHIP, EntryArray,
620 Suffix,
621 EmitSurfacesAndTextures),
622 Handle);
623 if (!IsHIP)
624 CtorBuilder.CreateCall(RegFatbinEnd, Handle);
625 CtorBuilder.CreateCall(AtExit, DtorFunc);
626 CtorBuilder.CreateRetVoid();
627
628 // Create the destructor to unregister the image with the runtime. We cannot
629 // use a standard global destructor after CUDA 9.2 so this must be called by
630 // `atexit()` instead.
631 IRBuilder<> DtorBuilder(BasicBlock::Create(C, "entry", DtorFunc));
632 LoadInst *BinaryHandle = DtorBuilder.CreateAlignedLoad(
633 PtrTy, BinaryHandleGlobal,
634 Align(M.getDataLayout().getPointerTypeSize(PtrTy)));
635 DtorBuilder.CreateCall(UnregFatbin, BinaryHandle);
636 DtorBuilder.CreateRetVoid();
637
638 // Add this function to constructors.
639 appendToGlobalCtors(M, CtorFunc, /*Priority=*/101);
640}
641
642/// SYCLWrapper helper class that creates all LLVM IRs wrapping given images.
643struct SYCLWrapper {
644 Module &M;
645 LLVMContext &C;
646 SYCLJITOptions Options;
647
648 StructType *EntryTy = nullptr;
649 StructType *SyclDeviceImageTy = nullptr;
650 StructType *SyclBinDescTy = nullptr;
651
652 SYCLWrapper(Module &M, const SYCLJITOptions &Options)
653 : M(M), C(M.getContext()), Options(Options) {
654 EntryTy = offloading::getEntryTy(M);
655 SyclDeviceImageTy = getSyclDeviceImageTy();
656 SyclBinDescTy = getSyclBinDescTy();
657 }
658
659 IntegerType *getSizeTTy() {
660 switch (M.getDataLayout().getPointerSize()) {
661 case 4:
662 return Type::getInt32Ty(C);
663 case 8:
664 return Type::getInt64Ty(C);
665 }
666 llvm_unreachable("unsupported pointer type size");
667 }
668
669 SmallVector<Constant *, 2> getSizetConstPair(size_t First, size_t Second) {
670 IntegerType *SizeTTy = getSizeTTy();
671 return SmallVector<Constant *, 2>{ConstantInt::get(SizeTTy, First),
672 ConstantInt::get(SizeTTy, Second)};
673 }
674
675 /// Note: Properties aren't supported and the support is going
676 /// to be added later.
677 /// Creates a structure corresponding to:
678 /// SYCL specific image descriptor type.
679 /// \code
680 /// struct __sycl.tgt_device_image {
681 /// // version of this structure - for backward compatibility;
682 /// // all modifications which change order/type/offsets of existing fields
683 /// // should increment the version.
684 /// uint16_t Version;
685 /// // the kind of offload model the image employs.
686 /// uint8_t OffloadKind;
687 /// // format of the image data - SPIRV, LLVMIR bitcode, etc
688 /// uint8_t Format;
689 /// // null-terminated string representation of the device's target
690 /// // architecture
691 /// const char *Arch;
692 /// // a null-terminated string; target- and compiler-specific options
693 /// // which are suggested to use to "compile" program at runtime
694 /// const char *CompileOptions;
695 /// // a null-terminated string; target- and compiler-specific options
696 /// // which are suggested to use to "link" program at runtime
697 /// const char *LinkOptions;
698 /// // Pointer to the device binary image start
699 /// void *ImageStart;
700 /// // Pointer to the device binary image end
701 /// void *ImageEnd;
702 /// // the entry table
703 /// __tgt_offload_entry *EntriesBegin;
704 /// __tgt_offload_entry *EntriesEnd;
705 /// const char *PropertiesBegin;
706 /// const char *PropertiesEnd;
707 /// };
708 /// \endcode
709 StructType *getSyclDeviceImageTy() {
710 return StructType::create(
711 {
712 Type::getInt16Ty(C), // Version
713 Type::getInt8Ty(C), // OffloadKind
714 Type::getInt8Ty(C), // Format
715 PointerType::getUnqual(C), // Arch
716 PointerType::getUnqual(C), // CompileOptions
717 PointerType::getUnqual(C), // LinkOptions
718 PointerType::getUnqual(C), // ImageStart
719 PointerType::getUnqual(C), // ImageEnd
720 PointerType::getUnqual(C), // EntriesBegin
721 PointerType::getUnqual(C), // EntriesEnd
722 PointerType::getUnqual(C), // PropertiesBegin
723 PointerType::getUnqual(C) // PropertiesEnd
724 },
725 "__sycl.tgt_device_image");
726 }
727
728 /// Creates a structure for SYCL specific binary descriptor type. Corresponds
729 /// to:
730 ///
731 /// \code
732 /// struct __sycl.tgt_bin_desc {
733 /// // version of this structure - for backward compatibility;
734 /// // all modifications which change order/type/offsets of existing fields
735 /// // should increment the version.
736 /// uint16_t Version;
737 /// uint16_t NumDeviceImages;
738 /// __sycl.tgt_device_image *DeviceImages;
739 /// // the offload entry table
740 /// __tgt_offload_entry *HostEntriesBegin;
741 /// __tgt_offload_entry *HostEntriesEnd;
742 /// };
743 /// \endcode
744 StructType *getSyclBinDescTy() {
745 return StructType::create(
746 {Type::getInt16Ty(C), Type::getInt16Ty(C), PointerType::getUnqual(C),
747 PointerType::getUnqual(C), PointerType::getUnqual(C)},
748 "__sycl.tgt_bin_desc");
749 }
750
751 /// Adds a global readonly variable that is initialized by given
752 /// \p Initializer to the module.
753 GlobalVariable *addGlobalArrayVariable(const Twine &Name,
754 ArrayRef<char> Initializer,
755 const Twine &Section = "") {
756 auto *Arr = ConstantDataArray::get(M.getContext(), Initializer);
757 auto *Var = new GlobalVariable(M, Arr->getType(), /*isConstant*/ true,
758 GlobalVariable::InternalLinkage, Arr, Name);
759 Var->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
760
761 SmallVector<char, 32> NameBuf;
762 auto SectionName = Section.toStringRef(NameBuf);
763 if (!SectionName.empty())
764 Var->setSection(SectionName);
765 return Var;
766 }
767
768 /// Adds given \p Buf as a global variable into the module.
769 /// \returns Pair of pointers that point at the beginning and the end of the
770 /// variable.
771 std::pair<Constant *, Constant *>
772 addArrayToModule(ArrayRef<char> Buf, const Twine &Name,
773 const Twine &Section = "") {
774 auto *Var = addGlobalArrayVariable(Name, Buf, Section);
775 auto *ImageB = ConstantExpr::getGetElementPtr(Var->getValueType(), Var,
776 getSizetConstPair(0, 0));
777 auto *ImageE = ConstantExpr::getGetElementPtr(
778 Var->getValueType(), Var, getSizetConstPair(0, Buf.size()));
779 return std::make_pair(ImageB, ImageE);
780 }
781
782 /// Adds given \p Data as constant byte array in the module.
783 /// \returns Constant pointer to the added data. The pointer type does not
784 /// carry size information.
785 Constant *addRawDataToModule(ArrayRef<char> Data, const Twine &Name) {
786 auto *Var = addGlobalArrayVariable(Name, Data);
787 auto *DataPtr = ConstantExpr::getGetElementPtr(Var->getValueType(), Var,
788 getSizetConstPair(0, 0));
789 return DataPtr;
790 }
791
792 /// Creates a global variable of const char* type and creates an
793 /// initializer that initializes it with \p Str.
794 ///
795 /// \returns Link-time constant pointer (constant expr) to that
796 /// variable.
797 Constant *addStringToModule(StringRef Str, const Twine &Name) {
798 auto *Arr = ConstantDataArray::getString(C, Str);
799 auto *Var = new GlobalVariable(M, Arr->getType(), /*isConstant*/ true,
800 GlobalVariable::InternalLinkage, Arr, Name);
801 Var->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
802 auto *Zero = ConstantInt::get(getSizeTTy(), 0);
803 Constant *ZeroZero[] = {Zero, Zero};
804 return ConstantExpr::getGetElementPtr(Var->getValueType(), Var, ZeroZero);
805 }
806
807 /// Each image contains its own set of symbols, which may contain different
808 /// symbols than other images. This function constructs an array of
809 /// symbol entries for a particular image.
810 ///
811 /// \returns Pointers to the beginning and end of the array.
812 std::pair<Constant *, Constant *>
813 initOffloadEntriesPerImage(StringRef Entries, const Twine &OffloadKindTag) {
814 SmallVector<Constant *> EntriesInits;
815 std::unique_ptr<MemoryBuffer> MB = MemoryBuffer::getMemBuffer(
816 Entries, /*BufferName*/ "", /*RequiresNullTerminator*/ false);
817 for (line_iterator LI(*MB); !LI.is_at_eof(); ++LI) {
818 GlobalVariable *GV =
819 emitOffloadingEntry(M, /*Kind*/ OffloadKind::OFK_SYCL,
820 Constant::getNullValue(PointerType::getUnqual(C)),
821 /*Name*/ *LI, /*Size*/ 0,
822 /*Flags*/ 0, /*Data*/ 0);
823 EntriesInits.push_back(GV->getInitializer());
824 }
825
826 auto *Arr = ConstantArray::get(ArrayType::get(EntryTy, EntriesInits.size()),
827 EntriesInits);
828 auto *EntriesGV = new GlobalVariable(M, Arr->getType(), /*isConstant*/ true,
829 GlobalVariable::InternalLinkage, Arr,
830 OffloadKindTag + "entries_arr");
831
832 auto *EntriesB = ConstantExpr::getGetElementPtr(
833 EntriesGV->getValueType(), EntriesGV, getSizetConstPair(0, 0));
834 auto *EntriesE = ConstantExpr::getGetElementPtr(
835 EntriesGV->getValueType(), EntriesGV,
836 getSizetConstPair(0, EntriesInits.size()));
837 return std::make_pair(EntriesB, EntriesE);
838 }
839
840 Constant *wrapImage(const OffloadBinary &OB, const Twine &ImageID,
841 StringRef OffloadKindTag) {
842 // Note: Intel DPC++ compiler had 2 versions of this structure
843 // and clang++ has a third different structure. To avoid ABI incompatibility
844 // between generated device images the Version here starts from 3.
845 constexpr uint16_t DeviceImageStructVersion = 3;
847 ConstantInt::get(Type::getInt16Ty(C), DeviceImageStructVersion);
848 Constant *OffloadKindConstant = ConstantInt::get(
849 Type::getInt8Ty(C), static_cast<uint8_t>(OB.getOffloadKind()));
850 Constant *ImageKindConstant = ConstantInt::get(
851 Type::getInt8Ty(C), static_cast<uint8_t>(OB.getImageKind()));
852 StringRef Triple = OB.getString("triple");
853 Constant *TripleConstant =
854 addStringToModule(Triple, Twine(OffloadKindTag) + "target." + ImageID);
855 Constant *CompileOptions =
856 addStringToModule(Options.CompileOptions,
857 Twine(OffloadKindTag) + "opts.compile." + ImageID);
858 Constant *LinkOptions = addStringToModule(
859 Options.LinkOptions, Twine(OffloadKindTag) + "opts.link." + ImageID);
860
861 // Note: NULL for now.
862 std::pair<Constant *, Constant *> PropertiesConstants = {
863 Constant::getNullValue(PointerType::getUnqual(C)),
864 Constant::getNullValue(PointerType::getUnqual(C))};
865
866 StringRef RawImage = OB.getImage();
867 std::pair<Constant *, Constant *> Binary = addArrayToModule(
868 ArrayRef<char>(RawImage.begin(), RawImage.end()),
869 Twine(OffloadKindTag) + ImageID + ".data", ".llvm.offloading");
870
871 // For SYCL images offload entries are defined here per image.
872 std::pair<Constant *, Constant *> ImageEntriesPtrs =
873 initOffloadEntriesPerImage(OB.getString("symbols"), OffloadKindTag);
874 Constant *WrappedBinary = ConstantStruct::get(
875 SyclDeviceImageTy, Version, OffloadKindConstant, ImageKindConstant,
876 TripleConstant, CompileOptions, LinkOptions, Binary.first,
877 Binary.second, ImageEntriesPtrs.first, ImageEntriesPtrs.second,
878 PropertiesConstants.first, PropertiesConstants.second);
879
880 return WrappedBinary;
881 }
882
883 GlobalVariable *combineWrappedImages(ArrayRef<Constant *> WrappedImages,
884 StringRef OffloadKindTag) {
885 auto *ImagesData = ConstantArray::get(
886 ArrayType::get(SyclDeviceImageTy, WrappedImages.size()), WrappedImages);
887 auto *ImagesGV =
888 new GlobalVariable(M, ImagesData->getType(), /*isConstant*/ true,
890 Twine(OffloadKindTag) + "device_images");
891 ImagesGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
892
893 auto *Zero = ConstantInt::get(getSizeTTy(), 0);
894 Constant *ZeroZero[] = {Zero, Zero};
895 auto *ImagesB = ConstantExpr::getGetElementPtr(ImagesGV->getValueType(),
896 ImagesGV, ZeroZero);
897
898 Constant *EntriesB = Constant::getNullValue(PointerType::getUnqual(C));
899 Constant *EntriesE = Constant::getNullValue(PointerType::getUnqual(C));
900 static constexpr uint16_t BinDescStructVersion = 1;
901 auto *DescInit = ConstantStruct::get(
902 SyclBinDescTy,
903 ConstantInt::get(Type::getInt16Ty(C), BinDescStructVersion),
904 ConstantInt::get(Type::getInt16Ty(C), WrappedImages.size()), ImagesB,
905 EntriesB, EntriesE);
906
907 return new GlobalVariable(M, DescInit->getType(), /*isConstant*/ true,
909 Twine(OffloadKindTag) + "descriptor");
910 }
911
912 /// Creates binary descriptor for the given device images. Binary descriptor
913 /// is an object that is passed to the offloading runtime at program startup
914 /// and it describes all device images available in the executable or shared
915 /// library. It is defined as follows:
916 ///
917 /// \code
918 /// __attribute__((visibility("hidden")))
919 /// __tgt_offload_entry *__sycl_offload_entries_arr0[];
920 /// ...
921 /// __attribute__((visibility("hidden")))
922 /// __tgt_offload_entry *__sycl_offload_entries_arrN[];
923 ///
924 /// __attribute__((visibility("hidden")))
925 /// extern const char *CompileOptions = "...";
926 /// ...
927 /// __attribute__((visibility("hidden")))
928 /// extern const char *LinkOptions = "...";
929 /// ...
930 ///
931 /// static const char Image0[] = { ... };
932 /// ...
933 /// static const char ImageN[] = { ... };
934 ///
935 /// static const __sycl.tgt_device_image Images[] = {
936 /// {
937 /// Version, // Version
938 /// OffloadKind, // OffloadKind
939 /// Format, // Format of the image.
940 // TripleString, // Arch
941 /// CompileOptions, // CompileOptions
942 /// LinkOptions, // LinkOptions
943 /// Image0, // ImageStart
944 /// Image0 + IMAGE0_SIZE, // ImageEnd
945 /// __sycl_offload_entries_arr0, // EntriesBegin
946 /// __sycl_offload_entries_arr0 + ENTRIES0_SIZE, // EntriesEnd
947 /// NULL, // PropertiesBegin
948 /// NULL, // PropertiesEnd
949 /// },
950 /// ...
951 /// };
952 ///
953 /// static const __sycl.tgt_bin_desc FatbinDesc = {
954 /// Version, //Version
955 /// sizeof(Images) / sizeof(Images[0]), //NumDeviceImages
956 /// Images, //DeviceImages
957 /// NULL, //HostEntriesBegin
958 /// NULL //HostEntriesEnd
959 /// };
960 /// \endcode
961 ///
962 /// \returns Global variable that represents FatbinDesc.
963 GlobalVariable *createFatbinDesc(ArrayRef<OffloadFile> OffloadFiles) {
964 StringRef OffloadKindTag = ".sycl_offloading.";
965 SmallVector<Constant *> WrappedImages;
966 WrappedImages.reserve(OffloadFiles.size());
967 for (size_t I = 0, E = OffloadFiles.size(); I != E; ++I)
968 WrappedImages.push_back(
969 wrapImage(*OffloadFiles[I].getBinary(), Twine(I), OffloadKindTag));
970
971 return combineWrappedImages(WrappedImages, OffloadKindTag);
972 }
973
974 void createRegisterFatbinFunction(GlobalVariable *FatbinDesc) {
975 auto *FuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
977 Twine("sycl") + ".descriptor_reg", &M);
978 Func->setSection(".text.startup");
979
980 // Get RegFuncName function declaration.
981 auto *RegFuncTy =
982 FunctionType::get(Type::getVoidTy(C), PointerType::getUnqual(C),
983 /*isVarArg=*/false);
984 FunctionCallee RegFuncC =
985 M.getOrInsertFunction("__sycl_register_lib", RegFuncTy);
986
987 // Construct function body
988 IRBuilder Builder(BasicBlock::Create(C, "entry", Func));
989 Builder.CreateCall(RegFuncC, FatbinDesc);
990 Builder.CreateRetVoid();
991
992 // Add this function to constructors.
993 appendToGlobalCtors(M, Func, /*Priority*/ 1);
994 }
995
996 void createUnregisterFunction(GlobalVariable *FatbinDesc) {
997 auto *FuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
999 "sycl.descriptor_unreg", &M);
1000 Func->setSection(".text.startup");
1001
1002 // Get UnregFuncName function declaration.
1003 auto *UnRegFuncTy =
1004 FunctionType::get(Type::getVoidTy(C), PointerType::getUnqual(C),
1005 /*isVarArg=*/false);
1006 FunctionCallee UnRegFuncC =
1007 M.getOrInsertFunction("__sycl_unregister_lib", UnRegFuncTy);
1008
1009 // Construct function body
1010 IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func));
1011 Builder.CreateCall(UnRegFuncC, FatbinDesc);
1012 Builder.CreateRetVoid();
1013
1014 // Add this function to global destructors.
1015 appendToGlobalDtors(M, Func, /*Priority*/ 1);
1016 }
1017}; // end of SYCLWrapper
1018
1019} // namespace
1020
1022 EntryArrayTy EntryArray,
1023 llvm::StringRef Suffix, bool Relocatable) {
1025 createBinDesc(M, Images, EntryArray, Suffix, Relocatable);
1026 if (!Desc)
1028 "No binary descriptors created.");
1029 createRegisterFunction(M, Desc, Suffix);
1030 return Error::success();
1031}
1032
1034 EntryArrayTy EntryArray,
1035 llvm::StringRef Suffix,
1036 bool EmitSurfacesAndTextures) {
1037 GlobalVariable *Desc = createFatbinDesc(M, Image, /*IsHip=*/false, Suffix);
1038 if (!Desc)
1040 "No fatbin section created.");
1041
1042 createRegisterFatbinFunction(M, Desc, /*IsHip=*/false, EntryArray, Suffix,
1043 EmitSurfacesAndTextures);
1044 return Error::success();
1045}
1046
1048 EntryArrayTy EntryArray, llvm::StringRef Suffix,
1049 bool EmitSurfacesAndTextures) {
1050 GlobalVariable *Desc = createFatbinDesc(M, Image, /*IsHip=*/true, Suffix);
1051 if (!Desc)
1053 "No fatbin section created.");
1054
1055 createRegisterFatbinFunction(M, Desc, /*IsHip=*/true, EntryArray, Suffix,
1056 EmitSurfacesAndTextures);
1057 return Error::success();
1058}
1059
1062 SYCLWrapper W(M, Options);
1063 MemoryBufferRef MBR(StringRef(Buffer.begin(), Buffer.size()),
1064 /*Identifier*/ "");
1065 SmallVector<OffloadFile> OffloadFiles;
1066 if (Error E = extractOffloadBinaries(MBR, OffloadFiles))
1067 return E;
1068
1069 GlobalVariable *Desc = W.createFatbinDesc(OffloadFiles);
1070 if (!Desc)
1072 "No binary descriptors created.");
1073
1074 W.createRegisterFatbinFunction(Desc);
1075 W.createUnregisterFunction(Desc);
1076 return Error::success();
1077}
static IntegerType * getSizeTTy(IRBuilderBase &B, const TargetLibraryInfo *TLI)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Module.h This file contains the declarations for the Module class.
static LVOptions Options
Definition LVOptions.cpp:25
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
This file defines the SmallVector class.
@ ConstantBit
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
iterator begin() const
Definition ArrayRef.h:130
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
This class represents a function call, abstracting a target machine's calling convention.
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true)
This method constructs a CDS and initializes it with a text string.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition Constants.h:715
static Constant * getInBoundsGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList)
Create an "inbounds" getelementptr.
Definition Constants.h:1301
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList, GEPNoWrapFlags NW=GEPNoWrapFlags::none(), std::optional< ConstantRange > InRange=std::nullopt, Type *OnlyIfReducedTy=nullptr)
Getelementptr form.
Definition Constants.h:1274
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static ErrorSuccess success()
Create a success value.
Definition Error.h:336
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition Function.h:166
@ InternalLinkage
Rename collisions when linking (static functions).
Definition GlobalValue.h:60
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2788
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
static std::unique_ptr< MemoryBuffer > getMemBuffer(StringRef InputData, StringRef BufferName="", bool RequiresNullTerminator=true)
Open the specified memory range as a MemoryBuffer.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
iterator begin() const
Definition StringRef.h:112
iterator end() const
Definition StringRef.h:114
Class to represent struct types.
static LLVM_ABI StructType * getTypeByName(LLVMContext &C, StringRef Name)
Return the type with the specified name, or null if there is none by that name.
Definition Type.cpp:738
static LLVM_ABI StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Definition Type.cpp:619
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
bool isMacOSX() const
Is this a Mac OS X triple.
Definition Triple.h:582
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:297
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:296
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:280
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
Definition Type.cpp:295
static uint64_t getAlignment()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
const char SectionName[]
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ OB
OB - OneByte - Set if this instruction has a one byte opcode.
@ Switch
The "resume-switch" lowering, where there are separate resume and destroy functions that are shared b...
Definition CoroShape.h:31
LLVM_ABI Error extractOffloadBinaries(MemoryBufferRef Buffer, SmallVectorImpl< OffloadFile > &Binaries)
Extracts embedded device offloading code from a memory Buffer to a list of Binaries.
LLVM_ABI GlobalVariable * emitOffloadingEntry(Module &M, object::OffloadKind Kind, Constant *Addr, StringRef Name, uint64_t Size, uint32_t Flags, uint64_t Data, Constant *AuxAddr=nullptr, StringRef SectionName="llvm_offload_entries")
Create an offloading section struct used to register this global at runtime.
Definition Utility.cpp:86
LLVM_ABI StructType * getEntryTy(Module &M)
Returns the type of the offloading entry we use to store kernels and globals that will be registered ...
Definition Utility.cpp:26
LLVM_ABI llvm::Error wrapSYCLBinaries(llvm::Module &M, llvm::ArrayRef< char > Buffer, SYCLJITOptions Options=SYCLJITOptions())
Wraps OffloadBinaries in the given Buffers into the module M as global symbols and registers the imag...
@ OffloadGlobalSurfaceEntry
Mark the entry as a surface variable.
Definition Utility.h:58
@ OffloadGlobalTextureEntry
Mark the entry as a texture variable.
Definition Utility.h:60
@ OffloadGlobalNormalized
Mark the entry as being a normalized surface.
Definition Utility.h:66
@ OffloadGlobalEntry
Mark the entry as a global entry.
Definition Utility.h:54
@ OffloadGlobalManagedEntry
Mark the entry as a managed global variable.
Definition Utility.h:56
@ OffloadGlobalExtern
Mark the entry as being extern.
Definition Utility.h:62
@ OffloadGlobalConstant
Mark the entry as being constant.
Definition Utility.h:64
LLVM_ABI llvm::Error wrapOpenMPBinaries(llvm::Module &M, llvm::ArrayRef< llvm::ArrayRef< char > > Images, EntryArrayTy EntryArray, llvm::StringRef Suffix="", bool Relocatable=false)
Wraps the input device images into the module M as global symbols and registers the images with the O...
std::pair< GlobalVariable *, GlobalVariable * > EntryArrayTy
LLVM_ABI llvm::Error wrapHIPBinary(llvm::Module &M, llvm::ArrayRef< char > Images, EntryArrayTy EntryArray, llvm::StringRef Suffix="", bool EmitSurfacesAndTextures=true)
Wraps the input bundled image into the module M as global symbols and registers the images with the H...
LLVM_ABI llvm::Error wrapCudaBinary(llvm::Module &M, llvm::ArrayRef< char > Images, EntryArrayTy EntryArray, llvm::StringRef Suffix="", bool EmitSurfacesAndTextures=true)
Wraps the input fatbinary image into the module M as global symbols and registers the images with the...
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
Context & getContext() const
Definition BasicBlock.h:99
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI file_magic identify_magic(StringRef magic)
Identify the type of a binary file based on how magical it is.
Definition Magic.cpp:33
LLVM_ABI std::error_code inconvertibleErrorCode()
The value returned by this function can be returned from convertToErrorCode for Error values where no...
Definition Error.cpp:98
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition Error.h:1305
Op::Description Desc
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:302
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:189
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI void appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data=nullptr)
Append F to the list of global ctors of module M with the given Priority.
LLVM_ABI void appendToGlobalDtors(Module &M, Function *F, int Priority, Constant *Data=nullptr)
Same as appendToGlobalCtors(), but for global dtors.
@ Extern
Replace returns with jump to thunk, don't emit thunk.
Definition CodeGen.h:163
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
@ offload_binary
LLVM offload object file.
Definition Magic.h:58