LLVM 23.0.0git
OMPIRBuilder.h
Go to the documentation of this file.
1//===- IR/OpenMPIRBuilder.h - OpenMP encoding builder for LLVM IR - C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the OpenMPIRBuilder class and helpers used as a convenient
10// way to create LLVM instructions for OpenMP directives.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
15#define LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
16
17#include "llvm/ADT/APSInt.h"
18#include "llvm/ADT/SetVector.h"
22#include "llvm/IR/CallingConv.h"
23#include "llvm/IR/DebugLoc.h"
24#include "llvm/IR/IRBuilder.h"
25#include "llvm/IR/Module.h"
26#include "llvm/IR/ValueMap.h"
29#include "llvm/Support/Error.h"
31#include <forward_list>
32#include <map>
33#include <optional>
34
35namespace llvm {
37class CodeExtractor;
38class ScanInfo;
41class OpenMPIRBuilder;
42class Loop;
43class LoopAnalysis;
44class LoopInfo;
45
46namespace vfs {
47class FileSystem;
48} // namespace vfs
49
50/// Move the instruction after an InsertPoint to the beginning of another
51/// BasicBlock.
52///
53/// The instructions after \p IP are moved to the beginning of \p New which must
54/// not have any PHINodes. If \p CreateBranch is true, a branch instruction to
55/// \p New will be added such that there is no semantic change. Otherwise, the
56/// \p IP insert block remains degenerate and it is up to the caller to insert a
57/// terminator. \p DL is used as the debug location for the branch instruction
58/// if one is created.
60 bool CreateBranch, DebugLoc DL);
61
62/// Splice a BasicBlock at an IRBuilder's current insertion point. Its new
63/// insert location will stick to after the instruction before the insertion
64/// point (instead of moving with the instruction the InsertPoint stores
65/// internally).
66LLVM_ABI void spliceBB(IRBuilder<> &Builder, BasicBlock *New,
67 bool CreateBranch);
68
69/// Split a BasicBlock at an InsertPoint, even if the block is degenerate
70/// (missing the terminator).
71///
72/// llvm::SplitBasicBlock and BasicBlock::splitBasicBlock require a well-formed
73/// BasicBlock. \p Name is used for the new successor block. If \p CreateBranch
74/// is true, a branch to the new successor will new created such that
75/// semantically there is no change; otherwise the block of the insertion point
76/// remains degenerate and it is the caller's responsibility to insert a
77/// terminator. \p DL is used as the debug location for the branch instruction
78/// if one is created. Returns the new successor block.
80 DebugLoc DL, llvm::Twine Name = {});
81
82/// Split a BasicBlock at \p Builder's insertion point, even if the block is
83/// degenerate (missing the terminator). Its new insert location will stick to
84/// after the instruction before the insertion point (instead of moving with the
85/// instruction the InsertPoint stores internally).
86LLVM_ABI BasicBlock *splitBB(IRBuilderBase &Builder, bool CreateBranch,
87 llvm::Twine Name = {});
88
89/// Split a BasicBlock at \p Builder's insertion point, even if the block is
90/// degenerate (missing the terminator). Its new insert location will stick to
91/// after the instruction before the insertion point (instead of moving with the
92/// instruction the InsertPoint stores internally).
93LLVM_ABI BasicBlock *splitBB(IRBuilder<> &Builder, bool CreateBranch,
94 llvm::Twine Name);
95
96/// Like splitBB, but reuses the current block's name for the new name.
98 bool CreateBranch,
99 llvm::Twine Suffix = ".split");
100
101/// Captures attributes that affect generating LLVM-IR using the
102/// OpenMPIRBuilder and related classes. Note that not all attributes are
103/// required for all classes or functions. In some use cases the configuration
104/// is not necessary at all, because because the only functions that are called
105/// are ones that are not dependent on the configuration.
107public:
108 /// Flag to define whether to generate code for the role of the OpenMP host
109 /// (if set to false) or device (if set to true) in an offloading context. It
110 /// is set when the -fopenmp-is-target-device compiler frontend option is
111 /// specified.
112 std::optional<bool> IsTargetDevice;
113
114 /// Flag for specifying if the compilation is done for an accelerator. It is
115 /// set according to the architecture of the target triple and currently only
116 /// true when targeting AMDGPU or NVPTX. Today, these targets can only perform
117 /// the role of an OpenMP target device, so `IsTargetDevice` must also be true
118 /// if `IsGPU` is true. This restriction might be lifted if an accelerator-
119 /// like target with the ability to work as the OpenMP host is added, or if
120 /// the capabilities of the currently supported GPU architectures are
121 /// expanded.
122 std::optional<bool> IsGPU;
123
124 /// Flag for specifying if LLVMUsed information should be emitted.
125 std::optional<bool> EmitLLVMUsedMetaInfo;
126
127 /// Flag for specifying if offloading is mandatory.
128 std::optional<bool> OpenMPOffloadMandatory;
129
130 /// First separator used between the initial two parts of a name.
131 std::optional<StringRef> FirstSeparator;
132 /// Separator used between all of the rest consecutive parts of s name.
133 std::optional<StringRef> Separator;
134
135 // Grid Value for the GPU target.
136 std::optional<omp::GV> GridValue;
137
138 /// When compilation is being done for the OpenMP host (i.e. `IsTargetDevice =
139 /// false`), this contains the list of offloading triples associated, if any.
141
142 // Default address space for the target.
143 unsigned DefaultTargetAS = 0;
144
146
150 bool HasRequiresReverseOffload,
151 bool HasRequiresUnifiedAddress,
152 bool HasRequiresUnifiedSharedMemory,
153 bool HasRequiresDynamicAllocators);
154
155 // Getters functions that assert if the required values are not present.
156 bool isTargetDevice() const {
157 assert(IsTargetDevice.has_value() && "IsTargetDevice is not set");
158 return *IsTargetDevice;
159 }
160
161 bool isGPU() const {
162 assert(IsGPU.has_value() && "IsGPU is not set");
163 return *IsGPU;
164 }
165
167 assert(OpenMPOffloadMandatory.has_value() &&
168 "OpenMPOffloadMandatory is not set");
170 }
171
173 assert(GridValue.has_value() && "GridValue is not set");
174 return *GridValue;
175 }
176
177 unsigned getDefaultTargetAS() const { return DefaultTargetAS; }
178
180
181 bool hasRequiresFlags() const { return RequiresFlags; }
186
187 /// Returns requires directive clauses as flags compatible with those expected
188 /// by libomptarget.
189 LLVM_ABI int64_t getRequiresFlags() const;
190
191 // Returns the FirstSeparator if set, otherwise use the default separator
192 // depending on isGPU
194 if (FirstSeparator.has_value())
195 return *FirstSeparator;
196 if (isGPU())
197 return "_";
198 return ".";
199 }
200
201 // Returns the Separator if set, otherwise use the default separator depending
202 // on isGPU
204 if (Separator.has_value())
205 return *Separator;
206 if (isGPU())
207 return "$";
208 return ".";
209 }
210
212 void setIsGPU(bool Value) { IsGPU = Value; }
218 void setDefaultTargetAS(unsigned AS) { DefaultTargetAS = AS; }
220
225
226private:
227 /// Flags for specifying which requires directive clauses are present.
228 int64_t RequiresFlags;
229};
230
231/// Data structure to contain the information needed to uniquely identify
232/// a target entry.
234 /// The prefix used for kernel names.
235 static constexpr const char *KernelNamePrefix = "__omp_offloading_";
236
237 std::string ParentName;
238 unsigned DeviceID;
239 unsigned FileID;
240 unsigned Line;
241 unsigned Count;
242
245 unsigned FileID, unsigned Line, unsigned Count = 0)
247 Count(Count) {}
248
249 LLVM_ABI static void
251 unsigned DeviceID, unsigned FileID, unsigned Line,
252 unsigned Count);
253
255 return std::make_tuple(ParentName, DeviceID, FileID, Line, Count) <
256 std::make_tuple(RHS.ParentName, RHS.DeviceID, RHS.FileID, RHS.Line,
257 RHS.Count);
258 }
259};
260
261/// Class that manages information about offload code regions and data
263 /// Number of entries registered so far.
264 OpenMPIRBuilder *OMPBuilder;
265 unsigned OffloadingEntriesNum = 0;
266
267public:
268 /// Base class of the entries info.
270 public:
271 /// Kind of a given entry.
272 enum OffloadingEntryInfoKinds : unsigned {
273 /// Entry is a target region.
275 /// Entry is a declare target variable.
277 /// Invalid entry info.
279 };
280
281 protected:
283 explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind) : Kind(Kind) {}
284 explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order,
285 uint32_t Flags)
286 : Flags(Flags), Order(Order), Kind(Kind) {}
287 ~OffloadEntryInfo() = default;
288
289 public:
290 bool isValid() const { return Order != ~0u; }
291 unsigned getOrder() const { return Order; }
292 OffloadingEntryInfoKinds getKind() const { return Kind; }
293 uint32_t getFlags() const { return Flags; }
294 void setFlags(uint32_t NewFlags) { Flags = NewFlags; }
295 Constant *getAddress() const { return cast_or_null<Constant>(Addr); }
297 assert(!Addr.pointsToAliveValue() && "Address has been set before!");
298 Addr = V;
299 }
300 static bool classof(const OffloadEntryInfo *Info) { return true; }
301
302 private:
303 /// Address of the entity that has to be mapped for offloading.
304 WeakTrackingVH Addr;
305
306 /// Flags associated with the device global.
307 uint32_t Flags = 0u;
308
309 /// Order this entry was emitted.
310 unsigned Order = ~0u;
311
312 OffloadingEntryInfoKinds Kind = OffloadingEntryInfoInvalid;
313 };
314
315 /// Return true if a there are no entries defined.
316 LLVM_ABI bool empty() const;
317 /// Return number of entries defined so far.
318 unsigned size() const { return OffloadingEntriesNum; }
319
320 OffloadEntriesInfoManager(OpenMPIRBuilder *builder) : OMPBuilder(builder) {}
321
322 //
323 // Target region entries related.
324 //
325
326 /// Kind of the target registry entry.
328 /// Mark the entry as target region.
330 };
331
332 /// Target region entries info.
334 /// Address that can be used as the ID of the entry.
335 Constant *ID = nullptr;
336
337 public:
340 explicit OffloadEntryInfoTargetRegion(unsigned Order, Constant *Addr,
341 Constant *ID,
344 ID(ID) {
345 setAddress(Addr);
346 }
347
348 Constant *getID() const { return ID; }
349 void setID(Constant *V) {
350 assert(!ID && "ID has been set before!");
351 ID = V;
352 }
353 static bool classof(const OffloadEntryInfo *Info) {
354 return Info->getKind() == OffloadingEntryInfoTargetRegion;
355 }
356 };
357
358 /// Initialize target region entry.
359 /// This is ONLY needed for DEVICE compilation.
360 LLVM_ABI void
362 unsigned Order);
363 /// Register target region entry.
365 Constant *Addr, Constant *ID,
367 /// Return true if a target region entry with the provided information
368 /// exists.
370 bool IgnoreAddressId = false) const;
371
372 // Return the Name based on \a EntryInfo using the next available Count.
373 LLVM_ABI void
375 const TargetRegionEntryInfo &EntryInfo);
376
377 /// brief Applies action \a Action on all registered entries.
378 typedef function_ref<void(const TargetRegionEntryInfo &EntryInfo,
379 const OffloadEntryInfoTargetRegion &)>
381 LLVM_ABI void
383
384 //
385 // Device global variable entries related.
386 //
387
388 /// Kind of the global variable entry..
390 /// Mark the entry as a to declare target.
392 /// Mark the entry as a to declare target link.
394 /// Mark the entry as a declare target enter.
396 /// Mark the entry as having no declare target entry kind.
398 /// Mark the entry as a declare target indirect global.
400 /// Mark the entry as a register requires global.
402 /// Mark the entry as a declare target indirect vtable.
404 };
405
406 /// Kind of device clause for declare target variables
407 /// and functions
408 /// NOTE: Currently not used as a part of a variable entry
409 /// used for Flang and Clang to interface with the variable
410 /// related registration functions
412 /// The target is marked for all devices
414 /// The target is marked for non-host devices
416 /// The target is marked for host devices
418 /// The target is marked as having no clause
420 };
421
422 /// Device global variable entries info.
424 /// Type of the global variable.
425 int64_t VarSize;
427 const std::string VarName;
428
429 public:
435 explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order, Constant *Addr,
436 int64_t VarSize,
439 const std::string &VarName)
441 VarSize(VarSize), Linkage(Linkage), VarName(VarName) {
442 setAddress(Addr);
443 }
444
445 int64_t getVarSize() const { return VarSize; }
446 StringRef getVarName() const { return VarName; }
447 void setVarSize(int64_t Size) { VarSize = Size; }
448 GlobalValue::LinkageTypes getLinkage() const { return Linkage; }
449 void setLinkage(GlobalValue::LinkageTypes LT) { Linkage = LT; }
450 static bool classof(const OffloadEntryInfo *Info) {
451 return Info->getKind() == OffloadingEntryInfoDeviceGlobalVar;
452 }
453 };
454
455 /// Initialize device global variable entry.
456 /// This is ONLY used for DEVICE compilation.
458 StringRef Name, OMPTargetGlobalVarEntryKind Flags, unsigned Order);
459
460 /// Register device global variable entry.
462 StringRef VarName, Constant *Addr, int64_t VarSize,
464 /// Checks if the variable with the given name has been registered already.
466 return OffloadEntriesDeviceGlobalVar.count(VarName) > 0;
467 }
468 /// Applies action \a Action on all registered entries.
469 typedef function_ref<void(StringRef, const OffloadEntryInfoDeviceGlobalVar &)>
473
474private:
475 /// Return the count of entries at a particular source location.
476 unsigned
477 getTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo) const;
478
479 /// Update the count of entries at a particular source location.
480 void
481 incrementTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo);
482
484 getTargetRegionEntryCountKey(const TargetRegionEntryInfo &EntryInfo) {
485 return TargetRegionEntryInfo(EntryInfo.ParentName, EntryInfo.DeviceID,
486 EntryInfo.FileID, EntryInfo.Line, 0);
487 }
488
489 // Count of entries at a location.
490 std::map<TargetRegionEntryInfo, unsigned> OffloadEntriesTargetRegionCount;
491
492 // Storage for target region entries kind.
493 typedef std::map<TargetRegionEntryInfo, OffloadEntryInfoTargetRegion>
494 OffloadEntriesTargetRegionTy;
495 OffloadEntriesTargetRegionTy OffloadEntriesTargetRegion;
496 /// Storage for device global variable entries kind. The storage is to be
497 /// indexed by mangled name.
499 OffloadEntriesDeviceGlobalVarTy;
500 OffloadEntriesDeviceGlobalVarTy OffloadEntriesDeviceGlobalVar;
501};
502
503/// An interface to create LLVM-IR for OpenMP directives.
504///
505/// Each OpenMP directive has a corresponding public generator method.
507public:
508 /// Create a new OpenMPIRBuilder operating on the given module \p M. This will
509 /// not have an effect on \p M (see initialize)
511 : M(M), Builder(M.getContext()), OffloadInfoManager(this),
512 T(M.getTargetTriple()), IsFinalized(false) {}
514
516 llvm::Value *AtomicVar;
517
518 public:
526
527 llvm::Value *getAtomicPointer() const override { return AtomicVar; }
530 const llvm::Twine &Name) const override {
531 llvm::AllocaInst *allocaInst = Builder->CreateAlloca(Ty);
532 allocaInst->setName(Name);
533 return allocaInst;
534 }
535 };
536 /// Initialize the internal state, this will put structures types and
537 /// potentially other helpers into the underlying module. Must be called
538 /// before any other method and only once! This internal state includes types
539 /// used in the OpenMPIRBuilder generated from OMPKinds.def.
540 LLVM_ABI void initialize();
541
543
544 /// Finalize the underlying module, e.g., by outlining regions.
545 /// \param Fn The function to be finalized. If not used,
546 /// all functions are finalized.
547 LLVM_ABI void finalize(Function *Fn = nullptr);
548
549 /// Check whether the finalize function has already run
550 /// \return true if the finalize function has already run
551 LLVM_ABI bool isFinalized();
552
553 /// Add attributes known for \p FnID to \p Fn.
555
556 /// Type used throughout for insertion points.
558
559 /// Type used to represent an insertion point or an error value.
561
562 /// Get the create a name using the platform specific separators.
563 /// \param Parts parts of the final name that needs separation
564 /// The created name has a first separator between the first and second part
565 /// and a second separator between all other parts.
566 /// E.g. with FirstSeparator "$" and Separator "." and
567 /// parts: "p1", "p2", "p3", "p4"
568 /// The resulting name is "p1$p2.p3.p4"
569 /// The separators are retrieved from the OpenMPIRBuilderConfig.
570 LLVM_ABI std::string
572
573 /// Callback type for variable finalization (think destructors).
574 ///
575 /// \param CodeGenIP is the insertion point at which the finalization code
576 /// should be placed.
577 ///
578 /// A finalize callback knows about all objects that need finalization, e.g.
579 /// destruction, when the scope of the currently generated construct is left
580 /// at the time, and location, the callback is invoked.
581 using FinalizeCallbackTy = std::function<Error(InsertPointTy CodeGenIP)>;
582
584 FinalizationInfo(FinalizeCallbackTy FiniCB, omp::Directive DK,
585 bool IsCancellable)
586 : DK(DK), IsCancellable(IsCancellable), FiniCB(std::move(FiniCB)) {}
587 /// The directive kind of the innermost directive that has an associated
588 /// region which might require finalization when it is left.
589 const omp::Directive DK;
590
591 /// Flag to indicate if the directive is cancellable.
592 const bool IsCancellable;
593
594 /// The basic block to which control should be transferred to
595 /// implement the FiniCB. Memoized to avoid generating finalization
596 /// multiple times.
598
599 /// For cases where there is an unavoidable existing finalization block
600 /// (e.g. loop finialization after omp sections). The existing finalization
601 /// block must not contain any non-finalization code.
603 BasicBlock *ExistingFiniBB);
604
605 private:
606 /// Access via getFiniBB.
607 BasicBlock *FiniBB = nullptr;
608
609 /// The finalization callback provided by the last in-flight invocation of
610 /// createXXXX for the directive of kind DK.
611 FinalizeCallbackTy FiniCB;
612 };
613
614 /// Push a finalization callback on the finalization stack.
615 ///
616 /// NOTE: Temporary solution until Clang CG is gone.
618 FinalizationStack.push_back(FI);
619 }
620
621 /// Pop the last finalization callback from the finalization stack.
622 ///
623 /// NOTE: Temporary solution until Clang CG is gone.
625
626 /// Callback type for body (=inner region) code generation
627 ///
628 /// The callback takes code locations as arguments, each describing a
629 /// location where additional instructions can be inserted.
630 ///
631 /// The CodeGenIP may be in the middle of a basic block or point to the end of
632 /// it. The basic block may have a terminator or be degenerate. The callback
633 /// function may just insert instructions at that position, but also split the
634 /// block (without the Before argument of BasicBlock::splitBasicBlock such
635 /// that the identify of the split predecessor block is preserved) and insert
636 /// additional control flow, including branches that do not lead back to what
637 /// follows the CodeGenIP. Note that since the callback is allowed to split
638 /// the block, callers must assume that InsertPoints to positions in the
639 /// BasicBlock after CodeGenIP including CodeGenIP itself are invalidated. If
640 /// such InsertPoints need to be preserved, it can split the block itself
641 /// before calling the callback.
642 ///
643 /// AllocaIP and CodeGenIP must not point to the same position.
644 ///
645 /// \param AllocaIP is the insertion point at which new allocations should
646 /// be placed. The BasicBlock it is pointing to must not be
647 /// split.
648 /// \param CodeGenIP is the insertion point at which the body code should be
649 /// placed.
650 /// \param DeallocBlocks is the list of insertion blocks where explicit
651 /// deallocations, if needed, should be placed.
652 /// \return an error, if any were triggered during execution.
654 function_ref<Error(InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
655 ArrayRef<BasicBlock *> DeallocBlocks)>;
656
657 /// Callback type for task duplication function code generation. This is the
658 /// task duplication function passed to __kmpc_taskloop. It is expected that
659 /// this function will set up (first)private variables in the duplicated task
660 /// which have non-trivial (copy-)constructors. Insertion points are handled
661 /// the same way as for BodyGenCallbackTy.
662 ///
663 /// \ref createTaskloop lays out the task's auxiliary data structure as:
664 /// `{ lower bound, upper bound, step, data... }`. DestPtr and SrcPtr point
665 /// to this data.
666 ///
667 /// It is acceptable for the callback to be set to nullptr. In that case no
668 /// function will be generated and nullptr will be passed as the task
669 /// duplication function to __kmpc_taskloop.
670 ///
671 /// \param AllocaIP is the insertion point at which new alloca instructions
672 /// should be placed. The BasicBlock it is pointing to must
673 /// not be split.
674 /// \param CodeGenIP is the insertion point at which the body code should be
675 /// placed.
676 /// \param DestPtr This is a pointer to data inside the newly duplicated
677 /// task's auxiliary data structure (allocated after the task
678 /// descriptor.)
679 /// \param SrcPtr This is a pointer to data inside the original task's
680 /// auxiliary data structure (allocated after the task
681 /// descriptor.)
682 ///
683 /// \return The insertion point immediately after the generated code, or an
684 /// error if any occured.
686 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value *DestPtr,
687 Value *SrcPtr)>;
688
689 // This is created primarily for sections construct as llvm::function_ref
690 // (BodyGenCallbackTy) is not storable (as described in the comments of
691 // function_ref class - function_ref contains non-ownable reference
692 // to the callable.
693 ///
694 /// \return an error, if any were triggered during execution.
696 std::function<Error(InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
697 ArrayRef<BasicBlock *> DeallocBlocks)>;
698
699 /// Callback type for loop body code generation.
700 ///
701 /// \param CodeGenIP is the insertion point where the loop's body code must be
702 /// placed. This will be a dedicated BasicBlock with a
703 /// conditional branch from the loop condition check and
704 /// terminated with an unconditional branch to the loop
705 /// latch.
706 /// \param IndVar is the induction variable usable at the insertion point.
707 ///
708 /// \return an error, if any were triggered during execution.
710 function_ref<Error(InsertPointTy CodeGenIP, Value *IndVar)>;
711
712 /// Callback type for variable privatization (think copy & default
713 /// constructor).
714 ///
715 /// \param AllocaIP is the insertion point at which new alloca instructions
716 /// should be placed.
717 /// \param CodeGenIP is the insertion point at which the privatization code
718 /// should be placed.
719 /// \param Original The value being copied/created, should not be used in the
720 /// generated IR.
721 /// \param Inner The equivalent of \p Original that should be used in the
722 /// generated IR; this is equal to \p Original if the value is
723 /// a pointer and can thus be passed directly, otherwise it is
724 /// an equivalent but different value.
725 /// \param ReplVal The replacement value, thus a copy or new created version
726 /// of \p Inner.
727 ///
728 /// \returns The new insertion point where code generation continues and
729 /// \p ReplVal the replacement value.
731 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original,
732 Value &Inner, Value *&ReplVal)>;
733
734 /// Description of a LLVM-IR insertion point (IP) and a debug/source location
735 /// (filename, line, column, ...).
738 : IP(IRB.saveIP()), DL(IRB.getCurrentDebugLocation()) {}
741 : IP(IP), DL(DL) {}
744 };
745
746 /// Emitter methods for OpenMP directives.
747 ///
748 ///{
749
750 /// Generator for '#omp barrier'
751 ///
752 /// \param Loc The location where the barrier directive was encountered.
753 /// \param Kind The kind of directive that caused the barrier.
754 /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
755 /// \param CheckCancelFlag Flag to indicate a cancel barrier return value
756 /// should be checked and acted upon.
757 /// \param ThreadID Optional parameter to pass in any existing ThreadID value.
758 ///
759 /// \returns The insertion point after the barrier.
761 omp::Directive Kind,
762 bool ForceSimpleCall = false,
763 bool CheckCancelFlag = true);
764
765 /// Generator for '#omp cancel'
766 ///
767 /// \param Loc The location where the directive was encountered.
768 /// \param IfCondition The evaluated 'if' clause expression, if any.
769 /// \param CanceledDirective The kind of directive that is cancled.
770 ///
771 /// \returns The insertion point after the barrier.
773 Value *IfCondition,
774 omp::Directive CanceledDirective);
775
776 /// Generator for '#omp cancellation point'
777 ///
778 /// \param Loc The location where the directive was encountered.
779 /// \param CanceledDirective The kind of directive that is cancled.
780 ///
781 /// \returns The insertion point after the barrier.
783 const LocationDescription &Loc, omp::Directive CanceledDirective);
784
785 /// Creates a ScanInfo object, allocates and returns the pointer.
787
788 /// Generator for '#omp parallel'
789 ///
790 /// \param Loc The insert and source location description.
791 /// \param AllocaIP The insertion point to be used for allocations.
792 /// \param DeallocBlocks The insertion blocks to be used for explicit
793 /// deallocations, if needed.
794 /// \param BodyGenCB Callback that will generate the region code.
795 /// \param PrivCB Callback to copy a given variable (think copy constructor).
796 /// \param FiniCB Callback to finalize variable copies.
797 /// \param IfCondition The evaluated 'if' clause expression, if any.
798 /// \param NumThreads The evaluated 'num_threads' clause expression, if any.
799 /// \param ProcBind The value of the 'proc_bind' clause (see ProcBindKind).
800 /// \param IsCancellable Flag to indicate a cancellable parallel region.
801 ///
802 /// \returns The insertion position *after* the parallel.
804 const LocationDescription &Loc, InsertPointTy AllocaIP,
805 ArrayRef<BasicBlock *> DeallocBlocks, BodyGenCallbackTy BodyGenCB,
806 PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition,
807 Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable);
808
809 /// Generator for the control flow structure of an OpenMP canonical loop.
810 ///
811 /// This generator operates on the logical iteration space of the loop, i.e.
812 /// the caller only has to provide a loop trip count of the loop as defined by
813 /// base language semantics. The trip count is interpreted as an unsigned
814 /// integer. The induction variable passed to \p BodyGenCB will be of the same
815 /// type and run from 0 to \p TripCount - 1. It is up to the callback to
816 /// convert the logical iteration variable to the loop counter variable in the
817 /// loop body.
818 ///
819 /// \param Loc The insert and source location description. The insert
820 /// location can be between two instructions or the end of a
821 /// degenerate block (e.g. a BB under construction).
822 /// \param BodyGenCB Callback that will generate the loop body code.
823 /// \param TripCount Number of iterations the loop body is executed.
824 /// \param Name Base name used to derive BB and instruction names.
825 ///
826 /// \returns An object representing the created control flow structure which
827 /// can be used for loop-associated directives.
830 LoopBodyGenCallbackTy BodyGenCB, Value *TripCount,
831 const Twine &Name = "loop");
832
833 /// Generator for the control flow structure of an OpenMP canonical loops if
834 /// the parent directive has an `inscan` modifier specified.
835 /// If the `inscan` modifier is specified, the region of the parent is
836 /// expected to have a `scan` directive. Based on the clauses in
837 /// scan directive, the body of the loop is split into two loops: Input loop
838 /// and Scan Loop. Input loop contains the code generated for input phase of
839 /// scan and Scan loop contains the code generated for scan phase of scan.
840 /// From the bodyGen callback of these loops, `createScan` would be called
841 /// when a scan directive is encountered from the loop body. `createScan`
842 /// based on whether 1. inclusive or exclusive scan is specified and, 2. input
843 /// loop or scan loop is generated, lowers the body of the for loop
844 /// accordingly.
845 ///
846 /// \param Loc The insert and source location description.
847 /// \param BodyGenCB Callback that will generate the loop body code.
848 /// \param Start Value of the loop counter for the first iterations.
849 /// \param Stop Loop counter values past this will stop the loop.
850 /// \param Step Loop counter increment after each iteration; negative
851 /// means counting down.
852 /// \param IsSigned Whether Start, Stop and Step are signed integers.
853 /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop
854 /// counter.
855 /// \param ComputeIP Insertion point for instructions computing the trip
856 /// count. Can be used to ensure the trip count is available
857 /// at the outermost loop of a loop nest. If not set,
858 /// defaults to the preheader of the generated loop.
859 /// \param Name Base name used to derive BB and instruction names.
860 /// \param ScanRedInfo Pointer to the ScanInfo objected created using
861 /// `ScanInfoInitialize`.
862 ///
863 /// \returns A vector containing Loop Info of Input Loop and Scan Loop.
866 LoopBodyGenCallbackTy BodyGenCB, Value *Start,
867 Value *Stop, Value *Step, bool IsSigned,
868 bool InclusiveStop, InsertPointTy ComputeIP,
869 const Twine &Name, ScanInfo *ScanRedInfo);
870
871 /// Calculate the trip count of a canonical loop.
872 ///
873 /// This allows specifying user-defined loop counter values using increment,
874 /// upper- and lower bounds. To disambiguate the terminology when counting
875 /// downwards, instead of lower bounds we use \p Start for the loop counter
876 /// value in the first body iteration.
877 ///
878 /// Consider the following limitations:
879 ///
880 /// * A loop counter space over all integer values of its bit-width cannot be
881 /// represented. E.g using uint8_t, its loop trip count of 256 cannot be
882 /// stored into an 8 bit integer):
883 ///
884 /// DO I = 0, 255, 1
885 ///
886 /// * Unsigned wrapping is only supported when wrapping only "once"; E.g.
887 /// effectively counting downwards:
888 ///
889 /// for (uint8_t i = 100u; i > 0; i += 127u)
890 ///
891 ///
892 /// TODO: May need to add additional parameters to represent:
893 ///
894 /// * Allow representing downcounting with unsigned integers.
895 ///
896 /// * Sign of the step and the comparison operator might disagree:
897 ///
898 /// for (int i = 0; i < 42; i -= 1u)
899 ///
900 /// \param Loc The insert and source location description.
901 /// \param Start Value of the loop counter for the first iterations.
902 /// \param Stop Loop counter values past this will stop the loop.
903 /// \param Step Loop counter increment after each iteration; negative
904 /// means counting down.
905 /// \param IsSigned Whether Start, Stop and Step are signed integers.
906 /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop
907 /// counter.
908 /// \param Name Base name used to derive instruction names.
909 ///
910 /// \returns The value holding the calculated trip count.
912 const LocationDescription &Loc, Value *Start, Value *Stop, Value *Step,
913 bool IsSigned, bool InclusiveStop, const Twine &Name = "loop");
914
915 /// Generator for the control flow structure of an OpenMP canonical loop.
916 ///
917 /// Instead of a logical iteration space, this allows specifying user-defined
918 /// loop counter values using increment, upper- and lower bounds. To
919 /// disambiguate the terminology when counting downwards, instead of lower
920 /// bounds we use \p Start for the loop counter value in the first body
921 ///
922 /// It calls \see calculateCanonicalLoopTripCount for trip count calculations,
923 /// so limitations of that method apply here as well.
924 ///
925 /// \param Loc The insert and source location description.
926 /// \param BodyGenCB Callback that will generate the loop body code.
927 /// \param Start Value of the loop counter for the first iterations.
928 /// \param Stop Loop counter values past this will stop the loop.
929 /// \param Step Loop counter increment after each iteration; negative
930 /// means counting down.
931 /// \param IsSigned Whether Start, Stop and Step are signed integers.
932 /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop
933 /// counter.
934 /// \param ComputeIP Insertion point for instructions computing the trip
935 /// count. Can be used to ensure the trip count is available
936 /// at the outermost loop of a loop nest. If not set,
937 /// defaults to the preheader of the generated loop.
938 /// \param Name Base name used to derive BB and instruction names.
939 /// \param InScan Whether loop has a scan reduction specified.
940 /// \param ScanRedInfo Pointer to the ScanInfo objected created using
941 /// `ScanInfoInitialize`.
942 ///
943 /// \returns An object representing the created control flow structure which
944 /// can be used for loop-associated directives.
947 Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
948 InsertPointTy ComputeIP = {}, const Twine &Name = "loop",
949 bool InScan = false, ScanInfo *ScanRedInfo = nullptr);
950
951 /// Collapse a loop nest into a single loop.
952 ///
953 /// Merges loops of a loop nest into a single CanonicalLoopNest representation
954 /// that has the same number of innermost loop iterations as the origin loop
955 /// nest. The induction variables of the input loops are derived from the
956 /// collapsed loop's induction variable. This is intended to be used to
957 /// implement OpenMP's collapse clause. Before applying a directive,
958 /// collapseLoops normalizes a loop nest to contain only a single loop and the
959 /// directive's implementation does not need to handle multiple loops itself.
960 /// This does not remove the need to handle all loop nest handling by
961 /// directives, such as the ordered(<n>) clause or the simd schedule-clause
962 /// modifier of the worksharing-loop directive.
963 ///
964 /// Example:
965 /// \code
966 /// for (int i = 0; i < 7; ++i) // Canonical loop "i"
967 /// for (int j = 0; j < 9; ++j) // Canonical loop "j"
968 /// body(i, j);
969 /// \endcode
970 ///
971 /// After collapsing with Loops={i,j}, the loop is changed to
972 /// \code
973 /// for (int ij = 0; ij < 63; ++ij) {
974 /// int i = ij / 9;
975 /// int j = ij % 9;
976 /// body(i, j);
977 /// }
978 /// \endcode
979 ///
980 /// In the current implementation, the following limitations apply:
981 ///
982 /// * All input loops have an induction variable of the same type.
983 ///
984 /// * The collapsed loop will have the same trip count integer type as the
985 /// input loops. Therefore it is possible that the collapsed loop cannot
986 /// represent all iterations of the input loops. For instance, assuming a
987 /// 32 bit integer type, and two input loops both iterating 2^16 times, the
988 /// theoretical trip count of the collapsed loop would be 2^32 iteration,
989 /// which cannot be represented in an 32-bit integer. Behavior is undefined
990 /// in this case.
991 ///
992 /// * The trip counts of every input loop must be available at \p ComputeIP.
993 /// Non-rectangular loops are not yet supported.
994 ///
995 /// * At each nest level, code between a surrounding loop and its nested loop
996 /// is hoisted into the loop body, and such code will be executed more
997 /// often than before collapsing (or not at all if any inner loop iteration
998 /// has a trip count of 0). This is permitted by the OpenMP specification.
999 ///
1000 /// \param DL Debug location for instructions added for collapsing,
1001 /// such as instructions to compute/derive the input loop's
1002 /// induction variables.
1003 /// \param Loops Loops in the loop nest to collapse. Loops are specified
1004 /// from outermost-to-innermost and every control flow of a
1005 /// loop's body must pass through its directly nested loop.
1006 /// \param ComputeIP Where additional instruction that compute the collapsed
1007 /// trip count. If not set, defaults to before the generated
1008 /// loop.
1009 ///
1010 /// \returns The CanonicalLoopInfo object representing the collapsed loop.
1013 InsertPointTy ComputeIP);
1014
1015 /// Get the default alignment value for given target
1016 ///
1017 /// \param TargetTriple Target triple
1018 /// \param Features StringMap which describes extra CPU features
1019 LLVM_ABI static unsigned
1020 getOpenMPDefaultSimdAlign(const Triple &TargetTriple,
1021 const StringMap<bool> &Features);
1022
1023 /// Retrieve (or create if non-existent) the address of a declare
1024 /// target variable, used in conjunction with registerTargetGlobalVariable
1025 /// to create declare target global variables.
1026 ///
1027 /// \param CaptureClause - enumerator corresponding to the OpenMP capture
1028 /// clause used in conjunction with the variable being registered (link,
1029 /// to, enter).
1030 /// \param DeviceClause - enumerator corresponding to the OpenMP capture
1031 /// clause used in conjunction with the variable being registered (nohost,
1032 /// host, any)
1033 /// \param IsDeclaration - boolean stating if the variable being registered
1034 /// is a declaration-only and not a definition
1035 /// \param IsExternallyVisible - boolean stating if the variable is externally
1036 /// visible
1037 /// \param EntryInfo - Unique entry information for the value generated
1038 /// using getTargetEntryUniqueInfo, used to name generated pointer references
1039 /// to the declare target variable
1040 /// \param MangledName - the mangled name of the variable being registered
1041 /// \param GeneratedRefs - references generated by invocations of
1042 /// registerTargetGlobalVariable invoked from getAddrOfDeclareTargetVar,
1043 /// these are required by Clang for book keeping.
1044 /// \param OpenMPSIMD - if OpenMP SIMD mode is currently enabled
1045 /// \param TargetTriple - The OpenMP device target triple we are compiling
1046 /// for
1047 /// \param LlvmPtrTy - The type of the variable we are generating or
1048 /// retrieving an address for
1049 /// \param GlobalInitializer - a lambda function which creates a constant
1050 /// used for initializing a pointer reference to the variable in certain
1051 /// cases. If a nullptr is passed, it will default to utilising the original
1052 /// variable to initialize the pointer reference.
1053 /// \param VariableLinkage - a lambda function which returns the variables
1054 /// linkage type, if unspecified and a nullptr is given, it will instead
1055 /// utilise the linkage stored on the existing global variable in the
1056 /// LLVMModule.
1060 bool IsDeclaration, bool IsExternallyVisible,
1061 TargetRegionEntryInfo EntryInfo, StringRef MangledName,
1062 std::vector<GlobalVariable *> &GeneratedRefs, bool OpenMPSIMD,
1063 std::vector<Triple> TargetTriple, Type *LlvmPtrTy,
1064 std::function<Constant *()> GlobalInitializer,
1065 std::function<GlobalValue::LinkageTypes()> VariableLinkage);
1066
1067 /// Registers a target variable for device or host.
1068 ///
1069 /// \param CaptureClause - enumerator corresponding to the OpenMP capture
1070 /// clause used in conjunction with the variable being registered (link,
1071 /// to, enter).
1072 /// \param DeviceClause - enumerator corresponding to the OpenMP capture
1073 /// clause used in conjunction with the variable being registered (nohost,
1074 /// host, any)
1075 /// \param IsDeclaration - boolean stating if the variable being registered
1076 /// is a declaration-only and not a definition
1077 /// \param IsExternallyVisible - boolean stating if the variable is externally
1078 /// visible
1079 /// \param EntryInfo - Unique entry information for the value generated
1080 /// using getTargetEntryUniqueInfo, used to name generated pointer references
1081 /// to the declare target variable
1082 /// \param MangledName - the mangled name of the variable being registered
1083 /// \param GeneratedRefs - references generated by invocations of
1084 /// registerTargetGlobalVariable these are required by Clang for book
1085 /// keeping.
1086 /// \param OpenMPSIMD - if OpenMP SIMD mode is currently enabled
1087 /// \param TargetTriple - The OpenMP device target triple we are compiling
1088 /// for
1089 /// \param GlobalInitializer - a lambda function which creates a constant
1090 /// used for initializing a pointer reference to the variable in certain
1091 /// cases. If a nullptr is passed, it will default to utilising the original
1092 /// variable to initialize the pointer reference.
1093 /// \param VariableLinkage - a lambda function which returns the variables
1094 /// linkage type, if unspecified and a nullptr is given, it will instead
1095 /// utilise the linkage stored on the existing global variable in the
1096 /// LLVMModule.
1097 /// \param LlvmPtrTy - The type of the variable we are generating or
1098 /// retrieving an address for
1099 /// \param Addr - the original llvm value (addr) of the variable to be
1100 /// registered
1104 bool IsDeclaration, bool IsExternallyVisible,
1105 TargetRegionEntryInfo EntryInfo, StringRef MangledName,
1106 std::vector<GlobalVariable *> &GeneratedRefs, bool OpenMPSIMD,
1107 std::vector<Triple> TargetTriple,
1108 std::function<Constant *()> GlobalInitializer,
1109 std::function<GlobalValue::LinkageTypes()> VariableLinkage,
1110 Type *LlvmPtrTy, Constant *Addr);
1111
1112 /// Get the offset of the OMP_MAP_MEMBER_OF field.
1113 LLVM_ABI unsigned getFlagMemberOffset();
1114
1115 /// Get OMP_MAP_MEMBER_OF flag with extra bits reserved based on
1116 /// the position given.
1117 /// \param Position - A value indicating the position of the parent
1118 /// of the member in the kernel argument structure, often retrieved
1119 /// by the parents position in the combined information vectors used
1120 /// to generate the structure itself. Multiple children (member's of)
1121 /// with the same parent will use the same returned member flag.
1123
1124 /// Given an initial flag set, this function modifies it to contain
1125 /// the passed in MemberOfFlag generated from the getMemberOfFlag
1126 /// function. The results are dependent on the existing flag bits
1127 /// set in the original flag set.
1128 /// \param Flags - The original set of flags to be modified with the
1129 /// passed in MemberOfFlag.
1130 /// \param MemberOfFlag - A modified OMP_MAP_MEMBER_OF flag, adjusted
1131 /// slightly based on the getMemberOfFlag which adjusts the flag bits
1132 /// based on the members position in its parent.
1133 LLVM_ABI void
1135 omp::OpenMPOffloadMappingFlags MemberOfFlag);
1136
1137private:
1138 /// Modifies the canonical loop to be a statically-scheduled workshare loop
1139 /// which is executed on the device
1140 ///
1141 /// This takes a \p CLI representing a canonical loop, such as the one
1142 /// created by \see createCanonicalLoop and emits additional instructions to
1143 /// turn it into a workshare loop. In particular, it calls to an OpenMP
1144 /// runtime function in the preheader to call OpenMP device rtl function
1145 /// which handles worksharing of loop body interations.
1146 ///
1147 /// \param DL Debug location for instructions added for the
1148 /// workshare-loop construct itself.
1149 /// \param CLI A descriptor of the canonical loop to workshare.
1150 /// \param AllocaIP An insertion point for Alloca instructions usable in the
1151 /// preheader of the loop.
1152 /// \param LoopType Information about type of loop worksharing.
1153 /// It corresponds to type of loop workshare OpenMP pragma.
1154 /// \param NoLoop If true, no-loop code is generated.
1155 ///
1156 /// \returns Point where to insert code after the workshare construct.
1157 InsertPointTy applyWorkshareLoopTarget(DebugLoc DL, CanonicalLoopInfo *CLI,
1158 InsertPointTy AllocaIP,
1159 omp::WorksharingLoopType LoopType,
1160 bool NoLoop);
1161
1162 /// Modifies the canonical loop to be a statically-scheduled workshare loop.
1163 ///
1164 /// This takes a \p LoopInfo representing a canonical loop, such as the one
1165 /// created by \p createCanonicalLoop and emits additional instructions to
1166 /// turn it into a workshare loop. In particular, it calls to an OpenMP
1167 /// runtime function in the preheader to obtain the loop bounds to be used in
1168 /// the current thread, updates the relevant instructions in the canonical
1169 /// loop and calls to an OpenMP runtime finalization function after the loop.
1170 ///
1171 /// \param DL Debug location for instructions added for the
1172 /// workshare-loop construct itself.
1173 /// \param CLI A descriptor of the canonical loop to workshare.
1174 /// \param AllocaIP An insertion point for Alloca instructions usable in the
1175 /// preheader of the loop.
1176 /// \param NeedsBarrier Indicates whether a barrier must be inserted after
1177 /// the loop.
1178 /// \param LoopType Type of workshare loop.
1179 /// \param HasDistSchedule Defines if the clause being lowered is
1180 /// dist_schedule as this is handled slightly differently
1181 /// \param DistScheduleSchedType Defines the Schedule Type for the Distribute
1182 /// loop. Defaults to None if no Distribute loop is present.
1183 ///
1184 /// \returns Point where to insert code after the workshare construct.
1185 InsertPointOrErrorTy applyStaticWorkshareLoop(
1187 omp::WorksharingLoopType LoopType, bool NeedsBarrier,
1188 bool HasDistSchedule = false,
1189 omp::OMPScheduleType DistScheduleSchedType = omp::OMPScheduleType::None);
1190
1191 /// Modifies the canonical loop a statically-scheduled workshare loop with a
1192 /// user-specified chunk size.
1193 ///
1194 /// \param DL Debug location for instructions added for the
1195 /// workshare-loop construct itself.
1196 /// \param CLI A descriptor of the canonical loop to workshare.
1197 /// \param AllocaIP An insertion point for Alloca instructions usable in
1198 /// the preheader of the loop.
1199 /// \param NeedsBarrier Indicates whether a barrier must be inserted after the
1200 /// loop.
1201 /// \param ChunkSize The user-specified chunk size.
1202 /// \param SchedType Optional type of scheduling to be passed to the init
1203 /// function.
1204 /// \param DistScheduleChunkSize The size of dist_shcedule chunk considered
1205 /// as a unit when
1206 /// scheduling. If \p nullptr, defaults to 1.
1207 /// \param DistScheduleSchedType Defines the Schedule Type for the Distribute
1208 /// loop. Defaults to None if no Distribute loop is present.
1209 ///
1210 /// \returns Point where to insert code after the workshare construct.
1211 InsertPointOrErrorTy applyStaticChunkedWorkshareLoop(
1213 bool NeedsBarrier, Value *ChunkSize,
1214 omp::OMPScheduleType SchedType =
1216 Value *DistScheduleChunkSize = nullptr,
1217 omp::OMPScheduleType DistScheduleSchedType = omp::OMPScheduleType::None);
1218
1219 /// Modifies the canonical loop to be a dynamically-scheduled workshare loop.
1220 ///
1221 /// This takes a \p LoopInfo representing a canonical loop, such as the one
1222 /// created by \p createCanonicalLoop and emits additional instructions to
1223 /// turn it into a workshare loop. In particular, it calls to an OpenMP
1224 /// runtime function in the preheader to obtain, and then in each iteration
1225 /// to update the loop counter.
1226 ///
1227 /// \param DL Debug location for instructions added for the
1228 /// workshare-loop construct itself.
1229 /// \param CLI A descriptor of the canonical loop to workshare.
1230 /// \param AllocaIP An insertion point for Alloca instructions usable in the
1231 /// preheader of the loop.
1232 /// \param SchedType Type of scheduling to be passed to the init function.
1233 /// \param NeedsBarrier Indicates whether a barrier must be insterted after
1234 /// the loop.
1235 /// \param Chunk The size of loop chunk considered as a unit when
1236 /// scheduling. If \p nullptr, defaults to 1.
1237 ///
1238 /// \returns Point where to insert code after the workshare construct.
1239 InsertPointOrErrorTy applyDynamicWorkshareLoop(DebugLoc DL,
1240 CanonicalLoopInfo *CLI,
1241 InsertPointTy AllocaIP,
1242 omp::OMPScheduleType SchedType,
1243 bool NeedsBarrier,
1244 Value *Chunk = nullptr);
1245
1246 /// Create alternative version of the loop to support if clause
1247 ///
1248 /// OpenMP if clause can require to generate second loop. This loop
1249 /// will be executed when if clause condition is not met. createIfVersion
1250 /// adds branch instruction to the copied loop if \p ifCond is not met.
1251 ///
1252 /// \param Loop Original loop which should be versioned.
1253 /// \param IfCond Value which corresponds to if clause condition
1254 /// \param VMap Value to value map to define relation between
1255 /// original and copied loop values and loop blocks.
1256 /// \param NamePrefix Optional name prefix for if.then if.else blocks.
1257 void createIfVersion(CanonicalLoopInfo *Loop, Value *IfCond,
1259 LoopAnalysis &LIA, LoopInfo &LI, llvm::Loop *L,
1260 const Twine &NamePrefix = "");
1261
1262 /// Creates a task duplication function to be passed to kmpc_taskloop.
1263 ///
1264 /// The OpenMP runtime defines this function as taking the destination
1265 /// kmp_task_t, source kmp_task_t, and a lastprivate flag. This function is
1266 /// called on the source and destination tasks after the source task has been
1267 /// duplicated to create the destination task. At this point the destination
1268 /// task has been otherwise set up from the runtime's perspective, but this
1269 /// function is needed to fix up any data for the duplicated task e.g. private
1270 /// variables with non-trivial constructors.
1271 ///
1272 /// \param PrivatesTy The type of the privates structure for the task.
1273 /// \param PrivatesIndex The index inside the privates structure containing
1274 /// the data for the callback.
1275 /// \param DupCB The callback to generate the duplication code. See
1276 /// documentation for \ref TaskDupCallbackTy. This can be
1277 /// nullptr.
1278 Expected<Value *> createTaskDuplicationFunction(Type *PrivatesTy,
1279 int32_t PrivatesIndex,
1280 TaskDupCallbackTy DupCB);
1281
1282public:
1283 /// Modifies the canonical loop to be a workshare loop.
1284 ///
1285 /// This takes a \p LoopInfo representing a canonical loop, such as the one
1286 /// created by \p createCanonicalLoop and emits additional instructions to
1287 /// turn it into a workshare loop. In particular, it calls to an OpenMP
1288 /// runtime function in the preheader to obtain the loop bounds to be used in
1289 /// the current thread, updates the relevant instructions in the canonical
1290 /// loop and calls to an OpenMP runtime finalization function after the loop.
1291 ///
1292 /// The concrete transformation is done by applyStaticWorkshareLoop,
1293 /// applyStaticChunkedWorkshareLoop, or applyDynamicWorkshareLoop, depending
1294 /// on the value of \p SchedKind and \p ChunkSize.
1295 ///
1296 /// \param DL Debug location for instructions added for the
1297 /// workshare-loop construct itself.
1298 /// \param CLI A descriptor of the canonical loop to workshare.
1299 /// \param AllocaIP An insertion point for Alloca instructions usable in the
1300 /// preheader of the loop.
1301 /// \param NeedsBarrier Indicates whether a barrier must be insterted after
1302 /// the loop.
1303 /// \param SchedKind Scheduling algorithm to use.
1304 /// \param ChunkSize The chunk size for the inner loop.
1305 /// \param HasSimdModifier Whether the simd modifier is present in the
1306 /// schedule clause.
1307 /// \param HasMonotonicModifier Whether the monotonic modifier is present in
1308 /// the schedule clause.
1309 /// \param HasNonmonotonicModifier Whether the nonmonotonic modifier is
1310 /// present in the schedule clause.
1311 /// \param HasOrderedClause Whether the (parameterless) ordered clause is
1312 /// present.
1313 /// \param LoopType Information about type of loop worksharing.
1314 /// It corresponds to type of loop workshare OpenMP pragma.
1315 /// \param NoLoop If true, no-loop code is generated.
1316 /// \param HasDistSchedule Defines if the clause being lowered is
1317 /// dist_schedule as this is handled slightly differently
1318 ///
1319 /// \param DistScheduleChunkSize The chunk size for dist_schedule loop
1320 ///
1321 /// \returns Point where to insert code after the workshare construct.
1324 bool NeedsBarrier,
1325 llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default,
1326 Value *ChunkSize = nullptr, bool HasSimdModifier = false,
1327 bool HasMonotonicModifier = false, bool HasNonmonotonicModifier = false,
1328 bool HasOrderedClause = false,
1329 omp::WorksharingLoopType LoopType =
1331 bool NoLoop = false, bool HasDistSchedule = false,
1332 Value *DistScheduleChunkSize = nullptr);
1333
1334 /// Tile a loop nest.
1335 ///
1336 /// Tiles the loops of \p Loops by the tile sizes in \p TileSizes. Loops in
1337 /// \p/ Loops must be perfectly nested, from outermost to innermost loop
1338 /// (i.e. Loops.front() is the outermost loop). The trip count llvm::Value
1339 /// of every loop and every tile sizes must be usable in the outermost
1340 /// loop's preheader. This implies that the loop nest is rectangular.
1341 ///
1342 /// Example:
1343 /// \code
1344 /// for (int i = 0; i < 15; ++i) // Canonical loop "i"
1345 /// for (int j = 0; j < 14; ++j) // Canonical loop "j"
1346 /// body(i, j);
1347 /// \endcode
1348 ///
1349 /// After tiling with Loops={i,j} and TileSizes={5,7}, the loop is changed to
1350 /// \code
1351 /// for (int i1 = 0; i1 < 3; ++i1)
1352 /// for (int j1 = 0; j1 < 2; ++j1)
1353 /// for (int i2 = 0; i2 < 5; ++i2)
1354 /// for (int j2 = 0; j2 < 7; ++j2)
1355 /// body(i1*3+i2, j1*3+j2);
1356 /// \endcode
1357 ///
1358 /// The returned vector are the loops {i1,j1,i2,j2}. The loops i1 and j1 are
1359 /// referred to the floor, and the loops i2 and j2 are the tiles. Tiling also
1360 /// handles non-constant trip counts, non-constant tile sizes and trip counts
1361 /// that are not multiples of the tile size. In the latter case the tile loop
1362 /// of the last floor-loop iteration will have fewer iterations than specified
1363 /// as its tile size.
1364 ///
1365 ///
1366 /// @param DL Debug location for instructions added by tiling, for
1367 /// instance the floor- and tile trip count computation.
1368 /// @param Loops Loops to tile. The CanonicalLoopInfo objects are
1369 /// invalidated by this method, i.e. should not used after
1370 /// tiling.
1371 /// @param TileSizes For each loop in \p Loops, the tile size for that
1372 /// dimensions.
1373 ///
1374 /// \returns A list of generated loops. Contains twice as many loops as the
1375 /// input loop nest; the first half are the floor loops and the
1376 /// second half are the tile loops.
1377 LLVM_ABI std::vector<CanonicalLoopInfo *>
1379 ArrayRef<Value *> TileSizes);
1380
1381 /// Fuse a sequence of loops.
1382 ///
1383 /// Fuses the loops of \p Loops.
1384 /// The merging of the loops is done in the following structure:
1385 ///
1386 /// Example:
1387 /// \code
1388 /// for (int i = lb0; i < ub0; i += st0) // trip count is calculated as:
1389 /// body(i) // tc0 = (ub0 - lb0 + st0) / st0
1390 /// for (int j = lb1; j < ub1; j += st1)
1391 /// body(j);
1392 ///
1393 /// ...
1394 ///
1395 /// for (int k = lbk; j < ubk; j += stk)
1396 /// body(k);
1397 /// \endcode
1398 ///
1399 /// After fusing the loops a single loop is left:
1400 /// \code
1401 /// for (fuse.index = 0; fuse.index < max(tc0, tc1, ... tck); ++fuse.index) {
1402 /// if (fuse.index < tc0){
1403 /// iv0 = lb0 + st0 * fuse.index;
1404 /// original.index0 = iv0
1405 /// body(0);
1406 /// }
1407 /// if (fuse.index < tc1){
1408 /// iv1 = lb1 + st1 * fuse.index;
1409 /// original.index1 = iv1
1410 /// body(1);
1411 /// }
1412 ///
1413 /// ...
1414 ///
1415 /// if (fuse.index < tck){
1416 /// ivk = lbk + stk * fuse.index;
1417 /// original.indexk = ivk
1418 /// body(k);
1419 /// }
1420 /// }
1421 /// \endcode
1422 ///
1423 ///
1424 /// @param DL Debug location for instructions added by fusion.
1425 ///
1426 /// @param Loops Loops to fuse. The CanonicalLoopInfo objects are
1427 /// invalidated by this method, i.e. should not used after
1428 /// fusion.
1429 ///
1430 /// \returns A single loop generated by the loop fusion
1433
1434 /// Fully unroll a loop.
1435 ///
1436 /// Instead of unrolling the loop immediately (and duplicating its body
1437 /// instructions), it is deferred to LLVM's LoopUnrollPass by adding loop
1438 /// metadata.
1439 ///
1440 /// \param DL Debug location for instructions added by unrolling.
1441 /// \param Loop The loop to unroll. The loop will be invalidated.
1443
1444 /// Fully or partially unroll a loop. How the loop is unrolled is determined
1445 /// using LLVM's LoopUnrollPass.
1446 ///
1447 /// \param DL Debug location for instructions added by unrolling.
1448 /// \param Loop The loop to unroll. The loop will be invalidated.
1450
1451 /// Partially unroll a loop.
1452 ///
1453 /// The CanonicalLoopInfo of the unrolled loop for use with chained
1454 /// loop-associated directive can be requested using \p UnrolledCLI. Not
1455 /// needing the CanonicalLoopInfo allows more efficient code generation by
1456 /// deferring the actual unrolling to the LoopUnrollPass using loop metadata.
1457 /// A loop-associated directive applied to the unrolled loop needs to know the
1458 /// new trip count which means that if using a heuristically determined unroll
1459 /// factor (\p Factor == 0), that factor must be computed immediately. We are
1460 /// using the same logic as the LoopUnrollPass to derived the unroll factor,
1461 /// but which assumes that some canonicalization has taken place (e.g.
1462 /// Mem2Reg, LICM, GVN, Inlining, etc.). That is, the heuristic will perform
1463 /// better when the unrolled loop's CanonicalLoopInfo is not needed.
1464 ///
1465 /// \param DL Debug location for instructions added by unrolling.
1466 /// \param Loop The loop to unroll. The loop will be invalidated.
1467 /// \param Factor The factor to unroll the loop by. A factor of 0
1468 /// indicates that a heuristic should be used to determine
1469 /// the unroll-factor.
1470 /// \param UnrolledCLI If non-null, receives the CanonicalLoopInfo of the
1471 /// partially unrolled loop. Otherwise, uses loop metadata
1472 /// to defer unrolling to the LoopUnrollPass.
1474 int32_t Factor,
1475 CanonicalLoopInfo **UnrolledCLI);
1476
1477 /// Add metadata to simd-ize a loop. If IfCond is not nullptr, the loop
1478 /// is cloned. The metadata which prevents vectorization is added to
1479 /// to the cloned loop. The cloned loop is executed when ifCond is evaluated
1480 /// to false.
1481 ///
1482 /// \param Loop The loop to simd-ize.
1483 /// \param AlignedVars The map which containts pairs of the pointer
1484 /// and its corresponding alignment.
1485 /// \param IfCond The value which corresponds to the if clause
1486 /// condition.
1487 /// \param Order The enum to map order clause.
1488 /// \param Simdlen The Simdlen length to apply to the simd loop.
1489 /// \param Safelen The Safelen length to apply to the simd loop.
1491 MapVector<Value *, Value *> AlignedVars,
1492 Value *IfCond, omp::OrderKind Order,
1493 ConstantInt *Simdlen, ConstantInt *Safelen);
1494
1495 /// Generator for '#omp flush'
1496 ///
1497 /// \param Loc The location where the flush directive was encountered
1498 LLVM_ABI void createFlush(const LocationDescription &Loc);
1499
1500 /// Generator for '#omp taskwait'
1501 ///
1502 /// \param Loc The location where the taskwait directive was encountered.
1503 LLVM_ABI void createTaskwait(const LocationDescription &Loc);
1504
1505 /// Generator for '#omp taskyield'
1506 ///
1507 /// \param Loc The location where the taskyield directive was encountered.
1508 LLVM_ABI void createTaskyield(const LocationDescription &Loc);
1509
1510 /// A struct to pack the relevant information for an OpenMP depend clause.
1520
1521 /// A struct to pack static and dynamic dependency information for a task.
1522 ///
1523 /// For fixed-count (non-iterator) dependencies, callers populate \p Deps
1524 /// and the builder allocates and fills the kmp_depend_info array internally.
1525 /// For iterator-based dependencies, the caller pre-builds the array and
1526 /// sets \p NumDeps and \p DepArray directly.
1528 SmallVector<DependData> Deps; // vector of dependencies
1529 Value *NumDeps; // number of kmp_depend_info entries (used by iterator path)
1530 Value *DepArray; // kmp_depend_info array (used by iterator path)
1531
1532 DependenciesInfo() : Deps(), NumDeps(nullptr), DepArray(nullptr) {}
1535
1536 bool empty() const { return Deps.empty() && DepArray == nullptr; }
1537 };
1538
1539 /// Store one kmp_depend_info entry at the given \p Entry pointer.
1540 LLVM_ABI void emitTaskDependency(IRBuilderBase &Builder, Value *Entry,
1541 const DependData &Dep);
1542
1543 /// Return the LLVM struct type matching runtime `kmp_task_affinity_info_t`.
1544 /// `{ kmp_intptr_t base_addr; size_t len; flags (bitfield storage as i32) }`
1546
1547 /// A struct to pack the relevant information for an OpenMP affinity clause.
1549 Value *Count; // number of kmp_task_affinity_info_t entries
1550 Value *Info; // kmp_task_affinity_info_t
1551 };
1552
1553 /// Generator for `#omp taskloop`
1554 ///
1555 /// \param Loc The location where the taskloop construct was encountered.
1556 /// \param AllocaIP The insertion point to be used for alloca instructions.
1557 /// \param DeallocBlocks The list of insertion blocks where explicit
1558 /// deallocations, if needed, should be placed.
1559 /// \param BodyGenCB Callback that will generate the region code.
1560 /// \param LoopInfo Callback that return the CLI
1561 /// \param LBVal Lowerbound value of loop
1562 /// \param UBVal Upperbound value of loop
1563 /// \param StepVal Step value of loop
1564 /// \param Untied True if the task is untied, false if the task is tied.
1565 /// \param IfCond i1 value. If it evaluates to `false`, an undeferred
1566 /// task is generated, and the encountering thread must
1567 /// suspend the current task region, for which execution
1568 /// cannot be resumed until execution of the structured
1569 /// block that is associated with the generated task is
1570 /// completed.
1571 /// \param GrainSize Value of the GrainSize/Num of Tasks if present
1572 /// \param NoGroup False if NoGroup is defined, true if not
1573 /// \param Sched If Grainsize is defined, Sched is 1. Num Tasks, Shed is 2.
1574 /// Otherwise Sched is 0
1575 /// \param Final i1 value which is `true` if the task is final, `false` if the
1576 /// task is not final.
1577 /// \param Mergeable If the given task is `mergeable`
1578 /// \param Priority `priority-value' specifies the execution order of the
1579 /// tasks that is generated by the construct
1580 /// \param NumOfCollapseLoops Defines the number of loops that are being
1581 /// collapsed. The default value is 1, as thats the value when collapse is not
1582 /// used.
1583 /// \param DupCB The callback to generate the duplication code. See
1584 /// documentation for \ref TaskDupCallbackTy. This can be nullptr.
1585 /// \param TaskContextStructPtrVal If non-null, a pointer to to be placed
1586 /// immediately after the {lower bound, upper
1587 /// bound, step} values in the task data.
1588 LLVM_ABI InsertPointOrErrorTy createTaskloop(
1589 const LocationDescription &Loc, InsertPointTy AllocaIP,
1590 ArrayRef<BasicBlock *> DeallocBlocks, BodyGenCallbackTy BodyGenCB,
1592 Value *LBVal, Value *UBVal, Value *StepVal, bool Untied = false,
1593 Value *IfCond = nullptr, Value *GrainSize = nullptr, bool NoGroup = false,
1594 int Sched = 0, Value *Final = nullptr, bool Mergeable = false,
1595 Value *Priority = nullptr, uint64_t NumOfCollapseLoops = 1,
1596 TaskDupCallbackTy DupCB = nullptr,
1597 Value *TaskContextStructPtrVal = nullptr);
1598
1599 /// Generator for `#omp task`
1600 ///
1601 /// \param Loc The location where the task construct was encountered.
1602 /// \param AllocaIP The insertion point to be used for allocations.
1603 /// \param DeallocBlocks The insertion blocks to be used for explicit
1604 /// deallocations, if needed.
1605 /// \param BodyGenCB Callback that will generate the region code.
1606 /// \param Tied True if the task is tied, false if the task is untied.
1607 /// \param Final i1 value which is `true` if the task is final, `false` if the
1608 /// task is not final.
1609 /// \param IfCondition i1 value. If it evaluates to `false`, an undeferred
1610 /// task is generated, and the encountering thread must
1611 /// suspend the current task region, for which execution
1612 /// cannot be resumed until execution of the structured
1613 /// block that is associated with the generated task is
1614 /// completed.
1615 /// \param Dependencies Dependencies info holding either a vector of
1616 /// DependData objects or a pre-built dependency array.
1617 /// \param Affinities AffinityData object holding information of accumulated
1618 /// affinities as specified by the 'affinity' clause.
1619 /// \param EventHandle If present, signifies the event handle as part of
1620 /// the detach clause
1621 /// \param Mergeable If the given task is `mergeable`
1622 /// \param priority `priority-value' specifies the execution order of the
1623 /// tasks that is generated by the construct
1625 const LocationDescription &Loc, InsertPointTy AllocaIP,
1626 ArrayRef<BasicBlock *> DeallocBlocks, BodyGenCallbackTy BodyGenCB,
1627 bool Tied = true, Value *Final = nullptr, Value *IfCondition = nullptr,
1628 const DependenciesInfo &Dependencies = {},
1629 const AffinityData &Affinities = {}, bool Mergeable = false,
1630 Value *EventHandle = nullptr, Value *Priority = nullptr);
1631
1632 /// Generator for the taskgroup construct
1633 ///
1634 /// \param Loc The location where the taskgroup construct was encountered.
1635 /// \param AllocaIP The insertion point to be used for allocations.
1636 /// \param DeallocBlocks The insertion blocks to be used for explicit
1637 /// deallocation instructions, if needed.
1638 /// \param BodyGenCB Callback that will generate the region code.
1640 const LocationDescription &Loc, InsertPointTy AllocaIP,
1641 ArrayRef<BasicBlock *> DeallocBlocks, BodyGenCallbackTy BodyGenCB);
1642
1644 std::function<std::tuple<std::string, uint64_t>()>;
1645
1646 /// Creates a unique info for a target entry when provided a filename and
1647 /// line number from.
1648 ///
1649 /// \param CallBack A callback function which should return filename the entry
1650 /// resides in as well as the line number for the target entry
1651 /// \param ParentName The name of the parent the target entry resides in, if
1652 /// any.
1655 vfs::FileSystem &VFS, StringRef ParentName = "");
1656
1657 /// Enum class for the RedctionGen CallBack type to be used.
1659
1660 /// ReductionGen CallBack for Clang
1661 ///
1662 /// \param CodeGenIP InsertPoint for CodeGen.
1663 /// \param Index Index of the ReductionInfo to generate code for.
1664 /// \param LHSPtr Optionally used by Clang to return the LHSPtr it used for
1665 /// codegen, used for fixup later.
1666 /// \param RHSPtr Optionally used by Clang to
1667 /// return the RHSPtr it used for codegen, used for fixup later.
1668 /// \param CurFn Optionally used by Clang to pass in the Current Function as
1669 /// Clang context may be old.
1671 std::function<InsertPointTy(InsertPointTy CodeGenIP, unsigned Index,
1672 Value **LHS, Value **RHS, Function *CurFn)>;
1673
1674 /// ReductionGen CallBack for MLIR
1675 ///
1676 /// \param CodeGenIP InsertPoint for CodeGen.
1677 /// \param LHS Pass in the LHS Value to be used for CodeGen.
1678 /// \param RHS Pass in the RHS Value to be used for CodeGen.
1680 InsertPointTy CodeGenIP, Value *LHS, Value *RHS, Value *&Res)>;
1681
1682 /// Functions used to generate atomic reductions. Such functions take two
1683 /// Values representing pointers to LHS and RHS of the reduction, as well as
1684 /// the element type of these pointers. They are expected to atomically
1685 /// update the LHS to the reduced value.
1687 InsertPointTy, Type *, Value *, Value *)>;
1688
1690 InsertPointTy, Value *ByRefVal, Value *&Res)>;
1691
1692 /// Enum class for reduction evaluation types scalar, complex and aggregate.
1694
1695 /// Information about an OpenMP reduction.
1710
1716
1717 /// Reduction element type, must match pointee type of variable. For by-ref
1718 /// reductions, this would be just an opaque `ptr`.
1720
1721 /// Reduction variable of pointer type.
1723
1724 /// Thread-private partial reduction variable.
1726
1727 /// Reduction evaluation kind - scalar, complex or aggregate.
1729
1730 /// Callback for generating the reduction body. The IR produced by this will
1731 /// be used to combine two values in a thread-safe context, e.g., under
1732 /// lock or within the same thread, and therefore need not be atomic.
1734
1735 /// Clang callback for generating the reduction body. The IR produced by
1736 /// this will be used to combine two values in a thread-safe context, e.g.,
1737 /// under lock or within the same thread, and therefore need not be atomic.
1739
1740 /// Callback for generating the atomic reduction body, may be null. The IR
1741 /// produced by this will be used to atomically combine two values during
1742 /// reduction. If null, the implementation will use the non-atomic version
1743 /// along with the appropriate synchronization mechanisms.
1745
1747
1748 /// For by-ref reductions, we need to keep track of 2 extra types that are
1749 /// potentially different:
1750 /// * The allocated type is the type of the storage allocated by the
1751 /// reduction op's `alloc` region. For example, for allocatables and arrays,
1752 /// this type would be the descriptor/box struct.
1754
1755 /// * The by-ref element type is the type of the actual storage needed for
1756 /// the data of the allocatable or array. For example, an float allocatable
1757 /// of would need some float storage to store intermediate reduction
1758 /// results.
1760 };
1761
1762 enum class CopyAction : unsigned {
1763 // RemoteLaneToThread: Copy over a Reduce list from a remote lane in
1764 // the warp using shuffle instructions.
1766 // ThreadCopy: Make a copy of a Reduce list on the thread's stack.
1768 };
1769
1775
1776 /// Supporting functions for Reductions CodeGen.
1777private:
1778 /// Get the id of the current thread on the GPU.
1779 Value *getGPUThreadID();
1780
1781 /// Get the GPU warp size.
1782 Value *getGPUWarpSize();
1783
1784 /// Get the id of the warp in the block.
1785 /// We assume that the warp size is 32, which is always the case
1786 /// on the NVPTX device, to generate more efficient code.
1787 Value *getNVPTXWarpID();
1788
1789 /// Get the id of the current lane in the Warp.
1790 /// We assume that the warp size is 32, which is always the case
1791 /// on the NVPTX device, to generate more efficient code.
1792 Value *getNVPTXLaneID();
1793
1794 /// Cast value to the specified type.
1795 Value *castValueToType(InsertPointTy AllocaIP, Value *From, Type *ToType);
1796
1797 /// This function creates calls to one of two shuffle functions to copy
1798 /// variables between lanes in a warp.
1799 Value *createRuntimeShuffleFunction(InsertPointTy AllocaIP, Value *Element,
1800 Type *ElementType, Value *Offset);
1801
1802 /// Function to shuffle over the value from the remote lane.
1803 void shuffleAndStore(InsertPointTy AllocaIP, Value *SrcAddr, Value *DstAddr,
1804 Type *ElementType, Value *Offset, Type *ReductionArrayTy,
1805 bool IsByRefElem);
1806
1807 /// Emit instructions to copy a Reduce list, which contains partially
1808 /// aggregated values, in the specified direction.
1809 Error emitReductionListCopy(
1810 InsertPointTy AllocaIP, CopyAction Action, Type *ReductionArrayTy,
1811 ArrayRef<ReductionInfo> ReductionInfos, Value *SrcBase, Value *DestBase,
1812 ArrayRef<bool> IsByRef,
1813 CopyOptionsTy CopyOptions = {nullptr, nullptr, nullptr});
1814
1815 /// Emit a helper that reduces data across two OpenMP threads (lanes)
1816 /// in the same warp. It uses shuffle instructions to copy over data from
1817 /// a remote lane's stack. The reduction algorithm performed is specified
1818 /// by the fourth parameter.
1819 ///
1820 /// Algorithm Versions.
1821 /// Full Warp Reduce (argument value 0):
1822 /// This algorithm assumes that all 32 lanes are active and gathers
1823 /// data from these 32 lanes, producing a single resultant value.
1824 /// Contiguous Partial Warp Reduce (argument value 1):
1825 /// This algorithm assumes that only a *contiguous* subset of lanes
1826 /// are active. This happens for the last warp in a parallel region
1827 /// when the user specified num_threads is not an integer multiple of
1828 /// 32. This contiguous subset always starts with the zeroth lane.
1829 /// Partial Warp Reduce (argument value 2):
1830 /// This algorithm gathers data from any number of lanes at any position.
1831 /// All reduced values are stored in the lowest possible lane. The set
1832 /// of problems every algorithm addresses is a super set of those
1833 /// addressable by algorithms with a lower version number. Overhead
1834 /// increases as algorithm version increases.
1835 ///
1836 /// Terminology
1837 /// Reduce element:
1838 /// Reduce element refers to the individual data field with primitive
1839 /// data types to be combined and reduced across threads.
1840 /// Reduce list:
1841 /// Reduce list refers to a collection of local, thread-private
1842 /// reduce elements.
1843 /// Remote Reduce list:
1844 /// Remote Reduce list refers to a collection of remote (relative to
1845 /// the current thread) reduce elements.
1846 ///
1847 /// We distinguish between three states of threads that are important to
1848 /// the implementation of this function.
1849 /// Alive threads:
1850 /// Threads in a warp executing the SIMT instruction, as distinguished from
1851 /// threads that are inactive due to divergent control flow.
1852 /// Active threads:
1853 /// The minimal set of threads that has to be alive upon entry to this
1854 /// function. The computation is correct iff active threads are alive.
1855 /// Some threads are alive but they are not active because they do not
1856 /// contribute to the computation in any useful manner. Turning them off
1857 /// may introduce control flow overheads without any tangible benefits.
1858 /// Effective threads:
1859 /// In order to comply with the argument requirements of the shuffle
1860 /// function, we must keep all lanes holding data alive. But at most
1861 /// half of them perform value aggregation; we refer to this half of
1862 /// threads as effective. The other half is simply handing off their
1863 /// data.
1864 ///
1865 /// Procedure
1866 /// Value shuffle:
1867 /// In this step active threads transfer data from higher lane positions
1868 /// in the warp to lower lane positions, creating Remote Reduce list.
1869 /// Value aggregation:
1870 /// In this step, effective threads combine their thread local Reduce list
1871 /// with Remote Reduce list and store the result in the thread local
1872 /// Reduce list.
1873 /// Value copy:
1874 /// In this step, we deal with the assumption made by algorithm 2
1875 /// (i.e. contiguity assumption). When we have an odd number of lanes
1876 /// active, say 2k+1, only k threads will be effective and therefore k
1877 /// new values will be produced. However, the Reduce list owned by the
1878 /// (2k+1)th thread is ignored in the value aggregation. Therefore
1879 /// we copy the Reduce list from the (2k+1)th lane to (k+1)th lane so
1880 /// that the contiguity assumption still holds.
1881 ///
1882 /// \param ReductionInfos Array type containing the ReductionOps.
1883 /// \param ReduceFn The reduction function.
1884 /// \param FuncAttrs Optional param to specify any function attributes that
1885 /// need to be copied to the new function.
1886 /// \param IsByRef For each reduction clause, whether the reduction is by-ref
1887 /// or not.
1888 ///
1889 /// \return The ShuffleAndReduce function.
1890 Expected<Function *> emitShuffleAndReduceFunction(
1892 Function *ReduceFn, AttributeList FuncAttrs, ArrayRef<bool> IsByRef);
1893
1894 /// Helper function for CreateCanonicalScanLoops to create InputLoop
1895 /// in the firstGen and Scan Loop in the SecondGen
1896 /// \param InputLoopGen Callback for generating the loop for input phase
1897 /// \param ScanLoopGen Callback for generating the loop for scan phase
1898 /// \param ScanRedInfo Pointer to the ScanInfo objected created using
1899 /// `ScanInfoInitialize`.
1900 ///
1901 /// \return error if any produced, else return success.
1902 Error emitScanBasedDirectiveIR(
1903 llvm::function_ref<Error()> InputLoopGen,
1904 llvm::function_ref<Error(LocationDescription Loc)> ScanLoopGen,
1905 ScanInfo *ScanRedInfo);
1906
1907 /// Creates the basic blocks required for scan reduction.
1908 /// \param ScanRedInfo Pointer to the ScanInfo objected created using
1909 /// `ScanInfoInitialize`.
1910 void createScanBBs(ScanInfo *ScanRedInfo);
1911
1912 /// Dynamically allocates the buffer needed for scan reduction.
1913 /// \param AllocaIP The IP where possibly-shared pointer of buffer needs to
1914 /// be declared.
1915 /// \param ScanVars Scan Variables.
1916 /// \param ScanRedInfo Pointer to the ScanInfo objected created using
1917 /// `ScanInfoInitialize`.
1918 ///
1919 /// \return error if any produced, else return success.
1920 Error emitScanBasedDirectiveDeclsIR(InsertPointTy AllocaIP,
1921 ArrayRef<llvm::Value *> ScanVars,
1922 ArrayRef<llvm::Type *> ScanVarsType,
1923 ScanInfo *ScanRedInfo);
1924
1925 /// Copies the result back to the reduction variable.
1926 /// \param ReductionInfos Array type containing the ReductionOps.
1927 /// \param ScanRedInfo Pointer to the ScanInfo objected created using
1928 /// `ScanInfoInitialize`.
1929 ///
1930 /// \return error if any produced, else return success.
1931 Error emitScanBasedDirectiveFinalsIR(
1934
1935 /// This function emits a helper that gathers Reduce lists from the first
1936 /// lane of every active warp to lanes in the first warp.
1937 ///
1938 /// void inter_warp_copy_func(void* reduce_data, num_warps)
1939 /// shared smem[warp_size];
1940 /// For all data entries D in reduce_data:
1941 /// sync
1942 /// If (I am the first lane in each warp)
1943 /// Copy my local D to smem[warp_id]
1944 /// sync
1945 /// if (I am the first warp)
1946 /// Copy smem[thread_id] to my local D
1947 ///
1948 /// \param Loc The insert and source location description.
1949 /// \param ReductionInfos Array type containing the ReductionOps.
1950 /// \param FuncAttrs Optional param to specify any function attributes that
1951 /// need to be copied to the new function.
1952 /// \param IsByRef For each reduction clause, whether the reduction is by-ref
1953 /// or not.
1954 ///
1955 /// \return The InterWarpCopy function.
1957 emitInterWarpCopyFunction(const LocationDescription &Loc,
1958 ArrayRef<ReductionInfo> ReductionInfos,
1959 AttributeList FuncAttrs, ArrayRef<bool> IsByRef);
1960
1961 /// This function emits a helper that copies all the reduction variables from
1962 /// the team into the provided global buffer for the reduction variables.
1963 ///
1964 /// void list_to_global_copy_func(void *buffer, int Idx, void *reduce_data)
1965 /// For all data entries D in reduce_data:
1966 /// Copy local D to buffer.D[Idx]
1967 ///
1968 /// \param ReductionInfos Array type containing the ReductionOps.
1969 /// \param ReductionsBufferTy The StructTy for the reductions buffer.
1970 /// \param FuncAttrs Optional param to specify any function attributes that
1971 /// need to be copied to the new function.
1972 ///
1973 /// \return The ListToGlobalCopy function.
1975 emitListToGlobalCopyFunction(ArrayRef<ReductionInfo> ReductionInfos,
1976 Type *ReductionsBufferTy,
1977 AttributeList FuncAttrs, ArrayRef<bool> IsByRef);
1978
1979 /// This function emits a helper that copies all the reduction variables from
1980 /// the team into the provided global buffer for the reduction variables.
1981 ///
1982 /// void list_to_global_copy_func(void *buffer, int Idx, void *reduce_data)
1983 /// For all data entries D in reduce_data:
1984 /// Copy buffer.D[Idx] to local D;
1985 ///
1986 /// \param ReductionInfos Array type containing the ReductionOps.
1987 /// \param ReductionsBufferTy The StructTy for the reductions buffer.
1988 /// \param FuncAttrs Optional param to specify any function attributes that
1989 /// need to be copied to the new function.
1990 ///
1991 /// \return The GlobalToList function.
1993 emitGlobalToListCopyFunction(ArrayRef<ReductionInfo> ReductionInfos,
1994 Type *ReductionsBufferTy,
1995 AttributeList FuncAttrs, ArrayRef<bool> IsByRef);
1996
1997 /// This function emits a helper that reduces all the reduction variables from
1998 /// the team into the provided global buffer for the reduction variables.
1999 ///
2000 /// void list_to_global_reduce_func(void *buffer, int Idx, void *reduce_data)
2001 /// void *GlobPtrs[];
2002 /// GlobPtrs[0] = (void*)&buffer.D0[Idx];
2003 /// ...
2004 /// GlobPtrs[N] = (void*)&buffer.DN[Idx];
2005 /// reduce_function(GlobPtrs, reduce_data);
2006 ///
2007 /// \param ReductionInfos Array type containing the ReductionOps.
2008 /// \param ReduceFn The reduction function.
2009 /// \param ReductionsBufferTy The StructTy for the reductions buffer.
2010 /// \param FuncAttrs Optional param to specify any function attributes that
2011 /// need to be copied to the new function.
2012 ///
2013 /// \return The ListToGlobalReduce function.
2015 emitListToGlobalReduceFunction(ArrayRef<ReductionInfo> ReductionInfos,
2016 Function *ReduceFn, Type *ReductionsBufferTy,
2017 AttributeList FuncAttrs,
2018 ArrayRef<bool> IsByRef);
2019
2020 /// This function emits a helper that reduces all the reduction variables from
2021 /// the team into the provided global buffer for the reduction variables.
2022 ///
2023 /// void global_to_list_reduce_func(void *buffer, int Idx, void *reduce_data)
2024 /// void *GlobPtrs[];
2025 /// GlobPtrs[0] = (void*)&buffer.D0[Idx];
2026 /// ...
2027 /// GlobPtrs[N] = (void*)&buffer.DN[Idx];
2028 /// reduce_function(reduce_data, GlobPtrs);
2029 ///
2030 /// \param ReductionInfos Array type containing the ReductionOps.
2031 /// \param ReduceFn The reduction function.
2032 /// \param ReductionsBufferTy The StructTy for the reductions buffer.
2033 /// \param FuncAttrs Optional param to specify any function attributes that
2034 /// need to be copied to the new function.
2035 ///
2036 /// \return The GlobalToListReduce function.
2038 emitGlobalToListReduceFunction(ArrayRef<ReductionInfo> ReductionInfos,
2039 Function *ReduceFn, Type *ReductionsBufferTy,
2040 AttributeList FuncAttrs,
2041 ArrayRef<bool> IsByRef);
2042
2043 /// Get the function name of a reduction function.
2044 std::string getReductionFuncName(StringRef Name) const;
2045
2046 /// Generate a Fortran descriptor for array reductions
2047 ///
2048 /// \param DescriptorAddr Address of the descriptor to initialize
2049 /// \param DataPtr Pointer to the actual data the descriptor should reference
2050 /// \param SrcDescriptorAddr Address of the descriptor to copy metadata from
2051 /// \param DescriptorType Type of the descriptor structure
2052 /// \param DataPtrPtrGen Callback to get the base_ptr field in the descriptor
2053 ///
2054 /// \return Error if DataPtrPtrGen fails, otherwise success.
2055 InsertPointOrErrorTy generateReductionDescriptor(
2056 Value *DescriptorAddr, Value *DataPtr, Value *SrcDescriptorAddr,
2057 Type *DescriptorType,
2059 DataPtrPtrGen);
2060
2061 /// Allocate a by-ref reduction descriptor, copy \p SrcDescriptorAddr into it,
2062 /// and update its data pointer to reference \p DataPtr.
2063 ///
2064 /// \param AllocaIP Insertion point for the descriptor allocation.
2065 /// \param RI Reduction info containing descriptor type and access callback.
2066 /// \param DataPtr Pointer to the actual data the descriptor should reference.
2067 /// \param SrcDescriptorAddr Address of the descriptor to copy metadata from.
2068 /// \param DescriptorPtrTy Pointer type expected by the descriptor consumer.
2069 ///
2070 /// \return The new descriptor address, or an Error if descriptor generation
2071 /// fails.
2072 Expected<Value *> createReductionDescriptorCopy(
2073 InsertPointTy AllocaIP, const ReductionInfo &RI, Value *DataPtr,
2074 Value *SrcDescriptorAddr, Type *DescriptorPtrTy,
2075 const Twine &Name = ".omp.reduction.byref_descriptor");
2076
2077 /// Emits reduction function.
2078 /// \param ReducerName Name of the function calling the reduction.
2079 /// \param ReductionInfos Array type containing the ReductionOps.
2080 /// \param ReductionGenCBKind Optional param to specify Clang or MLIR
2081 /// CodeGenCB kind.
2082 /// \param FuncAttrs Optional param to specify any function attributes that
2083 /// need to be copied to the new function.
2084 ///
2085 /// \return The reduction function.
2086 Expected<Function *> createReductionFunction(
2087 StringRef ReducerName, ArrayRef<ReductionInfo> ReductionInfos,
2088 ArrayRef<bool> IsByRef,
2090 AttributeList FuncAttrs = {});
2091
2092public:
2093 ///
2094 /// Design of OpenMP reductions on the GPU
2095 ///
2096 /// Consider a typical OpenMP program with one or more reduction
2097 /// clauses:
2098 ///
2099 /// float foo;
2100 /// double bar;
2101 /// #pragma omp target teams distribute parallel for \
2102 /// reduction(+:foo) reduction(*:bar)
2103 /// for (int i = 0; i < N; i++) {
2104 /// foo += A[i]; bar *= B[i];
2105 /// }
2106 ///
2107 /// where 'foo' and 'bar' are reduced across all OpenMP threads in
2108 /// all teams. In our OpenMP implementation on the NVPTX device an
2109 /// OpenMP team is mapped to a CUDA threadblock and OpenMP threads
2110 /// within a team are mapped to CUDA threads within a threadblock.
2111 /// Our goal is to efficiently aggregate values across all OpenMP
2112 /// threads such that:
2113 ///
2114 /// - the compiler and runtime are logically concise, and
2115 /// - the reduction is performed efficiently in a hierarchical
2116 /// manner as follows: within OpenMP threads in the same warp,
2117 /// across warps in a threadblock, and finally across teams on
2118 /// the NVPTX device.
2119 ///
2120 /// Introduction to Decoupling
2121 ///
2122 /// We would like to decouple the compiler and the runtime so that the
2123 /// latter is ignorant of the reduction variables (number, data types)
2124 /// and the reduction operators. This allows a simpler interface
2125 /// and implementation while still attaining good performance.
2126 ///
2127 /// Pseudocode for the aforementioned OpenMP program generated by the
2128 /// compiler is as follows:
2129 ///
2130 /// 1. Create private copies of reduction variables on each OpenMP
2131 /// thread: 'foo_private', 'bar_private'
2132 /// 2. Each OpenMP thread reduces the chunk of 'A' and 'B' assigned
2133 /// to it and writes the result in 'foo_private' and 'bar_private'
2134 /// respectively.
2135 /// 3. Call the OpenMP runtime on the GPU to reduce within a team
2136 /// and store the result on the team master:
2137 ///
2138 /// __kmpc_nvptx_parallel_reduce_nowait_v2(...,
2139 /// reduceData, shuffleReduceFn, interWarpCpyFn)
2140 ///
2141 /// where:
2142 /// struct ReduceData {
2143 /// double *foo;
2144 /// double *bar;
2145 /// } reduceData
2146 /// reduceData.foo = &foo_private
2147 /// reduceData.bar = &bar_private
2148 ///
2149 /// 'shuffleReduceFn' and 'interWarpCpyFn' are pointers to two
2150 /// auxiliary functions generated by the compiler that operate on
2151 /// variables of type 'ReduceData'. They aid the runtime perform
2152 /// algorithmic steps in a data agnostic manner.
2153 ///
2154 /// 'shuffleReduceFn' is a pointer to a function that reduces data
2155 /// of type 'ReduceData' across two OpenMP threads (lanes) in the
2156 /// same warp. It takes the following arguments as input:
2157 ///
2158 /// a. variable of type 'ReduceData' on the calling lane,
2159 /// b. its lane_id,
2160 /// c. an offset relative to the current lane_id to generate a
2161 /// remote_lane_id. The remote lane contains the second
2162 /// variable of type 'ReduceData' that is to be reduced.
2163 /// d. an algorithm version parameter determining which reduction
2164 /// algorithm to use.
2165 ///
2166 /// 'shuffleReduceFn' retrieves data from the remote lane using
2167 /// efficient GPU shuffle intrinsics and reduces, using the
2168 /// algorithm specified by the 4th parameter, the two operands
2169 /// element-wise. The result is written to the first operand.
2170 ///
2171 /// Different reduction algorithms are implemented in different
2172 /// runtime functions, all calling 'shuffleReduceFn' to perform
2173 /// the essential reduction step. Therefore, based on the 4th
2174 /// parameter, this function behaves slightly differently to
2175 /// cooperate with the runtime to ensure correctness under
2176 /// different circumstances.
2177 ///
2178 /// 'InterWarpCpyFn' is a pointer to a function that transfers
2179 /// reduced variables across warps. It tunnels, through CUDA
2180 /// shared memory, the thread-private data of type 'ReduceData'
2181 /// from lane 0 of each warp to a lane in the first warp.
2182 /// 4. Call the OpenMP runtime on the GPU to reduce across teams.
2183 /// The last team writes the global reduced value to memory.
2184 ///
2185 /// ret = __kmpc_gpu_teams_reduce_nowait(...,
2186 /// reduceData, shuffleReduceFn, interWarpCpyFn,
2187 /// scratchpadCopyFn, loadAndReduceFn)
2188 ///
2189 /// 'scratchpadCopyFn' is a helper that stores reduced
2190 /// data from the team master to a scratchpad array in
2191 /// global memory.
2192 ///
2193 /// 'loadAndReduceFn' is a helper that loads data from
2194 /// the scratchpad array and reduces it with the input
2195 /// operand.
2196 ///
2197 /// These compiler generated functions hide address
2198 /// calculation and alignment information from the runtime.
2199 /// 5. if ret == 1:
2200 /// The team master of the last team stores the reduced
2201 /// result to the globals in memory.
2202 /// foo += reduceData.foo; bar *= reduceData.bar
2203 ///
2204 ///
2205 /// Warp Reduction Algorithms
2206 ///
2207 /// On the warp level, we have three algorithms implemented in the
2208 /// OpenMP runtime depending on the number of active lanes:
2209 ///
2210 /// Full Warp Reduction
2211 ///
2212 /// The reduce algorithm within a warp where all lanes are active
2213 /// is implemented in the runtime as follows:
2214 ///
2215 /// full_warp_reduce(void *reduce_data,
2216 /// kmp_ShuffleReductFctPtr ShuffleReduceFn) {
2217 /// for (int offset = WARPSIZE/2; offset > 0; offset /= 2)
2218 /// ShuffleReduceFn(reduce_data, 0, offset, 0);
2219 /// }
2220 ///
2221 /// The algorithm completes in log(2, WARPSIZE) steps.
2222 ///
2223 /// 'ShuffleReduceFn' is used here with lane_id set to 0 because it is
2224 /// not used therefore we save instructions by not retrieving lane_id
2225 /// from the corresponding special registers. The 4th parameter, which
2226 /// represents the version of the algorithm being used, is set to 0 to
2227 /// signify full warp reduction.
2228 ///
2229 /// In this version, 'ShuffleReduceFn' behaves, per element, as follows:
2230 ///
2231 /// #reduce_elem refers to an element in the local lane's data structure
2232 /// #remote_elem is retrieved from a remote lane
2233 /// remote_elem = shuffle_down(reduce_elem, offset, WARPSIZE);
2234 /// reduce_elem = reduce_elem REDUCE_OP remote_elem;
2235 ///
2236 /// Contiguous Partial Warp Reduction
2237 ///
2238 /// This reduce algorithm is used within a warp where only the first
2239 /// 'n' (n <= WARPSIZE) lanes are active. It is typically used when the
2240 /// number of OpenMP threads in a parallel region is not a multiple of
2241 /// WARPSIZE. The algorithm is implemented in the runtime as follows:
2242 ///
2243 /// void
2244 /// contiguous_partial_reduce(void *reduce_data,
2245 /// kmp_ShuffleReductFctPtr ShuffleReduceFn,
2246 /// int size, int lane_id) {
2247 /// int curr_size;
2248 /// int offset;
2249 /// curr_size = size;
2250 /// mask = curr_size/2;
2251 /// while (offset>0) {
2252 /// ShuffleReduceFn(reduce_data, lane_id, offset, 1);
2253 /// curr_size = (curr_size+1)/2;
2254 /// offset = curr_size/2;
2255 /// }
2256 /// }
2257 ///
2258 /// In this version, 'ShuffleReduceFn' behaves, per element, as follows:
2259 ///
2260 /// remote_elem = shuffle_down(reduce_elem, offset, WARPSIZE);
2261 /// if (lane_id < offset)
2262 /// reduce_elem = reduce_elem REDUCE_OP remote_elem
2263 /// else
2264 /// reduce_elem = remote_elem
2265 ///
2266 /// This algorithm assumes that the data to be reduced are located in a
2267 /// contiguous subset of lanes starting from the first. When there is
2268 /// an odd number of active lanes, the data in the last lane is not
2269 /// aggregated with any other lane's dat but is instead copied over.
2270 ///
2271 /// Dispersed Partial Warp Reduction
2272 ///
2273 /// This algorithm is used within a warp when any discontiguous subset of
2274 /// lanes are active. It is used to implement the reduction operation
2275 /// across lanes in an OpenMP simd region or in a nested parallel region.
2276 ///
2277 /// void
2278 /// dispersed_partial_reduce(void *reduce_data,
2279 /// kmp_ShuffleReductFctPtr ShuffleReduceFn) {
2280 /// int size, remote_id;
2281 /// int logical_lane_id = number_of_active_lanes_before_me() * 2;
2282 /// do {
2283 /// remote_id = next_active_lane_id_right_after_me();
2284 /// # the above function returns 0 of no active lane
2285 /// # is present right after the current lane.
2286 /// size = number_of_active_lanes_in_this_warp();
2287 /// logical_lane_id /= 2;
2288 /// ShuffleReduceFn(reduce_data, logical_lane_id,
2289 /// remote_id-1-threadIdx.x, 2);
2290 /// } while (logical_lane_id % 2 == 0 && size > 1);
2291 /// }
2292 ///
2293 /// There is no assumption made about the initial state of the reduction.
2294 /// Any number of lanes (>=1) could be active at any position. The reduction
2295 /// result is returned in the first active lane.
2296 ///
2297 /// In this version, 'ShuffleReduceFn' behaves, per element, as follows:
2298 ///
2299 /// remote_elem = shuffle_down(reduce_elem, offset, WARPSIZE);
2300 /// if (lane_id % 2 == 0 && offset > 0)
2301 /// reduce_elem = reduce_elem REDUCE_OP remote_elem
2302 /// else
2303 /// reduce_elem = remote_elem
2304 ///
2305 ///
2306 /// Intra-Team Reduction
2307 ///
2308 /// This function, as implemented in the runtime call
2309 /// '__kmpc_nvptx_parallel_reduce_nowait_v2', aggregates data across OpenMP
2310 /// threads in a team. It first reduces within a warp using the
2311 /// aforementioned algorithms. We then proceed to gather all such
2312 /// reduced values at the first warp.
2313 ///
2314 /// The runtime makes use of the function 'InterWarpCpyFn', which copies
2315 /// data from each of the "warp master" (zeroth lane of each warp, where
2316 /// warp-reduced data is held) to the zeroth warp. This step reduces (in
2317 /// a mathematical sense) the problem of reduction across warp masters in
2318 /// a block to the problem of warp reduction.
2319 ///
2320 ///
2321 /// Inter-Team Reduction
2322 ///
2323 /// Once a team has reduced its data to a single value, it is stored in
2324 /// a global scratchpad array. Since each team has a distinct slot, this
2325 /// can be done without locking.
2326 ///
2327 /// The last team to write to the scratchpad array proceeds to reduce the
2328 /// scratchpad array. One or more workers in the last team use the helper
2329 /// 'loadAndReduceDataFn' to load and reduce values from the array, i.e.,
2330 /// the k'th worker reduces every k'th element.
2331 ///
2332 /// Finally, a call is made to '__kmpc_nvptx_parallel_reduce_nowait_v2' to
2333 /// reduce across workers and compute a globally reduced value.
2334 ///
2335 /// \param Loc The location where the reduction was
2336 /// encountered. Must be within the associate
2337 /// directive and after the last local access to the
2338 /// reduction variables.
2339 /// \param AllocaIP An insertion point suitable for allocas usable
2340 /// in reductions.
2341 /// \param CodeGenIP An insertion point suitable for code
2342 /// generation.
2343 /// \param ReductionInfos A list of info on each reduction
2344 /// variable.
2345 /// \param IsNoWait Optional flag set if the reduction is
2346 /// marked as nowait.
2347 /// \param IsByRef For each reduction clause, whether the reduction is by-ref.
2348 /// \param IsTeamsReduction Optional flag set if it is a teams
2349 /// reduction.
2350 /// \param IsSPMD Optional flag set when the surrounding kernel
2351 /// is compiled in SPMD execution mode (every
2352 /// reduction private is then known to be a
2353 /// per-thread scratch alloca). When false, the
2354 /// teams-reduction call site emits per-thread
2355 /// scratch and copies the team-local value in so
2356 /// the runtime's cross-team work cannot race on
2357 /// team-shared LDS storage produced by Generic
2358 /// globalization (Generic-SPMD case after
2359 /// OpenMPOpt SPMD-ization).
2360 /// \param GridValue Optional GPU grid value.
2361 /// used for teams reduction.
2362 /// \param SrcLocInfo Source location information global.
2364 const LocationDescription &Loc, InsertPointTy AllocaIP,
2365 InsertPointTy CodeGenIP, ArrayRef<ReductionInfo> ReductionInfos,
2366 ArrayRef<bool> IsByRef, bool IsNoWait = false,
2367 bool IsTeamsReduction = false, bool IsSPMD = false,
2369 std::optional<omp::GV> GridValue = {}, Value *SrcLocInfo = nullptr);
2370
2371 // TODO: provide atomic and non-atomic reduction generators for reduction
2372 // operators defined by the OpenMP specification.
2373
2374 /// Generator for '#omp reduction'.
2375 ///
2376 /// Emits the IR instructing the runtime to perform the specific kind of
2377 /// reductions. Expects reduction variables to have been privatized and
2378 /// initialized to reduction-neutral values separately. Emits the calls to
2379 /// runtime functions as well as the reduction function and the basic blocks
2380 /// performing the reduction atomically and non-atomically.
2381 ///
2382 /// The code emitted for the following:
2383 ///
2384 /// \code
2385 /// type var_1;
2386 /// type var_2;
2387 /// #pragma omp <directive> reduction(reduction-op:var_1,var_2)
2388 /// /* body */;
2389 /// \endcode
2390 ///
2391 /// corresponds to the following sketch.
2392 ///
2393 /// \code
2394 /// void _outlined_par() {
2395 /// // N is the number of different reductions.
2396 /// void *red_array[] = {privatized_var_1, privatized_var_2, ...};
2397 /// switch(__kmpc_reduce(..., N, /*size of data in red array*/, red_array,
2398 /// _omp_reduction_func,
2399 /// _gomp_critical_user.reduction.var)) {
2400 /// case 1: {
2401 /// var_1 = var_1 <reduction-op> privatized_var_1;
2402 /// var_2 = var_2 <reduction-op> privatized_var_2;
2403 /// // ...
2404 /// __kmpc_end_reduce(...);
2405 /// break;
2406 /// }
2407 /// case 2: {
2408 /// _Atomic<ReductionOp>(var_1, privatized_var_1);
2409 /// _Atomic<ReductionOp>(var_2, privatized_var_2);
2410 /// // ...
2411 /// break;
2412 /// }
2413 /// default: break;
2414 /// }
2415 /// }
2416 ///
2417 /// void _omp_reduction_func(void **lhs, void **rhs) {
2418 /// *(type *)lhs[0] = *(type *)lhs[0] <reduction-op> *(type *)rhs[0];
2419 /// *(type *)lhs[1] = *(type *)lhs[1] <reduction-op> *(type *)rhs[1];
2420 /// // ...
2421 /// }
2422 /// \endcode
2423 ///
2424 /// \param Loc The location where the reduction was
2425 /// encountered. Must be within the associate
2426 /// directive and after the last local access to the
2427 /// reduction variables.
2428 /// \param AllocaIP An insertion point suitable for allocas usable
2429 /// in reductions.
2430 /// \param ReductionInfos A list of info on each reduction variable.
2431 /// \param IsNoWait A flag set if the reduction is marked as nowait.
2432 /// \param IsByRef A flag set if the reduction is using reference
2433 /// or direct value.
2434 /// \param IsTeamsReduction Optional flag set if it is a teams
2435 /// reduction.
2437 const LocationDescription &Loc, InsertPointTy AllocaIP,
2438 ArrayRef<ReductionInfo> ReductionInfos, ArrayRef<bool> IsByRef,
2439 bool IsNoWait = false, bool IsTeamsReduction = false);
2440
2441 ///}
2442
2443 /// Return the insertion point used by the underlying IRBuilder.
2445
2446 /// Update the internal location to \p Loc.
2448 Builder.restoreIP(Loc.IP);
2449 Builder.SetCurrentDebugLocation(Loc.DL);
2450 return Loc.IP.getBlock() != nullptr;
2451 }
2452
2453 /// Return the function declaration for the runtime function with \p FnID.
2456
2458
2460 ArrayRef<Value *> Args,
2461 StringRef Name = "");
2462
2463 /// Return the (LLVM-IR) string describing the source location \p LocStr.
2465 uint32_t &SrcLocStrSize);
2466
2467 /// Return the (LLVM-IR) string describing the default source location.
2469
2470 /// Return the (LLVM-IR) string describing the source location identified by
2471 /// the arguments.
2473 StringRef FileName, unsigned Line,
2474 unsigned Column,
2475 uint32_t &SrcLocStrSize);
2476
2477 /// Return the (LLVM-IR) string describing the DebugLoc \p DL. Use \p F as
2478 /// fallback if \p DL does not specify the function name.
2480 Function *F = nullptr);
2481
2482 /// Return the (LLVM-IR) string describing the source location \p Loc.
2483 LLVM_ABI Constant *getOrCreateSrcLocStr(const LocationDescription &Loc,
2484 uint32_t &SrcLocStrSize);
2485
2486 /// Return an ident_t* encoding the source location \p SrcLocStr and \p Flags.
2487 /// TODO: Create a enum class for the Reserve2Flags
2489 uint32_t SrcLocStrSize,
2490 omp::IdentFlag Flags = omp::IdentFlag(0),
2491 unsigned Reserve2Flags = 0);
2492
2493 /// Create a hidden global flag \p Name in the module with initial value \p
2494 /// Value.
2496
2497 /// Emit the llvm.used metadata.
2499
2500 /// Emit the kernel execution mode.
2503
2504 /// Generate control flow and cleanup for cancellation.
2505 ///
2506 /// \param CancelFlag Flag indicating if the cancellation is performed.
2507 /// \param CanceledDirective The kind of directive that is cancled.
2508 /// \param ExitCB Extra code to be generated in the exit block.
2509 ///
2510 /// \return an error, if any were triggered during execution.
2512 omp::Directive CanceledDirective);
2513
2514 /// Generate a target region entry call.
2515 ///
2516 /// \param Loc The location at which the request originated and is fulfilled.
2517 /// \param AllocaIP The insertion point to be used for alloca instructions.
2518 /// \param Return Return value of the created function returned by reference.
2519 /// \param DeviceID Identifier for the device via the 'device' clause.
2520 /// \param NumTeams Numer of teams for the region via the 'num_teams' clause
2521 /// or 0 if unspecified and -1 if there is no 'teams' clause.
2522 /// \param NumThreads Number of threads via the 'thread_limit' clause.
2523 /// \param HostPtr Pointer to the host-side pointer of the target kernel.
2524 /// \param KernelArgs Array of arguments to the kernel.
2525 LLVM_ABI InsertPointTy emitTargetKernel(const LocationDescription &Loc,
2526 InsertPointTy AllocaIP,
2527 Value *&Return, Value *Ident,
2528 Value *DeviceID, Value *NumTeams,
2529 Value *NumThreads, Value *HostPtr,
2530 ArrayRef<Value *> KernelArgs);
2531
2532 /// Generate a flush runtime call.
2533 ///
2534 /// \param Loc The location at which the request originated and is fulfilled.
2535 LLVM_ABI void emitFlush(const LocationDescription &Loc);
2536
2537 /// The finalization stack made up of finalize callbacks currently in-flight,
2538 /// wrapped into FinalizationInfo objects that reference also the finalization
2539 /// target block and the kind of cancellable directive.
2541
2542 /// Return true if the last entry in the finalization stack is of kind \p DK
2543 /// and cancellable.
2544 bool isLastFinalizationInfoCancellable(omp::Directive DK) {
2545 return !FinalizationStack.empty() &&
2546 FinalizationStack.back().IsCancellable &&
2547 FinalizationStack.back().DK == DK;
2548 }
2549
2550 /// Generate a taskwait runtime call.
2551 ///
2552 /// \param Loc The location at which the request originated and is fulfilled.
2553 LLVM_ABI void emitTaskwaitImpl(const LocationDescription &Loc);
2554
2555 /// Generate a taskyield runtime call.
2556 ///
2557 /// \param Loc The location at which the request originated and is fulfilled.
2558 LLVM_ABI void emitTaskyieldImpl(const LocationDescription &Loc);
2559
2560 /// Return the current thread ID.
2561 ///
2562 /// \param Ident The ident (ident_t*) describing the query origin.
2564
2565 /// The OpenMPIRBuilder Configuration
2567
2568 /// The underlying LLVM-IR module
2570
2571 /// The LLVM-IR Builder used to create IR.
2573
2574 /// Map to remember source location strings
2576
2577 /// Map to remember existing ident_t*.
2579
2580 /// Info manager to keep track of target regions.
2582
2583 /// The target triple of the underlying module.
2584 const Triple T;
2585
2586 /// Helper that contains information about regions we need to outline
2587 /// during finalization.
2589 using PostOutlineCBTy = std::function<void(Function &)>;
2595 // TODO: this should be safe to enable by default
2597
2598 virtual ~OutlineInfo() = default;
2599
2600 /// Collect all blocks in between EntryBB and ExitBB in both the given
2601 /// vector and set.
2603 SmallVectorImpl<BasicBlock *> &BlockVector);
2604
2605 /// Create a CodeExtractor instance based on the information stored in this
2606 /// structure, the list of collected blocks from a previous call to
2607 /// \c collectBlocks and a flag stating whether arguments must be passed in
2608 /// address space 0.
2609 virtual std::unique_ptr<CodeExtractor>
2611 bool ArgsInZeroAddressSpace, Twine Suffix = Twine(""));
2612
2613 /// Return the function that contains the region to be outlined.
2614 Function *getFunction() const { return EntryBB->getParent(); }
2615 };
2616
2617 /// Collection of regions that need to be outlined during finalization.
2619
2620 /// A collection of candidate target functions that's constant allocas will
2621 /// attempt to be raised on a call of finalize after all currently enqueued
2622 /// outline info's have been processed.
2624
2625 /// Collection of owned canonical loop objects that eventually need to be
2626 /// free'd.
2627 std::forward_list<CanonicalLoopInfo> LoopInfos;
2628
2629 /// Collection of owned ScanInfo objects that eventually need to be free'd.
2630 std::forward_list<ScanInfo> ScanInfos;
2631
2632 /// Add a new region that will be outlined later.
2633 void addOutlineInfo(std::unique_ptr<OutlineInfo> &&OI) {
2634 OutlineInfos.emplace_back(std::move(OI));
2635 }
2636
2637 /// An ordered map of auto-generated variables to their unique names.
2638 /// It stores variables with the following names: 1) ".gomp_critical_user_" +
2639 /// <critical_section_name> + ".var" for "omp critical" directives; 2)
2640 /// <mangled_name_for_global_var> + ".cache." for cache for threadprivate
2641 /// variables.
2643
2644 /// Computes the size of type in bytes.
2646
2647 // Emit a branch from the current block to the Target block only if
2648 // the current block has a terminator.
2650
2651 // If BB has no use then delete it and return. Else place BB after the current
2652 // block, if possible, or else at the end of the function. Also add a branch
2653 // from current block to BB if current block does not have a terminator.
2654 LLVM_ABI void emitBlock(BasicBlock *BB, Function *CurFn,
2655 bool IsFinished = false);
2656
2657 /// Emits code for OpenMP 'if' clause using specified \a BodyGenCallbackTy
2658 /// Here is the logic:
2659 /// if (Cond) {
2660 /// ThenGen();
2661 /// } else {
2662 /// ElseGen();
2663 /// }
2664 ///
2665 /// \return an error, if any were triggered during execution.
2667 BodyGenCallbackTy ElseGen,
2668 InsertPointTy AllocaIP = {},
2669 ArrayRef<BasicBlock *> DeallocBlocks = {});
2670
2671 /// Create the global variable holding the offload mappings information.
2672 LLVM_ABI GlobalVariable *
2673 createOffloadMaptypes(SmallVectorImpl<uint64_t> &Mappings,
2674 std::string VarName);
2675
2676 /// Create the global variable holding the offload names information.
2677 LLVM_ABI GlobalVariable *
2678 createOffloadMapnames(SmallVectorImpl<llvm::Constant *> &Names,
2679 std::string VarName);
2680
2683 AllocaInst *Args = nullptr;
2685 };
2686
2687 /// Create the allocas instruction used in call to mapper functions.
2689 InsertPointTy AllocaIP,
2690 unsigned NumOperands,
2692
2693 /// Create the call for the target mapper function.
2694 /// \param Loc The source location description.
2695 /// \param MapperFunc Function to be called.
2696 /// \param SrcLocInfo Source location information global.
2697 /// \param MaptypesArg The argument types.
2698 /// \param MapnamesArg The argument names.
2699 /// \param MapperAllocas The AllocaInst used for the call.
2700 /// \param DeviceID Device ID for the call.
2701 /// \param NumOperands Number of operands in the call.
2703 Function *MapperFunc, Value *SrcLocInfo,
2704 Value *MaptypesArg, Value *MapnamesArg,
2706 int64_t DeviceID, unsigned NumOperands);
2707
2708 /// Container for the arguments used to pass data to the runtime library.
2710 /// The array of base pointer passed to the runtime library.
2712 /// The array of section pointers passed to the runtime library.
2714 /// The array of sizes passed to the runtime library.
2715 Value *SizesArray = nullptr;
2716 /// The array of map types passed to the runtime library for the beginning
2717 /// of the region or for the entire region if there are no separate map
2718 /// types for the region end.
2720 /// The array of map types passed to the runtime library for the end of the
2721 /// region, or nullptr if there are no separate map types for the region
2722 /// end.
2724 /// The array of user-defined mappers passed to the runtime library.
2726 /// The array of original declaration names of mapped pointers sent to the
2727 /// runtime library for debugging
2729
2730 explicit TargetDataRTArgs() = default;
2739 };
2740
2741 /// Container to pass the default attributes with which a kernel must be
2742 /// launched, used to set kernel attributes and populate associated static
2743 /// structures.
2744 ///
2745 /// For max values, < 0 means unset, == 0 means set but unknown at compile
2746 /// time. The number of max values will be 1 except for the case where
2747 /// ompx_bare is set.
2757
2758 /// Container to pass LLVM IR runtime values or constants related to the
2759 /// number of teams and threads with which the kernel must be launched, as
2760 /// well as the trip count of the loop, if it is an SPMD or Generic-SPMD
2761 /// kernel. These must be defined in the host prior to the call to the kernel
2762 /// launch OpenMP RTL function.
2765 Value *MinTeams = nullptr;
2768
2769 /// 'parallel' construct 'num_threads' clause value, if present and it is an
2770 /// SPMD kernel.
2771 Value *MaxThreads = nullptr;
2772
2773 /// Total number of iterations of the SPMD or Generic-SPMD kernel or null if
2774 /// it is a generic kernel.
2776
2777 /// Device ID value used in the kernel launch.
2778 Value *DeviceID = nullptr;
2779 };
2780
2781 /// Data structure that contains the needed information to construct the
2782 /// kernel args vector.
2784 /// Number of arguments passed to the runtime library.
2785 unsigned NumTargetItems = 0;
2786 /// Arguments passed to the runtime library
2788 /// The number of iterations
2790 /// The number of teams.
2792 /// The number of threads.
2794 /// The size of the dynamic shared memory.
2796 /// True if the kernel has 'no wait' clause.
2797 bool HasNoWait = false;
2798 /// True if the kernel strictly requires the number of blocks and threads
2799 /// above to run.
2801 /// The fallback mechanism for the shared memory.
2804
2805 // Constructors for TargetKernelArgs.
2806 TargetKernelArgs() = default;
2817 };
2818
2819 /// Create the kernel args vector used by emitTargetKernel. This function
2820 /// creates various constant values that are used in the resulting args
2821 /// vector.
2822 LLVM_ABI static void getKernelArgsVector(TargetKernelArgs &KernelArgs,
2823 IRBuilderBase &Builder,
2824 SmallVector<Value *> &ArgsVector);
2825
2826 /// Struct that keeps the information that should be kept throughout
2827 /// a 'target data' region.
2829 /// Set to true if device pointer information have to be obtained.
2830 bool RequiresDevicePointerInfo = false;
2831 /// Set to true if Clang emits separate runtime calls for the beginning and
2832 /// end of the region. These calls might have separate map type arrays.
2833 bool SeparateBeginEndCalls = false;
2834
2835 public:
2837
2840
2841 /// Indicate whether any user-defined mapper exists.
2842 bool HasMapper = false;
2843 /// The total number of pointers passed to the runtime library.
2844 unsigned NumberOfPtrs = 0u;
2845
2846 bool EmitDebug = false;
2847
2848 /// Whether the `target ... data` directive has a `nowait` clause.
2849 bool HasNoWait = false;
2850
2851 explicit TargetDataInfo() = default;
2852 explicit TargetDataInfo(bool RequiresDevicePointerInfo,
2853 bool SeparateBeginEndCalls)
2854 : RequiresDevicePointerInfo(RequiresDevicePointerInfo),
2855 SeparateBeginEndCalls(SeparateBeginEndCalls) {}
2856 /// Clear information about the data arrays.
2859 HasMapper = false;
2860 NumberOfPtrs = 0u;
2861 }
2862 /// Return true if the current target data information has valid arrays.
2863 bool isValid() {
2864 return RTArgs.BasePointersArray && RTArgs.PointersArray &&
2865 RTArgs.SizesArray && RTArgs.MapTypesArray &&
2866 (!HasMapper || RTArgs.MappersArray) && NumberOfPtrs;
2867 }
2868 bool requiresDevicePointerInfo() { return RequiresDevicePointerInfo; }
2869 bool separateBeginEndCalls() { return SeparateBeginEndCalls; }
2870 };
2871
2879
2880 /// This structure contains combined information generated for mappable
2881 /// clauses, including base pointers, pointers, sizes, map types, user-defined
2882 /// mappers, and non-contiguous information.
2883 struct MapInfosTy {
2898
2899 /// Append arrays in \a CurInfo.
2900 void append(MapInfosTy &CurInfo) {
2901 BasePointers.append(CurInfo.BasePointers.begin(),
2902 CurInfo.BasePointers.end());
2903 Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
2904 DevicePointers.append(CurInfo.DevicePointers.begin(),
2905 CurInfo.DevicePointers.end());
2906 Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
2907 Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
2908 Names.append(CurInfo.Names.begin(), CurInfo.Names.end());
2909 NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
2910 CurInfo.NonContigInfo.Dims.end());
2911 NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
2912 CurInfo.NonContigInfo.Offsets.end());
2913 NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
2914 CurInfo.NonContigInfo.Counts.end());
2915 NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
2916 CurInfo.NonContigInfo.Strides.end());
2917 }
2918 };
2920
2921 /// Callback function type for functions emitting the host fallback code that
2922 /// is executed when the kernel launch fails. It takes an insertion point as
2923 /// parameter where the code should be emitted. It returns an insertion point
2924 /// that points right after after the emitted code.
2927
2928 // Callback function type for emitting and fetching user defined custom
2929 // mappers.
2931 function_ref<Expected<Function *>(unsigned int)>;
2932
2933 /// Generate a target region entry call and host fallback call.
2934 ///
2935 /// \param Loc The location at which the request originated and is fulfilled.
2936 /// \param OutlinedFnID The ooulined function ID.
2937 /// \param EmitTargetCallFallbackCB Call back function to generate host
2938 /// fallback code.
2939 /// \param Args Data structure holding information about the kernel arguments.
2940 /// \param DeviceID Identifier for the device via the 'device' clause.
2941 /// \param RTLoc Source location identifier
2942 /// \param AllocaIP The insertion point to be used for alloca instructions.
2944 const LocationDescription &Loc, Value *OutlinedFnID,
2945 EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args,
2946 Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP);
2947
2948 /// Callback type for generating the bodies of device directives that require
2949 /// outer target tasks (e.g. in case of having `nowait` or `depend` clauses).
2950 ///
2951 /// \param DeviceID The ID of the device on which the target region will
2952 /// execute.
2953 /// \param RTLoc Source location identifier
2954 /// \Param TargetTaskAllocaIP Insertion point for the alloca block of the
2955 /// generated task.
2956 ///
2957 /// \return an error, if any were triggered during execution.
2959 function_ref<Error(Value *DeviceID, Value *RTLoc,
2960 IRBuilderBase::InsertPoint TargetTaskAllocaIP)>;
2961
2962 /// Generate a target-task for the target construct
2963 ///
2964 /// \param TaskBodyCB Callback to generate the actual body of the target task.
2965 /// \param DeviceID Identifier for the device via the 'device' clause.
2966 /// \param RTLoc Source location identifier
2967 /// \param AllocaIP The insertion point to be used for alloca instructions.
2968 /// \param Dependencies Dependencies info as specified by the 'depend' clause.
2969 /// \param HasNoWait True if the target construct had 'nowait' on it, false
2970 /// otherwise
2972 emitTargetTask(TargetTaskBodyCallbackTy TaskBodyCB, Value *DeviceID,
2973 Value *RTLoc, OpenMPIRBuilder::InsertPointTy AllocaIP,
2974 const DependenciesInfo &Dependencies,
2975 const TargetDataRTArgs &RTArgs, bool HasNoWait);
2976
2977 /// Emit the arguments to be passed to the runtime library based on the
2978 /// arrays of base pointers, pointers, sizes, map types, and mappers. If
2979 /// ForEndCall, emit map types to be passed for the end of the region instead
2980 /// of the beginning.
2983 OpenMPIRBuilder::TargetDataInfo &Info, bool ForEndCall = false);
2984
2985 /// Emit an array of struct descriptors to be assigned to the offload args.
2987 InsertPointTy CodeGenIP,
2988 MapInfosTy &CombinedInfo,
2989 TargetDataInfo &Info);
2990
2991 /// Emit the arrays used to pass the captures and map information to the
2992 /// offloading runtime library. If there is no map or capture information,
2993 /// return nullptr by reference. Accepts a reference to a MapInfosTy object
2994 /// that contains information generated for mappable clauses,
2995 /// including base pointers, pointers, sizes, map types, user-defined mappers.
2997 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
2998 TargetDataInfo &Info, CustomMapperCallbackTy CustomMapperCB,
2999 bool IsNonContiguous = false,
3000 function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr);
3001
3002 /// Allocates memory for and populates the arrays required for offloading
3003 /// (offload_{baseptrs|ptrs|mappers|sizes|maptypes|mapnames}). Then, it
3004 /// emits their base addresses as arguments to be passed to the runtime
3005 /// library. In essence, this function is a combination of
3006 /// emitOffloadingArrays and emitOffloadingArraysArgument and should arguably
3007 /// be preferred by clients of OpenMPIRBuilder.
3009 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
3010 TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo,
3011 CustomMapperCallbackTy CustomMapperCB, bool IsNonContiguous = false,
3012 bool ForEndCall = false,
3013 function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr);
3014
3015 /// Creates offloading entry for the provided entry ID \a ID, address \a
3016 /// Addr, size \a Size, and flags \a Flags.
3018 int32_t Flags, GlobalValue::LinkageTypes,
3019 StringRef Name = "");
3020
3021 /// The kind of errors that can occur when emitting the offload entries and
3022 /// metadata.
3029
3030 /// Callback function type
3032 std::function<void(EmitMetadataErrorKind, TargetRegionEntryInfo)>;
3033
3034 // Emit the offloading entries and metadata so that the device codegen side
3035 // can easily figure out what to emit. The produced metadata looks like
3036 // this:
3037 //
3038 // !omp_offload.info = !{!1, ...}
3039 //
3040 // We only generate metadata for function that contain target regions.
3042 EmitMetadataErrorReportFunctionTy &ErrorReportFunction);
3043
3044public:
3045 /// Generator for __kmpc_copyprivate
3046 ///
3047 /// \param Loc The source location description.
3048 /// \param BufSize Number of elements in the buffer.
3049 /// \param CpyBuf List of pointers to data to be copied.
3050 /// \param CpyFn function to call for copying data.
3051 /// \param DidIt flag variable; 1 for 'single' thread, 0 otherwise.
3052 ///
3053 /// \return The insertion position *after* the CopyPrivate call.
3054
3056 llvm::Value *BufSize,
3057 llvm::Value *CpyBuf,
3058 llvm::Value *CpyFn,
3059 llvm::Value *DidIt);
3060
3061 /// Generator for '#omp single'
3062 ///
3063 /// \param Loc The source location description.
3064 /// \param BodyGenCB Callback that will generate the region code.
3065 /// \param FiniCB Callback to finalize variable copies.
3066 /// \param IsNowait If false, a barrier is emitted.
3067 /// \param CPVars copyprivate variables.
3068 /// \param CPFuncs copy functions to use for each copyprivate variable.
3069 ///
3070 /// \returns The insertion position *after* the single call.
3073 FinalizeCallbackTy FiniCB, bool IsNowait,
3074 ArrayRef<llvm::Value *> CPVars = {},
3075 ArrayRef<llvm::Function *> CPFuncs = {});
3076
3077 /// Generator for '#omp scope'
3078 ///
3079 /// \param Loc The source location description.
3080 /// \param BodyGenCB Callback that will generate the region code.
3081 /// \param FiniCB Callback to finalize variable copies.
3082 /// \param IsNowait If false, a barrier is emitted.
3083 ///
3084 /// \returns The insertion position *after* the scope.
3085 LLVM_ABI InsertPointOrErrorTy createScope(const LocationDescription &Loc,
3086 BodyGenCallbackTy BodyGenCB,
3087 FinalizeCallbackTy FiniCB,
3088 bool IsNowait);
3089
3090 /// Generator for '#omp master'
3091 ///
3092 /// \param Loc The insert and source location description.
3093 /// \param BodyGenCB Callback that will generate the region code.
3094 /// \param FiniCB Callback to finalize variable copies.
3095 ///
3096 /// \returns The insertion position *after* the master.
3097 LLVM_ABI InsertPointOrErrorTy createMaster(const LocationDescription &Loc,
3098 BodyGenCallbackTy BodyGenCB,
3099 FinalizeCallbackTy FiniCB);
3100
3101 /// Generator for '#omp masked'
3102 ///
3103 /// \param Loc The insert and source location description.
3104 /// \param BodyGenCB Callback that will generate the region code.
3105 /// \param FiniCB Callback to finialize variable copies.
3106 ///
3107 /// \returns The insertion position *after* the masked.
3108 LLVM_ABI InsertPointOrErrorTy createMasked(const LocationDescription &Loc,
3109 BodyGenCallbackTy BodyGenCB,
3110 FinalizeCallbackTy FiniCB,
3111 Value *Filter);
3112
3113 /// This function performs the scan reduction of the values updated in
3114 /// the input phase. The reduction logic needs to be emitted between input
3115 /// and scan loop returned by `CreateCanonicalScanLoops`. The following
3116 /// is the code that is generated, `buffer` and `span` are expected to be
3117 /// populated before executing the generated code.
3118 /// \code{c}
3119 /// for (int k = 0; k != ceil(log2(span)); ++k) {
3120 /// i=pow(2,k)
3121 /// for (size cnt = last_iter; cnt >= i; --cnt)
3122 /// buffer[cnt] op= buffer[cnt-i];
3123 /// }
3124 /// \endcode
3125 /// \param Loc The insert and source location description.
3126 /// \param ReductionInfos Array type containing the ReductionOps.
3127 /// \param ScanRedInfo Pointer to the ScanInfo objected created using
3128 /// `ScanInfoInitialize`.
3129 ///
3130 /// \returns The insertion position *after* the masked.
3132 const LocationDescription &Loc,
3134 ScanInfo *ScanRedInfo);
3135
3136 /// This directive split and directs the control flow to input phase
3137 /// blocks or scan phase blocks based on 1. whether input loop or scan loop
3138 /// is executed, 2. whether exclusive or inclusive scan is used.
3139 ///
3140 /// \param Loc The insert and source location description.
3141 /// \param AllocaIP The IP where the temporary buffer for scan reduction
3142 // needs to be allocated.
3143 /// \param ScanVars Scan Variables.
3144 /// \param IsInclusive Whether it is an inclusive or exclusive scan.
3145 /// \param ScanRedInfo Pointer to the ScanInfo objected created using
3146 /// `ScanInfoInitialize`.
3147 ///
3148 /// \returns The insertion position *after* the scan.
3149 LLVM_ABI InsertPointOrErrorTy createScan(const LocationDescription &Loc,
3150 InsertPointTy AllocaIP,
3151 ArrayRef<llvm::Value *> ScanVars,
3152 ArrayRef<llvm::Type *> ScanVarsType,
3153 bool IsInclusive,
3154 ScanInfo *ScanRedInfo);
3155
3156 /// Generator for '#omp critical'
3157 ///
3158 /// \param Loc The insert and source location description.
3159 /// \param BodyGenCB Callback that will generate the region body code.
3160 /// \param FiniCB Callback to finalize variable copies.
3161 /// \param CriticalName name of the lock used by the critical directive
3162 /// \param HintInst Hint Instruction for hint clause associated with critical
3163 ///
3164 /// \returns The insertion position *after* the critical.
3165 LLVM_ABI InsertPointOrErrorTy createCritical(const LocationDescription &Loc,
3166 BodyGenCallbackTy BodyGenCB,
3167 FinalizeCallbackTy FiniCB,
3168 StringRef CriticalName,
3169 Value *HintInst);
3170
3171 /// Generator for '#omp ordered depend (source | sink)'
3172 ///
3173 /// \param Loc The insert and source location description.
3174 /// \param AllocaIP The insertion point to be used for alloca instructions.
3175 /// \param NumLoops The number of loops in depend clause.
3176 /// \param StoreValues The value will be stored in vector address.
3177 /// \param Name The name of alloca instruction.
3178 /// \param IsDependSource If true, depend source; otherwise, depend sink.
3179 ///
3180 /// \return The insertion position *after* the ordered.
3182 createOrderedDepend(const LocationDescription &Loc, InsertPointTy AllocaIP,
3183 unsigned NumLoops, ArrayRef<llvm::Value *> StoreValues,
3184 const Twine &Name, bool IsDependSource);
3185
3186 /// Generator for '#omp ordered [threads | simd]'
3187 ///
3188 /// \param Loc The insert and source location description.
3189 /// \param BodyGenCB Callback that will generate the region code.
3190 /// \param FiniCB Callback to finalize variable copies.
3191 /// \param IsThreads If true, with threads clause or without clause;
3192 /// otherwise, with simd clause;
3193 ///
3194 /// \returns The insertion position *after* the ordered.
3196 const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
3197 FinalizeCallbackTy FiniCB, bool IsThreads);
3198
3199 /// Generator for '#omp sections'
3200 ///
3201 /// \param Loc The insert and source location description.
3202 /// \param AllocaIP The insertion points to be used for alloca instructions.
3203 /// \param SectionCBs Callbacks that will generate body of each section.
3204 /// \param PrivCB Callback to copy a given variable (think copy constructor).
3205 /// \param FiniCB Callback to finalize variable copies.
3206 /// \param IsCancellable Flag to indicate a cancellable parallel region.
3207 /// \param IsNowait If true, barrier - to ensure all sections are executed
3208 /// before moving forward will not be generated.
3209 /// \returns The insertion position *after* the sections.
3211 createSections(const LocationDescription &Loc, InsertPointTy AllocaIP,
3214 bool IsCancellable, bool IsNowait);
3215
3216 /// Generator for '#omp section'
3217 ///
3218 /// \param Loc The insert and source location description.
3219 /// \param BodyGenCB Callback that will generate the region body code.
3220 /// \param FiniCB Callback to finalize variable copies.
3221 /// \returns The insertion position *after* the section.
3222 LLVM_ABI InsertPointOrErrorTy createSection(const LocationDescription &Loc,
3223 BodyGenCallbackTy BodyGenCB,
3224 FinalizeCallbackTy FiniCB);
3225
3226 /// Generator for `#omp teams`
3227 ///
3228 /// \param Loc The location where the teams construct was encountered.
3229 /// \param BodyGenCB Callback that will generate the region code.
3230 /// \param NumTeamsLower Lower bound on number of teams. If this is nullptr,
3231 /// it is as if lower bound is specified as equal to upperbound. If
3232 /// this is non-null, then upperbound must also be non-null.
3233 /// \param NumTeamsUpper Upper bound on the number of teams.
3234 /// \param ThreadLimit on the number of threads that may participate in a
3235 /// contention group created by each team.
3236 /// \param IfExpr is the integer argument value of the if condition on the
3237 /// teams clause.
3238 LLVM_ABI InsertPointOrErrorTy createTeams(const LocationDescription &Loc,
3239 BodyGenCallbackTy BodyGenCB,
3240 Value *NumTeamsLower = nullptr,
3241 Value *NumTeamsUpper = nullptr,
3242 Value *ThreadLimit = nullptr,
3243 Value *IfExpr = nullptr);
3244
3245 /// Generator for `#omp distribute`
3246 ///
3247 /// \param Loc The location where the distribute construct was encountered.
3248 /// \param AllocaIP The insertion point to be used for allocations.
3249 /// \param DeallocBlocks The insertion blocks to be used for explicit
3250 /// deallocations, if needed.
3251 /// \param BodyGenCB Callback that will generate the region code.
3253 const LocationDescription &Loc, InsertPointTy AllocaIP,
3254 ArrayRef<BasicBlock *> DeallocBlocks, BodyGenCallbackTy BodyGenCB);
3255
3256 /// Generate conditional branch and relevant BasicBlocks through which private
3257 /// threads copy the 'copyin' variables from Master copy to threadprivate
3258 /// copies.
3259 ///
3260 /// \param IP insertion block for copyin conditional
3261 /// \param MasterVarPtr a pointer to the master variable
3262 /// \param PrivateVarPtr a pointer to the threadprivate variable
3263 /// \param IntPtrTy Pointer size type
3264 /// \param BranchtoEnd Create a branch between the copyin.not.master blocks
3265 // and copy.in.end block
3266 ///
3267 /// \returns The insertion point where copying operation to be emitted.
3269 Value *MasterAddr,
3270 Value *PrivateAddr,
3271 llvm::IntegerType *IntPtrTy,
3272 bool BranchtoEnd = true);
3273
3274 /// Create a runtime call for kmpc_alloc
3275 ///
3276 /// \param Loc The insert and source location description.
3277 /// \param Size Size of allocated memory space
3278 /// \param Allocator Allocator information instruction
3279 /// \param Name Name of call Instruction for OMP_alloc
3280 ///
3281 /// \returns CallInst to the OMP_Alloc call
3282 LLVM_ABI CallInst *createOMPAlloc(const LocationDescription &Loc, Value *Size,
3283 Value *Allocator, std::string Name = "");
3284
3285 /// Create a runtime call for kmpc_align_alloc
3286 ///
3287 /// \param Loc The insert and source location description.
3288 /// \param Align Align value
3289 /// \param Size Size of allocated memory space
3290 /// \param Allocator Allocator information instruction
3291 /// \param Name Name of call Instruction for OMP_Align_Alloc
3292 ///
3293 /// \returns CallInst to the OMP_Align_Alloc call
3294 LLVM_ABI CallInst *createOMPAlignedAlloc(const LocationDescription &Loc,
3295 Value *Align, Value *Size,
3296 Value *Allocator,
3297 std::string Name = "");
3298
3299 /// Create a runtime call for kmpc_free
3300 ///
3301 /// \param Loc The insert and source location description.
3302 /// \param Addr Address of memory space to be freed
3303 /// \param Allocator Allocator information instruction
3304 /// \param Name Name of call Instruction for OMP_Free
3305 ///
3306 /// \returns CallInst to the OMP_Free call
3307 LLVM_ABI CallInst *createOMPFree(const LocationDescription &Loc, Value *Addr,
3308 Value *Allocator, std::string Name = "");
3309
3310 /// Create a runtime call for kmpc_alloc_shared.
3311 ///
3312 /// \param Loc The insert and source location description.
3313 /// \param Size Size of allocated memory space.
3314 /// \param Name Name of call Instruction.
3315 ///
3316 /// \returns CallInst to the kmpc_alloc_shared call.
3317 LLVM_ABI CallInst *createOMPAllocShared(const LocationDescription &Loc,
3318 Value *Size,
3319 const Twine &Name = Twine(""));
3320
3321 /// Create a runtime call for kmpc_alloc_shared.
3322 ///
3323 /// \param Loc The insert and source location description.
3324 /// \param VarType Type of variable to be allocated.
3325 /// \param Name Name of call Instruction.
3326 ///
3327 /// \returns CallInst to the kmpc_alloc_shared call.
3328 LLVM_ABI CallInst *createOMPAllocShared(const LocationDescription &Loc,
3329 Type *VarType,
3330 const Twine &Name = Twine(""));
3331
3332 /// Create a runtime call for kmpc_free_shared.
3333 ///
3334 /// \param Loc The insert and source location description.
3335 /// \param Addr Value obtained from the corresponding kmpc_alloc_shared call.
3336 /// \param Size Size of allocated memory space.
3337 /// \param Name Name of call Instruction.
3338 ///
3339 /// \returns CallInst to the kmpc_free_shared call.
3340 LLVM_ABI CallInst *createOMPFreeShared(const LocationDescription &Loc,
3341 Value *Addr, Value *Size,
3342 const Twine &Name = Twine(""));
3343
3344 /// Create a runtime call for kmpc_free_shared.
3345 ///
3346 /// \param Loc The insert and source location description.
3347 /// \param Addr Value obtained from the corresponding kmpc_alloc_shared call.
3348 /// \param VarType Type of variable to be freed.
3349 /// \param Name Name of call Instruction.
3350 ///
3351 /// \returns CallInst to the kmpc_free_shared call.
3352 LLVM_ABI CallInst *createOMPFreeShared(const LocationDescription &Loc,
3353 Value *Addr, Type *VarType,
3354 const Twine &Name = Twine(""));
3355
3356 /// Create a runtime call for kmpc_threadprivate_cached
3357 ///
3358 /// \param Loc The insert and source location description.
3359 /// \param Pointer pointer to data to be cached
3360 /// \param Size size of data to be cached
3361 /// \param Name Name of call Instruction for callinst
3362 ///
3363 /// \returns CallInst to the thread private cache call.
3364 LLVM_ABI CallInst *
3365 createCachedThreadPrivate(const LocationDescription &Loc,
3367 const llvm::Twine &Name = Twine(""));
3368
3369 /// Create a runtime call for __tgt_interop_init
3370 ///
3371 /// \param Loc The insert and source location description.
3372 /// \param InteropVar variable to be allocated
3373 /// \param InteropType type of interop operation
3374 /// \param Device devide to which offloading will occur
3375 /// \param NumDependences number of dependence variables
3376 /// \param DependenceAddress pointer to dependence variables
3377 /// \param HaveNowaitClause does nowait clause exist
3378 ///
3379 /// \returns CallInst to the __tgt_interop_init call
3380 LLVM_ABI CallInst *createOMPInteropInit(const LocationDescription &Loc,
3381 Value *InteropVar,
3382 omp::OMPInteropType InteropType,
3383 Value *Device, Value *NumDependences,
3384 Value *DependenceAddress,
3385 bool HaveNowaitClause);
3386
3387 /// Create a runtime call for __tgt_interop_destroy
3388 ///
3389 /// \param Loc The insert and source location description.
3390 /// \param InteropVar variable to be allocated
3391 /// \param Device devide to which offloading will occur
3392 /// \param NumDependences number of dependence variables
3393 /// \param DependenceAddress pointer to dependence variables
3394 /// \param HaveNowaitClause does nowait clause exist
3395 ///
3396 /// \returns CallInst to the __tgt_interop_destroy call
3397 LLVM_ABI CallInst *createOMPInteropDestroy(const LocationDescription &Loc,
3398 Value *InteropVar, Value *Device,
3399 Value *NumDependences,
3400 Value *DependenceAddress,
3401 bool HaveNowaitClause);
3402
3403 /// Create a runtime call for __tgt_interop_use
3404 ///
3405 /// \param Loc The insert and source location description.
3406 /// \param InteropVar variable to be allocated
3407 /// \param Device devide to which offloading will occur
3408 /// \param NumDependences number of dependence variables
3409 /// \param DependenceAddress pointer to dependence variables
3410 /// \param HaveNowaitClause does nowait clause exist
3411 ///
3412 /// \returns CallInst to the __tgt_interop_use call
3413 LLVM_ABI CallInst *createOMPInteropUse(const LocationDescription &Loc,
3414 Value *InteropVar, Value *Device,
3415 Value *NumDependences,
3416 Value *DependenceAddress,
3417 bool HaveNowaitClause);
3418
3419 /// The `omp target` interface
3420 ///
3421 /// For more information about the usage of this interface,
3422 /// \see openmp/device/include/Interface.h
3423 ///
3424 ///{
3425
3426 /// Create a runtime call for kmpc_target_init
3427 ///
3428 /// \param Loc The insert and source location description.
3429 /// \param Attrs Structure containing the default attributes, including
3430 /// numbers of threads and teams to launch the kernel with.
3432 const LocationDescription &Loc,
3434
3435 /// Create a runtime call for kmpc_target_deinit
3436 ///
3437 /// \param Loc The insert and source location description.
3438 /// \param TeamsReductionDataSize The maximal size of all the reduction data
3439 /// for teams reduction.
3440 LLVM_ABI void createTargetDeinit(const LocationDescription &Loc,
3441 int32_t TeamsReductionDataSize = 0);
3442
3443 ///}
3444
3445 /// Helpers to read/write kernel annotations from the IR.
3446 ///
3447 ///{
3448
3449 /// Read/write a bounds on threads for \p Kernel. Read will return 0 if none
3450 /// is set.
3451 LLVM_ABI static std::pair<int32_t, int32_t>
3452 readThreadBoundsForKernel(const Triple &T, Function &Kernel);
3453 LLVM_ABI static void writeThreadBoundsForKernel(const Triple &T,
3454 Function &Kernel, int32_t LB,
3455 int32_t UB);
3456
3457 /// Read/write a bounds on teams for \p Kernel. Read will return 0 if none
3458 /// is set.
3459 LLVM_ABI static std::pair<int32_t, int32_t>
3460 readTeamBoundsForKernel(const Triple &T, Function &Kernel);
3461 LLVM_ABI static void writeTeamsForKernel(const Triple &T, Function &Kernel,
3462 int32_t LB, int32_t UB);
3463 ///}
3464
3465private:
3466 // Sets the function attributes expected for the outlined function
3467 void setOutlinedTargetRegionFunctionAttributes(Function *OutlinedFn);
3468
3469 // Creates the function ID/Address for the given outlined function.
3470 // In the case of an embedded device function the address of the function is
3471 // used, in the case of a non-offload function a constant is created.
3472 Constant *createOutlinedFunctionID(Function *OutlinedFn,
3473 StringRef EntryFnIDName);
3474
3475 // Creates the region entry address for the outlined function
3476 Constant *createTargetRegionEntryAddr(Function *OutlinedFunction,
3477 StringRef EntryFnName);
3478
3479public:
3480 /// Functions used to generate a function with the given name.
3482 std::function<Expected<Function *>(StringRef FunctionName)>;
3483
3484 /// Create a unique name for the entry function using the source location
3485 /// information of the current target region. The name will be something like:
3486 ///
3487 /// __omp_offloading_DD_FFFF_PP_lBB[_CC]
3488 ///
3489 /// where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
3490 /// mangled name of the function that encloses the target region and BB is the
3491 /// line number of the target region. CC is a count added when more than one
3492 /// region is located at the same location.
3493 ///
3494 /// If this target outline function is not an offload entry, we don't need to
3495 /// register it. This may happen if it is guarded by an if clause that is
3496 /// false at compile time, or no target archs have been specified.
3497 ///
3498 /// The created target region ID is used by the runtime library to identify
3499 /// the current target region, so it only has to be unique and not
3500 /// necessarily point to anything. It could be the pointer to the outlined
3501 /// function that implements the target region, but we aren't using that so
3502 /// that the compiler doesn't need to keep that, and could therefore inline
3503 /// the host function if proven worthwhile during optimization. In the other
3504 /// hand, if emitting code for the device, the ID has to be the function
3505 /// address so that it can retrieved from the offloading entry and launched
3506 /// by the runtime library. We also mark the outlined function to have
3507 /// external linkage in case we are emitting code for the device, because
3508 /// these functions will be entry points to the device.
3509 ///
3510 /// \param InfoManager The info manager keeping track of the offload entries
3511 /// \param EntryInfo The entry information about the function
3512 /// \param GenerateFunctionCallback The callback function to generate the code
3513 /// \param OutlinedFunction Pointer to the outlined function
3514 /// \param EntryFnIDName Name of the ID o be created
3516 TargetRegionEntryInfo &EntryInfo,
3517 FunctionGenCallback &GenerateFunctionCallback, bool IsOffloadEntry,
3518 Function *&OutlinedFn, Constant *&OutlinedFnID);
3519
3520 /// Registers the given function and sets up the attribtues of the function
3521 /// Returns the FunctionID.
3522 ///
3523 /// \param InfoManager The info manager keeping track of the offload entries
3524 /// \param EntryInfo The entry information about the function
3525 /// \param OutlinedFunction Pointer to the outlined function
3526 /// \param EntryFnName Name of the outlined function
3527 /// \param EntryFnIDName Name of the ID o be created
3530 Function *OutlinedFunction,
3531 StringRef EntryFnName, StringRef EntryFnIDName);
3532
3533 /// Type of BodyGen to use for region codegen
3534 ///
3535 /// Priv: If device pointer privatization is required, emit the body of the
3536 /// region here. It will have to be duplicated: with and without
3537 /// privatization.
3538 /// DupNoPriv: If we need device pointer privatization, we need
3539 /// to emit the body of the region with no privatization in the 'else' branch
3540 /// of the conditional.
3541 /// NoPriv: If we don't require privatization of device
3542 /// pointers, we emit the body in between the runtime calls. This avoids
3543 /// duplicating the body code.
3545
3546 /// Callback type for creating the map infos for the kernel parameters.
3547 /// \param CodeGenIP is the insertion point where code should be generated,
3548 /// if any.
3551
3552private:
3553 /// Emit the array initialization or deletion portion for user-defined mapper
3554 /// code generation. First, it evaluates whether an array section is mapped
3555 /// and whether the \a MapType instructs to delete this section. If \a IsInit
3556 /// is true, and \a MapType indicates to not delete this array, array
3557 /// initialization code is generated. If \a IsInit is false, and \a MapType
3558 /// indicates to delete this array, array deletion code is generated.
3559 void emitUDMapperArrayInitOrDel(Function *MapperFn, llvm::Value *MapperHandle,
3560 llvm::Value *Base, llvm::Value *Begin,
3561 llvm::Value *Size, llvm::Value *MapType,
3562 llvm::Value *MapName, TypeSize ElementSize,
3563 llvm::BasicBlock *ExitBB, bool IsInit);
3564
3565public:
3566 /// Emit the user-defined mapper function. The code generation follows the
3567 /// pattern in the example below.
3568 /// \code
3569 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
3570 /// void *base, void *begin,
3571 /// int64_t size, int64_t type,
3572 /// void *name = nullptr) {
3573 /// // Allocate space for an array section first or add a base/begin for
3574 /// // pointer dereference.
3575 /// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
3576 /// !maptype.IsDelete)
3577 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
3578 /// size*sizeof(Ty), clearToFromMember(type));
3579 /// // Map members.
3580 /// for (unsigned i = 0; i < size; i++) {
3581 /// // For each component specified by this mapper:
3582 /// for (auto c : begin[i]->all_components) {
3583 /// if (c.hasMapper())
3584 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin,
3585 /// c.arg_size,
3586 /// c.arg_type, c.arg_name);
3587 /// else
3588 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
3589 /// c.arg_begin, c.arg_size, c.arg_type,
3590 /// c.arg_name);
3591 /// }
3592 /// }
3593 /// // Delete the array section.
3594 /// if (size > 1 && maptype.IsDelete)
3595 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
3596 /// size*sizeof(Ty), clearToFromMember(type));
3597 /// }
3598 /// \endcode
3599 ///
3600 /// \param PrivAndGenMapInfoCB Callback that privatizes code and populates the
3601 /// MapInfos and returns.
3602 /// \param ElemTy DeclareMapper element type.
3603 /// \param FuncName Optional param to specify mapper function name.
3604 /// \param CustomMapperCB Optional callback to generate code related to
3605 /// custom mappers.
3608 InsertPointTy CodeGenIP, llvm::Value *PtrPHI, llvm::Value *BeginArg)>
3609 PrivAndGenMapInfoCB,
3610 llvm::Type *ElemTy, StringRef FuncName,
3611 CustomMapperCallbackTy CustomMapperCB,
3612 bool PreserveMemberOfFlags = false);
3613
3614 /// Generator for '#omp target data'
3615 ///
3616 /// \param Loc The location where the target data construct was encountered.
3617 /// \param AllocaIP The insertion points to be used for allocations.
3618 /// \param CodeGenIP The insertion point at which the target directive code
3619 /// should be placed.
3620 /// \param DeallocBlocks The insertion blocks at which explicit deallocations
3621 /// should be placed, if needed.
3622 /// \param IsBegin If true then emits begin mapper call otherwise emits
3623 /// end mapper call.
3624 /// \param DeviceID Stores the DeviceID from the device clause.
3625 /// \param IfCond Value which corresponds to the if clause condition.
3626 /// \param Info Stores all information realted to the Target Data directive.
3627 /// \param GenMapInfoCB Callback that populates the MapInfos and returns.
3628 /// \param CustomMapperCB Callback to generate code related to
3629 /// custom mappers.
3630 /// \param BodyGenCB Optional Callback to generate the region code.
3631 /// \param DeviceAddrCB Optional callback to generate code related to
3632 /// use_device_ptr and use_device_addr.
3634 const LocationDescription &Loc, InsertPointTy AllocaIP,
3635 InsertPointTy CodeGenIP, ArrayRef<BasicBlock *> DeallocBlocks,
3636 Value *DeviceID, Value *IfCond, TargetDataInfo &Info,
3637 GenMapInfoCallbackTy GenMapInfoCB, CustomMapperCallbackTy CustomMapperCB,
3638 omp::RuntimeFunction *MapperFunc = nullptr,
3640 BodyGenTy BodyGenType)>
3641 BodyGenCB = nullptr,
3642 function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
3643 Value *SrcLocInfo = nullptr);
3644
3646 InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
3647 ArrayRef<BasicBlock *> DeallocBlocks)>;
3648
3650 Argument &Arg, Value *Input, Value *&RetVal, InsertPointTy AllocaIP,
3651 InsertPointTy CodeGenIP, ArrayRef<InsertPointTy> DeallocIPs)>;
3652
3653 /// Generator for '#omp target'
3654 ///
3655 /// \param Loc where the target data construct was encountered.
3656 /// \param IsOffloadEntry whether it is an offload entry.
3657 /// \param CodeGenIP The insertion point where the call to the outlined
3658 /// function should be emitted.
3659 /// \param DeallocBlocks The insertion points at which explicit deallocations
3660 /// should be placed, if needed.
3661 /// \param Info Stores all information realted to the Target directive.
3662 /// \param EntryInfo The entry information about the function.
3663 /// \param DefaultAttrs Structure containing the default attributes, including
3664 /// numbers of threads and teams to launch the kernel with.
3665 /// \param RuntimeAttrs Structure containing the runtime numbers of threads
3666 /// and teams to launch the kernel with.
3667 /// \param IfCond value of the `if` clause.
3668 /// \param Inputs The input values to the region that will be passed.
3669 /// as arguments to the outlined function.
3670 /// \param BodyGenCB Callback that will generate the region code.
3671 /// \param ArgAccessorFuncCB Callback that will generate accessors
3672 /// instructions for passed in target arguments where neccessary
3673 /// \param CustomMapperCB Callback to generate code related to
3674 /// custom mappers.
3675 /// \param Dependencies A vector of DependData objects that carry
3676 /// dependency information as passed in the depend clause
3677 /// \param HasNowait Whether the target construct has a `nowait` clause or
3678 /// not.
3679 /// \param DynCGroupMem The size of the dynamic groupprivate memory for each
3680 /// cgroup.
3681 /// \param DynCGroupMem The fallback mechanism to execute if the requested
3682 /// cgroup memory cannot be provided.
3684 const LocationDescription &Loc, bool IsOffloadEntry,
3687 ArrayRef<BasicBlock *> DeallocBlocks, TargetDataInfo &Info,
3688 TargetRegionEntryInfo &EntryInfo,
3689 const TargetKernelDefaultAttrs &DefaultAttrs,
3690 const TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond,
3691 SmallVectorImpl<Value *> &Inputs, GenMapInfoCallbackTy GenMapInfoCB,
3692 TargetBodyGenCallbackTy BodyGenCB,
3693 TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB,
3694 CustomMapperCallbackTy CustomMapperCB,
3695 const DependenciesInfo &Dependencies = {}, bool HasNowait = false,
3696 Value *DynCGroupMem = nullptr,
3697 omp::OMPDynGroupprivateFallbackType DynCGroupMemFallback =
3699
3700 /// Returns __kmpc_for_static_init_* runtime function for the specified
3701 /// size \a IVSize and sign \a IVSigned. Will create a distribute call
3702 /// __kmpc_distribute_static_init* if \a IsGPUDistribute is set.
3704 bool IVSigned,
3705 bool IsGPUDistribute);
3706
3707 /// Returns __kmpc_dispatch_init_* runtime function for the specified
3708 /// size \a IVSize and sign \a IVSigned.
3710 bool IVSigned);
3711
3712 /// Returns __kmpc_dispatch_next_* runtime function for the specified
3713 /// size \a IVSize and sign \a IVSigned.
3715 bool IVSigned);
3716
3717 /// Returns __kmpc_dispatch_fini_* runtime function for the specified
3718 /// size \a IVSize and sign \a IVSigned.
3720 bool IVSigned);
3721
3722 /// Returns __kmpc_dispatch_deinit runtime function.
3724
3725 /// Declarations for LLVM-IR types (simple, array, function and structure) are
3726 /// generated below. Their names are defined and used in OpenMPKinds.def. Here
3727 /// we provide the declarations, the initializeTypes function will provide the
3728 /// values.
3729 ///
3730 ///{
3731#define OMP_TYPE(VarName, InitValue) Type *VarName = nullptr;
3732#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
3733 ArrayType *VarName##Ty = nullptr; \
3734 PointerType *VarName##PtrTy = nullptr;
3735#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
3736 FunctionType *VarName = nullptr; \
3737 PointerType *VarName##Ptr = nullptr;
3738#define OMP_STRUCT_TYPE(VarName, StrName, ...) \
3739 StructType *VarName = nullptr; \
3740 PointerType *VarName##Ptr = nullptr;
3741#include "llvm/Frontend/OpenMP/OMPKinds.def"
3742
3743 ///}
3744
3745private:
3746 /// Create all simple and struct types exposed by the runtime and remember
3747 /// the llvm::PointerTypes of them for easy access later.
3748 void initializeTypes(Module &M);
3749
3750 /// Common interface for generating entry calls for OMP Directives.
3751 /// if the directive has a region/body, It will set the insertion
3752 /// point to the body
3753 ///
3754 /// \param OMPD Directive to generate entry blocks for
3755 /// \param EntryCall Call to the entry OMP Runtime Function
3756 /// \param ExitBB block where the region ends.
3757 /// \param Conditional indicate if the entry call result will be used
3758 /// to evaluate a conditional of whether a thread will execute
3759 /// body code or not.
3760 ///
3761 /// \return The insertion position in exit block
3762 InsertPointTy emitCommonDirectiveEntry(omp::Directive OMPD, Value *EntryCall,
3763 BasicBlock *ExitBB,
3764 bool Conditional = false);
3765
3766 /// Common interface to finalize the region
3767 ///
3768 /// \param OMPD Directive to generate exiting code for
3769 /// \param FinIP Insertion point for emitting Finalization code and exit call.
3770 /// This block must not contain any non-finalization code.
3771 /// \param ExitCall Call to the ending OMP Runtime Function
3772 /// \param HasFinalize indicate if the directive will require finalization
3773 /// and has a finalization callback in the stack that
3774 /// should be called.
3775 ///
3776 /// \return The insertion position in exit block
3777 InsertPointOrErrorTy emitCommonDirectiveExit(omp::Directive OMPD,
3778 InsertPointTy FinIP,
3779 Instruction *ExitCall,
3780 bool HasFinalize = true);
3781
3782 /// Common Interface to generate OMP inlined regions
3783 ///
3784 /// \param OMPD Directive to generate inlined region for
3785 /// \param EntryCall Call to the entry OMP Runtime Function
3786 /// \param ExitCall Call to the ending OMP Runtime Function
3787 /// \param BodyGenCB Body code generation callback.
3788 /// \param FiniCB Finalization Callback. Will be called when finalizing region
3789 /// \param Conditional indicate if the entry call result will be used
3790 /// to evaluate a conditional of whether a thread will execute
3791 /// body code or not.
3792 /// \param HasFinalize indicate if the directive will require finalization
3793 /// and has a finalization callback in the stack that
3794 /// should be called.
3795 /// \param IsCancellable if HasFinalize is set to true, indicate if the
3796 /// the directive should be cancellable.
3797 /// \return The insertion point after the region
3799 EmitOMPInlinedRegion(omp::Directive OMPD, Instruction *EntryCall,
3800 Instruction *ExitCall, BodyGenCallbackTy BodyGenCB,
3801 FinalizeCallbackTy FiniCB, bool Conditional = false,
3802 bool HasFinalize = true, bool IsCancellable = false);
3803
3804 /// Get the platform-specific name separator.
3805 /// \param Parts different parts of the final name that needs separation
3806 /// \param FirstSeparator First separator used between the initial two
3807 /// parts of the name.
3808 /// \param Separator separator used between all of the rest consecutive
3809 /// parts of the name
3810 static std::string getNameWithSeparators(ArrayRef<StringRef> Parts,
3811 StringRef FirstSeparator,
3812 StringRef Separator);
3813
3814 /// Returns corresponding lock object for the specified critical region
3815 /// name. If the lock object does not exist it is created, otherwise the
3816 /// reference to the existing copy is returned.
3817 /// \param CriticalName Name of the critical region.
3818 ///
3819 Value *getOMPCriticalRegionLock(StringRef CriticalName);
3820
3821 /// Callback type for Atomic Expression update
3822 /// ex:
3823 /// \code{.cpp}
3824 /// unsigned x = 0;
3825 /// #pragma omp atomic update
3826 /// x = Expr(x_old); //Expr() is any legal operation
3827 /// \endcode
3828 ///
3829 /// \param XOld the value of the atomic memory address to use for update
3830 /// \param IRB reference to the IRBuilder to use
3831 ///
3832 /// \returns Value to update X to.
3833 using AtomicUpdateCallbackTy =
3834 const function_ref<Expected<Value *>(Value *XOld, IRBuilder<> &IRB)>;
3835
3836private:
3837 enum AtomicKind { Read, Write, Update, Capture, Compare };
3838
3839 /// Determine whether to emit flush or not
3840 ///
3841 /// \param Loc The insert and source location description.
3842 /// \param AO The required atomic ordering
3843 /// \param AK The OpenMP atomic operation kind used.
3844 ///
3845 /// \returns wether a flush was emitted or not
3846 bool checkAndEmitFlushAfterAtomic(const LocationDescription &Loc,
3847 AtomicOrdering AO, AtomicKind AK);
3848
3849 /// Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X
3850 /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
3851 /// Only Scalar data types.
3852 ///
3853 /// \param AllocaIP The insertion point to be used for alloca
3854 /// instructions.
3855 /// \param X The target atomic pointer to be updated
3856 /// \param XElemTy The element type of the atomic pointer.
3857 /// \param Expr The value to update X with.
3858 /// \param AO Atomic ordering of the generated atomic
3859 /// instructions.
3860 /// \param RMWOp The binary operation used for update. If
3861 /// operation is not supported by atomicRMW,
3862 /// or belong to {FADD, FSUB, BAD_BINOP}.
3863 /// Then a `cmpExch` based atomic will be generated.
3864 /// \param UpdateOp Code generator for complex expressions that cannot be
3865 /// expressed through atomicrmw instruction.
3866 /// \param VolatileX true if \a X volatile?
3867 /// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the
3868 /// update expression, false otherwise.
3869 /// (e.g. true for X = X BinOp Expr)
3870 ///
3871 /// \returns A pair of the old value of X before the update, and the value
3872 /// used for the update.
3873 Expected<std::pair<Value *, Value *>>
3874 emitAtomicUpdate(InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr,
3876 AtomicUpdateCallbackTy &UpdateOp, bool VolatileX,
3877 bool IsXBinopExpr, bool IsIgnoreDenormalMode,
3878 bool IsFineGrainedMemory, bool IsRemoteMemory);
3879
3880 /// Emit the binary op. described by \p RMWOp, using \p Src1 and \p Src2 .
3881 ///
3882 /// \Return The instruction
3883 Value *emitRMWOpAsInstruction(Value *Src1, Value *Src2,
3884 AtomicRMWInst::BinOp RMWOp);
3885
3886 bool IsFinalized;
3887
3888public:
3889 /// a struct to pack relevant information while generating atomic Ops
3891 Value *Var = nullptr;
3892 Type *ElemTy = nullptr;
3893 bool IsSigned = false;
3894 bool IsVolatile = false;
3895 };
3896
3897 /// Emit atomic Read for : V = X --- Only Scalar data types.
3898 ///
3899 /// \param Loc The insert and source location description.
3900 /// \param X The target pointer to be atomically read
3901 /// \param V Memory address where to store atomically read
3902 /// value
3903 /// \param AO Atomic ordering of the generated atomic
3904 /// instructions.
3905 /// \param AllocaIP Insert point for allocas
3906 //
3907 /// \return Insertion point after generated atomic read IR.
3910 AtomicOrdering AO,
3911 InsertPointTy AllocaIP);
3912
3913 /// Emit atomic write for : X = Expr --- Only Scalar data types.
3914 ///
3915 /// \param Loc The insert and source location description.
3916 /// \param X The target pointer to be atomically written to
3917 /// \param Expr The value to store.
3918 /// \param AO Atomic ordering of the generated atomic
3919 /// instructions.
3920 /// \param AllocaIP Insert point for allocas
3921 ///
3922 /// \return Insertion point after generated atomic Write IR.
3924 AtomicOpValue &X, Value *Expr,
3925 AtomicOrdering AO,
3926 InsertPointTy AllocaIP);
3927
3928 /// Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X
3929 /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
3930 /// Only Scalar data types.
3931 ///
3932 /// \param Loc The insert and source location description.
3933 /// \param AllocaIP The insertion point to be used for alloca instructions.
3934 /// \param X The target atomic pointer to be updated
3935 /// \param Expr The value to update X with.
3936 /// \param AO Atomic ordering of the generated atomic instructions.
3937 /// \param RMWOp The binary operation used for update. If operation
3938 /// is not supported by atomicRMW, or belong to
3939 /// {FADD, FSUB, BAD_BINOP}. Then a `cmpExch` based
3940 /// atomic will be generated.
3941 /// \param UpdateOp Code generator for complex expressions that cannot be
3942 /// expressed through atomicrmw instruction.
3943 /// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the
3944 /// update expression, false otherwise.
3945 /// (e.g. true for X = X BinOp Expr)
3946 ///
3947 /// \return Insertion point after generated atomic update IR.
3950 Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
3951 AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr,
3952 bool IsIgnoreDenormalMode = false, bool IsFineGrainedMemory = false,
3953 bool IsRemoteMemory = false);
3954
3955 /// Emit atomic update for constructs: --- Only Scalar data types
3956 /// V = X; X = X BinOp Expr ,
3957 /// X = X BinOp Expr; V = X,
3958 /// V = X; X = Expr BinOp X,
3959 /// X = Expr BinOp X; V = X,
3960 /// V = X; X = UpdateOp(X),
3961 /// X = UpdateOp(X); V = X,
3962 ///
3963 /// \param Loc The insert and source location description.
3964 /// \param AllocaIP The insertion point to be used for alloca instructions.
3965 /// \param X The target atomic pointer to be updated
3966 /// \param V Memory address where to store captured value
3967 /// \param Expr The value to update X with.
3968 /// \param AO Atomic ordering of the generated atomic instructions
3969 /// \param RMWOp The binary operation used for update. If
3970 /// operation is not supported by atomicRMW, or belong to
3971 /// {FADD, FSUB, BAD_BINOP}. Then a cmpExch based
3972 /// atomic will be generated.
3973 /// \param UpdateOp Code generator for complex expressions that cannot be
3974 /// expressed through atomicrmw instruction.
3975 /// \param UpdateExpr true if X is an in place update of the form
3976 /// X = X BinOp Expr or X = Expr BinOp X
3977 /// \param IsXBinopExpr true if X is Left H.S. in Right H.S. part of the
3978 /// update expression, false otherwise.
3979 /// (e.g. true for X = X BinOp Expr)
3980 /// \param IsPostfixUpdate true if original value of 'x' must be stored in
3981 /// 'v', not an updated one.
3982 ///
3983 /// \return Insertion point after generated atomic capture IR.
3986 AtomicOpValue &V, Value *Expr, AtomicOrdering AO,
3987 AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp,
3988 bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr,
3989 bool IsIgnoreDenormalMode = false, bool IsFineGrainedMemory = false,
3990 bool IsRemoteMemory = false);
3991
3992 /// Emit atomic compare for constructs: --- Only scalar data types
3993 /// cond-expr-stmt:
3994 /// x = x ordop expr ? expr : x;
3995 /// x = expr ordop x ? expr : x;
3996 /// x = x == e ? d : x;
3997 /// x = e == x ? d : x; (this one is not in the spec)
3998 /// cond-update-stmt:
3999 /// if (x ordop expr) { x = expr; }
4000 /// if (expr ordop x) { x = expr; }
4001 /// if (x == e) { x = d; }
4002 /// if (e == x) { x = d; } (this one is not in the spec)
4003 /// conditional-update-capture-atomic:
4004 /// v = x; cond-update-stmt; (IsPostfixUpdate=true, IsFailOnly=false)
4005 /// cond-update-stmt; v = x; (IsPostfixUpdate=false, IsFailOnly=false)
4006 /// if (x == e) { x = d; } else { v = x; } (IsPostfixUpdate=false,
4007 /// IsFailOnly=true)
4008 /// r = x == e; if (r) { x = d; } (IsPostfixUpdate=false, IsFailOnly=false)
4009 /// r = x == e; if (r) { x = d; } else { v = x; } (IsPostfixUpdate=false,
4010 /// IsFailOnly=true)
4011 ///
4012 /// \param Loc The insert and source location description.
4013 /// \param X The target atomic pointer to be updated.
4014 /// \param V Memory address where to store captured value (for
4015 /// compare capture only).
4016 /// \param R Memory address where to store comparison result
4017 /// (for compare capture with '==' only).
4018 /// \param E The expected value ('e') for forms that use an
4019 /// equality comparison or an expression ('expr') for
4020 /// forms that use 'ordop' (logically an atomic maximum or
4021 /// minimum).
4022 /// \param D The desired value for forms that use an equality
4023 /// comparison. If forms that use 'ordop', it should be
4024 /// \p nullptr.
4025 /// \param AO Atomic ordering of the generated atomic instructions.
4026 /// \param Op Atomic compare operation. It can only be ==, <, or >.
4027 /// \param IsXBinopExpr True if the conditional statement is in the form where
4028 /// x is on LHS. It only matters for < or >.
4029 /// \param IsPostfixUpdate True if original value of 'x' must be stored in
4030 /// 'v', not an updated one (for compare capture
4031 /// only).
4032 /// \param IsFailOnly True if the original value of 'x' is stored to 'v'
4033 /// only when the comparison fails. This is only valid for
4034 /// the case the comparison is '=='.
4035 ///
4036 /// \return Insertion point after generated atomic capture IR.
4037 /// Whether to emit special handling for IEEE 754 -0.0 == +0.0 in
4038 /// atomic compare operations on floating-point types.
4039 bool HandleFPNegZero = false;
4040
4041 /// Set whether atomic compare should handle -0.0/+0.0 equivalence.
4042 /// Returns the previous value so callers can save and restore it.
4043 bool setHandleFPNegZero(bool FPNegZero) {
4044 bool Old = HandleFPNegZero;
4045 HandleFPNegZero = FPNegZero;
4046 return Old;
4047 }
4048
4050 const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V,
4051 AtomicOpValue &R, Value *E, Value *D, AtomicOrdering AO,
4052 omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate,
4053 bool IsFailOnly, bool IsWeak = false);
4055 const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V,
4056 AtomicOpValue &R, Value *E, Value *D, AtomicOrdering AO,
4057 omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate,
4058 bool IsFailOnly, AtomicOrdering Failure, bool IsWeak = false);
4059
4060 /// Create the control flow structure of a canonical OpenMP loop.
4061 ///
4062 /// The emitted loop will be disconnected, i.e. no edge to the loop's
4063 /// preheader and no terminator in the AfterBB. The OpenMPIRBuilder's
4064 /// IRBuilder location is not preserved.
4065 ///
4066 /// \param DL DebugLoc used for the instructions in the skeleton.
4067 /// \param TripCount Value to be used for the trip count.
4068 /// \param F Function in which to insert the BasicBlocks.
4069 /// \param PreInsertBefore Where to insert BBs that execute before the body,
4070 /// typically the body itself.
4071 /// \param PostInsertBefore Where to insert BBs that execute after the body.
4072 /// \param Name Base name used to derive BB
4073 /// and instruction names.
4074 ///
4075 /// \returns The CanonicalLoopInfo that represents the emitted loop.
4077 Function *F,
4078 BasicBlock *PreInsertBefore,
4079 BasicBlock *PostInsertBefore,
4080 const Twine &Name = {});
4081 /// OMP Offload Info Metadata name string
4082 const std::string ompOffloadInfoName = "omp_offload.info";
4083
4084 /// Loads all the offload entries information from the host IR
4085 /// metadata. This function is only meant to be used with device code
4086 /// generation.
4087 ///
4088 /// \param M Module to load Metadata info from. Module passed maybe
4089 /// loaded from bitcode file, i.e, different from OpenMPIRBuilder::M module.
4091
4092 /// Loads all the offload entries information from the host IR
4093 /// metadata read from the file passed in as the HostFilePath argument. This
4094 /// function is only meant to be used with device code generation.
4095 ///
4096 /// \param HostFilePath The path to the host IR file,
4097 /// used to load in offload metadata for the device, allowing host and device
4098 /// to maintain the same metadata mapping.
4100 StringRef HostFilePath);
4101
4102 /// Gets (if variable with the given name already exist) or creates
4103 /// internal global variable with the specified Name. The created variable has
4104 /// linkage CommonLinkage by default and is initialized by null value.
4105 /// \param Ty Type of the global variable. If it is exist already the type
4106 /// must be the same.
4107 /// \param Name Name of the variable.
4110 std::optional<unsigned> AddressSpace = {});
4111
4113 InsertPointTy BodyIP, llvm::Value *LinearIV)>;
4114
4115 /// Create a canonical iterator loop at the current insertion point.
4116 ///
4117 /// This helper splits the current block and builds a canonical loop
4118 /// using createLoopSkeleton(). The resulting control flow looks like:
4119 ///
4120 /// CurBB -> Preheader -> Header -> Body -> Latch -> After -> ContBB
4121 ///
4122 /// The body of the loop is produced by calling \p BodyGen with the insertion
4123 /// point for the loop body and the induction variable.
4124 /// Unlike createCanonicalLoop(), this function is intended for \p BodyGen
4125 /// that may perform region lowering (e.g., translating MLIR regions) and are
4126 /// not guaranteed to preserve the canonical skeleton's body terminator. In
4127 /// particular:
4128 ///
4129 /// - The skeleton’s unconditional branch from the loop body is removed
4130 /// before invoking \p BodyGen.
4131 /// - \p BodyGen may freely emit instructions and temporarily introduce
4132 /// control flow.
4133 /// - If the loop body does not end with a terminator after \p BodyGen
4134 /// returns, a branch to the latch is inserted to restore canonical form.
4135 ///
4136 /// \param Loc The location where the iterator modifier was encountered.
4137 /// \param TripCount Number of loop iterations.
4138 /// \param BodyGen Callback to generate the loop body.
4139 /// \param Name Base name used for creating the loop
4140 /// \returns The insertion position *after* the iterator loop
4143 IteratorBodyGenTy BodyGen, llvm::StringRef Name = "iterator");
4144
4145 /// Kind of parameter in a function with 'declare simd' directive.
4154
4155 /// Attribute set of the `declare simd` parameter.
4162
4168
4169 /// Emit x86 vector-function ABI attributes for a `declare simd` function.
4170 ///
4171 /// Generates and attaches `_ZGV*` vector function ABI attributes to \p Fn
4172 /// following the x86 vector ABI used by OpenMP `declare simd`. For each
4173 /// supported ISA (SSE, AVX, AVX2, AVX512) and masking variant, this
4174 /// constructs the appropriate mangled vector-function name and adds it as a
4175 /// function attribute.
4176 ///
4177 /// \param Fn The scalar function to which vector-function attributes
4178 /// are attached.
4179 /// \param NumElements Number of elements used to derive the vector length
4180 /// when
4181 /// \p VLENVal is not specified.
4182 /// \param VLENVal User provided vector length.
4183 /// \param ParamAttrs Array of attribute set of the `declare simd` parameter.
4184 /// \param Branch `undefined`, `inbranch` or `notinbranch` clause.
4186 llvm::Function *Fn, unsigned NumElements, const llvm::APSInt &VLENVal,
4188
4189 /// Emit AArch64 vector-function ABI attributes for a `declare simd` function.
4190 ///
4191 /// Generates and attaches `_ZGV*` vector function ABI attributes to \p Fn
4192 /// following the AArch64 vector-function ABI. The emitted names depend on the
4193 /// selected ISA, user-specified vector length, parameter attribute mangling,
4194 /// and the declare simd branch clause.
4195 ///
4196 /// \param Fn The scalar function to which vector-function
4197 /// attributes are attached.
4198 /// \param VLENVal User provided vector length.
4199 /// \param ParamAttrs Array of attribute set of the `declare simd`
4200 /// parameter.
4201 /// \param Branch `undefined`, `inbranch` or `notinbranch`
4202 /// clause.
4203 /// \param ISA `'n'` for Advanced SIMD or `'s'` for SVE.
4204 /// \param NarrowestDataSize Narrowest data size in bits used to infer the
4205 /// default vector length when \p VLENVal is
4206 /// absent.
4207 /// \param OutputBecomesInput Whether result values are represented as input
4208 /// parameters in the emitted vector-function ABI
4209 /// name.
4211 llvm::Function *Fn, unsigned VLENVal,
4213 char ISA, unsigned NarrowestDataSize, bool OutputBecomesInput);
4214};
4215
4216/// Class to represented the control flow structure of an OpenMP canonical loop.
4217///
4218/// The control-flow structure is standardized for easy consumption by
4219/// directives associated with loops. For instance, the worksharing-loop
4220/// construct may change this control flow such that each loop iteration is
4221/// executed on only one thread. The constraints of a canonical loop in brief
4222/// are:
4223///
4224/// * The number of loop iterations must have been computed before entering the
4225/// loop.
4226///
4227/// * Has an (unsigned) logical induction variable that starts at zero and
4228/// increments by one.
4229///
4230/// * The loop's CFG itself has no side-effects. The OpenMP specification
4231/// itself allows side-effects, but the order in which they happen, including
4232/// how often or whether at all, is unspecified. We expect that the frontend
4233/// will emit those side-effect instructions somewhere (e.g. before the loop)
4234/// such that the CanonicalLoopInfo itself can be side-effect free.
4235///
4236/// Keep in mind that CanonicalLoopInfo is meant to only describe a repeated
4237/// execution of a loop body that satifies these constraints. It does NOT
4238/// represent arbitrary SESE regions that happen to contain a loop. Do not use
4239/// CanonicalLoopInfo for such purposes.
4240///
4241/// The control flow can be described as follows:
4242///
4243/// Preheader
4244/// |
4245/// /-> Header
4246/// | |
4247/// | Cond---\
4248/// | | |
4249/// | Body |
4250/// | | | |
4251/// | <...> |
4252/// | | | |
4253/// \--Latch |
4254/// |
4255/// Exit
4256/// |
4257/// After
4258///
4259/// The loop is thought to start at PreheaderIP (at the Preheader's terminator,
4260/// including) and end at AfterIP (at the After's first instruction, excluding).
4261/// That is, instructions in the Preheader and After blocks (except the
4262/// Preheader's terminator) are out of CanonicalLoopInfo's control and may have
4263/// side-effects. Typically, the Preheader is used to compute the loop's trip
4264/// count. The instructions from BodyIP (at the Body block's first instruction,
4265/// excluding) until the Latch are also considered outside CanonicalLoopInfo's
4266/// control and thus can have side-effects. The body block is the single entry
4267/// point into the loop body, which may contain arbitrary control flow as long
4268/// as all control paths eventually branch to the Latch block.
4269///
4270/// TODO: Consider adding another standardized BasicBlock between Body CFG and
4271/// Latch to guarantee that there is only a single edge to the latch. It would
4272/// make loop transformations easier to not needing to consider multiple
4273/// predecessors of the latch (See redirectAllPredecessorsTo) and would give us
4274/// an equivalant to PreheaderIP, AfterIP and BodyIP for inserting code that
4275/// executes after each body iteration.
4276///
4277/// There must be no loop-carried dependencies through llvm::Values. This is
4278/// equivalant to that the Latch has no PHINode and the Header's only PHINode is
4279/// for the induction variable.
4280///
4281/// All code in Header, Cond, Latch and Exit (plus the terminator of the
4282/// Preheader) are CanonicalLoopInfo's responsibility and their build-up checked
4283/// by assertOK(). They are expected to not be modified unless explicitly
4284/// modifying the CanonicalLoopInfo through a methods that applies a OpenMP
4285/// loop-associated construct such as applyWorkshareLoop, tileLoops, unrollLoop,
4286/// etc. These methods usually invalidate the CanonicalLoopInfo and re-use its
4287/// basic blocks. After invalidation, the CanonicalLoopInfo must not be used
4288/// anymore as its underlying control flow may not exist anymore.
4289/// Loop-transformation methods such as tileLoops, collapseLoops and unrollLoop
4290/// may also return a new CanonicalLoopInfo that can be passed to other
4291/// loop-associated construct implementing methods. These loop-transforming
4292/// methods may either create a new CanonicalLoopInfo usually using
4293/// createLoopSkeleton and invalidate the input CanonicalLoopInfo, or reuse and
4294/// modify one of the input CanonicalLoopInfo and return it as representing the
4295/// modified loop. What is done is an implementation detail of
4296/// transformation-implementing method and callers should always assume that the
4297/// CanonicalLoopInfo passed to it is invalidated and a new object is returned.
4298/// Returned CanonicalLoopInfo have the same structure and guarantees as the one
4299/// created by createCanonicalLoop, such that transforming methods do not have
4300/// to special case where the CanonicalLoopInfo originated from.
4301///
4302/// Generally, methods consuming CanonicalLoopInfo do not need an
4303/// OpenMPIRBuilder::InsertPointTy as argument, but use the locations of the
4304/// CanonicalLoopInfo to insert new or modify existing instructions. Unless
4305/// documented otherwise, methods consuming CanonicalLoopInfo do not invalidate
4306/// any InsertPoint that is outside CanonicalLoopInfo's control. Specifically,
4307/// any InsertPoint in the Preheader, After or Block can still be used after
4308/// calling such a method.
4309///
4310/// TODO: Provide mechanisms for exception handling and cancellation points.
4311///
4312/// Defined outside OpenMPIRBuilder because nested classes cannot be
4313/// forward-declared, e.g. to avoid having to include the entire OMPIRBuilder.h.
4315 friend class OpenMPIRBuilder;
4316
4317private:
4318 BasicBlock *Header = nullptr;
4319 BasicBlock *Cond = nullptr;
4320 BasicBlock *Latch = nullptr;
4321 BasicBlock *Exit = nullptr;
4322
4323 // Hold the MLIR value for the `lastiter` of the canonical loop.
4324 Value *LastIter = nullptr;
4325
4326 /// Add the control blocks of this loop to \p BBs.
4327 ///
4328 /// This does not include any block from the body, including the one returned
4329 /// by getBody().
4330 ///
4331 /// FIXME: This currently includes the Preheader and After blocks even though
4332 /// their content is (mostly) not under CanonicalLoopInfo's control.
4333 /// Re-evaluated whether this makes sense.
4334 void collectControlBlocks(SmallVectorImpl<BasicBlock *> &BBs);
4335
4336 /// Sets the number of loop iterations to the given value. This value must be
4337 /// valid in the condition block (i.e., defined in the preheader) and is
4338 /// interpreted as an unsigned integer.
4339 void setTripCount(Value *TripCount);
4340
4341 /// Replace all uses of the canonical induction variable in the loop body with
4342 /// a new one.
4343 ///
4344 /// The intended use case is to update the induction variable for an updated
4345 /// iteration space such that it can stay normalized in the 0...tripcount-1
4346 /// range.
4347 ///
4348 /// The \p Updater is called with the (presumable updated) current normalized
4349 /// induction variable and is expected to return the value that uses of the
4350 /// pre-updated induction values should use instead, typically dependent on
4351 /// the new induction variable. This is a lambda (instead of e.g. just passing
4352 /// the new value) to be able to distinguish the uses of the pre-updated
4353 /// induction variable and uses of the induction varible to compute the
4354 /// updated induction variable value.
4355 void mapIndVar(llvm::function_ref<Value *(Instruction *)> Updater);
4356
4357public:
4358 /// Sets the last iteration variable for this loop.
4359 void setLastIter(Value *IterVar) { LastIter = std::move(IterVar); }
4360
4361 /// Returns the last iteration variable for this loop.
4362 /// Certain use-cases (like translation of linear clause) may access
4363 /// this variable even after a loop transformation. Hence, do not guard
4364 /// this getter function by `isValid`. It is the responsibility of the
4365 /// callee to ensure this functionality is not invoked by a non-outlined
4366 /// CanonicalLoopInfo object (in which case, `setLastIter` will never be
4367 /// invoked and `LastIter` will be by default `nullptr`).
4368 Value *getLastIter() { return LastIter; }
4369
4370 /// Returns whether this object currently represents the IR of a loop. If
4371 /// returning false, it may have been consumed by a loop transformation or not
4372 /// been initialized. Do not use in this case;
4373 bool isValid() const { return Header; }
4374
4375 /// The preheader ensures that there is only a single edge entering the loop.
4376 /// Code that must be execute before any loop iteration can be emitted here,
4377 /// such as computing the loop trip count and begin lifetime markers. Code in
4378 /// the preheader is not considered part of the canonical loop.
4380
4381 /// The header is the entry for each iteration. In the canonical control flow,
4382 /// it only contains the PHINode for the induction variable.
4384 assert(isValid() && "Requires a valid canonical loop");
4385 return Header;
4386 }
4387
4388 /// The condition block computes whether there is another loop iteration. If
4389 /// yes, branches to the body; otherwise to the exit block.
4391 assert(isValid() && "Requires a valid canonical loop");
4392 return Cond;
4393 }
4394
4395 /// The body block is the single entry for a loop iteration and not controlled
4396 /// by CanonicalLoopInfo. It can contain arbitrary control flow but must
4397 /// eventually branch to the \p Latch block.
4399 assert(isValid() && "Requires a valid canonical loop");
4400 return cast<CondBrInst>(Cond->getTerminator())->getSuccessor(0);
4401 }
4402
4403 /// Reaching the latch indicates the end of the loop body code. In the
4404 /// canonical control flow, it only contains the increment of the induction
4405 /// variable.
4407 assert(isValid() && "Requires a valid canonical loop");
4408 return Latch;
4409 }
4410
4411 /// Reaching the exit indicates no more iterations are being executed.
4413 assert(isValid() && "Requires a valid canonical loop");
4414 return Exit;
4415 }
4416
4417 /// The after block is intended for clean-up code such as lifetime end
4418 /// markers. It is separate from the exit block to ensure, analogous to the
4419 /// preheader, it having just a single entry edge and being free from PHI
4420 /// nodes should there be multiple loop exits (such as from break
4421 /// statements/cancellations).
4423 assert(isValid() && "Requires a valid canonical loop");
4424 return Exit->getSingleSuccessor();
4425 }
4426
4427 /// Returns the llvm::Value containing the number of loop iterations. It must
4428 /// be valid in the preheader and always interpreted as an unsigned integer of
4429 /// any bit-width.
4431 assert(isValid() && "Requires a valid canonical loop");
4432 Instruction *CmpI = &Cond->front();
4433 assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
4434 return CmpI->getOperand(1);
4435 }
4436
4437 /// Returns the instruction representing the current logical induction
4438 /// variable. Always unsigned, always starting at 0 with an increment of one.
4440 assert(isValid() && "Requires a valid canonical loop");
4441 Instruction *IndVarPHI = &Header->front();
4442 assert(isa<PHINode>(IndVarPHI) && "First inst must be the IV PHI");
4443 return IndVarPHI;
4444 }
4445
4446 /// Return the type of the induction variable (and the trip count).
4448 assert(isValid() && "Requires a valid canonical loop");
4449 return getIndVar()->getType();
4450 }
4451
4452 /// Return the insertion point for user code before the loop.
4454 assert(isValid() && "Requires a valid canonical loop");
4455 BasicBlock *Preheader = getPreheader();
4456 return {Preheader, std::prev(Preheader->end())};
4457 };
4458
4459 /// Return the insertion point for user code in the body.
4461 assert(isValid() && "Requires a valid canonical loop");
4462 BasicBlock *Body = getBody();
4463 return {Body, Body->begin()};
4464 };
4465
4466 /// Return the insertion point for user code after the loop.
4468 assert(isValid() && "Requires a valid canonical loop");
4469 BasicBlock *After = getAfter();
4470 return {After, After->begin()};
4471 };
4472
4474 assert(isValid() && "Requires a valid canonical loop");
4475 return Header->getParent();
4476 }
4477
4478 /// Consistency self-check.
4479 LLVM_ABI void assertOK() const;
4480
4481 /// Invalidate this loop. That is, the underlying IR does not fulfill the
4482 /// requirements of an OpenMP canonical loop anymore.
4483 LLVM_ABI void invalidate();
4484};
4485
4486/// ScanInfo holds the information to assist in lowering of Scan reduction.
4487/// Before lowering, the body of the for loop specifying scan reduction is
4488/// expected to have the following structure
4489///
4490/// Loop Body Entry
4491/// |
4492/// Code before the scan directive
4493/// |
4494/// Scan Directive
4495/// |
4496/// Code after the scan directive
4497/// |
4498/// Loop Body Exit
4499/// When `createCanonicalScanLoops` is executed, the bodyGen callback of it
4500/// transforms the body to:
4501///
4502/// Loop Body Entry
4503/// |
4504/// OMPScanDispatch
4505///
4506/// OMPBeforeScanBlock
4507/// |
4508/// OMPScanLoopExit
4509/// |
4510/// Loop Body Exit
4511///
4512/// The insert point is updated to the first insert point of OMPBeforeScanBlock.
4513/// It dominates the control flow of code generated until
4514/// scan directive is encountered and OMPAfterScanBlock dominates the
4515/// control flow of code generated after scan is encountered. The successor
4516/// of OMPScanDispatch can be OMPBeforeScanBlock or OMPAfterScanBlock based
4517/// on 1.whether it is in Input phase or Scan Phase , 2. whether it is an
4518/// exclusive or inclusive scan. This jump is added when `createScan` is
4519/// executed. If input loop is being generated, if it is inclusive scan,
4520/// `OMPAfterScanBlock` succeeds `OMPScanDispatch` , if exclusive,
4521/// `OMPBeforeScanBlock` succeeds `OMPDispatch` and vice versa for scan loop. At
4522/// the end of the input loop, temporary buffer is populated and at the
4523/// beginning of the scan loop, temporary buffer is read. After scan directive
4524/// is encountered, insertion point is updated to `OMPAfterScanBlock` as it is
4525/// expected to dominate the code after the scan directive. Both Before and
4526/// After scan blocks are succeeded by `OMPScanLoopExit`.
4527/// Temporary buffer allocations are done in `ScanLoopInit` block before the
4528/// lowering of for-loop. The results are copied back to reduction variable in
4529/// `ScanLoopFinish` block.
4531public:
4532 /// Dominates the body of the loop before scan directive
4534
4535 /// Dominates the body of the loop before scan directive
4537
4538 /// Controls the flow to before or after scan blocks
4540
4541 /// Exit block of loop body
4543
4544 /// Block before loop body where scan initializations are done
4546
4547 /// Block after loop body where scan finalizations are done
4549
4550 /// If true, it indicates Input phase is lowered; else it indicates
4551 /// ScanPhase is lowered
4552 bool OMPFirstScanLoop = false;
4553
4554 /// Maps the private reduction variable to the pointer of the temporary
4555 /// buffer
4557
4558 /// Keeps track of value of iteration variable for input/scan loop to be
4559 /// used for Scan directive lowering
4560 llvm::Value *IV = nullptr;
4561
4562 /// Stores the span of canonical loop being lowered to be used for temporary
4563 /// buffer allocation or Finalization.
4564 llvm::Value *Span = nullptr;
4565
4569 ScanInfo(ScanInfo &) = delete;
4570 ScanInfo &operator=(const ScanInfo &) = delete;
4571
4572 ~ScanInfo() { delete (ScanBuffPtrs); }
4573};
4574
4575} // end namespace llvm
4576
4577#endif // LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
arc branch finalize
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file defines the BumpPtrAllocator interface.
#define X(NUM, ENUM, NAME)
Definition ELF.h:854
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_ABI
Definition Compiler.h:215
DXIL Finalize Linkage
Hexagon Hardware Loops
Module.h This file contains the declarations for the Module class.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
Machine Check Debug Module
#define T
This file defines constans and helpers used when dealing with OpenMP.
Provides definitions for Target specific Grid Values.
const SmallVectorImpl< MachineOperand > & Cond
Basic Register Allocator
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
SmallPtrSet< BasicBlock *, 0 > BlockSet
This file implements a set that has insertion order iteration characteristics.
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
an instruction to allocate memory on the stack
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
Align AtomicAlign
Definition Atomic.h:23
bool UseLibcall
Definition Atomic.h:25
IRBuilderBase * Builder
Definition Atomic.h:19
uint64_t AtomicSizeInBits
Definition Atomic.h:21
uint64_t ValueSizeInBits
Definition Atomic.h:22
IRBuilderBase::InsertPoint AllocaIP
Definition Atomic.h:26
Align ValueAlign
Definition Atomic.h:24
BinOp
This enumeration lists the possible modifications atomicrmw can make.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:474
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:461
This class represents a function call, abstracting a target machine's calling convention.
Class to represented the control flow structure of an OpenMP canonical loop.
Value * getTripCount() const
Returns the llvm::Value containing the number of loop iterations.
BasicBlock * getHeader() const
The header is the entry for each iteration.
LLVM_ABI void assertOK() const
Consistency self-check.
Type * getIndVarType() const
Return the type of the induction variable (and the trip count).
BasicBlock * getBody() const
The body block is the single entry for a loop iteration and not controlled by CanonicalLoopInfo.
bool isValid() const
Returns whether this object currently represents the IR of a loop.
void setLastIter(Value *IterVar)
Sets the last iteration variable for this loop.
OpenMPIRBuilder::InsertPointTy getAfterIP() const
Return the insertion point for user code after the loop.
Value * getLastIter()
Returns the last iteration variable for this loop.
OpenMPIRBuilder::InsertPointTy getBodyIP() const
Return the insertion point for user code in the body.
BasicBlock * getAfter() const
The after block is intended for clean-up code such as lifetime end markers.
Function * getFunction() const
LLVM_ABI void invalidate()
Invalidate this loop.
BasicBlock * getLatch() const
Reaching the latch indicates the end of the loop body code.
OpenMPIRBuilder::InsertPointTy getPreheaderIP() const
Return the insertion point for user code before the loop.
BasicBlock * getCond() const
The condition block computes whether there is another loop iteration.
BasicBlock * getExit() const
Reaching the exit indicates no more iterations are being executed.
LLVM_ABI BasicBlock * getPreheader() const
The preheader ensures that there is only a single edge entering the loop.
Instruction * getIndVar() const
Returns the instruction representing the current logical induction variable.
Utility class for extracting code into a new function.
This is the shared class of boolean and integer constants.
Definition Constants.h:87
This is an important base class in LLVM.
Definition Constant.h:43
A debug info location.
Definition DebugLoc.h:124
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
Tagged union holding either a T or a Error.
Definition Error.h:485
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
LinkageTypes
An enumeration for the kinds of linkage for global values.
Definition GlobalValue.h:52
InsertPoint - A saved insertion point.
Definition IRBuilder.h:246
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2848
Class to represent integer types.
Analysis pass that exposes the LoopInfo for a function.
Definition LoopInfo.h:587
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:38
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
OffloadEntryInfoDeviceGlobalVar(unsigned Order, OMPTargetGlobalVarEntryKind Flags)
OffloadEntryInfoDeviceGlobalVar(unsigned Order, Constant *Addr, int64_t VarSize, OMPTargetGlobalVarEntryKind Flags, GlobalValue::LinkageTypes Linkage, const std::string &VarName)
static bool classof(const OffloadEntryInfo *Info)
OffloadEntryInfoTargetRegion(unsigned Order, Constant *Addr, Constant *ID, OMPTargetRegionEntryKind Flags)
@ OffloadingEntryInfoTargetRegion
Entry is a target region.
@ OffloadingEntryInfoDeviceGlobalVar
Entry is a declare target variable.
OffloadingEntryInfoKinds getKind() const
OffloadEntryInfo(OffloadingEntryInfoKinds Kind)
static bool classof(const OffloadEntryInfo *Info)
OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order, uint32_t Flags)
Class that manages information about offload code regions and data.
function_ref< void(StringRef, const OffloadEntryInfoDeviceGlobalVar &)> OffloadDeviceGlobalVarEntryInfoActTy
Applies action Action on all registered entries.
OMPTargetDeviceClauseKind
Kind of device clause for declare target variables and functions NOTE: Currently not used as a part o...
@ OMPTargetDeviceClauseNoHost
The target is marked for non-host devices.
@ OMPTargetDeviceClauseAny
The target is marked for all devices.
@ OMPTargetDeviceClauseNone
The target is marked as having no clause.
@ OMPTargetDeviceClauseHost
The target is marked for host devices.
LLVM_ABI void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr, int64_t VarSize, OMPTargetGlobalVarEntryKind Flags, GlobalValue::LinkageTypes Linkage)
Register device global variable entry.
LLVM_ABI void initializeDeviceGlobalVarEntryInfo(StringRef Name, OMPTargetGlobalVarEntryKind Flags, unsigned Order)
Initialize device global variable entry.
LLVM_ABI void actOnDeviceGlobalVarEntriesInfo(const OffloadDeviceGlobalVarEntryInfoActTy &Action)
OMPTargetRegionEntryKind
Kind of the target registry entry.
@ OMPTargetRegionEntryTargetRegion
Mark the entry as target region.
OffloadEntriesInfoManager(OpenMPIRBuilder *builder)
LLVM_ABI void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, const TargetRegionEntryInfo &EntryInfo)
LLVM_ABI bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, bool IgnoreAddressId=false) const
Return true if a target region entry with the provided information exists.
LLVM_ABI void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, Constant *Addr, Constant *ID, OMPTargetRegionEntryKind Flags)
Register target region entry.
LLVM_ABI void actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action)
unsigned size() const
Return number of entries defined so far.
LLVM_ABI void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo, unsigned Order)
Initialize target region entry.
OMPTargetGlobalVarEntryKind
Kind of the global variable entry..
@ OMPTargetGlobalVarEntryEnter
Mark the entry as a declare target enter.
@ OMPTargetGlobalVarEntryNone
Mark the entry as having no declare target entry kind.
@ OMPTargetGlobalRegisterRequires
Mark the entry as a register requires global.
@ OMPTargetGlobalVarEntryIndirect
Mark the entry as a declare target indirect global.
@ OMPTargetGlobalVarEntryLink
Mark the entry as a to declare target link.
@ OMPTargetGlobalVarEntryTo
Mark the entry as a to declare target.
@ OMPTargetGlobalVarEntryIndirectVTable
Mark the entry as a declare target indirect vtable.
function_ref< void(const TargetRegionEntryInfo &EntryInfo, const OffloadEntryInfoTargetRegion &)> OffloadTargetRegionEntryInfoActTy
brief Applies action Action on all registered entries.
bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const
Checks if the variable with the given name has been registered already.
LLVM_ABI bool empty() const
Return true if a there are no entries defined.
Captures attributes that affect generating LLVM-IR using the OpenMPIRBuilder and related classes.
std::optional< bool > IsTargetDevice
Flag to define whether to generate code for the role of the OpenMP host (if set to false) or device (...
std::optional< bool > IsGPU
Flag for specifying if the compilation is done for an accelerator.
std::optional< StringRef > FirstSeparator
First separator used between the initial two parts of a name.
StringRef separator() const
LLVM_ABI int64_t getRequiresFlags() const
Returns requires directive clauses as flags compatible with those expected by libomptarget.
void setFirstSeparator(StringRef FS)
void setDefaultTargetAS(unsigned AS)
StringRef firstSeparator() const
std::optional< bool > OpenMPOffloadMandatory
Flag for specifying if offloading is mandatory.
std::optional< bool > EmitLLVMUsedMetaInfo
Flag for specifying if LLVMUsed information should be emitted.
SmallVector< Triple > TargetTriples
When compilation is being done for the OpenMP host (i.e.
LLVM_ABI void setHasRequiresReverseOffload(bool Value)
LLVM_ABI bool hasRequiresUnifiedSharedMemory() const
LLVM_ABI void setHasRequiresUnifiedSharedMemory(bool Value)
unsigned getDefaultTargetAS() const
std::optional< StringRef > Separator
Separator used between all of the rest consecutive parts of s name.
LLVM_ABI bool hasRequiresDynamicAllocators() const
bool openMPOffloadMandatory() const
CallingConv::ID getRuntimeCC() const
LLVM_ABI void setHasRequiresUnifiedAddress(bool Value)
void setOpenMPOffloadMandatory(bool Value)
void setIsTargetDevice(bool Value)
void setSeparator(StringRef S)
void setRuntimeCC(CallingConv::ID CC)
LLVM_ABI void setHasRequiresDynamicAllocators(bool Value)
void setEmitLLVMUsed(bool Value=true)
std::optional< omp::GV > GridValue
LLVM_ABI bool hasRequiresReverseOffload() const
LLVM_ABI bool hasRequiresUnifiedAddress() const
llvm::AllocaInst * CreateAlloca(llvm::Type *Ty, const llvm::Twine &Name) const override
void decorateWithTBAA(llvm::Instruction *I) override
AtomicInfo(IRBuilder<> *Builder, llvm::Type *Ty, uint64_t AtomicSizeInBits, uint64_t ValueSizeInBits, llvm::Align AtomicAlign, llvm::Align ValueAlign, bool UseLibcall, IRBuilderBase::InsertPoint AllocaIP, llvm::Value *AtomicVar)
llvm::Value * getAtomicPointer() const override
Struct that keeps the information that should be kept throughout a 'target data' region.
TargetDataInfo(bool RequiresDevicePointerInfo, bool SeparateBeginEndCalls)
SmallMapVector< const Value *, std::pair< Value *, Value * >, 4 > DevicePtrInfoMap
void clearArrayInfo()
Clear information about the data arrays.
unsigned NumberOfPtrs
The total number of pointers passed to the runtime library.
bool HasNoWait
Whether the target ... data directive has a nowait clause.
bool isValid()
Return true if the current target data information has valid arrays.
bool HasMapper
Indicate whether any user-defined mapper exists.
An interface to create LLVM-IR for OpenMP directives.
LLVM_ABI InsertPointOrErrorTy createOrderedThreadsSimd(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsThreads)
Generator for 'omp ordered [threads | simd]'.
LLVM_ABI void emitAArch64DeclareSimdFunction(llvm::Function *Fn, unsigned VLENVal, llvm::ArrayRef< DeclareSimdAttrTy > ParamAttrs, DeclareSimdBranch Branch, char ISA, unsigned NarrowestDataSize, bool OutputBecomesInput)
Emit AArch64 vector-function ABI attributes for a declare simd function.
LLVM_ABI Constant * getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize, omp::IdentFlag Flags=omp::IdentFlag(0), unsigned Reserve2Flags=0)
Return an ident_t* encoding the source location SrcLocStr and Flags.
LLVM_ABI FunctionCallee getOrCreateRuntimeFunction(Module &M, omp::RuntimeFunction FnID)
Return the function declaration for the runtime function with FnID.
LLVM_ABI InsertPointOrErrorTy createCancel(const LocationDescription &Loc, Value *IfCondition, omp::Directive CanceledDirective)
Generator for 'omp cancel'.
std::function< Expected< Function * >(StringRef FunctionName)> FunctionGenCallback
Functions used to generate a function with the given name.
LLVM_ABI CallInst * createOMPAllocShared(const LocationDescription &Loc, Value *Size, const Twine &Name=Twine(""))
Create a runtime call for kmpc_alloc_shared.
ReductionGenCBKind
Enum class for the RedctionGen CallBack type to be used.
LLVM_ABI CanonicalLoopInfo * collapseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, InsertPointTy ComputeIP)
Collapse a loop nest into a single loop.
LLVM_ABI void createTaskyield(const LocationDescription &Loc)
Generator for 'omp taskyield'.
std::function< Error(InsertPointTy CodeGenIP)> FinalizeCallbackTy
Callback type for variable finalization (think destructors).
LLVM_ABI void emitBranch(BasicBlock *Target)
LLVM_ABI Error emitCancelationCheckImpl(Value *CancelFlag, omp::Directive CanceledDirective)
Generate control flow and cleanup for cancellation.
static LLVM_ABI void writeThreadBoundsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
EvalKind
Enum class for reduction evaluation types scalar, complex and aggregate.
LLVM_ABI void emitTaskwaitImpl(const LocationDescription &Loc)
Generate a taskwait runtime call.
LLVM_ABI Constant * registerTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, Function *OutlinedFunction, StringRef EntryFnName, StringRef EntryFnIDName)
Registers the given function and sets up the attribtues of the function Returns the FunctionID.
LLVM_ABI GlobalVariable * emitKernelExecutionMode(StringRef KernelName, omp::OMPTgtExecModeFlags Mode)
Emit the kernel execution mode.
LLVM_ABI void initialize()
Initialize the internal state, this will put structures types and potentially other helpers into the ...
LLVM_ABI InsertPointTy createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D, AtomicOrdering AO, omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly, bool IsWeak=false)
std::function< InsertPointTy(InsertPointTy CodeGenIP, unsigned Index, Value **LHS, Value **RHS, Function *CurFn)> ReductionGenClangCBTy
ReductionGen CallBack for Clang.
LLVM_ABI InsertPointTy createAtomicWrite(const LocationDescription &Loc, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, InsertPointTy AllocaIP)
Emit atomic write for : X = Expr — Only Scalar data types.
LLVM_ABI void loadOffloadInfoMetadata(Module &M)
Loads all the offload entries information from the host IR metadata.
function_ref< MapInfosTy &(InsertPointTy CodeGenIP)> GenMapInfoCallbackTy
Callback type for creating the map infos for the kernel parameters.
LLVM_ABI Error emitOffloadingArrays(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info, CustomMapperCallbackTy CustomMapperCB, bool IsNonContiguous=false, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
LLVM_ABI void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully unroll a loop.
function_ref< Error(InsertPointTy CodeGenIP, Value *IndVar)> LoopBodyGenCallbackTy
Callback type for loop body code generation.
LLVM_ABI InsertPointOrErrorTy emitScanReduction(const LocationDescription &Loc, ArrayRef< llvm::OpenMPIRBuilder::ReductionInfo > ReductionInfos, ScanInfo *ScanRedInfo)
This function performs the scan reduction of the values updated in the input phase.
LLVM_ABI void emitFlush(const LocationDescription &Loc)
Generate a flush runtime call.
LLVM_ABI InsertPointOrErrorTy createScope(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsNowait)
Generator for 'omp scope'.
static LLVM_ABI std::pair< int32_t, int32_t > readThreadBoundsForKernel(const Triple &T, Function &Kernel)
}
OpenMPIRBuilderConfig Config
The OpenMPIRBuilder Configuration.
LLVM_ABI CallInst * createOMPInteropDestroy(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_destroy.
std::function< InsertPointOrErrorTy( InsertPointTy CodeGenIP, Value *LHS, Value *RHS, Value *&Res)> ReductionGenCBTy
ReductionGen CallBack for MLIR.
LLVM_ABI void emitUsed(StringRef Name, ArrayRef< llvm::WeakTrackingVH > List)
Emit the llvm.used metadata.
void setConfig(OpenMPIRBuilderConfig C)
LLVM_ABI InsertPointOrErrorTy createSingle(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsNowait, ArrayRef< llvm::Value * > CPVars={}, ArrayRef< llvm::Function * > CPFuncs={})
Generator for 'omp single'.
LLVM_ABI InsertPointOrErrorTy createTeams(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, Value *NumTeamsLower=nullptr, Value *NumTeamsUpper=nullptr, Value *ThreadLimit=nullptr, Value *IfExpr=nullptr)
Generator for #omp teams
std::forward_list< CanonicalLoopInfo > LoopInfos
Collection of owned canonical loop objects that eventually need to be free'd.
LLVM_ABI void createTaskwait(const LocationDescription &Loc)
Generator for 'omp taskwait'.
bool setHandleFPNegZero(bool FPNegZero)
Set whether atomic compare should handle -0.0/+0.0 equivalence.
LLVM_ABI llvm::StructType * getKmpTaskAffinityInfoTy()
Return the LLVM struct type matching runtime kmp_task_affinity_info_t.
LLVM_ABI CanonicalLoopInfo * createLoopSkeleton(DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore, BasicBlock *PostInsertBefore, const Twine &Name={})
Create the control flow structure of a canonical OpenMP loop.
SmallVector< uint64_t, 4 > MapDimArrayTy
std::function< Error(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< BasicBlock * > DeallocBlocks)> StorableBodyGenCallbackTy
LLVM_ABI std::string createPlatformSpecificName(ArrayRef< StringRef > Parts) const
Get the create a name using the platform specific separators.
LLVM_ABI FunctionCallee createDispatchNextFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_next_* runtime function for the specified size IVSize and sign IVSigned.
static LLVM_ABI void getKernelArgsVector(TargetKernelArgs &KernelArgs, IRBuilderBase &Builder, SmallVector< Value * > &ArgsVector)
Create the kernel args vector used by emitTargetKernel.
SmallVector< Constant *, 4 > MapNamesArrayTy
LLVM_ABI InsertPointOrErrorTy createTarget(const LocationDescription &Loc, bool IsOffloadEntry, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::InsertPointTy CodeGenIP, ArrayRef< BasicBlock * > DeallocBlocks, TargetDataInfo &Info, TargetRegionEntryInfo &EntryInfo, const TargetKernelDefaultAttrs &DefaultAttrs, const TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, SmallVectorImpl< Value * > &Inputs, GenMapInfoCallbackTy GenMapInfoCB, TargetBodyGenCallbackTy BodyGenCB, TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB, CustomMapperCallbackTy CustomMapperCB, const DependenciesInfo &Dependencies={}, bool HasNowait=false, Value *DynCGroupMem=nullptr, omp::OMPDynGroupprivateFallbackType DynCGroupMemFallback=omp::OMPDynGroupprivateFallbackType::Abort)
Generator for 'omp target'.
LLVM_ABI void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully or partially unroll a loop.
LLVM_ABI omp::OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position)
Get OMP_MAP_MEMBER_OF flag with extra bits reserved based on the position given.
LLVM_ABI void addAttributes(omp::RuntimeFunction FnID, Function &Fn)
Add attributes known for FnID to Fn.
Module & M
The underlying LLVM-IR module.
StringMap< Constant * > SrcLocStrMap
Map to remember source location strings.
LLVM_ABI void createMapperAllocas(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumOperands, struct MapperAllocas &MapperAllocas)
Create the allocas instruction used in call to mapper functions.
LLVM_ABI Constant * getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the source location LocStr.
LLVM_ABI Error emitTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, FunctionGenCallback &GenerateFunctionCallback, bool IsOffloadEntry, Function *&OutlinedFn, Constant *&OutlinedFnID)
Create a unique name for the entry function using the source location information of the current targ...
LLVM_ABI InsertPointOrErrorTy createIteratorLoop(LocationDescription Loc, llvm::Value *TripCount, IteratorBodyGenTy BodyGen, llvm::StringRef Name="iterator")
Create a canonical iterator loop at the current insertion point.
LLVM_ABI Expected< SmallVector< llvm::CanonicalLoopInfo * > > createCanonicalScanLoops(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, InsertPointTy ComputeIP, const Twine &Name, ScanInfo *ScanRedInfo)
Generator for the control flow structure of an OpenMP canonical loops if the parent directive has an ...
LLVM_ABI FunctionCallee createDispatchFiniFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_fini_* runtime function for the specified size IVSize and sign IVSigned.
function_ref< InsertPointOrErrorTy( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< BasicBlock * > DeallocBlocks)> TargetBodyGenCallbackTy
LLVM_ABI void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor, CanonicalLoopInfo **UnrolledCLI)
Partially unroll a loop.
function_ref< Error(Value *DeviceID, Value *RTLoc, IRBuilderBase::InsertPoint TargetTaskAllocaIP)> TargetTaskBodyCallbackTy
Callback type for generating the bodies of device directives that require outer target tasks (e....
Expected< MapInfosTy & > MapInfosOrErrorTy
bool HandleFPNegZero
Emit atomic compare for constructs: — Only scalar data types cond-expr-stmt: x = x ordop expr ?
SmallVector< omp::OpenMPOffloadMappingFlags, 4 > MapFlagsArrayTy
LLVM_ABI void emitTaskyieldImpl(const LocationDescription &Loc)
Generate a taskyield runtime call.
LLVM_ABI void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc, Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg, struct MapperAllocas &MapperAllocas, int64_t DeviceID, unsigned NumOperands)
Create the call for the target mapper function.
LLVM_ABI InsertPointOrErrorTy createDistribute(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< BasicBlock * > DeallocBlocks, BodyGenCallbackTy BodyGenCB)
Generator for #omp distribute
LLVM_ABI Expected< Function * > emitUserDefinedMapper(function_ref< MapInfosOrErrorTy(InsertPointTy CodeGenIP, llvm::Value *PtrPHI, llvm::Value *BeginArg)> PrivAndGenMapInfoCB, llvm::Type *ElemTy, StringRef FuncName, CustomMapperCallbackTy CustomMapperCB, bool PreserveMemberOfFlags=false)
Emit the user-defined mapper function.
LLVM_ABI InsertPointOrErrorTy createTask(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< BasicBlock * > DeallocBlocks, BodyGenCallbackTy BodyGenCB, bool Tied=true, Value *Final=nullptr, Value *IfCondition=nullptr, const DependenciesInfo &Dependencies={}, const AffinityData &Affinities={}, bool Mergeable=false, Value *EventHandle=nullptr, Value *Priority=nullptr)
Generator for #omp taskloop
function_ref< Expected< Function * >(unsigned int)> CustomMapperCallbackTy
LLVM_ABI InsertPointTy createOrderedDepend(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumLoops, ArrayRef< llvm::Value * > StoreValues, const Twine &Name, bool IsDependSource)
Generator for 'omp ordered depend (source | sink)'.
LLVM_ABI InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr, llvm::IntegerType *IntPtrTy, bool BranchtoEnd=true)
Generate conditional branch and relevant BasicBlocks through which private threads copy the 'copyin' ...
SmallVector< MapValuesArrayTy, 4 > MapNonContiguousArrayTy
function_ref< InsertPointOrErrorTy( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original, Value &Inner, Value *&ReplVal)> PrivatizeCallbackTy
Callback type for variable privatization (think copy & default constructor).
LLVM_ABI bool isFinalized()
Check whether the finalize function has already run.
SmallVector< DeviceInfoTy, 4 > MapDeviceInfoArrayTy
SmallVector< FinalizationInfo, 8 > FinalizationStack
The finalization stack made up of finalize callbacks currently in-flight, wrapped into FinalizationIn...
LLVM_ABI std::vector< CanonicalLoopInfo * > tileLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, ArrayRef< Value * > TileSizes)
Tile a loop nest.
LLVM_ABI CallInst * createOMPInteropInit(const LocationDescription &Loc, Value *InteropVar, omp::OMPInteropType InteropType, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_init.
LLVM_ABI Error emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen, BodyGenCallbackTy ElseGen, InsertPointTy AllocaIP={}, ArrayRef< BasicBlock * > DeallocBlocks={})
Emits code for OpenMP 'if' clause using specified BodyGenCallbackTy Here is the logic: if (Cond) { Th...
LLVM_ABI Function * getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID)
std::function< InsertPointOrErrorTy( InsertPointTy, Value *ByRefVal, Value *&Res)> ReductionGenDataPtrPtrCBTy
void addOutlineInfo(std::unique_ptr< OutlineInfo > &&OI)
Add a new region that will be outlined later.
LLVM_ABI InsertPointTy createTargetInit(const LocationDescription &Loc, const llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs)
The omp target interface.
LLVM_ABI InsertPointOrErrorTy createReductions(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< ReductionInfo > ReductionInfos, ArrayRef< bool > IsByRef, bool IsNoWait=false, bool IsTeamsReduction=false)
Generator for 'omp reduction'.
const Triple T
The target triple of the underlying module.
DenseMap< std::pair< Constant *, uint64_t >, Constant * > IdentMap
Map to remember existing ident_t*.
LLVM_ABI CallInst * createOMPFree(const LocationDescription &Loc, Value *Addr, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_free.
LLVM_ABI InsertPointOrErrorTy createReductionsGPU(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< ReductionInfo > ReductionInfos, ArrayRef< bool > IsByRef, bool IsNoWait=false, bool IsTeamsReduction=false, bool IsSPMD=false, ReductionGenCBKind ReductionGenCBKind=ReductionGenCBKind::MLIR, std::optional< omp::GV > GridValue={}, Value *SrcLocInfo=nullptr)
Design of OpenMP reductions on the GPU.
LLVM_ABI FunctionCallee createForStaticInitFunction(unsigned IVSize, bool IVSigned, bool IsGPUDistribute)
Returns __kmpc_for_static_init_* runtime function for the specified size IVSize and sign IVSigned.
LLVM_ABI CallInst * createOMPAlloc(const LocationDescription &Loc, Value *Size, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_alloc.
LLVM_ABI void emitNonContiguousDescriptor(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info)
Emit an array of struct descriptors to be assigned to the offload args.
SmallVector< Value *, 4 > MapValuesArrayTy
LLVM_ABI InsertPointOrErrorTy createSection(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for 'omp section'.
LLVM_ABI InsertPointOrErrorTy createTaskgroup(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< BasicBlock * > DeallocBlocks, BodyGenCallbackTy BodyGenCB)
Generator for the taskgroup construct.
LLVM_ABI InsertPointOrErrorTy createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< BasicBlock * > DeallocBlocks, BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable)
Generator for 'omp parallel'.
function_ref< InsertPointOrErrorTy(InsertPointTy)> EmitFallbackCallbackTy
Callback function type for functions emitting the host fallback code that is executed when the kernel...
static LLVM_ABI TargetRegionEntryInfo getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack, vfs::FileSystem &VFS, StringRef ParentName="")
Creates a unique info for a target entry when provided a filename and line number from.
LLVM_ABI void emitTaskDependency(IRBuilderBase &Builder, Value *Entry, const DependData &Dep)
Store one kmp_depend_info entry at the given Entry pointer.
LLVM_ABI void emitBlock(BasicBlock *BB, Function *CurFn, bool IsFinished=false)
LLVM_ABI Value * getOrCreateThreadID(Value *Ident)
Return the current thread ID.
LLVM_ABI InsertPointOrErrorTy createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for 'omp master'.
void pushFinalizationCB(const FinalizationInfo &FI)
Push a finalization callback on the finalization stack.
LLVM_ABI InsertPointOrErrorTy createTargetData(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< BasicBlock * > DeallocBlocks, Value *DeviceID, Value *IfCond, TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB, CustomMapperCallbackTy CustomMapperCB, omp::RuntimeFunction *MapperFunc=nullptr, function_ref< InsertPointOrErrorTy(InsertPointTy CodeGenIP, BodyGenTy BodyGenType)> BodyGenCB=nullptr, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr, Value *SrcLocInfo=nullptr)
Generator for 'omp target data'.
LLVM_ABI CallInst * createRuntimeFunctionCall(FunctionCallee Callee, ArrayRef< Value * > Args, StringRef Name="")
LLVM_ABI InsertPointOrErrorTy emitKernelLaunch(const LocationDescription &Loc, Value *OutlinedFnID, EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args, Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP)
Generate a target region entry call and host fallback call.
InsertPointTy getInsertionPoint()
}
StringMap< GlobalVariable *, BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
LLVM_ABI InsertPointOrErrorTy createCancellationPoint(const LocationDescription &Loc, omp::Directive CanceledDirective)
Generator for 'omp cancellation point'.
LLVM_ABI CallInst * createOMPAlignedAlloc(const LocationDescription &Loc, Value *Align, Value *Size, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_align_alloc.
LLVM_ABI FunctionCallee createDispatchInitFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_init_* runtime function for the specified size IVSize and sign IVSigned.
LLVM_ABI InsertPointOrErrorTy createScan(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< llvm::Value * > ScanVars, ArrayRef< llvm::Type * > ScanVarsType, bool IsInclusive, ScanInfo *ScanRedInfo)
This directive split and directs the control flow to input phase blocks or scan phase blocks based on...
LLVM_ABI CallInst * createOMPFreeShared(const LocationDescription &Loc, Value *Addr, Value *Size, const Twine &Name=Twine(""))
Create a runtime call for kmpc_free_shared.
LLVM_ABI CallInst * createOMPInteropUse(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_use.
IRBuilder<>::InsertPoint InsertPointTy
Type used throughout for insertion points.
LLVM_ABI GlobalVariable * getOrCreateInternalVariable(Type *Ty, const StringRef &Name, std::optional< unsigned > AddressSpace={})
Gets (if variable with the given name already exist) or creates internal global variable with the spe...
LLVM_ABI GlobalVariable * createOffloadMapnames(SmallVectorImpl< llvm::Constant * > &Names, std::string VarName)
Create the global variable holding the offload names information.
std::forward_list< ScanInfo > ScanInfos
Collection of owned ScanInfo objects that eventually need to be free'd.
static LLVM_ABI void writeTeamsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
std::function< InsertPointOrErrorTy( InsertPointTy, Type *, Value *, Value *)> ReductionGenAtomicCBTy
Functions used to generate atomic reductions.
LLVM_ABI Value * calculateCanonicalLoopTripCount(const LocationDescription &Loc, Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, const Twine &Name="loop")
Calculate the trip count of a canonical loop.
DeclareSimdKindTy
Kind of parameter in a function with 'declare simd' directive.
LLVM_ABI InsertPointOrErrorTy createBarrier(const LocationDescription &Loc, omp::Directive Kind, bool ForceSimpleCall=false, bool CheckCancelFlag=true)
Emitter methods for OpenMP directives.
LLVM_ABI void setCorrectMemberOfFlag(omp::OpenMPOffloadMappingFlags &Flags, omp::OpenMPOffloadMappingFlags MemberOfFlag)
Given an initial flag set, this function modifies it to contain the passed in MemberOfFlag generated ...
LLVM_ABI Error emitOffloadingArraysAndArgs(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info, TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo, CustomMapperCallbackTy CustomMapperCB, bool IsNonContiguous=false, bool ForEndCall=false, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr)
Allocates memory for and populates the arrays required for offloading (offload_{baseptrs|ptrs|mappers...
LLVM_ABI Constant * getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the default source location.
LLVM_ABI InsertPointOrErrorTy createCritical(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst)
Generator for 'omp critical'.
LLVM_ABI void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size, int32_t Flags, GlobalValue::LinkageTypes, StringRef Name="")
Creates offloading entry for the provided entry ID ID, address Addr, size Size, and flags Flags.
static LLVM_ABI unsigned getOpenMPDefaultSimdAlign(const Triple &TargetTriple, const StringMap< bool > &Features)
Get the default alignment value for given target.
LLVM_ABI unsigned getFlagMemberOffset()
Get the offset of the OMP_MAP_MEMBER_OF field.
LLVM_ABI InsertPointOrErrorTy applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, bool NeedsBarrier, llvm::omp::ScheduleKind SchedKind=llvm::omp::OMP_SCHEDULE_Default, Value *ChunkSize=nullptr, bool HasSimdModifier=false, bool HasMonotonicModifier=false, bool HasNonmonotonicModifier=false, bool HasOrderedClause=false, omp::WorksharingLoopType LoopType=omp::WorksharingLoopType::ForStaticLoop, bool NoLoop=false, bool HasDistSchedule=false, Value *DistScheduleChunkSize=nullptr)
Modifies the canonical loop to be a workshare loop.
LLVM_ABI InsertPointOrErrorTy createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, AtomicOpValue &V, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr, bool IsIgnoreDenormalMode=false, bool IsFineGrainedMemory=false, bool IsRemoteMemory=false)
Emit atomic update for constructs: — Only Scalar data types V = X; X = X BinOp Expr ,...
LLVM_ABI void createOffloadEntriesAndInfoMetadata(EmitMetadataErrorReportFunctionTy &ErrorReportFunction)
LLVM_ABI void applySimd(CanonicalLoopInfo *Loop, MapVector< Value *, Value * > AlignedVars, Value *IfCond, omp::OrderKind Order, ConstantInt *Simdlen, ConstantInt *Safelen)
Add metadata to simd-ize a loop.
SmallVector< std::unique_ptr< OutlineInfo >, 16 > OutlineInfos
Collection of regions that need to be outlined during finalization.
LLVM_ABI InsertPointOrErrorTy createAtomicUpdate(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr, bool IsIgnoreDenormalMode=false, bool IsFineGrainedMemory=false, bool IsRemoteMemory=false)
Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X For complex Operations: X = ...
std::function< std::tuple< std::string, uint64_t >()> FileIdentifierInfoCallbackTy
bool isLastFinalizationInfoCancellable(omp::Directive DK)
Return true if the last entry in the finalization stack is of kind DK and cancellable.
LLVM_ABI InsertPointTy emitTargetKernel(const LocationDescription &Loc, InsertPointTy AllocaIP, Value *&Return, Value *Ident, Value *DeviceID, Value *NumTeams, Value *NumThreads, Value *HostPtr, ArrayRef< Value * > KernelArgs)
Generate a target region entry call.
LLVM_ABI GlobalVariable * createOffloadMaptypes(SmallVectorImpl< uint64_t > &Mappings, std::string VarName)
Create the global variable holding the offload mappings information.
LLVM_ABI CallInst * createCachedThreadPrivate(const LocationDescription &Loc, llvm::Value *Pointer, llvm::ConstantInt *Size, const llvm::Twine &Name=Twine(""))
Create a runtime call for kmpc_threadprivate_cached.
IRBuilder Builder
The LLVM-IR Builder used to create IR.
LLVM_ABI GlobalValue * createGlobalFlag(unsigned Value, StringRef Name)
Create a hidden global flag Name in the module with initial value Value.
LLVM_ABI void emitOffloadingArraysArgument(IRBuilderBase &Builder, OpenMPIRBuilder::TargetDataRTArgs &RTArgs, OpenMPIRBuilder::TargetDataInfo &Info, bool ForEndCall=false)
Emit the arguments to be passed to the runtime library based on the arrays of base pointers,...
LLVM_ABI InsertPointOrErrorTy createMasked(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, Value *Filter)
Generator for 'omp masked'.
LLVM_ABI Expected< CanonicalLoopInfo * > createCanonicalLoop(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *TripCount, const Twine &Name="loop")
Generator for the control flow structure of an OpenMP canonical loop.
function_ref< Expected< InsertPointTy >( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value *DestPtr, Value *SrcPtr)> TaskDupCallbackTy
Callback type for task duplication function code generation.
LLVM_ABI Value * getSizeInBytes(Value *BasePtr)
Computes the size of type in bytes.
llvm::function_ref< llvm::Error( InsertPointTy BodyIP, llvm::Value *LinearIV)> IteratorBodyGenTy
OpenMPIRBuilder(Module &M)
Create a new OpenMPIRBuilder operating on the given module M.
LLVM_ABI FunctionCallee createDispatchDeinitFunction()
Returns __kmpc_dispatch_deinit runtime function.
LLVM_ABI void registerTargetGlobalVariable(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage, Type *LlvmPtrTy, Constant *Addr)
Registers a target variable for device or host.
LLVM_ABI void createTargetDeinit(const LocationDescription &Loc, int32_t TeamsReductionDataSize=0)
Create a runtime call for kmpc_target_deinit.
BodyGenTy
Type of BodyGen to use for region codegen.
LLVM_ABI CanonicalLoopInfo * fuseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops)
Fuse a sequence of loops.
LLVM_ABI void emitX86DeclareSimdFunction(llvm::Function *Fn, unsigned NumElements, const llvm::APSInt &VLENVal, llvm::ArrayRef< DeclareSimdAttrTy > ParamAttrs, DeclareSimdBranch Branch)
Emit x86 vector-function ABI attributes for a declare simd function.
SmallVector< llvm::Function *, 16 > ConstantAllocaRaiseCandidates
A collection of candidate target functions that's constant allocas will attempt to be raised on a cal...
OffloadEntriesInfoManager OffloadInfoManager
Info manager to keep track of target regions.
static LLVM_ABI std::pair< int32_t, int32_t > readTeamBoundsForKernel(const Triple &T, Function &Kernel)
Read/write a bounds on teams for Kernel.
const std::string ompOffloadInfoName
OMP Offload Info Metadata name string.
Expected< InsertPointTy > InsertPointOrErrorTy
Type used to represent an insertion point or an error value.
LLVM_ABI InsertPointTy createCopyPrivate(const LocationDescription &Loc, llvm::Value *BufSize, llvm::Value *CpyBuf, llvm::Value *CpyFn, llvm::Value *DidIt)
Generator for __kmpc_copyprivate.
void popFinalizationCB()
Pop the last finalization callback from the finalization stack.
LLVM_ABI InsertPointOrErrorTy createSections(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< StorableBodyGenCallbackTy > SectionCBs, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait)
Generator for 'omp sections'.
std::function< void(EmitMetadataErrorKind, TargetRegionEntryInfo)> EmitMetadataErrorReportFunctionTy
Callback function type.
function_ref< InsertPointOrErrorTy( Argument &Arg, Value *Input, Value *&RetVal, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< InsertPointTy > DeallocIPs)> TargetGenArgAccessorsCallbackTy
LLVM_ABI Expected< ScanInfo * > scanInfoInitialize()
Creates a ScanInfo object, allocates and returns the pointer.
LLVM_ABI InsertPointOrErrorTy emitTargetTask(TargetTaskBodyCallbackTy TaskBodyCB, Value *DeviceID, Value *RTLoc, OpenMPIRBuilder::InsertPointTy AllocaIP, const DependenciesInfo &Dependencies, const TargetDataRTArgs &RTArgs, bool HasNoWait)
Generate a target-task for the target construct.
LLVM_ABI InsertPointTy createAtomicRead(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOrdering AO, InsertPointTy AllocaIP)
Emit atomic Read for : V = X — Only Scalar data types.
function_ref< Error(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< BasicBlock * > DeallocBlocks)> BodyGenCallbackTy
Callback type for body (=inner region) code generation.
bool updateToLocation(const LocationDescription &Loc)
Update the internal location to Loc.
LLVM_ABI void createFlush(const LocationDescription &Loc)
Generator for 'omp flush'.
LLVM_ABI Constant * getAddrOfDeclareTargetVar(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, Type *LlvmPtrTy, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage)
Retrieve (or create if non-existent) the address of a declare target variable, used in conjunction wi...
EmitMetadataErrorKind
The kind of errors that can occur when emitting the offload entries and metadata.
ScanInfo holds the information to assist in lowering of Scan reduction.
llvm::SmallDenseMap< llvm::Value *, llvm::Value * > * ScanBuffPtrs
Maps the private reduction variable to the pointer of the temporary buffer.
llvm::BasicBlock * OMPScanLoopExit
Exit block of loop body.
llvm::Value * IV
Keeps track of value of iteration variable for input/scan loop to be used for Scan directive lowering...
llvm::BasicBlock * OMPAfterScanBlock
Dominates the body of the loop before scan directive.
llvm::BasicBlock * OMPScanInit
Block before loop body where scan initializations are done.
llvm::BasicBlock * OMPBeforeScanBlock
Dominates the body of the loop before scan directive.
llvm::BasicBlock * OMPScanFinish
Block after loop body where scan finalizations are done.
ScanInfo & operator=(const ScanInfo &)=delete
llvm::Value * Span
Stores the span of canonical loop being lowered to be used for temporary buffer allocation or Finaliz...
bool OMPFirstScanLoop
If true, it indicates Input phase is lowered; else it indicates ScanPhase is lowered.
ScanInfo(ScanInfo &)=delete
llvm::BasicBlock * OMPScanDispatch
Controls the flow to before or after scan blocks.
A vector that has set insertion semantics.
Definition SetVector.h:57
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
Definition StringMap.h:128
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
Class to represent struct types.
Target - Wrapper for Target specific information.
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
Value * getOperand(unsigned i) const
Definition User.h:207
See the file comment.
Definition ValueMap.h:84
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:394
Value handle that is nullable, but tries to track the Value.
An efficient, type-erasing, non-owning reference to a callable.
The virtual file system interface.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
OpenMPOffloadMappingFlags
Values for bit flags used to specify the mapping type for offloading.
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their defintion in openmp/runtime/src/kmp...
RTLDependenceKindTy
Dependence kind for RTL.
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
OMPDynGroupprivateFallbackType
The fallback types for the dyn_groupprivate clause.
WorksharingLoopType
A type of worksharing loop construct.
OMPAtomicCompareOp
Atomic compare operations. Currently OpenMP only supports ==, >, and <.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI BasicBlock * splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, llvm::Twine Suffix=".split")
Like splitBB, but reuses the current block's name for the new name.
@ Offset
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI BasicBlock * splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch, DebugLoc DL, llvm::Twine Name={})
Split a BasicBlock at an InsertPoint, even if the block is degenerate (missing the terminator).
auto cast_or_null(const Y &Val)
Definition Casting.h:714
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
AtomicOrdering
Atomic ordering for LLVM's memory model.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1917
LLVM_ABI void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New, bool CreateBranch, DebugLoc DL)
Move the instruction after an InsertPoint to the beginning of another BasicBlock.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:860
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
A struct to pack the relevant information for an OpenMP affinity clause.
a struct to pack relevant information while generating atomic Ops
Attribute set of the declare simd parameter.
DependData(omp::RTLDependenceKindTy DepKind, Type *DepValueType, Value *DepVal)
omp::RTLDependenceKindTy DepKind
A struct to pack static and dynamic dependency information for a task.
DependenciesInfo(SmallVector< DependData > D)
const omp::Directive DK
The directive kind of the innermost directive that has an associated region which might require final...
const bool IsCancellable
Flag to indicate if the directive is cancellable.
LLVM_ABI Error mergeFiniBB(IRBuilderBase &Builder, BasicBlock *ExistingFiniBB)
For cases where there is an unavoidable existing finalization block (e.g.
FinalizationInfo(FinalizeCallbackTy FiniCB, omp::Directive DK, bool IsCancellable)
LLVM_ABI Expected< BasicBlock * > getFiniBB(IRBuilderBase &Builder)
The basic block to which control should be transferred to implement the FiniCB.
Description of a LLVM-IR insertion point (IP) and a debug/source location (filename,...
LocationDescription(const InsertPointTy &IP)
LocationDescription(const InsertPointTy &IP, const DebugLoc &DL)
LocationDescription(const IRBuilderBase &IRB)
This structure contains combined information generated for mappable clauses, including base pointers,...
void append(MapInfosTy &CurInfo)
Append arrays in CurInfo.
MapDeviceInfoArrayTy DevicePointers
StructNonContiguousInfo NonContigInfo
Helper that contains information about regions we need to outline during finalization.
void collectBlocks(SmallPtrSetImpl< BasicBlock * > &BlockSet, SmallVectorImpl< BasicBlock * > &BlockVector)
Collect all blocks in between EntryBB and ExitBB in both the given vector and set.
Function * getFunction() const
Return the function that contains the region to be outlined.
SmallVector< Value *, 2 > ExcludeArgsFromAggregate
virtual std::unique_ptr< CodeExtractor > createCodeExtractor(ArrayRef< BasicBlock * > Blocks, bool ArgsInZeroAddressSpace, Twine Suffix=Twine(""))
Create a CodeExtractor instance based on the information stored in this structure,...
std::function< void(Function &)> PostOutlineCBTy
SmallVector< BasicBlock * > OuterDeallocBBs
EvalKind EvaluationKind
Reduction evaluation kind - scalar, complex or aggregate.
ReductionInfo(Type *ElementType, Value *Variable, Value *PrivateVariable, EvalKind EvaluationKind, ReductionGenCBTy ReductionGen, ReductionGenClangCBTy ReductionGenClang, ReductionGenAtomicCBTy AtomicReductionGen, ReductionGenDataPtrPtrCBTy DataPtrPtrGen, Type *ByRefAllocatedType=nullptr, Type *ByRefElementType=nullptr)
ReductionGenAtomicCBTy AtomicReductionGen
Callback for generating the atomic reduction body, may be null.
ReductionGenCBTy ReductionGen
Callback for generating the reduction body.
ReductionInfo(Value *PrivateVariable)
Type * ByRefAllocatedType
For by-ref reductions, we need to keep track of 2 extra types that are potentially different:
Value * Variable
Reduction variable of pointer type.
Value * PrivateVariable
Thread-private partial reduction variable.
ReductionGenClangCBTy ReductionGenClang
Clang callback for generating the reduction body.
Type * ElementType
Reduction element type, must match pointee type of variable.
ReductionGenDataPtrPtrCBTy DataPtrPtrGen
Container for the arguments used to pass data to the runtime library.
Value * SizesArray
The array of sizes passed to the runtime library.
TargetDataRTArgs(Value *BasePointersArray, Value *PointersArray, Value *SizesArray, Value *MapTypesArray, Value *MapTypesArrayEnd, Value *MappersArray, Value *MapNamesArray)
Value * PointersArray
The array of section pointers passed to the runtime library.
Value * MappersArray
The array of user-defined mappers passed to the runtime library.
Value * MapTypesArrayEnd
The array of map types passed to the runtime library for the end of the region, or nullptr if there a...
Value * BasePointersArray
The array of base pointer passed to the runtime library.
Value * MapTypesArray
The array of map types passed to the runtime library for the beginning of the region or for the entir...
Value * MapNamesArray
The array of original declaration names of mapped pointers sent to the runtime library for debugging.
Data structure that contains the needed information to construct the kernel args vector.
ArrayRef< Value * > NumThreads
The number of threads.
TargetDataRTArgs RTArgs
Arguments passed to the runtime library.
TargetKernelArgs(unsigned NumTargetItems, TargetDataRTArgs RTArgs, Value *NumIterations, ArrayRef< Value * > NumTeams, ArrayRef< Value * > NumThreads, Value *DynCGroupMem, bool HasNoWait, bool StrictBlocksAndThreads, omp::OMPDynGroupprivateFallbackType DynCGroupMemFallback)
Value * NumIterations
The number of iterations.
Value * DynCGroupMem
The size of the dynamic shared memory.
unsigned NumTargetItems
Number of arguments passed to the runtime library.
bool StrictBlocksAndThreads
True if the kernel strictly requires the number of blocks and threads above to run.
bool HasNoWait
True if the kernel has 'no wait' clause.
ArrayRef< Value * > NumTeams
The number of teams.
omp::OMPDynGroupprivateFallbackType DynCGroupMemFallback
The fallback mechanism for the shared memory.
Container to pass the default attributes with which a kernel must be launched, used to set kernel att...
Container to pass LLVM IR runtime values or constants related to the number of teams and threads with...
Value * DeviceID
Device ID value used in the kernel launch.
Value * MaxThreads
'parallel' construct 'num_threads' clause value, if present and it is an SPMD kernel.
Value * LoopTripCount
Total number of iterations of the SPMD or Generic-SPMD kernel or null if it is a generic kernel.
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:342
Data structure to contain the information needed to uniquely identify a target entry.
static LLVM_ABI void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, StringRef ParentName, unsigned DeviceID, unsigned FileID, unsigned Line, unsigned Count)
static constexpr const char * KernelNamePrefix
The prefix used for kernel names.
bool operator<(const TargetRegionEntryInfo &RHS) const
TargetRegionEntryInfo(StringRef ParentName, unsigned DeviceID, unsigned FileID, unsigned Line, unsigned Count=0)
Defines various target-specific GPU grid values that must be consistent between host RTL (plugin),...