LLVM 19.0.0git
OMPIRBuilder.h
Go to the documentation of this file.
1//===- IR/OpenMPIRBuilder.h - OpenMP encoding builder for LLVM IR - C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the OpenMPIRBuilder class and helpers used as a convenient
10// way to create LLVM instructions for OpenMP directives.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
15#define LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
16
19#include "llvm/IR/DebugLoc.h"
20#include "llvm/IR/IRBuilder.h"
23#include <forward_list>
24#include <map>
25#include <optional>
26
27namespace llvm {
28class CanonicalLoopInfo;
29struct TargetRegionEntryInfo;
30class OffloadEntriesInfoManager;
31class OpenMPIRBuilder;
32
33/// Move the instructions after an InsertPoint to the beginning of another
34/// BasicBlock.
35///
36/// The instructions after \p IP are moved to the beginning of \p New which must
37/// not have any PHINodes. If \p CreateBranch is true, a branch instruction to
38/// \p New will be added such that there is no semantic change. Otherwise, the
39/// \p IP insert block remains degenerate and it is up to the caller to insert a
40/// terminator.
41void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New,
42 bool CreateBranch);
43
44/// Splice a BasicBlock at an IRBuilder's current insertion point. The
45/// builder's new insert location will stick to after the instruction before
46/// the insertion point (instead of moving with the instruction the InsertPoint
47/// stores internally).
48void spliceBB(IRBuilder<> &Builder, BasicBlock *New, bool CreateBranch);
49
50/// Split a BasicBlock at an InsertPoint, even if the block is degenerate
51/// (missing the terminator).
52///
53/// llvm::SplitBasicBlock and BasicBlock::splitBasicBlock require a well-formed
54/// BasicBlock. \p Name is used for the new successor block. If \p CreateBranch
55/// is true, a branch to the new successor will be created such that
56/// semantically there is no change; otherwise the block of the insertion point
57/// remains degenerate and it is the caller's responsibility to insert a
58/// terminator. Returns the new successor block.
59BasicBlock *splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch,
60 llvm::Twine Name = {});
61
62/// Split a BasicBlock at \p Builder's insertion point, even if the block is
63/// degenerate (missing the terminator). The builder's new insert location will
64/// stick to after the instruction before the insertion point (instead of moving
65/// with the instruction the InsertPoint stores internally).
66BasicBlock *splitBB(IRBuilderBase &Builder, bool CreateBranch,
67 llvm::Twine Name = {});
68
69/// Split a BasicBlock at \p Builder's insertion point, even if the block is
70/// degenerate (missing the terminator). The builder's new insert location will
71/// stick to after the instruction before the insertion point (instead of moving
72/// with the instruction the InsertPoint stores internally).
/// Unlike the IRBuilderBase overload, \p Name has no default argument here.
73BasicBlock *splitBB(IRBuilder<> &Builder, bool CreateBranch, llvm::Twine Name);
74
75/// Like splitBB, but reuses the current block's name for the new name.
/// \p Suffix defaults to ".split". NOTE(review): presumably the suffix is
/// appended to the current block's name -- confirm against the definition.
76BasicBlock *splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch,
77 llvm::Twine Suffix = ".split");
78
79/// Captures attributes that affect generating LLVM-IR using the
80/// OpenMPIRBuilder and related classes. Note that not all attributes are
81/// required for all classes or functions. In some use cases the configuration
82/// is not necessary at all, because the only functions that are called
83/// are ones that are not dependent on the configuration.
85public:
86 /// Flag to define whether to generate code for the role of the OpenMP host
87 /// (if set to false) or device (if set to true) in an offloading context. It
88 /// is set when the -fopenmp-is-target-device compiler frontend option is
89 /// specified.
90 std::optional<bool> IsTargetDevice;
91
92 /// Flag for specifying if the compilation is done for an accelerator. It is
93 /// set according to the architecture of the target triple and currently only
94 /// true when targeting AMDGPU or NVPTX. Today, these targets can only perform
95 /// the role of an OpenMP target device, so `IsTargetDevice` must also be true
96 /// if `IsGPU` is true. This restriction might be lifted if an accelerator-
97 /// like target with the ability to work as the OpenMP host is added, or if
98 /// the capabilities of the currently supported GPU architectures are
99 /// expanded.
100 std::optional<bool> IsGPU;
101
102 // Flag for specifying if offloading is mandatory.
103 std::optional<bool> OpenMPOffloadMandatory;
104
105 /// First separator used between the initial two parts of a name.
106 std::optional<StringRef> FirstSeparator;
107 /// Separator used between all of the remaining consecutive parts of a name.
108 std::optional<StringRef> Separator;
109
113 bool HasRequiresReverseOffload,
114 bool HasRequiresUnifiedAddress,
115 bool HasRequiresUnifiedSharedMemory,
116 bool HasRequiresDynamicAllocators);
117
118 // Getter functions that assert if the required values are not present.
    // Returns the stored IsTargetDevice value; asserts that it has been set.
119 bool isTargetDevice() const {
120 assert(IsTargetDevice.has_value() && "IsTargetDevice is not set");
121 return *IsTargetDevice;
122 }
123
// Returns the stored IsGPU value; asserts that it has been set.
124 bool isGPU() const {
125 assert(IsGPU.has_value() && "IsGPU is not set");
126 return *IsGPU;
127 }
128
130 assert(OpenMPOffloadMandatory.has_value() &&
131 "OpenMPOffloadMandatory is not set");
133 }
134
// True when RequiresFlags is non-zero: the int64_t member is implicitly
// converted to bool on return.
135 bool hasRequiresFlags() const { return RequiresFlags; }
136 bool hasRequiresReverseOffload() const;
137 bool hasRequiresUnifiedAddress() const;
139 bool hasRequiresDynamicAllocators() const;
140
141 /// Returns requires directive clauses as flags compatible with those expected
142 /// by libomptarget.
143 int64_t getRequiresFlags() const;
144
145 // Returns the FirstSeparator if set, otherwise use the default separator
146 // depending on isGPU
148 if (FirstSeparator.has_value())
149 return *FirstSeparator;
150 if (isGPU())
151 return "_";
152 return ".";
153 }
154
155 // Returns the Separator if set, otherwise use the default separator depending
156 // on isGPU
158 if (Separator.has_value())
159 return *Separator;
160 if (isGPU())
161 return "$";
162 return ".";
163 }
164
// Stores \p Value in IsGPU, so the optional has a value afterwards.
166 void setIsGPU(bool Value) { IsGPU = Value; }
170
175
176private:
177 /// Flags for specifying which requires directive clauses are present.
178 int64_t RequiresFlags;
179};
180
181/// Data structure to contain the information needed to uniquely identify
182/// a target entry.
184 std::string ParentName;
185 unsigned DeviceID;
186 unsigned FileID;
187 unsigned Line;
188 unsigned Count;
189
192 unsigned FileID, unsigned Line, unsigned Count = 0)
194 Count(Count) {}
195
198 unsigned DeviceID, unsigned FileID,
199 unsigned Line, unsigned Count);
200
202 return std::make_tuple(ParentName, DeviceID, FileID, Line, Count) <
203 std::make_tuple(RHS.ParentName, RHS.DeviceID, RHS.FileID, RHS.Line,
204 RHS.Count);
205 }
206};
207
208/// Class that manages information about offload code regions and data
210 /// Number of entries registered so far.
211 OpenMPIRBuilder *OMPBuilder;
212 unsigned OffloadingEntriesNum = 0;
213
214public:
215 /// Base class of the entries info.
217 public:
218 /// Kind of a given entry.
219 enum OffloadingEntryInfoKinds : unsigned {
220 /// Entry is a target region.
222 /// Entry is a declare target variable.
224 /// Invalid entry info.
226 };
227
228 protected:
230 explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind) : Kind(Kind) {}
231 explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order,
232 uint32_t Flags)
233 : Flags(Flags), Order(Order), Kind(Kind) {}
234 ~OffloadEntryInfo() = default;
235
236 public:
/// An entry is valid when its Order differs from ~0u, the value Order is
/// default-initialized to.
237 bool isValid() const { return Order != ~0u; }
/// Returns the stored Order (the order this entry was emitted).
238 unsigned getOrder() const { return Order; }
/// Returns the stored entry kind.
239 OffloadingEntryInfoKinds getKind() const { return Kind; }
/// Returns the flags currently stored for this entry.
240 uint32_t getFlags() const { return Flags; }
/// Overwrites the stored flags with \p NewFlags.
241 void setFlags(uint32_t NewFlags) { Flags = NewFlags; }
/// Returns the value held by the Addr handle, cast to Constant; the result is
/// null when the handle holds no value (cast_or_null).
242 Constant *getAddress() const { return cast_or_null<Constant>(Addr); }
244 assert(!Addr.pointsToAliveValue() && "Address has been set before!");
245 Addr = V;
246 }
/// Type-test hook; unconditionally true, \p Info is not inspected.
247 static bool classof(const OffloadEntryInfo *Info) { return true; }
248
249 private:
250 /// Address of the entity that has to be mapped for offloading.
251 WeakTrackingVH Addr;
252
253 /// Flags associated with the device global.
254 uint32_t Flags = 0u;
255
256 /// Order this entry was emitted.
257 unsigned Order = ~0u;
258
260 };
261
262 /// Return true if there are no entries defined.
263 bool empty() const;
264 /// Return number of entries defined so far.
265 unsigned size() const { return OffloadingEntriesNum; }
266
/// Construct a manager that stores \p builder in OMPBuilder. The pointer is
/// only stored, not dereferenced, by this constructor.
267 OffloadEntriesInfoManager(OpenMPIRBuilder *builder) : OMPBuilder(builder) {}
268
269 //
270 // Target region entries related.
271 //
272
273 /// Kind of the target registry entry.
275 /// Mark the entry as target region.
277 };
278
279 /// Target region entries info.
281 /// Address that can be used as the ID of the entry.
282 Constant *ID = nullptr;
283
284 public:
287 explicit OffloadEntryInfoTargetRegion(unsigned Order, Constant *Addr,
288 Constant *ID,
291 ID(ID) {
293 }
294
/// Returns the stored ID; null if none has been set.
295 Constant *getID() const { return ID; }
/// Stores \p V as the ID. Asserts that ID is still null, i.e. it may only be
/// set once.
296 void setID(Constant *V) {
297 assert(!ID && "ID has been set before!");
298 ID = V;
299 }
/// Type-test hook: true when \p Info's kind is
/// OffloadingEntryInfoTargetRegion. \p Info is dereferenced and must not be
/// null.
300 static bool classof(const OffloadEntryInfo *Info) {
301 return Info->getKind() == OffloadingEntryInfoTargetRegion;
302 }
303 };
304
305 /// Initialize target region entry.
306 /// This is ONLY needed for DEVICE compilation.
308 unsigned Order);
309 /// Register target region entry.
313 /// Return true if a target region entry with the provided information
314 /// exists.
316 bool IgnoreAddressId = false) const;
317
318 // Return the Name based on \a EntryInfo using the next available Count.
320 const TargetRegionEntryInfo &EntryInfo);
321
322 /// \brief Applies action \a Action on all registered entries.
323 typedef function_ref<void(const TargetRegionEntryInfo &EntryInfo,
324 const OffloadEntryInfoTargetRegion &)>
326 void
328
329 //
330 // Device global variable entries related.
331 //
332
333 /// Kind of the global variable entry.
335 /// Mark the entry as a to declare target.
337 /// Mark the entry as a to declare target link.
339 /// Mark the entry as a declare target enter.
341 /// Mark the entry as having no declare target entry kind.
343 /// Mark the entry as a declare target indirect global.
345 };
346
347 /// Kind of device clause for declare target variables
348 /// and functions
349 /// NOTE: Currently not used as a part of a variable entry
350 /// used for Flang and Clang to interface with the variable
351 /// related registration functions
353 /// The target is marked for all devices
355 /// The target is marked for non-host devices
357 /// The target is marked for host devices
359 /// The target is marked as having no clause
361 };
362
363 /// Device global variable entries info.
365 /// Size of the global variable.
366 int64_t VarSize;
368 const std::string VarName;
369
370 public:
373 explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order,
376 explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order, Constant *Addr,
377 int64_t VarSize,
380 const std::string &VarName)
382 VarSize(VarSize), Linkage(Linkage), VarName(VarName) {
384 }
385
386 int64_t getVarSize() const { return VarSize; }
/// Returns a StringRef referring to the VarName member string; no copy is
/// made, so the reference is tied to this entry's lifetime.
387 StringRef getVarName() const { return VarName; }
388 void setVarSize(int64_t Size) { VarSize = Size; }
389 GlobalValue::LinkageTypes getLinkage() const { return Linkage; }
390 void setLinkage(GlobalValue::LinkageTypes LT) { Linkage = LT; }
/// Type-test hook: true when \p Info's kind is
/// OffloadingEntryInfoDeviceGlobalVar. \p Info is dereferenced and must not
/// be null.
391 static bool classof(const OffloadEntryInfo *Info) {
392 return Info->getKind() == OffloadingEntryInfoDeviceGlobalVar;
393 }
394 };
395
396 /// Initialize device global variable entry.
397 /// This is ONLY used for DEVICE compilation.
400 unsigned Order);
401
402 /// Register device global variable entry.
404 int64_t VarSize,
407 /// Checks if the variable with the given name has been registered already.
409 return OffloadEntriesDeviceGlobalVar.count(VarName) > 0;
410 }
411 /// Applies action \a Action on all registered entries.
412 typedef function_ref<void(StringRef, const OffloadEntryInfoDeviceGlobalVar &)>
416
417private:
418 /// Return the count of entries at a particular source location.
419 unsigned
420 getTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo) const;
421
422 /// Update the count of entries at a particular source location.
423 void
424 incrementTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo);
425
427 getTargetRegionEntryCountKey(const TargetRegionEntryInfo &EntryInfo) {
428 return TargetRegionEntryInfo(EntryInfo.ParentName, EntryInfo.DeviceID,
429 EntryInfo.FileID, EntryInfo.Line, 0);
430 }
431
432 // Count of entries at a location.
433 std::map<TargetRegionEntryInfo, unsigned> OffloadEntriesTargetRegionCount;
434
435 // Storage for target region entries kind.
436 typedef std::map<TargetRegionEntryInfo, OffloadEntryInfoTargetRegion>
437 OffloadEntriesTargetRegionTy;
438 OffloadEntriesTargetRegionTy OffloadEntriesTargetRegion;
439 /// Storage for device global variable entries kind. The storage is to be
440 /// indexed by mangled name.
442 OffloadEntriesDeviceGlobalVarTy;
443 OffloadEntriesDeviceGlobalVarTy OffloadEntriesDeviceGlobalVar;
444};
445
446/// An interface to create LLVM-IR for OpenMP directives.
447///
448/// Each OpenMP directive has a corresponding public generator method.
450public:
451 /// Create a new OpenMPIRBuilder operating on the given module \p M. This will
452 /// not have an effect on \p M (see initialize)
454 : M(M), Builder(M.getContext()), OffloadInfoManager(this),
455 T(Triple(M.getTargetTriple())) {}
457
458 /// Initialize the internal state; this will put structure types and
459 /// potentially other helpers into the underlying module. Must be called
460 /// before any other method and only once! This internal state includes types
461 /// used in the OpenMPIRBuilder generated from OMPKinds.def.
462 void initialize();
463
465
466 /// Finalize the underlying module, e.g., by outlining regions.
467 /// \param Fn The function to be finalized. If not used,
468 /// all functions are finalized.
469 void finalize(Function *Fn = nullptr);
470
471 /// Add attributes known for \p FnID to \p Fn.
473
474 /// Type used throughout for insertion points.
476
477 /// Create a name using the platform specific separators.
478 /// \param Parts parts of the final name that needs separation
479 /// The created name has a first separator between the first and second part
480 /// and a second separator between all other parts.
481 /// E.g. with FirstSeparator "$" and Separator "." and
482 /// parts: "p1", "p2", "p3", "p4"
483 /// The resulting name is "p1$p2.p3.p4"
484 /// The separators are retrieved from the OpenMPIRBuilderConfig.
485 std::string createPlatformSpecificName(ArrayRef<StringRef> Parts) const;
486
487 /// Callback type for variable finalization (think destructors).
488 ///
489 /// \param CodeGenIP is the insertion point at which the finalization code
490 /// should be placed.
491 ///
492 /// A finalize callback knows about all objects that need finalization, e.g.
493 /// destruction, when the scope of the currently generated construct is left
494 /// at the time, and location, the callback is invoked.
495 using FinalizeCallbackTy = std::function<void(InsertPointTy CodeGenIP)>;
496
498 /// The finalization callback provided by the last in-flight invocation of
499 /// createXXXX for the directive of kind DK.
501
502 /// The directive kind of the innermost directive that has an associated
503 /// region which might require finalization when it is left.
504 omp::Directive DK;
505
506 /// Flag to indicate if the directive is cancellable.
508 };
509
510 /// Push a finalization callback on the finalization stack.
511 ///
512 /// NOTE: Temporary solution until Clang CG is gone.
514 FinalizationStack.push_back(FI);
515 }
516
517 /// Pop the last finalization callback from the finalization stack.
518 ///
519 /// NOTE: Temporary solution until Clang CG is gone.
521
522 /// Callback type for body (=inner region) code generation
523 ///
524 /// The callback takes code locations as arguments, each describing a
525 /// location where additional instructions can be inserted.
526 ///
527 /// The CodeGenIP may be in the middle of a basic block or point to the end of
528 /// it. The basic block may have a terminator or be degenerate. The callback
529 /// function may just insert instructions at that position, but also split the
530 /// block (without the Before argument of BasicBlock::splitBasicBlock such
531 /// that the identity of the split predecessor block is preserved) and insert
532 /// additional control flow, including branches that do not lead back to what
533 /// follows the CodeGenIP. Note that since the callback is allowed to split
534 /// the block, callers must assume that InsertPoints to positions in the
535 /// BasicBlock after CodeGenIP including CodeGenIP itself are invalidated. If
536 /// such InsertPoints need to be preserved, it can split the block itself
537 /// before calling the callback.
538 ///
539 /// AllocaIP and CodeGenIP must not point to the same position.
540 ///
541 /// \param AllocaIP is the insertion point at which new alloca instructions
542 /// should be placed. The BasicBlock it is pointing to must
543 /// not be split.
544 /// \param CodeGenIP is the insertion point at which the body code should be
545 /// placed.
547 function_ref<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
548
549 // This is created primarily for the sections construct, as llvm::function_ref
550 // (BodyGenCallbackTy) is not storable (as described in the comments of the
551 // function_ref class: function_ref contains a non-owning reference
552 // to the callable).
554 std::function<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
555
556 /// Callback type for loop body code generation.
557 ///
558 /// \param CodeGenIP is the insertion point where the loop's body code must be
559 /// placed. This will be a dedicated BasicBlock with a
560 /// conditional branch from the loop condition check and
561 /// terminated with an unconditional branch to the loop
562 /// latch.
563 /// \param IndVar is the induction variable usable at the insertion point.
565 function_ref<void(InsertPointTy CodeGenIP, Value *IndVar)>;
566
567 /// Callback type for variable privatization (think copy & default
568 /// constructor).
569 ///
570 /// \param AllocaIP is the insertion point at which new alloca instructions
571 /// should be placed.
572 /// \param CodeGenIP is the insertion point at which the privatization code
573 /// should be placed.
574 /// \param Original The value being copied/created, should not be used in the
575 /// generated IR.
576 /// \param Inner The equivalent of \p Original that should be used in the
577 /// generated IR; this is equal to \p Original if the value is
578 /// a pointer and can thus be passed directly, otherwise it is
579 /// an equivalent but different value.
580 /// \param ReplVal The replacement value, thus a copy or newly created version
581 /// of \p Inner.
582 ///
583 /// \returns The new insertion point where code generation continues and
584 /// \p ReplVal the replacement value.
586 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original,
587 Value &Inner, Value *&ReplVal)>;
588
589 /// Description of a LLVM-IR insertion point (IP) and a debug/source location
590 /// (filename, line, column, ...).
593 : IP(IRB.saveIP()), DL(IRB.getCurrentDebugLocation()) {}
596 : IP(IP), DL(DL) {}
599 };
600
601 /// Emitter methods for OpenMP directives.
602 ///
603 ///{
604
605 /// Generator for '#omp barrier'
606 ///
607 /// \param Loc The location where the barrier directive was encountered.
608 /// \param DK The kind of directive that caused the barrier.
609 /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
610 /// \param CheckCancelFlag Flag to indicate a cancel barrier return value
611 /// should be checked and acted upon.
612 ///
613 /// \returns The insertion point after the barrier.
614 InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK,
615 bool ForceSimpleCall = false,
616 bool CheckCancelFlag = true);
617
618 /// Generator for '#omp cancel'
619 ///
620 /// \param Loc The location where the directive was encountered.
621 /// \param IfCondition The evaluated 'if' clause expression, if any.
622 /// \param CanceledDirective The kind of directive that is canceled.
623 ///
624 /// \returns The insertion point after the barrier.
625 InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition,
626 omp::Directive CanceledDirective);
627
628 /// Generator for '#omp parallel'
629 ///
630 /// \param Loc The insert and source location description.
631 /// \param AllocaIP The insertion points to be used for alloca instructions.
632 /// \param BodyGenCB Callback that will generate the region code.
633 /// \param PrivCB Callback to copy a given variable (think copy constructor).
634 /// \param FiniCB Callback to finalize variable copies.
635 /// \param IfCondition The evaluated 'if' clause expression, if any.
636 /// \param NumThreads The evaluated 'num_threads' clause expression, if any.
637 /// \param ProcBind The value of the 'proc_bind' clause (see ProcBindKind).
638 /// \param IsCancellable Flag to indicate a cancellable parallel region.
639 ///
640 /// \returns The insertion position *after* the parallel.
643 BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
644 FinalizeCallbackTy FiniCB, Value *IfCondition,
645 Value *NumThreads, omp::ProcBindKind ProcBind,
646 bool IsCancellable);
647
648 /// Generator for the control flow structure of an OpenMP canonical loop.
649 ///
650 /// This generator operates on the logical iteration space of the loop, i.e.
651 /// the caller only has to provide a loop trip count of the loop as defined by
652 /// base language semantics. The trip count is interpreted as an unsigned
653 /// integer. The induction variable passed to \p BodyGenCB will be of the same
654 /// type and run from 0 to \p TripCount - 1. It is up to the callback to
655 /// convert the logical iteration variable to the loop counter variable in the
656 /// loop body.
657 ///
658 /// \param Loc The insert and source location description. The insert
659 /// location can be between two instructions or the end of a
660 /// degenerate block (e.g. a BB under construction).
661 /// \param BodyGenCB Callback that will generate the loop body code.
662 /// \param TripCount Number of iterations the loop body is executed.
663 /// \param Name Base name used to derive BB and instruction names.
664 ///
665 /// \returns An object representing the created control flow structure which
666 /// can be used for loop-associated directives.
668 LoopBodyGenCallbackTy BodyGenCB,
669 Value *TripCount,
670 const Twine &Name = "loop");
671
672 /// Generator for the control flow structure of an OpenMP canonical loop.
673 ///
674 /// Instead of a logical iteration space, this allows specifying user-defined
675 /// loop counter values using increment, upper- and lower bounds. To
676 /// disambiguate the terminology when counting downwards, instead of lower
677 /// bounds we use \p Start for the loop counter value in the first body
678 /// iteration.
679 ///
680 /// Consider the following limitations:
681 ///
682 /// * A loop counter space over all integer values of its bit-width cannot be
683 /// represented (e.g. using uint8_t, its loop trip count of 256 cannot be
684 /// stored into an 8-bit integer):
685 ///
686 /// DO I = 0, 255, 1
687 ///
688 /// * Unsigned wrapping is only supported when wrapping only "once"; E.g.
689 /// effectively counting downwards:
690 ///
691 /// for (uint8_t i = 100u; i > 0; i += 127u)
692 ///
693 ///
694 /// TODO: May need to add additional parameters to represent:
695 ///
696 /// * Allow representing downcounting with unsigned integers.
697 ///
698 /// * Sign of the step and the comparison operator might disagree:
699 ///
700 /// for (int i = 0; i < 42; i -= 1u)
701 ///
702 //
703 /// \param Loc The insert and source location description.
704 /// \param BodyGenCB Callback that will generate the loop body code.
705 /// \param Start Value of the loop counter for the first iteration.
706 /// \param Stop Loop counter values past this will stop the loop.
707 /// \param Step Loop counter increment after each iteration; negative
708 /// means counting down.
709 /// \param IsSigned Whether Start, Stop and Step are signed integers.
710 /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop
711 /// counter.
712 /// \param ComputeIP Insertion point for instructions computing the trip
713 /// count. Can be used to ensure the trip count is available
714 /// at the outermost loop of a loop nest. If not set,
715 /// defaults to the preheader of the generated loop.
716 /// \param Name Base name used to derive BB and instruction names.
717 ///
718 /// \returns An object representing the created control flow structure which
719 /// can be used for loop-associated directives.
721 LoopBodyGenCallbackTy BodyGenCB,
722 Value *Start, Value *Stop, Value *Step,
723 bool IsSigned, bool InclusiveStop,
724 InsertPointTy ComputeIP = {},
725 const Twine &Name = "loop");
726
727 /// Collapse a loop nest into a single loop.
728 ///
729 /// Merges loops of a loop nest into a single CanonicalLoopNest representation
730 /// that has the same number of innermost loop iterations as the origin loop
731 /// nest. The induction variables of the input loops are derived from the
732 /// collapsed loop's induction variable. This is intended to be used to
733 /// implement OpenMP's collapse clause. Before applying a directive,
734 /// collapseLoops normalizes a loop nest to contain only a single loop and the
735 /// directive's implementation does not need to handle multiple loops itself.
736 /// This does not remove the need to handle all loop nest handling by
737 /// directives, such as the ordered(<n>) clause or the simd schedule-clause
738 /// modifier of the worksharing-loop directive.
739 ///
740 /// Example:
741 /// \code
742 /// for (int i = 0; i < 7; ++i) // Canonical loop "i"
743 /// for (int j = 0; j < 9; ++j) // Canonical loop "j"
744 /// body(i, j);
745 /// \endcode
746 ///
747 /// After collapsing with Loops={i,j}, the loop is changed to
748 /// \code
749 /// for (int ij = 0; ij < 63; ++ij) {
750 /// int i = ij / 9;
751 /// int j = ij % 9;
752 /// body(i, j);
753 /// }
754 /// \endcode
755 ///
756 /// In the current implementation, the following limitations apply:
757 ///
758 /// * All input loops have an induction variable of the same type.
759 ///
760 /// * The collapsed loop will have the same trip count integer type as the
761 /// input loops. Therefore it is possible that the collapsed loop cannot
762 /// represent all iterations of the input loops. For instance, assuming a
763 /// 32 bit integer type, and two input loops both iterating 2^16 times, the
764 /// theoretical trip count of the collapsed loop would be 2^32 iterations,
765 /// which cannot be represented in a 32-bit integer. Behavior is undefined
766 /// in this case.
767 ///
768 /// * The trip counts of every input loop must be available at \p ComputeIP.
769 /// Non-rectangular loops are not yet supported.
770 ///
771 /// * At each nest level, code between a surrounding loop and its nested loop
772 /// is hoisted into the loop body, and such code will be executed more
773 /// often than before collapsing (or not at all if any inner loop iteration
774 /// has a trip count of 0). This is permitted by the OpenMP specification.
775 ///
776 /// \param DL Debug location for instructions added for collapsing,
777 /// such as instructions to compute/derive the input loop's
778 /// induction variables.
779 /// \param Loops Loops in the loop nest to collapse. Loops are specified
780 /// from outermost-to-innermost and every control flow of a
781 /// loop's body must pass through its directly nested loop.
782 /// \param ComputeIP Where additional instructions that compute the collapsed
783 /// trip count are inserted. If not set, defaults to before the
784 /// generated loop.
785 ///
786 /// \returns The CanonicalLoopInfo object representing the collapsed loop.
789 InsertPointTy ComputeIP);
790
791 /// Get the default alignment value for given target
792 ///
793 /// \param TargetTriple Target triple
794 /// \param Features StringMap which describes extra CPU features
795 static unsigned getOpenMPDefaultSimdAlign(const Triple &TargetTriple,
796 const StringMap<bool> &Features);
797
798 /// Retrieve (or create if non-existent) the address of a declare
799 /// target variable, used in conjunction with registerTargetGlobalVariable
800 /// to create declare target global variables.
801 ///
802 /// \param CaptureClause - enumerator corresponding to the OpenMP capture
803 /// clause used in conjunction with the variable being registered (link,
804 /// to, enter).
805 /// \param DeviceClause - enumerator corresponding to the OpenMP device
806 /// clause used in conjunction with the variable being registered (nohost,
807 /// host, any)
808 /// \param IsDeclaration - boolean stating if the variable being registered
809 /// is a declaration-only and not a definition
810 /// \param IsExternallyVisible - boolean stating if the variable is externally
811 /// visible
812 /// \param EntryInfo - Unique entry information for the value generated
813 /// using getTargetEntryUniqueInfo, used to name generated pointer references
814 /// to the declare target variable
815 /// \param MangledName - the mangled name of the variable being registered
816 /// \param GeneratedRefs - references generated by invocations of
817 /// registerTargetGlobalVariable invoked from getAddrOfDeclareTargetVar,
818 /// these are required by Clang for book keeping.
819 /// \param OpenMPSIMD - if OpenMP SIMD mode is currently enabled
820 /// \param TargetTriple - The OpenMP device target triple we are compiling
821 /// for
822 /// \param LlvmPtrTy - The type of the variable we are generating or
823 /// retrieving an address for
824 /// \param GlobalInitializer - a lambda function which creates a constant
825 /// used for initializing a pointer reference to the variable in certain
826 /// cases. If a nullptr is passed, it will default to utilising the original
827 /// variable to initialize the pointer reference.
828 /// \param VariableLinkage - a lambda function which returns the variables
829 /// linkage type, if unspecified and a nullptr is given, it will instead
830 /// utilise the linkage stored on the existing global variable in the
831 /// LLVMModule.
835 bool IsDeclaration, bool IsExternallyVisible,
836 TargetRegionEntryInfo EntryInfo, StringRef MangledName,
837 std::vector<GlobalVariable *> &GeneratedRefs, bool OpenMPSIMD,
838 std::vector<Triple> TargetTriple, Type *LlvmPtrTy,
839 std::function<Constant *()> GlobalInitializer,
840 std::function<GlobalValue::LinkageTypes()> VariableLinkage);
841
842 /// Registers a target variable for device or host.
843 ///
844 /// \param CaptureClause - enumerator corresponding to the OpenMP capture
845 /// clause used in conjunction with the variable being registered (link,
846 /// to, enter).
847 /// \param DeviceClause - enumerator corresponding to the OpenMP device
848 /// clause used in conjunction with the variable being registered (nohost,
849 /// host, any)
850 /// \param IsDeclaration - boolean stating if the variable being registered
851 /// is a declaration-only and not a definition
852 /// \param IsExternallyVisible - boolean stating if the variable is externally
853 /// visible
854 /// \param EntryInfo - Unique entry information for the value generated
855 /// using getTargetEntryUniqueInfo, used to name generated pointer references
856 /// to the declare target variable
857 /// \param MangledName - the mangled name of the variable being registered
858 /// \param GeneratedRefs - references generated by invocations of
859 /// registerTargetGlobalVariable these are required by Clang for book
860 /// keeping.
861 /// \param OpenMPSIMD - if OpenMP SIMD mode is currently enabled
862 /// \param TargetTriple - The OpenMP device target triple we are compiling
863 /// for
864 /// \param GlobalInitializer - a lambda function which creates a constant
865 /// used for initializing a pointer reference to the variable in certain
866 /// cases. If a nullptr is passed, it will default to utilising the original
867 /// variable to initialize the pointer reference.
868 /// \param VariableLinkage - a lambda function which returns the variable's
869 /// linkage type, if unspecified and a nullptr is given, it will instead
870 /// utilise the linkage stored on the existing global variable in the
871 /// LLVMModule.
872 /// \param LlvmPtrTy - The type of the variable we are generating or
873 /// retrieving an address for
874 /// \param Addr - the original llvm value (addr) of the variable to be
875 /// registered
879 bool IsDeclaration, bool IsExternallyVisible,
880 TargetRegionEntryInfo EntryInfo, StringRef MangledName,
881 std::vector<GlobalVariable *> &GeneratedRefs, bool OpenMPSIMD,
882 std::vector<Triple> TargetTriple,
883 std::function<Constant *()> GlobalInitializer,
884 std::function<GlobalValue::LinkageTypes()> VariableLinkage,
885 Type *LlvmPtrTy, Constant *Addr);
886
887 /// Get the offset of the OMP_MAP_MEMBER_OF field.
888 unsigned getFlagMemberOffset();
889
890 /// Get OMP_MAP_MEMBER_OF flag with extra bits reserved based on
891 /// the position given.
892 /// \param Position - A value indicating the position of the parent
893 /// of the member in the kernel argument structure, often retrieved
894 /// by the parents position in the combined information vectors used
895 /// to generate the structure itself. Multiple children (members of)
896 /// with the same parent will use the same returned member flag.
898
899 /// Given an initial flag set, this function modifies it to contain
900 /// the passed in MemberOfFlag generated from the getMemberOfFlag
901 /// function. The results are dependent on the existing flag bits
902 /// set in the original flag set.
903 /// \param Flags - The original set of flags to be modified with the
904 /// passed in MemberOfFlag.
905 /// \param MemberOfFlag - A modified OMP_MAP_MEMBER_OF flag, adjusted
906 /// slightly based on the getMemberOfFlag which adjusts the flag bits
907 /// based on the members position in its parent.
909 omp::OpenMPOffloadMappingFlags MemberOfFlag);
910
911private:
912 /// Modifies the canonical loop to be a statically-scheduled workshare loop
913 /// which is executed on the device
914 ///
915 /// This takes a \p CLI representing a canonical loop, such as the one
916 /// created by \see createCanonicalLoop and emits additional instructions to
917 /// turn it into a workshare loop. In particular, it calls to an OpenMP
918 /// runtime function in the preheader to call OpenMP device rtl function
919 /// which handles worksharing of loop body iterations.
920 ///
921 /// \param DL Debug location for instructions added for the
922 /// workshare-loop construct itself.
923 /// \param CLI A descriptor of the canonical loop to workshare.
924 /// \param AllocaIP An insertion point for Alloca instructions usable in the
925 /// preheader of the loop.
926 /// \param LoopType Information about type of loop worksharing.
927 /// It corresponds to type of loop workshare OpenMP pragma.
928 ///
929 /// \returns Point where to insert code after the workshare construct.
930 InsertPointTy applyWorkshareLoopTarget(DebugLoc DL, CanonicalLoopInfo *CLI,
931 InsertPointTy AllocaIP,
932 omp::WorksharingLoopType LoopType);
933
934 /// Modifies the canonical loop to be a statically-scheduled workshare loop.
935 ///
936 /// This takes a \p CLI representing a canonical loop, such as the one
937 /// created by \p createCanonicalLoop and emits additional instructions to
938 /// turn it into a workshare loop. In particular, it calls to an OpenMP
939 /// runtime function in the preheader to obtain the loop bounds to be used in
940 /// the current thread, updates the relevant instructions in the canonical
941 /// loop and calls to an OpenMP runtime finalization function after the loop.
942 ///
943 /// \param DL Debug location for instructions added for the
944 /// workshare-loop construct itself.
945 /// \param CLI A descriptor of the canonical loop to workshare.
946 /// \param AllocaIP An insertion point for Alloca instructions usable in the
947 /// preheader of the loop.
948 /// \param NeedsBarrier Indicates whether a barrier must be inserted after
949 /// the loop.
950 ///
951 /// \returns Point where to insert code after the workshare construct.
952 InsertPointTy applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
953 InsertPointTy AllocaIP,
954 bool NeedsBarrier);
955
956 /// Modifies the canonical loop to be a statically-scheduled workshare loop
957 /// with a user-specified chunk size.
958 ///
959 /// \param DL Debug location for instructions added for the
960 /// workshare-loop construct itself.
961 /// \param CLI A descriptor of the canonical loop to workshare.
962 /// \param AllocaIP An insertion point for Alloca instructions usable in
963 /// the preheader of the loop.
964 /// \param NeedsBarrier Indicates whether a barrier must be inserted after the
965 /// loop.
966 /// \param ChunkSize The user-specified chunk size.
967 ///
968 /// \returns Point where to insert code after the workshare construct.
969 InsertPointTy applyStaticChunkedWorkshareLoop(DebugLoc DL,
971 InsertPointTy AllocaIP,
972 bool NeedsBarrier,
973 Value *ChunkSize);
974
975 /// Modifies the canonical loop to be a dynamically-scheduled workshare loop.
976 ///
977 /// This takes a \p CLI representing a canonical loop, such as the one
978 /// created by \p createCanonicalLoop and emits additional instructions to
979 /// turn it into a workshare loop. In particular, it calls to an OpenMP
980 /// runtime function in the preheader to obtain, and then in each iteration
981 /// to update the loop counter.
982 ///
983 /// \param DL Debug location for instructions added for the
984 /// workshare-loop construct itself.
985 /// \param CLI A descriptor of the canonical loop to workshare.
986 /// \param AllocaIP An insertion point for Alloca instructions usable in the
987 /// preheader of the loop.
988 /// \param SchedType Type of scheduling to be passed to the init function.
989 /// \param NeedsBarrier Indicates whether a barrier must be inserted after
990 /// the loop.
991 /// \param Chunk The size of loop chunk considered as a unit when
992 /// scheduling. If \p nullptr, defaults to 1.
993 ///
994 /// \returns Point where to insert code after the workshare construct.
995 InsertPointTy applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
996 InsertPointTy AllocaIP,
997 omp::OMPScheduleType SchedType,
998 bool NeedsBarrier,
999 Value *Chunk = nullptr);
1000
1001 /// Create alternative version of the loop to support if clause
1002 ///
1003 /// OpenMP if clause can require to generate second loop. This loop
1004 /// will be executed when if clause condition is not met. createIfVersion
1005 /// adds branch instruction to the copied loop if \p IfCond is not met.
1006 ///
1007 /// \param Loop Original loop which should be versioned.
1008 /// \param IfCond Value which corresponds to if clause condition
1009 /// \param VMap Value to value map to define relation between
1010 /// original and copied loop values and loop blocks.
1011 /// \param NamePrefix Optional name prefix for if.then if.else blocks.
1012 void createIfVersion(CanonicalLoopInfo *Loop, Value *IfCond,
1013 ValueToValueMapTy &VMap, const Twine &NamePrefix = "");
1014
1015public:
1016 /// Modifies the canonical loop to be a workshare loop.
1017 ///
1018 /// This takes a \p CLI representing a canonical loop, such as the one
1019 /// created by \p createCanonicalLoop and emits additional instructions to
1020 /// turn it into a workshare loop. In particular, it calls to an OpenMP
1021 /// runtime function in the preheader to obtain the loop bounds to be used in
1022 /// the current thread, updates the relevant instructions in the canonical
1023 /// loop and calls to an OpenMP runtime finalization function after the loop.
1024 ///
1025 /// The concrete transformation is done by applyStaticWorkshareLoop,
1026 /// applyStaticChunkedWorkshareLoop, or applyDynamicWorkshareLoop, depending
1027 /// on the value of \p SchedKind and \p ChunkSize.
1028 ///
1029 /// \param DL Debug location for instructions added for the
1030 /// workshare-loop construct itself.
1031 /// \param CLI A descriptor of the canonical loop to workshare.
1032 /// \param AllocaIP An insertion point for Alloca instructions usable in the
1033 /// preheader of the loop.
1034 /// \param NeedsBarrier Indicates whether a barrier must be inserted after
1035 /// the loop.
1036 /// \param SchedKind Scheduling algorithm to use.
1037 /// \param ChunkSize The chunk size for the inner loop.
1038 /// \param HasSimdModifier Whether the simd modifier is present in the
1039 /// schedule clause.
1040 /// \param HasMonotonicModifier Whether the monotonic modifier is present in
1041 /// the schedule clause.
1042 /// \param HasNonmonotonicModifier Whether the nonmonotonic modifier is
1043 /// present in the schedule clause.
1044 /// \param HasOrderedClause Whether the (parameterless) ordered clause is
1045 /// present.
1046 /// \param LoopType Information about type of loop worksharing.
1047 /// It corresponds to type of loop workshare OpenMP pragma.
1048 ///
1049 /// \returns Point where to insert code after the workshare construct.
1052 bool NeedsBarrier,
1053 llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default,
1054 Value *ChunkSize = nullptr, bool HasSimdModifier = false,
1055 bool HasMonotonicModifier = false, bool HasNonmonotonicModifier = false,
1056 bool HasOrderedClause = false,
1057 omp::WorksharingLoopType LoopType =
1059
1060 /// Tile a loop nest.
1061 ///
1062 /// Tiles the loops of \p Loops by the tile sizes in \p TileSizes. Loops in
1063 /// \p Loops must be perfectly nested, from outermost to innermost loop
1064 /// (i.e. Loops.front() is the outermost loop). The trip count llvm::Value
1065 /// of every loop and every tile sizes must be usable in the outermost
1066 /// loop's preheader. This implies that the loop nest is rectangular.
1067 ///
1068 /// Example:
1069 /// \code
1070 /// for (int i = 0; i < 15; ++i) // Canonical loop "i"
1071 /// for (int j = 0; j < 14; ++j) // Canonical loop "j"
1072 /// body(i, j);
1073 /// \endcode
1074 ///
1075 /// After tiling with Loops={i,j} and TileSizes={5,7}, the loop is changed to
1076 /// \code
1077 /// for (int i1 = 0; i1 < 3; ++i1)
1078 /// for (int j1 = 0; j1 < 2; ++j1)
1079 /// for (int i2 = 0; i2 < 5; ++i2)
1080 /// for (int j2 = 0; j2 < 7; ++j2)
1081 /// body(i1*5+i2, j1*7+j2);
1082 /// \endcode
1083 ///
1084 /// The returned vector contains the loops {i1,j1,i2,j2}. The loops i1 and j1
1085 /// are called the floor loops, and i2 and j2 the tile loops. Tiling also
1086 /// handles non-constant trip counts, non-constant tile sizes and trip counts
1087 /// that are not multiples of the tile size. In the latter case the tile loop
1088 /// of the last floor-loop iteration will have fewer iterations than specified
1089 /// as its tile size.
1090 ///
1091 ///
1092 /// @param DL Debug location for instructions added by tiling, for
1093 /// instance the floor- and tile trip count computation.
1094 /// @param Loops Loops to tile. The CanonicalLoopInfo objects are
1095 /// invalidated by this method, i.e. should not be used after
1096 /// tiling.
1097 /// @param TileSizes For each loop in \p Loops, the tile size for that
1098 /// dimension.
1099 ///
1100 /// \returns A list of generated loops. Contains twice as many loops as the
1101 /// input loop nest; the first half are the floor loops and the
1102 /// second half are the tile loops.
1103 std::vector<CanonicalLoopInfo *>
1105 ArrayRef<Value *> TileSizes);
1106
1107 /// Fully unroll a loop.
1108 ///
1109 /// Instead of unrolling the loop immediately (and duplicating its body
1110 /// instructions), it is deferred to LLVM's LoopUnrollPass by adding loop
1111 /// metadata.
1112 ///
1113 /// \param DL Debug location for instructions added by unrolling.
1114 /// \param Loop The loop to unroll. The loop will be invalidated.
1116
1117 /// Fully or partially unroll a loop. How the loop is unrolled is determined
1118 /// using LLVM's LoopUnrollPass.
1119 ///
1120 /// \param DL Debug location for instructions added by unrolling.
1121 /// \param Loop The loop to unroll. The loop will be invalidated.
1123
1124 /// Partially unroll a loop.
1125 ///
1126 /// The CanonicalLoopInfo of the unrolled loop for use with chained
1127 /// loop-associated directive can be requested using \p UnrolledCLI. Not
1128 /// needing the CanonicalLoopInfo allows more efficient code generation by
1129 /// deferring the actual unrolling to the LoopUnrollPass using loop metadata.
1130 /// A loop-associated directive applied to the unrolled loop needs to know the
1131 /// new trip count which means that if using a heuristically determined unroll
1132 /// factor (\p Factor == 0), that factor must be computed immediately. We are
1133 /// using the same logic as the LoopUnrollPass to derive the unroll factor,
1134 /// but which assumes that some canonicalization has taken place (e.g.
1135 /// Mem2Reg, LICM, GVN, Inlining, etc.). That is, the heuristic will perform
1136 /// better when the unrolled loop's CanonicalLoopInfo is not needed.
1137 ///
1138 /// \param DL Debug location for instructions added by unrolling.
1139 /// \param Loop The loop to unroll. The loop will be invalidated.
1140 /// \param Factor The factor to unroll the loop by. A factor of 0
1141 /// indicates that a heuristic should be used to determine
1142 /// the unroll-factor.
1143 /// \param UnrolledCLI If non-null, receives the CanonicalLoopInfo of the
1144 /// partially unrolled loop. Otherwise, uses loop metadata
1145 /// to defer unrolling to the LoopUnrollPass.
1146 void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor,
1147 CanonicalLoopInfo **UnrolledCLI);
1148
1149 /// Add metadata to simd-ize a loop. If IfCond is not nullptr, the loop
1150 /// is cloned. The metadata which prevents vectorization is added to
1151 /// the cloned loop. The cloned loop is executed when \p IfCond is evaluated
1152 /// to false.
1153 ///
1154 /// \param Loop The loop to simd-ize.
1155 /// \param AlignedVars The map which contains pairs of the pointer
1156 /// and its corresponding alignment.
1157 /// \param IfCond The value which corresponds to the if clause
1158 /// condition.
1159 /// \param Order The enum to map order clause.
1160 /// \param Simdlen The Simdlen length to apply to the simd loop.
1161 /// \param Safelen The Safelen length to apply to the simd loop.
1163 MapVector<Value *, Value *> AlignedVars, Value *IfCond,
1164 omp::OrderKind Order, ConstantInt *Simdlen,
1165 ConstantInt *Safelen);
1166
1167 /// Generator for '#omp flush'
1168 ///
1169 /// \param Loc The location where the flush directive was encountered
1170 void createFlush(const LocationDescription &Loc);
1171
1172 /// Generator for '#omp taskwait'
1173 ///
1174 /// \param Loc The location where the taskwait directive was encountered.
1175 void createTaskwait(const LocationDescription &Loc);
1176
1177 /// Generator for '#omp taskyield'
1178 ///
1179 /// \param Loc The location where the taskyield directive was encountered.
1180 void createTaskyield(const LocationDescription &Loc);
1181
1182 /// A struct to pack the relevant information for an OpenMP depend clause.
1183 struct DependData {
1187 explicit DependData() = default;
1189 Value *DepVal)
1191 };
1192
1193 /// Generator for `#omp task`
1194 ///
1195 /// \param Loc The location where the task construct was encountered.
1196 /// \param AllocaIP The insertion point to be used for alloca instructions.
1197 /// \param BodyGenCB Callback that will generate the region code.
1198 /// \param Tied True if the task is tied, false if the task is untied.
1199 /// \param Final i1 value which is `true` if the task is final, `false` if the
1200 /// task is not final.
1201 /// \param IfCondition i1 value. If it evaluates to `false`, an undeferred
1202 /// task is generated, and the encountering thread must
1203 /// suspend the current task region, for which execution
1204 /// cannot be resumed until execution of the structured
1205 /// block that is associated with the generated task is
1206 /// completed.
1207 InsertPointTy createTask(const LocationDescription &Loc,
1208 InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB,
1209 bool Tied = true, Value *Final = nullptr,
1210 Value *IfCondition = nullptr,
1211 SmallVector<DependData> Dependencies = {});
1212
1213 /// Generator for the taskgroup construct
1214 ///
1215 /// \param Loc The location where the taskgroup construct was encountered.
1216 /// \param AllocaIP The insertion point to be used for alloca instructions.
1217 /// \param BodyGenCB Callback that will generate the region code.
1218 InsertPointTy createTaskgroup(const LocationDescription &Loc,
1219 InsertPointTy AllocaIP,
1220 BodyGenCallbackTy BodyGenCB);
1221
1223 std::function<std::tuple<std::string, uint64_t>()>;
1224
1225 /// Creates unique info for a target entry from the filename and line
1226 /// number supplied by \p CallBack.
1227 ///
1228 /// \param CallBack A callback function which should return filename the entry
1229 /// resides in as well as the line number for the target entry
1230 /// \param ParentName The name of the parent the target entry resides in, if
1231 /// any.
1234 StringRef ParentName = "");
1235
1236 /// Functions used to generate reductions. Such functions take two Values
1237 /// representing LHS and RHS of the reduction, respectively, and a reference
1238 /// to the value that is updated to refer to the reduction result.
1241
1242 /// Functions used to generate atomic reductions. Such functions take two
1243 /// Values representing pointers to LHS and RHS of the reduction, as well as
1244 /// the element type of these pointers. They are expected to atomically
1245 /// update the LHS to the reduced value.
1248
1249 /// Information about an OpenMP reduction.
1257
1258 /// Reduction element type, must match pointee type of variable.
1260
1261 /// Reduction variable of pointer type.
1263
1264 /// Thread-private partial reduction variable.
1266
1267 /// Callback for generating the reduction body. The IR produced by this will
1268 /// be used to combine two values in a thread-safe context, e.g., under
1269 /// lock or within the same thread, and therefore need not be atomic.
1271
1272 /// Callback for generating the atomic reduction body, may be null. The IR
1273 /// produced by this will be used to atomically combine two values during
1274 /// reduction. If null, the implementation will use the non-atomic version
1275 /// along with the appropriate synchronization mechanisms.
1277 };
1278
1279 // TODO: provide atomic and non-atomic reduction generators for reduction
1280 // operators defined by the OpenMP specification.
1281
1282 /// Generator for '#omp reduction'.
1283 ///
1284 /// Emits the IR instructing the runtime to perform the specific kind of
1285 /// reductions. Expects reduction variables to have been privatized and
1286 /// initialized to reduction-neutral values separately. Emits the calls to
1287 /// runtime functions as well as the reduction function and the basic blocks
1288 /// performing the reduction atomically and non-atomically.
1289 ///
1290 /// The code emitted for the following:
1291 ///
1292 /// \code
1293 /// type var_1;
1294 /// type var_2;
1295 /// #pragma omp <directive> reduction(reduction-op:var_1,var_2)
1296 /// /* body */;
1297 /// \endcode
1298 ///
1299 /// corresponds to the following sketch.
1300 ///
1301 /// \code
1302 /// void _outlined_par() {
1303 /// // N is the number of different reductions.
1304 /// void *red_array[] = {privatized_var_1, privatized_var_2, ...};
1305 /// switch(__kmpc_reduce(..., N, /*size of data in red array*/, red_array,
1306 /// _omp_reduction_func,
1307 /// _gomp_critical_user.reduction.var)) {
1308 /// case 1: {
1309 /// var_1 = var_1 <reduction-op> privatized_var_1;
1310 /// var_2 = var_2 <reduction-op> privatized_var_2;
1311 /// // ...
1312 /// __kmpc_end_reduce(...);
1313 /// break;
1314 /// }
1315 /// case 2: {
1316 /// _Atomic<ReductionOp>(var_1, privatized_var_1);
1317 /// _Atomic<ReductionOp>(var_2, privatized_var_2);
1318 /// // ...
1319 /// break;
1320 /// }
1321 /// default: break;
1322 /// }
1323 /// }
1324 ///
1325 /// void _omp_reduction_func(void **lhs, void **rhs) {
1326 /// *(type *)lhs[0] = *(type *)lhs[0] <reduction-op> *(type *)rhs[0];
1327 /// *(type *)lhs[1] = *(type *)lhs[1] <reduction-op> *(type *)rhs[1];
1328 /// // ...
1329 /// }
1330 /// \endcode
1331 ///
1332 /// \param Loc The location where the reduction was
1333 /// encountered. Must be within the associated
1334 /// directive and after the last local access to the
1335 /// reduction variables.
1336 /// \param AllocaIP An insertion point suitable for allocas usable
1337 /// in reductions.
1338 /// \param ReductionInfos A list of info on each reduction variable.
1339 /// \param IsNoWait A flag set if the reduction is marked as nowait.
1341 InsertPointTy AllocaIP,
1342 ArrayRef<ReductionInfo> ReductionInfos,
1343 bool IsNoWait = false);
1344
1345 ///}
1346
1347 /// Return the insertion point used by the underlying IRBuilder.
1349
1350 /// Update the internal location to \p Loc.
1352 Builder.restoreIP(Loc.IP);
1354 return Loc.IP.getBlock() != nullptr;
1355 }
1356
1357 /// Return the function declaration for the runtime function with \p FnID.
1360
1362
1363 /// Return the (LLVM-IR) string describing the source location \p LocStr.
1364 Constant *getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize);
1365
1366 /// Return the (LLVM-IR) string describing the default source location.
1368
1369 /// Return the (LLVM-IR) string describing the source location identified by
1370 /// the arguments.
1371 Constant *getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName,
1372 unsigned Line, unsigned Column,
1373 uint32_t &SrcLocStrSize);
1374
1375 /// Return the (LLVM-IR) string describing the DebugLoc \p DL. Use \p F as
1376 /// fallback if \p DL does not specify the function name.
1378 Function *F = nullptr);
1379
1380 /// Return the (LLVM-IR) string describing the source location \p Loc.
1381 Constant *getOrCreateSrcLocStr(const LocationDescription &Loc,
1382 uint32_t &SrcLocStrSize);
1383
1384 /// Return an ident_t* encoding the source location \p SrcLocStr and \p Flags.
1385 /// TODO: Create an enum class for the Reserve2Flags
1386 Constant *getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize,
1387 omp::IdentFlag Flags = omp::IdentFlag(0),
1388 unsigned Reserve2Flags = 0);
1389
1390 /// Create a hidden global flag \p Name in the module with initial value \p
1391 /// Value.
1393
1394 /// Generate control flow and cleanup for cancellation.
1395 ///
1396 /// \param CancelFlag Flag indicating if the cancellation is performed.
1397 /// \param CanceledDirective The kind of directive that is canceled.
1398 /// \param ExitCB Extra code to be generated in the exit block.
1399 void emitCancelationCheckImpl(Value *CancelFlag,
1400 omp::Directive CanceledDirective,
1401 FinalizeCallbackTy ExitCB = {});
1402
1403 /// Generate a target region entry call.
1404 ///
1405 /// \param Loc The location at which the request originated and is fulfilled.
1406 /// \param AllocaIP The insertion point to be used for alloca instructions.
1407 /// \param Return Return value of the created function returned by reference.
1408 /// \param DeviceID Identifier for the device via the 'device' clause.
1409 /// \param NumTeams Number of teams for the region via the 'num_teams' clause
1410 /// or 0 if unspecified and -1 if there is no 'teams' clause.
1411 /// \param NumThreads Number of threads via the 'thread_limit' clause.
1412 /// \param HostPtr Pointer to the host-side pointer of the target kernel.
1413 /// \param KernelArgs Array of arguments to the kernel.
1414 InsertPointTy emitTargetKernel(const LocationDescription &Loc,
1415 InsertPointTy AllocaIP, Value *&Return,
1416 Value *Ident, Value *DeviceID, Value *NumTeams,
1417 Value *NumThreads, Value *HostPtr,
1418 ArrayRef<Value *> KernelArgs);
1419
1420 /// Generate a barrier runtime call.
1421 ///
1422 /// \param Loc The location at which the request originated and is fulfilled.
1423 /// \param DK The directive which caused the barrier
1424 /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
1425 /// \param CheckCancelFlag Flag to indicate a cancel barrier return value
1426 /// should be checked and acted upon.
1427 ///
1428 /// \returns The insertion point after the barrier.
1429 InsertPointTy emitBarrierImpl(const LocationDescription &Loc,
1430 omp::Directive DK, bool ForceSimpleCall,
1431 bool CheckCancelFlag);
1432
1433 /// Generate a flush runtime call.
1434 ///
1435 /// \param Loc The location at which the request originated and is fulfilled.
1436 void emitFlush(const LocationDescription &Loc);
1437
1438 /// The finalization stack made up of finalize callbacks currently in-flight,
1439 /// wrapped into FinalizationInfo objects that reference also the finalization
1440 /// target block and the kind of cancellable directive.
1442
1443 /// Return true if the last entry in the finalization stack is of kind \p DK
1444 /// and cancellable.
1445 bool isLastFinalizationInfoCancellable(omp::Directive DK) {
1446 return !FinalizationStack.empty() &&
1447 FinalizationStack.back().IsCancellable &&
1448 FinalizationStack.back().DK == DK;
1449 }
1450
1451 /// Generate a taskwait runtime call.
1452 ///
1453 /// \param Loc The location at which the request originated and is fulfilled.
1454 void emitTaskwaitImpl(const LocationDescription &Loc);
1455
1456 /// Generate a taskyield runtime call.
1457 ///
1458 /// \param Loc The location at which the request originated and is fulfilled.
1459 void emitTaskyieldImpl(const LocationDescription &Loc);
1460
1461 /// Return the current thread ID.
1462 ///
1463 /// \param Ident The ident (ident_t*) describing the query origin.
1465
1466 /// The OpenMPIRBuilder Configuration
1468
1469 /// The underlying LLVM-IR module
1471
1472 /// The LLVM-IR Builder used to create IR.
1474
1475 /// Map to remember source location strings
1477
1478 /// Map to remember existing ident_t*.
1480
1481 /// Info manager to keep track of target regions.
1483
1484 /// The target triple of the underlying module.
1485 const Triple T;
1486
1487 /// Helper that contains information about regions we need to outline
1488 /// during finalization.
1490 using PostOutlineCBTy = std::function<void(Function &)>;
1494
1495 /// Collect all blocks in between EntryBB and ExitBB in both the given
1496 /// vector and set.
1498 SmallVectorImpl<BasicBlock *> &BlockVector);
1499
1500 /// Return the function that contains the region to be outlined.
1501 Function *getFunction() const { return EntryBB->getParent(); }
1502 };
1503
1504 /// Collection of regions that need to be outlined during finalization.
1506
1507 /// Collection of owned canonical loop objects that eventually need to be
1508 /// free'd.
1509 std::forward_list<CanonicalLoopInfo> LoopInfos;
1510
1511 /// Add a new region that will be outlined later.
1512 void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(OI); }
1513
1514 /// An ordered map of auto-generated variables to their unique names.
1515 /// It stores variables with the following names: 1) ".gomp_critical_user_" +
1516 /// <critical_section_name> + ".var" for "omp critical" directives; 2)
1517 /// <mangled_name_for_global_var> + ".cache." for cache for threadprivate
1518 /// variables.
1520
1521 /// Computes the size of type in bytes.
1522 Value *getSizeInBytes(Value *BasePtr);
1523
1524 // Emit a branch from the current block to the Target block only if
1525 // the current block has a terminator.
1527
1528 // If BB has no use then delete it and return. Else place BB after the current
1529 // block, if possible, or else at the end of the function. Also add a branch
1530 // from current block to BB if current block does not have a terminator.
1531 void emitBlock(BasicBlock *BB, Function *CurFn, bool IsFinished = false);
1532
1533 /// Emits code for OpenMP 'if' clause using specified \a BodyGenCallbackTy
1534 /// Here is the logic:
1535 /// if (Cond) {
1536 /// ThenGen();
1537 /// } else {
1538 /// ElseGen();
1539 /// }
1541 BodyGenCallbackTy ElseGen, InsertPointTy AllocaIP = {});
1542
1543 /// Create the global variable holding the offload mappings information.
1545 std::string VarName);
1546
1547 /// Create the global variable holding the offload names information.
1550 std::string VarName);
1551
1554 AllocaInst *Args = nullptr;
1556 };
1557
1558 /// Create the alloca instructions used in call to mapper functions.
1560 InsertPointTy AllocaIP, unsigned NumOperands,
1562
1563 /// Create the call for the target mapper function.
1564 /// \param Loc The source location description.
1565 /// \param MapperFunc Function to be called.
1566 /// \param SrcLocInfo Source location information global.
1567 /// \param MaptypesArg The argument types.
1568 /// \param MapnamesArg The argument names.
1569 /// \param MapperAllocas The AllocaInst used for the call.
1570 /// \param DeviceID Device ID for the call.
1571 /// \param NumOperands Number of operands in the call.
1572 void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc,
1573 Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg,
1574 struct MapperAllocas &MapperAllocas, int64_t DeviceID,
1575 unsigned NumOperands);
1576
1577 /// Container for the arguments used to pass data to the runtime library.
1579 /// The array of base pointers passed to the runtime library.
1581 /// The array of section pointers passed to the runtime library.
1583 /// The array of sizes passed to the runtime library.
1584 Value *SizesArray = nullptr;
1585 /// The array of map types passed to the runtime library for the beginning
1586 /// of the region or for the entire region if there are no separate map
1587 /// types for the region end.
1589 /// The array of map types passed to the runtime library for the end of the
1590 /// region, or nullptr if there are no separate map types for the region
1591 /// end.
1593 /// The array of user-defined mappers passed to the runtime library.
1595 /// The array of original declaration names of mapped pointers sent to the
1596 /// runtime library for debugging
1598
1599 explicit TargetDataRTArgs() {}
1608 };
1609
1610 /// Data structure that contains the needed information to construct the
1611 /// kernel args vector.
1613 /// Number of arguments passed to the runtime library.
1615 /// Arguments passed to the runtime library
1617 /// The number of iterations
1619 /// The number of teams.
1621 /// The number of threads.
1623 /// The size of the dynamic shared memory.
1625 /// True if the kernel has 'no wait' clause.
1627
1628 /// Constructor for TargetKernelArgs
1636 };
1637
1638 /// Create the kernel args vector used by emitTargetKernel. This function
1639 /// creates various constant values that are used in the resulting args
1640 /// vector.
1641 static void getKernelArgsVector(TargetKernelArgs &KernelArgs,
1643 SmallVector<Value *> &ArgsVector);
1644
1645 /// Struct that keeps the information that should be kept throughout
1646 /// a 'target data' region.
1648 /// Set to true if device pointer information has to be obtained.
1649 bool RequiresDevicePointerInfo = false;
1650 /// Set to true if Clang emits separate runtime calls for the beginning and
1651 /// end of the region. These calls might have separate map type arrays.
1652 bool SeparateBeginEndCalls = false;
1653
1654 public:
1656
1659
1660 /// Indicate whether any user-defined mapper exists.
1661 bool HasMapper = false;
1662 /// The total number of pointers passed to the runtime library.
1663 unsigned NumberOfPtrs = 0u;
1664
1665 explicit TargetDataInfo() {}
1666 explicit TargetDataInfo(bool RequiresDevicePointerInfo,
1667 bool SeparateBeginEndCalls)
1668 : RequiresDevicePointerInfo(RequiresDevicePointerInfo),
1669 SeparateBeginEndCalls(SeparateBeginEndCalls) {}
1670 /// Clear information about the data arrays.
1673 HasMapper = false;
1674 NumberOfPtrs = 0u;
1675 }
1676 /// Return true if the current target data information has valid arrays.
1677 bool isValid() {
1681 }
1682 bool requiresDevicePointerInfo() { return RequiresDevicePointerInfo; }
1683 bool separateBeginEndCalls() { return SeparateBeginEndCalls; }
1684 };
1685
1693
1694 /// This structure contains combined information generated for mappable
1695 /// clauses, including base pointers, pointers, sizes, map types, user-defined
1696 /// mappers, and non-contiguous information.
1697 struct MapInfosTy {
1699 bool IsNonContiguous = false;
1704 };
1712
1713 /// Append arrays in \a CurInfo.
1714 void append(MapInfosTy &CurInfo) {
1716 CurInfo.BasePointers.end());
1717 Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
1719 CurInfo.DevicePointers.end());
1720 Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
1721 Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
1722 Names.append(CurInfo.Names.begin(), CurInfo.Names.end());
1724 CurInfo.NonContigInfo.Dims.end());
1726 CurInfo.NonContigInfo.Offsets.end());
1728 CurInfo.NonContigInfo.Counts.end());
1730 CurInfo.NonContigInfo.Strides.end());
1731 }
1732 };
1733
1734 /// Callback function type for functions emitting the host fallback code that
1735 /// is executed when the kernel launch fails. It takes an insertion point as
1736 /// parameter where the code should be emitted. It returns an insertion point
1737 /// that points right after the emitted code.
1739
1740 /// Generate a target region entry call and host fallback call.
1741 ///
1742 /// \param Loc The location at which the request originated and is fulfilled.
1743 /// \param OutlinedFn The outlined kernel function.
1744 /// \param OutlinedFnID The outlined function ID.
1745 /// \param EmitTargetCallFallbackCB Call back function to generate host
1746 /// fallback code.
1747 /// \param Args Data structure holding information about the kernel arguments.
1748 /// \param DeviceID Identifier for the device via the 'device' clause.
1749 /// \param RTLoc Source location identifier
1750 /// \param AllocaIP The insertion point to be used for alloca instructions.
1752 const LocationDescription &Loc, Function *OutlinedFn, Value *OutlinedFnID,
1753 EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args,
1754 Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP);
1755
1756 /// Emit the arguments to be passed to the runtime library based on the
1757 /// arrays of base pointers, pointers, sizes, map types, and mappers. If
1758 /// ForEndCall, emit map types to be passed for the end of the region instead
1759 /// of the beginning.
1763 bool EmitDebug = false,
1764 bool ForEndCall = false);
1765
1766 /// Emit an array of struct descriptors to be assigned to the offload args.
1768 InsertPointTy CodeGenIP,
1769 MapInfosTy &CombinedInfo,
1771
1772 /// Emit the arrays used to pass the captures and map information to the
1773 /// offloading runtime library. If there is no map or capture information,
1774 /// return nullptr by reference.
1776 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
1777 TargetDataInfo &Info, bool IsNonContiguous = false,
1778 function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
1779 function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
1780
1781 /// Creates offloading entry for the provided entry ID \a ID, address \a
1782 /// Addr, size \a Size, and flags \a Flags.
1784 int32_t Flags, GlobalValue::LinkageTypes,
1785 StringRef Name = "");
1786
1787 /// The kind of errors that can occur when emitting the offload entries and
1788 /// metadata.
1794
1795 /// Callback function type
1797 std::function<void(EmitMetadataErrorKind, TargetRegionEntryInfo)>;
1798
1799 // Emit the offloading entries and metadata so that the device codegen side
1800 // can easily figure out what to emit. The produced metadata looks like
1801 // this:
1802 //
1803 // !omp_offload.info = !{!1, ...}
1804 //
1805 // We only generate metadata for function that contain target regions.
1807 EmitMetadataErrorReportFunctionTy &ErrorReportFunction);
1808
1809public:
1810 /// Generator for __kmpc_copyprivate
1811 ///
1812 /// \param Loc The source location description.
1813 /// \param BufSize Number of elements in the buffer.
1814 /// \param CpyBuf List of pointers to data to be copied.
1815 /// \param CpyFn function to call for copying data.
1816 /// \param DidIt flag variable; 1 for 'single' thread, 0 otherwise.
1817 ///
1818 /// \return The insertion position *after* the CopyPrivate call.
1819
1821 llvm::Value *BufSize, llvm::Value *CpyBuf,
1822 llvm::Value *CpyFn, llvm::Value *DidIt);
1823
1824 /// Generator for '#omp single'
1825 ///
1826 /// \param Loc The source location description.
1827 /// \param BodyGenCB Callback that will generate the region code.
1828 /// \param FiniCB Callback to finalize variable copies.
1829 /// \param IsNowait If false, a barrier is emitted.
1830 /// \param DidIt Local variable used as a flag to indicate 'single' thread
1831 ///
1832 /// \returns The insertion position *after* the single call.
1834 BodyGenCallbackTy BodyGenCB,
1835 FinalizeCallbackTy FiniCB, bool IsNowait,
1836 llvm::Value *DidIt);
1837
1838 /// Generator for '#omp master'
1839 ///
1840 /// \param Loc The insert and source location description.
1841 /// \param BodyGenCB Callback that will generate the region code.
1842 /// \param FiniCB Callback to finalize variable copies.
1843 ///
1844 /// \returns The insertion position *after* the master.
1846 BodyGenCallbackTy BodyGenCB,
1847 FinalizeCallbackTy FiniCB);
1848
1849 /// Generator for '#omp masked'
1850 ///
1851 /// \param Loc The insert and source location description.
1852 /// \param BodyGenCB Callback that will generate the region code.
1853 /// \param FiniCB Callback to finalize variable copies.
1854 ///
1855 /// \returns The insertion position *after* the masked.
1857 BodyGenCallbackTy BodyGenCB,
1859
1860 /// Generator for '#omp critical'
1861 ///
1862 /// \param Loc The insert and source location description.
1863 /// \param BodyGenCB Callback that will generate the region body code.
1864 /// \param FiniCB Callback to finalize variable copies.
1865 /// \param CriticalName name of the lock used by the critical directive
1866 /// \param HintInst Hint Instruction for hint clause associated with critical
1867 ///
1868 /// \returns The insertion position *after* the critical.
1870 BodyGenCallbackTy BodyGenCB,
1871 FinalizeCallbackTy FiniCB,
1872 StringRef CriticalName, Value *HintInst);
1873
1874 /// Generator for '#omp ordered depend (source | sink)'
1875 ///
1876 /// \param Loc The insert and source location description.
1877 /// \param AllocaIP The insertion point to be used for alloca instructions.
1878 /// \param NumLoops The number of loops in depend clause.
1879 /// \param StoreValues The value will be stored in vector address.
1880 /// \param Name The name of alloca instruction.
1881 /// \param IsDependSource If true, depend source; otherwise, depend sink.
1882 ///
1883 /// \return The insertion position *after* the ordered.
1885 InsertPointTy AllocaIP, unsigned NumLoops,
1886 ArrayRef<llvm::Value *> StoreValues,
1887 const Twine &Name, bool IsDependSource);
1888
1889 /// Generator for '#omp ordered [threads | simd]'
1890 ///
1891 /// \param Loc The insert and source location description.
1892 /// \param BodyGenCB Callback that will generate the region code.
1893 /// \param FiniCB Callback to finalize variable copies.
1894 /// \param IsThreads If true, with threads clause or without clause;
1895 /// otherwise, with simd clause;
1896 ///
1897 /// \returns The insertion position *after* the ordered.
1899 BodyGenCallbackTy BodyGenCB,
1900 FinalizeCallbackTy FiniCB,
1901 bool IsThreads);
1902
1903 /// Generator for '#omp sections'
1904 ///
1905 /// \param Loc The insert and source location description.
1906 /// \param AllocaIP The insertion points to be used for alloca instructions.
1907 /// \param SectionCBs Callbacks that will generate body of each section.
1908 /// \param PrivCB Callback to copy a given variable (think copy constructor).
1909 /// \param FiniCB Callback to finalize variable copies.
1910 /// \param IsCancellable Flag to indicate a cancellable parallel region.
1911 /// \param IsNowait If true, the barrier that ensures all sections are
1912 /// executed before moving forward will not be generated.
1913 /// \returns The insertion position *after* the sections.
1915 InsertPointTy AllocaIP,
1917 PrivatizeCallbackTy PrivCB,
1918 FinalizeCallbackTy FiniCB, bool IsCancellable,
1919 bool IsNowait);
1920
1921 /// Generator for '#omp section'
1922 ///
1923 /// \param Loc The insert and source location description.
1924 /// \param BodyGenCB Callback that will generate the region body code.
1925 /// \param FiniCB Callback to finalize variable copies.
1926 /// \returns The insertion position *after* the section.
1928 BodyGenCallbackTy BodyGenCB,
1929 FinalizeCallbackTy FiniCB);
1930
1931 /// Generator for `#omp teams`
1932 ///
1933 /// \param Loc The location where the teams construct was encountered.
1934 /// \param BodyGenCB Callback that will generate the region code.
1935 /// \param NumTeamsLower Lower bound on number of teams. If this is nullptr,
1936 /// it is as if lower bound is specified as equal to upperbound. If
1937 /// this is non-null, then upperbound must also be non-null.
1938 /// \param NumTeamsUpper Upper bound on the number of teams.
1939 /// \param ThreadLimit Limit on the number of threads that may participate in
1940 /// a contention group created by each team.
1941 /// \param IfExpr is the integer argument value of the if condition on the
1942 /// teams clause.
1945 Value *NumTeamsLower = nullptr, Value *NumTeamsUpper = nullptr,
1946 Value *ThreadLimit = nullptr, Value *IfExpr = nullptr);
1947
1948 /// Generate conditional branch and relevant BasicBlocks through which private
1949 /// threads copy the 'copyin' variables from Master copy to threadprivate
1950 /// copies.
1951 ///
1952 /// \param IP insertion block for copyin conditional
1953 /// \param MasterVarPtr a pointer to the master variable
1954 /// \param PrivateAddr a pointer to the threadprivate variable
1955 /// \param IntPtrTy Pointer size type
1956 /// \param BranchtoEnd Create a branch between the copyin.not.master blocks
1957 /// and copy.in.end block
1958 ///
1959 /// \returns The insertion point where copying operation to be emitted.
1961 Value *PrivateAddr,
1962 llvm::IntegerType *IntPtrTy,
1963 bool BranchtoEnd = true);
1964
1965 /// Create a runtime call for kmpc_Alloc
1966 ///
1967 /// \param Loc The insert and source location description.
1968 /// \param Size Size of allocated memory space
1969 /// \param Allocator Allocator information instruction
1970 /// \param Name Name of call Instruction for OMP_alloc
1971 ///
1972 /// \returns CallInst to the OMP_Alloc call
1974 Value *Allocator, std::string Name = "");
1975
1976 /// Create a runtime call for kmpc_free
1977 ///
1978 /// \param Loc The insert and source location description.
1979 /// \param Addr Address of memory space to be freed
1980 /// \param Allocator Allocator information instruction
1981 /// \param Name Name of call Instruction for OMP_Free
1982 ///
1983 /// \returns CallInst to the OMP_Free call
1985 Value *Allocator, std::string Name = "");
1986
1987 /// Create a runtime call for kmpc_threadprivate_cached
1988 ///
1989 /// \param Loc The insert and source location description.
1990 /// \param Pointer pointer to data to be cached
1991 /// \param Size size of data to be cached
1992 /// \param Name Name of call Instruction for callinst
1993 ///
1994 /// \returns CallInst to the thread private cache call.
1998 const llvm::Twine &Name = Twine(""));
1999
2000 /// Create a runtime call for __tgt_interop_init
2001 ///
2002 /// \param Loc The insert and source location description.
2003 /// \param InteropVar variable to be allocated
2004 /// \param InteropType type of interop operation
2005 /// \param Device device to which offloading will occur
2006 /// \param NumDependences number of dependence variables
2007 /// \param DependenceAddress pointer to dependence variables
2008 /// \param HaveNowaitClause does nowait clause exist
2009 ///
2010 /// \returns CallInst to the __tgt_interop_init call
2012 Value *InteropVar,
2013 omp::OMPInteropType InteropType, Value *Device,
2014 Value *NumDependences,
2015 Value *DependenceAddress,
2016 bool HaveNowaitClause);
2017
2018 /// Create a runtime call for __tgt_interop_destroy
2019 ///
2020 /// \param Loc The insert and source location description.
2021 /// \param InteropVar variable to be allocated
2022 /// \param Device device to which offloading will occur
2023 /// \param NumDependences number of dependence variables
2024 /// \param DependenceAddress pointer to dependence variables
2025 /// \param HaveNowaitClause does nowait clause exist
2026 ///
2027 /// \returns CallInst to the __tgt_interop_destroy call
2029 Value *InteropVar, Value *Device,
2030 Value *NumDependences,
2031 Value *DependenceAddress,
2032 bool HaveNowaitClause);
2033
2034 /// Create a runtime call for __tgt_interop_use
2035 ///
2036 /// \param Loc The insert and source location description.
2037 /// \param InteropVar variable to be allocated
2038 /// \param Device device to which offloading will occur
2039 /// \param NumDependences number of dependence variables
2040 /// \param DependenceAddress pointer to dependence variables
2041 /// \param HaveNowaitClause does nowait clause exist
2042 ///
2043 /// \returns CallInst to the __tgt_interop_use call
2045 Value *InteropVar, Value *Device,
2046 Value *NumDependences, Value *DependenceAddress,
2047 bool HaveNowaitClause);
2048
2049 /// The `omp target` interface
2050 ///
2051 /// For more information about the usage of this interface,
2052 /// \see openmp/libomptarget/deviceRTLs/common/include/target.h
2053 ///
2054 ///{
2055
2056 /// Create a runtime call for kmpc_target_init
2057 ///
2058 /// \param Loc The insert and source location description.
2059 /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
2060 /// \param MinThreadsVal Minimal number of threads, or 0.
2061 /// \param MaxThreadsVal Maximal number of threads, or 0.
2062 /// \param MinTeamsVal Minimal number of teams, or 0.
2063 /// \param MaxTeamsVal Maximal number of teams, or 0.
2064 InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD,
2065 int32_t MinThreadsVal = 0,
2066 int32_t MaxThreadsVal = 0,
2067 int32_t MinTeamsVal = 0,
2068 int32_t MaxTeamsVal = 0);
2069
2070 /// Create a runtime call for kmpc_target_deinit
2071 ///
2072 /// \param Loc The insert and source location description.
2073 /// \param TeamsReductionDataSize The maximal size of all the reduction data
2074 /// for teams reduction.
2075 /// \param TeamsReductionBufferLength The number of elements (each of up to
2076 /// \p TeamsReductionDataSize size), in the teams reduction buffer.
2078 int32_t TeamsReductionDataSize = 0,
2079 int32_t TeamsReductionBufferLength = 1024);
2080
2081 ///}
2082
2083 /// Helpers to read/write kernel annotations from the IR.
2084 ///
2085 ///{
2086
2087 /// Read/write bounds on threads for \p Kernel. Read will return 0 if none
2088 /// is set.
2089 static std::pair<int32_t, int32_t>
2090 readThreadBoundsForKernel(const Triple &T, Function &Kernel);
2091 static void writeThreadBoundsForKernel(const Triple &T, Function &Kernel,
2092 int32_t LB, int32_t UB);
2093
2094 /// Read/write bounds on teams for \p Kernel. Read will return 0 if none
2095 /// is set.
2096 static std::pair<int32_t, int32_t> readTeamBoundsForKernel(const Triple &T,
2097 Function &Kernel);
2098 static void writeTeamsForKernel(const Triple &T, Function &Kernel, int32_t LB,
2099 int32_t UB);
2100 ///}
2101
2102private:
2103 // Sets the function attributes expected for the outlined function
2104 void setOutlinedTargetRegionFunctionAttributes(Function *OutlinedFn);
2105
2106 // Creates the function ID/Address for the given outlined function.
2107 // In the case of an embedded device function the address of the function is
2108 // used, in the case of a non-offload function a constant is created.
2109 Constant *createOutlinedFunctionID(Function *OutlinedFn,
2110 StringRef EntryFnIDName);
2111
2112 // Creates the region entry address for the outlined function
2113 Constant *createTargetRegionEntryAddr(Function *OutlinedFunction,
2114 StringRef EntryFnName);
2115
2116public:
2117 /// Functions used to generate a function with the given name.
2118 using FunctionGenCallback = std::function<Function *(StringRef FunctionName)>;
2119
2120 /// Create a unique name for the entry function using the source location
2121 /// information of the current target region. The name will be something like:
2122 ///
2123 /// __omp_offloading_DD_FFFF_PP_lBB[_CC]
2124 ///
2125 /// where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
2126 /// mangled name of the function that encloses the target region and BB is the
2127 /// line number of the target region. CC is a count added when more than one
2128 /// region is located at the same location.
2129 ///
2130 /// If this target outline function is not an offload entry, we don't need to
2131 /// register it. This may happen if it is guarded by an if clause that is
2132 /// false at compile time, or no target archs have been specified.
2133 ///
2134 /// The created target region ID is used by the runtime library to identify
2135 /// the current target region, so it only has to be unique and not
2136 /// necessarily point to anything. It could be the pointer to the outlined
2137 /// function that implements the target region, but we aren't using that so
2138 /// that the compiler doesn't need to keep that, and could therefore inline
2139 /// the host function if proven worthwhile during optimization. On the other
2140 /// hand, if emitting code for the device, the ID has to be the function
2141 /// address so that it can be retrieved from the offloading entry and launched
2142 /// by the runtime library. We also mark the outlined function to have
2143 /// external linkage in case we are emitting code for the device, because
2144 /// these functions will be entry points to the device.
2145 ///
2146 /// \param InfoManager The info manager keeping track of the offload entries
2147 /// \param EntryInfo The entry information about the function
2148 /// \param GenerateFunctionCallback The callback function to generate the code
2149 /// \param OutlinedFunction Pointer to the outlined function
2150 /// \param EntryFnIDName Name of the ID to be created
2152 FunctionGenCallback &GenerateFunctionCallback,
2153 bool IsOffloadEntry, Function *&OutlinedFn,
2154 Constant *&OutlinedFnID);
2155
2156 /// Registers the given function and sets up the attributes of the function.
2157 /// Returns the FunctionID.
2158 ///
2159 /// \param InfoManager The info manager keeping track of the offload entries
2160 /// \param EntryInfo The entry information about the function
2161 /// \param OutlinedFunction Pointer to the outlined function
2162 /// \param EntryFnName Name of the outlined function
2163 /// \param EntryFnIDName Name of the ID to be created
2165 Function *OutlinedFunction,
2166 StringRef EntryFnName,
2167 StringRef EntryFnIDName);
2168
2169 /// Type of BodyGen to use for region codegen
2170 ///
2171 /// Priv: If device pointer privatization is required, emit the body of the
2172 /// region here. It will have to be duplicated: with and without
2173 /// privatization.
2174 /// DupNoPriv: If we need device pointer privatization, we need
2175 /// to emit the body of the region with no privatization in the 'else' branch
2176 /// of the conditional.
2177 /// NoPriv: If we don't require privatization of device
2178 /// pointers, we emit the body in between the runtime calls. This avoids
2179 /// duplicating the body code.
2181
2182 /// Callback type for creating the map infos for the kernel parameters.
2183 /// \param CodeGenIP is the insertion point where code should be generated,
2184 /// if any.
2187
2188 /// Generator for '#omp target data'
2189 ///
2190 /// \param Loc The location where the target data construct was encountered.
2191 /// \param AllocaIP The insertion points to be used for alloca instructions.
2192 /// \param CodeGenIP The insertion point at which the target directive code
2193 /// should be placed.
2194 /// \param IsBegin If true then emits begin mapper call otherwise emits
2195 /// end mapper call.
2196 /// \param DeviceID Stores the DeviceID from the device clause.
2197 /// \param IfCond Value which corresponds to the if clause condition.
2198 /// \param Info Stores all information related to the Target Data directive.
2199 /// \param GenMapInfoCB Callback that populates the MapInfos and returns.
2200 /// \param BodyGenCB Optional Callback to generate the region code.
2201 /// \param DeviceAddrCB Optional callback to generate code related to
2202 /// use_device_ptr and use_device_addr.
2203 /// \param CustomMapperCB Optional callback to generate code related to
2204 /// custom mappers.
2206 const LocationDescription &Loc, InsertPointTy AllocaIP,
2207 InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond,
2209 omp::RuntimeFunction *MapperFunc = nullptr,
2211 BodyGenTy BodyGenType)>
2212 BodyGenCB = nullptr,
2213 function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
2214 function_ref<Value *(unsigned int)> CustomMapperCB = nullptr,
2215 Value *SrcLocInfo = nullptr);
2216
2218 InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
2219
2221 Argument &Arg, Value *Input, Value *&RetVal, InsertPointTy AllocaIP,
2222 InsertPointTy CodeGenIP)>;
2223
2224 /// Generator for '#omp target'
2225 ///
2226 /// \param Loc where the target data construct was encountered.
2227 /// \param CodeGenIP The insertion point where the call to the outlined
2228 /// function should be emitted.
2229 /// \param EntryInfo The entry information about the function.
2230 /// \param NumTeams Number of teams specified in the num_teams clause.
2231 /// \param NumThreads Number of threads specified in the thread_limit clause.
2232 /// \param Inputs The input values to the region that will be passed
2233 /// as arguments to the outlined function.
2234 /// \param BodyGenCB Callback that will generate the region code.
2235 /// \param ArgAccessorFuncCB Callback that will generate accessors
2236 /// instructions for passed in target arguments where necessary
2240 TargetRegionEntryInfo &EntryInfo, int32_t NumTeams,
2241 int32_t NumThreads,
2243 GenMapInfoCallbackTy GenMapInfoCB,
2244 TargetBodyGenCallbackTy BodyGenCB,
2245 TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB);
2246
2247 /// Returns __kmpc_for_static_init_* runtime function for the specified
2248 /// size \a IVSize and sign \a IVSigned. Will create a distribute call
2249 /// __kmpc_distribute_static_init* if \a IsGPUDistribute is set.
2250 FunctionCallee createForStaticInitFunction(unsigned IVSize, bool IVSigned,
2251 bool IsGPUDistribute);
2252
2253 /// Returns __kmpc_dispatch_init_* runtime function for the specified
2254 /// size \a IVSize and sign \a IVSigned.
2255 FunctionCallee createDispatchInitFunction(unsigned IVSize, bool IVSigned);
2256
2257 /// Returns __kmpc_dispatch_next_* runtime function for the specified
2258 /// size \a IVSize and sign \a IVSigned.
2259 FunctionCallee createDispatchNextFunction(unsigned IVSize, bool IVSigned);
2260
2261 /// Returns __kmpc_dispatch_fini_* runtime function for the specified
2262 /// size \a IVSize and sign \a IVSigned.
2263 FunctionCallee createDispatchFiniFunction(unsigned IVSize, bool IVSigned);
2264
2265 /// Declarations for LLVM-IR types (simple, array, function and structure) are
2266 /// generated below. Their names are defined and used in OpenMPKinds.def. Here
2267 /// we provide the declarations, the initializeTypes function will provide the
2268 /// values.
2269 ///
2270 ///{
2271#define OMP_TYPE(VarName, InitValue) Type *VarName = nullptr;
2272#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
2273 ArrayType *VarName##Ty = nullptr; \
2274 PointerType *VarName##PtrTy = nullptr;
2275#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
2276 FunctionType *VarName = nullptr; \
2277 PointerType *VarName##Ptr = nullptr;
2278#define OMP_STRUCT_TYPE(VarName, StrName, ...) \
2279 StructType *VarName = nullptr; \
2280 PointerType *VarName##Ptr = nullptr;
2281#include "llvm/Frontend/OpenMP/OMPKinds.def"
2282
2283 ///}
2284
2285private:
2286 /// Create all simple and struct types exposed by the runtime and remember
2287 /// the llvm::PointerTypes of them for easy access later.
2288 void initializeTypes(Module &M);
2289
2290 /// Common interface for generating entry calls for OMP Directives.
2291 /// If the directive has a region/body, it will set the insertion
2292 /// point to the body.
2293 ///
2294 /// \param OMPD Directive to generate entry blocks for
2295 /// \param EntryCall Call to the entry OMP Runtime Function
2296 /// \param ExitBB block where the region ends.
2297 /// \param Conditional indicate if the entry call result will be used
2298 /// to evaluate a conditional of whether a thread will execute
2299 /// body code or not.
2300 ///
2301 /// \return The insertion position in exit block
2302 InsertPointTy emitCommonDirectiveEntry(omp::Directive OMPD, Value *EntryCall,
2303 BasicBlock *ExitBB,
2304 bool Conditional = false);
2305
2306 /// Common interface to finalize the region
2307 ///
2308 /// \param OMPD Directive to generate exiting code for
2309 /// \param FinIP Insertion point for emitting Finalization code and exit call
2310 /// \param ExitCall Call to the ending OMP Runtime Function
2311 /// \param HasFinalize indicate if the directive will require finalization
2312 /// and has a finalization callback in the stack that
2313 /// should be called.
2314 ///
2315 /// \return The insertion position in exit block
2316 InsertPointTy emitCommonDirectiveExit(omp::Directive OMPD,
2317 InsertPointTy FinIP,
2318 Instruction *ExitCall,
2319 bool HasFinalize = true);
2320
2321 /// Common Interface to generate OMP inlined regions
2322 ///
2323 /// \param OMPD Directive to generate inlined region for
2324 /// \param EntryCall Call to the entry OMP Runtime Function
2325 /// \param ExitCall Call to the ending OMP Runtime Function
2326 /// \param BodyGenCB Body code generation callback.
2327 /// \param FiniCB Finalization Callback. Will be called when finalizing region
2328 /// \param Conditional indicate if the entry call result will be used
2329 /// to evaluate a conditional of whether a thread will execute
2330 /// body code or not.
2331 /// \param HasFinalize indicate if the directive will require finalization
2332 /// and has a finalization callback in the stack that
2333 /// should be called.
2334 /// \param IsCancellable if HasFinalize is set to true, indicate if the
2335 /// the directive should be cancellable.
2336 /// \return The insertion point after the region
2337
2339 EmitOMPInlinedRegion(omp::Directive OMPD, Instruction *EntryCall,
2340 Instruction *ExitCall, BodyGenCallbackTy BodyGenCB,
2341 FinalizeCallbackTy FiniCB, bool Conditional = false,
2342 bool HasFinalize = true, bool IsCancellable = false);
2343
2344 /// Get the platform-specific name separator.
2345 /// \param Parts different parts of the final name that needs separation
2346 /// \param FirstSeparator First separator used between the initial two
2347 /// parts of the name.
2348 /// \param Separator separator used between all of the rest consecutive
2349 /// parts of the name
2350 static std::string getNameWithSeparators(ArrayRef<StringRef> Parts,
2351 StringRef FirstSeparator,
2352 StringRef Separator);
2353
2354 /// Returns corresponding lock object for the specified critical region
2355 /// name. If the lock object does not exist it is created, otherwise the
2356 /// reference to the existing copy is returned.
2357 /// \param CriticalName Name of the critical region.
2358 ///
2359 Value *getOMPCriticalRegionLock(StringRef CriticalName);
2360
2361 /// Callback type for Atomic Expression update
2362 /// ex:
2363 /// \code{.cpp}
2364 /// unsigned x = 0;
2365 /// #pragma omp atomic update
2366 /// x = Expr(x_old); //Expr() is any legal operation
2367 /// \endcode
2368 ///
2369 /// \param XOld the value of the atomic memory address to use for update
2370 /// \param IRB reference to the IRBuilder to use
2371 ///
2372 /// \returns Value to update X to.
2373 using AtomicUpdateCallbackTy =
2374 const function_ref<Value *(Value *XOld, IRBuilder<> &IRB)>;
2375
2376private:
2377 enum AtomicKind { Read, Write, Update, Capture, Compare };
2378
2379 /// Determine whether to emit flush or not
2380 ///
2381 /// \param Loc The insert and source location description.
2382 /// \param AO The required atomic ordering
2383 /// \param AK The OpenMP atomic operation kind used.
2384 ///
2385 /// \returns whether a flush was emitted or not
2386 bool checkAndEmitFlushAfterAtomic(const LocationDescription &Loc,
2387 AtomicOrdering AO, AtomicKind AK);
2388
2389 /// Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X
2390 /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
2391 /// Only Scalar data types.
2392 ///
2393 /// \param AllocaIP The insertion point to be used for alloca
2394 /// instructions.
2395 /// \param X The target atomic pointer to be updated
2396 /// \param XElemTy The element type of the atomic pointer.
2397 /// \param Expr The value to update X with.
2398 /// \param AO Atomic ordering of the generated atomic
2399 /// instructions.
2400 /// \param RMWOp The binary operation used for update. If
2401 /// operation is not supported by atomicRMW,
2402 /// or belong to {FADD, FSUB, BAD_BINOP}.
2403 /// Then a `cmpExch` based atomic will be generated.
2404 /// \param UpdateOp Code generator for complex expressions that cannot be
2405 /// expressed through atomicrmw instruction.
2406 /// \param VolatileX true if \a X volatile?
2407 /// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the
2408 /// update expression, false otherwise.
2409 /// (e.g. true for X = X BinOp Expr)
2410 ///
2411 /// \returns A pair of the old value of X before the update, and the value
2412 /// used for the update.
2413 std::pair<Value *, Value *>
2414 emitAtomicUpdate(InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr,
2416 AtomicUpdateCallbackTy &UpdateOp, bool VolatileX,
2417 bool IsXBinopExpr);
2418
2419 /// Emit the binary op. described by \p RMWOp, using \p Src1 and \p Src2 .
2420 ///
2421 /// \return The instruction
2422 Value *emitRMWOpAsInstruction(Value *Src1, Value *Src2,
2423 AtomicRMWInst::BinOp RMWOp);
2424
2425public:
2426 /// a struct to pack relevant information while generating atomic Ops
2428 Value *Var = nullptr;
2429 Type *ElemTy = nullptr;
2430 bool IsSigned = false;
2431 bool IsVolatile = false;
2432 };
2433
2434 /// Emit atomic Read for : V = X --- Only Scalar data types.
2435 ///
2436 /// \param Loc The insert and source location description.
2437 /// \param X The target pointer to be atomically read
2438 /// \param V Memory address where to store atomically read
2439 /// value
2440 /// \param AO Atomic ordering of the generated atomic
2441 /// instructions.
2442 ///
2443 /// \return Insertion point after generated atomic read IR.
2446 AtomicOrdering AO);
2447
2448 /// Emit atomic write for : X = Expr --- Only Scalar data types.
2449 ///
2450 /// \param Loc The insert and source location description.
2451 /// \param X The target pointer to be atomically written to
2452 /// \param Expr The value to store.
2453 /// \param AO Atomic ordering of the generated atomic
2454 /// instructions.
2455 ///
2456 /// \return Insertion point after generated atomic Write IR.
2458 AtomicOpValue &X, Value *Expr,
2459 AtomicOrdering AO);
2460
2461 /// Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X
2462 /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
2463 /// Only Scalar data types.
2464 ///
2465 /// \param Loc The insert and source location description.
2466 /// \param AllocaIP The insertion point to be used for alloca instructions.
2467 /// \param X The target atomic pointer to be updated
2468 /// \param Expr The value to update X with.
2469 /// \param AO Atomic ordering of the generated atomic instructions.
2470 /// \param RMWOp The binary operation used for update. If the operation
2471 /// is not supported by atomicRMW, or belongs to
2472 /// {FADD, FSUB, BAD_BINOP}, then a `cmpExch`-based
2473 /// atomic will be generated.
2474 /// \param UpdateOp Code generator for complex expressions that cannot be
2475 /// expressed through atomicrmw instruction.
2476 /// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the
2477 /// update expression, false otherwise.
2478 /// (e.g. true for X = X BinOp Expr)
2479 ///
2480 /// \return Insertion point after generated atomic update IR.
2482 InsertPointTy AllocaIP, AtomicOpValue &X,
2483 Value *Expr, AtomicOrdering AO,
2485 AtomicUpdateCallbackTy &UpdateOp,
2486 bool IsXBinopExpr);
2487
2488 /// Emit atomic update for constructs: --- Only Scalar data types
2489 /// V = X; X = X BinOp Expr ,
2490 /// X = X BinOp Expr; V = X,
2491 /// V = X; X = Expr BinOp X,
2492 /// X = Expr BinOp X; V = X,
2493 /// V = X; X = UpdateOp(X),
2494 /// X = UpdateOp(X); V = X,
2495 ///
2496 /// \param Loc The insert and source location description.
2497 /// \param AllocaIP The insertion point to be used for alloca instructions.
2498 /// \param X The target atomic pointer to be updated
2499 /// \param V Memory address where to store captured value
2500 /// \param Expr The value to update X with.
2501 /// \param AO Atomic ordering of the generated atomic instructions
2502 /// \param RMWOp The binary operation used for update. If the
2503 /// operation is not supported by atomicRMW, or belongs to
2504 /// {FADD, FSUB, BAD_BINOP}, then a cmpExch-based
2505 /// atomic will be generated.
2506 /// \param UpdateOp Code generator for complex expressions that cannot be
2507 /// expressed through atomicrmw instruction.
2508 /// \param UpdateExpr true if X is an in place update of the form
2509 /// X = X BinOp Expr or X = Expr BinOp X
2510 /// \param IsXBinopExpr true if X is Left H.S. in Right H.S. part of the
2511 /// update expression, false otherwise.
2512 /// (e.g. true for X = X BinOp Expr)
2513 /// \param IsPostfixUpdate true if original value of 'x' must be stored in
2514 /// 'v', not an updated one.
2515 ///
2516 /// \return Insertion point after generated atomic capture IR.
2519 AtomicOpValue &X, AtomicOpValue &V, Value *Expr,
2521 AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr,
2522 bool IsPostfixUpdate, bool IsXBinopExpr);
2523
2524 /// Emit atomic compare for constructs: --- Only scalar data types
2525 /// cond-expr-stmt:
2526 /// x = x ordop expr ? expr : x;
2527 /// x = expr ordop x ? expr : x;
2528 /// x = x == e ? d : x;
2529 /// x = e == x ? d : x; (this one is not in the spec)
2530 /// cond-update-stmt:
2531 /// if (x ordop expr) { x = expr; }
2532 /// if (expr ordop x) { x = expr; }
2533 /// if (x == e) { x = d; }
2534 /// if (e == x) { x = d; } (this one is not in the spec)
2535 /// conditional-update-capture-atomic:
2536 /// v = x; cond-update-stmt; (IsPostfixUpdate=true, IsFailOnly=false)
2537 /// cond-update-stmt; v = x; (IsPostfixUpdate=false, IsFailOnly=false)
2538 /// if (x == e) { x = d; } else { v = x; } (IsPostfixUpdate=false,
2539 /// IsFailOnly=true)
2540 /// r = x == e; if (r) { x = d; } (IsPostfixUpdate=false, IsFailOnly=false)
2541 /// r = x == e; if (r) { x = d; } else { v = x; } (IsPostfixUpdate=false,
2542 /// IsFailOnly=true)
2543 ///
2544 /// \param Loc The insert and source location description.
2545 /// \param X The target atomic pointer to be updated.
2546 /// \param V Memory address where to store captured value (for
2547 /// compare capture only).
2548 /// \param R Memory address where to store comparison result
2549 /// (for compare capture with '==' only).
2550 /// \param E The expected value ('e') for forms that use an
2551 /// equality comparison or an expression ('expr') for
2552 /// forms that use 'ordop' (logically an atomic maximum or
2553 /// minimum).
2554 /// \param D The desired value for forms that use an equality
2555 /// comparison. For forms that use 'ordop', it should be
2556 /// \p nullptr.
2557 /// \param AO Atomic ordering of the generated atomic instructions.
2558 /// \param Op Atomic compare operation. It can only be ==, <, or >.
2559 /// \param IsXBinopExpr True if the conditional statement is in the form where
2560 /// x is on LHS. It only matters for < or >.
2561 /// \param IsPostfixUpdate True if original value of 'x' must be stored in
2562 /// 'v', not an updated one (for compare capture
2563 /// only).
2564 /// \param IsFailOnly True if the original value of 'x' is stored to 'v'
2565 /// only when the comparison fails. This is only valid for
2566 /// the case the comparison is '=='.
2567 ///
2568 /// \return Insertion point after generated atomic capture IR.
2573 bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly);
2576 AtomicOpValue &R, Value *E, Value *D,
2577 AtomicOrdering AO,
2579 bool IsXBinopExpr, bool IsPostfixUpdate,
2580 bool IsFailOnly, AtomicOrdering Failure);
2581
2582 /// Create the control flow structure of a canonical OpenMP loop.
2583 ///
2584 /// The emitted loop will be disconnected, i.e. no edge to the loop's
2585 /// preheader and no terminator in the AfterBB. The OpenMPIRBuilder's
2586 /// IRBuilder location is not preserved.
2587 ///
2588 /// \param DL DebugLoc used for the instructions in the skeleton.
2589 /// \param TripCount Value to be used for the trip count.
2590 /// \param F Function in which to insert the BasicBlocks.
2591 /// \param PreInsertBefore Where to insert BBs that execute before the body,
2592 /// typically the body itself.
2593 /// \param PostInsertBefore Where to insert BBs that execute after the body.
2594 /// \param Name Base name used to derive BB
2595 /// and instruction names.
2596 ///
2597 /// \returns The CanonicalLoopInfo that represents the emitted loop.
2599 Function *F,
2600 BasicBlock *PreInsertBefore,
2601 BasicBlock *PostInsertBefore,
2602 const Twine &Name = {});
2603 /// Name string of the OpenMP offload info metadata ("omp_offload.info").
2604 const std::string ompOffloadInfoName = "omp_offload.info";
2605
2606 /// Loads all the offload entries information from the host IR
2607 /// metadata. This function is only meant to be used with device code
2608 /// generation.
2609 ///
2610 /// \param M Module to load Metadata info from. The module passed may be
2611 /// loaded from a bitcode file, i.e., different from the OpenMPIRBuilder::M module.
2613
2614 /// Loads all the offload entry information from the host IR
2615 /// metadata read from the file passed in as the \p HostFilePath argument. This
2616 /// function is only meant to be used with device code generation.
2617 ///
2618 /// \param HostFilePath The path to the host IR file,
2619 /// used to load in offload metadata for the device, allowing host and device
2620 /// to maintain the same metadata mapping.
2621 void loadOffloadInfoMetadata(StringRef HostFilePath);
2622
2623 /// Gets (if a variable with the given name already exists) or creates an
2624 /// internal global variable with the specified Name. The created variable has
2625 /// linkage CommonLinkage by default and is initialized by null value.
2626 /// \param Ty Type of the global variable. If it already exists, the type
2627 /// must be the same.
2628 /// \param Name Name of the variable.
2630 unsigned AddressSpace = 0);
2631
2632 /// Create a global function to register OpenMP requires flags into the
2633 /// runtime, according to the `Config`.
2634 ///
2635 /// This function should be added to the list of constructors of the
2636 /// compilation unit in order to be called before other OpenMP runtime
2637 /// functions.
2638 ///
2639 /// \param Name Name of the created function.
2641};
2642
2643/// Class to represent the control flow structure of an OpenMP canonical loop.
2644///
2645/// The control-flow structure is standardized for easy consumption by
2646/// directives associated with loops. For instance, the worksharing-loop
2647/// construct may change this control flow such that each loop iteration is
2648/// executed on only one thread. The constraints of a canonical loop in brief
2649/// are:
2650///
2651/// * The number of loop iterations must have been computed before entering the
2652/// loop.
2653///
2654/// * Has an (unsigned) logical induction variable that starts at zero and
2655/// increments by one.
2656///
2657/// * The loop's CFG itself has no side-effects. The OpenMP specification
2658/// itself allows side-effects, but the order in which they happen, including
2659/// how often or whether at all, is unspecified. We expect that the frontend
2660/// will emit those side-effect instructions somewhere (e.g. before the loop)
2661/// such that the CanonicalLoopInfo itself can be side-effect free.
2662///
2663/// Keep in mind that CanonicalLoopInfo is meant to only describe a repeated
2664/// execution of a loop body that satisfies these constraints. It does NOT
2665/// represent arbitrary SESE regions that happen to contain a loop. Do not use
2666/// CanonicalLoopInfo for such purposes.
2667///
2668/// The control flow can be described as follows:
2669///
2670/// Preheader
2671/// |
2672/// /-> Header
2673/// | |
2674/// | Cond---\
2675/// | | |
2676/// | Body |
2677/// | | | |
2678/// | <...> |
2679/// | | | |
2680/// \--Latch |
2681/// |
2682/// Exit
2683/// |
2684/// After
2685///
2686/// The loop is thought to start at PreheaderIP (at the Preheader's terminator,
2687/// including) and end at AfterIP (at the After's first instruction, excluding).
2688/// That is, instructions in the Preheader and After blocks (except the
2689/// Preheader's terminator) are out of CanonicalLoopInfo's control and may have
2690/// side-effects. Typically, the Preheader is used to compute the loop's trip
2691/// count. The instructions from BodyIP (at the Body block's first instruction,
2692/// excluding) until the Latch are also considered outside CanonicalLoopInfo's
2693/// control and thus can have side-effects. The body block is the single entry
2694/// point into the loop body, which may contain arbitrary control flow as long
2695/// as all control paths eventually branch to the Latch block.
2696///
2697/// TODO: Consider adding another standardized BasicBlock between Body CFG and
2698/// Latch to guarantee that there is only a single edge to the latch. It would
2699/// make loop transformations easier by not needing to consider multiple
2700/// predecessors of the latch (See redirectAllPredecessorsTo) and would give us
2701/// an equivalent to PreheaderIP, AfterIP and BodyIP for inserting code that
2702/// executes after each body iteration.
2703///
2704/// There must be no loop-carried dependencies through llvm::Values. This is
2705/// equivalent to requiring that the Latch has no PHINode and the Header's only
2706/// PHINode is for the induction variable.
2707///
2708/// All code in Header, Cond, Latch and Exit (plus the terminator of the
2709/// Preheader) are CanonicalLoopInfo's responsibility and their build-up checked
2710/// by assertOK(). They are expected to not be modified unless explicitly
2711/// modifying the CanonicalLoopInfo through a method that applies an OpenMP
2712/// loop-associated construct such as applyWorkshareLoop, tileLoops, unrollLoop,
2713/// etc. These methods usually invalidate the CanonicalLoopInfo and re-use its
2714/// basic blocks. After invalidation, the CanonicalLoopInfo must not be used
2715/// anymore as its underlying control flow may not exist anymore.
2716/// Loop-transformation methods such as tileLoops, collapseLoops and unrollLoop
2717/// may also return a new CanonicalLoopInfo that can be passed to other
2718/// loop-associated construct implementing methods. These loop-transforming
2719/// methods may either create a new CanonicalLoopInfo usually using
2720/// createLoopSkeleton and invalidate the input CanonicalLoopInfo, or reuse and
2721/// modify one of the input CanonicalLoopInfo and return it as representing the
2722/// modified loop. What is done is an implementation detail of the
2723/// transformation-implementing method and callers should always assume that the
2724/// CanonicalLoopInfo passed to it is invalidated and a new object is returned.
2725/// Returned CanonicalLoopInfo have the same structure and guarantees as the one
2726/// created by createCanonicalLoop, such that transforming methods do not have
2727/// to special case where the CanonicalLoopInfo originated from.
2728///
2729/// Generally, methods consuming CanonicalLoopInfo do not need an
2730/// OpenMPIRBuilder::InsertPointTy as argument, but use the locations of the
2731/// CanonicalLoopInfo to insert new or modify existing instructions. Unless
2732/// documented otherwise, methods consuming CanonicalLoopInfo do not invalidate
2733/// any InsertPoint that is outside CanonicalLoopInfo's control. Specifically,
2734/// any InsertPoint in the Preheader, After or Block can still be used after
2735/// calling such a method.
2736///
2737/// TODO: Provide mechanisms for exception handling and cancellation points.
2738///
2739/// Defined outside OpenMPIRBuilder because nested classes cannot be
2740/// forward-declared, e.g. to avoid having to include the entire OMPIRBuilder.h.
2742 friend class OpenMPIRBuilder;
2743
2744private:
2745 BasicBlock *Header = nullptr;
2746 BasicBlock *Cond = nullptr;
2747 BasicBlock *Latch = nullptr;
2748 BasicBlock *Exit = nullptr;
2749
2750 /// Add the control blocks of this loop to \p BBs.
2751 ///
2752 /// This does not include any block from the body, including the one returned
2753 /// by getBody().
2754 ///
2755 /// FIXME: This currently includes the Preheader and After blocks even though
2756 /// their content is (mostly) not under CanonicalLoopInfo's control.
2757 /// Re-evaluate whether this makes sense.
///
/// \param BBs Output vector that receives the control blocks.
2758 void collectControlBlocks(SmallVectorImpl<BasicBlock *> &BBs);
2759
2760 /// Sets the number of loop iterations to the given value. This value must be
2761 /// valid in the condition block (i.e., defined in the preheader) and is
2762 /// interpreted as an unsigned integer.
///
/// \param TripCount The new number of loop iterations.
2763 void setTripCount(Value *TripCount);
2764
2765 /// Replace all uses of the canonical induction variable in the loop body with
2766 /// a new one.
2767 ///
2768 /// The intended use case is to update the induction variable for an updated
2769 /// iteration space such that it can stay normalized in the 0...tripcount-1
2770 /// range.
2771 ///
2772 /// The \p Updater is called with the (presumably updated) current normalized
2773 /// induction variable and is expected to return the value that uses of the
2774 /// pre-updated induction values should use instead, typically dependent on
2775 /// the new induction variable. This is a lambda (instead of e.g. just passing
2776 /// the new value) to be able to distinguish the uses of the pre-updated
2777 /// induction variable and uses of the induction variable to compute the
2778 /// updated induction variable value.
2779 void mapIndVar(llvm::function_ref<Value *(Instruction *)> Updater);
2780
2781public:
2782 /// Returns whether this object currently represents the IR of a loop, i.e.
2783 /// whether Header is non-null. If returning false, it may have been consumed
2784 /// by a loop transformation or not been initialized. Do not use in this case.
2785 bool isValid() const { return Header; }
2786
2787 /// The preheader ensures that there is only a single edge entering the loop.
2788 /// Code that must be executed before any loop iteration can be emitted here,
2789 /// such as computing the loop trip count and begin lifetime markers. Code in
2790 /// the preheader is not considered part of the canonical loop.
2791 BasicBlock *getPreheader() const;
2792
2793 /// The header is the entry for each iteration. In the canonical control flow,
2794 /// it only contains the PHINode for the induction variable.
2796 assert(isValid() && "Requires a valid canonical loop");
2797 return Header;
2798 }
2799
2800 /// The condition block computes whether there is another loop iteration. If
2801 /// yes, branches to the body; otherwise to the exit block.
2803 assert(isValid() && "Requires a valid canonical loop");
2804 return Cond;
2805 }
2806
2807 /// The body block is the single entry for a loop iteration and not controlled
2808 /// by CanonicalLoopInfo. It can contain arbitrary control flow but must
2809 /// eventually branch to the \p Latch block.
2811 assert(isValid() && "Requires a valid canonical loop");
2812 return cast<BranchInst>(Cond->getTerminator())->getSuccessor(0);
2813 }
2814
2815 /// Reaching the latch indicates the end of the loop body code. In the
2816 /// canonical control flow, it only contains the increment of the induction
2817 /// variable.
2819 assert(isValid() && "Requires a valid canonical loop");
2820 return Latch;
2821 }
2822
2823 /// Reaching the exit indicates no more iterations are being executed.
2825 assert(isValid() && "Requires a valid canonical loop");
2826 return Exit;
2827 }
2828
2829 /// The after block is intended for clean-up code such as lifetime end
2830 /// markers. It is separate from the exit block to ensure, analogous to the
2831 /// preheader, it having just a single entry edge and being free from PHI
2832 /// nodes should there be multiple loop exits (such as from break
2833 /// statements/cancellations).
2835 assert(isValid() && "Requires a valid canonical loop");
2836 return Exit->getSingleSuccessor();
2837 }
2838
2839 /// Returns the llvm::Value containing the number of loop iterations. It must
2840 /// be valid in the preheader and always interpreted as an unsigned integer of
2841 /// any bit-width.
2843 assert(isValid() && "Requires a valid canonical loop");
2844 Instruction *CmpI = &Cond->front();
2845 assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
2846 return CmpI->getOperand(1);
2847 }
2848
2849 /// Returns the instruction representing the current logical induction
2850 /// variable. Always unsigned, always starting at 0 with an increment of one.
2852 assert(isValid() && "Requires a valid canonical loop");
2853 Instruction *IndVarPHI = &Header->front();
2854 assert(isa<PHINode>(IndVarPHI) && "First inst must be the IV PHI");
2855 return IndVarPHI;
2856 }
2857
2858 /// Return the type of the induction variable (and the trip count).
2860 assert(isValid() && "Requires a valid canonical loop");
2861 return getIndVar()->getType();
2862 }
2863
2864 /// Return the insertion point for user code before the loop.
2866 assert(isValid() && "Requires a valid canonical loop");
2867 BasicBlock *Preheader = getPreheader();
2868 return {Preheader, std::prev(Preheader->end())};
2869 };
2870
2871 /// Return the insertion point for user code in the body.
2873 assert(isValid() && "Requires a valid canonical loop");
2874 BasicBlock *Body = getBody();
2875 return {Body, Body->begin()};
2876 };
2877
2878 /// Return the insertion point for user code after the loop.
2880 assert(isValid() && "Requires a valid canonical loop");
2882 return {After, After->begin()};
2883 };
2884
2886 assert(isValid() && "Requires a valid canonical loop");
2887 return Header->getParent();
2888 }
2889
2890 /// Consistency self-check.
///
/// Checks the build-up of the parts that are CanonicalLoopInfo's
/// responsibility: Header, Cond, Latch and Exit, plus the terminator of the
/// Preheader.
2891 void assertOK() const;
2892
2893 /// Invalidate this loop. That is, the underlying IR does not fulfill the
2894 /// requirements of an OpenMP canonical loop anymore.
///
/// NOTE(review): presumably isValid() returns false afterwards; the definition
/// is not visible in this header -- confirm against the implementation.
2895 void invalidate();
2896};
2897
2898} // end namespace llvm
2899
2900#endif // LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
arc branch finalize
This file defines the BumpPtrAllocator interface.
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
uint64_t Addr
std::string Name
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Hexagon Hardware Loops
#define F(x, y, z)
Definition: MD5.cpp:55
This file defines constants and helpers used when dealing with OpenMP.
const SmallVectorImpl< MachineOperand > & Cond
Basic Register Allocator
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Value * RHS
an instruction to allocate memory on the stack
Definition: Instructions.h:59
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:739
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
iterator end()
Definition: BasicBlock.h:451
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:438
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:214
This class represents a function call, abstracting a target machine's calling convention.
Class to represent the control flow structure of an OpenMP canonical loop.
Value * getTripCount() const
Returns the llvm::Value containing the number of loop iterations.
BasicBlock * getHeader() const
The header is the entry for each iteration.
void assertOK() const
Consistency self-check.
Type * getIndVarType() const
Return the type of the induction variable (and the trip count).
BasicBlock * getBody() const
The body block is the single entry for a loop iteration and not controlled by CanonicalLoopInfo.
bool isValid() const
Returns whether this object currently represents the IR of a loop.
OpenMPIRBuilder::InsertPointTy getAfterIP() const
Return the insertion point for user code after the loop.
OpenMPIRBuilder::InsertPointTy getBodyIP() const
Return the insertion point for user code in the body.
BasicBlock * getAfter() const
The after block is intended for clean-up code such as lifetime end markers.
Function * getFunction() const
void invalidate()
Invalidate this loop.
BasicBlock * getLatch() const
Reaching the latch indicates the end of the loop body code.
OpenMPIRBuilder::InsertPointTy getPreheaderIP() const
Return the insertion point for user code before the loop.
BasicBlock * getCond() const
The condition block computes whether there is another loop iteration.
BasicBlock * getExit() const
Reaching the exit indicates no more iterations are being executed.
BasicBlock * getPreheader() const
The preheader ensures that there is only a single edge entering the loop.
Instruction * getIndVar() const
Returns the instruction representing the current logical induction variable.
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:168
LinkageTypes
An enumeration for the kinds of linkage for global values.
Definition: GlobalValue.h:51
InsertPoint - A saved insertion point.
Definition: IRBuilder.h:251
BasicBlock * getBlock() const
Definition: IRBuilder.h:266
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:220
InsertPoint saveIP() const
Returns the current insert point.
Definition: IRBuilder.h:271
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
Definition: IRBuilder.h:283
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2649
Class to represent integer types.
Definition: DerivedTypes.h:40
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
OffloadEntryInfoDeviceGlobalVar(unsigned Order, OMPTargetGlobalVarEntryKind Flags)
Definition: OMPIRBuilder.h:373
OffloadEntryInfoDeviceGlobalVar(unsigned Order, Constant *Addr, int64_t VarSize, OMPTargetGlobalVarEntryKind Flags, GlobalValue::LinkageTypes Linkage, const std::string &VarName)
Definition: OMPIRBuilder.h:376
static bool classof(const OffloadEntryInfo *Info)
Definition: OMPIRBuilder.h:391
static bool classof(const OffloadEntryInfo *Info)
Definition: OMPIRBuilder.h:300
OffloadEntryInfoTargetRegion(unsigned Order, Constant *Addr, Constant *ID, OMPTargetRegionEntryKind Flags)
Definition: OMPIRBuilder.h:287
@ OffloadingEntryInfoTargetRegion
Entry is a target region.
Definition: OMPIRBuilder.h:221
@ OffloadingEntryInfoDeviceGlobalVar
Entry is a declare target variable.
Definition: OMPIRBuilder.h:223
OffloadingEntryInfoKinds getKind() const
Definition: OMPIRBuilder.h:239
OffloadEntryInfo(OffloadingEntryInfoKinds Kind)
Definition: OMPIRBuilder.h:230
static bool classof(const OffloadEntryInfo *Info)
Definition: OMPIRBuilder.h:247
OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order, uint32_t Flags)
Definition: OMPIRBuilder.h:231
Class that manages information about offload code regions and data.
Definition: OMPIRBuilder.h:209
function_ref< void(StringRef, const OffloadEntryInfoDeviceGlobalVar &)> OffloadDeviceGlobalVarEntryInfoActTy
Applies action Action on all registered entries.
Definition: OMPIRBuilder.h:413
OMPTargetDeviceClauseKind
Kind of device clause for declare target variables and functions NOTE: Currently not used as a part o...
Definition: OMPIRBuilder.h:352
@ OMPTargetDeviceClauseNoHost
The target is marked for non-host devices.
Definition: OMPIRBuilder.h:356
@ OMPTargetDeviceClauseAny
The target is marked for all devices.
Definition: OMPIRBuilder.h:354
@ OMPTargetDeviceClauseNone
The target is marked as having no clause.
Definition: OMPIRBuilder.h:360
@ OMPTargetDeviceClauseHost
The target is marked for host devices.
Definition: OMPIRBuilder.h:358
void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr, int64_t VarSize, OMPTargetGlobalVarEntryKind Flags, GlobalValue::LinkageTypes Linkage)
Register device global variable entry.
void initializeDeviceGlobalVarEntryInfo(StringRef Name, OMPTargetGlobalVarEntryKind Flags, unsigned Order)
Initialize device global variable entry.
void actOnDeviceGlobalVarEntriesInfo(const OffloadDeviceGlobalVarEntryInfoActTy &Action)
OMPTargetRegionEntryKind
Kind of the target registry entry.
Definition: OMPIRBuilder.h:274
@ OMPTargetRegionEntryTargetRegion
Mark the entry as target region.
Definition: OMPIRBuilder.h:276
OffloadEntriesInfoManager(OpenMPIRBuilder *builder)
Definition: OMPIRBuilder.h:267
void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, const TargetRegionEntryInfo &EntryInfo)
bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, bool IgnoreAddressId=false) const
Return true if a target region entry with the provided information exists.
void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, Constant *Addr, Constant *ID, OMPTargetRegionEntryKind Flags)
Register target region entry.
void actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action)
unsigned size() const
Return number of entries defined so far.
Definition: OMPIRBuilder.h:265
void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo, unsigned Order)
Initialize target region entry.
OMPTargetGlobalVarEntryKind
Kind of the global variable entry..
Definition: OMPIRBuilder.h:334
@ OMPTargetGlobalVarEntryEnter
Mark the entry as a declare target enter.
Definition: OMPIRBuilder.h:340
@ OMPTargetGlobalVarEntryNone
Mark the entry as having no declare target entry kind.
Definition: OMPIRBuilder.h:342
@ OMPTargetGlobalVarEntryIndirect
Mark the entry as a declare target indirect global.
Definition: OMPIRBuilder.h:344
@ OMPTargetGlobalVarEntryLink
Mark the entry as a declare target link.
Definition: OMPIRBuilder.h:338
@ OMPTargetGlobalVarEntryTo
Mark the entry as a to declare target.
Definition: OMPIRBuilder.h:336
function_ref< void(const TargetRegionEntryInfo &EntryInfo, const OffloadEntryInfoTargetRegion &)> OffloadTargetRegionEntryInfoActTy
Applies action Action on all registered entries.
Definition: OMPIRBuilder.h:325
bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const
Checks if the variable with the given name has been registered already.
Definition: OMPIRBuilder.h:408
bool empty() const
Return true if there are no entries defined.
Captures attributes that affect generating LLVM-IR using the OpenMPIRBuilder and related classes.
Definition: OMPIRBuilder.h:84
void setIsGPU(bool Value)
Definition: OMPIRBuilder.h:166
std::optional< bool > IsTargetDevice
Flag to define whether to generate code for the role of the OpenMP host (if set to false) or device (...
Definition: OMPIRBuilder.h:90
std::optional< bool > IsGPU
Flag for specifying if the compilation is done for an accelerator.
Definition: OMPIRBuilder.h:100
std::optional< StringRef > FirstSeparator
First separator used between the initial two parts of a name.
Definition: OMPIRBuilder.h:106
StringRef separator() const
Definition: OMPIRBuilder.h:157
int64_t getRequiresFlags() const
Returns requires directive clauses as flags compatible with those expected by libomptarget.
void setFirstSeparator(StringRef FS)
Definition: OMPIRBuilder.h:168
StringRef firstSeparator() const
Definition: OMPIRBuilder.h:147
std::optional< bool > OpenMPOffloadMandatory
Definition: OMPIRBuilder.h:103
void setHasRequiresReverseOffload(bool Value)
bool hasRequiresUnifiedSharedMemory() const
void setHasRequiresUnifiedSharedMemory(bool Value)
std::optional< StringRef > Separator
Separator used between all of the remaining consecutive parts of a name.
Definition: OMPIRBuilder.h:108
bool hasRequiresDynamicAllocators() const
bool openMPOffloadMandatory() const
Definition: OMPIRBuilder.h:129
void setHasRequiresUnifiedAddress(bool Value)
void setOpenMPOffloadMandatory(bool Value)
Definition: OMPIRBuilder.h:167
void setIsTargetDevice(bool Value)
Definition: OMPIRBuilder.h:165
void setSeparator(StringRef S)
Definition: OMPIRBuilder.h:169
void setHasRequiresDynamicAllocators(bool Value)
bool hasRequiresReverseOffload() const
bool hasRequiresUnifiedAddress() const
Struct that keeps the information that should be kept throughout a 'target data' region.
TargetDataInfo(bool RequiresDevicePointerInfo, bool SeparateBeginEndCalls)
SmallMapVector< const Value *, std::pair< Value *, Value * >, 4 > DevicePtrInfoMap
void clearArrayInfo()
Clear information about the data arrays.
unsigned NumberOfPtrs
The total number of pointers passed to the runtime library.
bool isValid()
Return true if the current target data information has valid arrays.
bool HasMapper
Indicate whether any user-defined mapper exists.
An interface to create LLVM-IR for OpenMP directives.
Definition: OMPIRBuilder.h:449
Constant * getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize, omp::IdentFlag Flags=omp::IdentFlag(0), unsigned Reserve2Flags=0)
Return an ident_t* encoding the source location SrcLocStr and Flags.
FunctionCallee getOrCreateRuntimeFunction(Module &M, omp::RuntimeFunction FnID)
Return the function declaration for the runtime function with FnID.
std::function< void(InsertPointTy CodeGenIP)> FinalizeCallbackTy
Callback type for variable finalization (think destructors).
Definition: OMPIRBuilder.h:495
InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD, int32_t MinThreadsVal=0, int32_t MaxThreadsVal=0, int32_t MinTeamsVal=0, int32_t MaxTeamsVal=0)
The omp target interface.
void emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen, BodyGenCallbackTy ElseGen, InsertPointTy AllocaIP={})
Emits code for the OpenMP 'if' clause using the specified BodyGenCallbackTy. Here is the logic: if (Cond) { Th...
CanonicalLoopInfo * collapseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, InsertPointTy ComputeIP)
Collapse a loop nest into a single loop.
function_ref< void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)> BodyGenCallbackTy
Callback type for body (=inner region) code generation.
Definition: OMPIRBuilder.h:547
void createTaskyield(const LocationDescription &Loc)
Generator for '#omp taskyield'.
void emitBranch(BasicBlock *Target)
InsertPointTy createReductions(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< ReductionInfo > ReductionInfos, bool IsNoWait=false)
Generator for '#omp reduction'.
InsertPointTy createAtomicWrite(const LocationDescription &Loc, AtomicOpValue &X, Value *Expr, AtomicOrdering AO)
Emit atomic write for : X = Expr — Only Scalar data types.
static void writeThreadBoundsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
InsertPointTy createCritical(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst)
Generator for '#omp critical'.
static TargetRegionEntryInfo getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack, StringRef ParentName="")
Creates unique info for a target entry from the filename and line number provided by the callback.
void emitTaskwaitImpl(const LocationDescription &Loc)
Generate a taskwait runtime call.
Constant * registerTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, Function *OutlinedFunction, StringRef EntryFnName, StringRef EntryFnIDName)
Registers the given function and sets up the attributes of the function. Returns the FunctionID.
InsertPointTy createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, AtomicOpValue &V, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr)
Emit atomic update for constructs: — Only Scalar data types V = X; X = X BinOp Expr ,...
void initialize()
Initialize the internal state, this will put structures types and potentially other helpers into the ...
void createTargetDeinit(const LocationDescription &Loc, int32_t TeamsReductionDataSize=0, int32_t TeamsReductionBufferLength=1024)
Create a runtime call for kmpc_target_deinit.
std::function< void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)> StorableBodyGenCallbackTy
Definition: OMPIRBuilder.h:554
CanonicalLoopInfo * createCanonicalLoop(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *TripCount, const Twine &Name="loop")
Generator for the control flow structure of an OpenMP canonical loop.
void loadOffloadInfoMetadata(Module &M)
Loads all the offload entries information from the host IR metadata.
InsertPointTy createAtomicUpdate(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr)
Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X For complex Operations: X = ...
InsertPointTy emitBarrierImpl(const LocationDescription &Loc, omp::Directive DK, bool ForceSimpleCall, bool CheckCancelFlag)
Generate a barrier runtime call.
void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully unroll a loop.
void emitFlush(const LocationDescription &Loc)
Generate a flush runtime call.
InsertPointTy emitKernelLaunch(const LocationDescription &Loc, Function *OutlinedFn, Value *OutlinedFnID, EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args, Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP)
Generate a target region entry call and host fallback call.
InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition, omp::Directive CanceledDirective)
Generator for '#omp cancel'.
static std::pair< int32_t, int32_t > readThreadBoundsForKernel(const Triple &T, Function &Kernel)
Read/write a bounds on threads for Kernel.
OpenMPIRBuilderConfig Config
The OpenMPIRBuilder Configuration.
CallInst * createOMPInteropDestroy(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_destroy.
InsertPointTy createAtomicRead(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOrdering AO)
Emit atomic Read for : V = X — Only Scalar data types.
std::function< void(EmitMetadataErrorKind, TargetRegionEntryInfo)> EmitMetadataErrorReportFunctionTy
Callback function type.
void setConfig(OpenMPIRBuilderConfig C)
Definition: OMPIRBuilder.h:464
InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsThreads)
Generator for '#omp ordered [threads | simd]'.
OpenMPIRBuilder::InsertPointTy createTargetData(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond, TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB, omp::RuntimeFunction *MapperFunc=nullptr, function_ref< InsertPointTy(InsertPointTy CodeGenIP, BodyGenTy BodyGenType)> BodyGenCB=nullptr, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr, function_ref< Value *(unsigned int)> CustomMapperCB=nullptr, Value *SrcLocInfo=nullptr)
Generator for '#omp target data'.
std::forward_list< CanonicalLoopInfo > LoopInfos
Collection of owned canonical loop objects that eventually need to be free'd.
void createTaskwait(const LocationDescription &Loc)
Generator for '#omp taskwait'.
CanonicalLoopInfo * createLoopSkeleton(DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore, BasicBlock *PostInsertBefore, const Twine &Name={})
Create the control flow structure of a canonical OpenMP loop.
InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK, bool ForceSimpleCall=false, bool CheckCancelFlag=true)
Emitter methods for OpenMP directives.
std::string createPlatformSpecificName(ArrayRef< StringRef > Parts) const
Create a name using the platform-specific separators.
FunctionCallee createDispatchNextFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_next_* runtime function for the specified size IVSize and sign IVSigned.
static void getKernelArgsVector(TargetKernelArgs &KernelArgs, IRBuilderBase &Builder, SmallVector< Value * > &ArgsVector)
Create the kernel args vector used by emitTargetKernel.
void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully or partially unroll a loop.
omp::OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position)
Get OMP_MAP_MEMBER_OF flag with extra bits reserved based on the position given.
void addAttributes(omp::RuntimeFunction FnID, Function &Fn)
Add attributes known for FnID to Fn.
Module & M
The underlying LLVM-IR module.
StringMap< Constant * > SrcLocStrMap
Map to remember source location strings.
void createMapperAllocas(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumOperands, struct MapperAllocas &MapperAllocas)
Create the allocas instruction used in call to mapper functions.
Constant * getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the source location LocStr.
void addOutlineInfo(OutlineInfo &&OI)
Add a new region that will be outlined later.
FunctionCallee createDispatchFiniFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_fini_* runtime function for the specified size IVSize and sign IVSigned.
void emitOffloadingArraysArgument(IRBuilderBase &Builder, OpenMPIRBuilder::TargetDataRTArgs &RTArgs, OpenMPIRBuilder::TargetDataInfo &Info, bool EmitDebug=false, bool ForEndCall=false)
Emit the arguments to be passed to the runtime library based on the arrays of base pointers,...
void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor, CanonicalLoopInfo **UnrolledCLI)
Partially unroll a loop.
InsertPointTy createSections(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< StorableBodyGenCallbackTy > SectionCBs, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait)
Generator for '#omp sections'.
InsertPointTy createTask(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, bool Tied=true, Value *Final=nullptr, Value *IfCondition=nullptr, SmallVector< DependData > Dependencies={})
Generator for '#omp task'.
void emitTaskyieldImpl(const LocationDescription &Loc)
Generate a taskyield runtime call.
void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc, Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg, struct MapperAllocas &MapperAllocas, int64_t DeviceID, unsigned NumOperands)
Create the call for the target mapper function.
InsertPointTy createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D, AtomicOrdering AO, omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly)
Emit atomic compare for constructs: — Only scalar data types cond-expr-stmt: x = x ordop expr ?...
InsertPointTy createOrderedDepend(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumLoops, ArrayRef< llvm::Value * > StoreValues, const Twine &Name, bool IsDependSource)
Generator for '#omp ordered depend (source | sink)'.
InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr, llvm::IntegerType *IntPtrTy, bool BranchtoEnd=true)
Generate conditional branch and relevant BasicBlocks through which private threads copy the 'copyin' ...
void emitOffloadingArrays(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info, bool IsNonContiguous=false, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr, function_ref< Value *(unsigned int)> CustomMapperCB=nullptr)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
SmallVector< FinalizationInfo, 8 > FinalizationStack
The finalization stack made up of finalize callbacks currently in-flight, wrapped into FinalizationIn...
std::vector< CanonicalLoopInfo * > tileLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, ArrayRef< Value * > TileSizes)
Tile a loop nest.
CallInst * createOMPInteropInit(const LocationDescription &Loc, Value *InteropVar, omp::OMPInteropType InteropType, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_init.
InsertPointTy createSingle(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsNowait, llvm::Value *DidIt)
Generator for '#omp single'.
SmallVector< OutlineInfo, 16 > OutlineInfos
Collection of regions that need to be outlined during finalization.
Function * getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID)
const Triple T
The target triple of the underlying module.
DenseMap< std::pair< Constant *, uint64_t >, Constant * > IdentMap
Map to remember existing ident_t*.
CallInst * createOMPFree(const LocationDescription &Loc, Value *Addr, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_free.
Function * createRegisterRequires(StringRef Name)
Create a global function to register OpenMP requires flags into the runtime, according to the Config.
FunctionCallee createForStaticInitFunction(unsigned IVSize, bool IVSigned, bool IsGPUDistribute)
Returns __kmpc_for_static_init_* runtime function for the specified size IVSize and sign IVSigned.
CallInst * createOMPAlloc(const LocationDescription &Loc, Value *Size, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_alloc.
void emitNonContiguousDescriptor(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info)
Emit an array of struct descriptors to be assigned to the offload args.
InsertPointTy createSection(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for '#omp section'.
InsertPointTy applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, bool NeedsBarrier, llvm::omp::ScheduleKind SchedKind=llvm::omp::OMP_SCHEDULE_Default, Value *ChunkSize=nullptr, bool HasSimdModifier=false, bool HasMonotonicModifier=false, bool HasNonmonotonicModifier=false, bool HasOrderedClause=false, omp::WorksharingLoopType LoopType=omp::WorksharingLoopType::ForStaticLoop)
Modifies the canonical loop to be a workshare loop.
void emitBlock(BasicBlock *BB, Function *CurFn, bool IsFinished=false)
Value * getOrCreateThreadID(Value *Ident)
Return the current thread ID.
InsertPointTy createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for '#omp master'.
void pushFinalizationCB(const FinalizationInfo &FI)
Push a finalization callback on the finalization stack.
Definition: OMPIRBuilder.h:513
InsertPointTy getInsertionPoint()
Return the insertion point used by the underlying IRBuilder.
IRBuilder<>::InsertPoint createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable)
Generator for '#omp parallel'.
StringMap< GlobalVariable *, BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
GlobalVariable * getOrCreateInternalVariable(Type *Ty, const StringRef &Name, unsigned AddressSpace=0)
Gets (if variable with the given name already exist) or creates internal global variable with the spe...
FunctionCallee createDispatchInitFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_init_* runtime function for the specified size IVSize and sign IVSigned.
CallInst * createOMPInteropUse(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_use.
IRBuilder<>::InsertPoint InsertPointTy
Type used throughout for insertion points.
Definition: OMPIRBuilder.h:475
GlobalVariable * createOffloadMapnames(SmallVectorImpl< llvm::Constant * > &Names, std::string VarName)
Create the global variable holding the offload names information.
static void writeTeamsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
std::function< Function *(StringRef FunctionName)> FunctionGenCallback
Functions used to generate a function with the given name.
void setCorrectMemberOfFlag(omp::OpenMPOffloadMappingFlags &Flags, omp::OpenMPOffloadMappingFlags MemberOfFlag)
Given an initial flag set, this function modifies it to contain the passed in MemberOfFlag generated ...
void emitCancelationCheckImpl(Value *CancelFlag, omp::Directive CanceledDirective, FinalizeCallbackTy ExitCB={})
Generate control flow and cleanup for cancellation.
Constant * getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the default source location.
InsertPointTy createMasked(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, Value *Filter)
Generator for '#omp masked'.
void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size, int32_t Flags, GlobalValue::LinkageTypes, StringRef Name="")
Creates offloading entry for the provided entry ID ID, address Addr, size Size, and flags Flags.
static unsigned getOpenMPDefaultSimdAlign(const Triple &TargetTriple, const StringMap< bool > &Features)
Get the default alignment value for given target.
unsigned getFlagMemberOffset()
Get the offset of the OMP_MAP_MEMBER_OF field.
InsertPointTy createTaskgroup(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB)
Generator for the taskgroup construct.
void createOffloadEntriesAndInfoMetadata(EmitMetadataErrorReportFunctionTy &ErrorReportFunction)
void applySimd(CanonicalLoopInfo *Loop, MapVector< Value *, Value * > AlignedVars, Value *IfCond, omp::OrderKind Order, ConstantInt *Simdlen, ConstantInt *Safelen)
Add metadata to simd-ize a loop.
void emitTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, FunctionGenCallback &GenerateFunctionCallback, bool IsOffloadEntry, Function *&OutlinedFn, Constant *&OutlinedFnID)
Create a unique name for the entry function using the source location information of the current targ...
InsertPointTy createTarget(const LocationDescription &Loc, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::InsertPointTy CodeGenIP, TargetRegionEntryInfo &EntryInfo, int32_t NumTeams, int32_t NumThreads, SmallVectorImpl< Value * > &Inputs, GenMapInfoCallbackTy GenMapInfoCB, TargetBodyGenCallbackTy BodyGenCB, TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB)
Generator for '#omp target'.
bool isLastFinalizationInfoCancellable(omp::Directive DK)
Return true if the last entry in the finalization stack is of kind DK and cancellable.
InsertPointTy emitTargetKernel(const LocationDescription &Loc, InsertPointTy AllocaIP, Value *&Return, Value *Ident, Value *DeviceID, Value *NumTeams, Value *NumThreads, Value *HostPtr, ArrayRef< Value * > KernelArgs)
Generate a target region entry call.
GlobalVariable * createOffloadMaptypes(SmallVectorImpl< uint64_t > &Mappings, std::string VarName)
Create the global variable holding the offload mappings information.
CallInst * createCachedThreadPrivate(const LocationDescription &Loc, llvm::Value *Pointer, llvm::ConstantInt *Size, const llvm::Twine &Name=Twine(""))
Create a runtime call for kmpc_threadprivate_cached.
IRBuilder<> Builder
The LLVM-IR Builder used to create IR.
GlobalValue * createGlobalFlag(unsigned Value, StringRef Name)
Create a hidden global flag Name in the module with initial value Value.
Value * getSizeInBytes(Value *BasePtr)
Computes the size of type in bytes.
OpenMPIRBuilder(Module &M)
Create a new OpenMPIRBuilder operating on the given module M.
Definition: OMPIRBuilder.h:453
void registerTargetGlobalVariable(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage, Type *LlvmPtrTy, Constant *Addr)
Registers a target variable for device or host.
InsertPointTy createTeams(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, Value *NumTeamsLower=nullptr, Value *NumTeamsUpper=nullptr, Value *ThreadLimit=nullptr, Value *IfExpr=nullptr)
Generator for '#omp teams'.
BodyGenTy
Type of BodyGen to use for region codegen.
OffloadEntriesInfoManager OffloadInfoManager
Info manager to keep track of target regions.
static std::pair< int32_t, int32_t > readTeamBoundsForKernel(const Triple &T, Function &Kernel)
Read/write a bounds on teams for Kernel.
std::function< std::tuple< std::string, uint64_t >()> FileIdentifierInfoCallbackTy
const std::string ompOffloadInfoName
OMP Offload Info Metadata name string.
InsertPointTy createCopyPrivate(const LocationDescription &Loc, llvm::Value *BufSize, llvm::Value *CpyBuf, llvm::Value *CpyFn, llvm::Value *DidIt)
Generator for __kmpc_copyprivate.
void popFinalizationCB()
Pop the last finalization callback from the finalization stack.
Definition: OMPIRBuilder.h:520
bool updateToLocation(const LocationDescription &Loc)
Update the internal location to Loc.
void createFlush(const LocationDescription &Loc)
Generator for '#omp flush'.
Constant * getAddrOfDeclareTargetVar(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, Type *LlvmPtrTy, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage)
Retrieve (or create if non-existent) the address of a declare target variable, used in conjunction wi...
EmitMetadataErrorKind
The kind of errors that can occur when emitting the offload entries and metadata.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:321
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
Definition: StringMap.h:128
size_type count(StringRef Key) const
count - Return 1 if the element is in the map, 0 otherwise.
Definition: StringMap.h:277
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Target - Wrapper for Target specific information.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
Value handle that is nullable, but tries to track the Value.
Definition: ValueHandle.h:204
bool pointsToAliveValue() const
Definition: ValueHandle.h:224
An efficient, type-erasing, non-owning reference to a callable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition: ISDOpcodes.h:71
OpenMPOffloadMappingFlags
Values for bit flags used to specify the mapping type for offloading.
Definition: OMPConstants.h:193
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their definition in openmp/runtime/src/kmp...
Definition: OMPConstants.h:66
RTLDependenceKindTy
Dependence kind for RTL.
Definition: OMPConstants.h:271
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
Definition: OMPConstants.h:46
WorksharingLoopType
A type of worksharing loop construct.
Definition: OMPConstants.h:281
OMPAtomicCompareOp
Atomic compare operations. Currently OpenMP only supports ==, >, and <.
Definition: OMPConstants.h:265
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
BasicBlock * splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, llvm::Twine Suffix=".split")
Like splitBB, but reuses the current block's name for the new name.
AddressSpace
Definition: NVPTXBaseInfo.h:21
void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New, bool CreateBranch)
Move the instruction after an InsertPoint to the beginning of another BasicBlock.
BasicBlock * splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch, llvm::Twine Name={})
Split a BasicBlock at an InsertPoint, even if the block is degenerate (missing the terminator).
AtomicOrdering
Atomic ordering for LLVM's memory model.
A struct to pack relevant information while generating atomic ops.
A struct to pack the relevant information for an OpenMP depend clause.
DependData(omp::RTLDependenceKindTy DepKind, Type *DepValueType, Value *DepVal)
omp::RTLDependenceKindTy DepKind
bool IsCancellable
Flag to indicate if the directive is cancellable.
Definition: OMPIRBuilder.h:507
FinalizeCallbackTy FiniCB
The finalization callback provided by the last in-flight invocation of createXXXX for the directive o...
Definition: OMPIRBuilder.h:500
omp::Directive DK
The directive kind of the innermost directive that has an associated region which might require final...
Definition: OMPIRBuilder.h:504
Description of a LLVM-IR insertion point (IP) and a debug/source location (filename,...
Definition: OMPIRBuilder.h:591
LocationDescription(const InsertPointTy &IP)
Definition: OMPIRBuilder.h:594
LocationDescription(const InsertPointTy &IP, const DebugLoc &DL)
Definition: OMPIRBuilder.h:595
LocationDescription(const IRBuilderBase &IRB)
Definition: OMPIRBuilder.h:592
This structure contains combined information generated for mappable clauses, including base pointers,...
void append(MapInfosTy &CurInfo)
Append arrays in CurInfo.
MapDeviceInfoArrayTy DevicePointers
StructNonContiguousInfo NonContigInfo
Helper that contains information about regions we need to outline during finalization.
void collectBlocks(SmallPtrSetImpl< BasicBlock * > &BlockSet, SmallVectorImpl< BasicBlock * > &BlockVector)
Collect all blocks in between EntryBB and ExitBB in both the given vector and set.
Function * getFunction() const
Return the function that contains the region to be outlined.
SmallVector< Value *, 2 > ExcludeArgsFromAggregate
std::function< void(Function &)> PostOutlineCBTy
Information about an OpenMP reduction.
AtomicReductionGenTy AtomicReductionGen
Callback for generating the atomic reduction body, may be null.
Value * Variable
Reduction variable of pointer type.
Value * PrivateVariable
Thread-private partial reduction variable.
ReductionGenTy ReductionGen
Callback for generating the reduction body.
Type * ElementType
Reduction element type, must match pointee type of variable.
ReductionInfo(Type *ElementType, Value *Variable, Value *PrivateVariable, ReductionGenTy ReductionGen, AtomicReductionGenTy AtomicReductionGen)
Container for the arguments used to pass data to the runtime library.
Value * SizesArray
The array of sizes passed to the runtime library.
TargetDataRTArgs(Value *BasePointersArray, Value *PointersArray, Value *SizesArray, Value *MapTypesArray, Value *MapTypesArrayEnd, Value *MappersArray, Value *MapNamesArray)
Value * PointersArray
The array of section pointers passed to the runtime library.
Value * MappersArray
The array of user-defined mappers passed to the runtime library.
Value * MapTypesArrayEnd
The array of map types passed to the runtime library for the end of the region, or nullptr if there a...
Value * BasePointersArray
The array of base pointers passed to the runtime library.
Value * MapTypesArray
The array of map types passed to the runtime library for the beginning of the region or for the entir...
Value * MapNamesArray
The array of original declaration names of mapped pointers sent to the runtime library for debugging.
Data structure that contains the needed information to construct the kernel args vector.
Value * NumTeams
The number of teams.
TargetKernelArgs(unsigned NumTargetItems, TargetDataRTArgs RTArgs, Value *NumIterations, Value *NumTeams, Value *NumThreads, Value *DynCGGroupMem, bool HasNoWait)
Constructor for TargetKernelArgs.
Value * DynCGGroupMem
The size of the dynamic shared memory.
TargetDataRTArgs RTArgs
Arguments passed to the runtime library.
Value * NumIterations
The number of iterations.
unsigned NumTargetItems
Number of arguments passed to the runtime library.
bool HasNoWait
True if the kernel has 'no wait' clause.
Value * NumThreads
The number of threads.
A MapVector that performs no allocations if smaller than a certain size.
Definition: MapVector.h:254
Data structure to contain the information needed to uniquely identify a target entry.
Definition: OMPIRBuilder.h:183
static void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, StringRef ParentName, unsigned DeviceID, unsigned FileID, unsigned Line, unsigned Count)
TargetRegionEntryInfo(StringRef ParentName, unsigned DeviceID, unsigned FileID, unsigned Line, unsigned Count=0)
Definition: OMPIRBuilder.h:191
bool operator<(const TargetRegionEntryInfo RHS) const
Definition: OMPIRBuilder.h:201