LLVM 18.0.0git
OMPIRBuilder.h
Go to the documentation of this file.
1//===- IR/OpenMPIRBuilder.h - OpenMP encoding builder for LLVM IR - C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the OpenMPIRBuilder class and helpers used as a convenient
10// way to create LLVM instructions for OpenMP directives.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
15#define LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
16
19#include "llvm/IR/DebugLoc.h"
20#include "llvm/IR/IRBuilder.h"
22#include <forward_list>
23#include <map>
24#include <optional>
25
26namespace llvm {
27class CanonicalLoopInfo;
28struct TargetRegionEntryInfo;
29class OffloadEntriesInfoManager;
30class OpenMPIRBuilder;
31
32/// Move the instructions after an InsertPoint to the beginning of another
33/// BasicBlock.
34///
35/// The instructions after \p IP are moved to the beginning of \p New which must
36/// not have any PHINodes. If \p CreateBranch is true, a branch instruction to
37/// \p New will be added such that there is no semantic change. Otherwise, the
38/// \p IP insert block remains degenerate and it is up to the caller to insert a
39/// terminator.
40void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New,
41 bool CreateBranch);
42
43/// Splice a BasicBlock at an IRBuilder's current insertion point. Its new
44/// insert location will stick to after the instruction before the insertion
45/// point (instead of moving with the instruction the InsertPoint stores
46/// internally).
47void spliceBB(IRBuilder<> &Builder, BasicBlock *New, bool CreateBranch);
48
49/// Split a BasicBlock at an InsertPoint, even if the block is degenerate
50/// (missing the terminator).
51///
52/// llvm::SplitBasicBlock and BasicBlock::splitBasicBlock require a well-formed
53/// BasicBlock. \p Name is used for the new successor block. If \p CreateBranch
54/// is true, a branch to the new successor will be created such that
55/// semantically there is no change; otherwise the block of the insertion point
56/// remains degenerate and it is the caller's responsibility to insert a
57/// terminator. Returns the new successor block.
58BasicBlock *splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch,
59 llvm::Twine Name = {});
60
61/// Split a BasicBlock at \p Builder's insertion point, even if the block is
62/// degenerate (missing the terminator). Its new insert location will stick to
63/// after the instruction before the insertion point (instead of moving with the
64/// instruction the InsertPoint stores internally).
65BasicBlock *splitBB(IRBuilderBase &Builder, bool CreateBranch,
66 llvm::Twine Name = {});
67
68/// Split a BasicBlock at \p Builder's insertion point, even if the block is
69/// degenerate (missing the terminator). Its new insert location will stick to
70/// after the instruction before the insertion point (instead of moving with the
71/// instruction the InsertPoint stores internally).
72BasicBlock *splitBB(IRBuilder<> &Builder, bool CreateBranch, llvm::Twine Name);
73
74/// Like splitBB, but reuses the current block's name for the new name.
75BasicBlock *splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch,
76 llvm::Twine Suffix = ".split");
77
78/// Captures attributes that affect generating LLVM-IR using the
79/// OpenMPIRBuilder and related classes. Note that not all attributes are
80/// required for all classes or functions. In some use cases the configuration
81/// is not necessary at all, because the only functions that are called
82/// are ones that are not dependent on the configuration.
84public:
85 /// Flag for specifying if the compilation is done for embedded device code
86 /// or host code.
87 std::optional<bool> IsTargetDevice;
88
89 /// Flag for specifying if the compilation is done for an accelerator.
90 std::optional<bool> IsGPU;
91
92 /// Flag for specifying if offloading is mandatory.
93 std::optional<bool> OpenMPOffloadMandatory;
94
95 /// First separator used between the initial two parts of a name.
96 std::optional<StringRef> FirstSeparator;
97 /// Separator used between all of the remaining consecutive parts of a name.
98 std::optional<StringRef> Separator;
99
103 bool HasRequiresReverseOffload,
104 bool HasRequiresUnifiedAddress,
105 bool HasRequiresUnifiedSharedMemory,
106 bool HasRequiresDynamicAllocators);
107
108 // Getter functions that assert if the required values are not present.
/// Returns the stored IsTargetDevice flag. The flag must have been set
/// beforehand; this is checked with an assert before the optional is
/// dereferenced.
109 bool isTargetDevice() const {
110 assert(IsTargetDevice.has_value() && "IsTargetDevice is not set");
111 return *IsTargetDevice;
112 }
113
/// Returns the stored IsGPU flag. The flag must have been set beforehand;
/// this is checked with an assert before the optional is dereferenced.
114 bool isGPU() const {
115 assert(IsGPU.has_value() && "IsGPU is not set");
116 return *IsGPU;
117 }
118
120 assert(OpenMPOffloadMandatory.has_value() &&
121 "OpenMPOffloadMandatory is not set");
123 }
124
/// Returns true if RequiresFlags is non-zero. RequiresFlags is an int64_t,
/// so the return statement relies on the implicit integer-to-bool
/// conversion.
125 bool hasRequiresFlags() const { return RequiresFlags; }
126 bool hasRequiresReverseOffload() const;
127 bool hasRequiresUnifiedAddress() const;
129 bool hasRequiresDynamicAllocators() const;
130
131 /// Returns requires directive clauses as flags compatible with those expected
132 /// by libomptarget.
133 int64_t getRequiresFlags() const;
134
135 // Returns the FirstSeparator if set, otherwise use the default separator
136 // depending on isGPU
138 if (FirstSeparator.has_value())
139 return *FirstSeparator;
140 if (isGPU())
141 return "_";
142 return ".";
143 }
144
145 // Returns the Separator if set, otherwise use the default separator depending
146 // on isGPU
148 if (Separator.has_value())
149 return *Separator;
150 if (isGPU())
151 return "$";
152 return ".";
153 }
154
/// Stores \p Value in the optional IsGPU flag, which gives it a value for
/// the has_value() check performed by isGPU().
156 void setIsGPU(bool Value) { IsGPU = Value; }
160
165
166private:
167 /// Flags for specifying which requires directive clauses are present.
168 int64_t RequiresFlags;
169};
170
171/// Data structure to contain the information needed to uniquely identify
172/// a target entry.
174 std::string ParentName;
175 unsigned DeviceID;
176 unsigned FileID;
177 unsigned Line;
178 unsigned Count;
179
182 unsigned FileID, unsigned Line, unsigned Count = 0)
184 Count(Count) {}
185
188 unsigned DeviceID, unsigned FileID,
189 unsigned Line, unsigned Count);
190
192 return std::make_tuple(ParentName, DeviceID, FileID, Line, Count) <
193 std::make_tuple(RHS.ParentName, RHS.DeviceID, RHS.FileID, RHS.Line,
194 RHS.Count);
195 }
196};
197
198/// Class that manages information about offload code regions and data
200 /// OpenMPIRBuilder this manager is associated with.
201 OpenMPIRBuilder *OMPBuilder;
/// Number of entries registered so far.
202 unsigned OffloadingEntriesNum = 0;
203
204public:
205 /// Base class of the entries info.
207 public:
208 /// Kind of a given entry.
209 enum OffloadingEntryInfoKinds : unsigned {
210 /// Entry is a target region.
212 /// Entry is a declare target variable.
214 /// Invalid entry info.
216 };
217
218 protected:
/// Creates an entry of the given \p Kind. Flags and Order keep their default
/// member initializers (0u and ~0u), so isValid() is false for such an entry.
220 explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind) : Kind(Kind) {}
/// Creates an entry with an explicit \p Kind, emission \p Order and \p Flags.
221 explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order,
222 uint32_t Flags)
223 : Flags(Flags), Order(Order), Kind(Kind) {}
/// Defaulted, non-virtual destructor declared in the protected section.
224 ~OffloadEntryInfo() = default;
225
226 public:
/// Returns true if Order differs from ~0u, the value Order is
/// default-initialized to.
227 bool isValid() const { return Order != ~0u; }
/// Returns the order in which this entry was emitted.
228 unsigned getOrder() const { return Order; }
/// Returns the kind tag of this entry; used by the classof() checks of the
/// derived entry classes.
229 OffloadingEntryInfoKinds getKind() const { return Kind; }
/// Returns the flags stored for this entry.
230 uint32_t getFlags() const { return Flags; }
/// Overwrites the flags stored for this entry with \p NewFlags.
231 void setFlags(uint32_t NewFlags) { Flags = NewFlags; }
/// Returns the value held by the Addr handle as a Constant. cast_or_null is
/// used, so a handle that holds no value yields nullptr.
232 Constant *getAddress() const { return cast_or_null<Constant>(Addr); }
234 assert(!Addr.pointsToAliveValue() && "Address has been set before!");
235 Addr = V;
236 }
/// Type check against the base class itself: unconditionally returns true
/// and does not inspect \p Info.
237 static bool classof(const OffloadEntryInfo *Info) { return true; }
238
239 private:
240 /// Address of the entity that has to be mapped for offloading.
241 WeakTrackingVH Addr;
242
243 /// Flags associated with the device global.
244 uint32_t Flags = 0u;
245
246 /// Order this entry was emitted.
247 unsigned Order = ~0u;
248
250 };
251
252 /// Return true if there are no entries defined.
253 bool empty() const;
254 /// Return number of entries defined so far.
255 unsigned size() const { return OffloadingEntriesNum; }
256
258
259 //
260 // Target region entries related.
261 //
262
263 /// Kind of the target registry entry.
265 /// Mark the entry as target region.
267 /// Mark the entry as a global constructor.
269 /// Mark the entry as a global destructor.
271 };
272
273 /// Target region entries info.
275 /// Address that can be used as the ID of the entry.
276 Constant *ID = nullptr;
277
278 public:
281 explicit OffloadEntryInfoTargetRegion(unsigned Order, Constant *Addr,
282 Constant *ID,
285 ID(ID) {
287 }
288
/// Returns the constant used as the ID of this entry (nullptr until set).
289 Constant *getID() const { return ID; }
/// Sets the ID of this entry to \p V. The ID may only be set while it is
/// still null; this is checked with an assert.
290 void setID(Constant *V) {
291 assert(!ID && "ID has been set before!");
292 ID = V;
293 }
/// Returns true if \p Info carries the target-region kind tag.
294 static bool classof(const OffloadEntryInfo *Info) {
295 return Info->getKind() == OffloadingEntryInfoTargetRegion;
296 }
297 };
298
299 /// Initialize target region entry.
300 /// This is ONLY needed for DEVICE compilation.
302 unsigned Order);
303 /// Register target region entry.
307 /// Return true if a target region entry with the provided information
308 /// exists.
310 bool IgnoreAddressId = false) const;
311
312 // Return the Name based on \a EntryInfo using the next available Count.
314 const TargetRegionEntryInfo &EntryInfo);
315
316 /// Applies action \a Action on all registered entries.
317 typedef function_ref<void(const TargetRegionEntryInfo &EntryInfo,
318 const OffloadEntryInfoTargetRegion &)>
320 void
322
323 //
324 // Device global variable entries related.
325 //
326
327 /// Kind of the global variable entry.
329 /// Mark the entry as a declare target 'to' entry.
331 /// Mark the entry as a declare target 'link' entry.
333 /// Mark the entry as a declare target enter.
335 /// Mark the entry as having no declare target entry kind.
337 /// Mark the entry as a declare target indirect global.
339 };
340
341 /// Kind of device clause for declare target variables
342 /// and functions.
343 /// NOTE: Currently not used as a part of a variable entry;
344 /// used for Flang and Clang to interface with the variable
345 /// related registration functions.
347 /// The target is marked for all devices
349 /// The target is marked for non-host devices
351 /// The target is marked for host devices
353 /// The target is marked as having no clause
355 };
356
357 /// Device global variable entries info.
359 /// Size of the global variable.
360 int64_t VarSize;
362 const std::string VarName;
363
364 public:
367 explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order,
370 explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order, Constant *Addr,
371 int64_t VarSize,
374 const std::string &VarName)
376 VarSize(VarSize), Linkage(Linkage), VarName(VarName) {
378 }
379
/// Returns the stored size of the variable.
380 int64_t getVarSize() const { return VarSize; }
/// Returns the stored variable name as a StringRef over the VarName member.
/// NOTE(review): the returned view presumably must not outlive this entry.
381 StringRef getVarName() const { return VarName; }
/// Overwrites the stored size of the variable with \p Size.
382 void setVarSize(int64_t Size) { VarSize = Size; }
/// Returns the stored linkage of the variable.
383 GlobalValue::LinkageTypes getLinkage() const { return Linkage; }
/// Overwrites the stored linkage of the variable with \p LT.
384 void setLinkage(GlobalValue::LinkageTypes LT) { Linkage = LT; }
/// Returns true if \p Info carries the device-global-variable kind tag.
385 static bool classof(const OffloadEntryInfo *Info) {
386 return Info->getKind() == OffloadingEntryInfoDeviceGlobalVar;
387 }
388 };
389
390 /// Initialize device global variable entry.
391 /// This is ONLY used for DEVICE compilation.
394 unsigned Order);
395
396 /// Register device global variable entry.
398 int64_t VarSize,
401 /// Checks if the variable with the given name has been registered already.
403 return OffloadEntriesDeviceGlobalVar.count(VarName) > 0;
404 }
405 /// Applies action \a Action on all registered entries.
406 typedef function_ref<void(StringRef, const OffloadEntryInfoDeviceGlobalVar &)>
410
411private:
412 /// Return the count of entries at a particular source location.
413 unsigned
414 getTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo) const;
415
416 /// Update the count of entries at a particular source location.
417 void
418 incrementTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo);
419
421 getTargetRegionEntryCountKey(const TargetRegionEntryInfo &EntryInfo) {
422 return TargetRegionEntryInfo(EntryInfo.ParentName, EntryInfo.DeviceID,
423 EntryInfo.FileID, EntryInfo.Line, 0);
424 }
425
426 // Count of entries at a location.
427 std::map<TargetRegionEntryInfo, unsigned> OffloadEntriesTargetRegionCount;
428
429 // Storage for target region entries kind.
430 typedef std::map<TargetRegionEntryInfo, OffloadEntryInfoTargetRegion>
431 OffloadEntriesTargetRegionTy;
432 OffloadEntriesTargetRegionTy OffloadEntriesTargetRegion;
433 /// Storage for device global variable entries kind. The storage is to be
434 /// indexed by mangled name.
436 OffloadEntriesDeviceGlobalVarTy;
437 OffloadEntriesDeviceGlobalVarTy OffloadEntriesDeviceGlobalVar;
438};
439
440/// An interface to create LLVM-IR for OpenMP directives.
441///
442/// Each OpenMP directive has a corresponding public generator method.
444public:
445 /// Create a new OpenMPIRBuilder operating on the given module \p M. This will
446 /// not have an effect on \p M (see initialize)
448 : M(M), Builder(M.getContext()), OffloadInfoManager(this) {}
450
451 /// Initialize the internal state, this will put structure types and
452 /// potentially other helpers into the underlying module. Must be called
453 /// before any other method and only once! This internal state includes types
454 /// used in the OpenMPIRBuilder generated from OMPKinds.def.
455 void initialize();
456
458
459 /// Finalize the underlying module, e.g., by outlining regions.
460 /// \param Fn The function to be finalized. If not used,
461 /// all functions are finalized.
462 void finalize(Function *Fn = nullptr);
463
464 /// Add attributes known for \p FnID to \p Fn.
466
467 /// Type used throughout for insertion points.
469
470 /// Create a name using the platform specific separators.
471 /// \param Parts parts of the final name that needs separation
472 /// The created name has a first separator between the first and second part
473 /// and a second separator between all other parts.
474 /// E.g. with FirstSeparator "$" and Separator "." and
475 /// parts: "p1", "p2", "p3", "p4"
476 /// The resulting name is "p1$p2.p3.p4"
477 /// The separators are retrieved from the OpenMPIRBuilderConfig.
478 std::string createPlatformSpecificName(ArrayRef<StringRef> Parts) const;
479
480 /// Callback type for variable finalization (think destructors).
481 ///
482 /// \param CodeGenIP is the insertion point at which the finalization code
483 /// should be placed.
484 ///
485 /// A finalize callback knows about all objects that need finalization, e.g.
486 /// destruction, when the scope of the currently generated construct is left
487 /// at the time, and location, the callback is invoked.
488 using FinalizeCallbackTy = std::function<void(InsertPointTy CodeGenIP)>;
489
491 /// The finalization callback provided by the last in-flight invocation of
492 /// createXXXX for the directive of kind DK.
494
495 /// The directive kind of the innermost directive that has an associated
496 /// region which might require finalization when it is left.
497 omp::Directive DK;
498
499 /// Flag to indicate if the directive is cancellable.
501 };
502
503 /// Push a finalization callback on the finalization stack.
504 ///
505 /// NOTE: Temporary solution until Clang CG is gone.
507 FinalizationStack.push_back(FI);
508 }
509
510 /// Pop the last finalization callback from the finalization stack.
511 ///
512 /// NOTE: Temporary solution until Clang CG is gone.
514
515 /// Callback type for body (=inner region) code generation
516 ///
517 /// The callback takes code locations as arguments, each describing a
518 /// location where additional instructions can be inserted.
519 ///
520 /// The CodeGenIP may be in the middle of a basic block or point to the end of
521 /// it. The basic block may have a terminator or be degenerate. The callback
522 /// function may just insert instructions at that position, but also split the
523 /// block (without the Before argument of BasicBlock::splitBasicBlock such
524 /// that the identity of the split predecessor block is preserved) and insert
525 /// additional control flow, including branches that do not lead back to what
526 /// follows the CodeGenIP. Note that since the callback is allowed to split
527 /// the block, callers must assume that InsertPoints to positions in the
528 /// BasicBlock after CodeGenIP including CodeGenIP itself are invalidated. If
529 /// such InsertPoints need to be preserved, it can split the block itself
530 /// before calling the callback.
531 ///
532 /// AllocaIP and CodeGenIP must not point to the same position.
533 ///
534 /// \param AllocaIP is the insertion point at which new alloca instructions
535 /// should be placed. The BasicBlock it is pointing to must
536 /// not be split.
537 /// \param CodeGenIP is the insertion point at which the body code should be
538 /// placed.
540 function_ref<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
541
542 // This is created primarily for sections construct as llvm::function_ref
543 // (BodyGenCallbackTy) is not storable (as described in the comments of
544 // function_ref class - function_ref contains non-ownable reference
545 // to the callable).
547 std::function<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
548
549 /// Callback type for loop body code generation.
550 ///
551 /// \param CodeGenIP is the insertion point where the loop's body code must be
552 /// placed. This will be a dedicated BasicBlock with a
553 /// conditional branch from the loop condition check and
554 /// terminated with an unconditional branch to the loop
555 /// latch.
556 /// \param IndVar is the induction variable usable at the insertion point.
558 function_ref<void(InsertPointTy CodeGenIP, Value *IndVar)>;
559
560 /// Callback type for variable privatization (think copy & default
561 /// constructor).
562 ///
563 /// \param AllocaIP is the insertion point at which new alloca instructions
564 /// should be placed.
565 /// \param CodeGenIP is the insertion point at which the privatization code
566 /// should be placed.
567 /// \param Original The value being copied/created, should not be used in the
568 /// generated IR.
569 /// \param Inner The equivalent of \p Original that should be used in the
570 /// generated IR; this is equal to \p Original if the value is
571 /// a pointer and can thus be passed directly, otherwise it is
572 /// an equivalent but different value.
573 /// \param ReplVal The replacement value, thus a copy or newly created version
574 /// of \p Inner.
575 ///
576 /// \returns The new insertion point where code generation continues and
577 /// \p ReplVal the replacement value.
579 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original,
580 Value &Inner, Value *&ReplVal)>;
581
582 /// Description of a LLVM-IR insertion point (IP) and a debug/source location
583 /// (filename, line, column, ...).
586 : IP(IRB.saveIP()), DL(IRB.getCurrentDebugLocation()) {}
589 : IP(IP), DL(DL) {}
592 };
593
594 /// Emitter methods for OpenMP directives.
595 ///
596 ///{
597
598 /// Generator for '#omp barrier'
599 ///
600 /// \param Loc The location where the barrier directive was encountered.
601 /// \param DK The kind of directive that caused the barrier.
602 /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
603 /// \param CheckCancelFlag Flag to indicate a cancel barrier return value
604 /// should be checked and acted upon.
605 ///
606 /// \returns The insertion point after the barrier.
607 InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK,
608 bool ForceSimpleCall = false,
609 bool CheckCancelFlag = true);
610
611 /// Generator for '#omp cancel'
612 ///
613 /// \param Loc The location where the directive was encountered.
614 /// \param IfCondition The evaluated 'if' clause expression, if any.
615 /// \param CanceledDirective The kind of directive that is canceled.
616 ///
617 /// \returns The insertion point after the barrier.
618 InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition,
619 omp::Directive CanceledDirective);
620
621 /// Generator for '#omp parallel'
622 ///
623 /// \param Loc The insert and source location description.
624 /// \param AllocaIP The insertion points to be used for alloca instructions.
625 /// \param BodyGenCB Callback that will generate the region code.
626 /// \param PrivCB Callback to copy a given variable (think copy constructor).
627 /// \param FiniCB Callback to finalize variable copies.
628 /// \param IfCondition The evaluated 'if' clause expression, if any.
629 /// \param NumThreads The evaluated 'num_threads' clause expression, if any.
630 /// \param ProcBind The value of the 'proc_bind' clause (see ProcBindKind).
631 /// \param IsCancellable Flag to indicate a cancellable parallel region.
632 ///
633 /// \returns The insertion position *after* the parallel.
636 BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
637 FinalizeCallbackTy FiniCB, Value *IfCondition,
638 Value *NumThreads, omp::ProcBindKind ProcBind,
639 bool IsCancellable);
640
641 /// Generator for the control flow structure of an OpenMP canonical loop.
642 ///
643 /// This generator operates on the logical iteration space of the loop, i.e.
644 /// the caller only has to provide a loop trip count of the loop as defined by
645 /// base language semantics. The trip count is interpreted as an unsigned
646 /// integer. The induction variable passed to \p BodyGenCB will be of the same
647 /// type and run from 0 to \p TripCount - 1. It is up to the callback to
648 /// convert the logical iteration variable to the loop counter variable in the
649 /// loop body.
650 ///
651 /// \param Loc The insert and source location description. The insert
652 /// location can be between two instructions or the end of a
653 /// degenerate block (e.g. a BB under construction).
654 /// \param BodyGenCB Callback that will generate the loop body code.
655 /// \param TripCount Number of iterations the loop body is executed.
656 /// \param Name Base name used to derive BB and instruction names.
657 ///
658 /// \returns An object representing the created control flow structure which
659 /// can be used for loop-associated directives.
661 LoopBodyGenCallbackTy BodyGenCB,
662 Value *TripCount,
663 const Twine &Name = "loop");
664
665 /// Generator for the control flow structure of an OpenMP canonical loop.
666 ///
667 /// Instead of a logical iteration space, this allows specifying user-defined
668 /// loop counter values using increment, upper- and lower bounds. To
669 /// disambiguate the terminology when counting downwards, instead of lower
670 /// bounds we use \p Start for the loop counter value in the first body
671 /// iteration.
672 ///
673 /// Consider the following limitations:
674 ///
675 /// * A loop counter space over all integer values of its bit-width cannot be
676 /// represented (e.g. using uint8_t, its loop trip count of 256 cannot be
677 /// stored into an 8 bit integer):
678 ///
679 /// DO I = 0, 255, 1
680 ///
681 /// * Unsigned wrapping is only supported when wrapping only "once"; E.g.
682 /// effectively counting downwards:
683 ///
684 /// for (uint8_t i = 100u; i > 0; i += 127u)
685 ///
686 ///
687 /// TODO: May need to add additional parameters to represent:
688 ///
689 /// * Allow representing downcounting with unsigned integers.
690 ///
691 /// * Sign of the step and the comparison operator might disagree:
692 ///
693 /// for (int i = 0; i < 42; i -= 1u)
694 ///
695 ///
696 /// \param Loc The insert and source location description.
697 /// \param BodyGenCB Callback that will generate the loop body code.
698 /// \param Start Value of the loop counter for the first iteration.
699 /// \param Stop Loop counter values past this will stop the loop.
700 /// \param Step Loop counter increment after each iteration; negative
701 /// means counting down.
702 /// \param IsSigned Whether Start, Stop and Step are signed integers.
703 /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop
704 /// counter.
705 /// \param ComputeIP Insertion point for instructions computing the trip
706 /// count. Can be used to ensure the trip count is available
707 /// at the outermost loop of a loop nest. If not set,
708 /// defaults to the preheader of the generated loop.
709 /// \param Name Base name used to derive BB and instruction names.
710 ///
711 /// \returns An object representing the created control flow structure which
712 /// can be used for loop-associated directives.
714 LoopBodyGenCallbackTy BodyGenCB,
715 Value *Start, Value *Stop, Value *Step,
716 bool IsSigned, bool InclusiveStop,
717 InsertPointTy ComputeIP = {},
718 const Twine &Name = "loop");
719
720 /// Collapse a loop nest into a single loop.
721 ///
722 /// Merges loops of a loop nest into a single CanonicalLoopNest representation
723 /// that has the same number of innermost loop iterations as the original loop
724 /// nest. The induction variables of the input loops are derived from the
725 /// collapsed loop's induction variable. This is intended to be used to
726 /// implement OpenMP's collapse clause. Before applying a directive,
727 /// collapseLoops normalizes a loop nest to contain only a single loop and the
728 /// directive's implementation does not need to handle multiple loops itself.
729 /// This does not remove the need to handle all loop nest handling by
730 /// directives, such as the ordered(<n>) clause or the simd schedule-clause
731 /// modifier of the worksharing-loop directive.
732 ///
733 /// Example:
734 /// \code
735 /// for (int i = 0; i < 7; ++i) // Canonical loop "i"
736 /// for (int j = 0; j < 9; ++j) // Canonical loop "j"
737 /// body(i, j);
738 /// \endcode
739 ///
740 /// After collapsing with Loops={i,j}, the loop is changed to
741 /// \code
742 /// for (int ij = 0; ij < 63; ++ij) {
743 /// int i = ij / 9;
744 /// int j = ij % 9;
745 /// body(i, j);
746 /// }
747 /// \endcode
748 ///
749 /// In the current implementation, the following limitations apply:
750 ///
751 /// * All input loops have an induction variable of the same type.
752 ///
753 /// * The collapsed loop will have the same trip count integer type as the
754 /// input loops. Therefore it is possible that the collapsed loop cannot
755 /// represent all iterations of the input loops. For instance, assuming a
756 /// 32 bit integer type, and two input loops both iterating 2^16 times, the
757 /// theoretical trip count of the collapsed loop would be 2^32 iterations,
758 /// which cannot be represented in a 32-bit integer. Behavior is undefined
759 /// in this case.
760 ///
761 /// * The trip counts of every input loop must be available at \p ComputeIP.
762 /// Non-rectangular loops are not yet supported.
763 ///
764 /// * At each nest level, code between a surrounding loop and its nested loop
765 /// is hoisted into the loop body, and such code will be executed more
766 /// often than before collapsing (or not at all if any inner loop iteration
767 /// has a trip count of 0). This is permitted by the OpenMP specification.
768 ///
769 /// \param DL Debug location for instructions added for collapsing,
770 /// such as instructions to compute/derive the input loop's
771 /// induction variables.
772 /// \param Loops Loops in the loop nest to collapse. Loops are specified
773 /// from outermost-to-innermost and every control flow of a
774 /// loop's body must pass through its directly nested loop.
775 /// \param ComputeIP Where additional instructions that compute the collapsed
776 /// trip count are inserted. If not set, defaults to before the
777 /// generated loop.
778 ///
779 /// \returns The CanonicalLoopInfo object representing the collapsed loop.
782 InsertPointTy ComputeIP);
783
784 /// Get the default alignment value for given target
785 ///
786 /// \param TargetTriple Target triple
787 /// \param Features StringMap which describes extra CPU features
788 static unsigned getOpenMPDefaultSimdAlign(const Triple &TargetTriple,
789 const StringMap<bool> &Features);
790
791 /// Retrieve (or create if non-existent) the address of a declare
792 /// target variable, used in conjunction with registerTargetGlobalVariable
793 /// to create declare target global variables.
794 ///
795 /// \param CaptureClause - enumerator corresponding to the OpenMP capture
796 /// clause used in conjunction with the variable being registered (link,
797 /// to, enter).
798 /// \param DeviceClause - enumerator corresponding to the OpenMP device
799 /// clause used in conjunction with the variable being registered (nohost,
800 /// host, any)
801 /// \param IsDeclaration - boolean stating if the variable being registered
802 /// is a declaration-only and not a definition
803 /// \param IsExternallyVisible - boolean stating if the variable is externally
804 /// visible
805 /// \param EntryInfo - Unique entry information for the value generated
806 /// using getTargetEntryUniqueInfo, used to name generated pointer references
807 /// to the declare target variable
808 /// \param MangledName - the mangled name of the variable being registered
809 /// \param GeneratedRefs - references generated by invocations of
810 /// registerTargetGlobalVariable invoked from getAddrOfDeclareTargetVar,
811 /// these are required by Clang for book keeping.
812 /// \param OpenMPSIMD - if OpenMP SIMD mode is currently enabled
813 /// \param TargetTriple - The OpenMP device target triple we are compiling
814 /// for
815 /// \param LlvmPtrTy - The type of the variable we are generating or
816 /// retrieving an address for
817 /// \param GlobalInitializer - a lambda function which creates a constant
818 /// used for initializing a pointer reference to the variable in certain
819 /// cases. If a nullptr is passed, it will default to utilising the original
820 /// variable to initialize the pointer reference.
821 /// \param VariableLinkage - a lambda function which returns the variable's
822 /// linkage type, if unspecified and a nullptr is given, it will instead
823 /// utilise the linkage stored on the existing global variable in the
824 /// LLVMModule.
828 bool IsDeclaration, bool IsExternallyVisible,
829 TargetRegionEntryInfo EntryInfo, StringRef MangledName,
830 std::vector<GlobalVariable *> &GeneratedRefs, bool OpenMPSIMD,
831 std::vector<Triple> TargetTriple, Type *LlvmPtrTy,
832 std::function<Constant *()> GlobalInitializer,
833 std::function<GlobalValue::LinkageTypes()> VariableLinkage);
834
835 /// Registers a target variable for device or host.
836 ///
837 /// \param CaptureClause - enumerator corresponding to the OpenMP capture
838 /// clause used in conjunction with the variable being registered (link,
839 /// to, enter).
840 /// \param DeviceClause - enumerator corresponding to the OpenMP device
841 /// clause used in conjunction with the variable being registered (nohost,
842 /// host, any)
843 /// \param IsDeclaration - boolean stating if the variable being registered
844 /// is a declaration-only and not a definition
845 /// \param IsExternallyVisible - boolean stating if the variable is externally
846 /// visible
847 /// \param EntryInfo - Unique entry information for the value generated
848 /// using getTargetEntryUniqueInfo, used to name generated pointer references
849 /// to the declare target variable
850 /// \param MangledName - the mangled name of the variable being registered
851 /// \param GeneratedRefs - references generated by invocations of
852 /// registerTargetGlobalVariable these are required by Clang for book
853 /// keeping.
854 /// \param OpenMPSIMD - if OpenMP SIMD mode is currently enabled
855 /// \param TargetTriple - The OpenMP device target triple we are compiling
856 /// for
857 /// \param GlobalInitializer - a lambda function which creates a constant
858 /// used for initializing a pointer reference to the variable in certain
859 /// cases. If a nullptr is passed, it will default to utilising the original
860 /// variable to initialize the pointer reference.
861 /// \param VariableLinkage - a lambda function which returns the variable's
862 /// linkage type. If unspecified (a nullptr is given), it will instead
863 /// utilise the linkage stored on the existing global variable in the
864 /// LLVMModule.
865 /// \param LlvmPtrTy - The type of the variable we are generating or
866 /// retrieving an address for
867 /// \param Addr - the original llvm value (addr) of the variable to be
868 /// registered
872 bool IsDeclaration, bool IsExternallyVisible,
873 TargetRegionEntryInfo EntryInfo, StringRef MangledName,
874 std::vector<GlobalVariable *> &GeneratedRefs, bool OpenMPSIMD,
875 std::vector<Triple> TargetTriple,
876 std::function<Constant *()> GlobalInitializer,
877 std::function<GlobalValue::LinkageTypes()> VariableLinkage,
878 Type *LlvmPtrTy, Constant *Addr);
879
880private:
881 /// Modifies the canonical loop to be a statically-scheduled workshare loop.
882 ///
883 /// This takes \p CLI, which describes a canonical loop such as the one
884 /// created by createCanonicalLoop, and emits additional instructions to
885 /// turn it into a workshare loop. In particular, it calls an OpenMP
886 /// runtime function in the preheader to obtain the loop bounds to be used in
887 /// the current thread, updates the relevant instructions in the canonical
888 /// loop and calls an OpenMP runtime finalization function after the loop.
889 ///
890 /// \param DL Debug location for instructions added for the
891 /// workshare-loop construct itself.
892 /// \param CLI A descriptor of the canonical loop to workshare.
893 /// \param AllocaIP An insertion point for Alloca instructions usable in the
894 /// preheader of the loop.
895 /// \param NeedsBarrier Indicates whether a barrier must be inserted after
896 /// the loop.
897 ///
898 /// \returns Point where to insert code after the workshare construct.
899 InsertPointTy applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
900 InsertPointTy AllocaIP,
901 bool NeedsBarrier);
902
903 /// Modifies the canonical loop to be a statically-scheduled workshare loop
904 /// with a user-specified chunk size.
905 ///
906 /// \param DL Debug location for instructions added for the
907 /// workshare-loop construct itself.
908 /// \param CLI A descriptor of the canonical loop to workshare.
909 /// \param AllocaIP An insertion point for Alloca instructions usable in
910 /// the preheader of the loop.
911 /// \param NeedsBarrier Indicates whether a barrier must be inserted after the
912 /// loop.
913 /// \param ChunkSize The user-specified chunk size.
914 ///
915 /// \returns Point where to insert code after the workshare construct.
916 InsertPointTy applyStaticChunkedWorkshareLoop(DebugLoc DL,
918 InsertPointTy AllocaIP,
919 bool NeedsBarrier,
920 Value *ChunkSize);
921
922 /// Modifies the canonical loop to be a dynamically-scheduled workshare loop.
923 ///
924 /// This takes \p CLI, which describes a canonical loop such as the one
925 /// created by createCanonicalLoop, and emits additional instructions to
926 /// turn it into a workshare loop. In particular, it calls an OpenMP
927 /// runtime function in the preheader to obtain, and then in each iteration
928 /// to update, the loop counter.
929 ///
930 /// \param DL Debug location for instructions added for the
931 /// workshare-loop construct itself.
932 /// \param CLI A descriptor of the canonical loop to workshare.
933 /// \param AllocaIP An insertion point for Alloca instructions usable in the
934 /// preheader of the loop.
935 /// \param SchedType Type of scheduling to be passed to the init function.
936 /// \param NeedsBarrier Indicates whether a barrier must be inserted after
937 /// the loop.
938 /// \param Chunk The size of loop chunk considered as a unit when
939 /// scheduling. If nullptr, defaults to 1.
940 ///
941 /// \returns Point where to insert code after the workshare construct.
942 InsertPointTy applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
943 InsertPointTy AllocaIP,
944 omp::OMPScheduleType SchedType,
945 bool NeedsBarrier,
946 Value *Chunk = nullptr);
947
948 /// Create an alternative version of the loop to support the if clause.
949 ///
950 /// An OpenMP if clause can require generating a second loop. This loop
951 /// will be executed when the if clause condition is not met. createIfVersion
952 /// adds a branch instruction to the copied loop if \p IfCond is not met.
953 ///
954 /// \param Loop Original loop which should be versioned.
955 /// \param IfCond Value which corresponds to the if clause condition.
956 /// \param VMap Value to value map to define relation between
957 /// original and copied loop values and loop blocks.
958 /// \param NamePrefix Optional name prefix for if.then if.else blocks.
959 void createIfVersion(CanonicalLoopInfo *Loop, Value *IfCond,
960 ValueToValueMapTy &VMap, const Twine &NamePrefix = "");
961
962public:
963 /// Modifies the canonical loop to be a workshare loop.
964 ///
965 /// This takes \p CLI, which describes a canonical loop such as the one
966 /// created by createCanonicalLoop, and emits additional instructions to
967 /// turn it into a workshare loop. In particular, it calls an OpenMP
968 /// runtime function in the preheader to obtain the loop bounds to be used in
969 /// the current thread, updates the relevant instructions in the canonical
970 /// loop and calls an OpenMP runtime finalization function after the loop.
971 ///
972 /// The concrete transformation is done by applyStaticWorkshareLoop,
973 /// applyStaticChunkedWorkshareLoop, or applyDynamicWorkshareLoop, depending
974 /// on the value of \p SchedKind and \p ChunkSize.
975 ///
976 /// \param DL Debug location for instructions added for the
977 /// workshare-loop construct itself.
978 /// \param CLI A descriptor of the canonical loop to workshare.
979 /// \param AllocaIP An insertion point for Alloca instructions usable in the
980 /// preheader of the loop.
981 /// \param NeedsBarrier Indicates whether a barrier must be inserted after
982 /// the loop.
983 /// \param SchedKind Scheduling algorithm to use.
984 /// \param ChunkSize The chunk size for the inner loop.
985 /// \param HasSimdModifier Whether the simd modifier is present in the
986 /// schedule clause.
987 /// \param HasMonotonicModifier Whether the monotonic modifier is present in
988 /// the schedule clause.
989 /// \param HasNonmonotonicModifier Whether the nonmonotonic modifier is
990 /// present in the schedule clause.
991 /// \param HasOrderedClause Whether the (parameterless) ordered clause is
992 /// present.
993 ///
994 /// \returns Point where to insert code after the workshare construct.
997 bool NeedsBarrier,
998 llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default,
999 Value *ChunkSize = nullptr, bool HasSimdModifier = false,
1000 bool HasMonotonicModifier = false, bool HasNonmonotonicModifier = false,
1001 bool HasOrderedClause = false);
1002
1003 /// Tile a loop nest.
1004 ///
1005 /// Tiles the loops of \p Loops by the tile sizes in \p TileSizes. Loops in
1006 /// \p Loops must be perfectly nested, from outermost to innermost loop
1007 /// (i.e. Loops.front() is the outermost loop). The trip count llvm::Value
1008 /// of every loop and every tile size must be usable in the outermost
1009 /// loop's preheader. This implies that the loop nest is rectangular.
1010 ///
1011 /// Example:
1012 /// \code
1013 /// for (int i = 0; i < 15; ++i) // Canonical loop "i"
1014 /// for (int j = 0; j < 14; ++j) // Canonical loop "j"
1015 /// body(i, j);
1016 /// \endcode
1017 ///
1018 /// After tiling with Loops={i,j} and TileSizes={5,7}, the loop is changed to
1019 /// \code
1020 /// for (int i1 = 0; i1 < 3; ++i1)
1021 /// for (int j1 = 0; j1 < 2; ++j1)
1022 /// for (int i2 = 0; i2 < 5; ++i2)
1023 /// for (int j2 = 0; j2 < 7; ++j2)
1024 /// body(i1*5+i2, j1*7+j2);
1025 /// \endcode
1026 ///
1027 /// The returned loops are {i1,j1,i2,j2}. The loops i1 and j1 are
1028 /// referred to as the floor, and the loops i2 and j2 as the tiles. Tiling also
1029 /// handles non-constant trip counts, non-constant tile sizes and trip counts
1030 /// that are not multiples of the tile size. In the latter case the tile loop
1031 /// of the last floor-loop iteration will have fewer iterations than specified
1032 /// as its tile size.
1033 ///
1034 ///
1035 /// @param DL Debug location for instructions added by tiling, for
1036 /// instance the floor- and tile trip count computation.
1037 /// @param Loops Loops to tile. The CanonicalLoopInfo objects are
1038 /// invalidated by this method, i.e. should not be used after
1039 /// tiling.
1040 /// @param TileSizes For each loop in \p Loops, the tile size for that
1041 /// dimension.
1042 ///
1043 /// \returns A list of generated loops. Contains twice as many loops as the
1044 /// input loop nest; the first half are the floor loops and the
1045 /// second half are the tile loops.
1046 std::vector<CanonicalLoopInfo *>
1048 ArrayRef<Value *> TileSizes);
1049
1050 /// Fully unroll a loop.
1051 ///
1052 /// Instead of unrolling the loop immediately (and duplicating its body
1053 /// instructions), it is deferred to LLVM's LoopUnrollPass by adding loop
1054 /// metadata.
1055 ///
1056 /// \param DL Debug location for instructions added by unrolling.
1057 /// \param Loop The loop to unroll. The loop will be invalidated.
1059
1060 /// Fully or partially unroll a loop. How the loop is unrolled is determined
1061 /// using LLVM's LoopUnrollPass.
1062 ///
1063 /// \param DL Debug location for instructions added by unrolling.
1064 /// \param Loop The loop to unroll. The loop will be invalidated.
1066
1067 /// Partially unroll a loop.
1068 ///
1069 /// The CanonicalLoopInfo of the unrolled loop for use with chained
1070 /// loop-associated directives can be requested using \p UnrolledCLI. Not
1071 /// needing the CanonicalLoopInfo allows more efficient code generation by
1072 /// deferring the actual unrolling to the LoopUnrollPass using loop metadata.
1073 /// A loop-associated directive applied to the unrolled loop needs to know the
1074 /// new trip count which means that if using a heuristically determined unroll
1075 /// factor (\p Factor == 0), that factor must be computed immediately. We are
1076 /// using the same logic as the LoopUnrollPass to derive the unroll factor,
1077 /// but which assumes that some canonicalization has taken place (e.g.
1078 /// Mem2Reg, LICM, GVN, Inlining, etc.). That is, the heuristic will perform
1079 /// better when the unrolled loop's CanonicalLoopInfo is not needed.
1080 ///
1081 /// \param DL Debug location for instructions added by unrolling.
1082 /// \param Loop The loop to unroll. The loop will be invalidated.
1083 /// \param Factor The factor to unroll the loop by. A factor of 0
1084 /// indicates that a heuristic should be used to determine
1085 /// the unroll-factor.
1086 /// \param UnrolledCLI If non-null, receives the CanonicalLoopInfo of the
1087 /// partially unrolled loop. Otherwise, uses loop metadata
1088 /// to defer unrolling to the LoopUnrollPass.
1089 void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor,
1090 CanonicalLoopInfo **UnrolledCLI);
1091
1092 /// Add metadata to simd-ize a loop. If IfCond is not nullptr, the loop
1093 /// is cloned. The metadata which prevents vectorization is added to
1094 /// the cloned loop. The cloned loop is executed when IfCond is evaluated
1095 /// to false.
1096 ///
1097 /// \param Loop The loop to simd-ize.
1098 /// \param AlignedVars The map which contains pairs of the pointer
1099 /// and its corresponding alignment.
1100 /// \param IfCond The value which corresponds to the if clause
1101 /// condition.
1102 /// \param Order The enum to map order clause.
1103 /// \param Simdlen The Simdlen length to apply to the simd loop.
1104 /// \param Safelen The Safelen length to apply to the simd loop.
1106 MapVector<Value *, Value *> AlignedVars, Value *IfCond,
1107 omp::OrderKind Order, ConstantInt *Simdlen,
1108 ConstantInt *Safelen);
1109
1110 /// Generator for '#pragma omp flush'
1111 ///
1112 /// \param Loc The location where the flush directive was encountered.
1113 void createFlush(const LocationDescription &Loc);
1114
1115 /// Generator for '#pragma omp taskwait'
1116 ///
1117 /// \param Loc The location where the taskwait directive was encountered.
1118 void createTaskwait(const LocationDescription &Loc);
1119
1120 /// Generator for '#pragma omp taskyield'
1121 ///
1122 /// \param Loc The location where the taskyield directive was encountered.
1123 void createTaskyield(const LocationDescription &Loc);
1124
1125 /// A struct to pack the relevant information for an OpenMP depend clause.
1126 struct DependData {
1130 explicit DependData() = default;
1132 Value *DepVal)
1134 };
1135
1136 /// Generator for `#pragma omp task`
1137 ///
1138 /// \param Loc The location where the task construct was encountered.
1139 /// \param AllocaIP The insertion point to be used for alloca instructions.
1140 /// \param BodyGenCB Callback that will generate the region code.
1141 /// \param Tied True if the task is tied, false if the task is untied.
1142 /// \param Final i1 value which is `true` if the task is final, `false` if the
1143 /// task is not final.
1144 /// \param IfCondition i1 value. If it evaluates to `false`, an undeferred
1145 /// task is generated, and the encountering thread must
1146 /// suspend the current task region, which cannot be resumed
1147 /// until the structured block associated with the generated
1148 /// task has completed.
1149 /// \param Dependencies Depend-clause data for the task (default: none).
1150 InsertPointTy createTask(const LocationDescription &Loc,
1151 InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB,
1152 bool Tied = true, Value *Final = nullptr,
1153 Value *IfCondition = nullptr,
1154 SmallVector<DependData> Dependencies = {});
1155
1156 /// Generator for the taskgroup construct
1157 ///
1158 /// \param Loc The location where the taskgroup construct was encountered.
1159 /// \param AllocaIP The insertion point to be used for alloca instructions.
1160 /// \param BodyGenCB Callback that will generate the region code.
1161 InsertPointTy createTaskgroup(const LocationDescription &Loc,
1162 InsertPointTy AllocaIP,
1163 BodyGenCallbackTy BodyGenCB);
1164
1166 std::function<std::tuple<std::string, uint64_t>()>;
1167
1168 /// Creates a unique info for a target entry when provided a filename and
1169 /// line number.
1170 ///
1171 /// \param CallBack A callback function which should return the filename the
1172 /// entry resides in as well as the line number for the target entry
1173 /// \param ParentName The name of the parent the target entry resides in, if
1174 /// any.
1177 StringRef ParentName = "");
1178
1179 /// Functions used to generate reductions. Such functions take two Values
1180 /// representing LHS and RHS of the reduction, respectively, and a reference
1181 /// to the value that is updated to refer to the reduction result.
1184
1185 /// Functions used to generate atomic reductions. Such functions take two
1186 /// Values representing pointers to LHS and RHS of the reduction, as well as
1187 /// the element type of these pointers. They are expected to atomically
1188 /// update the LHS to the reduced value.
1191
1192 /// Information about an OpenMP reduction.
1200
1201 /// Reduction element type, must match pointee type of variable.
1203
1204 /// Reduction variable of pointer type.
1206
1207 /// Thread-private partial reduction variable.
1209
1210 /// Callback for generating the reduction body. The IR produced by this will
1211 /// be used to combine two values in a thread-safe context, e.g., under
1212 /// lock or within the same thread, and therefore need not be atomic.
1214
1215 /// Callback for generating the atomic reduction body, may be null. The IR
1216 /// produced by this will be used to atomically combine two values during
1217 /// reduction. If null, the implementation will use the non-atomic version
1218 /// along with the appropriate synchronization mechanisms.
1220 };
1221
1222 // TODO: provide atomic and non-atomic reduction generators for reduction
1223 // operators defined by the OpenMP specification.
1224
1225 /// Generator for '#omp reduction'.
1226 ///
1227 /// Emits the IR instructing the runtime to perform the specific kind of
1228 /// reductions. Expects reduction variables to have been privatized and
1229 /// initialized to reduction-neutral values separately. Emits the calls to
1230 /// runtime functions as well as the reduction function and the basic blocks
1231 /// performing the reduction atomically and non-atomically.
1232 ///
1233 /// The code emitted for the following:
1234 ///
1235 /// \code
1236 /// type var_1;
1237 /// type var_2;
1238 /// #pragma omp <directive> reduction(reduction-op:var_1,var_2)
1239 /// /* body */;
1240 /// \endcode
1241 ///
1242 /// corresponds to the following sketch.
1243 ///
1244 /// \code
1245 /// void _outlined_par() {
1246 /// // N is the number of different reductions.
1247 /// void *red_array[] = {privatized_var_1, privatized_var_2, ...};
1248 /// switch(__kmpc_reduce(..., N, /*size of data in red array*/, red_array,
1249 /// _omp_reduction_func,
1250 /// _gomp_critical_user.reduction.var)) {
1251 /// case 1: {
1252 /// var_1 = var_1 <reduction-op> privatized_var_1;
1253 /// var_2 = var_2 <reduction-op> privatized_var_2;
1254 /// // ...
1255 /// __kmpc_end_reduce(...);
1256 /// break;
1257 /// }
1258 /// case 2: {
1259 /// _Atomic<ReductionOp>(var_1, privatized_var_1);
1260 /// _Atomic<ReductionOp>(var_2, privatized_var_2);
1261 /// // ...
1262 /// break;
1263 /// }
1264 /// default: break;
1265 /// }
1266 /// }
1267 ///
1268 /// void _omp_reduction_func(void **lhs, void **rhs) {
1269 /// *(type *)lhs[0] = *(type *)lhs[0] <reduction-op> *(type *)rhs[0];
1270 /// *(type *)lhs[1] = *(type *)lhs[1] <reduction-op> *(type *)rhs[1];
1271 /// // ...
1272 /// }
1273 /// \endcode
1274 ///
1275 /// \param Loc The location where the reduction was
1276 /// encountered. Must be within the associated
1277 /// directive and after the last local access to the
1278 /// reduction variables.
1279 /// \param AllocaIP An insertion point suitable for allocas usable
1280 /// in reductions.
1281 /// \param ReductionInfos A list of info on each reduction variable.
1282 /// \param IsNoWait A flag set if the reduction is marked as nowait.
1284 InsertPointTy AllocaIP,
1285 ArrayRef<ReductionInfo> ReductionInfos,
1286 bool IsNoWait = false);
1287
1288 ///}
1289
1290 /// Return the insertion point used by the underlying IRBuilder.
1292
1293 /// Update the internal location to \p Loc.
1295 Builder.restoreIP(Loc.IP);
1297 return Loc.IP.getBlock() != nullptr;
1298 }
1299
1300 /// Return the function declaration for the runtime function with \p FnID.
1303
1305
1306 /// Return the (LLVM-IR) string describing the source location \p LocStr.
1307 Constant *getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize);
1308
1309 /// Return the (LLVM-IR) string describing the default source location.
1311
1312 /// Return the (LLVM-IR) string describing the source location identified by
1313 /// the arguments.
1314 Constant *getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName,
1315 unsigned Line, unsigned Column,
1316 uint32_t &SrcLocStrSize);
1317
1318 /// Return the (LLVM-IR) string describing the DebugLoc \p DL. Use \p F as
1319 /// fallback if \p DL does not specify the function name.
1321 Function *F = nullptr);
1322
1323 /// Return the (LLVM-IR) string describing the source location \p Loc.
1324 Constant *getOrCreateSrcLocStr(const LocationDescription &Loc,
1325 uint32_t &SrcLocStrSize);
1326
1327 /// Return an ident_t* encoding the source location \p SrcLocStr and \p Flags.
1328 /// TODO: Create an enum class for the Reserve2Flags
1329 Constant *getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize,
1330 omp::IdentFlag Flags = omp::IdentFlag(0),
1331 unsigned Reserve2Flags = 0);
1332
1333 /// Create a hidden global flag \p Name in the module with initial value \p
1334 /// Value.
1336
1337 /// Create an offloading section struct used to register this global at
1338 /// runtime.
1339 ///
1340 /// Type struct __tgt_offload_entry{
1341 /// void *addr; // Pointer to the offload entry info.
1342 /// // (function or global)
1343 /// char *name; // Name of the function or global.
1344 /// size_t size; // Size of the entry info (0 if it is a function).
1345 /// int32_t flags;
1346 /// int32_t reserved;
1347 /// };
1348 ///
1349 /// \param Addr The pointer to the global being registered.
1350 /// \param Name The symbol name associated with the global.
1351 /// \param Size The size in bytes of the global (0 for functions).
1352 /// \param Flags Flags associated with the entry.
1353 /// \param SectionName The section this entry will be placed at.
1355 int32_t Flags,
1356 StringRef SectionName = "omp_offloading_entries");
1357
1358 /// Generate control flow and cleanup for cancellation.
1359 ///
1360 /// \param CancelFlag Flag indicating if the cancellation is performed.
1361 /// \param CanceledDirective The kind of directive that is cancled.
1362 /// \param ExitCB Extra code to be generated in the exit block.
1363 void emitCancelationCheckImpl(Value *CancelFlag,
1364 omp::Directive CanceledDirective,
1365 FinalizeCallbackTy ExitCB = {});
1366
1367 /// Generate a target region entry call.
1368 ///
1369 /// \param Loc The location at which the request originated and is fulfilled.
1370 /// \param AllocaIP The insertion point to be used for alloca instructions.
1371 /// \param Return Return value of the created function returned by reference.
1372 /// \param DeviceID Identifier for the device via the 'device' clause.
1373 /// \param NumTeams Number of teams for the region via the 'num_teams' clause
1374 /// or 0 if unspecified and -1 if there is no 'teams' clause.
1375 /// \param NumThreads Number of threads via the 'thread_limit' clause.
1376 /// \param HostPtr Pointer to the host-side pointer of the target kernel.
1377 /// \param KernelArgs Array of arguments to the kernel.
1378 InsertPointTy emitTargetKernel(const LocationDescription &Loc,
1379 InsertPointTy AllocaIP, Value *&Return,
1380 Value *Ident, Value *DeviceID, Value *NumTeams,
1381 Value *NumThreads, Value *HostPtr,
1382 ArrayRef<Value *> KernelArgs);
1383
1384 /// Generate a barrier runtime call.
1385 ///
1386 /// \param Loc The location at which the request originated and is fulfilled.
1387 /// \param DK The directive which caused the barrier
1388 /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
1389 /// \param CheckCancelFlag Flag to indicate a cancel barrier return value
1390 /// should be checked and acted upon.
1391 ///
1392 /// \returns The insertion point after the barrier.
1393 InsertPointTy emitBarrierImpl(const LocationDescription &Loc,
1394 omp::Directive DK, bool ForceSimpleCall,
1395 bool CheckCancelFlag);
1396
1397 /// Generate a flush runtime call.
1398 ///
1399 /// \param Loc The location at which the request originated and is fulfilled.
1400 void emitFlush(const LocationDescription &Loc);
1401
1402 /// The finalization stack made up of finalize callbacks currently in-flight,
1403 /// wrapped into FinalizationInfo objects that reference also the finalization
1404 /// target block and the kind of cancellable directive.
1406
1407 /// Report whether the last entry of the finalization stack is cancellable
1408 /// and has directive kind \p DK. Yields false when the stack is empty.
1409 bool isLastFinalizationInfoCancellable(omp::Directive DK) {
1410 if (FinalizationStack.empty()) return false;
1411 const auto &LastInfo = FinalizationStack.back();
1412 return LastInfo.IsCancellable && LastInfo.DK == DK;
1413 }
1414
1415 /// Generate a taskwait runtime call.
1416 ///
1417 /// \param Loc The location at which the request originated and is fulfilled.
1418 void emitTaskwaitImpl(const LocationDescription &Loc);
1419
1420 /// Generate a taskyield runtime call.
1421 ///
1422 /// \param Loc The location at which the request originated and is fulfilled.
1423 void emitTaskyieldImpl(const LocationDescription &Loc);
1424
1425 /// Return the current thread ID.
1426 ///
1427 /// \param Ident The ident (ident_t*) describing the query origin.
1429
1430 /// The OpenMPIRBuilder Configuration
1432
1433 /// The underlying LLVM-IR module
1435
1436 /// The LLVM-IR Builder used to create IR.
1438
1439 /// Map to remember source location strings
1441
1442 /// Map to remember existing ident_t*.
1444
1445 /// Info manager to keep track of target regions.
1447
1448 /// Helper that contains information about regions we need to outline
1449 /// during finalization.
1451 using PostOutlineCBTy = std::function<void(Function &)>;
1455
1456 /// Collect all blocks in between EntryBB and ExitBB in both the given
1457 /// vector and set.
1459 SmallVectorImpl<BasicBlock *> &BlockVector);
1460
1461 /// Return the function that contains the region to be outlined.
1462 Function *getFunction() const { return EntryBB->getParent(); }
1463 };
1464
1465 /// Collection of regions that need to be outlined during finalization.
1467
1468 /// Collection of owned canonical loop objects that eventually need to be
1469 /// free'd.
1470 std::forward_list<CanonicalLoopInfo> LoopInfos;
1471
1472 /// Add a new region to be outlined later; \p OI is moved into OutlineInfos.
1473 void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(std::move(OI)); }
1474
1475 /// An ordered map of auto-generated variables to their unique names.
1476 /// It stores variables with the following names: 1) ".gomp_critical_user_" +
1477 /// <critical_section_name> + ".var" for "omp critical" directives; 2)
1478 /// <mangled_name_for_global_var> + ".cache." for cache for threadprivate
1479 /// variables.
1481
1482 /// Computes the size of type in bytes.
1483 Value *getSizeInBytes(Value *BasePtr);
1484
1485 // Emit a branch from the current block to the Target block only if
1486 // the current block has a terminator.
1488
1489 /// If BB has no use, delete it and return. Else place BB after the current
1490 /// block, if possible, or else at the end of the function. Also add a branch
1491 /// from current block to BB if current block does not have a terminator.
1492 void emitBlock(BasicBlock *BB, Function *CurFn, bool IsFinished = false);
1493
1494 /// Emits code for OpenMP 'if' clause using specified \a BodyGenCallbackTy
1495 /// Here is the logic:
1496 /// if (Cond) {
1497 /// ThenGen();
1498 /// } else {
1499 /// ElseGen();
1500 /// }
1502 BodyGenCallbackTy ElseGen, InsertPointTy AllocaIP = {});
1503
1504 /// Create the global variable holding the offload mappings information.
1506 std::string VarName);
1507
1508 /// Create the global variable holding the offload names information.
1511 std::string VarName);
1512
1515 AllocaInst *Args = nullptr;
1517 };
1518
1519 /// Create the allocas instruction used in call to mapper functions.
1521 InsertPointTy AllocaIP, unsigned NumOperands,
1523
1524 /// Create the call for the target mapper function.
1525 /// \param Loc The source location description.
1526 /// \param MapperFunc Function to be called.
1527 /// \param SrcLocInfo Source location information global.
1528 /// \param MaptypesArg The argument types.
1529 /// \param MapnamesArg The argument names.
1530 /// \param MapperAllocas The AllocaInst used for the call.
1531 /// \param DeviceID Device ID for the call.
1532 /// \param NumOperands Number of operands in the call.
1533 void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc,
1534 Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg,
1535 struct MapperAllocas &MapperAllocas, int64_t DeviceID,
1536 unsigned NumOperands);
1537
1538 /// Container for the arguments used to pass data to the runtime library.
1540 /// The array of base pointer passed to the runtime library.
1542 /// The array of section pointers passed to the runtime library.
1544 /// The array of sizes passed to the runtime library.
1545 Value *SizesArray = nullptr;
1546 /// The array of map types passed to the runtime library for the beginning
1547 /// of the region or for the entire region if there are no separate map
1548 /// types for the region end.
1550 /// The array of map types passed to the runtime library for the end of the
1551 /// region, or nullptr if there are no separate map types for the region
1552 /// end.
1554 /// The array of user-defined mappers passed to the runtime library.
1556 /// The array of original declaration names of mapped pointers sent to the
1557 /// runtime library for debugging
1559
1560 explicit TargetDataRTArgs() {}
1569 };
1570
1571 /// Data structure that contains the needed information to construct the
1572 /// kernel args vector.
1574 /// Number of arguments passed to the runtime library.
1576 /// Arguments passed to the runtime library
1578 /// The number of iterations
1580 /// The number of teams.
1582 /// The number of threads.
1584 /// The size of the dynamic shared memory.
1586 /// True if the kernel has 'no wait' clause.
1588
1589 /// Constructor for TargetKernelArgs
1597 };
1598
1599 /// Create the kernel args vector used by emitTargetKernel. This function
1600 /// creates various constant values that are used in the resulting args
1601 /// vector.
1602 static void getKernelArgsVector(TargetKernelArgs &KernelArgs,
1604 SmallVector<Value *> &ArgsVector);
1605
1606 /// Struct that keeps the information that should be kept throughout
1607 /// a 'target data' region.
1609 /// Set to true if device pointer information has to be obtained.
1610 bool RequiresDevicePointerInfo = false;
1611 /// Set to true if Clang emits separate runtime calls for the beginning and
1612 /// end of the region. These calls might have separate map type arrays.
1613 bool SeparateBeginEndCalls = false;
1614
1615 public:
1617
1620
1621 /// Indicate whether any user-defined mapper exists.
1622 bool HasMapper = false;
1623 /// The total number of pointers passed to the runtime library.
1624 unsigned NumberOfPtrs = 0u;
1625
1626 explicit TargetDataInfo() {}
1627 explicit TargetDataInfo(bool RequiresDevicePointerInfo,
1628 bool SeparateBeginEndCalls)
1629 : RequiresDevicePointerInfo(RequiresDevicePointerInfo),
1630 SeparateBeginEndCalls(SeparateBeginEndCalls) {}
1631 /// Clear information about the data arrays.
1634 HasMapper = false;
1635 NumberOfPtrs = 0u;
1636 }
1637 /// Return true if the current target data information has valid arrays.
1638 bool isValid() {
1642 }
1643 bool requiresDevicePointerInfo() { return RequiresDevicePointerInfo; }
1644 bool separateBeginEndCalls() { return SeparateBeginEndCalls; }
1645 };
1646
1654
1655 /// This structure contains combined information generated for mappable
1656 /// clauses, including base pointers, pointers, sizes, map types, user-defined
1657 /// mappers, and non-contiguous information.
1658 struct MapInfosTy {
1660 bool IsNonContiguous = false;
1665 };
1673
1674 /// Append arrays in \a CurInfo.
1675 void append(MapInfosTy &CurInfo) {
1677 CurInfo.BasePointers.end());
1678 Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
1680 CurInfo.DevicePointers.end());
1681 Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
1682 Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
1683 Names.append(CurInfo.Names.begin(), CurInfo.Names.end());
1685 CurInfo.NonContigInfo.Dims.end());
1687 CurInfo.NonContigInfo.Offsets.end());
1689 CurInfo.NonContigInfo.Counts.end());
1691 CurInfo.NonContigInfo.Strides.end());
1692 }
1693 };
1694
1695 /// Callback function type for functions emitting the host fallback code that
1696 /// is executed when the kernel launch fails. It takes an insertion point as
1697 /// parameter where the code should be emitted. It returns an insertion point
1698 /// that points right after the emitted code.
1700
1701 /// Generate a target region entry call and host fallback call.
1702 ///
1703 /// \param Loc The location at which the request originated and is fulfilled.
1704 /// \param OutlinedFn The outlined kernel function.
1705 /// \param OutlinedFnID The outlined function ID.
1706 /// \param EmitTargetCallFallbackCB Call back function to generate host
1707 /// fallback code.
1708 /// \param Args Data structure holding information about the kernel arguments.
1709 /// \param DeviceID Identifier for the device via the 'device' clause.
1710 /// \param RTLoc Source location identifier
1711 /// \param AllocaIP The insertion point to be used for alloca instructions.
1713 const LocationDescription &Loc, Function *OutlinedFn, Value *OutlinedFnID,
1714 EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args,
1715 Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP);
1716
1717 /// Emit the arguments to be passed to the runtime library based on the
1718 /// arrays of base pointers, pointers, sizes, map types, and mappers. If
1719 /// ForEndCall, emit map types to be passed for the end of the region instead
1720 /// of the beginning.
1724 bool EmitDebug = false,
1725 bool ForEndCall = false);
1726
1727 /// Emit an array of struct descriptors to be assigned to the offload args.
1729 InsertPointTy CodeGenIP,
1730 MapInfosTy &CombinedInfo,
1732
1733 /// Emit the arrays used to pass the captures and map information to the
1734 /// offloading runtime library. If there is no map or capture information,
1735 /// return nullptr by reference.
1737 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
1738 TargetDataInfo &Info, bool IsNonContiguous = false,
1739 function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
1740 function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
1741
1742 /// Creates offloading entry for the provided entry ID \a ID, address \a
1743 /// Addr, size \a Size, and flags \a Flags.
1745 int32_t Flags, GlobalValue::LinkageTypes,
1746 StringRef Name = "");
1747
1748 /// The kind of errors that can occur when emitting the offload entries and
1749 /// metadata.
1755
1756 /// Callback function type
1758 std::function<void(EmitMetadataErrorKind, TargetRegionEntryInfo)>;
1759
1760 // Emit the offloading entries and metadata so that the device codegen side
1761 // can easily figure out what to emit. The produced metadata looks like
1762 // this:
1763 //
1764 // !omp_offload.info = !{!1, ...}
1765 //
1766 // We only generate metadata for functions that contain target regions.
1768 EmitMetadataErrorReportFunctionTy &ErrorReportFunction);
1769
1770public:
1771 /// Generator for __kmpc_copyprivate
1772 ///
1773 /// \param Loc The source location description.
1774 /// \param BufSize Number of elements in the buffer.
1775 /// \param CpyBuf List of pointers to data to be copied.
1776 /// \param CpyFn function to call for copying data.
1777 /// \param DidIt flag variable; 1 for 'single' thread, 0 otherwise.
1778 ///
1779 /// \return The insertion position *after* the CopyPrivate call.
1780
1782 llvm::Value *BufSize, llvm::Value *CpyBuf,
1783 llvm::Value *CpyFn, llvm::Value *DidIt);
1784
1785 /// Generator for '#omp single'
1786 ///
1787 /// \param Loc The source location description.
1788 /// \param BodyGenCB Callback that will generate the region code.
1789 /// \param FiniCB Callback to finalize variable copies.
1790 /// \param IsNowait If false, a barrier is emitted.
1791 /// \param DidIt Local variable used as a flag to indicate 'single' thread
1792 ///
1793 /// \returns The insertion position *after* the single call.
1795 BodyGenCallbackTy BodyGenCB,
1796 FinalizeCallbackTy FiniCB, bool IsNowait,
1797 llvm::Value *DidIt);
1798
1799 /// Generator for '#omp master'
1800 ///
1801 /// \param Loc The insert and source location description.
1802 /// \param BodyGenCB Callback that will generate the region code.
1803 /// \param FiniCB Callback to finalize variable copies.
1804 ///
1805 /// \returns The insertion position *after* the master.
1807 BodyGenCallbackTy BodyGenCB,
1808 FinalizeCallbackTy FiniCB);
1809
1810 /// Generator for '#omp masked'
1811 ///
1812 /// \param Loc The insert and source location description.
1813 /// \param BodyGenCB Callback that will generate the region code.
1814 /// \param FiniCB Callback to finalize variable copies.
1815 ///
1816 /// \returns The insertion position *after* the masked.
1818 BodyGenCallbackTy BodyGenCB,
1820
1821 /// Generator for '#omp critical'
1822 ///
1823 /// \param Loc The insert and source location description.
1824 /// \param BodyGenCB Callback that will generate the region body code.
1825 /// \param FiniCB Callback to finalize variable copies.
1826 /// \param CriticalName name of the lock used by the critical directive
1827 /// \param HintInst Hint Instruction for hint clause associated with critical
1828 ///
1829 /// \returns The insertion position *after* the critical.
1831 BodyGenCallbackTy BodyGenCB,
1832 FinalizeCallbackTy FiniCB,
1833 StringRef CriticalName, Value *HintInst);
1834
1835 /// Generator for '#omp ordered depend (source | sink)'
1836 ///
1837 /// \param Loc The insert and source location description.
1838 /// \param AllocaIP The insertion point to be used for alloca instructions.
1839 /// \param NumLoops The number of loops in depend clause.
1840 /// \param StoreValues The value will be stored in vector address.
1841 /// \param Name The name of alloca instruction.
1842 /// \param IsDependSource If true, depend source; otherwise, depend sink.
1843 ///
1844 /// \return The insertion position *after* the ordered.
1846 InsertPointTy AllocaIP, unsigned NumLoops,
1847 ArrayRef<llvm::Value *> StoreValues,
1848 const Twine &Name, bool IsDependSource);
1849
1850 /// Generator for '#omp ordered [threads | simd]'
1851 ///
1852 /// \param Loc The insert and source location description.
1853 /// \param BodyGenCB Callback that will generate the region code.
1854 /// \param FiniCB Callback to finalize variable copies.
1855 /// \param IsThreads If true, with threads clause or without clause;
1856 /// otherwise, with simd clause;
1857 ///
1858 /// \returns The insertion position *after* the ordered.
1860 BodyGenCallbackTy BodyGenCB,
1861 FinalizeCallbackTy FiniCB,
1862 bool IsThreads);
1863
1864 /// Generator for '#omp sections'
1865 ///
1866 /// \param Loc The insert and source location description.
1867 /// \param AllocaIP The insertion points to be used for alloca instructions.
1868 /// \param SectionCBs Callbacks that will generate body of each section.
1869 /// \param PrivCB Callback to copy a given variable (think copy constructor).
1870 /// \param FiniCB Callback to finalize variable copies.
1871 /// \param IsCancellable Flag to indicate a cancellable parallel region.
1872 /// \param IsNowait If true, barrier - to ensure all sections are executed
1873 /// before moving forward will not be generated.
1874 /// \returns The insertion position *after* the sections.
1876 InsertPointTy AllocaIP,
1878 PrivatizeCallbackTy PrivCB,
1879 FinalizeCallbackTy FiniCB, bool IsCancellable,
1880 bool IsNowait);
1881
1882 /// Generator for '#omp section'
1883 ///
1884 /// \param Loc The insert and source location description.
1885 /// \param BodyGenCB Callback that will generate the region body code.
1886 /// \param FiniCB Callback to finalize variable copies.
1887 /// \returns The insertion position *after* the section.
1889 BodyGenCallbackTy BodyGenCB,
1890 FinalizeCallbackTy FiniCB);
1891
1892 /// Generate conditional branch and relevant BasicBlocks through which private
1893 /// threads copy the 'copyin' variables from Master copy to threadprivate
1894 /// copies.
1895 ///
1896 /// \param IP insertion block for copyin conditional
1897 /// \param MasterVarPtr a pointer to the master variable
1898 /// \param PrivateVarPtr a pointer to the threadprivate variable
1899 /// \param IntPtrTy Pointer size type
1900 /// \param BranchtoEnd Create a branch between the copyin.not.master blocks
1901 /// and copy.in.end block
1902 ///
1903 /// \returns The insertion point where copying operation to be emitted.
1905 Value *PrivateAddr,
1906 llvm::IntegerType *IntPtrTy,
1907 bool BranchtoEnd = true);
1908
1909 /// Create a runtime call for kmpc_Alloc
1910 ///
1911 /// \param Loc The insert and source location description.
1912 /// \param Size Size of allocated memory space
1913 /// \param Allocator Allocator information instruction
1914 /// \param Name Name of call Instruction for OMP_alloc
1915 ///
1916 /// \returns CallInst to the OMP_Alloc call
1918 Value *Allocator, std::string Name = "");
1919
1920 /// Create a runtime call for kmpc_free
1921 ///
1922 /// \param Loc The insert and source location description.
1923 /// \param Addr Address of memory space to be freed
1924 /// \param Allocator Allocator information instruction
1925 /// \param Name Name of call Instruction for OMP_Free
1926 ///
1927 /// \returns CallInst to the OMP_Free call
1929 Value *Allocator, std::string Name = "");
1930
1931 /// Create a runtime call for kmpc_threadprivate_cached
1932 ///
1933 /// \param Loc The insert and source location description.
1934 /// \param Pointer pointer to data to be cached
1935 /// \param Size size of data to be cached
1936 /// \param Name Name of call Instruction for callinst
1937 ///
1938 /// \returns CallInst to the thread private cache call.
1942 const llvm::Twine &Name = Twine(""));
1943
1944 /// Create a runtime call for __tgt_interop_init
1945 ///
1946 /// \param Loc The insert and source location description.
1947 /// \param InteropVar variable to be allocated
1948 /// \param InteropType type of interop operation
1949 /// \param Device device to which offloading will occur
1950 /// \param NumDependences number of dependence variables
1951 /// \param DependenceAddress pointer to dependence variables
1952 /// \param HaveNowaitClause does nowait clause exist
1953 ///
1954 /// \returns CallInst to the __tgt_interop_init call
1956 Value *InteropVar,
1957 omp::OMPInteropType InteropType, Value *Device,
1958 Value *NumDependences,
1959 Value *DependenceAddress,
1960 bool HaveNowaitClause);
1961
1962 /// Create a runtime call for __tgt_interop_destroy
1963 ///
1964 /// \param Loc The insert and source location description.
1965 /// \param InteropVar variable to be allocated
1966 /// \param Device device to which offloading will occur
1967 /// \param NumDependences number of dependence variables
1968 /// \param DependenceAddress pointer to dependence variables
1969 /// \param HaveNowaitClause does nowait clause exist
1970 ///
1971 /// \returns CallInst to the __tgt_interop_destroy call
1973 Value *InteropVar, Value *Device,
1974 Value *NumDependences,
1975 Value *DependenceAddress,
1976 bool HaveNowaitClause);
1977
1978 /// Create a runtime call for __tgt_interop_use
1979 ///
1980 /// \param Loc The insert and source location description.
1981 /// \param InteropVar variable to be allocated
1982 /// \param Device device to which offloading will occur
1983 /// \param NumDependences number of dependence variables
1984 /// \param DependenceAddress pointer to dependence variables
1985 /// \param HaveNowaitClause does nowait clause exist
1986 ///
1987 /// \returns CallInst to the __tgt_interop_use call
1989 Value *InteropVar, Value *Device,
1990 Value *NumDependences, Value *DependenceAddress,
1991 bool HaveNowaitClause);
1992
1993 /// The `omp target` interface
1994 ///
1995 /// For more information about the usage of this interface,
1996 /// \see openmp/libomptarget/deviceRTLs/common/include/target.h
1997 ///
1998 ///{
1999
2000 /// Create a runtime call for kmpc_target_init
2001 ///
2002 /// \param Loc The insert and source location description.
2003 /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
2004 InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD);
2005
2006 /// Create a runtime call for kmpc_target_deinit
2007 ///
2008 /// \param Loc The insert and source location description.
2009 void createTargetDeinit(const LocationDescription &Loc);
2010
2011 ///}
2012
2013private:
2014 // Sets the function attributes expected for the outlined function
2015 void setOutlinedTargetRegionFunctionAttributes(Function *OutlinedFn,
2016 int32_t NumTeams,
2017 int32_t NumThreads);
2018
2019 // Creates the function ID/Address for the given outlined function.
2020 // In the case of an embedded device function the address of the function is
2021 // used, in the case of a non-offload function a constant is created.
2022 Constant *createOutlinedFunctionID(Function *OutlinedFn,
2023 StringRef EntryFnIDName);
2024
2025 // Creates the region entry address for the outlined function
2026 Constant *createTargetRegionEntryAddr(Function *OutlinedFunction,
2027 StringRef EntryFnName);
2028
2029public:
2030 /// Functions used to generate a function with the given name.
2031 using FunctionGenCallback = std::function<Function *(StringRef FunctionName)>;
2032
2033 /// Create a unique name for the entry function using the source location
2034 /// information of the current target region. The name will be something like:
2035 ///
2036 /// __omp_offloading_DD_FFFF_PP_lBB[_CC]
2037 ///
2038 /// where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
2039 /// mangled name of the function that encloses the target region and BB is the
2040 /// line number of the target region. CC is a count added when more than one
2041 /// region is located at the same location.
2042 ///
2043 /// If this target outline function is not an offload entry, we don't need to
2044 /// register it. This may happen if it is guarded by an if clause that is
2045 /// false at compile time, or no target archs have been specified.
2046 ///
2047 /// The created target region ID is used by the runtime library to identify
2048 /// the current target region, so it only has to be unique and not
2049 /// necessarily point to anything. It could be the pointer to the outlined
2050 /// function that implements the target region, but we aren't using that so
2051 /// that the compiler doesn't need to keep that, and could therefore inline
2052 /// the host function if proven worthwhile during optimization. On the other
2053 /// hand, if emitting code for the device, the ID has to be the function
2054 /// address so that it can be retrieved from the offloading entry and launched
2055 /// by the runtime library. We also mark the outlined function to have
2056 /// external linkage in case we are emitting code for the device, because
2057 /// these functions will be entry points to the device.
2058 ///
2059 /// \param InfoManager The info manager keeping track of the offload entries
2060 /// \param EntryInfo The entry information about the function
2061 /// \param GenerateFunctionCallback The callback function to generate the code
2062 /// \param NumTeams Default number of teams
2063 /// \param NumThreads Default number of threads
2064 /// \param OutlinedFunction Pointer to the outlined function
2065 /// \param EntryFnIDName Name of the ID to be created
2067 FunctionGenCallback &GenerateFunctionCallback,
2068 int32_t NumTeams, int32_t NumThreads,
2069 bool IsOffloadEntry, Function *&OutlinedFn,
2070 Constant *&OutlinedFnID);
2071
2072 /// Registers the given function and sets up the attributes of the function.
2073 /// Returns the FunctionID.
2074 ///
2075 /// \param InfoManager The info manager keeping track of the offload entries
2076 /// \param EntryInfo The entry information about the function
2077 /// \param OutlinedFunction Pointer to the outlined function
2078 /// \param EntryFnName Name of the outlined function
2079 /// \param EntryFnIDName Name of the ID to be created
2080 /// \param NumTeams Default number of teams
2081 /// \param NumThreads Default number of threads
2083 Function *OutlinedFunction,
2084 StringRef EntryFnName,
2085 StringRef EntryFnIDName,
2086 int32_t NumTeams, int32_t NumThreads);
2087 /// Type of BodyGen to use for region codegen
2088 ///
2089 /// Priv: If device pointer privatization is required, emit the body of the
2090 /// region here. It will have to be duplicated: with and without
2091 /// privatization.
2092 /// DupNoPriv: If we need device pointer privatization, we need
2093 /// to emit the body of the region with no privatization in the 'else' branch
2094 /// of the conditional.
2095 /// NoPriv: If we don't require privatization of device
2096 /// pointers, we emit the body in between the runtime calls. This avoids
2097 /// duplicating the body code.
2099
2100 /// Callback type for creating the map infos for the kernel parameters.
2101 /// \param CodeGenIP is the insertion point where code should be generated,
2102 /// if any.
2105
2106 /// Generator for '#omp target data'
2107 ///
2108 /// \param Loc The location where the target data construct was encountered.
2109 /// \param AllocaIP The insertion points to be used for alloca instructions.
2110 /// \param CodeGenIP The insertion point at which the target directive code
2111 /// should be placed.
2112 /// \param IsBegin If true then emits begin mapper call otherwise emits
2113 /// end mapper call.
2114 /// \param DeviceID Stores the DeviceID from the device clause.
2115 /// \param IfCond Value which corresponds to the if clause condition.
2116 /// \param Info Stores all information related to the Target Data directive.
2117 /// \param GenMapInfoCB Callback that populates the MapInfos and returns.
2118 /// \param BodyGenCB Optional Callback to generate the region code.
2119 /// \param DeviceAddrCB Optional callback to generate code related to
2120 /// use_device_ptr and use_device_addr.
2121 /// \param CustomMapperCB Optional callback to generate code related to
2122 /// custom mappers.
2124 const LocationDescription &Loc, InsertPointTy AllocaIP,
2125 InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond,
2127 omp::RuntimeFunction *MapperFunc = nullptr,
2129 BodyGenTy BodyGenType)>
2130 BodyGenCB = nullptr,
2131 function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
2132 function_ref<Value *(unsigned int)> CustomMapperCB = nullptr,
2133 Value *SrcLocInfo = nullptr);
2134
2136 InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
2137
2138 /// Generator for '#omp target'
2139 ///
2140 /// \param Loc where the target data construct was encountered.
2141 /// \param CodeGenIP The insertion point where the call to the outlined
2142 /// function should be emitted.
2143 /// \param EntryInfo The entry information about the function.
2144 /// \param NumTeams Number of teams specified in the num_teams clause.
2145 /// \param NumThreads Number of threads specified in the thread_limit clause.
2146 /// \param Inputs The input values to the region that will be passed
2147 /// as arguments to the outlined function.
2148 /// \param BodyGenCB Callback that will generate the region code.
2152 TargetRegionEntryInfo &EntryInfo, int32_t NumTeams,
2153 int32_t NumThreads,
2155 GenMapInfoCallbackTy GenMapInfoCB,
2156 TargetBodyGenCallbackTy BodyGenCB);
2157
2158 /// Returns __kmpc_for_static_init_* runtime function for the specified
2159 /// size \a IVSize and sign \a IVSigned. Will create a distribute call
2160 /// __kmpc_distribute_static_init* if \a IsGPUDistribute is set.
2161 FunctionCallee createForStaticInitFunction(unsigned IVSize, bool IVSigned,
2162 bool IsGPUDistribute);
2163
2164 /// Returns __kmpc_dispatch_init_* runtime function for the specified
2165 /// size \a IVSize and sign \a IVSigned.
2166 FunctionCallee createDispatchInitFunction(unsigned IVSize, bool IVSigned);
2167
2168 /// Returns __kmpc_dispatch_next_* runtime function for the specified
2169 /// size \a IVSize and sign \a IVSigned.
2170 FunctionCallee createDispatchNextFunction(unsigned IVSize, bool IVSigned);
2171
2172 /// Returns __kmpc_dispatch_fini_* runtime function for the specified
2173 /// size \a IVSize and sign \a IVSigned.
2174 FunctionCallee createDispatchFiniFunction(unsigned IVSize, bool IVSigned);
2175
2176 /// Declarations for LLVM-IR types (simple, array, function and structure) are
2177 /// generated below. Their names are defined and used in OMPKinds.def. Here
2178 /// we provide the declarations; the initializeTypes function will provide the
2179 /// values.
2180 ///
2181 ///{
2182#define OMP_TYPE(VarName, InitValue) Type *VarName = nullptr;
2183#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
2184 ArrayType *VarName##Ty = nullptr; \
2185 PointerType *VarName##PtrTy = nullptr;
2186#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
2187 FunctionType *VarName = nullptr; \
2188 PointerType *VarName##Ptr = nullptr;
2189#define OMP_STRUCT_TYPE(VarName, StrName, ...) \
2190 StructType *VarName = nullptr; \
2191 PointerType *VarName##Ptr = nullptr;
2192#include "llvm/Frontend/OpenMP/OMPKinds.def"
2193
2194 ///}
2195
2196private:
2197 /// Create all simple and struct types exposed by the runtime and remember
2198 /// the llvm::PointerTypes of them for easy access later.
2199 void initializeTypes(Module &M);
2200
2201 /// Common interface for generating entry calls for OMP Directives.
2202 /// If the directive has a region/body, it will set the insertion
2203 /// point to the body.
2204 ///
2205 /// \param OMPD Directive to generate entry blocks for
2206 /// \param EntryCall Call to the entry OMP Runtime Function
2207 /// \param ExitBB block where the region ends.
2208 /// \param Conditional indicate if the entry call result will be used
2209 /// to evaluate a conditional of whether a thread will execute
2210 /// body code or not.
2211 ///
2212 /// \return The insertion position in exit block
2213 InsertPointTy emitCommonDirectiveEntry(omp::Directive OMPD, Value *EntryCall,
2214 BasicBlock *ExitBB,
2215 bool Conditional = false);
2216
2217 /// Common interface to finalize the region
2218 ///
2219 /// \param OMPD Directive to generate exiting code for
2220 /// \param FinIP Insertion point for emitting Finalization code and exit call
2221 /// \param ExitCall Call to the ending OMP Runtime Function
2222 /// \param HasFinalize indicate if the directive will require finalization
2223 /// and has a finalization callback in the stack that
2224 /// should be called.
2225 ///
2226 /// \return The insertion position in exit block
2227 InsertPointTy emitCommonDirectiveExit(omp::Directive OMPD,
2228 InsertPointTy FinIP,
2229 Instruction *ExitCall,
2230 bool HasFinalize = true);
2231
2232 /// Common Interface to generate OMP inlined regions
2233 ///
2234 /// \param OMPD Directive to generate inlined region for
2235 /// \param EntryCall Call to the entry OMP Runtime Function
2236 /// \param ExitCall Call to the ending OMP Runtime Function
2237 /// \param BodyGenCB Body code generation callback.
2238 /// \param FiniCB Finalization Callback. Will be called when finalizing region
2239 /// \param Conditional indicate if the entry call result will be used
2240 /// to evaluate a conditional of whether a thread will execute
2241 /// body code or not.
2242 /// \param HasFinalize indicate if the directive will require finalization
2243 /// and has a finalization callback in the stack that
2244 /// should be called.
2245 /// \param IsCancellable if HasFinalize is set to true, indicate if the
2246 /// directive should be cancellable.
2247 /// \return The insertion point after the region
2248
2250 EmitOMPInlinedRegion(omp::Directive OMPD, Instruction *EntryCall,
2251 Instruction *ExitCall, BodyGenCallbackTy BodyGenCB,
2252 FinalizeCallbackTy FiniCB, bool Conditional = false,
2253 bool HasFinalize = true, bool IsCancellable = false);
2254
2255 /// Build the final name from \p Parts, joined with the given separators.
2256 /// \param Parts different parts of the final name that need separation
2257 /// \param FirstSeparator First separator used between the initial two
2258 /// parts of the name.
2259 /// \param Separator separator used between all remaining consecutive
2260 /// parts of the name
2261 static std::string getNameWithSeparators(ArrayRef<StringRef> Parts,
2262 StringRef FirstSeparator,
2263 StringRef Separator);
2264
2265 /// Returns corresponding lock object for the specified critical region
2266 /// name. If the lock object does not exist it is created, otherwise the
2267 /// reference to the existing copy is returned.
2268 /// \param CriticalName Name of the critical region.
2269 ///
2270 Value *getOMPCriticalRegionLock(StringRef CriticalName);
2271
2272 /// Callback type for Atomic Expression update
2273 /// ex:
2274 /// \code{.cpp}
2275 /// unsigned x = 0;
2276 /// #pragma omp atomic update
2277 /// x = Expr(x_old); //Expr() is any legal operation
2278 /// \endcode
2279 ///
2280 /// \param XOld the value of the atomic memory address to use for update
2281 /// \param IRB reference to the IRBuilder to use
2282 ///
2283 /// \returns Value to update X to.
2284 using AtomicUpdateCallbackTy =
2285 const function_ref<Value *(Value *XOld, IRBuilder<> &IRB)>;
2286
2287private:
2288 enum AtomicKind { Read, Write, Update, Capture, Compare };
2289
2290 /// Determine whether to emit a flush after an atomic operation or not.
2291 ///
2292 /// \param Loc The insert and source location description.
2293 /// \param AO The required atomic ordering
2294 /// \param AK The OpenMP atomic operation kind used.
2295 ///
2296 /// \returns whether a flush was emitted or not
2297 bool checkAndEmitFlushAfterAtomic(const LocationDescription &Loc,
2298 AtomicOrdering AO, AtomicKind AK);
2299
2300 /// Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X
2301 /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
2302 /// Only Scalar data types.
2303 ///
2304 /// \param AllocaIP The insertion point to be used for alloca
2305 /// instructions.
2306 /// \param X The target atomic pointer to be updated
2307 /// \param XElemTy The element type of the atomic pointer.
2308 /// \param Expr The value to update X with.
2309 /// \param AO Atomic ordering of the generated atomic
2310 /// instructions.
2311 /// \param RMWOp The binary operation used for update. If
2312 /// operation is not supported by atomicRMW,
2313 /// or belong to {FADD, FSUB, BAD_BINOP}.
2314 /// Then a `cmpExch` based atomic will be generated.
2315 /// \param UpdateOp Code generator for complex expressions that cannot be
2316 /// expressed through atomicrmw instruction.
2317 /// \param VolatileX true if \a X is volatile.
2318 /// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the
2319 /// update expression, false otherwise.
2320 /// (e.g. true for X = X BinOp Expr)
2321 ///
2322 /// \returns A pair of the old value of X before the update, and the value
2323 /// used for the update.
2324 std::pair<Value *, Value *>
2325 emitAtomicUpdate(InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr,
2327 AtomicUpdateCallbackTy &UpdateOp, bool VolatileX,
2328 bool IsXBinopExpr);
2329
2330 /// Emit the binary op. described by \p RMWOp, using \p Src1 and \p Src2.
2331 ///
2332 /// \returns The instruction
2333 Value *emitRMWOpAsInstruction(Value *Src1, Value *Src2,
2334 AtomicRMWInst::BinOp RMWOp);
2335
2336public:
2337 /// a struct to pack relevant information while generating atomic Ops
2339 Value *Var = nullptr;
2340 Type *ElemTy = nullptr;
2341 bool IsSigned = false;
2342 bool IsVolatile = false;
2343 };
2344
2345 /// Emit atomic Read for : V = X --- Only Scalar data types.
2346 ///
2347 /// \param Loc The insert and source location description.
2348 /// \param X The target pointer to be atomically read
2349 /// \param V Memory address where to store atomically read
2350 /// value
2351 /// \param AO Atomic ordering of the generated atomic
2352 /// instructions.
2353 ///
2354 /// \return Insertion point after generated atomic read IR.
2357 AtomicOrdering AO);
2358
2359 /// Emit atomic write for : X = Expr --- Only Scalar data types.
2360 ///
2361 /// \param Loc The insert and source location description.
2362 /// \param X The target pointer to be atomically written to
2363 /// \param Expr The value to store.
2364 /// \param AO Atomic ordering of the generated atomic
2365 /// instructions.
2366 ///
2367 /// \return Insertion point after generated atomic Write IR.
2369 AtomicOpValue &X, Value *Expr,
2370 AtomicOrdering AO);
2371
2372 /// Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X
2373 /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
2374 /// Only Scalar data types.
2375 ///
2376 /// \param Loc The insert and source location description.
2377 /// \param AllocaIP The insertion point to be used for alloca instructions.
2378 /// \param X The target atomic pointer to be updated
2379 /// \param Expr The value to update X with.
2380 /// \param AO Atomic ordering of the generated atomic instructions.
2381 /// \param RMWOp The binary operation used for update. If operation
2382 /// is not supported by atomicRMW, or belong to
2383 /// {FADD, FSUB, BAD_BINOP}. Then a `cmpExch` based
2384 /// atomic will be generated.
2385 /// \param UpdateOp Code generator for complex expressions that cannot be
2386 /// expressed through atomicrmw instruction.
2387 /// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the
2388 /// update expression, false otherwise.
2389 /// (e.g. true for X = X BinOp Expr)
2390 ///
2391 /// \return Insertion point after generated atomic update IR.
2393 InsertPointTy AllocaIP, AtomicOpValue &X,
2394 Value *Expr, AtomicOrdering AO,
2396 AtomicUpdateCallbackTy &UpdateOp,
2397 bool IsXBinopExpr);
2398
2399 /// Emit atomic update for constructs: --- Only Scalar data types
2400 /// V = X; X = X BinOp Expr ,
2401 /// X = X BinOp Expr; V = X,
2402 /// V = X; X = Expr BinOp X,
2403 /// X = Expr BinOp X; V = X,
2404 /// V = X; X = UpdateOp(X),
2405 /// X = UpdateOp(X); V = X,
2406 ///
2407 /// \param Loc The insert and source location description.
2408 /// \param AllocaIP The insertion point to be used for alloca instructions.
2409 /// \param X The target atomic pointer to be updated
2410 /// \param V Memory address where to store captured value
2411 /// \param Expr The value to update X with.
2412 /// \param AO Atomic ordering of the generated atomic instructions
2413 /// \param RMWOp The binary operation used for update. If
2414 /// operation is not supported by atomicRMW, or belong to
2415 /// {FADD, FSUB, BAD_BINOP}. Then a cmpExch based
2416 /// atomic will be generated.
2417 /// \param UpdateOp Code generator for complex expressions that cannot be
2418 /// expressed through atomicrmw instruction.
2419 /// \param UpdateExpr true if X is an in place update of the form
2420 /// X = X BinOp Expr or X = Expr BinOp X
2421 /// \param IsXBinopExpr true if X is Left H.S. in Right H.S. part of the
2422 /// update expression, false otherwise.
2423 /// (e.g. true for X = X BinOp Expr)
2424 /// \param IsPostfixUpdate true if original value of 'x' must be stored in
2425 /// 'v', not an updated one.
2426 ///
2427 /// \return Insertion point after generated atomic capture IR.
2430 AtomicOpValue &X, AtomicOpValue &V, Value *Expr,
2432 AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr,
2433 bool IsPostfixUpdate, bool IsXBinopExpr);
2434
2435 /// Emit atomic compare for constructs: --- Only scalar data types
2436 /// cond-expr-stmt:
2437 /// x = x ordop expr ? expr : x;
2438 /// x = expr ordop x ? expr : x;
2439 /// x = x == e ? d : x;
2440 /// x = e == x ? d : x; (this one is not in the spec)
2441 /// cond-update-stmt:
2442 /// if (x ordop expr) { x = expr; }
2443 /// if (expr ordop x) { x = expr; }
2444 /// if (x == e) { x = d; }
2445 /// if (e == x) { x = d; } (this one is not in the spec)
2446 /// conditional-update-capture-atomic:
2447 /// v = x; cond-update-stmt; (IsPostfixUpdate=true, IsFailOnly=false)
2448 /// cond-update-stmt; v = x; (IsPostfixUpdate=false, IsFailOnly=false)
2449 /// if (x == e) { x = d; } else { v = x; } (IsPostfixUpdate=false,
2450 /// IsFailOnly=true)
2451 /// r = x == e; if (r) { x = d; } (IsPostfixUpdate=false, IsFailOnly=false)
2452 /// r = x == e; if (r) { x = d; } else { v = x; } (IsPostfixUpdate=false,
2453 /// IsFailOnly=true)
2454 ///
2455 /// \param Loc The insert and source location description.
2456 /// \param X The target atomic pointer to be updated.
2457 /// \param V Memory address where to store captured value (for
2458 /// compare capture only).
2459 /// \param R Memory address where to store comparison result
2460 /// (for compare capture with '==' only).
2461 /// \param E The expected value ('e') for forms that use an
2462 /// equality comparison or an expression ('expr') for
2463 /// forms that use 'ordop' (logically an atomic maximum or
2464 /// minimum).
2465 /// \param D The desired value for forms that use an equality
2466 /// comparison. For forms that use 'ordop', it should be
2467 /// \p nullptr.
2468 /// \param AO Atomic ordering of the generated atomic instructions.
2469 /// \param Op Atomic compare operation. It can only be ==, <, or >.
2470 /// \param IsXBinopExpr True if the conditional statement is in the form where
2471 /// x is on LHS. It only matters for < or >.
2472 /// \param IsPostfixUpdate True if original value of 'x' must be stored in
2473 /// 'v', not an updated one (for compare capture
2474 /// only).
2475 /// \param IsFailOnly True if the original value of 'x' is stored to 'v'
2476 /// only when the comparison fails. This is only valid for
2477 /// the case the comparison is '=='.
2478 ///
2479 /// \return Insertion point after generated atomic capture IR.
2484 bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly);
2485
2486 /// Create the control flow structure of a canonical OpenMP loop.
2487 ///
2488 /// The emitted loop will be disconnected, i.e. no edge to the loop's
2489 /// preheader and no terminator in the AfterBB. The OpenMPIRBuilder's
2490 /// IRBuilder location is not preserved.
2491 ///
2492 /// \param DL DebugLoc used for the instructions in the skeleton.
2493 /// \param TripCount Value to be used for the trip count.
2494 /// \param F Function in which to insert the BasicBlocks.
2495 /// \param PreInsertBefore Where to insert BBs that execute before the body,
2496 /// typically the body itself.
2497 /// \param PostInsertBefore Where to insert BBs that execute after the body.
2498 /// \param Name Base name used to derive BB
2499 /// and instruction names.
2500 ///
2501 /// \returns The CanonicalLoopInfo that represents the emitted loop.
2503 Function *F,
2504 BasicBlock *PreInsertBefore,
2505 BasicBlock *PostInsertBefore,
2506 const Twine &Name = {});
2507 /// OMP Offload Info Metadata name string
2508 const std::string ompOffloadInfoName = "omp_offload.info";
2509
2510 /// Loads all the offload entries information from the host IR
2511 /// metadata. This function is only meant to be used with device code
2512 /// generation.
2513 ///
2514 /// \param M Module to load Metadata info from. The module passed may be
2515 /// loaded from a bitcode file, i.e., different from OpenMPIRBuilder::M module.
2517
2518 /// Loads all the offload entries information from the host IR
2519 /// metadata read from the file passed in as the HostFilePath argument. This
2520 /// function is only meant to be used with device code generation.
2521 ///
2522 /// \param HostFilePath The path to the host IR file,
2523 /// used to load in offload metadata for the device, allowing host and device
2524 /// to maintain the same metadata mapping.
2525 void loadOffloadInfoMetadata(StringRef HostFilePath);
2526
2527 /// Gets (if a variable with the given name already exists) or creates an
2528 /// internal global variable with the specified Name. The created variable has
2529 /// linkage CommonLinkage by default and is initialized by null value.
2530 /// \param Ty Type of the global variable. If it already exists, the type
2531 /// must be the same.
2532 /// \param Name Name of the variable.
2534 unsigned AddressSpace = 0);
2535
2536 /// Create a global function to register OpenMP requires flags into the
2537 /// runtime, according to the `Config`.
2538 ///
2539 /// This function should be added to the list of constructors of the
2540 /// compilation unit in order to be called before other OpenMP runtime
2541 /// functions.
2542 ///
2543 /// \param Name Name of the created function.
2545};
2546
2547/// Class to represent the control flow structure of an OpenMP canonical loop.
2548///
2549/// The control-flow structure is standardized for easy consumption by
2550/// directives associated with loops. For instance, the worksharing-loop
2551/// construct may change this control flow such that each loop iteration is
2552/// executed on only one thread. The constraints of a canonical loop in brief
2553/// are:
2554///
2555/// * The number of loop iterations must have been computed before entering the
2556/// loop.
2557///
2558/// * Has an (unsigned) logical induction variable that starts at zero and
2559/// increments by one.
2560///
2561/// * The loop's CFG itself has no side-effects. The OpenMP specification
2562/// itself allows side-effects, but the order in which they happen, including
2563/// how often or whether at all, is unspecified. We expect that the frontend
2564/// will emit those side-effect instructions somewhere (e.g. before the loop)
2565/// such that the CanonicalLoopInfo itself can be side-effect free.
2566///
2567/// Keep in mind that CanonicalLoopInfo is meant to only describe a repeated
2568/// execution of a loop body that satisfies these constraints. It does NOT
2569/// represent arbitrary SESE regions that happen to contain a loop. Do not use
2570/// CanonicalLoopInfo for such purposes.
2571///
2572/// The control flow can be described as follows:
2573///
2574/// Preheader
2575/// |
2576/// /-> Header
2577/// | |
2578/// | Cond---\
2579/// | | |
2580/// | Body |
2581/// | | | |
2582/// | <...> |
2583/// | | | |
2584/// \--Latch |
2585/// |
2586/// Exit
2587/// |
2588/// After
2589///
2590/// The loop is thought to start at PreheaderIP (at the Preheader's terminator,
2591/// including) and end at AfterIP (at the After's first instruction, excluding).
2592/// That is, instructions in the Preheader and After blocks (except the
2593/// Preheader's terminator) are out of CanonicalLoopInfo's control and may have
2594/// side-effects. Typically, the Preheader is used to compute the loop's trip
2595/// count. The instructions from BodyIP (at the Body block's first instruction,
2596/// excluding) until the Latch are also considered outside CanonicalLoopInfo's
2597/// control and thus can have side-effects. The body block is the single entry
2598/// point into the loop body, which may contain arbitrary control flow as long
2599/// as all control paths eventually branch to the Latch block.
2600///
2601/// TODO: Consider adding another standardized BasicBlock between Body CFG and
2602/// Latch to guarantee that there is only a single edge to the latch. It would
2603/// make loop transformations easier by not needing to consider multiple
2604/// predecessors of the latch (See redirectAllPredecessorsTo) and would give us
2605/// an equivalent to PreheaderIP, AfterIP and BodyIP for inserting code that
2606/// executes after each body iteration.
2607///
2608/// There must be no loop-carried dependencies through llvm::Values. This is
2609/// equivalent to the Latch having no PHINode and the Header's only PHINode being
2610/// for the induction variable.
2611///
2612/// All code in Header, Cond, Latch and Exit (plus the terminator of the
2613/// Preheader) are CanonicalLoopInfo's responsibility and their build-up checked
2614/// by assertOK(). They are expected to not be modified unless explicitly
2615/// modifying the CanonicalLoopInfo through a method that applies an OpenMP
2616/// loop-associated construct such as applyWorkshareLoop, tileLoops, unrollLoop,
2617/// etc. These methods usually invalidate the CanonicalLoopInfo and re-use its
2618/// basic blocks. After invalidation, the CanonicalLoopInfo must not be used
2619/// anymore as its underlying control flow may not exist anymore.
2620/// Loop-transformation methods such as tileLoops, collapseLoops and unrollLoop
2621/// may also return a new CanonicalLoopInfo that can be passed to other
2622/// loop-associated construct implementing methods. These loop-transforming
2623/// methods may either create a new CanonicalLoopInfo usually using
2624/// createLoopSkeleton and invalidate the input CanonicalLoopInfo, or reuse and
2625/// modify one of the input CanonicalLoopInfo and return it as representing the
2626/// modified loop. What is done is an implementation detail of the
2627/// transformation-implementing method and callers should always assume that the
2628/// CanonicalLoopInfo passed to it is invalidated and a new object is returned.
2629/// Returned CanonicalLoopInfo have the same structure and guarantees as the one
2630/// created by createCanonicalLoop, such that transforming methods do not have
2631/// to special case where the CanonicalLoopInfo originated from.
2632///
2633/// Generally, methods consuming CanonicalLoopInfo do not need an
2634/// OpenMPIRBuilder::InsertPointTy as argument, but use the locations of the
2635/// CanonicalLoopInfo to insert new or modify existing instructions. Unless
2636/// documented otherwise, methods consuming CanonicalLoopInfo do not invalidate
2637/// any InsertPoint that is outside CanonicalLoopInfo's control. Specifically,
2638/// any InsertPoint in the Preheader, After or Block can still be used after
2639/// calling such a method.
2640///
2641/// TODO: Provide mechanisms for exception handling and cancellation points.
2642///
2643/// Defined outside OpenMPIRBuilder because nested classes cannot be
2644/// forward-declared, e.g. to avoid having to include the entire OMPIRBuilder.h.
2646 friend class OpenMPIRBuilder;
2647
2648private:
2649 BasicBlock *Header = nullptr;
2650 BasicBlock *Cond = nullptr;
2651 BasicBlock *Latch = nullptr;
2652 BasicBlock *Exit = nullptr;
2653
2654 /// Add the control blocks of this loop to \p BBs.
2655 ///
2656 /// This does not include any block from the body, including the one returned
2657 /// by getBody().
2658 ///
2659 /// FIXME: This currently includes the Preheader and After blocks even though
2660 /// their content is (mostly) not under CanonicalLoopInfo's control.
2661 /// Re-evaluate whether this makes sense.
2662 void collectControlBlocks(SmallVectorImpl<BasicBlock *> &BBs);
2663
2664 /// Sets the number of loop iterations to the given value. This value must be
2665 /// valid in the condition block (i.e., defined in the preheader) and is
2666 /// interpreted as an unsigned integer.
2667 void setTripCount(Value *TripCount);
2668
2669 /// Replace all uses of the canonical induction variable in the loop body with
2670 /// a new one.
2671 ///
2672 /// The intended use case is to update the induction variable for an updated
2673 /// iteration space such that it can stay normalized in the 0...tripcount-1
2674 /// range.
2675 ///
2676 /// The \p Updater is called with the (presumably updated) current normalized
2677 /// induction variable and is expected to return the value that uses of the
2678 /// pre-updated induction values should use instead, typically dependent on
2679 /// the new induction variable. This is a lambda (instead of e.g. just passing
2680 /// the new value) to be able to distinguish the uses of the pre-updated
2681 /// induction variable and uses of the induction variable to compute the
2682 /// updated induction variable value.
2683 void mapIndVar(llvm::function_ref<Value *(Instruction *)> Updater);
2684
2685public:
2686 /// Returns whether this object currently represents the IR of a loop. If
2687 /// returning false, it may have been consumed by a loop transformation or not
2688 /// been initialized. Do not use in this case.
2689 bool isValid() const { return Header; }
2690
2691 /// The preheader ensures that there is only a single edge entering the loop.
2692 /// Code that must be executed before any loop iteration can be emitted here,
2693 /// such as computing the loop trip count and begin lifetime markers. Code in
2694 /// the preheader is not considered part of the canonical loop.
2695 BasicBlock *getPreheader() const;
2696
2697 /// The header is the entry for each iteration. In the canonical control flow,
2698 /// it only contains the PHINode for the induction variable.
2700 assert(isValid() && "Requires a valid canonical loop");
2701 return Header;
2702 }
2703
2704 /// The condition block computes whether there is another loop iteration. If
2705 /// yes, branches to the body; otherwise to the exit block.
2707 assert(isValid() && "Requires a valid canonical loop");
2708 return Cond;
2709 }
2710
2711 /// The body block is the single entry for a loop iteration and not controlled
2712 /// by CanonicalLoopInfo. It can contain arbitrary control flow but must
2713 /// eventually branch to the \p Latch block.
2715 assert(isValid() && "Requires a valid canonical loop");
2716 return cast<BranchInst>(Cond->getTerminator())->getSuccessor(0);
2717 }
2718
2719 /// Reaching the latch indicates the end of the loop body code. In the
2720 /// canonical control flow, it only contains the increment of the induction
2721 /// variable.
2723 assert(isValid() && "Requires a valid canonical loop");
2724 return Latch;
2725 }
2726
2727 /// Reaching the exit indicates no more iterations are being executed.
2729 assert(isValid() && "Requires a valid canonical loop");
2730 return Exit;
2731 }
2732
2733 /// The after block is intended for clean-up code such as lifetime end
2734 /// markers. It is separate from the exit block to ensure, analogous to the
2735 /// preheader, it having just a single entry edge and being free from PHI
2736 /// nodes should there be multiple loop exits (such as from break
2737 /// statements/cancellations).
2739 assert(isValid() && "Requires a valid canonical loop");
2740 return Exit->getSingleSuccessor();
2741 }
2742
2743 /// Returns the llvm::Value containing the number of loop iterations. It must
2744 /// be valid in the preheader and always interpreted as an unsigned integer of
2745 /// any bit-width.
2747 assert(isValid() && "Requires a valid canonical loop");
2748 Instruction *CmpI = &Cond->front();
2749 assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
2750 return CmpI->getOperand(1);
2751 }
2752
2753 /// Returns the instruction representing the current logical induction
2754 /// variable. Always unsigned, always starting at 0 with an increment of one.
2756 assert(isValid() && "Requires a valid canonical loop");
2757 Instruction *IndVarPHI = &Header->front();
2758 assert(isa<PHINode>(IndVarPHI) && "First inst must be the IV PHI");
2759 return IndVarPHI;
2760 }
2761
2762 /// Return the type of the induction variable (and the trip count).
2764 assert(isValid() && "Requires a valid canonical loop");
2765 return getIndVar()->getType();
2766 }
2767
2768 /// Return the insertion point for user code before the loop.
2770 assert(isValid() && "Requires a valid canonical loop");
2771 BasicBlock *Preheader = getPreheader();
2772 return {Preheader, std::prev(Preheader->end())};
2773 };
2774
2775 /// Return the insertion point for user code in the body.
2777 assert(isValid() && "Requires a valid canonical loop");
2778 BasicBlock *Body = getBody();
2779 return {Body, Body->begin()};
2780 };
2781
2782 /// Return the insertion point for user code after the loop.
2784 assert(isValid() && "Requires a valid canonical loop");
2785 BasicBlock *After = getAfter();
2786 return {After, After->begin()};
2787 };
2788
2790 assert(isValid() && "Requires a valid canonical loop");
2791 return Header->getParent();
2792 }
2793
2794 /// Consistency self-check.
2795 void assertOK() const;
2796
2797 /// Invalidate this loop. That is, the underlying IR does not fulfill the
2798 /// requirements of an OpenMP canonical loop anymore.
2799 void invalidate();
2800};
2801
2802} // end namespace llvm
2803
2804#endif // LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
arc branch finalize
This file defines the BumpPtrAllocator interface.
assume builder
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
uint64_t Addr
std::string Name
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Hexagon Hardware Loops
#define F(x, y, z)
Definition: MD5.cpp:55
This file defines constants and helpers used when dealing with OpenMP.
const SmallVectorImpl< MachineOperand > & Cond
Basic Register Allocator
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Value * RHS
an instruction to allocate memory on the stack
Definition: Instructions.h:58
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:730
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
iterator end()
Definition: BasicBlock.h:337
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:335
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:326
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:112
This class represents a function call, abstracting a target machine's calling convention.
Class to represent the control flow structure of an OpenMP canonical loop.
Value * getTripCount() const
Returns the llvm::Value containing the number of loop iterations.
BasicBlock * getHeader() const
The header is the entry for each iteration.
void assertOK() const
Consistency self-check.
Type * getIndVarType() const
Return the type of the induction variable (and the trip count).
BasicBlock * getBody() const
The body block is the single entry for a loop iteration and not controlled by CanonicalLoopInfo.
bool isValid() const
Returns whether this object currently represents the IR of a loop.
OpenMPIRBuilder::InsertPointTy getAfterIP() const
Return the insertion point for user code after the loop.
OpenMPIRBuilder::InsertPointTy getBodyIP() const
Return the insertion point for user code in the body.
BasicBlock * getAfter() const
The after block is intended for clean-up code such as lifetime end markers.
Function * getFunction() const
void invalidate()
Invalidate this loop.
BasicBlock * getLatch() const
Reaching the latch indicates the end of the loop body code.
OpenMPIRBuilder::InsertPointTy getPreheaderIP() const
Return the insertion point for user code before the loop.
BasicBlock * getCond() const
The condition block computes whether there is another loop iteration.
BasicBlock * getExit() const
Reaching the exit indicates no more iterations are being executed.
BasicBlock * getPreheader() const
The preheader ensures that there is only a single edge entering the loop.
Instruction * getIndVar() const
Returns the instruction representing the current logical induction variable.
This is the shared class of boolean and integer constants.
Definition: Constants.h:78
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:165
LinkageTypes
An enumeration for the kinds of linkage for global values.
Definition: GlobalValue.h:47
InsertPoint - A saved insertion point.
Definition: IRBuilder.h:243
BasicBlock * getBlock() const
Definition: IRBuilder.h:258
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:212
InsertPoint saveIP() const
Returns the current insert point.
Definition: IRBuilder.h:263
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
Definition: IRBuilder.h:275
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2628
Class to represent integer types.
Definition: DerivedTypes.h:40
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:47
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
OffloadEntryInfoDeviceGlobalVar(unsigned Order, OMPTargetGlobalVarEntryKind Flags)
Definition: OMPIRBuilder.h:367
OffloadEntryInfoDeviceGlobalVar(unsigned Order, Constant *Addr, int64_t VarSize, OMPTargetGlobalVarEntryKind Flags, GlobalValue::LinkageTypes Linkage, const std::string &VarName)
Definition: OMPIRBuilder.h:370
static bool classof(const OffloadEntryInfo *Info)
Definition: OMPIRBuilder.h:385
static bool classof(const OffloadEntryInfo *Info)
Definition: OMPIRBuilder.h:294
OffloadEntryInfoTargetRegion(unsigned Order, Constant *Addr, Constant *ID, OMPTargetRegionEntryKind Flags)
Definition: OMPIRBuilder.h:281
@ OffloadingEntryInfoTargetRegion
Entry is a target region.
Definition: OMPIRBuilder.h:211
@ OffloadingEntryInfoDeviceGlobalVar
Entry is a declare target variable.
Definition: OMPIRBuilder.h:213
OffloadingEntryInfoKinds getKind() const
Definition: OMPIRBuilder.h:229
OffloadEntryInfo(OffloadingEntryInfoKinds Kind)
Definition: OMPIRBuilder.h:220
static bool classof(const OffloadEntryInfo *Info)
Definition: OMPIRBuilder.h:237
OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order, uint32_t Flags)
Definition: OMPIRBuilder.h:221
Class that manages information about offload code regions and data.
Definition: OMPIRBuilder.h:199
function_ref< void(StringRef, const OffloadEntryInfoDeviceGlobalVar &)> OffloadDeviceGlobalVarEntryInfoActTy
Applies action Action on all registered entries.
Definition: OMPIRBuilder.h:407
OMPTargetDeviceClauseKind
Kind of device clause for declare target variables and functions NOTE: Currently not used as a part o...
Definition: OMPIRBuilder.h:346
@ OMPTargetDeviceClauseNoHost
The target is marked for non-host devices.
Definition: OMPIRBuilder.h:350
@ OMPTargetDeviceClauseAny
The target is marked for all devices.
Definition: OMPIRBuilder.h:348
@ OMPTargetDeviceClauseNone
The target is marked as having no clause.
Definition: OMPIRBuilder.h:354
@ OMPTargetDeviceClauseHost
The target is marked for host devices.
Definition: OMPIRBuilder.h:352
void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr, int64_t VarSize, OMPTargetGlobalVarEntryKind Flags, GlobalValue::LinkageTypes Linkage)
Register device global variable entry.
void initializeDeviceGlobalVarEntryInfo(StringRef Name, OMPTargetGlobalVarEntryKind Flags, unsigned Order)
Initialize device global variable entry.
void actOnDeviceGlobalVarEntriesInfo(const OffloadDeviceGlobalVarEntryInfoActTy &Action)
OMPTargetRegionEntryKind
Kind of the target registry entry.
Definition: OMPIRBuilder.h:264
@ OMPTargetRegionEntryTargetRegion
Mark the entry as target region.
Definition: OMPIRBuilder.h:266
@ OMPTargetRegionEntryDtor
Mark the entry as a global destructor.
Definition: OMPIRBuilder.h:270
@ OMPTargetRegionEntryCtor
Mark the entry as a global constructor.
Definition: OMPIRBuilder.h:268
OffloadEntriesInfoManager(OpenMPIRBuilder *builder)
Definition: OMPIRBuilder.h:257
void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, const TargetRegionEntryInfo &EntryInfo)
bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, bool IgnoreAddressId=false) const
Return true if a target region entry with the provided information exists.
void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, Constant *Addr, Constant *ID, OMPTargetRegionEntryKind Flags)
Register target region entry.
void actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action)
unsigned size() const
Return number of entries defined so far.
Definition: OMPIRBuilder.h:255
void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo, unsigned Order)
Initialize target region entry.
OMPTargetGlobalVarEntryKind
Kind of the global variable entry..
Definition: OMPIRBuilder.h:328
@ OMPTargetGlobalVarEntryEnter
Mark the entry as a declare target enter.
Definition: OMPIRBuilder.h:334
@ OMPTargetGlobalVarEntryNone
Mark the entry as having no declare target entry kind.
Definition: OMPIRBuilder.h:336
@ OMPTargetGlobalVarEntryIndirect
Mark the entry as a declare target indirect global.
Definition: OMPIRBuilder.h:338
@ OMPTargetGlobalVarEntryLink
Mark the entry as a to declare target link.
Definition: OMPIRBuilder.h:332
@ OMPTargetGlobalVarEntryTo
Mark the entry as a to declare target.
Definition: OMPIRBuilder.h:330
function_ref< void(const TargetRegionEntryInfo &EntryInfo, const OffloadEntryInfoTargetRegion &)> OffloadTargetRegionEntryInfoActTy
Applies action Action on all registered entries.
Definition: OMPIRBuilder.h:319
bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const
Checks if the variable with the given name has been registered already.
Definition: OMPIRBuilder.h:402
bool empty() const
Return true if there are no entries defined.
Captures attributes that affect generating LLVM-IR using the OpenMPIRBuilder and related classes.
Definition: OMPIRBuilder.h:83
void setIsGPU(bool Value)
Definition: OMPIRBuilder.h:156
std::optional< bool > IsTargetDevice
Flag for specifying if the compilation is done for embedded device code or host code.
Definition: OMPIRBuilder.h:87
std::optional< bool > IsGPU
Flag for specifying if the compilation is done for an accelerator.
Definition: OMPIRBuilder.h:90
std::optional< StringRef > FirstSeparator
First separator used between the initial two parts of a name.
Definition: OMPIRBuilder.h:96
StringRef separator() const
Definition: OMPIRBuilder.h:147
int64_t getRequiresFlags() const
Returns requires directive clauses as flags compatible with those expected by libomptarget.
void setFirstSeparator(StringRef FS)
Definition: OMPIRBuilder.h:158
StringRef firstSeparator() const
Definition: OMPIRBuilder.h:137
std::optional< bool > OpenMPOffloadMandatory
Definition: OMPIRBuilder.h:93
void setHasRequiresReverseOffload(bool Value)
bool hasRequiresUnifiedSharedMemory() const
void setHasRequiresUnifiedSharedMemory(bool Value)
std::optional< StringRef > Separator
Separator used between all of the remaining consecutive parts of a name.
Definition: OMPIRBuilder.h:98
bool hasRequiresDynamicAllocators() const
bool openMPOffloadMandatory() const
Definition: OMPIRBuilder.h:119
void setHasRequiresUnifiedAddress(bool Value)
void setOpenMPOffloadMandatory(bool Value)
Definition: OMPIRBuilder.h:157
void setIsTargetDevice(bool Value)
Definition: OMPIRBuilder.h:155
void setSeparator(StringRef S)
Definition: OMPIRBuilder.h:159
void setHasRequiresDynamicAllocators(bool Value)
bool hasRequiresReverseOffload() const
bool hasRequiresUnifiedAddress() const
Struct that keeps the information that should be kept throughout a 'target data' region.
TargetDataInfo(bool RequiresDevicePointerInfo, bool SeparateBeginEndCalls)
SmallMapVector< const Value *, std::pair< Value *, Value * >, 4 > DevicePtrInfoMap
void clearArrayInfo()
Clear information about the data arrays.
unsigned NumberOfPtrs
The total number of pointers passed to the runtime library.
bool isValid()
Return true if the current target data information has valid arrays.
bool HasMapper
Indicate whether any user-defined mapper exists.
An interface to create LLVM-IR for OpenMP directives.
Definition: OMPIRBuilder.h:443
Constant * getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize, omp::IdentFlag Flags=omp::IdentFlag(0), unsigned Reserve2Flags=0)
Return an ident_t* encoding the source location SrcLocStr and Flags.
FunctionCallee getOrCreateRuntimeFunction(Module &M, omp::RuntimeFunction FnID)
Return the function declaration for the runtime function with FnID.
std::function< void(InsertPointTy CodeGenIP)> FinalizeCallbackTy
Callback type for variable finalization (think destructors).
Definition: OMPIRBuilder.h:488
void emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen, BodyGenCallbackTy ElseGen, InsertPointTy AllocaIP={})
Emits code for OpenMP 'if' clause using specified BodyGenCallbackTy Here is the logic: if (Cond) { Th...
CanonicalLoopInfo * collapseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, InsertPointTy ComputeIP)
Collapse a loop nest into a single loop.
function_ref< void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)> BodyGenCallbackTy
Callback type for body (=inner region) code generation.
Definition: OMPIRBuilder.h:540
void createTaskyield(const LocationDescription &Loc)
Generator for '#omp taskyield'.
void emitBranch(BasicBlock *Target)
InsertPointTy createReductions(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< ReductionInfo > ReductionInfos, bool IsNoWait=false)
Generator for '#omp reduction'.
InsertPointTy createAtomicWrite(const LocationDescription &Loc, AtomicOpValue &X, Value *Expr, AtomicOrdering AO)
Emit atomic write for : X = Expr — Only Scalar data types.
InsertPointTy createCritical(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst)
Generator for '#omp critical'.
static TargetRegionEntryInfo getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack, StringRef ParentName="")
Creates a unique info for a target entry when provided a filename and line number from.
void emitTaskwaitImpl(const LocationDescription &Loc)
Generate a taskwait runtime call.
InsertPointTy createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, AtomicOpValue &V, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr)
Emit atomic update for constructs: — Only Scalar data types V = X; X = X BinOp Expr ,...
void initialize()
Initialize the internal state, this will put structures types and potentially other helpers into the ...
std::function< void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)> StorableBodyGenCallbackTy
Definition: OMPIRBuilder.h:547
CanonicalLoopInfo * createCanonicalLoop(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *TripCount, const Twine &Name="loop")
Generator for the control flow structure of an OpenMP canonical loop.
void loadOffloadInfoMetadata(Module &M)
Loads all the offload entries information from the host IR metadata.
InsertPointTy createAtomicUpdate(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr)
Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X For complex Operations: X = ...
InsertPointTy emitBarrierImpl(const LocationDescription &Loc, omp::Directive DK, bool ForceSimpleCall, bool CheckCancelFlag)
Generate a barrier runtime call.
void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully unroll a loop.
void emitFlush(const LocationDescription &Loc)
Generate a flush runtime call.
InsertPointTy emitKernelLaunch(const LocationDescription &Loc, Function *OutlinedFn, Value *OutlinedFnID, EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args, Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP)
Generate a target region entry call and host fallback call.
InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition, omp::Directive CanceledDirective)
Generator for '#omp cancel'.
OpenMPIRBuilderConfig Config
The OpenMPIRBuilder Configuration.
InsertPointTy createTarget(const LocationDescription &Loc, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::InsertPointTy CodeGenIP, TargetRegionEntryInfo &EntryInfo, int32_t NumTeams, int32_t NumThreads, SmallVectorImpl< Value * > &Inputs, GenMapInfoCallbackTy GenMapInfoCB, TargetBodyGenCallbackTy BodyGenCB)
Generator for '#omp target'.
CallInst * createOMPInteropDestroy(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_destroy.
InsertPointTy createAtomicRead(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOrdering AO)
Emit atomic Read for : V = X — Only Scalar data types.
std::function< void(EmitMetadataErrorKind, TargetRegionEntryInfo)> EmitMetadataErrorReportFunctionTy
Callback function type.
void setConfig(OpenMPIRBuilderConfig C)
Definition: OMPIRBuilder.h:457
InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsThreads)
Generator for '#omp ordered [threads | simd]'.
OpenMPIRBuilder::InsertPointTy createTargetData(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond, TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB, omp::RuntimeFunction *MapperFunc=nullptr, function_ref< InsertPointTy(InsertPointTy CodeGenIP, BodyGenTy BodyGenType)> BodyGenCB=nullptr, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr, function_ref< Value *(unsigned int)> CustomMapperCB=nullptr, Value *SrcLocInfo=nullptr)
Generator for '#omp target data'.
std::forward_list< CanonicalLoopInfo > LoopInfos
Collection of owned canonical loop objects that eventually need to be freed.
void createTaskwait(const LocationDescription &Loc)
Generator for '#omp taskwait'.
CanonicalLoopInfo * createLoopSkeleton(DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore, BasicBlock *PostInsertBefore, const Twine &Name={})
Create the control flow structure of a canonical OpenMP loop.
InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK, bool ForceSimpleCall=false, bool CheckCancelFlag=true)
Emitter methods for OpenMP directives.
std::string createPlatformSpecificName(ArrayRef< StringRef > Parts) const
Create a name using the platform-specific separators.
FunctionCallee createDispatchNextFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_next_* runtime function for the specified size IVSize and sign IVSigned.
static void getKernelArgsVector(TargetKernelArgs &KernelArgs, IRBuilderBase &Builder, SmallVector< Value * > &ArgsVector)
Create the kernel args vector used by emitTargetKernel.
void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully or partially unroll a loop.
void addAttributes(omp::RuntimeFunction FnID, Function &Fn)
Add attributes known for FnID to Fn.
Module & M
The underlying LLVM-IR module.
StringMap< Constant * > SrcLocStrMap
Map to remember source location strings.
void createMapperAllocas(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumOperands, struct MapperAllocas &MapperAllocas)
Create the allocas instruction used in call to mapper functions.
Constant * getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the source location LocStr.
void addOutlineInfo(OutlineInfo &&OI)
Add a new region that will be outlined later.
FunctionCallee createDispatchFiniFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_fini_* runtime function for the specified size IVSize and sign IVSigned.
void emitOffloadingArraysArgument(IRBuilderBase &Builder, OpenMPIRBuilder::TargetDataRTArgs &RTArgs, OpenMPIRBuilder::TargetDataInfo &Info, bool EmitDebug=false, bool ForEndCall=false)
Emit the arguments to be passed to the runtime library based on the arrays of base pointers,...
void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor, CanonicalLoopInfo **UnrolledCLI)
Partially unroll a loop.
InsertPointTy createSections(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< StorableBodyGenCallbackTy > SectionCBs, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait)
Generator for '#omp sections'.
InsertPointTy createTask(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, bool Tied=true, Value *Final=nullptr, Value *IfCondition=nullptr, SmallVector< DependData > Dependencies={})
Generator for '#omp task'.
void emitTaskyieldImpl(const LocationDescription &Loc)
Generate a taskyield runtime call.
void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc, Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg, struct MapperAllocas &MapperAllocas, int64_t DeviceID, unsigned NumOperands)
Create the call for the target mapper function.
void createTargetDeinit(const LocationDescription &Loc)
Create a runtime call for kmpc_target_deinit.
InsertPointTy createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D, AtomicOrdering AO, omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly)
Emit atomic compare for constructs: — Only scalar data types cond-expr-stmt: x = x ordop expr ?...
InsertPointTy createOrderedDepend(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumLoops, ArrayRef< llvm::Value * > StoreValues, const Twine &Name, bool IsDependSource)
Generator for '#omp ordered depend (source | sink)'.
InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr, llvm::IntegerType *IntPtrTy, bool BranchtoEnd=true)
Generate conditional branch and relevant BasicBlocks through which private threads copy the 'copyin' ...
void emitOffloadingEntry(Constant *Addr, StringRef Name, uint64_t Size, int32_t Flags, StringRef SectionName="omp_offloading_entries")
Create an offloading section struct used to register this global at runtime.
void emitOffloadingArrays(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info, bool IsNonContiguous=false, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr, function_ref< Value *(unsigned int)> CustomMapperCB=nullptr)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
SmallVector< FinalizationInfo, 8 > FinalizationStack
The finalization stack made up of finalize callbacks currently in-flight, wrapped into FinalizationIn...
std::vector< CanonicalLoopInfo * > tileLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, ArrayRef< Value * > TileSizes)
Tile a loop nest.
CallInst * createOMPInteropInit(const LocationDescription &Loc, Value *InteropVar, omp::OMPInteropType InteropType, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_init.
InsertPointTy createSingle(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsNowait, llvm::Value *DidIt)
Generator for '#omp single'.
SmallVector< OutlineInfo, 16 > OutlineInfos
Collection of regions that need to be outlined during finalization.
Function * getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID)
InsertPointTy applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, bool NeedsBarrier, llvm::omp::ScheduleKind SchedKind=llvm::omp::OMP_SCHEDULE_Default, Value *ChunkSize=nullptr, bool HasSimdModifier=false, bool HasMonotonicModifier=false, bool HasNonmonotonicModifier=false, bool HasOrderedClause=false)
Modifies the canonical loop to be a workshare loop.
void emitTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, FunctionGenCallback &GenerateFunctionCallback, int32_t NumTeams, int32_t NumThreads, bool IsOffloadEntry, Function *&OutlinedFn, Constant *&OutlinedFnID)
Create a unique name for the entry function using the source location information of the current targ...
InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD)
The omp target interface.
DenseMap< std::pair< Constant *, uint64_t >, Constant * > IdentMap
Map to remember existing ident_t*.
CallInst * createOMPFree(const LocationDescription &Loc, Value *Addr, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_free.
Function * createRegisterRequires(StringRef Name)
Create a global function to register OpenMP requires flags into the runtime, according to the Config.
FunctionCallee createForStaticInitFunction(unsigned IVSize, bool IVSigned, bool IsGPUDistribute)
Returns __kmpc_for_static_init_* runtime function for the specified size IVSize and sign IVSigned.
CallInst * createOMPAlloc(const LocationDescription &Loc, Value *Size, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_alloc.
void emitNonContiguousDescriptor(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info)
Emit an array of struct descriptors to be assigned to the offload args.
InsertPointTy createSection(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for '#omp section'.
void emitBlock(BasicBlock *BB, Function *CurFn, bool IsFinished=false)
Value * getOrCreateThreadID(Value *Ident)
Return the current thread ID.
InsertPointTy createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for '#omp master'.
void pushFinalizationCB(const FinalizationInfo &FI)
Push a finalization callback on the finalization stack.
Definition: OMPIRBuilder.h:506
InsertPointTy getInsertionPoint()
}
IRBuilder<>::InsertPoint createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable)
Generator for '#omp parallel'.
StringMap< GlobalVariable *, BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
GlobalVariable * getOrCreateInternalVariable(Type *Ty, const StringRef &Name, unsigned AddressSpace=0)
Gets (if a variable with the given name already exists) or creates an internal global variable with the spe...
FunctionCallee createDispatchInitFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_init_* runtime function for the specified size IVSize and sign IVSigned.
CallInst * createOMPInteropUse(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_use.
IRBuilder<>::InsertPoint InsertPointTy
Type used throughout for insertion points.
Definition: OMPIRBuilder.h:468
GlobalVariable * createOffloadMapnames(SmallVectorImpl< llvm::Constant * > &Names, std::string VarName)
Create the global variable holding the offload names information.
Constant * registerTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, Function *OutlinedFunction, StringRef EntryFnName, StringRef EntryFnIDName, int32_t NumTeams, int32_t NumThreads)
Registers the given function and sets up the attributes of the function. Returns the FunctionID.
std::function< Function *(StringRef FunctionName)> FunctionGenCallback
Functions used to generate a function with the given name.
void emitCancelationCheckImpl(Value *CancelFlag, omp::Directive CanceledDirective, FinalizeCallbackTy ExitCB={})
Generate control flow and cleanup for cancellation.
Constant * getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the default source location.
InsertPointTy createMasked(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, Value *Filter)
Generator for '#omp masked'.
void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size, int32_t Flags, GlobalValue::LinkageTypes, StringRef Name="")
Creates offloading entry for the provided entry ID ID, address Addr, size Size, and flags Flags.
static unsigned getOpenMPDefaultSimdAlign(const Triple &TargetTriple, const StringMap< bool > &Features)
Get the default alignment value for given target.
InsertPointTy createTaskgroup(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB)
Generator for the taskgroup construct.
void createOffloadEntriesAndInfoMetadata(EmitMetadataErrorReportFunctionTy &ErrorReportFunction)
void applySimd(CanonicalLoopInfo *Loop, MapVector< Value *, Value * > AlignedVars, Value *IfCond, omp::OrderKind Order, ConstantInt *Simdlen, ConstantInt *Safelen)
Add metadata to simd-ize a loop.
bool isLastFinalizationInfoCancellable(omp::Directive DK)
Return true if the last entry in the finalization stack is of kind DK and cancellable.
InsertPointTy emitTargetKernel(const LocationDescription &Loc, InsertPointTy AllocaIP, Value *&Return, Value *Ident, Value *DeviceID, Value *NumTeams, Value *NumThreads, Value *HostPtr, ArrayRef< Value * > KernelArgs)
Generate a target region entry call.
GlobalVariable * createOffloadMaptypes(SmallVectorImpl< uint64_t > &Mappings, std::string VarName)
Create the global variable holding the offload mappings information.
CallInst * createCachedThreadPrivate(const LocationDescription &Loc, llvm::Value *Pointer, llvm::ConstantInt *Size, const llvm::Twine &Name=Twine(""))
Create a runtime call for kmpc_threadprivate_cached.
IRBuilder<> Builder
The LLVM-IR Builder used to create IR.
GlobalValue * createGlobalFlag(unsigned Value, StringRef Name)
Create a hidden global flag Name in the module with initial value Value.
Value * getSizeInBytes(Value *BasePtr)
Computes the size of type in bytes.
OpenMPIRBuilder(Module &M)
Create a new OpenMPIRBuilder operating on the given module M.
Definition: OMPIRBuilder.h:447
void registerTargetGlobalVariable(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage, Type *LlvmPtrTy, Constant *Addr)
Registers a target variable for device or host.
BodyGenTy
Type of BodyGen to use for region codegen.
OffloadEntriesInfoManager OffloadInfoManager
Info manager to keep track of target regions.
std::function< std::tuple< std::string, uint64_t >()> FileIdentifierInfoCallbackTy
const std::string ompOffloadInfoName
OMP Offload Info Metadata name string.
InsertPointTy createCopyPrivate(const LocationDescription &Loc, llvm::Value *BufSize, llvm::Value *CpyBuf, llvm::Value *CpyFn, llvm::Value *DidIt)
Generator for __kmpc_copyprivate.
void popFinalizationCB()
Pop the last finalization callback from the finalization stack.
Definition: OMPIRBuilder.h:513
bool updateToLocation(const LocationDescription &Loc)
Update the internal location to Loc.
void createFlush(const LocationDescription &Loc)
Generator for '#omp flush'.
Constant * getAddrOfDeclareTargetVar(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, Type *LlvmPtrTy, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage)
Retrieve (or create if non-existent) the address of a declare target variable, used in conjunction wi...
EmitMetadataErrorKind
The kind of errors that can occur when emitting the offload entries and metadata.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:345
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:687
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
Definition: StringMap.h:112
size_type count(StringRef Key) const
count - Return 1 if the element is in the map, 0 otherwise.
Definition: StringMap.h:257
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Target - Wrapper for Target specific information.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
Value handle that is nullable, but tries to track the Value.
Definition: ValueHandle.h:204
bool pointsToAliveValue() const
Definition: ValueHandle.h:224
An efficient, type-erasing, non-owning reference to a callable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition: ISDOpcodes.h:71
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their definition in openmp/runtime/src/kmp...
Definition: OMPConstants.h:66
RTLDependenceKindTy
Dependence kind for RTL.
Definition: OMPConstants.h:271
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
Definition: OMPConstants.h:46
OMPAtomicCompareOp
Atomic compare operations. Currently OpenMP only supports ==, >, and <.
Definition: OMPConstants.h:265
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
BasicBlock * splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, llvm::Twine Suffix=".split")
Like splitBB, but reuses the current block's name for the new name.
AddressSpace
Definition: NVPTXBaseInfo.h:21
void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New, bool CreateBranch)
Move the instruction after an InsertPoint to the beginning of another BasicBlock.
BasicBlock * splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch, llvm::Twine Name={})
Split a BasicBlock at an InsertPoint, even if the block is degenerate (missing the terminator).
AtomicOrdering
Atomic ordering for LLVM's memory model.
A struct to pack relevant information while generating atomic ops.
A struct to pack the relevant information for an OpenMP depend clause.
DependData(omp::RTLDependenceKindTy DepKind, Type *DepValueType, Value *DepVal)
omp::RTLDependenceKindTy DepKind
bool IsCancellable
Flag to indicate if the directive is cancellable.
Definition: OMPIRBuilder.h:500
FinalizeCallbackTy FiniCB
The finalization callback provided by the last in-flight invocation of createXXXX for the directive o...
Definition: OMPIRBuilder.h:493
omp::Directive DK
The directive kind of the innermost directive that has an associated region which might require final...
Definition: OMPIRBuilder.h:497
Description of a LLVM-IR insertion point (IP) and a debug/source location (filename,...
Definition: OMPIRBuilder.h:584
LocationDescription(const InsertPointTy &IP)
Definition: OMPIRBuilder.h:587
LocationDescription(const InsertPointTy &IP, const DebugLoc &DL)
Definition: OMPIRBuilder.h:588
LocationDescription(const IRBuilderBase &IRB)
Definition: OMPIRBuilder.h:585
This structure contains combined information generated for mappable clauses, including base pointers,...
void append(MapInfosTy &CurInfo)
Append arrays in CurInfo.
MapDeviceInfoArrayTy DevicePointers
StructNonContiguousInfo NonContigInfo
Helper that contains information about regions we need to outline during finalization.
void collectBlocks(SmallPtrSetImpl< BasicBlock * > &BlockSet, SmallVectorImpl< BasicBlock * > &BlockVector)
Collect all blocks in between EntryBB and ExitBB in both the given vector and set.
Function * getFunction() const
Return the function that contains the region to be outlined.
SmallVector< Value *, 2 > ExcludeArgsFromAggregate
std::function< void(Function &)> PostOutlineCBTy
Information about an OpenMP reduction.
AtomicReductionGenTy AtomicReductionGen
Callback for generating the atomic reduction body, may be null.
Value * Variable
Reduction variable of pointer type.
Value * PrivateVariable
Thread-private partial reduction variable.
ReductionGenTy ReductionGen
Callback for generating the reduction body.
Type * ElementType
Reduction element type, must match pointee type of variable.
ReductionInfo(Type *ElementType, Value *Variable, Value *PrivateVariable, ReductionGenTy ReductionGen, AtomicReductionGenTy AtomicReductionGen)
Container for the arguments used to pass data to the runtime library.
Value * SizesArray
The array of sizes passed to the runtime library.
TargetDataRTArgs(Value *BasePointersArray, Value *PointersArray, Value *SizesArray, Value *MapTypesArray, Value *MapTypesArrayEnd, Value *MappersArray, Value *MapNamesArray)
Value * PointersArray
The array of section pointers passed to the runtime library.
Value * MappersArray
The array of user-defined mappers passed to the runtime library.
Value * MapTypesArrayEnd
The array of map types passed to the runtime library for the end of the region, or nullptr if there a...
Value * BasePointersArray
The array of base pointers passed to the runtime library.
Value * MapTypesArray
The array of map types passed to the runtime library for the beginning of the region or for the entir...
Value * MapNamesArray
The array of original declaration names of mapped pointers sent to the runtime library for debugging.
Data structure that contains the needed information to construct the kernel args vector.
Value * NumTeams
The number of teams.
TargetKernelArgs(unsigned NumTargetItems, TargetDataRTArgs RTArgs, Value *NumIterations, Value *NumTeams, Value *NumThreads, Value *DynCGGroupMem, bool HasNoWait)
Constructor for TargetKernelArgs.
Value * DynCGGroupMem
The size of the dynamic shared memory.
TargetDataRTArgs RTArgs
Arguments passed to the runtime library.
Value * NumIterations
The number of iterations.
unsigned NumTargetItems
Number of arguments passed to the runtime library.
bool HasNoWait
True if the kernel has a 'nowait' clause.
Value * NumThreads
The number of threads.
A MapVector that performs no allocations if smaller than a certain size.
Definition: MapVector.h:233
Data structure to contain the information needed to uniquely identify a target entry.
Definition: OMPIRBuilder.h:173
static void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, StringRef ParentName, unsigned DeviceID, unsigned FileID, unsigned Line, unsigned Count)
TargetRegionEntryInfo(StringRef ParentName, unsigned DeviceID, unsigned FileID, unsigned Line, unsigned Count=0)
Definition: OMPIRBuilder.h:181
bool operator<(const TargetRegionEntryInfo RHS) const
Definition: OMPIRBuilder.h:191