62#define DEBUG_TYPE "openmp-ir-builder"
69 cl::desc(
"Use optimistic attributes describing "
70 "'as-if' properties of runtime calls."),
74 "openmp-ir-builder-unroll-threshold-factor",
cl::Hidden,
75 cl::desc(
"Factor for the unroll threshold to account for code "
76 "simplifications still taking place"),
87 if (!IP1.isSet() || !IP2.isSet())
89 return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint();
94 switch (SchedType & ~OMPScheduleType::MonotonicityMask) {
95 case OMPScheduleType::UnorderedStaticChunked:
96 case OMPScheduleType::UnorderedStatic:
97 case OMPScheduleType::UnorderedDynamicChunked:
98 case OMPScheduleType::UnorderedGuidedChunked:
99 case OMPScheduleType::UnorderedRuntime:
100 case OMPScheduleType::UnorderedAuto:
101 case OMPScheduleType::UnorderedTrapezoidal:
102 case OMPScheduleType::UnorderedGreedy:
103 case OMPScheduleType::UnorderedBalanced:
104 case OMPScheduleType::UnorderedGuidedIterativeChunked:
105 case OMPScheduleType::UnorderedGuidedAnalyticalChunked:
106 case OMPScheduleType::UnorderedSteal:
107 case OMPScheduleType::UnorderedStaticBalancedChunked:
108 case OMPScheduleType::UnorderedGuidedSimd:
109 case OMPScheduleType::UnorderedRuntimeSimd:
110 case OMPScheduleType::OrderedStaticChunked:
111 case OMPScheduleType::OrderedStatic:
112 case OMPScheduleType::OrderedDynamicChunked:
113 case OMPScheduleType::OrderedGuidedChunked:
114 case OMPScheduleType::OrderedRuntime:
115 case OMPScheduleType::OrderedAuto:
116 case OMPScheduleType::OrderdTrapezoidal:
117 case OMPScheduleType::NomergeUnorderedStaticChunked:
118 case OMPScheduleType::NomergeUnorderedStatic:
119 case OMPScheduleType::NomergeUnorderedDynamicChunked:
120 case OMPScheduleType::NomergeUnorderedGuidedChunked:
121 case OMPScheduleType::NomergeUnorderedRuntime:
122 case OMPScheduleType::NomergeUnorderedAuto:
123 case OMPScheduleType::NomergeUnorderedTrapezoidal:
124 case OMPScheduleType::NomergeUnorderedGreedy:
125 case OMPScheduleType::NomergeUnorderedBalanced:
126 case OMPScheduleType::NomergeUnorderedGuidedIterativeChunked:
127 case OMPScheduleType::NomergeUnorderedGuidedAnalyticalChunked:
128 case OMPScheduleType::NomergeUnorderedSteal:
129 case OMPScheduleType::NomergeOrderedStaticChunked:
130 case OMPScheduleType::NomergeOrderedStatic:
131 case OMPScheduleType::NomergeOrderedDynamicChunked:
132 case OMPScheduleType::NomergeOrderedGuidedChunked:
133 case OMPScheduleType::NomergeOrderedRuntime:
134 case OMPScheduleType::NomergeOrderedAuto:
135 case OMPScheduleType::NomergeOrderedTrapezoidal:
143 SchedType & OMPScheduleType::MonotonicityMask;
144 if (MonotonicityFlags == OMPScheduleType::MonotonicityMask)
155 if (Features.
count(
"+wavefrontsize64"))
156 return omp::getAMDGPUGridValues<64>();
157 return omp::getAMDGPUGridValues<32>();
168 bool HasSimdModifier) {
170 switch (ClauseKind) {
171 case OMP_SCHEDULE_Default:
172 case OMP_SCHEDULE_Static:
173 return HasChunks ? OMPScheduleType::BaseStaticChunked
174 : OMPScheduleType::BaseStatic;
175 case OMP_SCHEDULE_Dynamic:
176 return OMPScheduleType::BaseDynamicChunked;
177 case OMP_SCHEDULE_Guided:
178 return HasSimdModifier ? OMPScheduleType::BaseGuidedSimd
179 : OMPScheduleType::BaseGuidedChunked;
180 case OMP_SCHEDULE_Auto:
182 case OMP_SCHEDULE_Runtime:
183 return HasSimdModifier ? OMPScheduleType::BaseRuntimeSimd
184 : OMPScheduleType::BaseRuntime;
192 bool HasOrderedClause) {
193 assert((BaseScheduleType & OMPScheduleType::ModifierMask) ==
194 OMPScheduleType::None &&
195 "Must not have ordering nor monotonicity flags already set");
198 ? OMPScheduleType::ModifierOrdered
199 : OMPScheduleType::ModifierUnordered;
200 OMPScheduleType OrderingScheduleType = BaseScheduleType | OrderingModifier;
203 if (OrderingScheduleType ==
204 (OMPScheduleType::BaseGuidedSimd | OMPScheduleType::ModifierOrdered))
205 return OMPScheduleType::OrderedGuidedChunked;
206 else if (OrderingScheduleType == (OMPScheduleType::BaseRuntimeSimd |
207 OMPScheduleType::ModifierOrdered))
208 return OMPScheduleType::OrderedRuntime;
210 return OrderingScheduleType;
216 bool HasSimdModifier,
bool HasMonotonic,
217 bool HasNonmonotonic,
bool HasOrderedClause) {
218 assert((ScheduleType & OMPScheduleType::MonotonicityMask) ==
219 OMPScheduleType::None &&
220 "Must not have monotonicity flags already set");
221 assert((!HasMonotonic || !HasNonmonotonic) &&
222 "Monotonic and Nonmonotonic are contradicting each other");
225 return ScheduleType | OMPScheduleType::ModifierMonotonic;
226 }
else if (HasNonmonotonic) {
227 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
237 if ((BaseScheduleType == OMPScheduleType::BaseStatic) ||
238 (BaseScheduleType == OMPScheduleType::BaseStaticChunked) ||
244 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
252 bool HasSimdModifier,
bool HasMonotonicModifier,
253 bool HasNonmonotonicModifier,
bool HasOrderedClause) {
259 OrderedSchedule, HasSimdModifier, HasMonotonicModifier,
260 HasNonmonotonicModifier, HasOrderedClause);
274 auto *Br = cast<BranchInst>(Term);
275 assert(!Br->isConditional() &&
276 "BB's terminator must be an unconditional branch (or degenerate)");
279 Br->setSuccessor(0,
Target);
284 NewBr->setDebugLoc(
DL);
289 assert(New->getFirstInsertionPt() == New->begin() &&
290 "Target BB must not have PHI nodes");
294 New->splice(New->begin(), Old, IP.
getPoint(), Old->
end());
322 New->replaceSuccessorsPhiUsesWith(Old, New);
367 const Twine &
Name =
"",
bool AsPtr =
true) {
375 FakeVal = FakeValAddr;
403enum OpenMPOffloadingRequiresDirFlags {
405 OMP_REQ_UNDEFINED = 0x000,
407 OMP_REQ_NONE = 0x001,
409 OMP_REQ_REVERSE_OFFLOAD = 0x002,
411 OMP_REQ_UNIFIED_ADDRESS = 0x004,
413 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
415 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
422 : RequiresFlags(OMP_REQ_UNDEFINED) {}
425 bool IsTargetDevice,
bool IsGPU,
bool OpenMPOffloadMandatory,
426 bool HasRequiresReverseOffload,
bool HasRequiresUnifiedAddress,
427 bool HasRequiresUnifiedSharedMemory,
bool HasRequiresDynamicAllocators)
428 : IsTargetDevice(IsTargetDevice), IsGPU(IsGPU),
429 OpenMPOffloadMandatory(OpenMPOffloadMandatory),
430 RequiresFlags(OMP_REQ_UNDEFINED) {
431 if (HasRequiresReverseOffload)
432 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
433 if (HasRequiresUnifiedAddress)
434 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
435 if (HasRequiresUnifiedSharedMemory)
436 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
437 if (HasRequiresDynamicAllocators)
438 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
442 return RequiresFlags & OMP_REQ_REVERSE_OFFLOAD;
446 return RequiresFlags & OMP_REQ_UNIFIED_ADDRESS;
450 return RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY;
454 return RequiresFlags & OMP_REQ_DYNAMIC_ALLOCATORS;
459 :
static_cast<int64_t
>(OMP_REQ_NONE);
464 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
466 RequiresFlags &= ~OMP_REQ_REVERSE_OFFLOAD;
471 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
473 RequiresFlags &= ~OMP_REQ_UNIFIED_ADDRESS;
478 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
480 RequiresFlags &= ~OMP_REQ_UNIFIED_SHARED_MEMORY;
485 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
487 RequiresFlags &= ~OMP_REQ_DYNAMIC_ALLOCATORS;
505 Value *NumThreads3D =
528 auto FnAttrs = Attrs.getFnAttrs();
529 auto RetAttrs = Attrs.getRetAttrs();
531 for (
size_t ArgNo = 0; ArgNo < Fn.
arg_size(); ++ArgNo)
536 bool Param =
true) ->
void {
537 bool HasSignExt = AS.hasAttribute(Attribute::SExt);
538 bool HasZeroExt = AS.hasAttribute(Attribute::ZExt);
539 if (HasSignExt || HasZeroExt) {
540 assert(AS.getNumAttributes() == 1 &&
541 "Currently not handling extension attr combined with others.");
543 if (
auto AK = TargetLibraryInfo::getExtAttrForI32Param(
T, HasSignExt))
546 TargetLibraryInfo::getExtAttrForI32Return(
T, HasSignExt))
553#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
554#include "llvm/Frontend/OpenMP/OMPKinds.def"
558#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \
560 FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \
561 addAttrSet(RetAttrs, RetAttrSet, false); \
562 for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo) \
563 addAttrSet(ArgAttrs[ArgNo], ArgAttrSets[ArgNo]); \
564 Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \
566#include "llvm/Frontend/OpenMP/OMPKinds.def"
580#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \
582 FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \
584 Fn = M.getFunction(Str); \
586#include "llvm/Frontend/OpenMP/OMPKinds.def"
592#define OMP_RTL(Enum, Str, ...) \
594 Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \
596#include "llvm/Frontend/OpenMP/OMPKinds.def"
600 if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
610 LLVMContext::MD_callback,
612 2, {-1, -1},
true)}));
625 assert(Fn &&
"Failed to create OpenMP runtime function");
632 auto *Fn = dyn_cast<llvm::Function>(RTLFn.
getCallee());
633 assert(Fn &&
"Failed to create OpenMP runtime function pointer");
648 for (
auto Inst =
Block->getReverseIterator()->begin();
649 Inst !=
Block->getReverseIterator()->end();) {
650 if (
auto *
AllocaInst = dyn_cast_if_present<llvm::AllocaInst>(Inst)) {
674 ParallelRegionBlockSet.
clear();
676 OI.collectBlocks(ParallelRegionBlockSet,
Blocks);
695 ".omp_par", ArgsInZeroAddressSpace);
699 <<
" Exit: " << OI.ExitBB->getName() <<
"\n");
701 "Expected OpenMP outlining to be possible!");
703 for (
auto *V : OI.ExcludeArgsFromAggregate)
710 if (TargetCpuAttr.isStringAttribute())
713 auto TargetFeaturesAttr = OuterFn->
getFnAttribute(
"target-features");
714 if (TargetFeaturesAttr.isStringAttribute())
715 OutlinedFn->
addFnAttr(TargetFeaturesAttr);
718 LLVM_DEBUG(
dbgs() <<
" Outlined function: " << *OutlinedFn <<
"\n");
720 "OpenMP outlined functions should not return a value!");
732 assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
739 "Expected instructions to add in the outlined region entry");
746 if (
I.isTerminator())
749 I.moveBeforePreserving(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
752 OI.EntryBB->moveBefore(&ArtificialEntry);
759 if (OI.PostOutlineCB)
760 OI.PostOutlineCB(*OutlinedFn);
791 errs() <<
"Error of kind: " << Kind
792 <<
" when emitting offload entries and metadata during "
793 "OMPIRBuilder finalization \n";
800 std::vector<WeakTrackingVH> LLVMCompilerUsed = {
802 emitUsed(
"llvm.compiler.used", LLVMCompilerUsed);
824 unsigned Reserve2Flags) {
826 LocFlags |= OMP_IDENT_FLAG_KMPC;
834 ConstantInt::get(
Int32, Reserve2Flags),
835 ConstantInt::get(
Int32, SrcLocStrSize), SrcLocStr};
842 if (
GV.getValueType() == OpenMPIRBuilder::Ident &&
GV.hasInitializer())
843 if (
GV.getInitializer() == Initializer)
848 M, OpenMPIRBuilder::Ident,
863 SrcLocStrSize = LocStr.
size();
872 if (
GV.isConstant() &&
GV.hasInitializer() &&
873 GV.getInitializer() == Initializer)
884 unsigned Line,
unsigned Column,
890 Buffer.
append(FunctionName);
892 Buffer.
append(std::to_string(Line));
894 Buffer.
append(std::to_string(Column));
902 StringRef UnknownLoc =
";unknown;unknown;0;0;;";
913 if (
DIFile *DIF = DIL->getFile())
914 if (std::optional<StringRef> Source = DIF->getSource())
920 DIL->getColumn(), SrcLocStrSize);
932 "omp_global_thread_num");
937 bool ForceSimpleCall,
bool CheckCancelFlag) {
947 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
950 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
953 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
956 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
959 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
972 bool UseCancelBarrier =
977 UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier
978 : OMPRTL___kmpc_barrier),
981 if (UseCancelBarrier && CheckCancelFlag)
990 omp::Directive CanceledDirective) {
1002 Value *CancelKind =
nullptr;
1003 switch (CanceledDirective) {
1004#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1005 case DirectiveEnum: \
1006 CancelKind = Builder.getInt32(Value); \
1008#include "llvm/Frontend/OpenMP/OMPKinds.def"
1019 auto ExitCB = [
this, CanceledDirective, Loc](
InsertPointTy IP) {
1020 if (CanceledDirective == OMPD_parallel) {
1024 omp::Directive::OMPD_unknown,
false,
1034 UI->eraseFromParent();
1047 auto *KernelArgsPtr =
1060 NumThreads, HostPtr, KernelArgsPtr};
1088 assert(OutlinedFnID &&
"Invalid outlined function ID!");
1092 Value *Return =
nullptr;
1112 Args.NumTeams, Args.NumThreads,
1113 OutlinedFnID, ArgsVector));
1126 emitBlock(OffloadContBlock, CurFn,
true);
1131 omp::Directive CanceledDirective,
1134 "Unexpected cancellation!");
1184 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1187 "Expected at least tid and bounded tid as arguments");
1188 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1191 assert(CI &&
"Expected call instruction to outlined function");
1192 CI->
getParent()->setName(
"omp_parallel");
1195 Type *PtrTy = OMPIRBuilder->VoidPtr;
1199 OpenMPIRBuilder ::InsertPointTy CurrentIP = Builder.
saveIP();
1203 Value *Args = ArgsAlloca;
1211 for (
unsigned Idx = 0;
Idx < NumCapturedVars;
Idx++) {
1223 Value *Parallel51CallArgs[] = {
1227 NumThreads ? NumThreads : Builder.
getInt32(-1),
1230 Builder.
CreateBitCast(&OutlinedFn, OMPIRBuilder->ParallelTaskPtr),
1233 Builder.
getInt64(NumCapturedVars)};
1238 Builder.
CreateCall(RTLFn, Parallel51CallArgs);
1253 I->eraseFromParent();
1275 if (
auto *
F = dyn_cast<Function>(RTLFn.
getCallee())) {
1276 if (!
F->hasMetadata(LLVMContext::MD_callback)) {
1284 F->addMetadata(LLVMContext::MD_callback,
1293 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1296 "Expected at least tid and bounded tid as arguments");
1297 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1300 CI->
getParent()->setName(
"omp_parallel");
1304 Value *ForkCallArgs[] = {
1305 Ident, Builder.
getInt32(NumCapturedVars),
1306 Builder.
CreateBitCast(&OutlinedFn, OMPIRBuilder->ParallelTaskPtr)};
1309 RealArgs.
append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
1318 auto PtrTy = OMPIRBuilder->VoidPtr;
1319 if (IfCondition && NumCapturedVars == 0) {
1323 if (IfCondition && RealArgs.
back()->getType() != PtrTy)
1341 I->eraseFromParent();
1349 omp::ProcBindKind ProcBind,
bool IsCancellable) {
1376 if (ProcBind != OMP_PROC_BIND_default) {
1380 ConstantInt::get(
Int32,
unsigned(ProcBind),
true)};
1408 TIDAddrAlloca, PointerType ::get(
M.
getContext(), 0),
"tid.addr.ascast");
1413 "zero.addr.ascast");
1437 if (IP.getBlock()->end() == IP.getPoint()) {
1443 assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
1444 IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
1445 "Unexpected insertion point for finalization call!");
1481 LLVM_DEBUG(
dbgs() <<
"Before body codegen: " << *OuterFn <<
"\n");
1484 assert(BodyGenCB &&
"Expected body generation callback!");
1486 BodyGenCB(InnerAllocaIP, CodeGenIP);
1488 LLVM_DEBUG(
dbgs() <<
"After body codegen: " << *OuterFn <<
"\n");
1494 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1496 IfCondition, NumThreads, PrivTID, PrivTIDAddr,
1497 ThreadID, ToBeDeletedVec);
1502 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1504 PrivTID, PrivTIDAddr, ToBeDeletedVec);
1521 PRegOutlinedExitBB->
setName(
"omp.par.outlined.exit");
1522 Blocks.push_back(PRegOutlinedExitBB);
1533 ".omp_par", ArgsInZeroAddressSpace);
1538 Extractor.
findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
1541 LLVM_DEBUG(
dbgs() <<
"Before privatization: " << *OuterFn <<
"\n");
1546 auto PrivHelper = [&](
Value &V) {
1547 if (&V == TIDAddr || &V == ZeroAddr) {
1553 for (
Use &U : V.uses())
1554 if (
auto *UserI = dyn_cast<Instruction>(U.getUser()))
1555 if (ParallelRegionBlockSet.
count(UserI->getParent()))
1565 if (!V.getType()->isPointerTy()) {
1584 Value *ReplacementValue =
nullptr;
1585 CallInst *CI = dyn_cast<CallInst>(&V);
1587 ReplacementValue = PrivTID;
1590 PrivCB(InnerAllocaIP,
Builder.
saveIP(), V, *Inner, ReplacementValue));
1595 assert(ReplacementValue &&
1596 "Expected copy/create callback to set replacement value!");
1597 if (ReplacementValue == &V)
1602 UPtr->set(ReplacementValue);
1619 for (
Value *Input : Inputs) {
1624 for (
Value *Output : Outputs)
1628 "OpenMP outlining should not produce live-out values!");
1630 LLVM_DEBUG(
dbgs() <<
"After privatization: " << *OuterFn <<
"\n");
1633 dbgs() <<
" PBR: " << BB->getName() <<
"\n";
1641 assert(FiniInfo.DK == OMPD_parallel &&
1642 "Unexpected finalization stack state!");
1652 InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
1653 UI->eraseFromParent();
1719 if (Dependencies.
empty())
1739 Type *DependInfo = OMPBuilder.DependInfo;
1742 Value *DepArray =
nullptr;
1748 DepArray = Builder.
CreateAlloca(DepArrayTy,
nullptr,
".dep.arr.addr");
1750 for (
const auto &[DepIdx, Dep] :
enumerate(Dependencies)) {
1756 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr));
1761 DependInfo,
Base,
static_cast<unsigned int>(RTLDependInfoFields::Len));
1763 Builder.
getInt64(M.getDataLayout().getTypeStoreSize(Dep.DepValueType)),
1768 static_cast<unsigned int>(RTLDependInfoFields::Flags));
1771 static_cast<unsigned int>(Dep.DepKind)),
1814 BodyGenCB(TaskAllocaIP, TaskBodyIP);
1824 Builder, AllocaIP, ToBeDeleted, TaskAllocaIP,
"global.tid",
false));
1826 OI.
PostOutlineCB = [
this, Ident, Tied, Final, IfCondition, Dependencies,
1827 TaskAllocaBB, ToBeDeleted](
Function &OutlinedFn)
mutable {
1829 assert(OutlinedFn.getNumUses() == 1 &&
1830 "there must be a single user for the outlined function");
1831 CallInst *StaleCI = cast<CallInst>(OutlinedFn.user_back());
1835 bool HasShareds = StaleCI->
arg_size() > 1;
1874 assert(ArgStructAlloca &&
1875 "Unable to find the alloca instruction corresponding to arguments "
1876 "for extracted function");
1879 assert(ArgStructType &&
"Unable to find struct type corresponding to "
1880 "arguments for extracted function");
1888 TaskAllocFn, {Ident, ThreadID,
Flags,
1889 TaskSize, SharedsSize,
1901 Value *DepArray =
nullptr;
1902 if (Dependencies.
size()) {
1917 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr));
1924 static_cast<unsigned int>(RTLDependInfoFields::Len));
1931 static_cast<unsigned int>(RTLDependInfoFields::Flags));
1934 static_cast<unsigned int>(Dep.DepKind)),
1965 Instruction *ThenTI = IfTerminator, *ElseTI =
nullptr;
1971 if (Dependencies.
size()) {
1995 if (Dependencies.
size()) {
2016 Shareds, [Shareds](
Use &U) {
return U.getUser() != Shareds; });
2068 if (IP.getBlock()->end() != IP.getPoint())
2079 auto *CaseBB = IP.getBlock()->getSinglePredecessor();
2080 auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
2081 auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
2112 unsigned CaseNumber = 0;
2113 for (
auto SectionCB : SectionCBs) {
2129 Value *LB = ConstantInt::get(I32Ty, 0);
2130 Value *UB = ConstantInt::get(I32Ty, SectionCBs.
size());
2131 Value *ST = ConstantInt::get(I32Ty, 1);
2133 Loc, LoopBodyGenCB, LB, UB, ST,
true,
false, AllocaIP,
"section_loop");
2135 applyStaticWorkshareLoop(Loc.
DL,
LoopInfo, AllocaIP, !IsNowait);
2139 assert(FiniInfo.DK == OMPD_sections &&
2140 "Unexpected finalization stack state!");
2146 AfterIP = {FiniBB, FiniBB->
begin()};
2160 if (IP.getBlock()->end() != IP.getPoint())
2179 Directive OMPD = Directive::OMPD_sections;
2182 return EmitOMPInlinedRegion(OMPD,
nullptr,
nullptr, BodyGenCB, FiniCBWrapper,
2194 std::vector<WeakTrackingVH> &
List) {
2201 for (
unsigned I = 0, E =
List.size();
I != E; ++
I)
2205 if (UsedArray.
empty())
2212 GV->setSection(
"llvm.metadata");
2215Value *OpenMPIRBuilder::getGPUThreadID() {
2218 OMPRTL___kmpc_get_hardware_thread_id_in_block),
2222Value *OpenMPIRBuilder::getGPUWarpSize() {
2227Value *OpenMPIRBuilder::getNVPTXWarpID() {
2232Value *OpenMPIRBuilder::getNVPTXLaneID() {
2234 assert(LaneIDBits < 32 &&
"Invalid LaneIDBits size in NVPTX device.");
2235 unsigned LaneIDMask = ~0
u >> (32u - LaneIDBits);
2240Value *OpenMPIRBuilder::castValueToType(InsertPointTy AllocaIP,
Value *
From,
2245 assert(FromSize > 0 &&
"From size must be greater than zero");
2246 assert(ToSize > 0 &&
"To size must be greater than zero");
2247 if (FromType == ToType)
2249 if (FromSize == ToSize)
2259 CastItem,
FromType->getPointerTo());
2264Value *OpenMPIRBuilder::createRuntimeShuffleFunction(InsertPointTy AllocaIP,
2269 assert(
Size <= 8 &&
"Unsupported bitwidth in shuffle instruction");
2273 Value *ElemCast = castValueToType(AllocaIP, Element, CastTy);
2277 Size <= 4 ? RuntimeFunction::OMPRTL___kmpc_shuffle_int32
2278 : RuntimeFunction::OMPRTL___kmpc_shuffle_int64);
2279 Value *WarpSizeCast =
2281 Value *ShuffleCall =
2283 return castValueToType(AllocaIP, ShuffleCall, CastTy);
2286void OpenMPIRBuilder::shuffleAndStore(InsertPointTy AllocaIP,
Value *SrcAddr,
2302 Value *ElemPtr = DstAddr;
2304 for (
unsigned IntSize = 8; IntSize >= 1; IntSize /= 2) {
2316 if ((
Size / IntSize) > 1) {
2340 Value *Res = createRuntimeShuffleFunction(
2349 Value *LocalElemPtr =
2356 Value *Res = createRuntimeShuffleFunction(
2370void OpenMPIRBuilder::emitReductionListCopy(
2371 InsertPointTy AllocaIP, CopyAction Action,
Type *ReductionArrayTy,
2373 CopyOptionsTy CopyOptions) {
2376 Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;
2380 for (
auto En :
enumerate(ReductionInfos)) {
2381 const ReductionInfo &RI = En.value();
2382 Value *SrcElementAddr =
nullptr;
2383 Value *DestElementAddr =
nullptr;
2384 Value *DestElementPtrAddr =
nullptr;
2386 bool ShuffleInElement =
false;
2389 bool UpdateDestListPtr =
false;
2393 ReductionArrayTy, SrcBase,
2394 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
2400 ReductionArrayTy, DestBase,
2401 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
2407 ".omp.reduction.element");
2410 DestElementAddr = DestAlloca;
2413 DestElementAddr->
getName() +
".ascast");
2415 ShuffleInElement =
true;
2416 UpdateDestListPtr =
true;
2428 if (ShuffleInElement) {
2429 shuffleAndStore(AllocaIP, SrcElementAddr, DestElementAddr, RI.ElementType,
2430 RemoteLaneOffset, ReductionArrayTy);
2432 switch (RI.EvaluationKind) {
2441 RI.ElementType, SrcElementAddr, 0, 0,
".realp");
2443 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
2445 RI.ElementType, SrcElementAddr, 0, 1,
".imagp");
2447 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
2450 RI.ElementType, DestElementAddr, 0, 0,
".realp");
2452 RI.ElementType, DestElementAddr, 0, 1,
".imagp");
2474 if (UpdateDestListPtr) {
2477 DestElementAddr->
getName() +
".ascast");
2483Function *OpenMPIRBuilder::emitInterWarpCopyFunction(
2493 "_omp_reduction_inter_warp_copy_func", &
M);
2516 "__openmp_nvptx_data_transfer_temporary_storage";
2520 if (!TransferMedium) {
2529 Value *GPUThreadID = getGPUThreadID();
2531 Value *LaneID = getNVPTXLaneID();
2533 Value *WarpID = getNVPTXWarpID();
2542 Arg0Type,
nullptr, ReduceListArg->
getName() +
".addr");
2546 ReduceListAlloca, Arg0Type, ReduceListAlloca->
getName() +
".ascast");
2549 NumWarpsAlloca->
getName() +
".ascast");
2560 for (
auto En :
enumerate(ReductionInfos)) {
2565 const ReductionInfo &RI = En.value();
2567 for (
unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /= 2) {
2570 unsigned NumIters = RealTySize / TySize;
2573 Value *Cnt =
nullptr;
2574 Value *CntAddr =
nullptr;
2584 CntAddr->
getName() +
".ascast");
2603 omp::Directive::OMPD_unknown,
2616 auto *RedListArrayTy =
2622 {ConstantInt::get(IndexTy, 0),
2623 ConstantInt::get(IndexTy, En.index())});
2648 omp::Directive::OMPD_unknown,
2657 Value *NumWarpsVal =
2660 Value *IsActiveThread =
2671 Value *TargetElemPtrPtr =
2673 {ConstantInt::get(IndexTy, 0),
2674 ConstantInt::get(IndexTy, En.index())});
2675 Value *TargetElemPtrVal =
2677 Value *TargetElemPtr = TargetElemPtrVal;
2683 Value *SrcMediumValue =
2702 RealTySize %= TySize;
2712Function *OpenMPIRBuilder::emitShuffleAndReduceFunction(
2718 {Builder.getPtrTy(), Builder.getInt16Ty(),
2719 Builder.getInt16Ty(), Builder.getInt16Ty()},
2723 "_omp_reduction_shuffle_and_reduce_func", &
M);
2744 Type *ReduceListArgType = ReduceListArg->
getType();
2748 ReduceListArgType,
nullptr, ReduceListArg->
getName() +
".addr");
2750 LaneIDArg->
getName() +
".addr");
2752 LaneIDArgType,
nullptr, RemoteLaneOffsetArg->
getName() +
".addr");
2754 AlgoVerArg->
getName() +
".addr");
2761 RedListArrayTy,
nullptr,
".omp.reduction.remote_reduce_list");
2764 ReduceListAlloca, ReduceListArgType,
2765 ReduceListAlloca->
getName() +
".ascast");
2767 LaneIdAlloca, LaneIDArgPtrType, LaneIdAlloca->
getName() +
".ascast");
2769 RemoteLaneOffsetAlloca, LaneIDArgPtrType,
2770 RemoteLaneOffsetAlloca->
getName() +
".ascast");
2772 AlgoVerAlloca, LaneIDArgPtrType, AlgoVerAlloca->
getName() +
".ascast");
2775 RemoteReductionListAlloca->
getName() +
".ascast");
2784 Value *RemoteLaneOffset =
2793 emitReductionListCopy(
2795 ReduceList, RemoteListAddrCast, {RemoteLaneOffset,
nullptr,
nullptr});
2826 Value *RemoteOffsetComp =
2843 ->addFnAttr(Attribute::NoUnwind);
2864 ReductionInfos, RemoteListAddrCast, ReduceList);
2877Function *OpenMPIRBuilder::emitListToGlobalCopyFunction(
2884 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
2888 "_omp_reduction_list_to_global_copy_func", &
M);
2905 BufferArg->
getName() +
".addr");
2912 BufferArgAlloca->
getName() +
".ascast");
2917 ReduceListArgAlloca->
getName() +
".ascast");
2923 Value *LocalReduceList =
2925 Value *BufferArgVal =
2930 for (
auto En :
enumerate(ReductionInfos)) {
2931 const ReductionInfo &RI = En.value();
2932 auto *RedListArrayTy =
2936 RedListArrayTy, LocalReduceList,
2937 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
2945 ReductionsBufferTy, BufferVD, 0, En.index());
2947 switch (RI.EvaluationKind) {
2955 RI.ElementType, ElemPtr, 0, 0,
".realp");
2957 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
2959 RI.ElementType, ElemPtr, 0, 1,
".imagp");
2961 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
2964 RI.ElementType, GlobVal, 0, 0,
".realp");
2966 RI.ElementType, GlobVal, 0, 1,
".imagp");
2987Function *OpenMPIRBuilder::emitListToGlobalReduceFunction(
2994 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
2998 "_omp_reduction_list_to_global_reduce_func", &
M);
3015 BufferArg->
getName() +
".addr");
3020 auto *RedListArrayTy =
3025 Value *LocalReduceList =
3030 BufferArgAlloca->
getName() +
".ascast");
3035 ReduceListArgAlloca->
getName() +
".ascast");
3038 LocalReduceList->
getName() +
".ascast");
3048 for (
auto En :
enumerate(ReductionInfos)) {
3050 RedListArrayTy, LocalReduceListAddrCast,
3051 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3056 ReductionsBufferTy, BufferVD, 0, En.index());
3064 ->addFnAttr(Attribute::NoUnwind);
3070Function *OpenMPIRBuilder::emitGlobalToListCopyFunction(
3077 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3081 "_omp_reduction_global_to_list_copy_func", &
M);
3098 BufferArg->
getName() +
".addr");
3105 BufferArgAlloca->
getName() +
".ascast");
3110 ReduceListArgAlloca->
getName() +
".ascast");
3115 Value *LocalReduceList =
3121 for (
auto En :
enumerate(ReductionInfos)) {
3123 auto *RedListArrayTy =
3127 RedListArrayTy, LocalReduceList,
3128 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3135 ReductionsBufferTy, BufferVD, 0, En.index());
3178Function *OpenMPIRBuilder::emitGlobalToListReduceFunction(
3185 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3189 "_omp_reduction_global_to_list_reduce_func", &
M);
3206 BufferArg->
getName() +
".addr");
3216 Value *LocalReduceList =
3221 BufferArgAlloca->
getName() +
".ascast");
3226 ReduceListArgAlloca->
getName() +
".ascast");
3229 LocalReduceList->
getName() +
".ascast");
3239 for (
auto En :
enumerate(ReductionInfos)) {
3241 RedListArrayTy, ReductionList,
3242 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3247 ReductionsBufferTy, BufferVD, 0, En.index());
3255 ->addFnAttr(Attribute::NoUnwind);
3261std::string OpenMPIRBuilder::getReductionFuncName(
StringRef Name)
const {
3262 std::string Suffix =
3264 return (
Name + Suffix).str();
3267Function *OpenMPIRBuilder::createReductionFunction(
3269 ReductionGenCBKind ReductionGenCBKind,
AttributeList FuncAttrs) {
3271 {Builder.getPtrTy(), Builder.getPtrTy()},
3273 std::string
Name = getReductionFuncName(ReducerName);
3285 Value *LHSArrayPtr =
nullptr;
3286 Value *RHSArrayPtr =
nullptr;
3297 LHSAlloca, Arg0Type, LHSAlloca->
getName() +
".ascast");
3299 RHSAlloca, Arg1Type, RHSAlloca->
getName() +
".ascast");
3309 for (
auto En :
enumerate(ReductionInfos)) {
3310 const ReductionInfo &RI = En.value();
3312 RedArrayTy, RHSArrayPtr,
3313 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3316 RHSI8Ptr, RI.PrivateVariable->getType(),
3317 RHSI8Ptr->
getName() +
".ascast");
3320 RedArrayTy, LHSArrayPtr,
3321 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3324 LHSI8Ptr, RI.Variable->getType(), LHSI8Ptr->
getName() +
".ascast");
3335 return ReductionFunc;
3341 for (
auto En :
enumerate(ReductionInfos)) {
3342 unsigned Index = En.index();
3343 const ReductionInfo &RI = En.value();
3344 Value *LHSFixupPtr, *RHSFixupPtr;
3351 LHSPtrs[
Index], [ReductionFunc](
const Use &U) {
3352 return cast<Instruction>(
U.getUser())->getParent()->getParent() ==
3356 RHSPtrs[
Index], [ReductionFunc](
const Use &U) {
3357 return cast<Instruction>(
U.getUser())->getParent()->getParent() ==
3363 return ReductionFunc;
3371 assert(RI.Variable &&
"expected non-null variable");
3372 assert(RI.PrivateVariable &&
"expected non-null private variable");
3373 assert((RI.ReductionGen || RI.ReductionGenClang) &&
3374 "expected non-null reduction generator callback");
3377 RI.Variable->getType() == RI.PrivateVariable->getType() &&
3378 "expected variables and their private equivalents to have the same "
3381 assert(RI.Variable->getType()->isPointerTy() &&
3382 "expected variables to be pointers");
3389 bool IsNoWait,
bool IsTeamsReduction,
bool HasDistribute,
3391 unsigned ReductionBufNum,
Value *SrcLocInfo) {
3405 if (ReductionInfos.
size() == 0)
3424 if (GridValue.has_value())
3447 Value *ReductionListAlloca =
3450 ReductionListAlloca, PtrTy, ReductionListAlloca->
getName() +
".ascast");
3454 for (
auto En :
enumerate(ReductionInfos)) {
3457 RedArrayTy, ReductionList,
3458 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3465 emitShuffleAndReduceFunction(ReductionInfos, ReductionFunc, FuncAttrs);
3466 Function *WcFunc = emitInterWarpCopyFunction(Loc, ReductionInfos, FuncAttrs);
3471 unsigned MaxDataSize = 0;
3473 for (
auto En :
enumerate(ReductionInfos)) {
3475 if (
Size > MaxDataSize)
3477 ReductionTypeArgs.
emplace_back(En.value().ElementType);
3479 Value *ReductionDataSize =
3481 if (!IsTeamsReduction) {
3482 Value *SarFuncCast =
3486 Value *Args[] = {RTLoc, ReductionDataSize, RL, SarFuncCast, WcFuncCast};
3488 RuntimeFunction::OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2);
3493 Ctx, ReductionTypeArgs,
"struct._globalized_locals_ty");
3495 RuntimeFunction::OMPRTL___kmpc_reduction_get_fixed_buffer);
3496 Function *LtGCFunc = emitListToGlobalCopyFunction(
3497 ReductionInfos, ReductionsBufferTy, FuncAttrs);
3498 Function *LtGRFunc = emitListToGlobalReduceFunction(
3499 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs);
3500 Function *GtLCFunc = emitGlobalToListCopyFunction(
3501 ReductionInfos, ReductionsBufferTy, FuncAttrs);
3502 Function *GtLRFunc = emitGlobalToListReduceFunction(
3503 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs);
3507 RedFixedBuferFn, {},
"_openmp_teams_reductions_buffer_$_$ptr");
3509 Value *Args3[] = {RTLoc,
3510 KernelTeamsReductionPtr,
3522 RuntimeFunction::OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2);
3539 for (
auto En :
enumerate(ReductionInfos)) {
3546 Value *LHSPtr, *RHSPtr;
3548 &LHSPtr, &RHSPtr, CurFunc));
3553 return cast<Instruction>(U.getUser())->getParent()->getParent() ==
3557 return cast<Instruction>(U.getUser())->getParent()->getParent() ==
3561 assert(
false &&
"Unhandled ReductionGenCBKind");
3577 ".omp.reduction.func", &M);
3588 assert(RI.Variable &&
"expected non-null variable");
3589 assert(RI.PrivateVariable &&
"expected non-null private variable");
3590 assert(RI.ReductionGen &&
"expected non-null reduction generator callback");
3591 assert(RI.Variable->getType() == RI.PrivateVariable->getType() &&
3592 "expected variables and their private equivalents to have the same "
3594 assert(RI.Variable->getType()->isPointerTy() &&
3595 "expected variables to be pointers");
3608 unsigned NumReductions = ReductionInfos.
size();
3615 for (
auto En :
enumerate(ReductionInfos)) {
3616 unsigned Index = En.index();
3634 ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
3639 unsigned RedArrayByteSize =
DL.getTypeStoreSize(RedArrayTy);
3642 Value *Lock = getOMPCriticalRegionLock(
".reduction");
3644 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
3645 : RuntimeFunction::OMPRTL___kmpc_reduce);
3648 {Ident, ThreadId, NumVariables, RedArraySize, RedArray,
3649 ReductionFunc, Lock},
3668 for (
auto En :
enumerate(ReductionInfos)) {
3673 Value *RedValue =
nullptr;
3674 if (!IsByRef[En.index()]) {
3676 "red.value." +
Twine(En.index()));
3678 Value *PrivateRedValue =
3680 "red.private.value." +
Twine(En.index()));
3682 if (IsByRef[En.index()]) {
3684 PrivateRedValue, Reduced));
3687 PrivateRedValue, Reduced));
3692 if (!IsByRef[En.index()])
3696 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
3697 : RuntimeFunction::OMPRTL___kmpc_end_reduce);
3705 if (CanGenerateAtomic &&
llvm::none_of(IsByRef, [](
bool P) {
return P; })) {
3726 for (
auto En :
enumerate(ReductionInfos)) {
3729 RedArrayTy, LHSArrayPtr, 0, En.index());
3734 RedArrayTy, RHSArrayPtr, 0, En.index());
3744 if (!IsByRef[En.index()])
3761 Directive OMPD = Directive::OMPD_master;
3766 Value *Args[] = {Ident, ThreadId};
3774 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
3785 Directive OMPD = Directive::OMPD_masked;
3791 Value *ArgsEnd[] = {Ident, ThreadId};
3799 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
3834 IndVarPHI->
addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
3847 "omp_" +
Name +
".next",
true);
3858 CL->Header = Header;
3877 NextBB, NextBB,
Name);
3901 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
3911 auto *IndVarTy = cast<IntegerType>(Start->getType());
3912 assert(IndVarTy == Stop->
getType() &&
"Stop type mismatch");
3913 assert(IndVarTy == Step->
getType() &&
"Step type mismatch");
3919 ConstantInt *Zero = ConstantInt::get(IndVarTy, 0);
3947 Value *CountIfLooping;
3948 if (InclusiveStop) {
3958 "omp_" +
Name +
".tripcount");
3979 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
3982 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
3988 InsertPointTy AllocaIP,
3989 bool NeedsBarrier) {
3990 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
3992 "Require dedicated allocate IP");
4004 Type *IVTy =
IV->getType();
4024 Constant *One = ConstantInt::get(IVTy, 1);
4032 Constant *SchedulingType = ConstantInt::get(
4033 I32Type,
static_cast<int>(OMPScheduleType::UnorderedStatic));
4038 {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound,
4039 PUpperBound, PStride, One,
Zero});
4044 CLI->setTripCount(TripCount);
4065 omp::Directive::OMPD_for,
false,
4076 bool NeedsBarrier,
Value *ChunkSize) {
4077 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
4078 assert(ChunkSize &&
"Chunk size is required");
4083 Type *IVTy =
IV->getType();
4085 "Max supported tripcount bitwidth is 64 bits");
4087 :
Type::getInt64Ty(Ctx);
4090 Constant *One = ConstantInt::get(InternalIVTy, 1);
4102 Value *PLowerBound =
4104 Value *PUpperBound =
4113 Value *CastedChunkSize =
4115 Value *CastedTripCount =
4118 Constant *SchedulingType = ConstantInt::get(
4119 I32Type,
static_cast<int>(OMPScheduleType::UnorderedStaticChunked));
4133 SchedulingType, PLastIter,
4134 PLowerBound, PUpperBound,
4139 Value *FirstChunkStart =
4141 Value *FirstChunkStop =
4146 Value *NextChunkStride =
4151 Value *DispatchCounter;
4155 FirstChunkStart, CastedTripCount, NextChunkStride,
4179 Value *IsLastChunk =
4181 Value *CountUntilOrigTripCount =
4184 IsLastChunk, CountUntilOrigTripCount, ChunkRange,
"omp_chunk.tripcount");
4185 Value *BackcastedChunkTC =
4187 CLI->setTripCount(BackcastedChunkTC);
4192 Value *BackcastedDispatchCounter =
4226 case WorksharingLoopType::ForStaticLoop:
4229 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_4u);
4232 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_8u);
4234 case WorksharingLoopType::DistributeStaticLoop:
4237 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_4u);
4240 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_8u);
4242 case WorksharingLoopType::DistributeForStaticLoop:
4245 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_4u);
4248 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_8u);
4251 if (Bitwidth != 32 && Bitwidth != 64) {
4273 if (LoopType == WorksharingLoopType::DistributeStaticLoop) {
4274 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
4279 M, omp::RuntimeFunction::OMPRTL_omp_get_num_threads);
4280 Builder.
restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
4285 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
4286 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
4287 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
4323 CleanUpInfo.
collectBlocks(RegionBlockSet, BlocksToBeRemoved);
4331 "Expected unique undroppable user of outlined function");
4332 CallInst *OutlinedFnCallInstruction = dyn_cast<CallInst>(OutlinedFnUser);
4333 assert(OutlinedFnCallInstruction &&
"Expected outlined function call");
4335 "Expected outlined function call to be located in loop preheader");
4337 if (OutlinedFnCallInstruction->
arg_size() > 1)
4344 LoopBodyArg, ParallelTaskPtr, TripCount,
4347 for (
auto &ToBeDeletedItem : ToBeDeleted)
4348 ToBeDeletedItem->eraseFromParent();
4354 InsertPointTy AllocaIP,
4367 OI.OuterAllocaBB = AllocaIP.getBlock();
4372 "omp.prelatch",
true);
4392 OI.collectBlocks(ParallelRegionBlockSet,
Blocks);
4394 ParallelRegionBlockSet.
end());
4414 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
4423 if (ParallelRegionBlockSet.
count(Inst->getParent())) {
4424 Inst->replaceUsesOfWith(CLI->
getIndVar(), NewLoopCntLoad);
4430 OI.ExcludeArgsFromAggregate.push_back(NewLoopCntLoad);
4437 OI.PostOutlineCB = [=, ToBeDeletedVec =
4438 std::move(ToBeDeleted)](
Function &OutlinedFn) {
4440 ToBeDeletedVec, LoopType);
4448 bool NeedsBarrier, omp::ScheduleKind SchedKind,
Value *ChunkSize,
4449 bool HasSimdModifier,
bool HasMonotonicModifier,
4450 bool HasNonmonotonicModifier,
bool HasOrderedClause,
4453 return applyWorkshareLoopTarget(
DL, CLI, AllocaIP, LoopType);
4455 SchedKind, ChunkSize, HasSimdModifier, HasMonotonicModifier,
4456 HasNonmonotonicModifier, HasOrderedClause);
4458 bool IsOrdered = (EffectiveScheduleType & OMPScheduleType::ModifierOrdered) ==
4459 OMPScheduleType::ModifierOrdered;
4460 switch (EffectiveScheduleType & ~OMPScheduleType::ModifierMask) {
4461 case OMPScheduleType::BaseStatic:
4462 assert(!ChunkSize &&
"No chunk size with static-chunked schedule");
4464 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
4465 NeedsBarrier, ChunkSize);
4467 return applyStaticWorkshareLoop(
DL, CLI, AllocaIP, NeedsBarrier);
4469 case OMPScheduleType::BaseStaticChunked:
4471 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
4472 NeedsBarrier, ChunkSize);
4474 return applyStaticChunkedWorkshareLoop(
DL, CLI, AllocaIP, NeedsBarrier,
4477 case OMPScheduleType::BaseRuntime:
4478 case OMPScheduleType::BaseAuto:
4479 case OMPScheduleType::BaseGreedy:
4480 case OMPScheduleType::BaseBalanced:
4481 case OMPScheduleType::BaseSteal:
4482 case OMPScheduleType::BaseGuidedSimd:
4483 case OMPScheduleType::BaseRuntimeSimd:
4485 "schedule type does not support user-defined chunk sizes");
4487 case OMPScheduleType::BaseDynamicChunked:
4488 case OMPScheduleType::BaseGuidedChunked:
4489 case OMPScheduleType::BaseGuidedIterativeChunked:
4490 case OMPScheduleType::BaseGuidedAnalyticalChunked:
4491 case OMPScheduleType::BaseStaticBalancedChunked:
4492 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
4493 NeedsBarrier, ChunkSize);
4509 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
4512 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
4525 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
4528 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
4540 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_4u);
4543 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_8u);
4550 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
4552 "Require dedicated allocate IP");
4554 "Require valid schedule type");
4556 bool Ordered = (SchedType & OMPScheduleType::ModifierOrdered) ==
4557 OMPScheduleType::ModifierOrdered;
4568 Type *IVTy =
IV->getType();
4586 Constant *One = ConstantInt::get(IVTy, 1);
4607 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
4611 {SrcLoc, ThreadNum, SchedulingType, One,
4612 UpperBound, One, Chunk});
4622 PLowerBound, PUpperBound, PStride});
4623 Constant *Zero32 = ConstantInt::get(I32Type, 0);
4632 auto *PI = cast<PHINode>(Phi);
4633 PI->setIncomingBlock(0, OuterCond);
4634 PI->setIncomingValue(0, LowerBound);
4638 auto *Br = cast<BranchInst>(Term);
4639 Br->setSuccessor(0, OuterCond);
4647 auto *CI = cast<CmpInst>(Comp);
4648 CI->setOperand(1, UpperBound);
4651 auto *BI = cast<BranchInst>(Branch);
4652 assert(BI->getSuccessor(1) == Exit);
4653 BI->setSuccessor(1, OuterCond);
4666 omp::Directive::OMPD_for,
false,
4686 auto HasRemainingUses = [&BBsToErase](
BasicBlock *BB) {
4687 for (
Use &U : BB->uses()) {
4688 auto *UseInst = dyn_cast<Instruction>(U.getUser());
4691 if (BBsToErase.count(UseInst->getParent()))
4698 while (BBsToErase.remove_if(HasRemainingUses)) {
4709 assert(
Loops.size() >= 1 &&
"At least one loop required");
4710 size_t NumLoops =
Loops.size();
4714 return Loops.front();
4726 Loop->collectControlBlocks(OldControlBBs);
4730 if (ComputeIP.
isSet())
4737 Value *CollapsedTripCount =
nullptr;
4740 "All loops to collapse must be valid canonical loops");
4741 Value *OrigTripCount = L->getTripCount();
4742 if (!CollapsedTripCount) {
4743 CollapsedTripCount = OrigTripCount;
4755 OrigPreheader->
getNextNode(), OrigAfter,
"collapsed");
4763 Value *Leftover = Result->getIndVar();
4765 NewIndVars.
resize(NumLoops);
4766 for (
int i = NumLoops - 1; i >= 1; --i) {
4767 Value *OrigTripCount =
Loops[i]->getTripCount();
4770 NewIndVars[i] = NewIndVar;
4775 NewIndVars[0] = Leftover;
4784 BasicBlock *ContinueBlock = Result->getBody();
4786 auto ContinueWith = [&ContinueBlock, &ContinuePred,
DL](
BasicBlock *Dest,
4793 ContinueBlock =
nullptr;
4794 ContinuePred = NextSrc;
4801 for (
size_t i = 0; i < NumLoops - 1; ++i)
4802 ContinueWith(
Loops[i]->getBody(),
Loops[i + 1]->getHeader());
4808 for (
size_t i = NumLoops - 1; i > 0; --i)
4809 ContinueWith(
Loops[i]->getAfter(),
Loops[i - 1]->getLatch());
4812 ContinueWith(Result->getLatch(),
nullptr);
4819 for (
size_t i = 0; i < NumLoops; ++i)
4820 Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
4834std::vector<CanonicalLoopInfo *>
4838 "Must pass as many tile sizes as there are loops");
4839 int NumLoops =
Loops.size();
4840 assert(NumLoops >= 1 &&
"At least one loop to tile required");
4852 Loop->collectControlBlocks(OldControlBBs);
4860 assert(L->isValid() &&
"All input loops must be valid canonical loops");
4861 OrigTripCounts.
push_back(L->getTripCount());
4872 for (
int i = 0; i < NumLoops - 1; ++i) {
4885 for (
int i = 0; i < NumLoops; ++i) {
4887 Value *OrigTripCount = OrigTripCounts[i];
4900 Value *FloorTripOverflow =
4906 "omp_floor" +
Twine(i) +
".tripcount",
true);
4914 std::vector<CanonicalLoopInfo *> Result;
4915 Result.reserve(NumLoops * 2);
4928 auto EmbeddNewLoop =
4929 [
this,
DL,
F, InnerEnter, &Enter, &
Continue, &OutroInsertBefore](
4932 DL, TripCount,
F, InnerEnter, OutroInsertBefore,
Name);
4937 Enter = EmbeddedLoop->
getBody();
4939 OutroInsertBefore = EmbeddedLoop->
getLatch();
4940 return EmbeddedLoop;
4944 const Twine &NameBase) {
4947 EmbeddNewLoop(
P.value(), NameBase +
Twine(
P.index()));
4948 Result.push_back(EmbeddedLoop);
4952 EmbeddNewLoops(FloorCount,
"floor");
4958 for (
int i = 0; i < NumLoops; ++i) {
4962 Value *FloorIsEpilogue =
4964 Value *TileTripCount =
4971 EmbeddNewLoops(TileCounts,
"tile");
4976 for (std::pair<BasicBlock *, BasicBlock *>
P : InbetweenCode) {
4985 BodyEnter =
nullptr;
4986 BodyEntered = ExitBB;
4999 for (
int i = 0; i < NumLoops; ++i) {
5002 Value *OrigIndVar = OrigIndVars[i];
5030 if (Properties.
empty())
5053 assert(
Loop->isValid() &&
"Expecting a valid CanonicalLoopInfo");
5057 assert(Latch &&
"A valid CanonicalLoopInfo must have a unique latch");
5065 if (
I.mayReadOrWriteMemory()) {
5069 I.setMetadata(LLVMContext::MD_access_group, AccessGroup);
5091 const Twine &NamePrefix) {
5097 SplitBefore = dyn_cast<Instruction>(IfCond);
5143 VMap[
Block] = NewBB;
5153 if (TargetTriple.
isX86()) {
5154 if (Features.
lookup(
"avx512f"))
5156 else if (Features.
lookup(
"avx"))
5160 if (TargetTriple.
isPPC())
5162 if (TargetTriple.
isWasm())
5169 Value *IfCond, OrderKind Order,
5188 if (AlignedVars.
size()) {
5191 for (
auto &AlignedItem : AlignedVars) {
5192 Value *AlignedPtr = AlignedItem.first;
5193 Value *Alignment = AlignedItem.second;
5195 AlignedPtr, Alignment);
5202 createIfVersion(CanonicalLoop, IfCond, VMap,
"simd");
5206 "Cannot find value which corresponds to original loop latch");
5207 assert(isa<BasicBlock>(MappedLatch) &&
5208 "Cannot cast mapped latch block value to BasicBlock");
5209 BasicBlock *NewLatchBlock = dyn_cast<BasicBlock>(MappedLatch);
5238 if ((Safelen ==
nullptr) || (Order == OrderKind::OMP_ORDER_concurrent)) {
5246 Ctx, {
MDString::get(Ctx,
"llvm.loop.parallel_accesses"), AccessGroup}));
5254 Ctx, {
MDString::get(Ctx,
"llvm.loop.vectorize.enable"), BoolConst}));
5256 if (Simdlen || Safelen) {
5260 ConstantInt *VectorizeWidth = Simdlen ==
nullptr ? Safelen : Simdlen;
5286static std::unique_ptr<TargetMachine>
5290 StringRef CPU =
F->getFnAttribute(
"target-cpu").getValueAsString();
5291 StringRef Features =
F->getFnAttribute(
"target-features").getValueAsString();
5292 const std::string &
Triple = M->getTargetTriple();
5302 std::nullopt, OptLevel));
5326 [&](
const Function &
F) {
return TM->getTargetTransformInfo(
F); });
5341 assert(L &&
"Expecting CanonicalLoopInfo to be recognized as a loop");
5346 nullptr, ORE,
static_cast<int>(OptLevel),
5367 <<
" Threshold=" << UP.
Threshold <<
"\n"
5370 <<
" PartialOptSizeThreshold="
5389 if (
auto *Load = dyn_cast<LoadInst>(&
I)) {
5390 Ptr = Load->getPointerOperand();
5391 }
else if (
auto *Store = dyn_cast<StoreInst>(&
I)) {
5392 Ptr = Store->getPointerOperand();
5396 Ptr =
Ptr->stripPointerCasts();
5398 if (
auto *Alloca = dyn_cast<AllocaInst>(
Ptr)) {
5399 if (Alloca->getParent() == &
F->getEntryBlock())
5419 int MaxTripCount = 0;
5420 bool MaxOrZero =
false;
5421 unsigned TripMultiple = 0;
5423 bool UseUpperBound =
false;
5425 MaxTripCount, MaxOrZero, TripMultiple, UCE, UP, PP,
5427 unsigned Factor = UP.
Count;
5428 LLVM_DEBUG(
dbgs() <<
"Suggesting unroll factor of " << Factor <<
"\n");
5439 assert(Factor >= 0 &&
"Unroll factor must not be negative");
5455 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst}));
5468 *UnrolledCLI =
Loop;
5473 "unrolling only makes sense with a factor of 2 or larger");
5475 Type *IndVarTy =
Loop->getIndVarType();
5482 std::vector<CanonicalLoopInfo *>
LoopNest =
5497 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst})});
5500 (*UnrolledCLI)->assertOK();
5518 Value *Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
5537 if (!CPVars.
empty()) {
5542 Directive OMPD = Directive::OMPD_single;
5547 Value *Args[] = {Ident, ThreadId};
5573 EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCBWrapper,
5578 for (
size_t I = 0, E = CPVars.
size();
I < E; ++
I)
5581 ConstantInt::get(
Int64, 0), CPVars[
I],
5584 }
else if (!IsNowait)
5586 omp::Directive::OMPD_unknown,
false,
5598 Directive OMPD = Directive::OMPD_critical;
5603 Value *LockVar = getOMPCriticalRegionLock(CriticalName);
5604 Value *Args[] = {Ident, ThreadId, LockVar};
5621 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
5629 const Twine &
Name,
bool IsDependSource) {
5632 [](
Value *SV) {
return SV->
getType()->isIntegerTy(64); }) &&
5633 "OpenMP runtime requires depend vec with i64 type");
5646 for (
unsigned I = 0;
I < NumLoops; ++
I) {
5660 Value *Args[] = {Ident, ThreadId, DependBaseAddrGEP};
5678 Directive OMPD = Directive::OMPD_ordered;
5687 Value *Args[] = {Ident, ThreadId};
5697 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
5703 BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB,
bool Conditional,
5704 bool HasFinalize,
bool IsCancellable) {
5713 if (!isa_and_nonnull<BranchInst>(SplitPos))
5720 emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
5730 "Unexpected control flow graph state!!");
5731 emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
5733 "Unexpected Control Flow State!");
5739 "Unexpected Insertion point location!");
5742 auto InsertBB = merged ? ExitPredBB : ExitBB;
5743 if (!isa_and_nonnull<BranchInst>(SplitPos))
5753 if (!Conditional || !EntryCall)
5773 UI->eraseFromParent();
5781 omp::Directive OMPD, InsertPointTy FinIP,
Instruction *ExitCall,
5789 "Unexpected finalization stack state!");
5792 assert(Fi.DK == OMPD &&
"Unexpected Directive for Finalization call!");
5842 if (isa_and_nonnull<BranchInst>(OMP_Entry->
getTerminator())) {
5844 "copyin.not.master.end");
5899 Value *DependenceAddress,
bool HaveNowaitClause) {
5907 if (Device ==
nullptr)
5908 Device = ConstantInt::get(
Int32, -1);
5909 Constant *InteropTypeVal = ConstantInt::get(
Int32, (
int)InteropType);
5910 if (NumDependences ==
nullptr) {
5911 NumDependences = ConstantInt::get(
Int32, 0);
5915 Value *HaveNowaitClauseVal = ConstantInt::get(
Int32, HaveNowaitClause);
5917 Ident, ThreadId, InteropVar, InteropTypeVal,
5918 Device, NumDependences, DependenceAddress, HaveNowaitClauseVal};
5927 Value *NumDependences,
Value *DependenceAddress,
bool HaveNowaitClause) {
5935 if (Device ==
nullptr)
5936 Device = ConstantInt::get(
Int32, -1);
5937 if (NumDependences ==
nullptr) {
5938 NumDependences = ConstantInt::get(
Int32, 0);
5942 Value *HaveNowaitClauseVal = ConstantInt::get(
Int32, HaveNowaitClause);
5944 Ident, ThreadId, InteropVar, Device,
5945 NumDependences, DependenceAddress, HaveNowaitClauseVal};
5954 Value *NumDependences,
5955 Value *DependenceAddress,
5956 bool HaveNowaitClause) {
5963 if (Device ==
nullptr)
5964 Device = ConstantInt::get(
Int32, -1);
5965 if (NumDependences ==
nullptr) {
5966 NumDependences = ConstantInt::get(
Int32, 0);
5970 Value *HaveNowaitClauseVal = ConstantInt::get(
Int32, HaveNowaitClause);
5972 Ident, ThreadId, InteropVar, Device,
5973 NumDependences, DependenceAddress, HaveNowaitClauseVal};
6002 int32_t MinThreadsVal, int32_t MaxThreadsVal,
6003 int32_t MinTeamsVal, int32_t MaxTeamsVal) {
6021 const std::string DebugPrefix =
"_debug__";
6022 if (KernelName.
ends_with(DebugPrefix)) {
6023 KernelName = KernelName.
drop_back(DebugPrefix.length());
6030 if (MinTeamsVal > 1 || MaxTeamsVal > 0)
6034 if (MaxThreadsVal < 0)
6035 MaxThreadsVal = std::max(
6038 if (MaxThreadsVal > 0)
6049 omp::RuntimeFunction::OMPRTL___kmpc_target_init);
6052 Twine DynamicEnvironmentName = KernelName +
"_dynamic_environment";
6053 Constant *DynamicEnvironmentInitializer =
6057 DynamicEnvironmentInitializer, DynamicEnvironmentName,
6059 DL.getDefaultGlobalsAddressSpace());
6063 DynamicEnvironmentGV->
getType() == DynamicEnvironmentPtr
6064 ? DynamicEnvironmentGV
6066 DynamicEnvironmentPtr);
6069 ConfigurationEnvironment, {
6070 UseGenericStateMachineVal,
6071 MayUseNestedParallelismVal,
6078 ReductionBufferLength,
6081 KernelEnvironment, {
6082 ConfigurationEnvironmentInitializer,
6086 std::string KernelEnvironmentName =
6087 (KernelName +
"_kernel_environment").str();
6090 KernelEnvironmentInitializer, KernelEnvironmentName,
6092 DL.getDefaultGlobalsAddressSpace());
6096 KernelEnvironmentGV->
getType() == KernelEnvironmentPtr
6097 ? KernelEnvironmentGV
6099 KernelEnvironmentPtr);
6100 Value *KernelLaunchEnvironment = DebugKernelWrapper->
getArg(0);
6105 ThreadKind, ConstantInt::get(ThreadKind->
getType(), -1),
6128 UI->eraseFromParent();
6136 int32_t TeamsReductionDataSize,
6137 int32_t TeamsReductionBufferLength) {
6142 omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
6146 if (!TeamsReductionBufferLength || !TeamsReductionDataSize)
6152 const std::string DebugPrefix =
"_debug__";
6154 KernelName = KernelName.
drop_back(DebugPrefix.length());
6155 auto *KernelEnvironmentGV =
6157 assert(KernelEnvironmentGV &&
"Expected kernel environment global\n");
6158 auto *KernelEnvironmentInitializer = KernelEnvironmentGV->getInitializer();
6160 KernelEnvironmentInitializer,
6161 ConstantInt::get(
Int32, TeamsReductionDataSize), {0, 7});
6163 NewInitializer, ConstantInt::get(
Int32, TeamsReductionBufferLength),
6165 KernelEnvironmentGV->setInitializer(NewInitializer);
6170 NamedMDNode *MD = M.getOrInsertNamedMetadata(
"nvvm.annotations");
6174 auto *KernelOp = dyn_cast<ConstantAsMetadata>(
Op->getOperand(0));
6175 if (!KernelOp || KernelOp->getValue() != &
Kernel)
6177 auto *Prop = dyn_cast<MDString>(
Op->getOperand(1));
6178 if (!Prop || Prop->getString() !=
Name)
6190 auto *OldVal = cast<ConstantAsMetadata>(ExistingOp->
getOperand(2));
6191 int32_t OldLimit = cast<ConstantInt>(OldVal->getValue())->getZExtValue();
6194 OldVal->getValue()->getType(),
6195 Min ? std::min(OldLimit,
Value) : std::max(OldLimit,
Value))));
6204 NamedMDNode *MD = M.getOrInsertNamedMetadata(
"nvvm.annotations");
6209std::pair<int32_t, int32_t>
6211 int32_t ThreadLimit =
6216 if (!Attr.isValid() || !Attr.isStringAttribute())
6217 return {0, ThreadLimit};
6220 if (!llvm::to_integer(UBStr, UB, 10))
6221 return {0, ThreadLimit};
6222 UB = ThreadLimit ? std::min(ThreadLimit, UB) : UB;
6223 if (!llvm::to_integer(LBStr, LB, 10))
6229 auto *OldVal = cast<ConstantAsMetadata>(ExistingOp->getOperand(2));
6230 int32_t UB = cast<ConstantInt>(OldVal->getValue())->getZExtValue();
6231 return {0, ThreadLimit ? std::min(ThreadLimit, UB) : UB};
6233 return {0, ThreadLimit};
6243 llvm::utostr(LB) +
"," + llvm::utostr(UB));
6250std::pair<int32_t, int32_t>
6257 int32_t LB, int32_t UB) {
6262 Kernel.
addFnAttr(
"amdgpu-max-num-workgroups", llvm::utostr(LB) +
",1,1");
6267void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(
6282 assert(OutlinedFn &&
"The outlined function must exist if embedded");
6291Constant *OpenMPIRBuilder::createTargetRegionEntryAddr(
Function *OutlinedFn,
6297 "Named kernel already exists?");
6312 ? GenerateFunctionCallback(EntryFnName)
6318 if (!IsOffloadEntry)
6321 std::string EntryFnIDName =
6323 ? std::string(EntryFnName)
6327 EntryFnName, EntryFnIDName);
6334 setOutlinedTargetRegionFunctionAttributes(OutlinedFn);
6335 auto OutlinedFnID = createOutlinedFunctionID(OutlinedFn, EntryFnIDName);
6336 auto EntryAddr = createTargetRegionEntryAddr(OutlinedFn, EntryFnName);
6338 EntryInfo, EntryAddr, OutlinedFnID,
6340 return OutlinedFnID;
6363 bool IsStandAlone = !BodyGenCB;
6388 Value *OffloadingArgs[] = {SrcLocInfo, DeviceID,
6395 assert(MapperFunc &&
"MapperFunc missing for standalone target data");
6400 omp::OMPRTL___tgt_target_data_begin_mapper);
6404 for (
auto DeviceMap :
Info.DevicePtrInfoMap) {
6405 if (isa<AllocaInst>(DeviceMap.second.second)) {
6442 Value *OffloadingArgs[] = {SrcLocInfo, DeviceID,
6459 emitIfClause(IfCond, BeginThenGen, BeginElseGen, AllocaIP);
6469 emitIfClause(IfCond, EndThenGen, EndElseGen, AllocaIP);
6475 emitIfClause(IfCond, BeginThenGen, EndElseGen, AllocaIP);
6486 bool IsGPUDistribute) {
6487 assert((IVSize == 32 || IVSize == 64) &&
6488 "IV size is not compatible with the omp runtime");
6490 if (IsGPUDistribute)
6492 ? (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_4
6493 : omp::OMPRTL___kmpc_distribute_static_init_4u)
6494 : (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_8
6495 : omp::OMPRTL___kmpc_distribute_static_init_8u);
6497 Name = IVSize == 32 ? (IVSigned ? omp::OMPRTL___kmpc_for_static_init_4
6498 : omp::OMPRTL___kmpc_for_static_init_4u)
6499 : (IVSigned ? omp::OMPRTL___kmpc_for_static_init_8
6500 : omp::OMPRTL___kmpc_for_static_init_8u);
6507 assert((IVSize == 32 || IVSize == 64) &&
6508 "IV size is not compatible with the omp runtime");
6510 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_4
6511 : omp::OMPRTL___kmpc_dispatch_init_4u)
6512 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_8
6513 : omp::OMPRTL___kmpc_dispatch_init_8u);
6520 assert((IVSize == 32 || IVSize == 64) &&
6521 "IV size is not compatible with the omp runtime");
6523 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_4
6524 : omp::OMPRTL___kmpc_dispatch_next_4u)
6525 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_8
6526 : omp::OMPRTL___kmpc_dispatch_next_8u);
6533 assert((IVSize == 32 || IVSize == 64) &&
6534 "IV size is not compatible with the omp runtime");
6536 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_4
6537 : omp::OMPRTL___kmpc_dispatch_fini_4u)
6538 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_8
6539 : omp::OMPRTL___kmpc_dispatch_fini_8u);
6562 for (
auto &Arg : Inputs)
6563 ParameterTypes.
push_back(Arg->getType()->isPointerTy()
6567 for (
auto &Arg : Inputs)
6568 ParameterTypes.
push_back(Arg->getType());
6577 auto OldInsertPoint = Builder.
saveIP();
6605 auto AllocaIP = Builder.
saveIP();
6610 const auto &ArgRange =
6612 ?
make_range(Func->arg_begin() + 1, Func->arg_end())
6633 if (
auto *Const = dyn_cast<Constant>(Input))
6638 if (
auto *Instr = dyn_cast<Instruction>(
User))
6639 if (Instr->getFunction() == Func)
6640 Instr->replaceUsesOfWith(Input, InputCopy);
6646 for (
auto InArg :
zip(Inputs, ArgRange)) {
6647 Value *Input = std::get<0>(InArg);
6648 Argument &Arg = std::get<1>(InArg);
6649 Value *InputCopy =
nullptr;
6652 ArgAccessorFuncCB(Arg, Input, InputCopy, AllocaIP, Builder.
saveIP()));
6670 if (llvm::isa<llvm::GlobalValue>(std::get<0>(InArg)) ||
6671 llvm::isa<llvm::GlobalObject>(std::get<0>(InArg)) ||
6672 llvm::isa<llvm::GlobalVariable>(std::get<0>(InArg))) {
6673 DeferredReplacement.
push_back(std::make_pair(Input, InputCopy));
6677 ReplaceValue(Input, InputCopy, Func);
6681 for (
auto Deferred : DeferredReplacement)
6682 ReplaceValue(std::get<0>(Deferred), std::get<1>(Deferred), Func);
6725 Type *TaskPtrTy = OMPBuilder.TaskPtr;
6726 Type *TaskTy = OMPBuilder.Task;
6731 ".omp_target_task_proxy_func",
6733 ProxyFn->getArg(0)->setName(
"thread.id");
6734 ProxyFn->getArg(1)->setName(
"task");
6740 bool HasShareds = StaleCI->
arg_size() > 1;
6746 "StaleCI with shareds should have exactly two arguments.");
6748 auto *ArgStructAlloca = dyn_cast<AllocaInst>(StaleCI->
getArgOperand(1));
6749 assert(ArgStructAlloca &&
6750 "Unable to find the alloca instruction corresponding to arguments "
6751 "for extracted function");
6752 auto *ArgStructType =
6753 dyn_cast<StructType>(ArgStructAlloca->getAllocatedType());
6756 Builder.
CreateAlloca(ArgStructType,
nullptr,
"structArg");
6757 Value *TaskT = ProxyFn->getArg(1);
6758 Value *ThreadId = ProxyFn->getArg(0);
6759 Value *SharedsSize =
6760 Builder.
getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
6767 NewArgStructAlloca, NewArgStructAlloca->
getAlign(), LoadShared,
6770 Builder.
CreateCall(KernelLaunchFunction, {ThreadId, NewArgStructAlloca});
6783 [&OMPBuilder, &Builder, &Inputs, &CBFunc,
6784 &ArgAccessorFuncCB](
StringRef EntryFnName) {
6786 CBFunc, ArgAccessorFuncCB);
6790 OutlinedFn, OutlinedFnID);
6892 TargetTaskAllocaBB->
begin());
6896 OI.
EntryBB = TargetTaskAllocaBB;
6902 Builder, AllocaIP, ToBeDeleted, TargetTaskAllocaIP,
"global.tid",
false));
6913 EmitTargetCallFallbackCB, Args, DeviceID,
6914 RTLoc, TargetTaskAllocaIP));
6918 HasNoWait](
Function &OutlinedFn)
mutable {
6920 "there must be a single user for the outlined function");
6923 bool HasShareds = StaleCI->
arg_size() > 1;
6927 LLVM_DEBUG(
dbgs() <<
"Proxy task entry function created: " << *ProxyFn
6958 auto *ArgStructAlloca = dyn_cast<AllocaInst>(StaleCI->
getArgOperand(1));
6959 assert(ArgStructAlloca &&
6960 "Unable to find the alloca instruction corresponding to arguments "
6961 "for extracted function");
6962 auto *ArgStructType =
6963 dyn_cast<StructType>(ArgStructAlloca->getAllocatedType());
6964 assert(ArgStructType &&
"Unable to find struct type corresponding to "
6965 "arguments for extracted function");
6982 TaskAllocFn, {Ident, ThreadID,
Flags,
6983 TaskSize, SharedsSize,
7030 }
else if (DepArray) {
7063 Constant *OutlinedFnID, int32_t NumTeams, int32_t NumThreads,
7081 auto &&EmitTargetCallFallbackCB =
7102 bool HasNoWait =
false;
7103 bool HasDependencies = Dependencies.size() > 0;
7104 bool RequiresOuterTargetTask = HasNoWait || HasDependencies;
7107 NumTeamsVal, NumThreadsVal,
7108 DynCGGroupMem, HasNoWait);
7112 if (RequiresOuterTargetTask) {
7114 OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, KArgs, DeviceID,
7115 RTLoc, AllocaIP, Dependencies, HasNoWait));
7118 Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, KArgs,
7119 DeviceID, RTLoc, AllocaIP));
7142 OutlinedFnID, Args, CBFunc, ArgAccessorFuncCB);
7149 NumThreads, Args, GenMapInfoCB, Dependencies);
7163 return OS.str().str();
7177 assert(Elem.second->getValueType() == Ty &&
7178 "OMP internal variable has different type than requested");
7194 GV->setAlignment(std::max(TypeAlign, PtrAlign));
7201Value *OpenMPIRBuilder::getOMPCriticalRegionLock(
StringRef CriticalName) {
7202 std::string Prefix =
Twine(
"gomp_critical_user_", CriticalName).
str();
7203 std::string
Name = getNameWithSeparators({Prefix,
"var"},
".",
".");
7214 return SizePtrToInt;
7219 std::string VarName) {
7227 return MaptypesArrayGlobal;
7232 unsigned NumOperands,
7241 ArrI8PtrTy,
nullptr,
".offload_baseptrs");
7245 ArrI64Ty,
nullptr,
".offload_sizes");
7256 int64_t DeviceID,
unsigned NumOperands) {
7262 Value *ArgsBaseGEP =
7264 {Builder.getInt32(0), Builder.getInt32(0)});
7267 {Builder.getInt32(0), Builder.getInt32(0)});
7268 Value *ArgSizesGEP =
7270 {Builder.getInt32(0), Builder.getInt32(0)});
7276 ArgSizesGEP, MaptypesArg, MapnamesArg, NullPtr});
7284 assert((!ForEndCall ||
Info.separateBeginEndCalls()) &&
7285 "expected region end call to runtime only when end call is separate");
7287 auto VoidPtrTy = UnqualPtrTy;
7288 auto VoidPtrPtrTy = UnqualPtrTy;
7290 auto Int64PtrTy = UnqualPtrTy;
7292 if (!
Info.NumberOfPtrs) {
7304 Info.RTArgs.BasePointersArray,
7315 ForEndCall &&
Info.RTArgs.MapTypesArrayEnd ?
Info.RTArgs.MapTypesArrayEnd
7316 :
Info.RTArgs.MapTypesArray,
7331 if (!
Info.HasMapper)
7356 "struct.descriptor_dim");
7358 enum { OffsetFD = 0, CountFD, StrideFD };
7362 for (
unsigned I = 0, L = 0, E = NonContigInfo.
Dims.
size();
I < E; ++
I) {
7365 if (NonContigInfo.
Dims[
I] == 1)
7372 for (
unsigned II = 0, EE = NonContigInfo.
Dims[
I];
II < EE; ++
II) {
7373 unsigned RevIdx = EE -
II - 1;
7376 {Builder.getInt64(0), Builder.getInt64(II)});
7380 NonContigInfo.
Offsets[L][RevIdx], OffsetLVal,
7385 NonContigInfo.
Counts[L][RevIdx], CountLVal,
7390 NonContigInfo.
Strides[L][RevIdx], StrideLVal,
7399 Info.RTArgs.PointersArray, 0,
I);
7413 Info.clearArrayInfo();
7416 if (
Info.NumberOfPtrs == 0)
7426 PointerArrayType,
nullptr,
".offload_baseptrs");
7429 PointerArrayType,
nullptr,
".offload_ptrs");
7431 PointerArrayType,
nullptr,
".offload_mappers");
7432 Info.RTArgs.MappersArray = MappersArray;
7439 ConstantInt::get(Int64Ty, 0));
7441 for (
unsigned I = 0, E = CombinedInfo.
Sizes.
size();
I < E; ++
I) {
7442 if (
auto *CI = dyn_cast<Constant>(CombinedInfo.
Sizes[
I])) {
7443 if (!isa<ConstantExpr>(CI) && !isa<GlobalValue>(CI)) {
7444 if (IsNonContiguous &&
7445 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
7447 OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG))
7455 RuntimeSizes.
set(
I);
7458 if (RuntimeSizes.
all()) {
7461 SizeArrayType,
nullptr,
".offload_sizes");
7467 auto *SizesArrayGbl =
7472 if (!RuntimeSizes.
any()) {
7473 Info.RTArgs.SizesArray = SizesArrayGbl;
7479 SizeArrayType,
nullptr,
".offload_sizes");
7484 SizesArrayGbl, OffloadSizeAlign,
7489 Info.RTArgs.SizesArray = Buffer;
7497 for (
auto mapFlag : CombinedInfo.
Types)
7499 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
7503 Info.RTArgs.MapTypesArray = MapTypesArrayGbl;
7508 auto *MapNamesArrayGbl =
7510 Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
7512 Info.RTArgs.MapNamesArray =
7518 if (
Info.separateBeginEndCalls()) {
7519 bool EndMapTypesDiffer =
false;
7521 if (
Type &
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
7522 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT)) {
7523 Type &= ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
7524 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
7525 EndMapTypesDiffer =
true;
7528 if (EndMapTypesDiffer) {
7530 Info.RTArgs.MapTypesArrayEnd = MapTypesArrayGbl;
7535 for (
unsigned I = 0;
I <
Info.NumberOfPtrs; ++
I) {
7543 if (
Info.requiresDevicePointerInfo()) {
7550 DeviceAddrCB(
I,
Info.DevicePtrInfoMap[BPVal].second);
7552 Info.DevicePtrInfoMap[BPVal] = {BP, BP};
7554 DeviceAddrCB(
I, BP);
7566 if (RuntimeSizes.
test(
I)) {
7580 if (
Value *CustomMFunc = CustomMapperCB(
I))
7584 {Builder.getIntN(IndexSize, 0), Builder.getIntN(IndexSize, I)});
7590 Info.NumberOfPtrs == 0)
7635 if (
auto *CI = dyn_cast<ConstantInt>(
Cond)) {
7636 auto CondConstant = CI->getSExtValue();
7666bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
7670 "Unexpected Atomic Ordering.");
7734 assert(
X.Var->getType()->isPointerTy() &&
7735 "OMP Atomic expects a pointer to target memory");
7736 Type *XElemTy =
X.ElemTy;
7739 "OMP atomic read expected a scalar type");
7741 Value *XRead =
nullptr;
7747 XRead = cast<Value>(XLD);
7761 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Read);
7773 assert(
X.Var->getType()->isPointerTy() &&
7774 "OMP Atomic expects a pointer to target memory");
7775 Type *XElemTy =
X.ElemTy;
7778 "OMP atomic write expected a scalar type");
7793 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Write);
7806 Type *XTy =
X.Var->getType();
7808 "OMP Atomic expects a pointer to target memory");
7809 Type *XElemTy =
X.ElemTy;
7812 "OMP atomic update expected a scalar type");
7815 "OpenMP atomic does not support LT or GT operations");
7818 emitAtomicUpdate(AllocaIP,
X.Var,
X.ElemTy, Expr, AO, RMWOp, UpdateOp,
7819 X.IsVolatile, IsXBinopExpr);
7820 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Update);
7825Value *OpenMPIRBuilder::emitRMWOpAsInstruction(
Value *Src1,
Value *Src2,
7857std::pair<Value *, Value *> OpenMPIRBuilder::emitAtomicUpdate(
7860 AtomicUpdateCallbackTy &UpdateOp,
bool VolatileX,
bool IsXBinopExpr) {
7863 bool emitRMWOp =
false;
7871 emitRMWOp = XElemTy;
7874 emitRMWOp = (IsXBinopExpr && XElemTy);
7881 std::pair<Value *, Value *> Res;
7888 Res.second = Res.first;
7890 Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
7908 X->getName() +
".atomic.cont");
7912 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
7915 PHI->addIncoming(OldVal, CurBB);
7921 X->getName() +
".atomic.fltCast");
7924 X->getName() +
".atomic.ptrCast");
7935 Result->setVolatile(VolatileX);
7941 Res.first = OldExprVal;
7961 bool UpdateExpr,
bool IsPostfixUpdate,
bool IsXBinopExpr) {
7966 Type *XTy =
X.Var->getType();
7968 "OMP Atomic expects a pointer to target memory");
7969 Type *XElemTy =
X.ElemTy;
7972 "OMP atomic capture expected a scalar type");
7974 "OpenMP atomic does not support LT or GT operations");
7980 std::pair<Value *, Value *> Result =
7981 emitAtomicUpdate(AllocaIP,
X.Var,
X.ElemTy, Expr, AO, AtomicOp, UpdateOp,
7982 X.IsVolatile, IsXBinopExpr);
7984 Value *CapturedVal = (IsPostfixUpdate ? Result.first : Result.second);
7987 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Capture);
7999 IsPostfixUpdate, IsFailOnly, Failure);
8011 assert(
X.Var->getType()->isPointerTy() &&
8012 "OMP atomic expects a pointer to target memory");
8015 assert(V.Var->getType()->isPointerTy() &&
"v.var must be of pointer type");
8016 assert(V.ElemTy ==
X.ElemTy &&
"x and v must be of same type");
8021 if (
Op == OMPAtomicCompareOp::EQ) {
8040 "OldValue and V must be of same type");
8041 if (IsPostfixUpdate) {
8059 CurBBTI,
X.Var->getName() +
".atomic.exit");
8079 Value *CapturedValue =
8087 assert(R.Var->getType()->isPointerTy() &&
8088 "r.var must be of pointer type");
8089 assert(R.ElemTy->isIntegerTy() &&
"r must be of integral type");
8092 Value *ResultCast = R.IsSigned
8098 assert((
Op == OMPAtomicCompareOp::MAX ||
Op == OMPAtomicCompareOp::MIN) &&
8099 "Op should be either max or min at this point");
8100 assert(!IsFailOnly &&
"IsFailOnly is only valid when the comparison is ==");
8140 Value *CapturedValue =
nullptr;
8141 if (IsPostfixUpdate) {
8142 CapturedValue = OldValue;
8174 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Compare);
8221 bool SubClausesPresent =
8222 (NumTeamsLower || NumTeamsUpper || ThreadLimit || IfExpr);
8225 assert((NumTeamsLower ==
nullptr || NumTeamsUpper !=
nullptr) &&
8226 "if lowerbound is non-null, then upperbound must also be non-null "
8227 "for bounds on num_teams");
8229 if (NumTeamsUpper ==
nullptr)
8232 if (NumTeamsLower ==
nullptr)
8233 NumTeamsLower = NumTeamsUpper;
8237 "argument to if clause must be an integer value");
8242 ConstantInt::get(IfExpr->
getType(), 0));
8251 if (ThreadLimit ==
nullptr)
8257 {Ident, ThreadNum, NumTeamsLower, NumTeamsUpper, ThreadLimit});
8262 BodyGenCB(AllocaIP, CodeGenIP);
8273 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"gid",
true));
8275 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"tid",
true));
8277 auto HostPostOutlineCB = [
this, Ident,
8278 ToBeDeleted](
Function &OutlinedFn)
mutable {
8283 "there must be a single user for the outlined function");
8288 "Outlined function must have two or three arguments only");
8290 bool HasShared = OutlinedFn.
arg_size() == 3;
8298 assert(StaleCI &&
"Error while outlining - no CallInst user found for the "
8299 "outlined function.");
8306 omp::RuntimeFunction::OMPRTL___kmpc_fork_teams),
8326 std::string VarName) {
8335 return MapNamesArrayGlobal;
8340void OpenMPIRBuilder::initializeTypes(
Module &M) {
8343#define OMP_TYPE(VarName, InitValue) VarName = InitValue;
8344#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
8345 VarName##Ty = ArrayType::get(ElemTy, ArraySize); \
8346 VarName##PtrTy = PointerType::getUnqual(VarName##Ty);
8347#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
8348 VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
8349 VarName##Ptr = PointerType::getUnqual(VarName);
8350#define OMP_STRUCT_TYPE(VarName, StructName, Packed, ...) \
8351 T = StructType::getTypeByName(Ctx, StructName); \
8353 T = StructType::create(Ctx, {__VA_ARGS__}, StructName, Packed); \
8355 VarName##Ptr = PointerType::getUnqual(T);
8356#include "llvm/Frontend/OpenMP/OMPKinds.def"
8367 while (!Worklist.
empty()) {
8371 if (BlockSet.
insert(SuccBB).second)
8383 "omp_offloading_entries");
8407 Fn->
addFnAttr(
"uniform-work-group-size",
"true");
8426 auto &&GetMDInt = [
this](
unsigned V) {
8434 auto &&TargetRegionMetadataEmitter =
8435 [&
C, MD, &OrderedEntries, &GetMDInt, &GetMDString](
8450 GetMDInt(E.getKind()), GetMDInt(EntryInfo.DeviceID),
8451 GetMDInt(EntryInfo.FileID), GetMDString(EntryInfo.ParentName),
8452 GetMDInt(EntryInfo.Line), GetMDInt(EntryInfo.Count),
8453 GetMDInt(E.getOrder())};
8456 OrderedEntries[E.getOrder()] = std::make_pair(&E, EntryInfo);
8465 auto &&DeviceGlobalVarMetadataEmitter =
8466 [&
C, &OrderedEntries, &GetMDInt, &GetMDString, MD](
8476 Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDString(MangledName),
8477 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
8481 OrderedEntries[E.getOrder()] = std::make_pair(&E, varInfo);
8488 DeviceGlobalVarMetadataEmitter);
8490 for (
const auto &E : OrderedEntries) {
8491 assert(E.first &&
"All ordered entries must exist!");
8492 if (
const auto *CE =
8493 dyn_cast<OffloadEntriesInfoManager::OffloadEntryInfoTargetRegion>(
8495 if (!CE->getID() || !CE->getAddress()) {
8507 }
else if (
const auto *CE =
dyn_cast<
8518 if (!CE->getAddress()) {
8523 if (CE->getVarSize() == 0)
8529 "Declaret target link address is set.");
8532 if (!CE->getAddress()) {
8544 if (
auto *
GV = dyn_cast<GlobalValue>(CE->getAddress()))
8545 if ((
GV->hasLocalLinkage() ||
GV->hasHiddenVisibility()) &&
8553 Flags, CE->getLinkage(), CE->getVarName());
8556 Flags, CE->getLinkage());
8577 unsigned FileID,
unsigned Line,
unsigned Count) {
8580 <<
llvm::format(
"_%x_", FileID) << ParentName <<
"_l" << Line;
8587 unsigned NewCount = getTargetRegionEntryInfoCount(EntryInfo);
8590 EntryInfo.
Line, NewCount);
8597 auto FileIDInfo = CallBack();
8600 "getTargetEntryUniqueInfo, error message: " +
8606 std::get<1>(FileIDInfo));
8612 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
8614 !(Remain & 1); Remain = Remain >> 1)
8632 if (
static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
8634 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
8641 Flags &= ~omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
8642 Flags |= MemberOfFlag;
8648 bool IsDeclaration,
bool IsExternallyVisible,
8650 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
8651 std::vector<Triple> TargetTriple,
Type *LlvmPtrTy,
8652 std::function<
Constant *()> GlobalInitializer,
8668 if (!IsExternallyVisible)
8670 OS <<
"_decl_tgt_ref_ptr";
8679 auto *
GV = cast<GlobalVariable>(
Ptr);
8683 if (GlobalInitializer)
8684 GV->setInitializer(GlobalInitializer());
8690 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
8691 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
8692 GlobalInitializer, VariableLinkage, LlvmPtrTy, cast<Constant>(
Ptr));
8695 return cast<Constant>(
Ptr);
8704 bool IsDeclaration,
bool IsExternallyVisible,
8706 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
8707 std::vector<Triple> TargetTriple,
8708 std::function<
Constant *()> GlobalInitializer,
8725 VarName = MangledName;
8733 Linkage = (VariableLinkage) ? VariableLinkage() : LlvmVal->
getLinkage();
8749 auto *GvAddrRef = cast<GlobalVariable>(AddrRef);
8750 GvAddrRef->setConstant(
true);
8752 GvAddrRef->setInitializer(
Addr);
8753 GeneratedRefs.push_back(GvAddrRef);
8763 VarName = (
Addr) ?
Addr->getName() :
"";
8767 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
8768 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
8769 LlvmPtrTy, GlobalInitializer, VariableLinkage);
8770 VarName = (
Addr) ?
Addr->getName() :
"";
8791 auto &&GetMDInt = [MN](
unsigned Idx) {
8792 auto *V = cast<ConstantAsMetadata>(MN->getOperand(
Idx));
8793 return cast<ConstantInt>(V->getValue())->getZExtValue();
8796 auto &&GetMDString = [MN](
unsigned Idx) {
8797 auto *V = cast<MDString>(MN->getOperand(
Idx));
8798 return V->getString();
8801 switch (GetMDInt(0)) {
8829 if (HostFilePath.
empty())
8833 if (std::error_code Err = Buf.getError()) {
8835 "OpenMPIRBuilder: " +
8843 if (std::error_code Err =
M.getError()) {
8845 (
"error parsing host file inside of OpenMPIRBuilder: " + Err.message())
8857 return OffloadEntriesTargetRegion.empty() &&
8858 OffloadEntriesDeviceGlobalVar.empty();
8861unsigned OffloadEntriesInfoManager::getTargetRegionEntryInfoCount(
8863 auto It = OffloadEntriesTargetRegionCount.find(
8864 getTargetRegionEntryCountKey(EntryInfo));
8865 if (It == OffloadEntriesTargetRegionCount.end())
8870void OffloadEntriesInfoManager::incrementTargetRegionEntryInfoCount(
8872 OffloadEntriesTargetRegionCount[getTargetRegionEntryCountKey(EntryInfo)] =
8873 EntryInfo.
Count + 1;
8879 OffloadEntriesTargetRegion[EntryInfo] =
8881 OMPTargetRegionEntryTargetRegion);
8882 ++OffloadingEntriesNum;
8888 assert(EntryInfo.
Count == 0 &&
"expected default EntryInfo");
8891 EntryInfo.
Count = getTargetRegionEntryInfoCount(EntryInfo);
8897 if (!hasTargetRegionEntryInfo(EntryInfo)) {
8900 auto &Entry = OffloadEntriesTargetRegion[EntryInfo];
8901 Entry.setAddress(
Addr);
8903 Entry.setFlags(
Flags);
8906 hasTargetRegionEntryInfo(EntryInfo,
true))
8908 assert(!hasTargetRegionEntryInfo(EntryInfo) &&
8909 "Target region entry already registered!");
8911 OffloadEntriesTargetRegion[EntryInfo] = Entry;
8912 ++OffloadingEntriesNum;
8914 incrementTargetRegionEntryInfoCount(EntryInfo);
8921 EntryInfo.
Count = getTargetRegionEntryInfoCount(EntryInfo);
8923 auto It = OffloadEntriesTargetRegion.find(EntryInfo);
8924 if (It == OffloadEntriesTargetRegion.end()) {
8928 if (!IgnoreAddressId && (It->second.getAddress() || It->second.getID()))
8936 for (
const auto &It : OffloadEntriesTargetRegion) {
8937 Action(It.first, It.second);
8943 OffloadEntriesDeviceGlobalVar.try_emplace(
Name, Order,
Flags);
8944 ++OffloadingEntriesNum;
8952 if (!hasDeviceGlobalVarEntryInfo(VarName))
8954 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
8955 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
8956 if (Entry.getVarSize() == 0) {
8957 Entry.setVarSize(VarSize);
8958 Entry.setLinkage(Linkage);
8962 Entry.setVarSize(VarSize);
8963 Entry.setLinkage(Linkage);
8964 Entry.setAddress(
Addr);
8966 if (hasDeviceGlobalVarEntryInfo(VarName)) {
8967 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
8968 assert(Entry.isValid() && Entry.getFlags() ==
Flags &&
8969 "Entry not initialized!");
8970 if (Entry.getVarSize() == 0) {
8971 Entry.setVarSize(VarSize);
8972 Entry.setLinkage(Linkage);
8977 OffloadEntriesDeviceGlobalVar.try_emplace(VarName, OffloadingEntriesNum,
8981 OffloadEntriesDeviceGlobalVar.try_emplace(
8982 VarName, OffloadingEntriesNum,
Addr, VarSize,
Flags, Linkage,
"");
8983 ++OffloadingEntriesNum;
8990 for (
const auto &E : OffloadEntriesDeviceGlobalVar)
8991 Action(E.getKey(), E.getValue());
8998void CanonicalLoopInfo::collectControlBlocks(
9005 BBs.
append({getPreheader(), Header,
Cond, Latch, Exit, getAfter()});
9017void CanonicalLoopInfo::setTripCount(
Value *TripCount) {
9021 assert(isa<CmpInst>(CmpI) &&
"First inst must compare IV with TripCount");
9029void CanonicalLoopInfo::mapIndVar(
9039 for (
Use &U : OldIV->
uses()) {
9040 auto *
User = dyn_cast<Instruction>(U.getUser());
9043 if (
User->getParent() == getCond())
9045 if (
User->getParent() == getLatch())
9051 Value *NewIV = Updater(OldIV);
9054 for (
Use *U : ReplacableUses)
9075 "Preheader must terminate with unconditional branch");
9077 "Preheader must jump to header");
9080 assert(isa<BranchInst>(Header->getTerminator()) &&
9081 "Header must terminate with unconditional branch");
9082 assert(Header->getSingleSuccessor() ==
Cond &&
9083 "Header must jump to exiting block");
9086 assert(
Cond->getSinglePredecessor() == Header &&
9087 "Exiting block only reachable from header");
9089 assert(isa<BranchInst>(
Cond->getTerminator()) &&
9090 "Exiting block must terminate with conditional branch");
9092 "Exiting block must have two successors");
9093 assert(cast<BranchInst>(
Cond->getTerminator())->getSuccessor(0) == Body &&
9094 "Exiting block's first successor jump to the body");
9095 assert(cast<BranchInst>(
Cond->getTerminator())->getSuccessor(1) == Exit &&
9096 "Exiting block's second successor must exit the loop");
9100 "Body only reachable from exiting block");
9105 "Latch must terminate with unconditional branch");
9113 assert(isa<BranchInst>(Exit->getTerminator()) &&
9114 "Exit block must terminate with unconditional branch");
9116 "Exit block must jump to after block");
9120 "After block only reachable from exit block");
9124 assert(IndVar &&
"Canonical induction variable not found?");
9126 "Induction variable must be an integer");
9128 "Induction variable must be a PHI in the loop header");
9129 assert(cast<PHINode>(IndVar)->getIncomingBlock(0) == Preheader);
9131 cast<ConstantInt>(cast<PHINode>(IndVar)->getIncomingValue(0))->
isZero());
9132 assert(cast<PHINode>(IndVar)->getIncomingBlock(1) == Latch);
9134 auto *NextIndVar = cast<PHINode>(IndVar)->getIncomingValue(1);
9136 assert(cast<BinaryOperator>(NextIndVar)->
getOpcode() == BinaryOperator::Add);
9137 assert(cast<BinaryOperator>(NextIndVar)->getOperand(0) == IndVar);
9138 assert(cast<ConstantInt>(cast<BinaryOperator>(NextIndVar)->getOperand(1))
9141 Value *TripCount = getTripCount();
9142 assert(TripCount &&
"Loop trip count not found?");
9144 "Trip count and induction variable must have the same type");
9146 auto *CmpI = cast<CmpInst>(&
Cond->front());
9148 "Exit condition must be a signed less-than comparison");
9150 "Exit condition must compare the induction variable");
9152 "Exit condition must compare with the trip count");
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
#define LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE() pulls the operator overloads used by LLVM_MARK_AS_BITMASK_EN...
#define LLVM_MARK_AS_BITMASK_ENUM(LargestValue)
LLVM_MARK_AS_BITMASK_ENUM lets you opt in an individual enum type so you can perform bitwise operatio...
BlockVerifier::State From
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Analysis containing CSE Info
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
DenseMap< Block *, BlockRelaxAux > Blocks
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Rewrite Partial Register Uses
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
iv Induction Variable Users
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
static cl::opt< unsigned > TileSize("fuse-matrix-tile-size", cl::init(4), cl::Hidden, cl::desc("Tile size for matrix instruction fusion using square-shaped tiles."))
uint64_t IntrinsicInst * II
#define OMP_KERNEL_ARG_VERSION
Provides definitions for Target specific Grid Values.
static OMPScheduleType getOpenMPBaseScheduleType(llvm::omp::ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier)
Determine which scheduling algorithm to use, determined from schedule clause arguments.
static void emitTargetOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, TargetRegionEntryInfo &EntryInfo, Function *&OutlinedFn, Constant *&OutlinedFnID, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static Function * createOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, StringRef FuncName, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL)
Make Source branch to Target.
Value * createFakeIntVal(IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy OuterAllocaIP, llvm::SmallVectorImpl< Instruction * > &ToBeDeleted, OpenMPIRBuilder::InsertPointTy InnerAllocaIP, const Twine &Name="", bool AsPtr=true)
static FunctionCallee getKmpcForDynamicFiniForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for finalizing the dynamic loop using depending on type.
static Function * emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, CallInst *StaleCI)
Create an entry point for a target task with the following.
static void updateNVPTXMetadata(Function &Kernel, StringRef Name, int32_t Value, bool Min)
static OMPScheduleType getOpenMPOrderingScheduleType(OMPScheduleType BaseScheduleType, bool HasOrderedClause)
Adds ordering modifier flags to schedule type.
static OMPScheduleType getOpenMPMonotonicityScheduleType(OMPScheduleType ScheduleType, bool HasSimdModifier, bool HasMonotonic, bool HasNonmonotonic, bool HasOrderedClause)
Adds monotonicity modifier flags to schedule type.
static void addSimdMetadata(BasicBlock *Block, MDNode *AccessGroup, LoopInfo &LI)
Attach llvm.access.group metadata to the memref instructions of Block.
static OMPScheduleType computeOpenMPScheduleType(ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasMonotonicModifier, bool HasNonmonotonicModifier, bool HasOrderedClause)
Determine the schedule type using schedule and ordering clause arguments.
static bool isValidWorkshareLoopScheduleType(OMPScheduleType SchedType)
static Function * getFreshReductionFunc(Module &M)
static void raiseUserConstantDataAllocasToEntryBlock(IRBuilderBase &Builder, Function *Function)
static MDNode * getNVPTXMDNode(Function &Kernel, StringRef Name)
static FunctionCallee getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for updating the next loop using OpenMP dynamic scheduling depending...
static bool isConflictIP(IRBuilder<>::InsertPoint IP1, IRBuilder<>::InsertPoint IP2)
Return whether IP1 and IP2 are ambiguous, i.e.
static void checkReductionInfos(ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, bool IsGPU)
static FunctionCallee getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for initializing loop bounds using OpenMP dynamic scheduling dependi...
static cl::opt< double > UnrollThresholdFactor("openmp-ir-builder-unroll-threshold-factor", cl::Hidden, cl::desc("Factor for the unroll threshold to account for code " "simplifications still taking place"), cl::init(1.5))
static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI)
Heuristically determine the best-performant unroll factor for CLI.
static void workshareLoopTargetCallback(OpenMPIRBuilder *OMPIRBuilder, CanonicalLoopInfo *CLI, Value *Ident, Function &OutlinedFn, Type *ParallelTaskPtr, const SmallVector< Instruction *, 4 > &ToBeDeleted, WorksharingLoopType LoopType)
static OpenMPIRBuilder::InsertPointTy getInsertPointAfterInstr(Instruction *I)
static void redirectAllPredecessorsTo(BasicBlock *OldTarget, BasicBlock *NewTarget, DebugLoc DL)
Redirect all edges that branch to OldTarget to NewTarget.
static std::unique_ptr< TargetMachine > createTargetMachine(Function *F, CodeGenOptLevel OptLevel)
Create the TargetMachine object to query the backend for optimization preferences.
static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
static void addBasicBlockMetadata(BasicBlock *BB, ArrayRef< Metadata * > Properties)
Attach metadata Properties to the basic block described by BB.
static cl::opt< bool > OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, cl::desc("Use optimistic attributes describing " "'as-if' properties of runtime calls."), cl::init(false))
static FunctionCallee getKmpcForStaticLoopForType(Type *Ty, OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType)
static void createTargetLoopWorkshareCall(OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType, BasicBlock *InsertBlock, Value *Ident, Value *LoopBodyArg, Type *ParallelTaskPtr, Value *TripCount, Function &LoopBodyFn)
static Value * emitTaskDependencies(OpenMPIRBuilder &OMPBuilder, SmallVectorImpl< OpenMPIRBuilder::DependData > &Dependencies)
static const omp::GV & getGridValue(const Triple &T, Function *Kernel)
static void addLoopMetadata(CanonicalLoopInfo *Loop, ArrayRef< Metadata * > Properties)
Attach loop metadata Properties to the loop described by Loop.
static void removeUnusedBlocksFromParent(ArrayRef< BasicBlock * > BBs)
Determine which blocks in BBs are reachable from outside and remove the ones that are not reachable f...
static void targetParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, BasicBlock *OuterAllocaBB, Value *Ident, Value *IfCondition, Value *NumThreads, Instruction *PrivTID, AllocaInst *PrivTIDAddr, Value *ThreadID, const SmallVector< Instruction *, 4 > &ToBeDeleted)
static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy AllocaIP, Function *OutlinedFn, Constant *OutlinedFnID, int32_t NumTeams, int32_t NumThreads, SmallVectorImpl< Value * > &Args, OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB, SmallVector< llvm::OpenMPIRBuilder::DependData > Dependencies={})
static void hostParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, Value *Ident, Value *IfCondition, Instruction *PrivTID, AllocaInst *PrivTIDAddr, const SmallVector< Instruction *, 4 > &ToBeDeleted)
FunctionAnalysisManager FAM
const char LLVMTargetMachineRef TM
This file defines the Pass Instrumentation classes that provide instrumentation points into the pass ...
This header defines various interfaces for pass management in LLVM.
const SmallVectorImpl< MachineOperand > & Cond
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallSet class.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
static cl::opt< unsigned > MaxThreads("xcore-max-threads", cl::Optional, cl::desc("Maximum number of threads (for emulation thread-local storage)"), cl::Hidden, cl::value_desc("number"), cl::init(8))
static const uint32_t IV[8]
Class for arbitrary precision integers.
This class represents a conversion between pointers from one address space to another.
an instruction to allocate memory on the stack
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
PointerType * getType() const
Overload to return most specific pointer type.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
unsigned getAddressSpace() const
Return the address space for the allocation.
std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
void setAlignment(Align Align)
const Value * getArraySize() const
Get the number of elements allocated.
A container for analyses that lazily runs them and caches their results.
bool registerPass(PassBuilderT &&PassBuilder)
Register an analysis pass with the manager.
This class represents an incoming formal argument to a Function.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
Class to represent array types.
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A function analysis which provides an AssumptionCache.
AssumptionCache run(Function &F, FunctionAnalysisManager &)
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Min
*p = old <signed v ? old : v
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
AttrBuilder & addAttribute(Attribute::AttrKind Val)
Add an attribute to the builder.
AttrBuilder & removeAttribute(Attribute::AttrKind Val)
Remove an attribute from the builder.
AttributeSet getFnAttrs() const
The function attributes are returned.
AttributeList addFnAttributes(LLVMContext &C, const AttrBuilder &B) const
Add function attribute to the list.
AttributeSet addAttributes(LLVMContext &C, AttributeSet AS) const
Add attributes to the attribute set.
AttributeSet addAttribute(LLVMContext &C, Attribute::AttrKind Kind) const
Add an argument attribute.
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
StringRef getValueAsString() const
Return the attribute's value as a string.
LLVM Basic Block Representation.
void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
iterator begin()
Instruction iterator methods.
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
reverse_iterator rbegin()
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const Instruction & front() const
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
InstListType::reverse_iterator reverse_iterator
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
const Function * getParent() const
Return the enclosing method, or null if none.
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
const Instruction * getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
InstListType::iterator iterator
Instruction iterators...
LLVMContext & getContext() const
Get the context in which this basic block lives.
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
const Instruction & back() const
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Class to represent the control flow structure of an OpenMP canonical loop.
Value * getTripCount() const
Returns the llvm::Value containing the number of loop iterations.
BasicBlock * getHeader() const
The header is the entry for each iteration.
void assertOK() const
Consistency self-check.
Type * getIndVarType() const
Return the type of the induction variable (and the trip count).
BasicBlock * getBody() const
The body block is the single entry for a loop iteration and not controlled by CanonicalLoopInfo.
bool isValid() const
Returns whether this object currently represents the IR of a loop.
OpenMPIRBuilder::InsertPointTy getAfterIP() const
Return the insertion point for user code after the loop.
OpenMPIRBuilder::InsertPointTy getBodyIP() const
Return the insertion point for user code in the body.
BasicBlock * getAfter() const
The after block is intended for clean-up code such as lifetime end markers.
Function * getFunction() const
void invalidate()
Invalidate this loop.
BasicBlock * getLatch() const
Reaching the latch indicates the end of the loop body code.
OpenMPIRBuilder::InsertPointTy getPreheaderIP() const
Return the insertion point for user code before the loop.
BasicBlock * getCond() const
The condition block computes whether there is another loop iteration.
BasicBlock * getExit() const
Reaching the exit indicates no more iterations are being executed.
BasicBlock * getPreheader() const
The preheader ensures that there is only a single edge entering the loop.
Instruction * getIndVar() const
Returns the instruction representing the current logical induction variable.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true)
This method constructs a CDS and initializes it with a text string.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
static Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static Constant * getAddrSpaceCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
static ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
static ConstantInt * getFalse(LLVMContext &Context)
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
This class represents an Operation in the Expression.
uint64_t getNumOperands() const
A parsed version of the target data layout string in and methods for querying it.
unsigned getDefaultGlobalsAddressSpace() const
Align getABIIntegerTypeAlignment(unsigned BitWidth) const
Returns the minimum ABI-required alignment for an integer type of the specified bitwidth.
unsigned getAllocaAddrSpace() const
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
unsigned getPointerSize(unsigned AS=0) const
Layout pointer size in bytes, rounded up to a whole number of bytes.
unsigned getIndexSizeInBits(unsigned AS) const
Size in bits of index used for address calculation in getelementptr.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Analysis pass which computes a DominatorTree.
DominatorTree run(Function &F, FunctionAnalysisManager &)
Run the analysis pass over a function and produce a dominator tree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Lightweight error class with error context and mandatory checking.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Class to represent function types.
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
const BasicBlock & getEntryBlock() const
FunctionType * getFunctionType() const
Returns the FunctionType for me.
void removeFromParent()
removeFromParent - This method unlinks 'this' from the containing module, but does not delete it.
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
AttributeList getAttributes() const
Return the attribute list for this Function.
const Function & getFunction() const
void setAttributes(AttributeList Attrs)
Set the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
adds the attribute to the list of attributes for the given arg.
Function::iterator insert(Function::iterator Position, BasicBlock *BB)
Insert BB in the basic block list at Position.
Type * getReturnType() const
Returns the type of the ret val.
void setCallingConv(CallingConv::ID CC)
Argument * getArg(unsigned i) const
bool hasMetadata() const
Return true if this value has any metadata attached to it.
void addMetadata(unsigned KindID, MDNode &MD)
Add a metadata attachment.
LinkageTypes getLinkage() const
void setLinkage(LinkageTypes LT)
Module * getParent()
Get the module that this global value is contained inside of...
void setDSOLocal(bool Local)
PointerType * getType() const
Global values are always pointers.
@ HiddenVisibility
The GV is hidden.
@ ProtectedVisibility
The GV is protected.
void setVisibility(VisibilityTypes V)
LinkageTypes
An enumeration for the kinds of linkage for global values.
@ PrivateLinkage
Like Internal, but omit from symbol table.
@ CommonLinkage
Tentative definitions.
@ InternalLinkage
Rename collisions when linking (static functions).
@ WeakODRLinkage
Same, but only replaced by something equivalent.
@ ExternalLinkage
Externally visible function.
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
@ AppendingLinkage
Special purpose, only applies to global arrays.
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Type * getValueType() const
InsertPoint - A saved insertion point.
BasicBlock * getBlock() const
bool isSet() const
Returns true if this insert point is set.
BasicBlock::iterator getPoint() const
Common base class shared among various IRBuilders.
Value * CreatePtrDiff(Type *ElemTy, Value *LHS, Value *RHS, const Twine &Name="")
Return the i64 difference between two pointer values, dividing out the size of the pointed-to objects...
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
AtomicCmpXchgInst * CreateAtomicCmpXchg(Value *Ptr, Value *Cmp, Value *New, MaybeAlign Align, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, SyncScope::ID SSID=SyncScope::System)
AllocaInst * CreateAlloca(Type *Ty, unsigned AddrSpace, Value *ArraySize=nullptr, const Twine &Name="")
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Value * CreateICmpSGT(Value *LHS, Value *RHS, const Twine &Name="")
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
UnreachableInst * CreateUnreachable()
Value * CreatePointerCast(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
CallInst * CreateAlignmentAssumption(const DataLayout &DL, Value *PtrValue, unsigned Alignment, Value *OffsetValue=nullptr)
Create an assume intrinsic call that represents an alignment assumption on the provided pointer.
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
BasicBlock::iterator GetInsertPoint() const
Value * CreateStructGEP(Type *Ty, Value *Ptr, unsigned Idx, const Twine &Name="")
IntegerType * getIndexTy(const DataLayout &DL, unsigned AddrSpace)
Fetch the type of an integer that should be used to index GEP operations within AddressSpace.
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Value * CreateNSWAdd(Value *LHS, Value *RHS, const Twine &Name="")
BasicBlock * GetInsertBlock() const
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Value * CreatePointerBitCastOrAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateUDiv(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
IntegerType * getInt16Ty()
Fetch the type representing a 16-bit integer.
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Value * CreateNeg(Value *V, const Twine &Name="", bool HasNSW=false)
InsertPoint saveIP() const
Returns the current insert point.
Constant * CreateGlobalStringPtr(StringRef Str, const Twine &Name="", unsigned AddressSpace=0, Module *M=nullptr, bool AddNull=true)
Same as CreateGlobalString, but return a pointer with "i8*" type instead of a pointer to array of i8.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
DebugLoc getCurrentDebugLocation() const
Get location information used by debugging information.
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
LLVMContext & getContext() const
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
ReturnInst * CreateRetVoid()
Create a 'ret void' instruction.
Value * CreateConstInBoundsGEP2_32(Type *Ty, Value *Ptr, unsigned Idx0, unsigned Idx1, const Twine &Name="")
Value * CreateConstInBoundsGEP2_64(Type *Ty, Value *Ptr, uint64_t Idx0, uint64_t Idx1, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateIsNotNull(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg != 0.
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
void ClearInsertionPoint()
Clear the insertion point: created instructions will not be inserted into a block.
Value * CreateICmpSLT(Value *LHS, Value *RHS, const Twine &Name="")
ConstantInt * getInt16(uint16_t C)
Get a constant 16-bit value.
Value * CreateICmpUGE(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
Value * CreateIsNull(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg == 0.
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Type * getVoidTy()
Fetch the type representing void.
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Value * CreateURem(Value *LHS, Value *RHS, const Twine &Name="")
CallInst * CreateMemCpy(Value *Dst, MaybeAlign DstAlign, Value *Src, MaybeAlign SrcAlign, uint64_t Size, bool isVolatile=false, MDNode *TBAATag=nullptr, MDNode *TBAAStructTag=nullptr, MDNode *ScopeTag=nullptr, MDNode *NoAliasTag=nullptr)
Create and insert a memcpy between the specified pointers.
Value * CreateSExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a SExt or Trunc from the integer value V to DestTy.
Value * CreateAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
void moveBeforePreserving(Instruction *MovePos)
Perform a moveBefore operation, while signalling that the caller intends to preserve the original ord...
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
static bool classof(const Value *V)
Methods for support type inquiry through isa, cast, and dyn_cast:
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
Class to represent integer types.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
Analysis pass that exposes the LoopInfo for a function.
LoopInfo run(Function &F, FunctionAnalysisManager &AM)
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
Represents a single loop in the control flow graph.
MDNode * createCallbackEncoding(unsigned CalleeArgNo, ArrayRef< int > Arguments, bool VarArgsArePassed)
Return metadata describing a callback (see llvm::AbstractCallSite).
void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
static MDTuple * getDistinct(LLVMContext &Context, ArrayRef< Metadata * > MDs)
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
A Module instance is used to store all the information related to an LLVM module.
LLVMContext & getContext() const
Get the global data context.
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
NamedMDNode * getNamedMetadata(const Twine &Name) const
Return the first NamedMDNode in the module with the specified name.
StringRef getName() const
Get a short "name" for the module.
const std::string & getTargetTriple() const
Get the target triple which is a string describing the target host.
iterator_range< global_iterator > globals()
const FunctionListType & getFunctionList() const
Get the Module's list of functions (constant).
GlobalVariable * getGlobalVariable(StringRef Name) const
Look up the specified global variable in the module symbol table.
GlobalValue * getNamedValue(StringRef Name) const
Return the global value in the module with the specified name, of arbitrary type.
NamedMDNode * getOrInsertNamedMetadata(StringRef Name)
Return the named MDNode in the module with the specified name.
const GlobalVariable * getNamedGlobal(StringRef Name) const
Return the global variable in the module with the specified name, of arbitrary type.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
iterator_range< op_iterator > operands()
void addOperand(MDNode *M)
Device global variable entries info.
Target region entries info.
Base class of the entries info.
@ OffloadingEntryInfoTargetRegion
Entry is a target region.
@ OffloadingEntryInfoDeviceGlobalVar
Entry is a declare target variable.
OMPTargetDeviceClauseKind
Kind of device clause for declare target variables and functions NOTE: Currently not used as a part o...
@ OMPTargetDeviceClauseAny
The target is marked for all devices.
void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr, int64_t VarSize, OMPTargetGlobalVarEntryKind Flags, GlobalValue::LinkageTypes Linkage)
Register device global variable entry.
void initializeDeviceGlobalVarEntryInfo(StringRef Name, OMPTargetGlobalVarEntryKind Flags, unsigned Order)
Initialize device global variable entry.
void actOnDeviceGlobalVarEntriesInfo(const OffloadDeviceGlobalVarEntryInfoActTy &Action)
OMPTargetRegionEntryKind
Kind of the target registry entry.
@ OMPTargetRegionEntryTargetRegion
Mark the entry as target region.
void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, const TargetRegionEntryInfo &EntryInfo)
bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, bool IgnoreAddressId=false) const
Return true if a target region entry with the provided information exists.
void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, Constant *Addr, Constant *ID, OMPTargetRegionEntryKind Flags)
Register target region entry.
void actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action)
unsigned size() const
Return number of entries defined so far.
void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo, unsigned Order)
Initialize target region entry.
OMPTargetGlobalVarEntryKind
Kind of the global variable entry.
@ OMPTargetGlobalVarEntryEnter
Mark the entry as a declare target enter.
@ OMPTargetGlobalRegisterRequires
Mark the entry as a register requires global.
@ OMPTargetGlobalVarEntryIndirect
Mark the entry as a declare target indirect global.
@ OMPTargetGlobalVarEntryLink
Mark the entry as a 'declare target link' global.
@ OMPTargetGlobalVarEntryTo
Mark the entry as a 'declare target to' global.
bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const
Checks if the variable with the given name has been registered already.
bool empty() const
Return true if a there are no entries defined.
std::optional< bool > IsTargetDevice
Flag to define whether to generate code for the role of the OpenMP host (if set to false) or device (...
void setGridValue(omp::GV G)
StringRef separator() const
int64_t getRequiresFlags() const
Returns requires directive clauses as flags compatible with those expected by libomptarget.
StringRef firstSeparator() const
std::optional< bool > EmitLLVMUsedMetaInfo
Flag for specifying if LLVMUsed information should be emitted.
omp::GV getGridValue() const
void setHasRequiresReverseOffload(bool Value)
bool hasRequiresUnifiedSharedMemory() const
void setHasRequiresUnifiedSharedMemory(bool Value)
bool hasRequiresDynamicAllocators() const
bool openMPOffloadMandatory() const
void setHasRequiresUnifiedAddress(bool Value)
bool isTargetDevice() const
void setHasRequiresDynamicAllocators(bool Value)
void setEmitLLVMUsed(bool Value=true)
bool hasRequiresReverseOffload() const
bool hasRequiresFlags() const
bool hasRequiresUnifiedAddress() const
Struct that keeps the information that should be kept throughout a 'target data' region.
An interface to create LLVM-IR for OpenMP directives.
Constant * getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize, omp::IdentFlag Flags=omp::IdentFlag(0), unsigned Reserve2Flags=0)
Return an ident_t* encoding the source location SrcLocStr and Flags.
FunctionCallee getOrCreateRuntimeFunction(Module &M, omp::RuntimeFunction FnID)
Return the function declaration for the runtime function with FnID.
std::function< void(InsertPointTy CodeGenIP)> FinalizeCallbackTy
Callback type for variable finalization (think destructors).
InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD, int32_t MinThreadsVal=0, int32_t MaxThreadsVal=0, int32_t MinTeamsVal=0, int32_t MaxTeamsVal=0)
The omp target interface.
void emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen, BodyGenCallbackTy ElseGen, InsertPointTy AllocaIP={})
Emits code for OpenMP 'if' clause using specified BodyGenCallbackTy Here is the logic: if (Cond) { Th...
ReductionGenCBKind
Enum class for the ReductionGen callback type to be used.
CanonicalLoopInfo * collapseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, InsertPointTy ComputeIP)
Collapse a loop nest into a single loop.
void createTaskyield(const LocationDescription &Loc)
Generator for '#omp taskyield'.
void emitBranch(BasicBlock *Target)
InsertPointTy createAtomicWrite(const LocationDescription &Loc, AtomicOpValue &X, Value *Expr, AtomicOrdering AO)
Emit atomic write for : X = Expr — Only Scalar data types.
static void writeThreadBoundsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
InsertPointTy createCritical(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst)
Generator for '#omp critical'.
static TargetRegionEntryInfo getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack, StringRef ParentName="")
Creates a unique info for a target entry when provided a filename and line number from.
void emitTaskwaitImpl(const LocationDescription &Loc)
Generate a taskwait runtime call.
Constant * registerTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, Function *OutlinedFunction, StringRef EntryFnName, StringRef EntryFnIDName)
Registers the given function and sets up the attributes of the function. Returns the FunctionID.
InsertPointTy createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, AtomicOpValue &V, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr)
Emit atomic update for constructs: — Only Scalar data types V = X; X = X BinOp Expr ,...
void initialize()
Initialize the internal state, this will put structures types and potentially other helpers into the ...
void createTargetDeinit(const LocationDescription &Loc, int32_t TeamsReductionDataSize=0, int32_t TeamsReductionBufferLength=1024)
Create a runtime call for kmpc_target_deinit.
CanonicalLoopInfo * createCanonicalLoop(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *TripCount, const Twine &Name="loop")
Generator for the control flow structure of an OpenMP canonical loop.
void loadOffloadInfoMetadata(Module &M)
Loads all the offload entries information from the host IR metadata.
InsertPointTy createAtomicUpdate(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr)
Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X For complex Operations: X = ...
void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully unroll a loop.
void emitFlush(const LocationDescription &Loc)
Generate a flush runtime call.
InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive Kind, bool ForceSimpleCall=false, bool CheckCancelFlag=true)
Emitter methods for OpenMP directives.
InsertPointTy emitKernelLaunch(const LocationDescription &Loc, Function *OutlinedFn, Value *OutlinedFnID, EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args, Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP)
Generate a target region entry call and host fallback call.
InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition, omp::Directive CanceledDirective)
Generator for '#omp cancel'.
static std::pair< int32_t, int32_t > readThreadBoundsForKernel(const Triple &T, Function &Kernel)
}
OpenMPIRBuilderConfig Config
The OpenMPIRBuilder Configuration.
CallInst * createOMPInteropDestroy(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_destroy.
InsertPointTy createAtomicRead(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOrdering AO)
Emit atomic Read for : V = X — Only Scalar data types.
std::function< void(EmitMetadataErrorKind, TargetRegionEntryInfo)> EmitMetadataErrorReportFunctionTy
Callback function type.
InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsThreads)
Generator for '#omp ordered [threads | simd]'.
OpenMPIRBuilder::InsertPointTy createTargetData(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond, TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB, omp::RuntimeFunction *MapperFunc=nullptr, function_ref< InsertPointTy(InsertPointTy CodeGenIP, BodyGenTy BodyGenType)> BodyGenCB=nullptr, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr, function_ref< Value *(unsigned int)> CustomMapperCB=nullptr, Value *SrcLocInfo=nullptr)
Generator for '#omp target data'.
std::forward_list< CanonicalLoopInfo > LoopInfos
Collection of owned canonical loop objects that eventually need to be free'd.
void createTaskwait(const LocationDescription &Loc)
Generator for '#omp taskwait'.
CanonicalLoopInfo * createLoopSkeleton(DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore, BasicBlock *PostInsertBefore, const Twine &Name={})
Create the control flow structure of a canonical OpenMP loop.
std::string createPlatformSpecificName(ArrayRef< StringRef > Parts) const
Create a name using the platform-specific separators.
FunctionCallee createDispatchNextFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_next_* runtime function for the specified size IVSize and sign IVSigned.
static void getKernelArgsVector(TargetKernelArgs &KernelArgs, IRBuilderBase &Builder, SmallVector< Value * > &ArgsVector)
Create the kernel args vector used by emitTargetKernel.
InsertPointTy createTarget(const LocationDescription &Loc, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::InsertPointTy CodeGenIP, TargetRegionEntryInfo &EntryInfo, int32_t NumTeams, int32_t NumThreads, SmallVectorImpl< Value * > &Inputs, GenMapInfoCallbackTy GenMapInfoCB, TargetBodyGenCallbackTy BodyGenCB, TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB, SmallVector< DependData > Dependencies={})
Generator for '#omp target'.
void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully or partially unroll a loop.
omp::OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position)
Get OMP_MAP_MEMBER_OF flag with extra bits reserved based on the position given.
InsertPointTy createReductionsGPU(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< ReductionInfo > ReductionInfos, bool IsNoWait=false, bool IsTeamsReduction=false, bool HasDistribute=false, ReductionGenCBKind ReductionGenCBKind=ReductionGenCBKind::MLIR, std::optional< omp::GV > GridValue={}, unsigned ReductionBufNum=1024, Value *SrcLocInfo=nullptr)
Design of OpenMP reductions on the GPU.
void addAttributes(omp::RuntimeFunction FnID, Function &Fn)
Add attributes known for FnID to Fn.
Module & M
The underlying LLVM-IR module.
StringMap< Constant * > SrcLocStrMap
Map to remember source location strings.
void createMapperAllocas(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumOperands, struct MapperAllocas &MapperAllocas)
Create the allocas instruction used in call to mapper functions.
Constant * getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the source location LocStr.
void addOutlineInfo(OutlineInfo &&OI)
Add a new region that will be outlined later.
FunctionCallee createDispatchFiniFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_fini_* runtime function for the specified size IVSize and sign IVSigned.
void emitOffloadingArraysArgument(IRBuilderBase &Builder, OpenMPIRBuilder::TargetDataRTArgs &RTArgs, OpenMPIRBuilder::TargetDataInfo &Info, bool EmitDebug=false, bool ForEndCall=false)
Emit the arguments to be passed to the runtime library based on the arrays of base pointers,...
void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor, CanonicalLoopInfo **UnrolledCLI)
Partially unroll a loop.
InsertPointTy createSections(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< StorableBodyGenCallbackTy > SectionCBs, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait)
Generator for '#omp sections'.
InsertPointTy createTask(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, bool Tied=true, Value *Final=nullptr, Value *IfCondition=nullptr, SmallVector< DependData > Dependencies={})
Generator for #omp task
void emitTaskyieldImpl(const LocationDescription &Loc)
Generate a taskyield runtime call.
void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc, Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg, struct MapperAllocas &MapperAllocas, int64_t DeviceID, unsigned NumOperands)
Create the call for the target mapper function.
InsertPointTy createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D, AtomicOrdering AO, omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly)
Emit atomic compare for constructs: — Only scalar data types cond-expr-stmt: x = x ordop expr ?...
InsertPointTy createOrderedDepend(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumLoops, ArrayRef< llvm::Value * > StoreValues, const Twine &Name, bool IsDependSource)
Generator for '#omp ordered depend (source | sink)'.
InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr, llvm::IntegerType *IntPtrTy, bool BranchtoEnd=true)
Generate conditional branch and relevant BasicBlocks through which private threads copy the 'copyin' ...
void emitOffloadingArrays(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info, bool IsNonContiguous=false, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr, function_ref< Value *(unsigned int)> CustomMapperCB=nullptr)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
SmallVector< FinalizationInfo, 8 > FinalizationStack
The finalization stack made up of finalize callbacks currently in-flight, wrapped into FinalizationIn...
std::vector< CanonicalLoopInfo * > tileLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, ArrayRef< Value * > TileSizes)
Tile a loop nest.
CallInst * createOMPInteropInit(const LocationDescription &Loc, Value *InteropVar, omp::OMPInteropType InteropType, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_init.
void finalize(Function *Fn=nullptr)
Finalize the underlying module, e.g., by outlining regions.
SmallVector< OutlineInfo, 16 > OutlineInfos
Collection of regions that need to be outlined during finalization.
Function * getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID)
const Triple T
The target triple of the underlying module.
DenseMap< std::pair< Constant *, uint64_t >, Constant * > IdentMap
Map to remember existing ident_t*.
CallInst * createOMPFree(const LocationDescription &Loc, Value *Addr, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_free.
FunctionCallee createForStaticInitFunction(unsigned IVSize, bool IVSigned, bool IsGPUDistribute)
Returns __kmpc_for_static_init_* runtime function for the specified size IVSize and sign IVSigned.
CallInst * createOMPAlloc(const LocationDescription &Loc, Value *Size, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_alloc.
void emitNonContiguousDescriptor(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info)
Emit an array of struct descriptors to be assigned to the offload args.
InsertPointTy createSection(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for '#omp section'.
InsertPointTy applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, bool NeedsBarrier, llvm::omp::ScheduleKind SchedKind=llvm::omp::OMP_SCHEDULE_Default, Value *ChunkSize=nullptr, bool HasSimdModifier=false, bool HasMonotonicModifier=false, bool HasNonmonotonicModifier=false, bool HasOrderedClause=false, omp::WorksharingLoopType LoopType=omp::WorksharingLoopType::ForStaticLoop)
Modifies the canonical loop to be a workshare loop.
void emitBlock(BasicBlock *BB, Function *CurFn, bool IsFinished=false)
Value * getOrCreateThreadID(Value *Ident)
Return the current thread ID.
InsertPointTy createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for '#omp master'.
IRBuilder ::InsertPoint createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable)
Generator for '#omp parallel'.
StringMap< GlobalVariable *, BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
GlobalVariable * getOrCreateInternalVariable(Type *Ty, const StringRef &Name, unsigned AddressSpace=0)
Gets (if a variable with the given name already exists) or creates internal global variable with the spe...
FunctionCallee createDispatchInitFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_init_* runtime function for the specified size IVSize and sign IVSigned.
InsertPointTy createSingle(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsNowait, ArrayRef< llvm::Value * > CPVars={}, ArrayRef< llvm::Function * > CPFuncs={})
Generator for '#omp single'.
CallInst * createOMPInteropUse(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_use.
IRBuilder<>::InsertPoint InsertPointTy
Type used throughout for insertion points.
GlobalVariable * createOffloadMapnames(SmallVectorImpl< llvm::Constant * > &Names, std::string VarName)
Create the global variable holding the offload names information.
static void writeTeamsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
std::function< Function *(StringRef FunctionName)> FunctionGenCallback
Functions used to generate a function with the given name.
void setCorrectMemberOfFlag(omp::OpenMPOffloadMappingFlags &Flags, omp::OpenMPOffloadMappingFlags MemberOfFlag)
Given an initial flag set, this function modifies it to contain the passed in MemberOfFlag generated ...
void emitCancelationCheckImpl(Value *CancelFlag, omp::Directive CanceledDirective, FinalizeCallbackTy ExitCB={})
Generate control flow and cleanup for cancellation.
Constant * getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the default source location.
InsertPointTy createMasked(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, Value *Filter)
Generator for '#omp masked'.
void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size, int32_t Flags, GlobalValue::LinkageTypes, StringRef Name="")
Creates offloading entry for the provided entry ID ID, address Addr, size Size, and flags Flags.
static unsigned getOpenMPDefaultSimdAlign(const Triple &TargetTriple, const StringMap< bool > &Features)
Get the default alignment value for given target.
unsigned getFlagMemberOffset()
Get the offset of the OMP_MAP_MEMBER_OF field.
InsertPointTy createTaskgroup(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB)
Generator for the taskgroup construct.
void createOffloadEntriesAndInfoMetadata(EmitMetadataErrorReportFunctionTy &ErrorReportFunction)
void applySimd(CanonicalLoopInfo *Loop, MapVector< Value *, Value * > AlignedVars, Value *IfCond, omp::OrderKind Order, ConstantInt *Simdlen, ConstantInt *Safelen)
Add metadata to simd-ize a loop.
void emitTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, FunctionGenCallback &GenerateFunctionCallback, bool IsOffloadEntry, Function *&OutlinedFn, Constant *&OutlinedFnID)
Create a unique name for the entry function using the source location information of the current targ...
bool isLastFinalizationInfoCancellable(omp::Directive DK)
Return true if the last entry in the finalization stack is of kind DK and cancellable.
InsertPointTy emitTargetKernel(const LocationDescription &Loc, InsertPointTy AllocaIP, Value *&Return, Value *Ident, Value *DeviceID, Value *NumTeams, Value *NumThreads, Value *HostPtr, ArrayRef< Value * > KernelArgs)
Generate a target region entry call.
GlobalVariable * createOffloadMaptypes(SmallVectorImpl< uint64_t > &Mappings, std::string VarName)
Create the global variable holding the offload mappings information.
CallInst * createCachedThreadPrivate(const LocationDescription &Loc, llvm::Value *Pointer, llvm::ConstantInt *Size, const llvm::Twine &Name=Twine(""))
Create a runtime call for kmpc_threadprivate_cached.
IRBuilder Builder
The LLVM-IR Builder used to create IR.
GlobalValue * createGlobalFlag(unsigned Value, StringRef Name)
Create a hidden global flag Name in the module with initial value Value.
Value * getSizeInBytes(Value *BasePtr)
Computes the size of type in bytes.
FunctionCallee createDispatchDeinitFunction()
Returns __kmpc_dispatch_deinit runtime function.
void registerTargetGlobalVariable(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage, Type *LlvmPtrTy, Constant *Addr)
Registers a target variable for device or host.
InsertPointTy createTeams(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, Value *NumTeamsLower=nullptr, Value *NumTeamsUpper=nullptr, Value *ThreadLimit=nullptr, Value *IfExpr=nullptr)
Generator for #omp teams
BodyGenTy
Type of BodyGen to use for region codegen.
SmallVector< llvm::Function *, 16 > ConstantAllocaRaiseCandidates
A collection of candidate target functions whose constant allocas will attempt to be raised on a cal...
OffloadEntriesInfoManager OffloadInfoManager
Info manager to keep track of target regions.
static std::pair< int32_t, int32_t > readTeamBoundsForKernel(const Triple &T, Function &Kernel)
Read/write the bounds on teams for Kernel.
std::function< std::tuple< std::string, uint64_t >()> FileIdentifierInfoCallbackTy
const std::string ompOffloadInfoName
OMP Offload Info Metadata name string.
InsertPointTy createCopyPrivate(const LocationDescription &Loc, llvm::Value *BufSize, llvm::Value *CpyBuf, llvm::Value *CpyFn, llvm::Value *DidIt)
Generator for __kmpc_copyprivate.
InsertPointTy createReductions(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< ReductionInfo > ReductionInfos, ArrayRef< bool > IsByRef, bool IsNoWait=false)
Generator for '#omp reduction'.
bool updateToLocation(const LocationDescription &Loc)
Update the internal location to Loc.
void createFlush(const LocationDescription &Loc)
Generator for '#omp flush'.
Constant * getAddrOfDeclareTargetVar(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, Type *LlvmPtrTy, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage)
Retrieve (or create if non-existent) the address of a declare target variable, used in conjunction wi...
InsertPointTy emitTargetTask(Function *OutlinedFn, Value *OutlinedFnID, EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args, Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP, SmallVector< OpenMPIRBuilder::DependData > &Dependencies, bool HasNoWait)
Generate a target-task for the target construct.
EmitMetadataErrorKind
The kind of errors that can occur when emitting the offload entries and metadata.
@ EMIT_MD_DECLARE_TARGET_ERROR
@ EMIT_MD_GLOBAL_VAR_LINK_ERROR
@ EMIT_MD_TARGET_REGION_ERROR
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Pseudo-analysis pass that exposes the PassInstrumentation to pass managers.
Class to represent pointers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Analysis pass that exposes the ScalarEvolution for a function.
ScalarEvolution run(Function &F, FunctionAnalysisManager &AM)
The main scalar evolution driver.
A vector that has set insertion semantics.
bool empty() const
Determine if the SetVector is empty or not.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
bool test(unsigned Idx) const
bool all() const
Returns true if all bits are set.
bool any() const
Returns true if any bit is set.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(StringRef RHS)
Append from a StringRef.
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
ValueTy lookup(StringRef Key) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
constexpr bool empty() const
empty - Check if the string is empty.
constexpr size_t size() const
size - Get the string size.
size_t count(char C) const
Return the number of occurrences of C in the string.
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Class to represent struct types.
static StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Analysis pass providing the TargetTransformInfo.
Result run(const Function &F, FunctionAnalysisManager &)
Analysis pass providing the TargetLibraryInfo.
Target - Wrapper for Target specific information.
TargetMachine * createTargetMachine(StringRef TT, StringRef CPU, StringRef Features, const TargetOptions &Options, std::optional< Reloc::Model > RM, std::optional< CodeModel::Model > CM=std::nullopt, CodeGenOptLevel OL=CodeGenOptLevel::Default, bool JIT=false) const
createTargetMachine - Create a target specific machine implementation for the specified Triple.
Triple - Helper class for working with autoconf configuration names.
bool isPPC() const
Tests whether the target is PowerPC (32- or 64-bit LE or BE).
bool isX86() const
Tests whether the target is x86 (32- or 64-bit).
bool isWasm() const
Tests whether the target is wasm (32- and 64-bit).
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
std::string str() const
Return the twine contents as a std::string.
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
Type * getStructElementType(unsigned N) const
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
bool isPointerTy() const
True if this is an instance of PointerType.
static IntegerType * getInt1Ty(LLVMContext &C)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static Type * getVoidTy(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isVoidTy() const
Return true if this is 'void'.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
This function has undefined behavior.
Produce an estimate of the unrolled cost of the specified loop.
bool canUnroll() const
Whether it is legal to unroll this loop.
uint64_t getRolledLoopSize() const
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
void setName(const Twine &Name)
Change the name of the value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
User * getUniqueUndroppableUser()
Return the unique user of this value that cannot be dropped, if there is exactly one such user (that user can h...
unsigned getNumUses() const
This method computes the number of uses of this Value.
iterator_range< use_iterator > uses()
StringRef getName() const
Return a constant reference to the value's name.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
iterator insertAfter(iterator where, pointer New)
A raw_ostream that writes to an SmallVector or SmallString.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ C
The default llvm calling convention, compatible with C.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
void emitOffloadingEntry(Module &M, Constant *Addr, StringRef Name, uint64_t Size, int32_t Flags, int32_t Data, StringRef SectionName)
Create an offloading section struct used to register this global at runtime.
OpenMPOffloadMappingFlags
Values for bit flags used to specify the mapping type for offloading.
@ OMP_MAP_PTR_AND_OBJ
The element being mapped is a pointer-pointee pair; both the pointer and the pointee should be mapped...
@ OMP_MAP_MEMBER_OF
The 16 MSBs of the flags indicate whether the entry is member of some struct/class.
@ OMP_DEVICEID_UNDEF
Device ID if the device was not defined, runtime should get it from environment variables in the spec...
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their definition in openmp/runtime/src/kmp...
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
static constexpr GV NVPTXGridValues
For Nvidia GPUs.
@ OMP_TGT_EXEC_MODE_GENERIC
WorksharingLoopType
A type of worksharing loop construct.
OMPAtomicCompareOp
Atomic compare operations. Currently OpenMP only supports ==, >, and <.
NodeAddr< PhiNode * > Phi
std::error_code getUniqueID(const Twine Path, UniqueID &Result)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
BasicBlock * splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, llvm::Twine Suffix=".split")
Like splitBB, but reuses the current block's name for the new name.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Expected< std::unique_ptr< Module > > parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, ParserCallbacks Callbacks={})
Read the specified bitcode file, returning the module.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
auto successors(const MachineBasicBlock *BB)
testing::Matcher< const detail::ErrorHolder & > Failed()
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
ErrorOr< T > expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected< T > Val)
bool convertUsersOfConstantsToInstructions(ArrayRef< Constant * > Consts, Function *RestrictToFunc=nullptr, bool RemoveDeadConstants=true, bool IncludeSelf=false)
Replace constant expressions users of the given constants with instructions.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New, bool CreateBranch)
Move the instruction after an InsertPoint to the beginning of another BasicBlock.
void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
BasicBlock * splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch, llvm::Twine Name={})
Split a BasicBlock at an InsertPoint, even if the block is degenerate (missing the terminator).
CodeGenOptLevel
Code generation optimization level.
bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
DWARFExpression::Operation Op
void remapInstructionsInBlocks(ArrayRef< BasicBlock * > Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto predecessors(const MachineBasicBlock *BB)
Constant * ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, ArrayRef< unsigned > Idxs)
ConstantFoldInsertValueInstruction - Attempt to constant fold an insertvalue instruction with the spe...
void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
This struct is a compact representation of a valid (non-zero power of two) alignment.
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
a struct to pack relevant information while generating atomic Ops
A struct to pack the relevant information for an OpenMP depend clause.
Description of a LLVM-IR insertion point (IP) and a debug/source location (filename,...
MapNonContiguousArrayTy Offsets
MapNonContiguousArrayTy Counts
MapNonContiguousArrayTy Strides
This structure contains combined information generated for mappable clauses, including base pointers,...
MapDeviceInfoArrayTy DevicePointers
MapValuesArrayTy BasePointers
MapValuesArrayTy Pointers
StructNonContiguousInfo NonContigInfo
Helper that contains information about regions we need to outline during finalization.
PostOutlineCBTy PostOutlineCB
void collectBlocks(SmallPtrSetImpl< BasicBlock * > &BlockSet, SmallVectorImpl< BasicBlock * > &BlockVector)
Collect all blocks in between EntryBB and ExitBB in both the given vector and set.
SmallVector< Value *, 2 > ExcludeArgsFromAggregate
BasicBlock * OuterAllocaBB
Information about an OpenMP reduction.
EvalKind EvaluationKind
Reduction evaluation kind - scalar, complex or aggregate.
ReductionGenAtomicCBTy AtomicReductionGen
Callback for generating the atomic reduction body, may be null.
ReductionGenCBTy ReductionGen
Callback for generating the reduction body.
Value * Variable
Reduction variable of pointer type.
Value * PrivateVariable
Thread-private partial reduction variable.
ReductionGenClangCBTy ReductionGenClang
Clang callback for generating the reduction body.
Type * ElementType
Reduction element type, must match pointee type of variable.
Container for the arguments used to pass data to the runtime library.
Value * SizesArray
The array of sizes passed to the runtime library.
Value * PointersArray
The array of section pointers passed to the runtime library.
Value * MappersArray
The array of user-defined mappers passed to the runtime library.
Value * BasePointersArray
The array of base pointer passed to the runtime library.
Value * MapTypesArray
The array of map types passed to the runtime library for the beginning of the region or for the entir...
Value * MapNamesArray
The array of original declaration names of mapped pointers sent to the runtime library for debugging.
Data structure that contains the needed information to construct the kernel args vector.
Value * NumTeams
The number of teams.
Value * DynCGGroupMem
The size of the dynamic shared memory.
TargetDataRTArgs RTArgs
Arguments passed to the runtime library.
Value * NumIterations
The number of iterations.
unsigned NumTargetItems
Number of arguments passed to the runtime library.
bool HasNoWait
True if the kernel has 'no wait' clause.
Value * NumThreads
The number of threads.
Data structure to contain the information needed to uniquely identify a target entry.
static void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, StringRef ParentName, unsigned DeviceID, unsigned FileID, unsigned Line, unsigned Count)
static const Target * lookupTarget(StringRef Triple, std::string &Error)
lookupTarget - Lookup a target based on a target triple.
Defines various target-specific GPU grid values that must be consistent between host RTL (plugin),...
unsigned GV_Warp_Size
The default value of maximum number of threads in a worker warp.