#define DEBUG_TYPE "openmp-ir-builder"

    cl::desc("Use optimistic attributes describing "
             "'as-if' properties of runtime calls."),

    "openmp-ir-builder-unroll-threshold-factor", cl::Hidden,
    cl::desc("Factor for the unroll threshold to account for code "
             "simplifications still taking place"),
  if (!IP1.isSet() || !IP2.isSet())
    return false;
  return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint();
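// The helpers below operate on the OMPScheduleType bitmask, which combines a
// base schedule (static, dynamic, guided, runtime, ...) with ordering
// (Ordered/Unordered), Nomerge, and monotonicity modifier bits. The switch
// that follows whitelists every combination considered valid for a
// worksharing loop.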
  switch (SchedType & ~OMPScheduleType::MonotonicityMask) {
  case OMPScheduleType::UnorderedStaticChunked:
  case OMPScheduleType::UnorderedStatic:
  case OMPScheduleType::UnorderedDynamicChunked:
  case OMPScheduleType::UnorderedGuidedChunked:
  case OMPScheduleType::UnorderedRuntime:
  case OMPScheduleType::UnorderedAuto:
  case OMPScheduleType::UnorderedTrapezoidal:
  case OMPScheduleType::UnorderedGreedy:
  case OMPScheduleType::UnorderedBalanced:
  case OMPScheduleType::UnorderedGuidedIterativeChunked:
  case OMPScheduleType::UnorderedGuidedAnalyticalChunked:
  case OMPScheduleType::UnorderedSteal:
  case OMPScheduleType::UnorderedStaticBalancedChunked:
  case OMPScheduleType::UnorderedGuidedSimd:
  case OMPScheduleType::UnorderedRuntimeSimd:
  case OMPScheduleType::OrderedStaticChunked:
  case OMPScheduleType::OrderedStatic:
  case OMPScheduleType::OrderedDynamicChunked:
  case OMPScheduleType::OrderedGuidedChunked:
  case OMPScheduleType::OrderedRuntime:
  case OMPScheduleType::OrderedAuto:
  case OMPScheduleType::OrderdTrapezoidal:
  case OMPScheduleType::NomergeUnorderedStaticChunked:
  case OMPScheduleType::NomergeUnorderedStatic:
  case OMPScheduleType::NomergeUnorderedDynamicChunked:
  case OMPScheduleType::NomergeUnorderedGuidedChunked:
  case OMPScheduleType::NomergeUnorderedRuntime:
  case OMPScheduleType::NomergeUnorderedAuto:
  case OMPScheduleType::NomergeUnorderedTrapezoidal:
  case OMPScheduleType::NomergeUnorderedGreedy:
  case OMPScheduleType::NomergeUnorderedBalanced:
  case OMPScheduleType::NomergeUnorderedGuidedIterativeChunked:
  case OMPScheduleType::NomergeUnorderedGuidedAnalyticalChunked:
  case OMPScheduleType::NomergeUnorderedSteal:
  case OMPScheduleType::NomergeOrderedStaticChunked:
  case OMPScheduleType::NomergeOrderedStatic:
  case OMPScheduleType::NomergeOrderedDynamicChunked:
  case OMPScheduleType::NomergeOrderedGuidedChunked:
  case OMPScheduleType::NomergeOrderedRuntime:
  case OMPScheduleType::NomergeOrderedAuto:
  case OMPScheduleType::NomergeOrderedTrapezoidal:
  case OMPScheduleType::OrderedDistributeChunked:
  case OMPScheduleType::OrderedDistribute:

  // Must not set both monotonicity modifiers at the same time.
  OMPScheduleType MonotonicityFlags =
      SchedType & OMPScheduleType::MonotonicityMask;
  if (MonotonicityFlags == OMPScheduleType::MonotonicityMask)
  Builder.restoreIP(IP);

    StringRef Features =
        Kernel->getFnAttribute("target-features").getValueAsString();
    if (Features.count("+wavefrontsize64"))
                          bool HasSimdModifier, bool HasDistScheduleChunks) {
  switch (ClauseKind) {
  case OMP_SCHEDULE_Default:
  case OMP_SCHEDULE_Static:
    return HasChunks ? OMPScheduleType::BaseStaticChunked
                     : OMPScheduleType::BaseStatic;
  case OMP_SCHEDULE_Dynamic:
    return OMPScheduleType::BaseDynamicChunked;
  case OMP_SCHEDULE_Guided:
    return HasSimdModifier ? OMPScheduleType::BaseGuidedSimd
                           : OMPScheduleType::BaseGuidedChunked;
  case OMP_SCHEDULE_Auto:
    return OMPScheduleType::BaseAuto;
  case OMP_SCHEDULE_Runtime:
    return HasSimdModifier ? OMPScheduleType::BaseRuntimeSimd
                           : OMPScheduleType::BaseRuntime;
  case OMP_SCHEDULE_Distribute:
    return HasDistScheduleChunks ? OMPScheduleType::BaseDistributeChunked
                                 : OMPScheduleType::BaseDistribute;
  }
                              bool HasOrderedClause) {
  assert((BaseScheduleType & OMPScheduleType::ModifierMask) ==
             OMPScheduleType::None &&
         "Must not have ordering nor monotonicity flags already set");

  OMPScheduleType OrderingModifier = HasOrderedClause
                                         ? OMPScheduleType::ModifierOrdered
                                         : OMPScheduleType::ModifierUnordered;
  OMPScheduleType OrderingScheduleType = BaseScheduleType | OrderingModifier;

  if (OrderingScheduleType ==
      (OMPScheduleType::BaseGuidedSimd | OMPScheduleType::ModifierOrdered))
    return OMPScheduleType::OrderedGuidedChunked;
  else if (OrderingScheduleType == (OMPScheduleType::BaseRuntimeSimd |
                                    OMPScheduleType::ModifierOrdered))
    return OMPScheduleType::OrderedRuntime;

  return OrderingScheduleType;
                                  bool HasSimdModifier, bool HasMonotonic,
                                  bool HasNonmonotonic, bool HasOrderedClause) {
  assert((ScheduleType & OMPScheduleType::MonotonicityMask) ==
             OMPScheduleType::None &&
         "Must not have monotonicity flags already set");
  assert((!HasMonotonic || !HasNonmonotonic) &&
         "Monotonic and Nonmonotonic are contradicting each other");

  if (HasMonotonic) {
    return ScheduleType | OMPScheduleType::ModifierMonotonic;
  } else if (HasNonmonotonic) {
    return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
  }

  if ((BaseScheduleType == OMPScheduleType::BaseStatic) ||
      (BaseScheduleType == OMPScheduleType::BaseStaticChunked) ||
      HasSimdModifier || HasOrderedClause)
    return ScheduleType;
  return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
                          bool HasSimdModifier, bool HasMonotonicModifier,
                          bool HasNonmonotonicModifier, bool HasOrderedClause,
                          bool HasDistScheduleChunks) {
  OMPScheduleType BaseSchedule = getOpenMPBaseScheduleType(
      ClauseKind, HasChunks, HasSimdModifier, HasDistScheduleChunks);
  OMPScheduleType OrderedSchedule =
      getOpenMPOrderingScheduleType(BaseSchedule, HasOrderedClause);
  OMPScheduleType Result = getOpenMPMonotonicityScheduleType(
      OrderedSchedule, HasSimdModifier, HasMonotonicModifier,
      HasNonmonotonicModifier, HasOrderedClause);
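// Worked example of the composition above: `schedule(nonmonotonic: dynamic, 4)`
// yields BaseDynamicChunked, the ordering step adds ModifierUnordered (no
// ordered clause), and the monotonicity step adds ModifierNonmonotonic, so the
// final encoding is UnorderedDynamicChunked | ModifierNonmonotonic.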
  assert(!Br->isConditional() &&
         "BB's terminator must be an unconditional branch (or degenerate)");

  Br->setSuccessor(0, Target);

  NewBr->setDebugLoc(DL);

  assert(New->getFirstInsertionPt() == New->begin() &&
         "Target BB must not have PHI nodes");

  New->splice(New->begin(), Old, IP.getPoint(), Old->end());

  NewBr->setDebugLoc(DL);
  Builder.SetInsertPoint(Old);

  Builder.SetCurrentDebugLocation(DebugLoc);

  New->replaceSuccessorsPhiUsesWith(Old, New);

  if (CreateBranch)
    Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
  else
    Builder.SetInsertPoint(Builder.GetInsertBlock());
  Builder.SetCurrentDebugLocation(DebugLoc);

  if (CreateBranch)
    Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
  else
    Builder.SetInsertPoint(Builder.GetInsertBlock());
  Builder.SetCurrentDebugLocation(DebugLoc);
                               const Twine &Name = "", bool AsPtr = true,
                               bool Is64Bit = false) {
  Builder.restoreIP(OuterAllocaIP);

  Instruction *FakeValAddr =
      Builder.CreateAlloca(IntTy, nullptr, Name + ".addr");

  if (AsPtr) {
    FakeVal = FakeValAddr;
  } else {
    FakeVal = Builder.CreateLoad(IntTy, FakeValAddr, Name + ".val");
  }

  // Generate a fake use of this value.
  Builder.restoreIP(InnerAllocaIP);

  if (AsPtr) {
    UseFakeVal = Builder.CreateLoad(IntTy, FakeVal, Name + ".use");
  } else {
    UseFakeVal = cast<BinaryOperator>(Builder.CreateAdd(
        FakeVal, Is64Bit ? Builder.getInt64(10) : Builder.getInt32(10)));
  }
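// Note: the "fake" address/value/use triple built above exists only so the
// outliner sees a use of the surrogate variable inside the region to be
// extracted; the placeholder load or add-of-10 instructions are recorded by
// the callers (see the ToBeDeleted lists in createTask/createTaskloop below)
// and erased again once outlining has finished.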
enum OpenMPOffloadingRequiresDirFlags {
  OMP_REQ_UNDEFINED = 0x000,
  OMP_REQ_NONE = 0x001,
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
    : RequiresFlags(OMP_REQ_UNDEFINED) {}

    bool HasRequiresReverseOffload, bool HasRequiresUnifiedAddress,
    bool HasRequiresUnifiedSharedMemory, bool HasRequiresDynamicAllocators)

      RequiresFlags(OMP_REQ_UNDEFINED) {
  if (HasRequiresReverseOffload)
    RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
  if (HasRequiresUnifiedAddress)
    RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
  if (HasRequiresUnifiedSharedMemory)
    RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
  if (HasRequiresDynamicAllocators)
    RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;

  return RequiresFlags & OMP_REQ_REVERSE_OFFLOAD;

  return RequiresFlags & OMP_REQ_UNIFIED_ADDRESS;

  return RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY;

  return RequiresFlags & OMP_REQ_DYNAMIC_ALLOCATORS;
  return hasRequiresFlags() ? RequiresFlags
                            : static_cast<int64_t>(OMP_REQ_NONE);

  if (Value)
    RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
  else
    RequiresFlags &= ~OMP_REQ_REVERSE_OFFLOAD;

  if (Value)
    RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
  else
    RequiresFlags &= ~OMP_REQ_UNIFIED_ADDRESS;

  if (Value)
    RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
  else
    RequiresFlags &= ~OMP_REQ_UNIFIED_SHARED_MEMORY;

  if (Value)
    RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
  else
    RequiresFlags &= ~OMP_REQ_DYNAMIC_ALLOCATORS;
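// Example of the flag plumbing: `#pragma omp requires reverse_offload
// unified_shared_memory` sets RequiresFlags to
// OMP_REQ_REVERSE_OFFLOAD | OMP_REQ_UNIFIED_SHARED_MEMORY (0x00A), which
// getRequiresFlags() then reports; with no requires clause at all it reports
// OMP_REQ_NONE instead.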
  constexpr size_t MaxDim = 3;

  Value *DynCGroupMemFallbackFlag =

  DynCGroupMemFallbackFlag = Builder.CreateShl(DynCGroupMemFallbackFlag, 2);
  Value *Flags = Builder.CreateOr(HasNoWaitFlag, DynCGroupMemFallbackFlag);

  Value *NumThreads3D =
  auto FnAttrs = Attrs.getFnAttrs();
  auto RetAttrs = Attrs.getRetAttrs();

  for (size_t ArgNo = 0; ArgNo < Fn.arg_size(); ++ArgNo)

                        bool Param = true) -> void {
    bool HasSignExt = AS.hasAttribute(Attribute::SExt);
    bool HasZeroExt = AS.hasAttribute(Attribute::ZExt);
    if (HasSignExt || HasZeroExt) {
      assert(AS.getNumAttributes() == 1 &&
             "Currently not handling extension attr combined with others.");

      if (auto AK = TargetLibraryInfo::getExtAttrForI32Param(T, HasSignExt))

          TargetLibraryInfo::getExtAttrForI32Return(T, HasSignExt))

#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
#include "llvm/Frontend/OpenMP/OMPKinds.def"

#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets)                \
  case Enum:                                                                   \
    FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet);                           \
    addAttrSet(RetAttrs, RetAttrSet, false);                                   \
    for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo)                \
      addAttrSet(ArgAttrs[ArgNo], ArgAttrSets[ArgNo]);                         \
    Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs));    \
    break;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
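// The table above is the usual OMPKinds.def X-macro technique: the .def file
// is included once per locally defined macro, so the same master list of
// runtime functions first materializes the attribute sets (OMP_ATTRS_SET) and
// then stamps out one `case` per function that installs its known
// function/return/argument attributes (OMP_RTL_ATTRS).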
#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...)                          \
  case Enum:                                                                   \
    FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__},        \
                             IsVarArg);                                        \
    Fn = M.getFunction(Str);                                                   \
    break;
#include "llvm/Frontend/OpenMP/OMPKinds.def"

#define OMP_RTL(Enum, Str, ...)                                                \
  case Enum:                                                                   \
    Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M);         \
    break;
#include "llvm/Frontend/OpenMP/OMPKinds.def"

  if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {

    Fn->addMetadata(
        LLVMContext::MD_callback,
        *MDNode::get(Ctx, {MDB.createCallbackEncoding(
                              2, {-1, -1}, /* VarArgsArePassed */ true)}));
  }

  assert(Fn && "Failed to create OpenMP runtime function");
  Builder.SetInsertPoint(FiniBB);

  FiniBB = OtherFiniBB;

  Builder.SetInsertPoint(FiniBB->getFirstNonPHIIt());

  auto EndIt = FiniBB->end();
  if (FiniBB->size() >= 1)
    if (auto Prev = std::prev(EndIt); Prev->isTerminator())

  FiniBB->replaceAllUsesWith(OtherFiniBB);
  FiniBB->eraseFromParent();
  FiniBB = OtherFiniBB;

  assert(Fn && "Failed to create OpenMP runtime function pointer");
  for (auto Inst = Block->getReverseIterator()->begin();
       Inst != Block->getReverseIterator()->end();) {

      Block.getParent()->getEntryBlock().getTerminator()->getIterator();
    ParallelRegionBlockSet.clear();

    OI.collectBlocks(ParallelRegionBlockSet, Blocks);

    bool ArgsInZeroAddressSpace = Config.isTargetDevice();

                            ".omp_par", ArgsInZeroAddressSpace);

                      << " Exit: " << OI.ExitBB->getName() << "\n");
    assert(Extractor.isEligible() &&
           "Expected OpenMP outlining to be possible!");

    for (auto *V : OI.ExcludeArgsFromAggregate)
      Extractor.excludeArgFromAggregate(V);

        Extractor.extractCodeRegion(CEAC, OI.Inputs, OI.Outputs);
    if (TargetCpuAttr.isStringAttribute())

    auto TargetFeaturesAttr = OuterFn->getFnAttribute("target-features");
    if (TargetFeaturesAttr.isStringAttribute())
      OutlinedFn->addFnAttr(TargetFeaturesAttr);

    LLVM_DEBUG(dbgs() << "   Outlined function: " << *OutlinedFn << "\n");

           "OpenMP outlined functions should not return a value!");

    M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn);

    assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);

           "Expected instructions to add in the outlined region entry");

                                      End = ArtificialEntry.rend();

      if (I.isTerminator()) {
        if (OI.EntryBB->getTerminator())
          OI.EntryBB->getTerminator()->adoptDbgRecords(
              &ArtificialEntry, I.getIterator(), false);

      I.moveBeforePreserving(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());

    OI.EntryBB->moveBefore(&ArtificialEntry);

    if (OI.PostOutlineCB)
      OI.PostOutlineCB(*OutlinedFn);

    if (OI.FixUpNonEntryAllocas) {
  errs() << "Error of kind: " << Kind
         << " when emitting offload entries and metadata during "
            "OMPIRBuilder finalization \n";

  if (Config.EmitLLVMUsedMetaInfo.value_or(false)) {
    std::vector<WeakTrackingVH> LLVMCompilerUsed = {
        M.getGlobalVariable("__openmp_nvptx_data_transfer_temporary_storage")};
    emitUsed("llvm.compiler.used", LLVMCompilerUsed);
  }

      ConstantInt::get(I32Ty, Value), Name);
  for (unsigned I = 0, E = List.size(); I != E; ++I)

  if (UsedArray.empty())
    return;

  GV->setSection("llvm.metadata");

  auto *Int8Ty = Builder.getInt8Ty();

      ConstantInt::get(Int8Ty, Mode), Twine(KernelName, "_exec_mode"));
                                            unsigned Reserve2Flags) {
  LocFlags |= OMP_IDENT_FLAG_KMPC;

                             ConstantInt::get(Int32, uint32_t(LocFlags)),
                             ConstantInt::get(Int32, Reserve2Flags),
                             ConstantInt::get(Int32, SrcLocStrSize), SrcLocStr};

    size_t SrcLocStrArgIdx = 4;
    if (OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx)

          SrcLocStr, OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx));

    for (GlobalVariable &GV : M.globals())
      if (GV.getValueType() == OpenMPIRBuilder::Ident && GV.hasInitializer())
        if (GV.getInitializer() == Initializer)

        M, OpenMPIRBuilder::Ident,

        M.getDataLayout().getDefaultGlobalsAddressSpace());
  SrcLocStrSize = LocStr.size();

    for (GlobalVariable &GV : M.globals())
      if (GV.isConstant() && GV.hasInitializer() &&
          GV.getInitializer() == Initializer)

    SrcLocStr = Builder.CreateGlobalString(
        LocStr, /* Name */ "",
        M.getDataLayout().getDefaultGlobalsAddressSpace(),
                                                unsigned Line, unsigned Column,

  Buffer.append(FunctionName);

  Buffer.append(std::to_string(Line));

  Buffer.append(std::to_string(Column));

  StringRef UnknownLoc = ";unknown;unknown;0;0;;";
  if (DIFile *DIF = DIL->getFile())
    if (std::optional<StringRef> Source = DIF->getSource())

                              DIL->getColumn(), SrcLocStrSize);

                               Loc.IP.getBlock()->getParent());

      "omp_global_thread_num");
                               bool ForceSimpleCall, bool CheckCancelFlag) {

  switch (Kind) {
  case OMPD_for:
    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
    break;
  case OMPD_sections:
    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
    break;
  case OMPD_single:
    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
    break;
  case OMPD_barrier:
    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
    break;
  default:
    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
    break;
  }

  bool UseCancelBarrier =

                             ? OMPRTL___kmpc_cancel_barrier
                             : OMPRTL___kmpc_barrier),

  if (UseCancelBarrier && CheckCancelFlag)
                              omp::Directive CanceledDirective) {

  auto *UI = Builder.CreateUnreachable();

    Builder.SetInsertPoint(ElseTI);
    auto ElseIP = Builder.saveIP();

    Builder.SetInsertPoint(ThenTI);

  Value *CancelKind = nullptr;
  switch (CanceledDirective) {
#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value)                       \
  case DirectiveEnum:                                                          \
    CancelKind = Builder.getInt32(Value);                                      \
    break;
#include "llvm/Frontend/OpenMP/OMPKinds.def"

  Builder.SetInsertPoint(UI->getParent());
  UI->eraseFromParent();
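// Note: the `unreachable` created at the top of createCancel is only a
// placeholder terminator so the block-splitting utilities have something to
// split around; once the cancellation check has been emitted, the insertion
// point is moved into its block and the placeholder is erased again.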
                                         omp::Directive CanceledDirective) {

  auto *UI = Builder.CreateUnreachable();

  Value *CancelKind = nullptr;
  switch (CanceledDirective) {
#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value)                       \
  case DirectiveEnum:                                                          \
    CancelKind = Builder.getInt32(Value);                                      \
    break;
#include "llvm/Frontend/OpenMP/OMPKinds.def"

  Builder.SetInsertPoint(UI->getParent());
  UI->eraseFromParent();
  auto *KernelArgsPtr =
      Builder.CreateAlloca(OpenMPIRBuilder::KernelArgs, nullptr, "kernel_args");

        Builder.CreateStructGEP(OpenMPIRBuilder::KernelArgs, KernelArgsPtr, I);

        M.getDataLayout().getPrefTypeAlign(KernelArgs[I]->getType()));

                                      NumThreads, HostPtr, KernelArgsPtr};

  assert(OutlinedFnID && "Invalid outlined function ID!");

  Value *Return = nullptr;

      Builder, AllocaIP, Return, RTLoc, DeviceID, Args.NumTeams.front(),
      Args.NumThreads.front(), OutlinedFnID, ArgsVector));

  Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

  auto CurFn = Builder.GetInsertBlock()->getParent();

  emitBlock(OffloadContBlock, CurFn, /*IsFinished*/ true);
    Value *CancelFlag, omp::Directive CanceledDirective) {

         "Unexpected cancellation!");

  Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,

  Builder.SetInsertPoint(CancellationBlock);
  Builder.CreateBr(*FiniBBOrErr);

  Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin());
  OutlinedFn.addFnAttr(Attribute::NoUnwind);

         "Expected at least tid and bounded tid as arguments");
  unsigned NumCapturedVars = OutlinedFn.arg_size() - 2;

  assert(CI && "Expected call instruction to outlined function");
  CI->getParent()->setName("omp_parallel");

  Builder.SetInsertPoint(CI);
  Type *PtrTy = OMPIRBuilder->VoidPtr;

  OpenMPIRBuilder::InsertPointTy CurrentIP = Builder.saveIP();

  Value *Args = ArgsAlloca;

    Args = Builder.CreatePointerCast(ArgsAlloca, PtrTy);
  Builder.restoreIP(CurrentIP);

  for (unsigned Idx = 0; Idx < NumCapturedVars; Idx++) {

    Value *StoreAddress = Builder.CreateConstInBoundsGEP2_64(

    Builder.CreateStore(V, StoreAddress);
  }

      IfCondition ? Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32)
                  : Builder.getInt32(1);

  Value *Parallel60CallArgs[] = {

      NumThreads ? NumThreads : Builder.getInt32(-1),
      Builder.getInt32(-1),

      Builder.getInt64(NumCapturedVars),
      Builder.getInt32(0)};

                    << *Builder.GetInsertBlock()->getParent() << "\n");

  Builder.SetInsertPoint(PrivTID);

  Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),

    I->eraseFromParent();
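// The Parallel60CallArgs above follow the usual __kmpc_parallel_* call
// convention: the if clause collapses to an i32 0/1 flag, a NumThreads of -1
// lets the runtime pick the team size, the -1 in the proc-bind slot means
// "unset", and the captured variables travel as a pointer array whose length
// is passed separately (Builder.getInt64(NumCapturedVars)).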
  if (!F->hasMetadata(LLVMContext::MD_callback)) {

    F->addMetadata(LLVMContext::MD_callback,

  OutlinedFn.addFnAttr(Attribute::NoUnwind);

         "Expected at least tid and bounded tid as arguments");
  unsigned NumCapturedVars = OutlinedFn.arg_size() - 2;

  CI->getParent()->setName("omp_parallel");
  Builder.SetInsertPoint(CI);

  Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars),

  RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs));

    Value *Cond = Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32);

  auto PtrTy = OMPIRBuilder->VoidPtr;
  if (IfCondition && NumCapturedVars == 0) {

                    << *Builder.GetInsertBlock()->getParent() << "\n");

  Builder.SetInsertPoint(PrivTID);

  Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),

    I->eraseFromParent();
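// Host-side detail: when an if clause is present the fork is routed through
// the _if variant of the fork call, which takes the i32 condition and always
// expects a trailing context pointer, hence the null pointer pushed when no
// variables were captured.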
    omp::ProcBindKind ProcBind, bool IsCancellable) {

  bool ArgsInZeroAddressSpace = Config.isTargetDevice();

  if (NumThreads && !Config.isTargetDevice()) {

        Builder.CreateIntCast(NumThreads, Int32, false)};

  if (ProcBind != OMP_PROC_BIND_default) {

        ConstantInt::get(Int32, unsigned(ProcBind), true)};

      Builder.CreateAlloca(Int32, nullptr, "zero.addr");

  if (ArgsInZeroAddressSpace && M.getDataLayout().getAllocaAddrSpace() != 0) {

        TIDAddrAlloca, PointerType::get(M.getContext(), 0), "tid.addr.ascast");

        PointerType::get(M.getContext(), 0), "zero.addr.ascast");
  if (IP.getBlock()->end() == IP.getPoint()) {

  assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
         IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
         "Unexpected insertion point for finalization call!");

      Builder.CreateAlloca(Int32, nullptr, "tid.addr.local");

      Builder.CreateLoad(Int32, ZeroAddr, "zero.addr.use");

  LLVM_DEBUG(dbgs() << "Before body codegen: " << *OuterFn << "\n");

  assert(BodyGenCB && "Expected body generation callback!");
  if (Error Err = BodyGenCB(InnerAllocaIP, CodeGenIP))
    return Err;

  LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n");

  if (Config.isTargetDevice()) {

                      std::move(ToBeDeleted)](Function &OutlinedFn) {

                             IfCondition, NumThreads, PrivTID, PrivTIDAddr,
                             ThreadID, ToBeDeletedVec);

                      std::move(ToBeDeleted)](Function &OutlinedFn) {

                           PrivTID, PrivTIDAddr, ToBeDeletedVec);
1757 ".omp_par", ArgsInZeroAddressSpace);
1762 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
1764 Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands,
1769 return GV->getValueType() == OpenMPIRBuilder::Ident;
1774 LLVM_DEBUG(
dbgs() <<
"Before privatization: " << *OuterFn <<
"\n");
1780 if (&V == TIDAddr || &V == ZeroAddr) {
1786 for (
Use &U : V.uses())
1788 if (ParallelRegionBlockSet.
count(UserI->getParent()))
1798 if (!V.getType()->isPointerTy()) {
1802 Builder.restoreIP(OuterAllocaIP);
1804 Builder.CreateAlloca(V.getType(),
nullptr, V.getName() +
".reloaded");
1808 Builder.SetInsertPoint(InsertBB,
1813 Builder.restoreIP(InnerAllocaIP);
1814 Inner =
Builder.CreateLoad(V.getType(), Ptr);
1817 Value *ReplacementValue =
nullptr;
1820 ReplacementValue = PrivTID;
1823 PrivCB(InnerAllocaIP,
Builder.saveIP(), V, *Inner, ReplacementValue);
1831 assert(ReplacementValue &&
1832 "Expected copy/create callback to set replacement value!");
1833 if (ReplacementValue == &V)
1838 UPtr->set(ReplacementValue);
1863 for (
Value *Output : Outputs)
1867 "OpenMP outlining should not produce live-out values!");
1869 LLVM_DEBUG(
dbgs() <<
"After privatization: " << *OuterFn <<
"\n");
1871 for (
auto *BB : Blocks)
1872 dbgs() <<
" PBR: " << BB->getName() <<
"\n";
1880 assert(FiniInfo.DK == OMPD_parallel &&
1881 "Unexpected finalization stack state!");
1892 Builder.CreateBr(*FiniBBOrErr);
1896 Term->eraseFromParent();
1902 InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
1903 UI->eraseFromParent();
  if (Dependencies.empty())

  Type *DependInfo = OMPBuilder.DependInfo;

  Value *DepArray = nullptr;

  Builder.SetInsertPoint(

  DepArray = Builder.CreateAlloca(DepArrayTy, nullptr, ".dep.arr.addr");

  Builder.restoreIP(OldIP);

  for (const auto &[DepIdx, Dep] : enumerate(Dependencies)) {
    Value *Base =
        Builder.CreateConstInBoundsGEP2_64(DepArrayTy, DepArray, 0, DepIdx);
    // Store the address of the dependence variable.
    Value *Addr = Builder.CreateStructGEP(
        DependInfo, Base,
        static_cast<unsigned int>(RTLDependInfoFields::BaseAddr));
    Value *DepValPtr = Builder.CreatePtrToInt(Dep.DepVal, Builder.getInt64Ty());
    Builder.CreateStore(DepValPtr, Addr);
    // Store the size of the dependence variable's type.
    Value *Size = Builder.CreateStructGEP(
        DependInfo, Base, static_cast<unsigned int>(RTLDependInfoFields::Len));
    Builder.CreateStore(
        Builder.getInt64(M.getDataLayout().getTypeStoreSize(Dep.DepValueType)),
        Size);
    // Store the dependence kind.
    Value *Flags = Builder.CreateStructGEP(
        DependInfo, Base,
        static_cast<unsigned int>(RTLDependInfoFields::Flags));
    Builder.CreateStore(
        ConstantInt::get(Builder.getInt8Ty(),
                         static_cast<unsigned int>(Dep.DepKind)),
        Flags);
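// Each iteration above fills in one kmp_depend_info record:
//   BaseAddr: the dependence variable's address, stored as an i64;
//   Len:      the store size of its type in bytes;
//   Flags:    an i8 encoding the dependence kind (in/out/inout/...).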
Expected<Value *> OpenMPIRBuilder::createTaskDuplicationFunction(

  unsigned ProgramAddressSpace = M.getDataLayout().getProgramAddressSpace();

      Builder.getVoidTy(), {VoidPtrTy, VoidPtrTy, Builder.getInt32Ty()},

      "omp_taskloop_dup", M);

  Value *LastprivateFlagArg = DupFunction->getArg(2);
  DestTaskArg->setName("dest_task");
  SrcTaskArg->setName("src_task");
  LastprivateFlagArg->setName("lastprivate_flag");

  IRBuilderBase::InsertPointGuard Guard(Builder);

  auto GetTaskContextPtrFromArg = [&](Value *Arg) -> Value * {
    Type *TaskWithPrivatesTy =

        TaskWithPrivatesTy, Arg, {Builder.getInt32(0), Builder.getInt32(1)});

        PrivatesTy, TaskPrivates,

  Value *DestTaskContextPtr = GetTaskContextPtrFromArg(DestTaskArg);
  Value *SrcTaskContextPtr = GetTaskContextPtrFromArg(SrcTaskArg);

  DestTaskContextPtr->setName("destPtr");
  SrcTaskContextPtr->setName("srcPtr");

  Expected<IRBuilderBase::InsertPoint> AfterIPOrError =
      DupCB(AllocaIP, CodeGenIP, DestTaskContextPtr, SrcTaskContextPtr);
  if (!AfterIPOrError)
    return AfterIPOrError.takeError();

  Builder.restoreIP(*AfterIPOrError);
    llvm::function_ref<llvm::Expected<llvm::CanonicalLoopInfo *>()> LoopInfo,
    Value *GrainSize, bool NoGroup, int Sched, Value *Final, bool Mergeable,

  uint32_t SrcLocStrSize;

  if (Error Err = BodyGenCB(TaskloopAllocaIP, TaskloopBodyIP))
    return Err;

  llvm::Expected<llvm::CanonicalLoopInfo *> result = LoopInfo();

  llvm::CanonicalLoopInfo *CLI = result.get();

  OI.EntryBB = TaskloopAllocaBB;
  OI.OuterAllocaBB = AllocaIP.getBlock();
  OI.ExitBB = TaskloopExitBB;

      Builder, AllocaIP, ToBeDeleted, TaskloopAllocaIP, "global.tid", false));

      TaskloopAllocaIP, "lb", false, true);

      TaskloopAllocaIP, "ub", false, true);

      TaskloopAllocaIP, "step", false, true);

  OI.Inputs.insert(FakeLB);
  OI.Inputs.insert(FakeUB);
  OI.Inputs.insert(FakeStep);
  if (TaskContextStructPtrVal)
    OI.Inputs.insert(TaskContextStructPtrVal);

  assert(((TaskContextStructPtrVal && DupCB) ||
          (!TaskContextStructPtrVal && !DupCB)) &&
         "Task context struct ptr and duplication callback must be both set "
         "or unset");

  unsigned ProgramAddressSpace = M.getDataLayout().getProgramAddressSpace();

      {FakeLB->getType(), FakeUB->getType(), FakeStep->getType(), PointerTy});
  Expected<Value *> TaskDupFnOrErr = createTaskDuplicationFunction(

  if (!TaskDupFnOrErr) {
    return TaskDupFnOrErr.takeError();
  }
  Value *TaskDupFn = *TaskDupFnOrErr;
  OI.PostOutlineCB = [this, Ident, LBVal, UBVal, StepVal, Untied,
                      TaskloopAllocaBB, CLI, Loc, TaskDupFn, ToBeDeleted,
                      IfCond, GrainSize, NoGroup, Sched, FakeLB, FakeUB,
                      FakeStep, Final, Mergeable,
                      Priority](Function &OutlinedFn) mutable {
    assert(OutlinedFn.hasOneUse() &&
           "there must be a single user for the outlined function");

    IRBuilderBase::InsertPoint CurrentIp = Builder.saveIP();

    Value *CastedLBVal =
        Builder.CreateIntCast(LBVal, Builder.getInt64Ty(), true, "lb64");
    Value *CastedUBVal =
        Builder.CreateIntCast(UBVal, Builder.getInt64Ty(), true, "ub64");
    Value *CastedStepVal =
        Builder.CreateIntCast(StepVal, Builder.getInt64Ty(), true, "step64");

    Builder.SetInsertPoint(StaleCI);

      Builder.CreateCall(TaskgroupFn, {Ident, ThreadID});

        divideCeil(M.getDataLayout().getTypeSizeInBits(Task), 8));

    AllocaInst *ArgStructAlloca =

    assert(ArgStructAlloca &&
           "Unable to find the alloca instruction corresponding to arguments "
           "for extracted function");
    StructType *ArgStructType =

    assert(ArgStructType && "Unable to find struct type corresponding to "
                            "arguments for extracted function");
    Value *SharedsSize =
        Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));

    CallInst *TaskData = Builder.CreateCall(
        TaskAllocFn, {Ident, ThreadID, Flags,
                      TaskSize, SharedsSize,

    Value *TaskShareds = Builder.CreateLoad(VoidPtr, TaskData);
    Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,

        ArgStructType, TaskShareds, {Builder.getInt32(0), Builder.getInt32(0)});

        ArgStructType, TaskShareds, {Builder.getInt32(0), Builder.getInt32(1)});

        ArgStructType, TaskShareds, {Builder.getInt32(0), Builder.getInt32(2)});

        IfCond ? Builder.CreateIntCast(IfCond, Builder.getInt32Ty(), true)

    Value *GrainSizeVal =
        GrainSize ? Builder.CreateIntCast(GrainSize, Builder.getInt64Ty(), true)

    Value *TaskDup = TaskDupFn;

    Value *Args[] = {Ident, ThreadID, TaskData, IfCondVal, Lb, Ub,
                     Loadstep, NoGroupVal, SchedVal, GrainSizeVal, TaskDup};

      Builder.CreateCall(TaskloopFn, Args);

      Builder.CreateCall(EndTaskgroupFn, {Ident, ThreadID});
    Builder.SetInsertPoint(TaskloopAllocaBB, TaskloopAllocaBB->begin());

    LoadInst *SharedsOutlined =
        Builder.CreateLoad(VoidPtr, OutlinedFn.getArg(1));
    OutlinedFn.getArg(1)->replaceUsesWithIf(
        SharedsOutlined,
        [SharedsOutlined](Use &U) { return U.getUser() != SharedsOutlined; });

    Type *IVTy = IV->getType();

    Value *TaskLB = nullptr;
    Value *TaskUB = nullptr;
    Value *LoadTaskLB = nullptr;
    Value *LoadTaskUB = nullptr;
    for (Instruction &I : *TaskloopAllocaBB) {
      if (I.getOpcode() == Instruction::GetElementPtr) {

        switch (CI->getZExtValue()) {

      } else if (I.getOpcode() == Instruction::Load) {

        if (Load.getPointerOperand() == TaskLB) {
          assert(TaskLB != nullptr && "Expected value for TaskLB");

        } else if (Load.getPointerOperand() == TaskUB) {
          assert(TaskUB != nullptr && "Expected value for TaskUB");

    Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());

    assert(LoadTaskLB != nullptr && "Expected value for LoadTaskLB");
    assert(LoadTaskUB != nullptr && "Expected value for LoadTaskUB");
    Value *TripCountMinusOne =
        Builder.CreateSDiv(Builder.CreateSub(LoadTaskUB, LoadTaskLB), FakeStep);
    Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One, "trip_cnt");
    Value *CastedTripCount = Builder.CreateIntCast(TripCount, IVTy, true);
    Value *CastedTaskLB = Builder.CreateIntCast(LoadTaskLB, IVTy, true);

    CLI->setTripCount(CastedTripCount);

    Builder.SetInsertPoint(CLI->getBody(),
                           CLI->getBody()->getFirstInsertionPt());

    assert(CLI->getIndVar()->getNumUses() == 3 &&
           "Canonical loop should have exactly three uses of the ind var");
    for (User *IVUser : CLI->getIndVar()->users()) {

      if (Mul->getOpcode() == Instruction::Mul) {
        for (User *MulUser : Mul->users()) {

          if (Add->getOpcode() == Instruction::Add) {
            Add->setOperand(1, CastedTaskLB);

    FakeLB->replaceAllUsesWith(CastedLBVal);
    FakeUB->replaceAllUsesWith(CastedUBVal);
    FakeStep->replaceAllUsesWith(CastedStepVal);

      I->eraseFromParent();

  Builder.SetInsertPoint(TaskloopExitBB, TaskloopExitBB->begin());
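// Loop-bound rewiring: the runtime hands every task its own [lb, ub] slice
// through the task structure, so the code above recomputes the canonical trip
// count as (ub - lb) / step + 1, installs it via CLI->setTripCount, and
// rebases the induction variable by patching the `Add` user of the IV to add
// the loaded task lower bound (CastedTaskLB).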
  if (Error Err = BodyGenCB(TaskAllocaIP, TaskBodyIP))
    return Err;

      Builder, AllocaIP, ToBeDeleted, TaskAllocaIP, "global.tid", false));

  OI.PostOutlineCB = [this, Ident, Tied, Final, IfCondition, Dependencies,
                      Mergeable, Priority, EventHandle, TaskAllocaBB,
                      ToBeDeleted](Function &OutlinedFn) mutable {
    assert(OutlinedFn.hasOneUse() &&
           "there must be a single user for the outlined function");

    bool HasShareds = StaleCI->arg_size() > 1;
    Builder.SetInsertPoint(StaleCI);
2466 Builder.SetInsertPoint(StaleCI);
2491 Flags =
Builder.CreateOr(FinalFlag, Flags);
2504 divideCeil(
M.getDataLayout().getTypeSizeInBits(Task), 8));
2513 assert(ArgStructAlloca &&
2514 "Unable to find the alloca instruction corresponding to arguments "
2515 "for extracted function");
2518 assert(ArgStructType &&
"Unable to find struct type corresponding to "
2519 "arguments for extracted function");
2521 Builder.getInt64(
M.getDataLayout().getTypeStoreSize(ArgStructType));
2527 TaskAllocFn, {Ident, ThreadID, Flags,
2528 TaskSize, SharedsSize,
2536 OMPRTL___kmpc_task_allow_completion_event);
2540 Builder.CreatePointerBitCastOrAddrSpaceCast(EventHandle,
2542 EventVal =
Builder.CreatePtrToInt(EventVal,
Builder.getInt64Ty());
2543 Builder.CreateStore(EventVal, EventHandleAddr);
2549 Value *TaskShareds =
Builder.CreateLoad(VoidPtr, TaskData);
2550 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
2568 Builder.CreateInBoundsGEP(TaskPtr, TaskData, {Zero, Zero});
2571 VoidPtr, VoidPtr,
Builder.getInt32Ty(), VoidPtr, VoidPtr);
2573 TaskStructType, TaskGEP, {Zero, ConstantInt::get(
Int32Ty, 4)});
2576 Value *CmplrData =
Builder.CreateInBoundsGEP(CmplrStructType,
2577 PriorityData, {Zero, Zero});
2578 Builder.CreateStore(Priority, CmplrData);
    Instruction *IfTerminator =
        Builder.GetInsertPoint()->getParent()->getTerminator();
    Instruction *ThenTI = IfTerminator, *ElseTI = nullptr;
    Builder.SetInsertPoint(IfTerminator);

      Builder.SetInsertPoint(ElseTI);

      if (Dependencies.size()) {

            {Ident, ThreadID, Builder.getInt32(Dependencies.size()), DepArray,
             ConstantInt::get(Builder.getInt32Ty(), 0),

      Builder.SetInsertPoint(ThenTI);

      if (Dependencies.size()) {

            {Ident, ThreadID, TaskData, Builder.getInt32(Dependencies.size()),
             DepArray, ConstantInt::get(Builder.getInt32Ty(), 0),

    Builder.SetInsertPoint(TaskAllocaBB, TaskAllocaBB->begin());

    LoadInst *Shareds = Builder.CreateLoad(VoidPtr, OutlinedFn.getArg(1));
    OutlinedFn.getArg(1)->replaceUsesWithIf(
        Shareds, [Shareds](Use &U) { return U.getUser() != Shareds; });

      I->eraseFromParent();

  Builder.SetInsertPoint(TaskExitBB, TaskExitBB->begin());

  Builder.SetInsertPoint(TaskgroupExitBB);
  unsigned CaseNumber = 0;
  for (auto SectionCB : SectionCBs) {

        M.getContext(), "omp_section_loop.body.case", CurFn, Continue);

    Builder.SetInsertPoint(CaseBB);

  Value *LB = ConstantInt::get(I32Ty, 0);
  Value *UB = ConstantInt::get(I32Ty, SectionCBs.size());
  Value *ST = ConstantInt::get(I32Ty, 1);

      Loc, LoopBodyGenCB, LB, UB, ST, /*IsSigned*/ true,
      /*InclusiveStop*/ false, AllocaIP, "section_loop");

      applyStaticWorkshareLoop(Loc.DL, *LoopInfo, AllocaIP,
                               WorksharingLoopType::ForStaticLoop, !IsNowait);

  assert(LoopFini && "Bad structure of static workshare loop finalization");

  assert(FiniInfo.DK == OMPD_sections &&
         "Unexpected finalization stack state!");

  if (Error Err = FiniInfo.mergeFiniBB(Builder, LoopFini))

    if (IP.getBlock()->end() != IP.getPoint())

    auto *CaseBB = Loc.IP.getBlock();
    auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
    auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);

  Directive OMPD = Directive::OMPD_sections;

  return EmitOMPInlinedRegion(OMPD, nullptr, nullptr, BodyGenCB, FiniCBWrapper,
Value *OpenMPIRBuilder::getGPUThreadID() {

          OMPRTL___kmpc_get_hardware_thread_id_in_block),

Value *OpenMPIRBuilder::getGPUWarpSize() {

Value *OpenMPIRBuilder::getNVPTXWarpID() {
  unsigned LaneIDBits = Log2_32(Config.getGridValue().GV_Warp_Size);
  return Builder.CreateAShr(getGPUThreadID(), LaneIDBits, "nvptx_warp_id");
}

Value *OpenMPIRBuilder::getNVPTXLaneID() {
  unsigned LaneIDBits = Log2_32(Config.getGridValue().GV_Warp_Size);
  assert(LaneIDBits < 32 && "Invalid LaneIDBits size in NVPTX device.");
  unsigned LaneIDMask = ~0u >> (32u - LaneIDBits);
  return Builder.CreateAnd(getGPUThreadID(), Builder.getInt32(LaneIDMask),
                           "nvptx_lane_id");
}
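// For a power-of-two warp size W these helpers are plain bit arithmetic:
//   warp id = thread id >> log2(W)
//   lane id = thread id & (W - 1), where ~0u >> (32u - log2(W)) == W - 1.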
  uint64_t FromSize = M.getDataLayout().getTypeStoreSize(FromType);
  uint64_t ToSize = M.getDataLayout().getTypeStoreSize(ToType);
  assert(FromSize > 0 && "From size must be greater than zero");
  assert(ToSize > 0 && "To size must be greater than zero");
  if (FromType == ToType)
    return From;
  if (FromSize == ToSize)
    return Builder.CreateBitCast(From, ToType);
  if (FromType->isIntegerTy() && ToType->isIntegerTy())
    return Builder.CreateIntCast(From, ToType, /*isSigned*/ true);

  Value *ValCastItem = Builder.CreatePointerBitCastOrAddrSpaceCast(
      CastItem, Builder.getPtrTy(0));
  Builder.CreateStore(From, ValCastItem);
  return Builder.CreateLoad(ToType, CastItem);
  uint64_t Size = M.getDataLayout().getTypeStoreSize(ElementType);
  assert(Size <= 8 && "Unsupported bitwidth in shuffle instruction");

  Value *ElemCast = castValueToType(AllocaIP, Element, CastTy);

      Builder.CreateIntCast(getGPUWarpSize(), Builder.getInt16Ty(), true);

      Size <= 4 ? RuntimeFunction::OMPRTL___kmpc_shuffle_int32
                : RuntimeFunction::OMPRTL___kmpc_shuffle_int64);
  Value *WarpSizeCast =

  Value *ShuffleCall =

  return castValueToType(AllocaIP, ShuffleCall, CastTy);
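// createRuntimeShuffleFunction moves a value of at most 8 bytes to another
// lane: the element is first converted to a plain i32 or i64 through
// castValueToType, shuffled with __kmpc_shuffle_int32 or
// __kmpc_shuffle_int64, and converted back to its original type on arrival.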
  uint64_t Size = M.getDataLayout().getTypeStoreSize(ElemType);

      M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
  Value *ElemPtr = DstAddr;
  Value *Ptr = SrcAddr;
  for (unsigned IntSize = 8; IntSize >= 1; IntSize /= 2) {

    Ptr = Builder.CreatePointerBitCastOrAddrSpaceCast(

        Builder.CreateGEP(ElemType, SrcAddr, {ConstantInt::get(IndexTy, 1)});
    ElemPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(

    if ((Size / IntSize) > 1) {
      Value *PtrEnd = Builder.CreatePointerBitCastOrAddrSpaceCast(
          SrcAddrGEP, Builder.getPtrTy());

          Builder.CreatePointerBitCastOrAddrSpaceCast(Ptr, Builder.getPtrTy()));

          Builder.CreateICmpSGT(PtrDiff, Builder.getInt64(IntSize - 1)), ThenBB,

      Value *Res = createRuntimeShuffleFunction(

              IntType, Ptr, M.getDataLayout().getPrefTypeAlign(ElemType)),

      Builder.CreateAlignedStore(Res, ElemPtr,
                                 M.getDataLayout().getPrefTypeAlign(ElemType));

          Builder.CreateGEP(IntType, Ptr, {ConstantInt::get(IndexTy, 1)});
      Value *LocalElemPtr =
          Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});

      Value *Res = createRuntimeShuffleFunction(
          AllocaIP, Builder.CreateLoad(IntType, Ptr), IntType, Offset);

        Res = Builder.CreateTrunc(Res, ElemType);
      Builder.CreateStore(Res, ElemPtr);
      Ptr = Builder.CreateGEP(IntType, Ptr, {ConstantInt::get(IndexTy, 1)});

          Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
Error OpenMPIRBuilder::emitReductionListCopy(

      M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
  Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;

  for (auto En : enumerate(ReductionInfos)) {

    Value *SrcElementAddr = nullptr;
    AllocaInst *DestAlloca = nullptr;
    Value *DestElementAddr = nullptr;
    Value *DestElementPtrAddr = nullptr;

    bool ShuffleInElement = false;

    bool UpdateDestListPtr = false;

        ReductionArrayTy, SrcBase,
        {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
    SrcElementAddr = Builder.CreateLoad(Builder.getPtrTy(), SrcElementPtrAddr);

    DestElementPtrAddr = Builder.CreateInBoundsGEP(
        ReductionArrayTy, DestBase,
        {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
    bool IsByRefElem = (!IsByRef.empty() && IsByRef[En.index()]);

      Type *DestAllocaType =
          IsByRefElem ? RI.ByRefAllocatedType : RI.ElementType;
      DestAlloca = Builder.CreateAlloca(DestAllocaType, nullptr,
                                        ".omp.reduction.element");

          M.getDataLayout().getPrefTypeAlign(DestAllocaType));
      DestElementAddr = DestAlloca;

          DestElementAddr->getName() + ".ascast");

      ShuffleInElement = true;
      UpdateDestListPtr = true;

    if (ShuffleInElement) {
      Type *ShuffleType = RI.ElementType;
      Value *ShuffleSrcAddr = SrcElementAddr;
      Value *ShuffleDestAddr = DestElementAddr;
      AllocaInst *LocalStorage = nullptr;

        assert(RI.ByRefElementType && "Expected by-ref element type to be set");
        assert(RI.ByRefAllocatedType &&
               "Expected by-ref allocated type to be set");

        ShuffleType = RI.ByRefElementType;

            RI.DataPtrPtrGen(Builder.saveIP(), ShuffleSrcAddr, ShuffleSrcAddr);
        if (!GenResult)
          return GenResult.takeError();

        ShuffleSrcAddr = Builder.CreateLoad(Builder.getPtrTy(), ShuffleSrcAddr);

        LocalStorage = Builder.CreateAlloca(ShuffleType);

        ShuffleDestAddr = LocalStorage;

      shuffleAndStore(AllocaIP, ShuffleSrcAddr, ShuffleDestAddr, ShuffleType,
                      RemoteLaneOffset, ReductionArrayTy, IsByRefElem);

            RI.DataPtrPtrGen(Builder.saveIP(),
                             Builder.CreatePointerBitCastOrAddrSpaceCast(
                                 DestAlloca, Builder.getPtrTy(), ".ascast"),
        if (!GenResult)
          return GenResult.takeError();

              LocalStorage, Builder.getPtrTy(), ".ascast"),

      switch (RI.EvaluationKind) {
      case EvalKind::Scalar: {
        Value *Elem = Builder.CreateLoad(RI.ElementType, SrcElementAddr);

        Builder.CreateStore(Elem, DestElementAddr);
        break;
      }
      case EvalKind::Complex: {
        Value *SrcRealPtr = Builder.CreateConstInBoundsGEP2_32(
            RI.ElementType, SrcElementAddr, 0, 0, ".realp");
        Value *SrcReal = Builder.CreateLoad(
            RI.ElementType->getStructElementType(0), SrcRealPtr, ".real");
        Value *SrcImgPtr = Builder.CreateConstInBoundsGEP2_32(
            RI.ElementType, SrcElementAddr, 0, 1, ".imagp");
        Value *SrcImg = Builder.CreateLoad(
            RI.ElementType->getStructElementType(1), SrcImgPtr, ".imag");

        Value *DestRealPtr = Builder.CreateConstInBoundsGEP2_32(
            RI.ElementType, DestElementAddr, 0, 0, ".realp");
        Value *DestImgPtr = Builder.CreateConstInBoundsGEP2_32(
            RI.ElementType, DestElementAddr, 0, 1, ".imagp");
        Builder.CreateStore(SrcReal, DestRealPtr);
        Builder.CreateStore(SrcImg, DestImgPtr);
        break;
      }

          M.getDataLayout().getTypeStoreSize(RI.ElementType));

          DestElementAddr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
          SrcElementAddr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),

    if (UpdateDestListPtr) {
      Value *CastDestAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(
          DestElementAddr, Builder.getPtrTy(),
          DestElementAddr->getName() + ".ascast");
      Builder.CreateStore(CastDestAddr, DestElementPtrAddr);
    }
Expected<Function *> OpenMPIRBuilder::emitInterWarpCopyFunction(

  LLVMContext &Ctx = M.getContext();

      Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getInt32Ty()},

      "_omp_reduction_inter_warp_copy_func", &M);

  Builder.SetInsertPoint(EntryBB);

  StringRef TransferMediumName =
      "__openmp_nvptx_data_transfer_temporary_storage";
  GlobalVariable *TransferMedium = M.getGlobalVariable(TransferMediumName);
  unsigned WarpSize = Config.getGridValue().GV_Warp_Size;

  if (!TransferMedium) {
    TransferMedium = new GlobalVariable(

  Value *GPUThreadID = getGPUThreadID();

  Value *LaneID = getNVPTXLaneID();

  Value *WarpID = getNVPTXWarpID();

      Builder.GetInsertBlock()->getFirstInsertionPt());

  AllocaInst *ReduceListAlloca = Builder.CreateAlloca(
      Arg0Type, nullptr, ReduceListArg->getName() + ".addr");
  AllocaInst *NumWarpsAlloca =
      Builder.CreateAlloca(Arg1Type, nullptr, NumWarpsArg->getName() + ".addr");
  Value *ReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReduceListAlloca, Arg0Type, ReduceListAlloca->getName() + ".ascast");
  Value *NumWarpsAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      NumWarpsAlloca, Builder.getPtrTy(0),
      NumWarpsAlloca->getName() + ".ascast");
  Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
  Builder.CreateStore(NumWarpsArg, NumWarpsAddrCast);

  for (auto En : enumerate(ReductionInfos)) {

    bool IsByRefElem = !IsByRef.empty() && IsByRef[En.index()];
    unsigned RealTySize = M.getDataLayout().getTypeAllocSize(
        IsByRefElem ? RI.ByRefElementType : RI.ElementType);
    for (unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /= 2) {

      unsigned NumIters = RealTySize / TySize;

      Value *Cnt = nullptr;
      Value *CntAddr = nullptr;

        CntAddr =
            Builder.CreateAlloca(Builder.getInt32Ty(), nullptr, ".cnt.addr");

        CntAddr = Builder.CreateAddrSpaceCast(CntAddr, Builder.getPtrTy(),
                                              CntAddr->getName() + ".ascast");

            Cnt, ConstantInt::get(Builder.getInt32Ty(), NumIters));
        Builder.CreateCondBr(Cmp, BodyBB, ExitBB);

          omp::Directive::OMPD_unknown,

        return BarrierIP1.takeError();

      Value *IsWarpMaster = Builder.CreateIsNull(LaneID, "warp_master");
      Builder.CreateCondBr(IsWarpMaster, ThenBB, ElseBB);

      auto *RedListArrayTy =

          M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());

          Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
                                    {ConstantInt::get(IndexTy, 0),
                                     ConstantInt::get(IndexTy, En.index())});

            RI.DataPtrPtrGen(Builder.saveIP(), ElemPtr, ElemPtr);
        if (!GenRes)
          return GenRes.takeError();

          ArrayTy, TransferMedium, {Builder.getInt64(0), WarpID});

      Builder.CreateStore(Elem, MediumPtr,

          omp::Directive::OMPD_unknown,

        return BarrierIP2.takeError();

      Value *NumWarpsVal =

      Value *IsActiveThread =
          Builder.CreateICmpULT(GPUThreadID, NumWarpsVal, "is_active_thread");
      Builder.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB);

          ArrayTy, TransferMedium, {Builder.getInt64(0), GPUThreadID});

      Value *TargetElemPtrPtr =
          Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
                                    {ConstantInt::get(IndexTy, 0),
                                     ConstantInt::get(IndexTy, En.index())});
      Value *TargetElemPtrVal =

      Value *TargetElemPtr = TargetElemPtrVal;

            RI.DataPtrPtrGen(Builder.saveIP(), TargetElemPtr, TargetElemPtr);
        if (!GenRes)
          return GenRes.takeError();

        TargetElemPtr = Builder.CreateLoad(Builder.getPtrTy(), TargetElemPtr);

      Value *SrcMediumValue =
          Builder.CreateLoad(CType, SrcMediumPtrVal, /*IsVolatile*/ true);
      Builder.CreateStore(SrcMediumValue, TargetElemPtr);

          Cnt, ConstantInt::get(Builder.getInt32Ty(), 1));
      Builder.CreateStore(Cnt, CntAddr, /*IsVolatile*/ false);

      auto *CurFn = Builder.GetInsertBlock()->getParent();

      RealTySize %= TySize;
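// Sketch of the inter-warp phase emitted above: lane 0 of every warp writes
// its partial result into the __openmp_nvptx_data_transfer_temporary_storage
// array (one slot per warp), a barrier publishes the stores, and the first
// NumWarps threads of warp 0 read the slots back into their reduce list;
// chunks of 4, 2, and 1 bytes are looped over via the Cnt counter until
// RealTySize is exhausted.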
Expected<Function *> OpenMPIRBuilder::emitShuffleAndReduceFunction(

  LLVMContext &Ctx = M.getContext();
  FunctionType *FuncTy =

      {Builder.getPtrTy(), Builder.getInt16Ty(),
       Builder.getInt16Ty(), Builder.getInt16Ty()},

      "_omp_reduction_shuffle_and_reduce_func", &M);

  Builder.SetInsertPoint(EntryBB);

  Type *ReduceListArgType = ReduceListArg->getType();

      ReduceListArgType, nullptr, ReduceListArg->getName() + ".addr");
  Value *LaneIdAlloca = Builder.CreateAlloca(LaneIDArgType, nullptr,
                                             LaneIDArg->getName() + ".addr");

      LaneIDArgType, nullptr, RemoteLaneOffsetArg->getName() + ".addr");
  Value *AlgoVerAlloca = Builder.CreateAlloca(LaneIDArgType, nullptr,
                                              AlgoVerArg->getName() + ".addr");

      RedListArrayTy, nullptr, ".omp.reduction.remote_reduce_list");

  Value *ReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReduceListAlloca, ReduceListArgType,
      ReduceListAlloca->getName() + ".ascast");
  Value *LaneIdAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      LaneIdAlloca, LaneIDArgPtrType, LaneIdAlloca->getName() + ".ascast");
  Value *RemoteLaneOffsetAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      RemoteLaneOffsetAlloca, LaneIDArgPtrType,
      RemoteLaneOffsetAlloca->getName() + ".ascast");
  Value *AlgoVerAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      AlgoVerAlloca, LaneIDArgPtrType, AlgoVerAlloca->getName() + ".ascast");
  Value *RemoteListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      RemoteReductionListAlloca, Builder.getPtrTy(),
      RemoteReductionListAlloca->getName() + ".ascast");

  Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
  Builder.CreateStore(LaneIDArg, LaneIdAddrCast);
  Builder.CreateStore(RemoteLaneOffsetArg, RemoteLaneOffsetAddrCast);
  Builder.CreateStore(AlgoVerArg, AlgoVerAddrCast);

  Value *ReduceList = Builder.CreateLoad(ReduceListArgType, ReduceListAddrCast);
  Value *LaneId = Builder.CreateLoad(LaneIDArgType, LaneIdAddrCast);
  Value *RemoteLaneOffset =
      Builder.CreateLoad(LaneIDArgType, RemoteLaneOffsetAddrCast);
  Value *AlgoVer = Builder.CreateLoad(LaneIDArgType, AlgoVerAddrCast);

  Error EmitRedLsCpRes = emitReductionListCopy(

      ReduceList, RemoteListAddrCast, IsByRef,
      {RemoteLaneOffset, nullptr, nullptr});
  if (EmitRedLsCpRes)
    return EmitRedLsCpRes;

  Value *LaneComp = Builder.CreateICmpULT(LaneId, RemoteLaneOffset);

  Value *Algo2AndLaneIdComp = Builder.CreateAnd(Algo2, LaneIdComp);
  Value *RemoteOffsetComp =

  Value *CondAlgo2 = Builder.CreateAnd(Algo2AndLaneIdComp, RemoteOffsetComp);
  Value *CA0OrCA1 = Builder.CreateOr(CondAlgo0, CondAlgo1);
  Value *CondReduce = Builder.CreateOr(CA0OrCA1, CondAlgo2);

  Builder.CreateCondBr(CondReduce, ThenBB, ElseBB);

  Value *LocalReduceListPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReduceList, Builder.getPtrTy());
  Value *RemoteReduceListPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
      RemoteListAddrCast, Builder.getPtrTy());

      ->addFnAttr(Attribute::NoUnwind);

  Value *LaneIdGtOffset = Builder.CreateICmpUGE(LaneId, RemoteLaneOffset);
  Value *CondCopy = Builder.CreateAnd(Algo1, LaneIdGtOffset);

  Builder.CreateCondBr(CondCopy, CpyThenBB, CpyElseBB);

  EmitRedLsCpRes = emitReductionListCopy(

      RemoteListAddrCast, ReduceList, IsByRef);
  if (EmitRedLsCpRes)
    return EmitRedLsCpRes;
Expected<Function *> OpenMPIRBuilder::emitListToGlobalCopyFunction(

  LLVMContext &Ctx = M.getContext();

      {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},

      "_omp_reduction_list_to_global_copy_func", &M);

  Builder.SetInsertPoint(EntryBlock);

      BufferArg->getName() + ".addr");

      Builder.getPtrTy(), nullptr, ReduceListArg->getName() + ".addr");
  Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      BufferArgAlloca, Builder.getPtrTy(),
      BufferArgAlloca->getName() + ".ascast");
  Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->getName() + ".ascast");
  Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReduceListArgAlloca, Builder.getPtrTy(),
      ReduceListArgAlloca->getName() + ".ascast");

  Builder.CreateStore(BufferArg, BufferArgAddrCast);
  Builder.CreateStore(IdxArg, IdxArgAddrCast);
  Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);

  Value *LocalReduceList =

  Value *BufferArgVal =

      M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
  for (auto En : enumerate(ReductionInfos)) {

    auto *RedListArrayTy =

        RedListArrayTy, LocalReduceList,
        {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});

        Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferArgVal, Idxs);

        ReductionsBufferTy, BufferVD, 0, En.index());

    switch (RI.EvaluationKind) {
    case EvalKind::Scalar: {
      Value *TargetElement;

      if (IsByRef.empty() || !IsByRef[En.index()]) {
        TargetElement = Builder.CreateLoad(RI.ElementType, ElemPtr);
      } else {
            RI.DataPtrPtrGen(Builder.saveIP(), ElemPtr, ElemPtr);
        if (!GenResult)
          return GenResult.takeError();

        TargetElement = Builder.CreateLoad(RI.ByRefElementType, ElemPtr);
      }
      Builder.CreateStore(TargetElement, GlobVal);
      break;
    }
    case EvalKind::Complex: {
      Value *SrcRealPtr = Builder.CreateConstInBoundsGEP2_32(
          RI.ElementType, ElemPtr, 0, 0, ".realp");
      Value *SrcReal = Builder.CreateLoad(
          RI.ElementType->getStructElementType(0), SrcRealPtr, ".real");
      Value *SrcImgPtr = Builder.CreateConstInBoundsGEP2_32(
          RI.ElementType, ElemPtr, 0, 1, ".imagp");
      Value *SrcImg = Builder.CreateLoad(
          RI.ElementType->getStructElementType(1), SrcImgPtr, ".imag");

      Value *DestRealPtr = Builder.CreateConstInBoundsGEP2_32(
          RI.ElementType, GlobVal, 0, 0, ".realp");
      Value *DestImgPtr = Builder.CreateConstInBoundsGEP2_32(
          RI.ElementType, GlobVal, 0, 1, ".imagp");
      Builder.CreateStore(SrcReal, DestRealPtr);
      Builder.CreateStore(SrcImg, DestImgPtr);
      break;
    }

        Builder.getInt64(M.getDataLayout().getTypeStoreSize(RI.ElementType));

        GlobVal, M.getDataLayout().getPrefTypeAlign(RI.ElementType), ElemPtr,
        M.getDataLayout().getPrefTypeAlign(RI.ElementType), SizeVal,
        /*isVolatile*/ false);
Expected<Function *> OpenMPIRBuilder::emitListToGlobalReduceFunction(

  LLVMContext &Ctx = M.getContext();

      {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},

      "_omp_reduction_list_to_global_reduce_func", &M);

  Builder.SetInsertPoint(EntryBlock);

      BufferArg->getName() + ".addr");

      Builder.getPtrTy(), nullptr, ReduceListArg->getName() + ".addr");
  auto *RedListArrayTy =

  Value *LocalReduceList =
      Builder.CreateAlloca(RedListArrayTy, nullptr, ".omp.reduction.red_list");

  Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      BufferArgAlloca, Builder.getPtrTy(),
      BufferArgAlloca->getName() + ".ascast");
  Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->getName() + ".ascast");
  Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReduceListArgAlloca, Builder.getPtrTy(),
      ReduceListArgAlloca->getName() + ".ascast");
  Value *LocalReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      LocalReduceList, Builder.getPtrTy(),
      LocalReduceList->getName() + ".ascast");

  Builder.CreateStore(BufferArg, BufferArgAddrCast);
  Builder.CreateStore(IdxArg, IdxArgAddrCast);
  Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);

      M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
  for (auto En : enumerate(ReductionInfos)) {

    if (!IsByRef.empty() && IsByRef[En.index()]) {

      ByRefAlloc = Builder.CreateAlloca(RI.ByRefAllocatedType);
      ByRefAlloc = Builder.CreatePointerBitCastOrAddrSpaceCast(
          ByRefAlloc, Builder.getPtrTy(), ByRefAlloc->getName() + ".ascast");

    Value *TargetElementPtrPtr = Builder.CreateInBoundsGEP(
        RedListArrayTy, LocalReduceListAddrCast,
        {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});

        Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);

    Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32(
        ReductionsBufferTy, BufferVD, 0, En.index());

    if (!IsByRef.empty() && IsByRef[En.index()]) {
      Value *ByRefDataPtr;

          RI.DataPtrPtrGen(Builder.saveIP(), ByRefAlloc, ByRefDataPtr);
      if (!GenResult)
        return GenResult.takeError();

      Builder.CreateStore(GlobValPtr, ByRefDataPtr);
      Builder.CreateStore(ByRefAlloc, TargetElementPtrPtr);
    } else {
      Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
    }

      ->addFnAttr(Attribute::NoUnwind);
Expected<Function *> OpenMPIRBuilder::emitGlobalToListCopyFunction(

  LLVMContext &Ctx = M.getContext();

      {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},

      "_omp_reduction_global_to_list_copy_func", &M);

  Builder.SetInsertPoint(EntryBlock);

      BufferArg->getName() + ".addr");

      Builder.getPtrTy(), nullptr, ReduceListArg->getName() + ".addr");
  Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      BufferArgAlloca, Builder.getPtrTy(),
      BufferArgAlloca->getName() + ".ascast");
  Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->getName() + ".ascast");
  Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReduceListArgAlloca, Builder.getPtrTy(),
      ReduceListArgAlloca->getName() + ".ascast");
  Builder.CreateStore(BufferArg, BufferArgAddrCast);
  Builder.CreateStore(IdxArg, IdxArgAddrCast);
  Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);

  Value *LocalReduceList =

      M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
  for (auto En : enumerate(ReductionInfos)) {
    const OpenMPIRBuilder::ReductionInfo &RI = En.value();
    auto *RedListArrayTy =

        RedListArrayTy, LocalReduceList,
        {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});

        Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
    Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32(
        ReductionsBufferTy, BufferVD, 0, En.index());

      if (!IsByRef.empty() && IsByRef[En.index()]) {

        if (!GenResult)
          return GenResult.takeError();

      Value *TargetElement = Builder.CreateLoad(ElemType, GlobValPtr);
      Builder.CreateStore(TargetElement, ElemPtr);

      Value *SrcRealPtr = Builder.CreateConstInBoundsGEP2_32(

      Value *DestRealPtr = Builder.CreateConstInBoundsGEP2_32(

      Value *DestImgPtr = Builder.CreateConstInBoundsGEP2_32(

      Builder.CreateStore(SrcReal, DestRealPtr);
      Builder.CreateStore(SrcImg, DestImgPtr);

        ElemPtr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
        GlobValPtr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
Expected<Function *> OpenMPIRBuilder::emitGlobalToListReduceFunction(

  LLVMContext &Ctx = M.getContext();

      {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},

      "_omp_reduction_global_to_list_reduce_func", &M);

  Builder.SetInsertPoint(EntryBlock);

      BufferArg->getName() + ".addr");

      Builder.getPtrTy(), nullptr, ReduceListArg->getName() + ".addr");

  Value *LocalReduceList =
      Builder.CreateAlloca(RedListArrayTy, nullptr, ".omp.reduction.red_list");

  Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      BufferArgAlloca, Builder.getPtrTy(),
      BufferArgAlloca->getName() + ".ascast");
  Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->getName() + ".ascast");
  Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReduceListArgAlloca, Builder.getPtrTy(),
      ReduceListArgAlloca->getName() + ".ascast");
  Value *ReductionList = Builder.CreatePointerBitCastOrAddrSpaceCast(
      LocalReduceList, Builder.getPtrTy(),
      LocalReduceList->getName() + ".ascast");

  Builder.CreateStore(BufferArg, BufferArgAddrCast);
  Builder.CreateStore(IdxArg, IdxArgAddrCast);
  Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);

      M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
  for (auto En : enumerate(ReductionInfos)) {

    if (!IsByRef.empty() && IsByRef[En.index()]) {

      ByRefAlloc = Builder.CreateAlloca(RI.ByRefAllocatedType);
      ByRefAlloc = Builder.CreatePointerBitCastOrAddrSpaceCast(
          ByRefAlloc, Builder.getPtrTy(), ByRefAlloc->getName() + ".ascast");

    Value *TargetElementPtrPtr = Builder.CreateInBoundsGEP(
        RedListArrayTy, ReductionList,
        {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});

        Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
    Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32(
        ReductionsBufferTy, BufferVD, 0, En.index());

    if (!IsByRef.empty() && IsByRef[En.index()]) {
      Value *ByRefDataPtr;

          RI.DataPtrPtrGen(Builder.saveIP(), ByRefAlloc, ByRefDataPtr);
      if (!GenResult)
        return GenResult.takeError();

      Builder.CreateStore(GlobValPtr, ByRefDataPtr);
      Builder.CreateStore(ByRefAlloc, TargetElementPtrPtr);
    } else {
      Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
    }

      ->addFnAttr(Attribute::NoUnwind);
std::string OpenMPIRBuilder::getReductionFuncName(StringRef Name) const {
  std::string Suffix = /* ... */;
  return (Name + Suffix).str();
}
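// The helper above just concatenates the reducer's name with a
// configuration-dependent suffix; e.g. getReductionFuncName("foo") yields
// "foo" followed by whatever Suffix the (elided) initializer produced, so
// every outlined reduction helper gets a symbol derived from its reducer.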
Expected<Function *> OpenMPIRBuilder::createReductionFunction(
    // ...
    AttributeList FuncAttrs) {
  // ...
      {Builder.getPtrTy(), Builder.getPtrTy()},
  // ...
  std::string Name = getReductionFuncName(ReducerName);
  // ...
  Builder.SetInsertPoint(EntryBB);
  // ...
  Value *LHSArrayPtr = nullptr;
  Value *RHSArrayPtr = nullptr;
  // ...
      Builder.CreateAlloca(Arg0Type, nullptr, Arg0->getName() + ".addr");
  // ...
      Builder.CreateAlloca(Arg1Type, nullptr, Arg1->getName() + ".addr");
  Value *LHSAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      LHSAlloca, Arg0Type, LHSAlloca->getName() + ".ascast");
  Value *RHSAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      RHSAlloca, Arg1Type, RHSAlloca->getName() + ".ascast");
  Builder.CreateStore(Arg0, LHSAddrCast);
  Builder.CreateStore(Arg1, RHSAddrCast);
  LHSArrayPtr = Builder.CreateLoad(Arg0Type, LHSAddrCast);
  RHSArrayPtr = Builder.CreateLoad(Arg1Type, RHSAddrCast);
  // ...
      M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
  for (auto En : enumerate(ReductionInfos)) {
    // ...
        RedArrayTy, RHSArrayPtr,
        {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
    // ...
    Value *RHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
        RHSI8Ptr, RI.PrivateVariable->getType(),
        RHSI8Ptr->getName() + ".ascast");
    // ...
        RedArrayTy, LHSArrayPtr,
        {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
    // ...
    Value *LHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
        LHSI8Ptr, RI.Variable->getType(), LHSI8Ptr->getName() + ".ascast");
    // ...
    if (!IsByRef.empty() && !IsByRef[En.index()]) {
      LHS = Builder.CreateLoad(RI.ElementType, LHSPtr);
      RHS = Builder.CreateLoad(RI.ElementType, RHSPtr);
    }
    // ...
      return AfterIP.takeError();
    if (!Builder.GetInsertBlock())
      return ReductionFunc;
    // ...
    if (!IsByRef.empty() && !IsByRef[En.index()])
      Builder.CreateStore(Reduced, LHSPtr);
  }
  // ...
  for (auto En : enumerate(ReductionInfos)) {
    unsigned Index = En.index();
    // ...
    Value *LHSFixupPtr, *RHSFixupPtr;
    Builder.restoreIP(RI.ReductionGenClang(
        Builder.saveIP(), Index, &LHSFixupPtr, &RHSFixupPtr, ReductionFunc));
    // ...
        LHSPtrs[Index], [ReductionFunc](const Use &U) {
    // ...
        RHSPtrs[Index], [ReductionFunc](const Use &U) {
    // ...
  }
  return ReductionFunc;
  assert(RI.Variable && "expected non-null variable");
  assert(RI.PrivateVariable && "expected non-null private variable");
  assert((RI.ReductionGen || RI.ReductionGenClang) &&
         "expected non-null reduction generator callback");
  // ...
      RI.Variable->getType() == RI.PrivateVariable->getType() &&
      "expected variables and their private equivalents to have the same "
  // ...
  assert(RI.Variable->getType()->isPointerTy() &&
         "expected variables to be pointers");
    unsigned ReductionBufNum, Value *SrcLocInfo) {
  // ...
  if (ReductionInfos.size() == 0)
    // ...
  Builder.SetInsertPoint(InsertBlock, InsertBlock->end());
  // ...
  AttributeList FuncAttrs;
  AttrBuilder AttrBldr(Ctx);
  // ...
    AttrBldr.addAttribute(Attr);
  AttrBldr.removeAttribute(Attribute::OptimizeNone);
  FuncAttrs = FuncAttrs.addFnAttributes(Ctx, AttrBldr);
  // ...
      Builder.GetInsertBlock()->getParent()->getName(), ReductionInfos, IsByRef,
  // ...
  if (!ReductionResult)
    // ...
  Function *ReductionFunc = *ReductionResult;
  // ...
  if (GridValue.has_value())
    Config.setGridValue(GridValue.value());
  // ...
      Builder.getPtrTy(M.getDataLayout().getProgramAddressSpace());
  // ...
  Value *ReductionListAlloca =
      Builder.CreateAlloca(RedArrayTy, nullptr, ".omp.reduction.red_list");
  Value *ReductionList = Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionListAlloca, PtrTy, ReductionListAlloca->getName() + ".ascast");
  // ...
      M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
  for (auto En : enumerate(ReductionInfos)) {
    // ...
        RedArrayTy, ReductionList,
        {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
    // ...
    bool IsByRefElem = !IsByRef.empty() && IsByRef[En.index()];
    // ...
        Builder.CreatePointerBitCastOrAddrSpaceCast(PrivateVar, PtrTy);
    Builder.CreateStore(CastElem, ElemPtr);
  }
  // ...
      ReductionInfos, ReductionFunc, FuncAttrs, IsByRef);
  // ...
      emitInterWarpCopyFunction(Loc, ReductionInfos, FuncAttrs, IsByRef);
  // ...
  Value *RL = Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList, PtrTy);
  unsigned MaxDataSize = 0;
  // ...
  for (auto En : enumerate(ReductionInfos)) {
    auto Size = M.getDataLayout().getTypeStoreSize(En.value().ElementType);
    if (Size > MaxDataSize)
      // ...
    Type *RedTypeArg = (!IsByRef.empty() && IsByRef[En.index()])
                           ? En.value().ByRefElementType
                           : En.value().ElementType;
    // ...
  }
  Value *ReductionDataSize =
      Builder.getInt64(MaxDataSize * ReductionInfos.size());
  if (!IsTeamsReduction) {
    Value *SarFuncCast =
        Builder.CreatePointerBitCastOrAddrSpaceCast(*SarFunc, FuncPtrTy);
    // ...
        Builder.CreatePointerBitCastOrAddrSpaceCast(WcFunc, FuncPtrTy);
    Value *Args[] = {SrcLocInfo, ReductionDataSize, RL, SarFuncCast,
    // ...
        RuntimeFunction::OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2);
    // ...
  } else {
    // ...
        Ctx, ReductionTypeArgs, "struct._globalized_locals_ty");
    // ...
        RuntimeFunction::OMPRTL___kmpc_reduction_get_fixed_buffer);
    // ...
        ReductionInfos, ReductionsBufferTy, FuncAttrs, IsByRef);
    // ...
        ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs, IsByRef);
    // ...
        ReductionInfos, ReductionsBufferTy, FuncAttrs, IsByRef);
    // ...
        ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs, IsByRef);
    // ...
        RedFixedBufferFn, {}, "_openmp_teams_reductions_buffer_$_$ptr");
    // ...
    Value *Args3[] = {SrcLocInfo,
                      KernelTeamsReductionPtr,
                      Builder.getInt32(ReductionBufNum),
    // ...
        RuntimeFunction::OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2);
    // ...
  }
  // ...
  for (auto En : enumerate(ReductionInfos)) {
    // ...
    Value *LHSPtr, *RHSPtr;
    // ...
        &LHSPtr, &RHSPtr, CurFunc));
    // ...
    if (IsByRef.empty() || !IsByRef[En.index()]) {
      // ...
          "red.value." + Twine(En.index()));
    }
    // ...
    if (!IsByRef.empty() && !IsByRef[En.index()])
      // ...
  }
  if (ContinuationBlock) {
    Builder.CreateBr(ContinuationBlock);
    Builder.SetInsertPoint(ContinuationBlock);
  }
  Config.setEmitLLVMUsed();
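  // Device runtime entry points driven above (signatures abbreviated; the
  // OpenMP device runtime headers are authoritative):
  //   __kmpc_nvptx_parallel_reduce_nowait_v2(loc, size, rl, shuffle_fn, copy_fn)
  //   __kmpc_nvptx_teams_reduce_nowait_v2(loc, buffer, num_buffers, size, rl, ...)
  // Both return 1 on the thread that must commit the final values, which is
  // why the loop above only rewrites the original variables behind that
  // check before branching to the continuation block.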
4438 ".omp.reduction.func", &M);
4448 Builder.SetInsertPoint(ReductionFuncBlock);
4449 Value *LHSArrayPtr =
nullptr;
4450 Value *RHSArrayPtr =
nullptr;
4461 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
4463 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
4464 Value *LHSAddrCast =
4465 Builder.CreatePointerBitCastOrAddrSpaceCast(LHSAlloca, Arg0Type);
4466 Value *RHSAddrCast =
4467 Builder.CreatePointerBitCastOrAddrSpaceCast(RHSAlloca, Arg1Type);
4468 Builder.CreateStore(Arg0, LHSAddrCast);
4469 Builder.CreateStore(Arg1, RHSAddrCast);
4470 LHSArrayPtr = Builder.CreateLoad(Arg0Type, LHSAddrCast);
4471 RHSArrayPtr = Builder.CreateLoad(Arg1Type, RHSAddrCast);
4473 LHSArrayPtr = ReductionFunc->
getArg(0);
4474 RHSArrayPtr = ReductionFunc->
getArg(1);
4477 unsigned NumReductions = ReductionInfos.
size();
4480 for (
auto En :
enumerate(ReductionInfos)) {
4482 Value *LHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
4483 RedArrayTy, LHSArrayPtr, 0, En.index());
4484 Value *LHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), LHSI8PtrPtr);
4485 Value *LHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
4488 Value *RHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
4489 RedArrayTy, RHSArrayPtr, 0, En.index());
4490 Value *RHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), RHSI8PtrPtr);
4491 Value *RHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
4500 Builder.restoreIP(*AfterIP);
4502 if (!Builder.GetInsertBlock())
4506 if (!IsByRef[En.index()])
4507 Builder.CreateStore(Reduced, LHSPtr);
4509 Builder.CreateRetVoid();
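  // The function finished above implements the kmpc reduction-function ABI:
  // two opaque pointers to arrays of per-variable element pointers, folding
  // RHS into LHS. Conceptually (the loop is unrolled at IR-generation time):
  //   void reduction_func(void **LHS, void **RHS) {
  //     for (unsigned i = 0; i < NumReductions; ++i)
  //       *LHS[i] = reduce(*LHS[i], *RHS[i]);
  //   }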
    bool IsNoWait, bool IsTeamsReduction) {
  // ...
      IsByRef, IsNoWait, IsTeamsReduction);
  // ...
  if (ReductionInfos.size() == 0)
    // ...
  unsigned NumReductions = ReductionInfos.size();
  // ...
  Value *RedArray = Builder.CreateAlloca(RedArrayTy, nullptr, "red.array");
  // ...
  Builder.SetInsertPoint(InsertBlock, InsertBlock->end());
  // ...
  for (auto En : enumerate(ReductionInfos)) {
    unsigned Index = En.index();
    // ...
    Value *RedArrayElemPtr = Builder.CreateConstInBoundsGEP2_64(
        RedArrayTy, RedArray, 0, Index, "red.array.elem." + Twine(Index));
    // ...
  }
  // ...
      M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
  // ...
      ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
  // ...
  unsigned RedArrayByteSize = DL.getTypeStoreSize(RedArrayTy);
  Constant *RedArraySize = ConstantInt::get(IndexTy, RedArrayByteSize);
  // ...
  Value *Lock = getOMPCriticalRegionLock(".reduction");
  // ...
      IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
               : RuntimeFunction::OMPRTL___kmpc_reduce);
  // ...
      {Ident, ThreadId, NumVariables, RedArraySize, RedArray, ReductionFunc,
       Lock},
  // ...
      Builder.CreateSwitch(ReduceCall, ContinuationBlock, 2);
  Switch->addCase(Builder.getInt32(1), NonAtomicRedBlock);
  Switch->addCase(Builder.getInt32(2), AtomicRedBlock);
  // ...
  Builder.SetInsertPoint(NonAtomicRedBlock);
  for (auto En : enumerate(ReductionInfos)) {
    // ...
    if (!IsByRef[En.index()]) {
      // ...
          "red.value." + Twine(En.index()));
      Value *PrivateRedValue =
      // ...
          "red.private.value." + Twine(En.index()));
    }
    // ...
    if (!Builder.GetInsertBlock())
      // ...
    if (!IsByRef[En.index()])
      // ...
  }
  // ...
      IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
               : RuntimeFunction::OMPRTL___kmpc_end_reduce);
  // ...
  Builder.CreateBr(ContinuationBlock);
  // ...
  Builder.SetInsertPoint(AtomicRedBlock);
  if (CanGenerateAtomic &&
      llvm::none_of(IsByRef, [](bool P) { return P; })) {
    // ...
    if (!Builder.GetInsertBlock())
      // ...
    Builder.CreateBr(ContinuationBlock);
  }
  // ...
  if (!Builder.GetInsertBlock())
    // ...
  Builder.SetInsertPoint(ContinuationBlock);
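  // Host-side reduction protocol realized above: __kmpc_reduce{_nowait}
  // reports which commit strategy the runtime chose, and the generated
  // switch routes control accordingly. Pseudocode of the resulting CFG:
  //   switch (__kmpc_reduce(loc, tid, n, size, array, red_func, &lock)) {
  //   case 1: /* non-atomic: fold privates into originals, then end_reduce */
  //   case 2: /* atomic: per-variable atomic updates */
  //   default: /* another thread already committed the result */
  //   }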
  Directive OMPD = Directive::OMPD_master;
  // ...
  Value *Args[] = {Ident, ThreadId};
  // ...
  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
  // ...
  Directive OMPD = Directive::OMPD_masked;
  // ...
  Value *ArgsEnd[] = {Ident, ThreadId};
  // ...
  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
  // ...
  Call->setDoesNotThrow();
    bool IsInclusive, ScanInfo *ScanRedInfo) {
  // ...
  llvm::Error Err = emitScanBasedDirectiveDeclsIR(AllocaIP, ScanVars,
                                                  ScanVarsType, ScanRedInfo);
  // ...
  for (size_t i = 0; i < ScanVars.size(); i++) {
    // ...
    Type *DestTy = ScanVarsType[i];
    Value *Val = Builder.CreateInBoundsGEP(DestTy, Buff, IV, "arrayOffset");
    // ...
    Builder.CreateStore(Src, Val);
  }
  // ...
      Builder.GetInsertBlock()->getParent());
  // ...
  IV = ScanRedInfo->IV;
  // ...
  for (size_t i = 0; i < ScanVars.size(); i++) {
    // ...
    Type *DestTy = ScanVarsType[i];
    // ...
        Builder.CreateInBoundsGEP(DestTy, Buff, IV, "arrayOffset");
    // ...
    Builder.CreateStore(Src, ScanVars[i]);
  }
  // ...
      Builder.GetInsertBlock()->getParent());
Error OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR(
    // ...
  Builder.restoreIP(AllocaIP);
  // ...
  for (size_t i = 0; i < ScanVars.size(); i++) {
    // ...
        Builder.CreateAlloca(Builder.getPtrTy(), nullptr, "vla");
  }
  // ...
  Builder.restoreIP(CodeGenIP);
  // ...
      Builder.CreateAdd(ScanRedInfo->Span, Builder.getInt32(1));
  for (size_t i = 0; i < ScanVars.size(); i++) {
    // ...
    Value *Buff = Builder.CreateMalloc(IntPtrTy, ScanVarsType[i], Allocsize,
                                       AllocSpan, nullptr, "arr");
    Builder.CreateStore(Buff, (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]]);
  }
  Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
  // ...
Error OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
    // ...
  Value *PrivateVar = RedInfo.PrivateVariable;
  Value *OrigVar = RedInfo.Variable;
  // ...
  Type *SrcTy = RedInfo.ElementType;
  // ...
  Builder.CreateStore(Src, OrigVar);
  // ...
  Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
      Builder.GetInsertBlock()->getModule(),
  // ...
      Builder.GetInsertBlock()->getModule(),
  // ...
      llvm::ConstantInt::get(ScanRedInfo->Span->getType(), 1));
  Builder.SetInsertPoint(InputBB);
  // ...
  Builder.SetInsertPoint(LoopBB);
  // ...
  Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
  // ...
  Builder.SetInsertPoint(InnerLoopBB);
  // ...
  Value *ReductionVal = RedInfo.PrivateVariable;
  // ...
  Type *DestTy = RedInfo.ElementType;
  // ...
      Builder.CreateInBoundsGEP(DestTy, Buff, IV, "arrayOffset");
  // ...
      Builder.CreateInBoundsGEP(DestTy, Buff, OffsetIval, "arrayOffset");
  // ...
      RedInfo.ReductionGen(Builder.saveIP(), LHS, RHS, Result);
  // ...
  Builder.CreateStore(Result, LHSPtr);
  // ...
      IVal, llvm::ConstantInt::get(Builder.getInt32Ty(), 1));
  // ...
  CmpI = Builder.CreateICmpUGE(NextIVal, Pow2K);
  Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
  // ...
      Counter, llvm::ConstantInt::get(Counter->getType(), 1));
  // ...
  Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
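  // The nest above performs a logarithmic up-sweep over the scan buffer: for
  // each power of two 2^k < Span (the outer LoopBB), the inner InnerLoopBB
  // walks the index downward and folds Buff[i - 2^k] into Buff[i], so after
  // ceil(log2(Span)) passes Buff[i] holds the reduction of elements 0..i.
  // Sketch with '+' as the combiner:
  //   for (k = 1; k < Span; k *= 2)       // outer loop, Pow2K doubling
  //     for (i = Span; i >= k; --i)       // inner loop, descending IV
  //       Buff[i] = Buff[i - k] + Buff[i];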
  Error Err = emitScanBasedDirectiveFinalsIR(ReductionInfos, ScanRedInfo);
  // ...
Error OpenMPIRBuilder::emitScanBasedDirectiveIR(
    // ...
  Error Err = InputLoopGen();
  // ...
  Error Err = ScanLoopGen(Builder.saveIP());
  // ...
void OpenMPIRBuilder::createScanBBs(ScanInfo *ScanRedInfo) {
  Builder.SetInsertPoint(Preheader);
  // ...
  Builder.SetInsertPoint(Header);
  PHINode *IndVarPHI = Builder.CreatePHI(IndVarTy, 2, "omp_" + Name + ".iv");
  IndVarPHI->addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
  // ...
      Builder.CreateICmpULT(IndVarPHI, TripCount, "omp_" + Name + ".cmp");
  Builder.CreateCondBr(Cmp, Body, Exit);
  // ...
  Builder.SetInsertPoint(Latch);
  // ...
      "omp_" + Name + ".next", /*HasNUW=*/true);
  // ...
  CL->Header = Header;
  // ...
      NextBB, NextBB, Name);
    Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
    // ...
      ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
  ScanRedInfo->Span = TripCount;
  // ...
  ScanRedInfo->IV = IV;
  // ...
  createScanBBs(ScanRedInfo);
  // ...
  assert(Terminator->getNumSuccessors() == 1);
  BasicBlock *ContinueBlock = Terminator->getSuccessor(0);
  // ...
      Builder.GetInsertBlock()->getParent());
  // ...
      Builder.GetInsertBlock()->getParent());
  Builder.CreateBr(ContinueBlock);
  // ...
  const auto &&InputLoopGen = [&]() -> Error {
    // ...
        Builder.saveIP(), BodyGen, Start, Stop, Step, IsSigned, InclusiveStop,
        ComputeIP, Name, true, ScanRedInfo);
    // ...
    Builder.restoreIP((*LoopInfo)->getAfterIP());
    // ...
  };
  // ...
      InclusiveStop, ComputeIP, Name, true, ScanRedInfo);
  // ...
  Builder.restoreIP((*LoopInfo)->getAfterIP());
  // ...
  Error Err = emitScanBasedDirectiveIR(InputLoopGen, ScanLoopGen, ScanRedInfo);
    bool IsSigned, bool InclusiveStop, const Twine &Name) {
  // ...
  assert(IndVarTy == Stop->getType() && "Stop type mismatch");
  assert(IndVarTy == Step->getType() && "Step type mismatch");
  // ...
  ConstantInt *Zero = ConstantInt::get(IndVarTy, 0);
  // ...
    Incr = Builder.CreateSelect(IsNeg, Builder.CreateNeg(Step), Step);
    // ...
    Span = Builder.CreateSub(UB, LB, "", false, true);
    // ...
    Span = Builder.CreateSub(Stop, Start, "", true);
  // ...
  Value *CountIfLooping;
  if (InclusiveStop) {
    CountIfLooping = Builder.CreateAdd(Builder.CreateUDiv(Span, Incr), One);
  } else {
    // ...
    CountIfLooping = Builder.CreateSelect(OneCmp, One, CountIfTwo);
  }
  return Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping,
                              "omp_" + Name + ".tripcount");
    Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
    // ...
      ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
  // ...
  ScanRedInfo->IV = IndVar;
  // ...
  return BodyGenCB(Builder.saveIP(), IndVar);
  // ...
      Builder.getCurrentDebugLocation());
  // ...
  unsigned Bitwidth = Ty->getIntegerBitWidth();
  // ...
      M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_4u);
  // ...
      M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_8u);
  // ...
  unsigned Bitwidth = Ty->getIntegerBitWidth();
  // ...
      M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
  // ...
      M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
  assert(CLI->isValid() && "Requires a valid canonical loop");
  // ...
         "Require dedicated allocate IP");
  // ...
  uint32_t SrcLocStrSize;
  // ...
  Type *IVTy = IV->getType();
  FunctionCallee StaticInit =
      LoopType == WorksharingLoopType::DistributeForStaticLoop
  // ...
  FunctionCallee StaticFini =
  // ...
  Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
  // ...
  Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
  Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound");
  Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound");
  Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride");
  // ...
  Constant *One = ConstantInt::get(IVTy, 1);
  Builder.CreateStore(Zero, PLowerBound);
  // ...
  Builder.CreateStore(UpperBound, PUpperBound);
  Builder.CreateStore(One, PStride);
  // ...
      (LoopType == WorksharingLoopType::DistributeStaticLoop)
          ? OMPScheduleType::OrderedDistribute
  // ...
      ConstantInt::get(I32Type, static_cast<int>(SchedType));
  // ...
  auto BuildInitCall = [LoopType, SrcLoc, ThreadNum, PLastIter, PLowerBound,
                        PUpperBound, IVTy, PStride, One, Zero, StaticInit,
  // ...
        PLowerBound, PUpperBound});
    if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
      Value *PDistUpperBound =
          Builder.CreateAlloca(IVTy, nullptr, "p.distupperbound");
      Args.push_back(PDistUpperBound);
    }
  // ...
  BuildInitCall(SchedulingType, Builder);
  if (HasDistSchedule &&
      LoopType != WorksharingLoopType::DistributeStaticLoop) {
    Constant *DistScheduleSchedType = ConstantInt::get(
    // ...
    BuildInitCall(DistScheduleSchedType, Builder);
  }
  Value *LowerBound = Builder.CreateLoad(IVTy, PLowerBound);
  Value *InclusiveUpperBound = Builder.CreateLoad(IVTy, PUpperBound);
  Value *TripCountMinusOne =
      Builder.CreateSub(InclusiveUpperBound, LowerBound);
  Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One);
  CLI->setTripCount(TripCount);
  // ...
  CLI->mapIndVar([&](Instruction *OldIV) -> Value * {
    // ...
    return Builder.CreateAdd(OldIV, LowerBound);
  });
  // ...
      omp::Directive::OMPD_for, /*ForceSimpleCall=*/false,
  // ...
    return BarrierIP.takeError();
  // ...
    Reachable.insert(Block);
  // ...
      Ctx, {MDString::get(Ctx, "llvm.loop.parallel_accesses"), AccessGroup}));
OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(
    // ...
  assert(CLI->isValid() && "Requires a valid canonical loop");
  assert((ChunkSize || DistScheduleChunkSize) && "Chunk size is required");
  // ...
  Type *IVTy = IV->getType();
  // ...
         "Max supported tripcount bitwidth is 64 bits");
  // ...
      : Type::getInt64Ty(Ctx);
  // ...
  Constant *One = ConstantInt::get(InternalIVTy, 1);
  // ...
  LoopInfo &&LI = LIA.run(*F, FAM);
  // ...
  if (ChunkSize || DistScheduleChunkSize)
    // ...
  FunctionCallee StaticInit =
  // ...
  FunctionCallee StaticFini =
  // ...
  Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
  Value *PLowerBound =
      Builder.CreateAlloca(InternalIVTy, nullptr, "p.lowerbound");
  Value *PUpperBound =
      Builder.CreateAlloca(InternalIVTy, nullptr, "p.upperbound");
  Value *PStride = Builder.CreateAlloca(InternalIVTy, nullptr, "p.stride");
  // ...
      ChunkSize ? ChunkSize : Zero, InternalIVTy, "chunksize");
  Value *CastedDistScheduleChunkSize = Builder.CreateZExtOrTrunc(
      DistScheduleChunkSize ? DistScheduleChunkSize : Zero, InternalIVTy,
      "distschedulechunksize");
  Value *CastedTripCount =
      Builder.CreateZExt(OrigTripCount, InternalIVTy, "tripcount");
  // ...
      ConstantInt::get(I32Type, static_cast<int>(SchedType));
  // ...
      ConstantInt::get(I32Type, static_cast<int>(DistScheduleSchedType));
  Builder.CreateStore(Zero, PLowerBound);
  Value *OrigUpperBound = Builder.CreateSub(CastedTripCount, One);
  Value *IsTripCountZero = Builder.CreateICmpEQ(CastedTripCount, Zero);
  // ...
      Builder.CreateSelect(IsTripCountZero, Zero, OrigUpperBound);
  Builder.CreateStore(UpperBound, PUpperBound);
  Builder.CreateStore(One, PStride);
  // ...
  uint32_t SrcLocStrSize;
  // ...
  auto BuildInitCall = [StaticInit, SrcLoc, ThreadNum, PLastIter, PLowerBound,
                        PUpperBound, PStride, One,
                        this](Value *SchedulingType, Value *ChunkSize,
  // ...
        StaticInit, {SrcLoc, ThreadNum, SchedulingType, PLastIter,
                     PLowerBound, PUpperBound,
  // ...
  BuildInitCall(SchedulingType, CastedChunkSize, Builder);
  if (DistScheduleSchedType != OMPScheduleType::None &&
      SchedType != OMPScheduleType::OrderedDistributeChunked &&
      SchedType != OMPScheduleType::OrderedDistribute) {
    // ...
    BuildInitCall(DistSchedulingType, CastedDistScheduleChunkSize, Builder);
  }
  // ...
  Value *FirstChunkStart =
      Builder.CreateLoad(InternalIVTy, PLowerBound, "omp_firstchunk.lb");
  Value *FirstChunkStop =
      Builder.CreateLoad(InternalIVTy, PUpperBound, "omp_firstchunk.ub");
  Value *FirstChunkEnd = Builder.CreateAdd(FirstChunkStop, One);
  // ...
      Builder.CreateSub(FirstChunkEnd, FirstChunkStart, "omp_chunk.range");
  Value *NextChunkStride =
      Builder.CreateLoad(InternalIVTy, PStride, "omp_dispatch.stride");
  // ...
  Value *DispatchCounter;
  // ...
    DispatchCounter = Counter;
  // ...
      FirstChunkStart, CastedTripCount, NextChunkStride,
  // ...
  Value *ChunkEnd = Builder.CreateAdd(DispatchCounter, ChunkRange);
  Value *IsLastChunk =
      Builder.CreateICmpUGE(ChunkEnd, CastedTripCount, "omp_chunk.is_last");
  Value *CountUntilOrigTripCount =
      Builder.CreateSub(CastedTripCount, DispatchCounter);
  // ...
      IsLastChunk, CountUntilOrigTripCount, ChunkRange, "omp_chunk.tripcount");
  Value *BackcastedChunkTC =
      Builder.CreateTrunc(ChunkTripCount, IVTy, "omp_chunk.tripcount.trunc");
  CLI->setTripCount(BackcastedChunkTC);
  // ...
  Value *BackcastedDispatchCounter =
      Builder.CreateTrunc(DispatchCounter, IVTy, "omp_dispatch.iv.trunc");
  CLI->mapIndVar([&](Instruction *) -> Value * {
    // ...
    return Builder.CreateAdd(IV, BackcastedDispatchCounter);
  });
  // ...
    return AfterIP.takeError();
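// The static-chunked lowering above wraps the canonical loop in a dispatch
// loop: the runtime's first [lb, ub] answer gives the chunk extent, and each
// dispatch iteration advances by the returned stride. Schematically:
//   for (counter = lb; counter < tripcount; counter += stride) { // dispatch
//     chunk_tc = is_last ? tripcount - counter : chunkrange;
//     for (iv = 0; iv < chunk_tc; ++iv)
//       body(counter + iv);
//   }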
static FunctionCallee
// ...
  unsigned Bitwidth = Ty->getIntegerBitWidth();
  // ...
  case WorksharingLoopType::ForStaticLoop:
    // ...
        M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_4u);
    // ...
        M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_8u);
    // ...
  case WorksharingLoopType::DistributeStaticLoop:
    // ...
        M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_4u);
    // ...
        M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_8u);
    // ...
  case WorksharingLoopType::DistributeForStaticLoop:
    // ...
        M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_4u);
    // ...
        M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_8u);
  // ...
  if (Bitwidth != 32 && Bitwidth != 64) {
5711 if (Bitwidth != 32 && Bitwidth != 64) {
5723 Function &LoopBodyFn,
bool NoLoop) {
5734 if (LoopType == WorksharingLoopType::DistributeStaticLoop) {
5735 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5736 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
5737 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
5742 M, omp::RuntimeFunction::OMPRTL_omp_get_num_threads);
5743 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
5747 Builder.CreateZExtOrTrunc(NumThreads, TripCountTy,
"num.threads.cast"));
5748 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5749 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
5750 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5751 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), NoLoop));
5753 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
5777 Builder.restoreIP({Preheader, Preheader->
end()});
5780 Builder.CreateBr(CLI->
getExit());
5788 CleanUpInfo.
collectBlocks(RegionBlockSet, BlocksToBeRemoved);
5796 "Expected unique undroppable user of outlined function");
5798 assert(OutlinedFnCallInstruction &&
"Expected outlined function call");
5800 "Expected outlined function call to be located in loop preheader");
5802 if (OutlinedFnCallInstruction->
arg_size() > 1)
5809 LoopBodyArg, TripCount, OutlinedFn, NoLoop);
5811 for (
auto &ToBeDeletedItem : ToBeDeleted)
5812 ToBeDeletedItem->eraseFromParent();
  uint32_t SrcLocStrSize;
  // ...
  SmallVector<Instruction *, 4> ToBeDeleted;
  // ...
  OI.OuterAllocaBB = AllocaIP.getBlock();
  // ...
      "omp.prelatch", /*Before=*/true);
  // ...
  SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
  // ...
  OI.collectBlocks(ParallelRegionBlockSet, Blocks);
  // ...
  CodeExtractorAnalysisCache CEAC(*OuterFn);
  CodeExtractor Extractor(Blocks,
  // ...
  SetVector<Value *> SinkingCands, HoistingCands;
  // ...
  Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
  // ...
  for (auto Use : Users) {
    // ...
    if (ParallelRegionBlockSet.count(Inst->getParent())) {
      Inst->replaceUsesOfWith(CLI->getIndVar(), NewLoopCntLoad);
      // ...
    }
  }
  // ...
  OI.ExcludeArgsFromAggregate.push_back(NewLoopCntLoad);
  // ...
  OI.PostOutlineCB = [=, ToBeDeletedVec =
                             std::move(ToBeDeleted)](Function &OutlinedFn) {
    bool NeedsBarrier, omp::ScheduleKind SchedKind, Value *ChunkSize,
    bool HasSimdModifier, bool HasMonotonicModifier,
    bool HasNonmonotonicModifier, bool HasOrderedClause,
    // ...
    Value *DistScheduleChunkSize) {
  if (Config.isTargetDevice())
    return applyWorkshareLoopTarget(DL, CLI, AllocaIP, LoopType, NoLoop);
  // ...
      SchedKind, ChunkSize, HasSimdModifier, HasMonotonicModifier,
      HasNonmonotonicModifier, HasOrderedClause, DistScheduleChunkSize);
  bool IsOrdered = (EffectiveScheduleType & OMPScheduleType::ModifierOrdered) ==
                   OMPScheduleType::ModifierOrdered;
  // ...
  if (HasDistSchedule) {
    DistScheduleSchedType = DistScheduleChunkSize
                                ? OMPScheduleType::OrderedDistributeChunked
                                : OMPScheduleType::OrderedDistribute;
  }
  switch (EffectiveScheduleType & ~OMPScheduleType::ModifierMask) {
  case OMPScheduleType::BaseStatic:
  case OMPScheduleType::BaseDistribute:
    assert((!ChunkSize || !DistScheduleChunkSize) &&
           "No chunk size with static-chunked schedule");
    if (IsOrdered && !HasDistSchedule)
      return applyDynamicWorkshareLoop(DL, CLI, AllocaIP,
                                       EffectiveScheduleType, NeedsBarrier,
                                       ChunkSize);
    // ...
    if (DistScheduleChunkSize)
      return applyStaticChunkedWorkshareLoop(
          DL, CLI, AllocaIP, NeedsBarrier, ChunkSize, EffectiveScheduleType,
          DistScheduleChunkSize, DistScheduleSchedType);
    return applyStaticWorkshareLoop(DL, CLI, AllocaIP, LoopType, NeedsBarrier,
    // ...
  case OMPScheduleType::BaseStaticChunked:
  case OMPScheduleType::BaseDistributeChunked:
    if (IsOrdered && !HasDistSchedule)
      return applyDynamicWorkshareLoop(DL, CLI, AllocaIP,
                                       EffectiveScheduleType, NeedsBarrier,
                                       ChunkSize);
    // ...
    return applyStaticChunkedWorkshareLoop(
        DL, CLI, AllocaIP, NeedsBarrier, ChunkSize, EffectiveScheduleType,
        DistScheduleChunkSize, DistScheduleSchedType);

  case OMPScheduleType::BaseRuntime:
  case OMPScheduleType::BaseAuto:
  case OMPScheduleType::BaseGreedy:
  case OMPScheduleType::BaseBalanced:
  case OMPScheduleType::BaseSteal:
  case OMPScheduleType::BaseGuidedSimd:
  case OMPScheduleType::BaseRuntimeSimd:
    // ...
           "schedule type does not support user-defined chunk sizes");
    [[fallthrough]];
  case OMPScheduleType::BaseDynamicChunked:
  case OMPScheduleType::BaseGuidedChunked:
  case OMPScheduleType::BaseGuidedIterativeChunked:
  case OMPScheduleType::BaseGuidedAnalyticalChunked:
  case OMPScheduleType::BaseStaticBalancedChunked:
    return applyDynamicWorkshareLoop(DL, CLI, AllocaIP, EffectiveScheduleType,
                                     NeedsBarrier, ChunkSize);
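// Dispatch summary for the switch above: static (optionally chunked)
// schedules lower to the __kmpc_for_static_init family, while dynamic,
// guided, auto and runtime schedules all funnel into the
// __kmpc_dispatch_init/next protocol. Roughly, "schedule(static[,chunk])"
// takes the static paths and "schedule(dynamic|guided|auto|runtime)" ends in
// applyDynamicWorkshareLoop.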
  unsigned Bitwidth = Ty->getIntegerBitWidth();
  // ...
      M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
  // ...
      M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
  // ...
static FunctionCallee
// ...
  unsigned Bitwidth = Ty->getIntegerBitWidth();
  // ...
      M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
  // ...
      M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
  // ...
static FunctionCallee
// ...
  unsigned Bitwidth = Ty->getIntegerBitWidth();
  // ...
      M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_4u);
  // ...
      M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_8u);
OpenMPIRBuilder::applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
    // ...
    bool NeedsBarrier, Value *Chunk) {
  assert(CLI->isValid() && "Requires a valid canonical loop");
  // ...
         "Require dedicated allocate IP");
  // ...
         "Require valid schedule type");
  // ...
  bool Ordered = (SchedType & OMPScheduleType::ModifierOrdered) ==
                 OMPScheduleType::ModifierOrdered;
  // ...
  uint32_t SrcLocStrSize;
  // ...
  Type *IVTy = IV->getType();
  // ...
  Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
  // ...
  Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
  Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound");
  Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound");
  Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride");
  // ...
  Constant *One = ConstantInt::get(IVTy, 1);
  Builder.CreateStore(One, PLowerBound);
  // ...
  Builder.CreateStore(UpperBound, PUpperBound);
  Builder.CreateStore(One, PStride);
  // ...
      ConstantInt::get(I32Type, static_cast<int>(SchedType));
  // ...
  Builder.SetInsertPoint(OuterCond, OuterCond->getFirstInsertionPt());
  // ...
      {SrcLoc, ThreadNum, PLastIter, PLowerBound, PUpperBound, PStride});
  Constant *Zero32 = ConstantInt::get(I32Type, 0);
  // ...
      Builder.CreateSub(Builder.CreateLoad(IVTy, PLowerBound), One, "lb");
  Builder.CreateCondBr(MoreWork, Header, Exit);
  // ...
  PI->setIncomingBlock(0, OuterCond);
  PI->setIncomingValue(0, LowerBound);
  // ...
  Br->setSuccessor(0, OuterCond);
  // ...
  UpperBound = Builder.CreateLoad(IVTy, PUpperBound, "ub");
  // ...
  CI->setOperand(1, UpperBound);
  // ...
  assert(BI->getSuccessor(1) == Exit);
  BI->setSuccessor(1, OuterCond);
  // ...
      omp::Directive::OMPD_for, /*ForceSimpleCall=*/false,
  // ...
    return BarrierIP.takeError();
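// The dynamic lowering rewires the canonical loop into the dispatch
// protocol: after __kmpc_dispatch_init, each outer-loop trip asks the
// runtime for another chunk and runs the body over it. Schematically:
//   __kmpc_dispatch_init_4u(loc, tid, sched, lb, ub, stride, chunk);
//   while (__kmpc_dispatch_next_4u(loc, tid, &last, &lb, &ub, &stride))
//     for (iv = lb; iv <= ub; ++iv) body(iv);
// __kmpc_dispatch_next returns nonzero while chunks remain, which is the
// MoreWork branch constructed above.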
  auto HasRemainingUses = [&BBsToErase](BasicBlock *BB) {
    // ...
      if (BBsToErase.count(UseInst->getParent()))
    // ...
  };
  while (BBsToErase.remove_if(HasRemainingUses)) {
    // ...
  }
  // ...
  assert(Loops.size() >= 1 && "At least one loop required");
  size_t NumLoops = Loops.size();
  // ...
    return Loops.front();
    Loop->collectControlBlocks(OldControlBBs);
  // ...
  if (ComputeIP.isSet())
    // ...
  Value *CollapsedTripCount = nullptr;
  // ...
           "All loops to collapse must be valid canonical loops");
    Value *OrigTripCount = L->getTripCount();
    if (!CollapsedTripCount) {
      CollapsedTripCount = OrigTripCount;
      // ...
    } else {
      CollapsedTripCount =
          Builder.CreateNUWMul(CollapsedTripCount, OrigTripCount);
    }
  // ...
      OrigPreheader->getNextNode(), OrigAfter, "collapsed");
  // ...
  Builder.restoreIP(Result->getBodyIP());
  // ...
  Value *Leftover = Result->getIndVar();
  // ...
  NewIndVars.resize(NumLoops);
  for (int i = NumLoops - 1; i >= 1; --i) {
    Value *OrigTripCount = Loops[i]->getTripCount();
    // ...
    Value *NewIndVar = Builder.CreateURem(Leftover, OrigTripCount);
    NewIndVars[i] = NewIndVar;
    // ...
    Leftover = Builder.CreateUDiv(Leftover, OrigTripCount);
  }
  NewIndVars[0] = Leftover;
  // ...
  BasicBlock *ContinueBlock = Result->getBody();
  // ...
  auto ContinueWith = [&ContinueBlock, &ContinuePred, DL](BasicBlock *Dest,
  // ...
    ContinueBlock = nullptr;
    ContinuePred = NextSrc;
  };
  // ...
  for (size_t i = 0; i < NumLoops - 1; ++i)
    ContinueWith(Loops[i]->getBody(), Loops[i + 1]->getHeader());
  // ...
  for (size_t i = NumLoops - 1; i > 0; --i)
    ContinueWith(Loops[i]->getAfter(), Loops[i - 1]->getLatch());
  // ...
  ContinueWith(Result->getLatch(), nullptr);
  // ...
  for (size_t i = 0; i < NumLoops; ++i)
    Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
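// Index reconstruction used above: with trip counts T0..Tn-1, the collapsed
// IV is decomposed from the innermost loop outward via repeated URem/UDiv:
//   iv[n-1] = collapsed % T[n-1];  collapsed /= T[n-1];  ...  iv[0] = rest;
// Example with T = {4, 3}: collapsed = 7 gives iv1 = 7 % 3 = 1 and
// iv0 = 7 / 3 = 2, i.e. the 8th iteration of the 4x3 nest.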
std::vector<CanonicalLoopInfo *>
// ...
         "Must pass as many tile sizes as there are loops");
  int NumLoops = Loops.size();
  assert(NumLoops >= 1 && "At least one loop to tile required");
  // ...
    Loop->collectControlBlocks(OldControlBBs);
  // ...
    assert(L->isValid() && "All input loops must be valid canonical loops");
    OrigTripCounts.push_back(L->getTripCount());
  // ...
  for (int i = 0; i < NumLoops - 1; ++i) {
    // ...
  }
  // ...
  for (int i = 0; i < NumLoops; ++i) {
    // ...
    Value *OrigTripCount = OrigTripCounts[i];
    // ...
    Value *FloorTripOverflow =
        Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));
    // ...
    FloorTripOverflow = Builder.CreateZExt(FloorTripOverflow, IVType);
    Value *FloorTripCount =
        Builder.CreateAdd(FloorCompleteTripCount, FloorTripOverflow,
                          "omp_floor" + Twine(i) + ".tripcount", true);
    // ...
    FloorCompleteCount.push_back(FloorCompleteTripCount);
  }
  // ...
  std::vector<CanonicalLoopInfo *> Result;
  Result.reserve(NumLoops * 2);
  // ...
  auto EmbeddNewLoop =
      [this, DL, F, InnerEnter, &Enter, &Continue, &OutroInsertBefore](
  // ...
        DL, TripCount, F, InnerEnter, OutroInsertBefore, Name);
    // ...
    Enter = EmbeddedLoop->getBody();
    // ...
    OutroInsertBefore = EmbeddedLoop->getLatch();
    return EmbeddedLoop;
  };
  // ...
      const Twine &NameBase) {
    // ...
          EmbeddNewLoop(P.value(), NameBase + Twine(P.index()));
      Result.push_back(EmbeddedLoop);
    // ...
  };
  EmbeddNewLoops(FloorCount, "floor");
  // ...
  for (int i = 0; i < NumLoops; ++i) {
    // ...
    Value *FloorIsEpilogue =
    // ...
    Value *TileTripCount =
    // ...
  }
  EmbeddNewLoops(TileCounts, "tile");
  // ...
  for (std::pair<BasicBlock *, BasicBlock *> P : InbetweenCode) {
    // ...
    BodyEnter = nullptr;
    BodyEntered = ExitBB;
  }
  // ...
  Builder.restoreIP(Result.back()->getBodyIP());
  for (int i = 0; i < NumLoops; ++i) {
    // ...
    Value *OrigIndVar = OrigIndVars[i];
    // ...
  }
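// Floor/tile trip-count arithmetic realized above: for trip count T and tile
// size S, FloorCompleteTripCount = T / S, and the floor loop runs
// T/S + (T % S != 0) iterations; every tile but possibly the last runs S
// iterations, the epilogue tile runs T % S. Example: T = 10, S = 4 gives
// 3 floor iterations with tiles of 4, 4 and 2.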
  if (Properties.empty())
    // ...
  assert(Loop->isValid() && "Expecting a valid CanonicalLoopInfo");
  // ...
  assert(Latch && "A valid CanonicalLoopInfo must have a unique latch");
  // ...
    if (I.mayReadOrWriteMemory()) {
      // ...
      I.setMetadata(LLVMContext::MD_access_group, AccessGroup);
    }
    const Twine &NamePrefix) {
  // ...
      C, NamePrefix + ".if.then", Cond->getParent(), Cond->getNextNode());
  // ...
      C, NamePrefix + ".if.else", Cond->getParent(), CanonicalLoop->getExit());
  // ...
  Builder.SetInsertPoint(SplitBeforeIt);
  // ...
  Builder.CreateCondBr(IfCond, ThenBlock, ElseBlock);
  // ...
  spliceBB(IP, ThenBlock, false, Builder.getCurrentDebugLocation());
  // ...
  Builder.SetInsertPoint(ElseBlock);
  // ...
  ExistingBlocks.reserve(L->getNumBlocks() + 1);
  // ...
  ExistingBlocks.append(L->block_begin(), L->block_end());
  // ...
  assert(LoopCond && LoopHeader && "Invalid loop structure");
  // ...
    if (Block == L->getLoopPreheader() || Block == L->getLoopLatch() ||
    // ...
    if (Block == ThenBlock)
      NewBB->setName(NamePrefix + ".if.else");
    // ...
    VMap[Block] = NewBB;
  // ...
  L->getLoopLatch()->splitBasicBlock(
      L->getLoopLatch()->begin(), NamePrefix + ".pre_latch", /*Before=*/true);
  // ...
  L->addBasicBlockToLoop(ThenBlock, LI);
  if (TargetTriple.isX86()) {
    if (Features.lookup("avx512f"))
      // ...
    else if (Features.lookup("avx"))
      // ...
  }
  if (TargetTriple.isPPC())
    // ...
  if (TargetTriple.isWasm())
    // ...
    Value *IfCond, OrderKind Order,
  // ...
  if (AlignedVars.size()) {
    // ...
    for (auto &AlignedItem : AlignedVars) {
      Value *AlignedPtr = AlignedItem.first;
      Value *Alignment = AlignedItem.second;
      // ...
      Builder.CreateAlignmentAssumption(F->getDataLayout(), AlignedPtr,
      // ...
    }
  }
  // ...
    createIfVersion(CanonicalLoop, IfCond, VMap, LIA, LI, L, "simd");
  // ...
    Reachable.insert(Block);
  // ...
  if ((Safelen == nullptr) || (Order == OrderKind::OMP_ORDER_concurrent))
    // ...
      Ctx, {MDString::get(Ctx, "llvm.loop.vectorize.enable"), BoolConst}));
  // ...
  if (Simdlen || Safelen) {
    // ...
    ConstantInt *VectorizeWidth = Simdlen == nullptr ? Safelen : Simdlen;
static std::unique_ptr<TargetMachine>
// ...
  StringRef CPU = F->getFnAttribute("target-cpu").getValueAsString();
  StringRef Features = F->getFnAttribute("target-features").getValueAsString();
  // ...
      std::nullopt, OptLevel));
  // ...
      [&](const Function &F) { return TM->getTargetTransformInfo(F); });
  FAM.registerPass([&]() { return TIRA; });
  // ...
  assert(L && "Expecting CanonicalLoopInfo to be recognized as a loop");
  // ...
      nullptr, ORE, static_cast<int>(OptLevel),
  // ...
      << " Threshold=" << UP.Threshold << "\n"
  // ...
      << " PartialOptSizeThreshold="
  // ...
    Ptr = Load->getPointerOperand();
  // ...
    Ptr = Store->getPointerOperand();
  // ...
    if (Alloca->getParent() == &F->getEntryBlock())
      // ...
  int MaxTripCount = 0;
  bool MaxOrZero = false;
  unsigned TripMultiple = 0;
  // ...
  bool UseUpperBound = false;
  // ...
      MaxTripCount, MaxOrZero, TripMultiple, UCE, UP, PP,
  // ...
  unsigned Factor = UP.Count;
  LLVM_DEBUG(dbgs() << "Suggesting unroll factor of " << Factor << "\n");
  // ...
  assert(Factor >= 0 && "Unroll factor must not be negative");
  // ...
      Ctx, {MDString::get(Ctx, "llvm.loop.unroll.count"), FactorConst}));
  // ...
  *UnrolledCLI = Loop;
  // ...
         "unrolling only makes sense with a factor of 2 or larger");
  Type *IndVarTy = Loop->getIndVarType();
  // ...
  std::vector<CanonicalLoopInfo *> LoopNest =
  // ...
      Ctx, {MDString::get(Ctx, "llvm.loop.unroll.count"), FactorConst})});
  // ...
  (*UnrolledCLI)->assertOK();
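// Partial unrolling is delegated to LoopUnrollPass via metadata rather than
// performed here: the loop's !llvm.loop node is extended with
//   !{!"llvm.loop.unroll.count", i32 Factor}
// and the pass rewrites the loop later. Equivalent textual IR:
//   br i1 %cmp, label %body, label %exit, !llvm.loop !0
//   !0 = distinct !{!0, !1}
//   !1 = !{!"llvm.loop.unroll.count", i32 4}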
  Value *Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
  // ...
  if (!CPVars.empty()) {
    // ...
  }
  // ...
  Directive OMPD = Directive::OMPD_single;
  // ...
  Value *Args[] = {Ident, ThreadId};
  // ...
    if (Error Err = FiniCB(IP))
      // ...
  EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCBWrapper,
  // ...
  for (size_t I = 0, E = CPVars.size(); I < E; ++I)
    // ...
        ConstantInt::get(Int64, 0), CPVars[I],
  // ...
  } else if (!IsNowait) {
    // ...
        omp::Directive::OMPD_unknown, /*ForceSimpleCall=*/false,
  // ...
  }
  // ...
  Directive OMPD = Directive::OMPD_critical;
  // ...
  Value *LockVar = getOMPCriticalRegionLock(CriticalName);
  Value *Args[] = {Ident, ThreadId, LockVar};
  // ...
  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
    const Twine &Name, bool IsDependSource) {
  // ...
         "OpenMP runtime requires depend vec with i64 type");
  // ...
  for (unsigned I = 0; I < NumLoops; ++I) {
    // ...
  }
  Value *Args[] = {Ident, ThreadId, DependBaseAddrGEP};
  // ...
  Directive OMPD = Directive::OMPD_ordered;
  // ...
  Value *Args[] = {Ident, ThreadId};
  // ...
  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
    bool HasFinalize, bool IsCancellable) {
  // ...
  BasicBlock *EntryBB = Builder.GetInsertBlock();
  // ...
      emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
  // ...
         "Unexpected control flow graph state!!");
  // ...
      emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
  // ...
    return AfterIP.takeError();
  // ...
         "Unexpected Insertion point location!");
  // ...
  auto InsertBB = merged ? ExitPredBB : ExitBB;
  // ...
  Builder.SetInsertPoint(InsertBB);
  return Builder.saveIP();
  // ...
    Directive OMPD, Value *EntryCall, BasicBlock *ExitBB, bool Conditional) {
  // ...
  if (!Conditional || !EntryCall)
    // ...
  auto *UI = new UnreachableInst(Builder.getContext(), ThenBB);
  // ...
  Builder.CreateCondBr(CallBool, ThenBB, ExitBB);
  // ...
  UI->eraseFromParent();
  // ...
    omp::Directive OMPD, InsertPointTy FinIP, Instruction *ExitCall,
  // ...
         "Unexpected finalization stack state!");
  // ...
  assert(Fi.DK == OMPD && "Unexpected Directive for Finalization call!");
  // ...
  if (Error Err = Fi.mergeFiniBB(Builder, FinIP.getBlock()))
    return std::move(Err);
  // ...
  Builder.SetInsertPoint(FinIP.getBlock()->getTerminator());
  // ...
  return IRBuilder<>::InsertPoint(ExitCall->getParent(),
7353 "copyin.not.master.end");
7360 Builder.SetInsertPoint(OMP_Entry);
7361 Value *MasterPtr =
Builder.CreatePtrToInt(MasterAddr, IntPtrTy);
7362 Value *PrivatePtr =
Builder.CreatePtrToInt(PrivateAddr, IntPtrTy);
7363 Value *cmp =
Builder.CreateICmpNE(MasterPtr, PrivatePtr);
7364 Builder.CreateCondBr(cmp, CopyBegin, CopyEnd);
7366 Builder.SetInsertPoint(CopyBegin);
7383 Value *Args[] = {ThreadId,
Size, Allocator};
7400 Value *Args[] = {ThreadId, Addr, Allocator};
    Value *DependenceAddress, bool HaveNowaitClause) {
  // ...
  if (Device == nullptr)
    // ...
  Constant *InteropTypeVal = ConstantInt::get(Int32, (int)InteropType);
  if (NumDependences == nullptr) {
    NumDependences = ConstantInt::get(Int32, 0);
    // ...
  }
  Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
  // ...
      Ident, ThreadId, InteropVar, InteropTypeVal,
      Device, NumDependences, DependenceAddress, HaveNowaitClauseVal};
  // ...
    Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause) {
  // ...
  if (Device == nullptr)
    // ...
  if (NumDependences == nullptr) {
    NumDependences = ConstantInt::get(Int32, 0);
    // ...
  }
  Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
  // ...
      Ident, ThreadId, InteropVar, Device,
      NumDependences, DependenceAddress, HaveNowaitClauseVal};
  // ...
    Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause) {
  // ...
  if (Device == nullptr)
    // ...
  if (NumDependences == nullptr) {
    NumDependences = ConstantInt::get(Int32, 0);
    // ...
  }
  Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
  // ...
      Ident, ThreadId, InteropVar, Device,
      NumDependences, DependenceAddress, HaveNowaitClauseVal};
  assert(!Attrs.MaxThreads.empty() && !Attrs.MaxTeams.empty() &&
         "expected num_threads and num_teams to be specified");
  // ...
  const std::string DebugPrefix = "_debug__";
  if (KernelName.ends_with(DebugPrefix)) {
    KernelName = KernelName.drop_back(DebugPrefix.length());
    Kernel = M.getFunction(KernelName);
    // ...
  }
  // ...
  if (Attrs.MinTeams > 1 || Attrs.MaxTeams.front() > 0)
    // ...
  int32_t MaxThreadsVal = Attrs.MaxThreads.front();
  if (MaxThreadsVal < 0)
    MaxThreadsVal = std::max(
  // ...
  if (MaxThreadsVal > 0)
    // ...
      omp::RuntimeFunction::OMPRTL___kmpc_target_init);
  // ...
  Twine DynamicEnvironmentName = KernelName + "_dynamic_environment";
  Constant *DynamicEnvironmentInitializer =
  // ...
      DynamicEnvironmentInitializer, DynamicEnvironmentName,
  // ...
      DL.getDefaultGlobalsAddressSpace());
  // ...
      DynamicEnvironmentGV->getType() == DynamicEnvironmentPtr
          ? DynamicEnvironmentGV
          // ...
            DynamicEnvironmentPtr);
  // ...
      ConfigurationEnvironment, {UseGenericStateMachineVal,
                                 MayUseNestedParallelismVal,
  // ...
                                 ReductionBufferLength,
  // ...
      KernelEnvironment, {ConfigurationEnvironmentInitializer,
  // ...
  std::string KernelEnvironmentName =
      (KernelName + "_kernel_environment").str();
  // ...
      KernelEnvironmentInitializer, KernelEnvironmentName,
  // ...
      DL.getDefaultGlobalsAddressSpace());
  // ...
      KernelEnvironmentGV->getType() == KernelEnvironmentPtr
          ? KernelEnvironmentGV
          // ...
            KernelEnvironmentPtr);
  Value *KernelLaunchEnvironment = DebugKernelWrapper->getArg(0);
  // ...
  KernelLaunchEnvironment =
      KernelLaunchEnvironment->getType() == KernelLaunchEnvParamTy
          ? KernelLaunchEnvironment
          : Builder.CreateAddrSpaceCast(KernelLaunchEnvironment,
                                        KernelLaunchEnvParamTy);
  // ...
      Fn, {KernelEnvironment, KernelLaunchEnvironment});
  // ...
  auto *UI = Builder.CreateUnreachable();
  // ...
  Builder.SetInsertPoint(WorkerExitBB);
  // ...
  Builder.SetInsertPoint(CheckBBTI);
  Builder.CreateCondBr(ExecUserCode, UI->getParent(), WorkerExitBB);
  // ...
  CheckBBTI->eraseFromParent();
  UI->eraseFromParent();
    int32_t TeamsReductionDataSize, int32_t TeamsReductionBufferLength) {
  // ...
      omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
  // ...
  if (!TeamsReductionBufferLength || !TeamsReductionDataSize)
    // ...
  const std::string DebugPrefix = "_debug__";
  // ...
    KernelName = KernelName.drop_back(DebugPrefix.length());
  auto *KernelEnvironmentGV =
      M.getNamedGlobal((KernelName + "_kernel_environment").str());
  assert(KernelEnvironmentGV && "Expected kernel environment global\n");
  auto *KernelEnvironmentInitializer = KernelEnvironmentGV->getInitializer();
  // ...
      KernelEnvironmentInitializer,
      ConstantInt::get(Int32, TeamsReductionDataSize), {0, 7});
  // ...
      NewInitializer, ConstantInt::get(Int32, TeamsReductionBufferLength),
  // ...
  KernelEnvironmentGV->setInitializer(NewInitializer);
  if (Kernel.hasFnAttribute(Name)) {
    int32_t OldLimit = Kernel.getFnAttributeAsParsedInteger(Name);
    // ...
  }
// ...
std::pair<int32_t, int32_t>
// ...
  int32_t ThreadLimit =
      Kernel.getFnAttributeAsParsedInteger("omp_target_thread_limit");
  // ...
    const auto &Attr = Kernel.getFnAttribute("amdgpu-flat-work-group-size");
    if (!Attr.isValid() || !Attr.isStringAttribute())
      return {0, ThreadLimit};
    auto [LBStr, UBStr] = Attr.getValueAsString().split(',');
    // ...
      return {0, ThreadLimit};
    UB = ThreadLimit ? std::min(ThreadLimit, UB) : UB;
    // ...
  if (Kernel.hasFnAttribute("nvvm.maxntid")) {
    int32_t UB = Kernel.getFnAttributeAsParsedInteger("nvvm.maxntid");
    return {0, ThreadLimit ? std::min(ThreadLimit, UB) : UB};
  }
  return {0, ThreadLimit};
  // ...
  Kernel.addFnAttr("omp_target_thread_limit", std::to_string(UB));
  // ...
  Kernel.addFnAttr("amdgpu-flat-work-group-size",
  // ...
std::pair<int32_t, int32_t>
// ...
  return {0, Kernel.getFnAttributeAsParsedInteger("omp_target_num_teams")};
  // ...
    int32_t LB, int32_t UB) {
  // ...
  Kernel.addFnAttr("omp_target_num_teams", std::to_string(LB));
void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(
    // ...
  else if (T.isNVPTX())
    // ...
  else if (T.isSPIRV())
    // ...
Constant *OpenMPIRBuilder::createOutlinedFunctionID(Function *OutlinedFn,
                                                    StringRef EntryFnIDName) {
  if (Config.isTargetDevice()) {
    assert(OutlinedFn && "The outlined function must exist if embedded");
    // ...
  }
  return new GlobalVariable(
  // ...
Constant *OpenMPIRBuilder::createTargetRegionEntryAddr(Function *OutlinedFn,
                                                       StringRef EntryFnName) {
  // ...
  assert(!M.getGlobalVariable(EntryFnName, true) &&
         "Named kernel already exists?");
  return new GlobalVariable(
  // ...
  if (Config.isTargetDevice() || !Config.openMPOffloadMandatory()) {
    // ...
    OutlinedFn = *CBResult;
  } else {
    OutlinedFn = nullptr;
  }
  // ...
  if (!IsOffloadEntry)
    // ...
  std::string EntryFnIDName =
      // ...
      ? std::string(EntryFnName)
  // ...
      EntryFnName, EntryFnIDName);
  // ...
  setOutlinedTargetRegionFunctionAttributes(OutlinedFn);
  auto OutlinedFnID = createOutlinedFunctionID(OutlinedFn, EntryFnIDName);
  auto EntryAddr = createTargetRegionEntryAddr(OutlinedFn, EntryFnName);
  // ...
      EntryInfo, EntryAddr, OutlinedFnID,
  // ...
  return OutlinedFnID;
  if (Config.IsTargetDevice.value_or(false)) {
    // ...
  }
  // ...
  bool IsStandAlone = !BodyGenCB;
  // ...
    MapInfo = &GenMapInfoCB(Builder.saveIP());
    // ...
        AllocaIP, Builder.saveIP(), *MapInfo, Info, CustomMapperCB,
        /*IsNonContiguous=*/true, DeviceAddrCB))
    // ...
    Value *PointerNum = Builder.getInt32(Info.NumberOfPtrs);
    // ...
        SrcLocInfo, DeviceID,
    // ...
    assert(MapperFunc && "MapperFunc missing for standalone target data");
    // ...
    if (Info.HasNoWait) {
      // ...
    }
    // ...
    if (Info.HasNoWait) {
      // ...
      emitBlock(OffloadContBlock, CurFn, /*IsFinished=*/true);
      // ...
    }
    // ...
    bool RequiresOuterTargetTask = Info.HasNoWait;
    if (!RequiresOuterTargetTask)
      cantFail(TaskBodyCB(nullptr, nullptr,
    // ...
          {}, RTArgs, Info.HasNoWait));
    // ...
        omp::OMPRTL___tgt_target_data_begin_mapper);
    // ...
    for (auto DeviceMap : Info.DevicePtrInfoMap) {
      // ...
      Builder.CreateStore(LI, DeviceMap.second.second);
      // ...
    }
    // ...
    Value *PointerNum = Builder.getInt32(Info.NumberOfPtrs);
    // ...
    Value *OffloadingArgs[] = {SrcLocInfo, DeviceID,
    // ...
    return emitIfClause(IfCond, BeginThenGen, BeginElseGen, AllocaIP);
  return BeginThenGen(AllocaIP, Builder.saveIP());
  // ...
    return emitIfClause(IfCond, EndThenGen, EndElseGen, AllocaIP);
  return EndThenGen(AllocaIP, Builder.saveIP());
  // ...
    return emitIfClause(IfCond, BeginThenGen, EndElseGen, AllocaIP);
  return BeginThenGen(AllocaIP, Builder.saveIP());
    bool IsGPUDistribute) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  // ...
  if (IsGPUDistribute)
    // ...
        ? (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_4
                    : omp::OMPRTL___kmpc_distribute_static_init_4u)
        : (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_8
                    : omp::OMPRTL___kmpc_distribute_static_init_8u);
  else
    Name = IVSize == 32 ? (IVSigned ? omp::OMPRTL___kmpc_for_static_init_4
                                    : omp::OMPRTL___kmpc_for_static_init_4u)
                        : (IVSigned ? omp::OMPRTL___kmpc_for_static_init_8
                                    : omp::OMPRTL___kmpc_for_static_init_8u);
  // ...
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  // ...
      ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_4
                  : omp::OMPRTL___kmpc_dispatch_init_4u)
      : (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_8
                  : omp::OMPRTL___kmpc_dispatch_init_8u);
  // ...
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  // ...
      ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_4
                  : omp::OMPRTL___kmpc_dispatch_next_4u)
      : (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_8
                  : omp::OMPRTL___kmpc_dispatch_next_8u);
  // ...
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  // ...
      ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_4
                  : omp::OMPRTL___kmpc_dispatch_fini_4u)
      : (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_8
                  : omp::OMPRTL___kmpc_dispatch_fini_8u);
    DenseMap<Value *, std::tuple<Value *, unsigned>> &ValueReplacementMap) {
  // ...
  auto GetUpdatedDIVariable = [&](DILocalVariable *OldVar, unsigned arg) {
    // ...
    if (NewVar && (arg == NewVar->getArg()))
      // ...
  };
  auto UpdateDebugRecord = [&](auto *DR) {
    // ...
    for (auto Loc : DR->location_ops()) {
      auto Iter = ValueReplacementMap.find(Loc);
      if (Iter != ValueReplacementMap.end()) {
        DR->replaceVariableLocationOp(Loc, std::get<0>(Iter->second));
        ArgNo = std::get<1>(Iter->second) + 1;
      }
    }
    DR->setVariable(GetUpdatedDIVariable(OldVar, ArgNo));
  };
  // ...
         "Unexpected debug intrinsic");
  // ...
    UpdateDebugRecord(&DVR);
  // ...
  Module *M = Func->getParent();
  // ...
      DB.createQualifiedType(dwarf::DW_TAG_pointer_type, nullptr);
  // ...
      NewSP, "dyn_ptr", 1, NewSP->getFile(), 0,
      VoidPtrTy, false, DINode::DIFlags::FlagArtificial);
  // ...
  DB.insertDeclare(&(*Func->arg_begin()), Var, DB.createExpression(), Loc,
  for (auto &Arg : Inputs)
    ParameterTypes.push_back(Arg->getType()->isPointerTy()
  // ...
  for (auto &Arg : Inputs)
    ParameterTypes.push_back(Arg->getType());
  // ...
  auto BB = Builder.GetInsertBlock();
  auto M = BB->getModule();
  // ...
  if (TargetCpuAttr.isStringAttribute())
    Func->addFnAttr(TargetCpuAttr);
  // ...
  auto TargetFeaturesAttr = ParentFn->getFnAttribute("target-features");
  if (TargetFeaturesAttr.isStringAttribute())
    Func->addFnAttr(TargetFeaturesAttr);
  // ...
  OMPBuilder.emitUsed("llvm.compiler.used", {ExecMode});
  // ...
  Builder.SetInsertPoint(EntryBB);
  // ...
  BasicBlock *UserCodeEntryBB = Builder.GetInsertBlock();
  // ...
      splitBB(Builder, /*CreateBranch=*/true, "outlined.body");
  // ...
  Builder.restoreIP(*AfterIP);
  // ...
  Builder.CreateRetVoid();
  // ...
  auto AllocaIP = Builder.saveIP();
  // ...
  const auto &ArgRange =
      // ...
      ? make_range(Func->arg_begin() + 1, Func->arg_end())
  // ...
    if (Instr->getFunction() == Func)
      Instr->replaceUsesOfWith(Input, InputCopy);
  // ...
  for (auto InArg : zip(Inputs, ArgRange)) {
    // ...
    Argument &Arg = std::get<1>(InArg);
    Value *InputCopy = nullptr;
    // ...
        ArgAccessorFuncCB(Arg, Input, InputCopy, AllocaIP, Builder.saveIP());
    // ...
    Builder.restoreIP(*AfterIP);
    ValueReplacementMap[Input] = std::make_tuple(InputCopy, Arg.getArgNo());
    // ...
      DeferredReplacement.push_back(std::make_pair(Input, InputCopy));
    // ...
      ReplaceValue(Input, InputCopy, Func);
  }
  // ...
  for (auto Deferred : DeferredReplacement)
    ReplaceValue(std::get<0>(Deferred), std::get<1>(Deferred), Func);
  // ...
      ValueReplacementMap);
    Value *TaskWithPrivates, Type *TaskWithPrivatesTy) {
  // ...
  Type *TaskTy = OMPIRBuilder.Task;
  // ...
      Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 0);
  Value *Shareds = TaskT;
  // ...
  if (TaskWithPrivatesTy != TaskTy)
    Shareds = Builder.CreateStructGEP(TaskTy, TaskT, 0);
  // ...
    const size_t NumOffloadingArrays, const int SharedArgsOperandNo) {
  // ...
  assert((!NumOffloadingArrays || PrivatesTy) &&
         "PrivatesTy cannot be nullptr when there are offloadingArrays"
  // ...
  Type *TaskPtrTy = OMPBuilder.TaskPtr;
  [[maybe_unused]] Type *TaskTy = OMPBuilder.Task;
  // ...
      ".omp_target_task_proxy_func", Builder.GetInsertBlock()->getModule());
  Value *ThreadId = ProxyFn->getArg(0);
  Value *TaskWithPrivates = ProxyFn->getArg(1);
  ThreadId->setName("thread.id");
  TaskWithPrivates->setName("task");
  // ...
  bool HasShareds = SharedArgsOperandNo > 0;
  bool HasOffloadingArrays = NumOffloadingArrays > 0;
  // ...
  Builder.SetInsertPoint(EntryBB);
  // ...
  if (HasOffloadingArrays) {
    assert(TaskTy != TaskWithPrivatesTy &&
           "If there are offloading arrays to pass to the target"
           "TaskTy cannot be the same as TaskWithPrivatesTy");
    // ...
        Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 1);
    for (unsigned int i = 0; i < NumOffloadingArrays; ++i)
      // ...
          Builder.CreateStructGEP(PrivatesTy, Privates, i));
  }
  // ...
    auto *ArgStructAlloca =
    // ...
    assert(ArgStructAlloca &&
           "Unable to find the alloca instruction corresponding to arguments "
           "for extracted function");
    // ...
        Builder.CreateAlloca(ArgStructType, nullptr, "structArg");
    // ...
    Value *SharedsSize =
        Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
    // ...
        OMPBuilder, Builder, TaskWithPrivates, TaskWithPrivatesTy);
    // ...
    Builder.CreateMemCpy(
        NewArgStructAlloca, NewArgStructAlloca->getAlign(), LoadShared,
    // ...
    KernelLaunchArgs.push_back(NewArgStructAlloca);
  // ...
  Builder.CreateRetVoid();
  // ...
    return GEP->getSourceElementType();
  // ...
    return Alloca->getAllocatedType();
  // ...
  if (OffloadingArraysToPrivatize.empty())
    return OMPIRBuilder.Task;
  // ...
  for (Value *V : OffloadingArraysToPrivatize) {
    assert(V->getType()->isPointerTy() &&
           "Expected pointer to array to privatize. Got a non-pointer value "
    // ...
    assert(ArrayTy && "ArrayType cannot be nullptr");
    // ...
  }
  // ...
      "struct.task_with_privates");
8556 EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
8693 TargetTaskAllocaBB->
begin());
8697 OI.
EntryBB = TargetTaskAllocaBB;
8703 Builder, AllocaIP, ToBeDeleted, TargetTaskAllocaIP,
"global.tid",
false));
8706 Builder.restoreIP(TargetTaskBodyIP);
8707 if (
Error Err = TaskBodyCB(DeviceID, RTLoc, TargetTaskAllocaIP))
8725 bool NeedsTargetTask = HasNoWait && DeviceID;
8726 if (NeedsTargetTask) {
8732 OffloadingArraysToPrivatize.
push_back(V);
8737 OI.
PostOutlineCB = [
this, ToBeDeleted, Dependencies, NeedsTargetTask,
8738 DeviceID, OffloadingArraysToPrivatize](
8741 "there must be a single user for the outlined function");
8755 const unsigned int NumStaleCIArgs = StaleCI->
arg_size();
8756 bool HasShareds = NumStaleCIArgs > OffloadingArraysToPrivatize.
size() + 1;
8758 NumStaleCIArgs == (OffloadingArraysToPrivatize.
size() + 2)) &&
8759 "Wrong number of arguments for StaleCI when shareds are present");
8760 int SharedArgOperandNo =
8761 HasShareds ? OffloadingArraysToPrivatize.
size() + 1 : 0;
8767 if (!OffloadingArraysToPrivatize.
empty())
8772 *
this,
Builder, StaleCI, PrivatesTy, TaskWithPrivatesTy,
8773 OffloadingArraysToPrivatize.
size(), SharedArgOperandNo);
8775 LLVM_DEBUG(
dbgs() <<
"Proxy task entry function created: " << *ProxyFn
8778 Builder.SetInsertPoint(StaleCI);
8795 OMPRTL___kmpc_omp_target_task_alloc);
8807 M.getDataLayout().getTypeStoreSize(TaskWithPrivatesTy));
8814 auto *ArgStructAlloca =
8816 assert(ArgStructAlloca &&
8817 "Unable to find the alloca instruction corresponding to arguments "
8818 "for extracted function");
8819 auto *ArgStructType =
8821 assert(ArgStructType &&
"Unable to find struct type corresponding to "
8822 "arguments for extracted function");
8824 Builder.getInt64(
M.getDataLayout().getTypeStoreSize(ArgStructType));
8843 TaskSize, SharedsSize,
8846 if (NeedsTargetTask) {
8847 assert(DeviceID &&
"Expected non-empty device ID.");
8857 *
this,
Builder, TaskData, TaskWithPrivatesTy);
8858 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
8861 if (!OffloadingArraysToPrivatize.
empty()) {
8863 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskData, 1);
8864 for (
unsigned int i = 0; i < OffloadingArraysToPrivatize.
size(); ++i) {
8865 Value *PtrToPrivatize = OffloadingArraysToPrivatize[i];
8872 "ElementType should match ArrayType");
8875 Value *Dst =
Builder.CreateStructGEP(PrivatesTy, Privates, i);
8877 Dst, Alignment, PtrToPrivatize, Alignment,
8878 Builder.getInt64(
M.getDataLayout().getTypeStoreSize(ElementType)));
8892 if (!NeedsTargetTask) {
8901 ConstantInt::get(
Builder.getInt32Ty(), 0),
8914 }
else if (DepArray) {
8922 {Ident, ThreadID, TaskData,
Builder.getInt32(Dependencies.
size()),
8923 DepArray, ConstantInt::get(
Builder.getInt32Ty(), 0),
8933 I->eraseFromParent();
8938 << *(
Builder.GetInsertBlock()) <<
"\n");
8940 << *(
Builder.GetInsertBlock()->getParent()->getParent())
8952 CustomMapperCB, IsNonContiguous, DeviceAddrCB))
    bool HasNoWait, Value *DynCGroupMem,
// ...
  Builder.restoreIP(IP);
  // ...
  return Builder.saveIP();
// ...
  bool HasDependencies = Dependencies.size() > 0;
  bool RequiresOuterTargetTask = HasNoWait || HasDependencies;
  // ...
  if (OutlinedFnID && DeviceID)
    // ...
        EmitTargetCallFallbackCB, KArgs, DeviceID, RTLoc, TargetTaskAllocaIP);
  // ...
  return EmitTargetCallFallbackCB(OMPBuilder.Builder.saveIP());
  auto &&EmitTargetCallElse =
      // ...
    if (RequiresOuterTargetTask) {
      // ...
          Dependencies, EmptyRTArgs, HasNoWait);
      // ...
      return EmitTargetCallFallbackCB(Builder.saveIP());
    // ...
    Builder.restoreIP(AfterIP);
  // ...
  auto &&EmitTargetCallThen =
      // ...
    Info.HasNoWait = HasNoWait;
    // ...
        AllocaIP, Builder.saveIP(), Info, RTArgs, MapInfo, CustomMapperCB,
    // ...
    NumTeamsC.push_back(RuntimeVal ? RuntimeVal
    auto InitMaxThreadsClause = [&Builder](Value *Clause) {
      // ...
        Clause = Builder.CreateIntCast(Clause, Builder.getInt32Ty(),
    // ...
    auto CombineMaxThreadsClauses = [&Builder](Value *Clause,
                                               Value *&Result) {
      // ...
          Result ? Builder.CreateSelect(Builder.CreateICmpULT(Result, Clause),
    // ...
    Value *MaxThreadsClause =
        // ...
        ? InitMaxThreadsClause(RuntimeAttrs.MaxThreads)
    // ...
    for (auto [TeamsVal, TargetVal] : zip_equal(
    // ...
      Value *TeamsThreadLimitClause = InitMaxThreadsClause(TeamsVal);
      Value *NumThreads = InitMaxThreadsClause(TargetVal);
      // ...
      CombineMaxThreadsClauses(TeamsThreadLimitClause, NumThreads);
      CombineMaxThreadsClauses(MaxThreadsClause, NumThreads);
      // ...
      NumThreadsC.push_back(NumThreads ? NumThreads : Builder.getInt32(0));
    // ...
    unsigned NumTargetItems = Info.NumberOfPtrs;
    // ...
        Builder.getInt64Ty(),
        // ...
        : Builder.getInt64(0);
    // ...
      DynCGroupMem = Builder.getInt32(0);
    // ...
        NumTargetItems, RTArgs, TripCount, NumTeamsC, NumThreadsC,
        DynCGroupMem, HasNoWait, DynCGroupMemFallback);
  // ...
  if (RequiresOuterTargetTask)
    // ...
        RTLoc, AllocaIP, Dependencies,
  // ...
      Builder, OutlinedFnID, EmitTargetCallFallbackCB, KArgs,
      RuntimeAttrs.DeviceID, RTLoc, AllocaIP);
  Builder.restoreIP(AfterIP);
// ...
  if (!OutlinedFnID) {
    cantFail(EmitTargetCallElse(AllocaIP, Builder.saveIP()));
  // ...
    cantFail(EmitTargetCallThen(AllocaIP, Builder.saveIP()));
  // ...
      EmitTargetCallElse, AllocaIP));
// ...
      *this, Builder, IsOffloadEntry, EntryInfo, DefaultAttrs, OutlinedFn,
      OutlinedFnID, Inputs, CBFunc, ArgAccessorFuncCB))
  // ...
  if (!Config.isTargetDevice())
    // ...
        IfCond, OutlinedFn, OutlinedFnID, Inputs, GenMapInfoCB,
        CustomMapperCB, Dependencies, HasNowait, DynCGroupMem,
        DynCGroupMemFallback);
  return OS.str().str();
// ...
  return OpenMPIRBuilder::getNameWithSeparators(Parts, Config.firstSeparator(),
// ...
  auto &Elem = *InternalVars.try_emplace(Name, nullptr).first;
  // ...
  assert(Elem.second->getValueType() == Ty &&
         "OMP internal variable has different type than requested");
  // ...
      : M.getTargetTriple().isAMDGPU()
      // ...
      : DL.getDefaultGlobalsAddressSpace();
  // ...
  const llvm::Align PtrAlign = DL.getPointerABIAlignment(AddressSpaceVal);
  GV->setAlignment(std::max(TypeAlign, PtrAlign));
Value *OpenMPIRBuilder::getOMPCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getNameWithSeparators({Prefix, "var"}, ".", ".");
  // ...
}

// ...
  return SizePtrToInt;
// ...
    std::string VarName) {
  // ...
  return MaptypesArrayGlobal;
    unsigned NumOperands,
// ...
      ArrI8PtrTy, nullptr, ".offload_baseptrs");
  // ...
      ArrI64Ty, nullptr, ".offload_sizes");
// ...
    int64_t DeviceID, unsigned NumOperands) {
  // ...
  Value *ArgsBaseGEP =
      // ...
      {Builder.getInt32(0), Builder.getInt32(0)});
  // ...
      {Builder.getInt32(0), Builder.getInt32(0)});
  Value *ArgSizesGEP =
      // ...
      {Builder.getInt32(0), Builder.getInt32(0)});
  // ...
       Builder.getInt32(NumOperands),
       ArgsBaseGEP, ArgsGEP, ArgSizesGEP, MaptypesArg, MapnamesArg, NullPtr});
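// --- Illustrative sketch, not from the original file: the runtime call above
// receives pointers to the first element of each .offload_* array, produced
// with zero-zero GEPs. ArrTy/Arr are assumed inputs for illustration.
static Value *firstElementGEP(IRBuilder<> &Builder, Type *ArrTy, Value *Arr) {
  // &Arr[0][0]: index 0 into the pointer, then element 0 of the array.
  return Builder.CreateInBoundsGEP(
      ArrTy, Arr, {Builder.getInt32(0), Builder.getInt32(0)});
}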
  assert((!ForEndCall || Info.separateBeginEndCalls()) &&
         "expected region end call to runtime only when end call is separate");
  // ...
  auto VoidPtrTy = UnqualPtrTy;
  auto VoidPtrPtrTy = UnqualPtrTy;
  // ...
  auto Int64PtrTy = UnqualPtrTy;
  // ...
  if (!Info.NumberOfPtrs) {
  // ...
      Info.RTArgs.BasePointersArray,
  // ...
      ArrayType::get(VoidPtrTy, Info.NumberOfPtrs), Info.RTArgs.PointersArray,
  // ...
      ArrayType::get(Int64Ty, Info.NumberOfPtrs), Info.RTArgs.SizesArray,
  // ...
      ForEndCall && Info.RTArgs.MapTypesArrayEnd ? Info.RTArgs.MapTypesArrayEnd
                                                 : Info.RTArgs.MapTypesArray,
  // ...
  if (!Info.EmitDebug)
  // ...
      ArrayType::get(VoidPtrTy, Info.NumberOfPtrs), Info.RTArgs.MapNamesArray,
  // ...
  if (!Info.HasMapper)
  // ...
      Builder.CreatePointerCast(Info.RTArgs.MappersArray, VoidPtrPtrTy);
9398 "struct.descriptor_dim");
9400 enum { OffsetFD = 0, CountFD, StrideFD };
9404 for (
unsigned I = 0, L = 0, E = NonContigInfo.
Dims.
size();
I < E; ++
I) {
9407 if (NonContigInfo.
Dims[
I] == 1)
9412 Builder.CreateAlloca(ArrayTy,
nullptr,
"dims");
9414 for (
unsigned II = 0, EE = NonContigInfo.
Dims[
I];
II < EE; ++
II) {
9415 unsigned RevIdx = EE -
II - 1;
9418 {Builder.getInt64(0), Builder.getInt64(II)});
9420 Value *OffsetLVal =
Builder.CreateStructGEP(DimTy, DimsLVal, OffsetFD);
9422 NonContigInfo.
Offsets[L][RevIdx], OffsetLVal,
9423 M.getDataLayout().getPrefTypeAlign(OffsetLVal->
getType()));
9425 Value *CountLVal =
Builder.CreateStructGEP(DimTy, DimsLVal, CountFD);
9427 NonContigInfo.
Counts[L][RevIdx], CountLVal,
9428 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
9430 Value *StrideLVal =
Builder.CreateStructGEP(DimTy, DimsLVal, StrideFD);
9432 NonContigInfo.
Strides[L][RevIdx], StrideLVal,
9433 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
9437 Value *DAddr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
9438 DimsAddr,
Builder.getPtrTy());
9441 Info.RTArgs.PointersArray, 0,
I);
9443 DAddr,
P,
M.getDataLayout().getPrefTypeAlign(
Builder.getPtrTy()));
void OpenMPIRBuilder::emitUDMapperArrayInitOrDel(
  // ...
  StringRef Prefix = IsInit ? ".init" : ".del";
  // ...
      Builder.CreateICmpSGT(Size, Builder.getInt64(1), "omp.arrayinit.isarray");
  Value *DeleteBit = Builder.CreateAnd(
      // ...
      static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
          OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
  // ...
  Value *BaseIsBegin = Builder.CreateICmpNE(Base, Begin);
  // ...
  Value *PtrAndObjBit = Builder.CreateAnd(
      // ...
      static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
          OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
  PtrAndObjBit = Builder.CreateIsNotNull(PtrAndObjBit);
  BaseIsBegin = Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
  Cond = Builder.CreateOr(IsArray, BaseIsBegin);
  DeleteCond = Builder.CreateIsNull(
  // ...
  DeleteCond = Builder.CreateIsNotNull(
  // ...
      ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
          OpenMPOffloadMappingFlags::OMP_MAP_TO |
          OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
  MapTypeArg = Builder.CreateOr(
      // ...
      static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));
  // ...
  Value *OffloadingArgs[] = {MapperHandle, Base, Begin,
                             ArraySize, MapTypeArg, MapName};
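// --- Illustrative sketch, not from the original file: the compile-time
// analogue of the CreateAnd/IsNotNull bit tests emitted above. Because
// OpenMPOffloadMappingFlags is a scoped enum, the underlying_type casts are
// required; hasDeleteBit is a hypothetical helper name.
static bool hasDeleteBit(OpenMPOffloadMappingFlags F) {
  using U = std::underlying_type_t<OpenMPOffloadMappingFlags>;
  // Non-zero iff the OMP_MAP_DELETE bit is set in the map-type word.
  return static_cast<U>(F) &
         static_cast<U>(OpenMPOffloadMappingFlags::OMP_MAP_DELETE);
}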
9537 MapperFn->
addFnAttr(Attribute::NoInline);
9538 MapperFn->
addFnAttr(Attribute::NoUnwind);
9548 auto SavedIP =
Builder.saveIP();
9549 Builder.SetInsertPoint(EntryBB);
9561 TypeSize ElementSize =
M.getDataLayout().getTypeStoreSize(ElemTy);
9563 Value *PtrBegin = BeginIn;
9569 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
9570 MapType, MapName, ElementSize, HeadBB,
9581 Builder.CreateICmpEQ(PtrBegin, PtrEnd,
"omp.arraymap.isempty");
9582 Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9588 Builder.CreatePHI(PtrBegin->
getType(), 2,
"omp.arraymap.ptrcurrent");
9589 PtrPHI->addIncoming(PtrBegin, HeadBB);
    return Info.takeError();
  // ...
  Value *OffloadingArgs[] = {MapperHandle};
  // ...
  Value *ShiftedPreviousSize =
  // ...
  for (unsigned I = 0; I < Info->BasePointers.size(); ++I) {
    Value *CurBaseArg = Info->BasePointers[I];
    Value *CurBeginArg = Info->Pointers[I];
    Value *CurSizeArg = Info->Sizes[I];
    Value *CurNameArg = Info->Names.size()
    // ...
        static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
    // ...
    Value *MemberMapType =
        Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
    // ...
        static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
            OpenMPOffloadMappingFlags::OMP_MAP_TO |
            OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // ...
        ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
            OpenMPOffloadMappingFlags::OMP_MAP_TO |
            OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    // ...
        static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
            OpenMPOffloadMappingFlags::OMP_MAP_TO)));
    Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // ...
        ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
            OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    // ...
        static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
            OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // ...
        ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
            OpenMPOffloadMappingFlags::OMP_MAP_TO)));
    // ...
    Value *OffloadingArgs[] = {MapperHandle, CurBaseArg, CurBeginArg,
                               CurSizeArg, CurMapType, CurNameArg};
    // ...
    auto ChildMapperFn = CustomMapperCB(I);
    // ...
      return ChildMapperFn.takeError();
    if (*ChildMapperFn) {
    // ...
  Value *PtrNext = Builder.CreateConstGEP1_32(ElemTy, PtrPHI, 1,
                                              "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  Value *IsDone = Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  // ...
  Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
  // ...
  emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn, Size,
                             MapType, MapName, ElementSize, DoneBB,
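// --- Illustrative sketch, not from the original file: the control flow the
// generated mapper follows, expressed as a host-side analogue. Names are
// hypothetical; the real code invokes the offload runtime once per element
// component instead of MapOne.
static void mapArrayElements(char *Begin, char *End, size_t ElemSize,
                             void (*MapOne)(void *Elem)) {
  if (Begin == End)
    return; // corresponds to the "omp.arraymap.isempty" branch
  for (char *P = Begin; P != End; P += ElemSize) // "omp.arraymap.ptrcurrent"
    MapOne(P); // per-element mapping with the recomputed map type
}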
    bool IsNonContiguous,
// ...
  Info.clearArrayInfo();
  // ...
  if (Info.NumberOfPtrs == 0)
  // ...
  Info.RTArgs.BasePointersArray = Builder.CreateAlloca(
      PointerArrayType, nullptr, ".offload_baseptrs");
  // ...
  Info.RTArgs.PointersArray = Builder.CreateAlloca(
      PointerArrayType, nullptr, ".offload_ptrs");
  // ...
      PointerArrayType, nullptr, ".offload_mappers");
  Info.RTArgs.MappersArray = MappersArray;
  // ...
      ConstantInt::get(Int64Ty, 0));
  // ...
  for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
    // ...
    if (IsNonContiguous &&
        static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
        // ...
            OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG))
    // ...
      RuntimeSizes.set(I);
  // ...
  if (RuntimeSizes.all()) {
    // ...
    Info.RTArgs.SizesArray = Builder.CreateAlloca(
        SizeArrayType, nullptr, ".offload_sizes");
    auto *SizesArrayGbl =
    // ...
    if (!RuntimeSizes.any()) {
      Info.RTArgs.SizesArray = SizesArrayGbl;
    // ...
      unsigned IndexSize = M.getDataLayout().getIndexSizeInBits(0);
      Align OffloadSizeAlign = M.getDataLayout().getABIIntegerTypeAlignment(64);
      // ...
          SizeArrayType, nullptr, ".offload_sizes");
      // ...
          Buffer, M.getDataLayout().getPrefTypeAlign(Buffer->getType()),
          SizesArrayGbl, OffloadSizeAlign,
      // ...
      Info.RTArgs.SizesArray = Buffer;
  // ...
  for (auto mapFlag : CombinedInfo.Types)
    // ...
        static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
  // ...
  Info.RTArgs.MapTypesArray = MapTypesArrayGbl;
  // ...
    Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
    Info.EmitDebug = true;
  // ...
    Info.RTArgs.MapNamesArray =
    // ...
    Info.EmitDebug = false;
  if (Info.separateBeginEndCalls()) {
    bool EndMapTypesDiffer = false;
    // ...
    if (Type & static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                   OpenMPOffloadMappingFlags::OMP_MAP_PRESENT)) {
      Type &= ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
          OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
      EndMapTypesDiffer = true;
    // ...
    if (EndMapTypesDiffer) {
      // ...
      Info.RTArgs.MapTypesArrayEnd = MapTypesArrayGbl;
  // ...
  for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
    // ...
        ArrayType::get(PtrTy, Info.NumberOfPtrs), Info.RTArgs.BasePointersArray,
    // ...
    Builder.CreateAlignedStore(BPVal, BP,
                               M.getDataLayout().getPrefTypeAlign(PtrTy));
    if (Info.requiresDevicePointerInfo()) {
      // ...
        Info.DevicePtrInfoMap[BPVal] = {BP, Builder.CreateAlloca(PtrTy)};
        // ...
          DeviceAddrCB(I, Info.DevicePtrInfoMap[BPVal].second);
      // ...
        Info.DevicePtrInfoMap[BPVal] = {BP, BP};
        // ...
          DeviceAddrCB(I, BP);
    // ...
        ArrayType::get(PtrTy, Info.NumberOfPtrs), Info.RTArgs.PointersArray, 0,
    // ...
    Builder.CreateAlignedStore(PVal, P,
                               M.getDataLayout().getPrefTypeAlign(PtrTy));
    // ...
    if (RuntimeSizes.test(I)) {
      // ...
          ArrayType::get(Int64Ty, Info.NumberOfPtrs), Info.RTArgs.SizesArray,
      // ...
          S, M.getDataLayout().getPrefTypeAlign(PtrTy));
    // ...
    unsigned IndexSize = M.getDataLayout().getIndexSizeInBits(0);
    // ...
    auto CustomMFunc = CustomMapperCB(I);
    // ...
      return CustomMFunc.takeError();
    // ...
      MFunc = Builder.CreatePointerCast(*CustomMFunc, PtrTy);
    // ...
        {Builder.getIntN(IndexSize, 0), Builder.getIntN(IndexSize, I)});
    // ...
        MFunc, MAddr, M.getDataLayout().getPrefTypeAlign(MAddr->getType()));
  // ...
      Info.NumberOfPtrs == 0)
  // ...
  Builder.ClearInsertionPoint();
    auto CondConstant = CI->getSExtValue();
    if (CondConstant)
      return ThenGen(AllocaIP, Builder.saveIP());
    // ...
    return ElseGen(AllocaIP, Builder.saveIP());
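// --- Illustrative sketch, not from the original file: when the if-clause
// condition folds to a ConstantInt, only one of the two callbacks needs to
// be materialized, mirroring the getSExtValue() test above. The helper name
// and shape are hypothetical.
static Error emitIfClauseConst(int64_t CondConstant,
                               function_ref<Error()> ThenGen,
                               function_ref<Error()> ElseGen) {
  // No runtime branch is emitted; the decision is made at compile time.
  return CondConstant ? ThenGen() : ElseGen();
}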
bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
  // ...
         "Unexpected Atomic Ordering.");
  // ...
  bool Flush = false;
  assert(X.Var->getType()->isPointerTy() &&
         "OMP Atomic expects a pointer to target memory");
  Type *XElemTy = X.ElemTy;
  // ...
         "OMP atomic read expected a scalar type");
  // ...
  Value *XRead = nullptr;
  // ...
        Builder.CreateLoad(XElemTy, X.Var, X.IsVolatile, "omp.atomic.read");
  // ...
    unsigned LoadSize = DL.getTypeStoreSize(XElemTy);
    // ...
        OldVal->getAlign(), true, AllocaIP, X.Var);
    // ...
    XRead = AtomicLoadRes.first;
  // ...
        Builder.CreateLoad(IntCastTy, X.Var, X.IsVolatile, "omp.atomic.load");
    // ...
      XRead = Builder.CreateBitCast(XLoad, XElemTy, "atomic.flt.cast");
    // ...
      XRead = Builder.CreateIntToPtr(XLoad, XElemTy, "atomic.ptr.cast");
  // ...
  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Read);
  Builder.CreateStore(XRead, V.Var, V.IsVolatile);
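// --- Illustrative sketch, not from the original file: the front-end
// construct createAtomicRead serves (variable names are illustrative;
// requires an OpenMP-enabled compiler). A float 'x' takes the bitcast path
// labelled "atomic.flt.cast" above.
void atomicReadExample(int &v, int &x) {
#pragma omp atomic read
  v = x; // atomic load of x, then a plain store into v
}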
  assert(X.Var->getType()->isPointerTy() &&
         "OMP Atomic expects a pointer to target memory");
  Type *XElemTy = X.ElemTy;
  // ...
         "OMP atomic write expected a scalar type");
  // ...
    unsigned LoadSize = DL.getTypeStoreSize(XElemTy);
    // ...
        OldVal->getAlign(), true, AllocaIP, X.Var);
  // ...
        Builder.CreateBitCast(Expr, IntCastTy, "atomic.src.int.cast");
  // ...
  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Write);
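// --- Illustrative sketch, not from the original file: the corresponding
// front-end construct (names illustrative; requires an OpenMP compiler).
// Non-integer payloads go through the "atomic.src.int.cast" bitcast above.
void atomicWriteExample(float &x, float expr) {
#pragma omp atomic write
  x = expr; // atomic store of expr into x
}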
    AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr,
    bool IsIgnoreDenormalMode, bool IsFineGrainedMemory, bool IsRemoteMemory) {
  // ...
  Type *XTy = X.Var->getType();
  // ...
         "OMP Atomic expects a pointer to target memory");
  Type *XElemTy = X.ElemTy;
  // ...
         "OMP atomic update expected a scalar type");
  // ...
         "OpenMP atomic does not support LT or GT operations");
  // ...
      AllocaIP, X.Var, X.ElemTy, Expr, AO, RMWOp, UpdateOp, X.IsVolatile,
      IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
  // ...
    return AtomicResult.takeError();
  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Update);
Value *OpenMPIRBuilder::emitRMWOpAsInstruction(Value *Src1, Value *Src2,
  // ...
    return Builder.CreateAdd(Src1, Src2);
  // ...
    return Builder.CreateSub(Src1, Src2);
  // ...
    return Builder.CreateAnd(Src1, Src2);
  // ...
    return Builder.CreateNeg(Builder.CreateAnd(Src1, Src2));
  // ...
    return Builder.CreateOr(Src1, Src2);
  // ...
    return Builder.CreateXor(Src1, Src2);
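// --- Illustrative sketch, not from the original file: the non-atomic
// equivalents emitRMWOpAsInstruction recomputes so the post-update value of
// 'x binop expr' can be captured after an atomicrmw. Only the simple cases
// visible above are shown; the helper name is hypothetical.
static int applyRMW(AtomicRMWInst::BinOp Op, int A, int B) {
  switch (Op) {
  case AtomicRMWInst::Add: return A + B;
  case AtomicRMWInst::Sub: return A - B;
  case AtomicRMWInst::And: return A & B;
  case AtomicRMWInst::Or:  return A | B;
  case AtomicRMWInst::Xor: return A ^ B;
  default:                 return B; // e.g. Xchg; other ops omitted here
  }
}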
10229Expected<std::pair<Value *, Value *>> OpenMPIRBuilder::emitAtomicUpdate(
10232 AtomicUpdateCallbackTy &UpdateOp,
bool VolatileX,
bool IsXBinopExpr,
10233 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
10236 bool emitRMWOp =
false;
10244 emitRMWOp = XElemTy;
10247 emitRMWOp = (IsXBinopExpr && XElemTy);
10254 std::pair<Value *, Value *> Res;
10256 AtomicRMWInst *RMWInst =
10257 Builder.CreateAtomicRMW(RMWOp,
X, Expr, llvm::MaybeAlign(), AO);
10258 if (
T.isAMDGPU()) {
10259 if (IsIgnoreDenormalMode)
10260 RMWInst->
setMetadata(
"amdgpu.ignore.denormal.mode",
10262 if (!IsFineGrainedMemory)
10263 RMWInst->
setMetadata(
"amdgpu.no.fine.grained.memory",
10265 if (!IsRemoteMemory)
10269 Res.first = RMWInst;
10274 Res.second = Res.first;
10276 Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
10280 Builder.CreateLoad(XElemTy,
X,
X->getName() +
".atomic.load");
10283 unsigned LoadSize =
10286 OpenMPIRBuilder::AtomicInfo atomicInfo(
10288 OldVal->
getAlign(),
true , AllocaIP,
X);
10289 auto AtomicLoadRes = atomicInfo.EmitAtomicLoadLibcall(AO);
10292 CurBBTI = CurBBTI ? CurBBTI :
Builder.CreateUnreachable();
10299 AllocaInst *NewAtomicAddr =
Builder.CreateAlloca(XElemTy);
10300 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
10301 Builder.SetInsertPoint(ContBB);
10303 PHI->addIncoming(AtomicLoadRes.first, CurBB);
10305 Expected<Value *> CBResult = UpdateOp(OldExprVal,
Builder);
10308 Value *Upd = *CBResult;
10309 Builder.CreateStore(Upd, NewAtomicAddr);
10312 auto Result = atomicInfo.EmitAtomicCompareExchangeLibcall(
10313 AtomicLoadRes.second, NewAtomicAddr, AO, Failure);
10314 LoadInst *PHILoad =
Builder.CreateLoad(XElemTy,
Result.first);
10315 PHI->addIncoming(PHILoad,
Builder.GetInsertBlock());
10318 Res.first = OldExprVal;
    if (UnreachableInst *ExitTI =
    // ...
      Builder.SetInsertPoint(ExitBB);
    // ...
      Builder.SetInsertPoint(ExitTI);
  // ...
    IntegerType *IntCastTy =
    // ...
        Builder.CreateLoad(IntCastTy, X, X->getName() + ".atomic.load");
    // ...
    CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
    // ...
    AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
    NewAtomicAddr->setName(X->getName() + "x.new.val");
    Builder.SetInsertPoint(ContBB);
    // ...
    PHI->addIncoming(OldVal, CurBB);
    // ...
      OldExprVal = Builder.CreateBitCast(PHI, XElemTy,
                                         X->getName() + ".atomic.fltCast");
    // ...
      OldExprVal = Builder.CreateIntToPtr(PHI, XElemTy,
                                          X->getName() + ".atomic.ptrCast");
    // ...
    Expected<Value *> CBResult = UpdateOp(OldExprVal, Builder);
    // ...
    Value *Upd = *CBResult;
    Builder.CreateStore(Upd, NewAtomicAddr);
    LoadInst *DesiredVal = Builder.CreateLoad(IntCastTy, NewAtomicAddr);
    // ...
        X, PHI, DesiredVal, llvm::MaybeAlign(), AO, Failure);
    Result->setVolatile(VolatileX);
    Value *PreviousVal = Builder.CreateExtractValue(Result, 0);
    Value *SuccessFailureVal = Builder.CreateExtractValue(Result, 1);
    PHI->addIncoming(PreviousVal, Builder.GetInsertBlock());
    Builder.CreateCondBr(SuccessFailureVal, ExitBB, ContBB);
    // ...
    Res.first = OldExprVal;
    // ...
    if (UnreachableInst *ExitTI =
    // ...
      Builder.SetInsertPoint(ExitBB);
    // ...
      Builder.SetInsertPoint(ExitTI);
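// --- Illustrative sketch, not from the original file: the shape of the
// compare-exchange retry loop built above, written against std::atomic
// (requires <atomic>). UpdateOp stands in for the user callback; names are
// illustrative.
template <typename T, typename F>
T atomicUpdateCASLoop(std::atomic<T> &X, F UpdateOp) {
  T Old = X.load();
  T New;
  do {
    New = UpdateOp(Old); // compute the desired value from the current one
  } while (!X.compare_exchange_weak(Old, New)); // retry until unchanged
  return Old; // the captured previous value, i.e. Res.first above
}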
    bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr,
    bool IsIgnoreDenormalMode, bool IsFineGrainedMemory, bool IsRemoteMemory) {
  // ...
  Type *XTy = X.Var->getType();
  // ...
         "OMP Atomic expects a pointer to target memory");
  Type *XElemTy = X.ElemTy;
  // ...
         "OMP atomic capture expected a scalar type");
  // ...
         "OpenMP atomic does not support LT or GT operations");
  // ...
      AllocaIP, X.Var, X.ElemTy, Expr, AO, AtomicOp, UpdateOp, X.IsVolatile,
      IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
  // ...
  Value *CapturedVal =
      (IsPostfixUpdate ? AtomicResult->first : AtomicResult->second);
  Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile);
  // ...
  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Capture);
  // ...
      IsPostfixUpdate, IsFailOnly, Failure);
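// --- Illustrative sketch, not from the original file: postfix vs. prefix
// capture decides which element of the (old, new) pair is stored into v
// (names illustrative; requires an OpenMP compiler).
void atomicCaptureExamples(int &v, int &x) {
#pragma omp atomic capture
  v = x++; // postfix: v receives the OLD value (AtomicResult->first)
#pragma omp atomic capture
  v = ++x; // prefix: v receives the NEW value (AtomicResult->second)
}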
  assert(X.Var->getType()->isPointerTy() &&
         "OMP atomic expects a pointer to target memory");
  // ...
  assert(V.Var->getType()->isPointerTy() && "v.var must be of pointer type");
  assert(V.ElemTy == X.ElemTy && "x and v must be of same type");
  // ...
  bool IsInteger = E->getType()->isIntegerTy();
  // ...
  if (Op == OMPAtomicCompareOp::EQ) {
    // ...
    Value *OldValue = Builder.CreateExtractValue(Result, 0);
    // ...
      OldValue = Builder.CreateBitCast(OldValue, X.ElemTy);
    // ...
           "OldValue and V must be of same type");
    if (IsPostfixUpdate) {
      Builder.CreateStore(OldValue, V.Var, V.IsVolatile);
    // ...
      Value *SuccessOrFail = Builder.CreateExtractValue(Result, 1);
      // ...
      CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
      // ...
          CurBBTI, X.Var->getName() + ".atomic.exit");
      // ...
      Builder.CreateCondBr(SuccessOrFail, ExitBB, ContBB);
      // ...
      Builder.SetInsertPoint(ContBB);
      Builder.CreateStore(OldValue, V.Var);
      // ...
        Builder.SetInsertPoint(ExitBB);
      // ...
        Builder.SetInsertPoint(ExitTI);
      // ...
      Value *CapturedValue = Builder.CreateSelect(SuccessOrFail, E, OldValue);
      Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
    // ...
    assert(R.Var->getType()->isPointerTy() && "r.var must be of pointer type");
    assert(R.ElemTy->isIntegerTy() && "r must be of integral type");
    // ...
    Value *SuccessFailureVal = Builder.CreateExtractValue(Result, 1);
    Value *ResultCast = R.IsSigned
                            ? Builder.CreateSExt(SuccessFailureVal, R.ElemTy)
                            : Builder.CreateZExt(SuccessFailureVal, R.ElemTy);
    Builder.CreateStore(ResultCast, R.Var, R.IsVolatile);
  // ...
    assert((Op == OMPAtomicCompareOp::MAX || Op == OMPAtomicCompareOp::MIN) &&
           "Op should be either max or min at this point");
    assert(!IsFailOnly && "IsFailOnly is only valid when the comparison is ==");
    // ...
    if (IsXBinopExpr) {
    // ...
    Value *CapturedValue = nullptr;
    if (IsPostfixUpdate) {
      CapturedValue = OldValue;
    // ...
      Value *NonAtomicCmp = Builder.CreateCmp(Pred, OldValue, E);
      CapturedValue = Builder.CreateSelect(NonAtomicCmp, E, OldValue);
    // ...
    Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
  // ...
  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Compare);
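// --- Illustrative sketch, not from the original file: the two families
// handled above, == via compare-exchange and min/max via atomicrmw (names
// illustrative; requires an OpenMP 5.1 compiler).
void atomicCompareExamples(int &x, int e, int d) {
#pragma omp atomic compare
  if (x == e) { x = d; } // OMPAtomicCompareOp::EQ path (cmpxchg)
#pragma omp atomic compare
  if (x < e) { x = e; } // MAX path: x becomes max(x, e)
}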
10637 if (&OuterAllocaBB ==
Builder.GetInsertBlock()) {
10664 bool SubClausesPresent =
10665 (NumTeamsLower || NumTeamsUpper || ThreadLimit || IfExpr);
10667 if (!
Config.isTargetDevice() && SubClausesPresent) {
10668 assert((NumTeamsLower ==
nullptr || NumTeamsUpper !=
nullptr) &&
10669 "if lowerbound is non-null, then upperbound must also be non-null "
10670 "for bounds on num_teams");
10672 if (NumTeamsUpper ==
nullptr)
10673 NumTeamsUpper =
Builder.getInt32(0);
10675 if (NumTeamsLower ==
nullptr)
10676 NumTeamsLower = NumTeamsUpper;
10680 "argument to if clause must be an integer value");
10684 IfExpr =
Builder.CreateICmpNE(IfExpr,
10685 ConstantInt::get(IfExpr->
getType(), 0));
10686 NumTeamsUpper =
Builder.CreateSelect(
10687 IfExpr, NumTeamsUpper,
Builder.getInt32(1),
"numTeamsUpper");
10690 NumTeamsLower =
Builder.CreateSelect(
10691 IfExpr, NumTeamsLower,
Builder.getInt32(1),
"numTeamsLower");
10694 if (ThreadLimit ==
nullptr)
10695 ThreadLimit =
Builder.getInt32(0);
10700 {Ident, ThreadNum, NumTeamsLower, NumTeamsUpper, ThreadLimit});
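// --- Illustrative sketch, not from the original file: the select logic
// above reduces to this scalar rule; when the if-clause evaluates to false,
// the teams construct must run with exactly one team. The helper name is
// hypothetical.
static int clampNumTeams(bool IfClause, int RequestedUpper) {
  return IfClause ? RequestedUpper : 1; // CreateSelect(IfExpr, Upper, 1)
}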
  if (Error Err = BodyGenCB(AllocaIP, CodeGenIP))
  // ...
      Builder, OuterAllocaIP, ToBeDeleted, AllocaIP, "gid", true));
  // ...
      Builder, OuterAllocaIP, ToBeDeleted, AllocaIP, "tid", true));
  // ...
  auto HostPostOutlineCB = [this, Ident,
                            ToBeDeleted](Function &OutlinedFn) mutable {
    // ...
           "there must be a single user for the outlined function");
    // ...
           "Outlined function must have two or three arguments only");
    // ...
    bool HasShared = OutlinedFn.arg_size() == 3;
    // ...
    assert(StaleCI && "Error while outlining - no CallInst user found for the "
                      "outlined function.");
    Builder.SetInsertPoint(StaleCI);
    // ...
        omp::RuntimeFunction::OMPRTL___kmpc_fork_teams),
    // ...
      I->eraseFromParent();
  // ...
  if (!Config.isTargetDevice())
// ...
  if (OuterAllocaBB == Builder.GetInsertBlock()) {
// ...
  if (Error Err = BodyGenCB(AllocaIP, CodeGenIP))
// ...
  if (Config.isTargetDevice()) {
// ...
    std::string VarName) {
  // ...
  return MapNamesArrayGlobal;
void OpenMPIRBuilder::initializeTypes(Module &M) {
  // ...
  unsigned ProgramAS = M.getDataLayout().getProgramAddressSpace();
#define OMP_TYPE(VarName, InitValue) VarName = InitValue;
#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize)                             \
  VarName##Ty = ArrayType::get(ElemTy, ArraySize);                             \
  VarName##PtrTy = PointerType::get(Ctx, DefaultTargetAS);
#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...)                  \
  VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg);            \
  VarName##Ptr = PointerType::get(Ctx, ProgramAS);
#define OMP_STRUCT_TYPE(VarName, StructName, Packed, ...)                      \
  T = StructType::getTypeByName(Ctx, StructName);                              \
  if (!T)                                                                      \
    T = StructType::create(Ctx, {__VA_ARGS__}, StructName, Packed);            \
  VarName = T;                                                                 \
  VarName##Ptr = PointerType::get(Ctx, DefaultTargetAS);
#include "llvm/Frontend/OpenMP/OMPKinds.def"
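// --- Illustrative sketch, not from the original file: the same X-macro
// technique applied to a made-up .def file. Each OMP_*_TYPE macro above is
// defined immediately before the #include; OMPKinds.def expands one macro
// invocation per known OpenMP type.
//
//   // MyKinds.def (hypothetical):  MY_TYPE(Int32, Builder.getInt32Ty())
//   #define MY_TYPE(Name, Init) Type *Name = Init;
//   #include "MyKinds.def"   // expands to: Type *Int32 = ...;
//   #undef MY_TYPE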
  while (!Worklist.empty()) {
    // ...
      if (BlockSet.insert(SuccBB).second)
// ...
      Name.empty() ? Addr->getName() : Name, Size, Flags, 0);
// ...
  Fn->addFnAttr("uniform-work-group-size", "true");
  Fn->addFnAttr(Attribute::MustProgress);
10902 auto &&GetMDInt = [
this](
unsigned V) {
10909 NamedMDNode *MD =
M.getOrInsertNamedMetadata(
"omp_offload.info");
10910 auto &&TargetRegionMetadataEmitter =
10911 [&
C, MD, &OrderedEntries, &GetMDInt, &GetMDString](
10926 GetMDInt(E.getKind()), GetMDInt(EntryInfo.DeviceID),
10927 GetMDInt(EntryInfo.FileID), GetMDString(EntryInfo.ParentName),
10928 GetMDInt(EntryInfo.Line), GetMDInt(EntryInfo.Count),
10929 GetMDInt(E.getOrder())};
10932 OrderedEntries[E.getOrder()] = std::make_pair(&E, EntryInfo);
10941 auto &&DeviceGlobalVarMetadataEmitter =
10942 [&
C, &OrderedEntries, &GetMDInt, &GetMDString, MD](
10952 Metadata *
Ops[] = {GetMDInt(E.getKind()), GetMDString(MangledName),
10953 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
10957 OrderedEntries[E.getOrder()] = std::make_pair(&E, varInfo);
10964 DeviceGlobalVarMetadataEmitter);
10966 for (
const auto &E : OrderedEntries) {
10967 assert(E.first &&
"All ordered entries must exist!");
10968 if (
const auto *CE =
10971 if (!CE->getID() || !CE->getAddress()) {
10975 if (!
M.getNamedValue(FnName))
10983 }
else if (
const auto *CE =
dyn_cast<
10992 if (
Config.isTargetDevice() &&
Config.hasRequiresUnifiedSharedMemory())
10994 if (!CE->getAddress()) {
10999 if (CE->getVarSize() == 0)
11003 assert(((
Config.isTargetDevice() && !CE->getAddress()) ||
11004 (!
Config.isTargetDevice() && CE->getAddress())) &&
11005 "Declaret target link address is set.");
11006 if (
Config.isTargetDevice())
11008 if (!CE->getAddress()) {
11015 if (!CE->getAddress()) {
11028 if ((
GV->hasLocalLinkage() ||
GV->hasHiddenVisibility()) &&
11032 OMPTargetGlobalVarEntryIndirectVTable))
11041 Flags, CE->getLinkage(), CE->getVarName());
11044 Flags, CE->getLinkage());
11055 if (
Config.hasRequiresFlags() && !
Config.isTargetDevice())
11061 Config.getRequiresFlags());
11071 OS <<
"_" <<
Count;
11076 unsigned NewCount = getTargetRegionEntryInfoCount(EntryInfo);
11079 EntryInfo.
Line, NewCount);
11087 auto FileIDInfo = CallBack();
11091 FileID =
Status->getUniqueID().getFile();
11095 FileID =
hash_value(std::get<0>(FileIDInfo));
11099 std::get<1>(FileIDInfo));
       static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>>(
       // ...
       !(Remain & 1); Remain = Remain >> 1)
// ...
  if (static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>>(
      // ...
      static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>>(
  // ...
  if (static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>>(
  // ...
    Flags &= ~omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
    Flags |= MemberOfFlag;
    bool IsDeclaration, bool IsExternallyVisible,
    // ...
    std::vector<GlobalVariable *> &GeneratedRefs, bool OpenMPSIMD,
    std::vector<Triple> TargetTriple, Type *LlvmPtrTy,
    std::function<Constant *()> GlobalInitializer,
// ...
      Config.hasRequiresUnifiedSharedMemory())) {
    // ...
    if (!IsExternallyVisible)
    // ...
    OS << "_decl_tgt_ref_ptr";
    // ...
    Value *Ptr = M.getNamedValue(PtrName);
    // ...
      if (!Config.isTargetDevice()) {
        if (GlobalInitializer)
          GV->setInitializer(GlobalInitializer());
      // ...
          CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
          EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
          GlobalInitializer, VariableLinkage, LlvmPtrTy, cast<Constant>(Ptr));
    bool IsDeclaration, bool IsExternallyVisible,
    // ...
    std::vector<GlobalVariable *> &GeneratedRefs, bool OpenMPSIMD,
    std::vector<Triple> TargetTriple,
    std::function<Constant *()> GlobalInitializer,
// ...
      (TargetTriple.empty() && !Config.isTargetDevice()))
// ...
      !Config.hasRequiresUnifiedSharedMemory()) {
    // ...
    VarName = MangledName;
    // ...
    if (!IsDeclaration)
      // ...
          M.getDataLayout().getTypeSizeInBits(LlvmVal->getValueType()), 8);
    // ...
    Linkage = (VariableLinkage) ? VariableLinkage() : LlvmVal->getLinkage();
    // ...
    if (Config.isTargetDevice() &&
    // ...
      if (!M.getNamedValue(RefName)) {
        // ...
        GvAddrRef->setConstant(true);
        // ...
        GvAddrRef->setInitializer(Addr);
        GeneratedRefs.push_back(GvAddrRef);
  // ...
    if (Config.isTargetDevice()) {
      VarName = (Addr) ? Addr->getName() : "";
      // ...
          CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
          EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
          LlvmPtrTy, GlobalInitializer, VariableLinkage);
      VarName = (Addr) ? Addr->getName() : "";
    // ...
    VarSize = M.getDataLayout().getPointerSize();
11290 auto &&GetMDInt = [MN](
unsigned Idx) {
11295 auto &&GetMDString = [MN](
unsigned Idx) {
11297 return V->getString();
11300 switch (GetMDInt(0)) {
11304 case OffloadEntriesInfoManager::OffloadEntryInfo::
11305 OffloadingEntryInfoTargetRegion: {
11315 case OffloadEntriesInfoManager::OffloadEntryInfo::
11316 OffloadingEntryInfoDeviceGlobalVar:
11329 if (HostFilePath.
empty())
11333 if (std::error_code Err = Buf.getError()) {
11335 "OpenMPIRBuilder: " +
11343 if (std::error_code Err =
M.getError()) {
11345 (
"error parsing host file inside of OpenMPIRBuilder: " + Err.message())
11357 return OffloadEntriesTargetRegion.empty() &&
11358 OffloadEntriesDeviceGlobalVar.empty();
unsigned OffloadEntriesInfoManager::getTargetRegionEntryInfoCount(
    // ...
  auto It = OffloadEntriesTargetRegionCount.find(
      getTargetRegionEntryCountKey(EntryInfo));
  if (It == OffloadEntriesTargetRegionCount.end())
  // ...

void OffloadEntriesInfoManager::incrementTargetRegionEntryInfoCount(
    // ...
  OffloadEntriesTargetRegionCount[getTargetRegionEntryCountKey(EntryInfo)] =
      EntryInfo.Count + 1;
// ...
  OffloadEntriesTargetRegion[EntryInfo] =
  // ...
  ++OffloadingEntriesNum;
// ...
  assert(EntryInfo.Count == 0 && "expected default EntryInfo");
  // ...
  EntryInfo.Count = getTargetRegionEntryInfoCount(EntryInfo);
  // ...
  if (OMPBuilder->Config.isTargetDevice()) {
    // ...
    auto &Entry = OffloadEntriesTargetRegion[EntryInfo];
    Entry.setAddress(Addr);
    // ...
    Entry.setFlags(Flags);
    // ...
           "Target region entry already registered!");
    // ...
    OffloadEntriesTargetRegion[EntryInfo] = Entry;
    ++OffloadingEntriesNum;
  // ...
  incrementTargetRegionEntryInfoCount(EntryInfo);
// ...
  EntryInfo.Count = getTargetRegionEntryInfoCount(EntryInfo);
  // ...
  auto It = OffloadEntriesTargetRegion.find(EntryInfo);
  if (It == OffloadEntriesTargetRegion.end()) {
  // ...
  if (!IgnoreAddressId && (It->second.getAddress() || It->second.getID()))
// ...
  for (const auto &It : OffloadEntriesTargetRegion) {
    Action(It.first, It.second);
// ...
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
// ...
  if (OMPBuilder->Config.isTargetDevice()) {
    // ...
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    // ...
    if (Entry.getVarSize() == 0) {
      Entry.setVarSize(VarSize);
      Entry.setLinkage(Linkage);
    // ...
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
    // ...
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert(Entry.isValid() && Entry.getFlags() == Flags &&
           "Entry not initialized!");
    if (Entry.getVarSize() == 0) {
      Entry.setVarSize(VarSize);
      Entry.setLinkage(Linkage);
    // ...
    OffloadEntriesDeviceGlobalVar.try_emplace(VarName, OffloadingEntriesNum,
                                              Addr, VarSize, Flags, Linkage,
    // ...
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage, "");
    ++OffloadingEntriesNum;
// ...
  for (const auto &E : OffloadEntriesDeviceGlobalVar)
    Action(E.getKey(), E.getValue());
void CanonicalLoopInfo::collectControlBlocks(
  // ...
  BBs.append({getPreheader(), Header, Cond, Latch, Exit, getAfter()});
// ...
void CanonicalLoopInfo::setTripCount(Value *TripCount) {
// ...
void CanonicalLoopInfo::mapIndVar(
  // ...
  for (Use &U : OldIV->uses()) {
    // ...
    if (User->getParent() == getCond())
    // ...
    if (User->getParent() == getLatch())
  // ...
  Value *NewIV = Updater(OldIV);
  // ...
  for (Use *U : ReplacableUses)
// ...
         "Preheader must terminate with unconditional branch");
  // ...
         "Preheader must jump to header");
  // ...
         "Header must terminate with unconditional branch");
  assert(Header->getSingleSuccessor() == Cond &&
         "Header must jump to exiting block");
  // ...
  assert(Cond->getSinglePredecessor() == Header &&
         "Exiting block only reachable from header");
  // ...
         "Exiting block must terminate with conditional branch");
  // ...
         "Exiting block must have two successors");
  // ...
         "Exiting block's first successor jumps to the body");
  // ...
         "Exiting block's second successor must exit the loop");
  // ...
         "Body only reachable from exiting block");
  // ...
         "Latch must terminate with unconditional branch");
  assert(Latch->getSingleSuccessor() == Header && "Latch must jump to header");
  // ...
  assert(Latch->getSinglePredecessor() != nullptr);
  // ...
         "Exit block must terminate with unconditional branch");
  assert(Exit->getSingleSuccessor() == After &&
         "Exit block must jump to after block");
  // ...
         "After block only reachable from exit block");
  // ...
  assert(IndVar && "Canonical induction variable not found?");
  // ...
         "Induction variable must be an integer");
  // ...
         "Induction variable must be a PHI in the loop header");
  // ...
  auto *NextIndVar = cast<PHINode>(IndVar)->getIncomingValue(1);
  // ...
  assert(TripCount && "Loop trip count not found?");
  // ...
         "Trip count and induction variable must have the same type");
  // ...
         "Exit condition must be a signed less-than comparison");
  // ...
         "Exit condition must compare the induction variable");
  // ...
         "Exit condition must compare with the trip count");
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Expand Atomic instructions
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Analysis containing CSE Info
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
static cl::opt< unsigned > TileSize("fuse-matrix-tile-size", cl::init(4), cl::Hidden, cl::desc("Tile size for matrix instruction fusion using square-shaped tiles."))
uint64_t IntrinsicInst * II
#define OMP_KERNEL_ARG_VERSION
Provides definitions for Target specific Grid Values.
static Value * removeASCastIfPresent(Value *V)
static void createTargetLoopWorkshareCall(OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType, BasicBlock *InsertBlock, Value *Ident, Value *LoopBodyArg, Value *TripCount, Function &LoopBodyFn, bool NoLoop)
Value * createFakeIntVal(IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy OuterAllocaIP, llvm::SmallVectorImpl< Instruction * > &ToBeDeleted, OpenMPIRBuilder::InsertPointTy InnerAllocaIP, const Twine &Name="", bool AsPtr=true, bool Is64Bit=false)
static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL)
Make Source branch to Target.
static FunctionCallee getKmpcDistForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
static void applyParallelAccessesMetadata(CanonicalLoopInfo *CLI, LLVMContext &Ctx, Loop *Loop, LoopInfo &LoopInfo, SmallVector< Metadata * > &LoopMDList)
static FunctionCallee getKmpcForDynamicFiniForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for finalizing the dynamic loop using depending on type.
static Expected< Function * > createOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, StringRef FuncName, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void FixupDebugInfoForOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, Function *Func, DenseMap< Value *, std::tuple< Value *, unsigned > > &ValueReplacementMap)
static OMPScheduleType getOpenMPOrderingScheduleType(OMPScheduleType BaseScheduleType, bool HasOrderedClause)
Adds ordering modifier flags to schedule type.
static OMPScheduleType getOpenMPMonotonicityScheduleType(OMPScheduleType ScheduleType, bool HasSimdModifier, bool HasMonotonic, bool HasNonmonotonic, bool HasOrderedClause)
Adds monotonicity modifier flags to schedule type.
static void workshareLoopTargetCallback(OpenMPIRBuilder *OMPIRBuilder, CanonicalLoopInfo *CLI, Value *Ident, Function &OutlinedFn, const SmallVector< Instruction *, 4 > &ToBeDeleted, WorksharingLoopType LoopType, bool NoLoop)
static bool isValidWorkshareLoopScheduleType(OMPScheduleType SchedType)
static llvm::CallInst * emitNoUnwindRuntimeCall(IRBuilder<> &Builder, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args, const llvm::Twine &Name)
static Error populateReductionFunction(Function *ReductionFunc, ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, IRBuilder<> &Builder, ArrayRef< bool > IsByRef, bool IsGPU)
static Function * getFreshReductionFunc(Module &M)
static void raiseUserConstantDataAllocasToEntryBlock(IRBuilderBase &Builder, Function *Function)
static FunctionCallee getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for updating the next loop using OpenMP dynamic scheduling depending...
static bool isConflictIP(IRBuilder<>::InsertPoint IP1, IRBuilder<>::InsertPoint IP2)
Return whether IP1 and IP2 are ambiguous, i.e.
static void checkReductionInfos(ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, bool IsGPU)
static Type * getOffloadingArrayType(Value *V)
static OMPScheduleType getOpenMPBaseScheduleType(llvm::omp::ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasDistScheduleChunks)
Determine which scheduling algorithm to use, determined from schedule clause arguments.
static OMPScheduleType computeOpenMPScheduleType(ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasMonotonicModifier, bool HasNonmonotonicModifier, bool HasOrderedClause, bool HasDistScheduleChunks)
Determine the schedule type using schedule and ordering clause arguments.
static FunctionCallee getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for initializing loop bounds using OpenMP dynamic scheduling dependi...
static StructType * createTaskWithPrivatesTy(OpenMPIRBuilder &OMPIRBuilder, ArrayRef< Value * > OffloadingArraysToPrivatize)
static cl::opt< double > UnrollThresholdFactor("openmp-ir-builder-unroll-threshold-factor", cl::Hidden, cl::desc("Factor for the unroll threshold to account for code " "simplifications still taking place"), cl::init(1.5))
static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI)
Heuristically determine the best-performant unroll factor for CLI.
static Value * emitTaskDependencies(OpenMPIRBuilder &OMPBuilder, const SmallVectorImpl< OpenMPIRBuilder::DependData > &Dependencies)
static Error emitTargetOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, bool IsOffloadEntry, TargetRegionEntryInfo &EntryInfo, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, Function *&OutlinedFn, Constant *&OutlinedFnID, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void updateNVPTXAttr(Function &Kernel, StringRef Name, int32_t Value, bool Min)
static OpenMPIRBuilder::InsertPointTy getInsertPointAfterInstr(Instruction *I)
static void redirectAllPredecessorsTo(BasicBlock *OldTarget, BasicBlock *NewTarget, DebugLoc DL)
Redirect all edges that branch to OldTarget to NewTarget.
static void hoistNonEntryAllocasToEntryBlock(llvm::BasicBlock &Block)
static std::unique_ptr< TargetMachine > createTargetMachine(Function *F, CodeGenOptLevel OptLevel)
Create the TargetMachine object to query the backend for optimization preferences.
static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
static void addAccessGroupMetadata(BasicBlock *Block, MDNode *AccessGroup, LoopInfo &LI)
Attach llvm.access.group metadata to the memref instructions of Block.
static void addBasicBlockMetadata(BasicBlock *BB, ArrayRef< Metadata * > Properties)
Attach metadata Properties to the basic block described by BB.
static void restoreIPandDebugLoc(llvm::IRBuilderBase &Builder, llvm::IRBuilderBase::InsertPoint IP)
This is wrapper over IRBuilderBase::restoreIP that also restores the current debug location to the la...
static LoadInst * loadSharedDataFromTaskDescriptor(OpenMPIRBuilder &OMPIRBuilder, IRBuilderBase &Builder, Value *TaskWithPrivates, Type *TaskWithPrivatesTy)
Given a task descriptor, TaskWithPrivates, return the pointer to the block of pointers containing sha...
static cl::opt< bool > OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, cl::desc("Use optimistic attributes describing " "'as-if' properties of runtime calls."), cl::init(false))
static FunctionCallee getKmpcForStaticLoopForType(Type *Ty, OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType)
static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::TargetDataInfo &Info, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, const OpenMPIRBuilder::TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, Function *OutlinedFn, Constant *OutlinedFnID, SmallVectorImpl< Value * > &Args, OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB, OpenMPIRBuilder::CustomMapperCallbackTy CustomMapperCB, const SmallVector< llvm::OpenMPIRBuilder::DependData > &Dependencies, bool HasNoWait, Value *DynCGroupMem, OMPDynGroupprivateFallbackType DynCGroupMemFallback)
static const omp::GV & getGridValue(const Triple &T, Function *Kernel)
static Function * emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, CallInst *StaleCI, StructType *PrivatesTy, StructType *TaskWithPrivatesTy, const size_t NumOffloadingArrays, const int SharedArgsOperandNo)
Create an entry point for a target task with the following.
static void addLoopMetadata(CanonicalLoopInfo *Loop, ArrayRef< Metadata * > Properties)
Attach loop metadata Properties to the loop described by Loop.
static void removeUnusedBlocksFromParent(ArrayRef< BasicBlock * > BBs)
Determine which blocks in BBs are reachable from outside and remove the ones that are not reachable f...
static void targetParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, BasicBlock *OuterAllocaBB, Value *Ident, Value *IfCondition, Value *NumThreads, Instruction *PrivTID, AllocaInst *PrivTIDAddr, Value *ThreadID, const SmallVector< Instruction *, 4 > &ToBeDeleted)
static void hostParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, Value *Ident, Value *IfCondition, Instruction *PrivTID, AllocaInst *PrivTIDAddr, const SmallVector< Instruction *, 4 > &ToBeDeleted)
FunctionAnalysisManager FAM
This file defines the Pass Instrumentation classes that provide instrumentation points into the pass ...
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
unsigned unsigned DefaultVal
std::unordered_set< BasicBlock * > BlockSet
This file implements the SmallBitVector class.
This file defines the SmallSet class.
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Defines the virtual file system interface vfs::FileSystem.
static cl::opt< unsigned > MaxThreads("xcore-max-threads", cl::Optional, cl::desc("Maximum number of threads (for emulation thread-local storage)"), cl::Hidden, cl::value_desc("number"), cl::init(8))
static const uint32_t IV[8]
Class for arbitrary precision integers.
This class represents a conversion between pointers from one address space to another.
an instruction to allocate memory on the stack
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
PointerType * getType() const
Overload to return most specific pointer type.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
unsigned getAddressSpace() const
Return the address space for the allocation.
LLVM_ABI std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
LLVM_ABI bool isArrayAllocation() const
Return true if there is an allocation size parameter to the allocation instruction that is not 1.
void setAlignment(Align Align)
const Value * getArraySize() const
Get the number of elements allocated.
bool registerPass(PassBuilderT &&PassBuilder)
Register an analysis pass with the manager.
This class represents an incoming formal argument to a Function.
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A function analysis which provides an AssumptionCache.
LLVM_ABI AssumptionCache run(Function &F, FunctionAnalysisManager &)
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
LLVM_ABI std::pair< LoadInst *, AllocaInst * > EmitAtomicLoadLibcall(AtomicOrdering AO)
LLVM_ABI void EmitAtomicStoreLibcall(AtomicOrdering AO, Value *Source)
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ FMinimum
*p = minimum(old, v) minimum matches the behavior of llvm.minimum.
@ Min
*p = old <signed v ? old : v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ FMaximum
*p = maximum(old, v) maximum matches the behavior of llvm.maximum.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
This class holds the attributes for a particular argument, parameter, function, or return value.
LLVM_ABI AttributeSet addAttributes(LLVMContext &C, AttributeSet AS) const
Add attributes to the attribute set.
LLVM_ABI AttributeSet addAttribute(LLVMContext &C, Attribute::AttrKind Kind) const
Add an argument attribute.
LLVM Basic Block Representation.
LLVM_ABI void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
iterator begin()
Instruction iterator methods.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
reverse_iterator rbegin()
const Instruction & back() const
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
InstListType::reverse_iterator reverse_iterator
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
InstListType::iterator iterator
Instruction iterators...
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Class to represented the control flow structure of an OpenMP canonical loop.
Value * getTripCount() const
Returns the llvm::Value containing the number of loop iterations.
BasicBlock * getHeader() const
The header is the entry for each iteration.
LLVM_ABI void assertOK() const
Consistency self-check.
Type * getIndVarType() const
Return the type of the induction variable (and the trip count).
BasicBlock * getBody() const
The body block is the single entry for a loop iteration and not controlled by CanonicalLoopInfo.
bool isValid() const
Returns whether this object currently represents the IR of a loop.
void setLastIter(Value *IterVar)
Sets the last iteration variable for this loop.
OpenMPIRBuilder::InsertPointTy getAfterIP() const
Return the insertion point for user code after the loop.
OpenMPIRBuilder::InsertPointTy getBodyIP() const
Return the insertion point for user code in the body.
BasicBlock * getAfter() const
The after block is intended for clean-up code such as lifetime end markers.
Function * getFunction() const
LLVM_ABI void invalidate()
Invalidate this loop.
BasicBlock * getLatch() const
Reaching the latch indicates the end of the loop body code.
OpenMPIRBuilder::InsertPointTy getPreheaderIP() const
Return the insertion point for user code before the loop.
BasicBlock * getCond() const
The condition block computes whether there is another loop iteration.
BasicBlock * getExit() const
Reaching the exit indicates no more iterations are being executed.
LLVM_ABI BasicBlock * getPreheader() const
The preheader ensures that there is only a single edge entering the loop.
Instruction * getIndVar() const
Returns the instruction representing the current logical induction variable.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true)
This method constructs a CDS and initializes it with a text string.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getTruncOrBitCast(Constant *C, Type *Ty)
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static LLVM_ABI Constant * getSizeOf(Type *Ty)
getSizeOf constant expr - computes the (alloc) size of a type (in address-units, not bits) in a targe...
static LLVM_ABI Constant * getAddrSpaceCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
DILocalScope * getScope() const
Get the local scope for this variable.
DINodeArray getAnnotations() const
Subprogram description. Uses SubclassData1.
uint32_t getAlignInBits() const
StringRef getName() const
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
Analysis pass which computes a DominatorTree.
LLVM_ABI DominatorTree run(Function &F, FunctionAnalysisManager &)
Run the analysis pass over a function and produce a dominator tree.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Represents either an error or a value T.
Lightweight error class with error context and mandatory checking.
static ErrorSuccess success()
Create a success value.
Tagged union holding either a T or a Error.
Error takeError()
Take ownership of the stored error.
reference get()
Returns a reference to the stored T value.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
const BasicBlock & getEntryBlock() const
FunctionType * getFunctionType() const
Returns the FunctionType for me.
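A hedged sketch of declaring a function through these APIs, assuming an LLVMContext Ctx and a Module M; the name "callee" is illustrative:

// Declare `i32 @callee(ptr)` and mark it as non-throwing.
FunctionType *FTy = FunctionType::get(Type::getInt32Ty(Ctx),
                                      {PointerType::getUnqual(Ctx)},
                                      /*isVarArg=*/false);
Function *F = Function::Create(FTy, GlobalValue::ExternalLinkage, "callee", &M);
F->addFnAttr(Attribute::NoUnwind);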
void removeFromParent()
removeFromParent - This method unlinks 'this' from the containing module, but does not delete it.
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
AttributeList getAttributes() const
Return the attribute list for this Function.
const Function & getFunction() const
void setAttributes(AttributeList Attrs)
Set the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the list of attributes for the given arg.
Function::iterator insert(Function::iterator Position, BasicBlock *BB)
Insert BB in the basic block list at Position.
Type * getReturnType() const
Returns the type of the ret val.
void setCallingConv(CallingConv::ID CC)
Argument * getArg(unsigned i) const
bool hasMetadata() const
Return true if this value has any metadata attached to it.
LLVM_ABI void addMetadata(unsigned KindID, MDNode &MD)
Add a metadata attachment.
LinkageTypes getLinkage() const
void setLinkage(LinkageTypes LT)
Module * getParent()
Get the module that this global value is contained inside of...
void setDSOLocal(bool Local)
PointerType * getType() const
Global values are always pointers.
@ HiddenVisibility
The GV is hidden.
@ ProtectedVisibility
The GV is protected.
void setVisibility(VisibilityTypes V)
LinkageTypes
An enumeration for the kinds of linkage for global values.
@ PrivateLinkage
Like Internal, but omit from symbol table.
@ CommonLinkage
Tentative definitions.
@ InternalLinkage
Rename collisions when linking (static functions).
@ WeakODRLinkage
Same, but only replaced by something equivalent.
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
@ AppendingLinkage
Special purpose, only applies to global arrays.
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Type * getValueType() const
InsertPoint - A saved insertion point.
BasicBlock * getBlock() const
bool isSet() const
Returns true if this insert point is set.
BasicBlock::iterator getPoint() const
Common base class shared among various IRBuilders.
InsertPoint saveIP() const
Returns the current insert point.
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
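The usual save/restore idiom, sketched under the assumption of an IRBuilder<> Builder and a block AllocaBB where temporaries belong:

IRBuilderBase::InsertPoint OldIP = Builder.saveIP();
Builder.SetInsertPoint(AllocaBB, AllocaBB->begin()); // hop to the alloca block
AllocaInst *Tmp = Builder.CreateAlloca(Builder.getInt32Ty(), nullptr, "tmp");
Builder.restoreIP(OldIP); // resume emitting at the original location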
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
LLVM_ABI const DebugLoc & getStableDebugLoc() const
Fetch the debug location for this node, unless this is a debug intrinsic, in which case fetch the deb...
LLVM_ABI void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI void moveBeforePreserving(InstListType::iterator MovePos)
Perform a moveBefore operation, while signalling that the caller intends to preserve the original ord...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
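A short sketch of making a load atomic, assuming an IRBuilder<> Builder and an i32 pointer Ptr:

LoadInst *LD = Builder.CreateLoad(Builder.getInt32Ty(), Ptr, "val");
LD->setAtomic(AtomicOrdering::Monotonic); // SSID defaults to SyncScope::System
LD->setAlignment(Align(4));               // atomic accesses need natural alignment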
Analysis pass that exposes the LoopInfo for a function.
LLVM_ABI LoopInfo run(Function &F, FunctionAnalysisManager &AM)
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
LoopT * getLoopFor(const BlockT *BB) const
Return the innermost loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createCallbackEncoding(unsigned CalleeArgNo, ArrayRef< int > Arguments, bool VarArgsArePassed)
Return metadata describing a callback (see llvm::AbstractCallSite).
LLVM_ABI void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
static MDTuple * getDistinct(LLVMContext &Context, ArrayRef< Metadata * > MDs)
ArrayRef< MDOperand > operands() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
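A hedged sketch of attaching metadata with these helpers, assuming an LLVMContext Ctx and an Instruction *I; the strings "my.tag" and "custom.kind" are illustrative:

MDNode *Node = MDTuple::get(Ctx, {MDString::get(Ctx, "my.tag")});
I->setMetadata(Ctx.getMDKindID("custom.kind"), Node); // attach the node to I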
This class implements a map that also provides access to all stored values in a deterministic order.
A Module instance is used to store all the information related to an LLVM module.
LLVMContext & getContext() const
Get the global data context.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
iterator_range< op_iterator > operands()
LLVM_ABI void addOperand(MDNode *M)
Device global variable entries info.
Target region entries info.
Base class of the entries info.
Class that manages information about offload code regions and data.
function_ref< void(StringRef, const OffloadEntryInfoDeviceGlobalVar &)> OffloadDeviceGlobalVarEntryInfoActTy
Applies action Action on all registered entries.
OMPTargetDeviceClauseKind
Kind of device clause for declare target variables and functions. NOTE: Currently not used as a part o...
@ OMPTargetDeviceClauseAny
The target is marked for all devices.
LLVM_ABI void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr, int64_t VarSize, OMPTargetGlobalVarEntryKind Flags, GlobalValue::LinkageTypes Linkage)
Register device global variable entry.
LLVM_ABI void initializeDeviceGlobalVarEntryInfo(StringRef Name, OMPTargetGlobalVarEntryKind Flags, unsigned Order)
Initialize device global variable entry.
LLVM_ABI void actOnDeviceGlobalVarEntriesInfo(const OffloadDeviceGlobalVarEntryInfoActTy &Action)
OMPTargetRegionEntryKind
Kind of the target registry entry.
@ OMPTargetRegionEntryTargetRegion
Mark the entry as target region.
LLVM_ABI void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, const TargetRegionEntryInfo &EntryInfo)
LLVM_ABI bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, bool IgnoreAddressId=false) const
Return true if a target region entry with the provided information exists.
LLVM_ABI void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, Constant *Addr, Constant *ID, OMPTargetRegionEntryKind Flags)
Register target region entry.
LLVM_ABI void actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action)
LLVM_ABI void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo, unsigned Order)
Initialize target region entry.
OMPTargetGlobalVarEntryKind
Kind of the global variable entry.
@ OMPTargetGlobalVarEntryEnter
Mark the entry as a declare target enter.
@ OMPTargetGlobalRegisterRequires
Mark the entry as a register requires global.
@ OMPTargetGlobalVarEntryIndirect
Mark the entry as a declare target indirect global.
@ OMPTargetGlobalVarEntryLink
Mark the entry as a 'declare target link'.
@ OMPTargetGlobalVarEntryTo
Mark the entry as a 'declare target to'.
@ OMPTargetGlobalVarEntryIndirectVTable
Mark the entry as a declare target indirect vtable.
function_ref< void(const TargetRegionEntryInfo &EntryInfo, const OffloadEntryInfoTargetRegion &)> OffloadTargetRegionEntryInfoActTy
Applies action Action on all registered entries.
bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const
Checks if the variable with the given name has been registered already.
LLVM_ABI bool empty() const
Return true if there are no entries defined.
std::optional< bool > IsTargetDevice
Flag to define whether to generate code for the role of the OpenMP host (if set to false) or device (...
std::optional< bool > IsGPU
Flag for specifying if the compilation is done for an accelerator.
LLVM_ABI int64_t getRequiresFlags() const
Returns requires directive clauses as flags compatible with those expected by libomptarget.
std::optional< bool > OpenMPOffloadMandatory
Flag for specifying if offloading is mandatory.
LLVM_ABI void setHasRequiresReverseOffload(bool Value)
LLVM_ABI OpenMPIRBuilderConfig()
LLVM_ABI bool hasRequiresUnifiedSharedMemory() const
LLVM_ABI void setHasRequiresUnifiedSharedMemory(bool Value)
unsigned getDefaultTargetAS() const
LLVM_ABI bool hasRequiresDynamicAllocators() const
LLVM_ABI void setHasRequiresUnifiedAddress(bool Value)
bool isTargetDevice() const
LLVM_ABI void setHasRequiresDynamicAllocators(bool Value)
LLVM_ABI bool hasRequiresReverseOffload() const
bool hasRequiresFlags() const
LLVM_ABI bool hasRequiresUnifiedAddress() const
Struct that keeps the information that should be kept throughout a 'target data' region.
An interface to create LLVM-IR for OpenMP directives.
LLVM_ABI InsertPointOrErrorTy createOrderedThreadsSimd(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsThreads)
Generator for 'omp ordered [threads | simd]'.
LLVM_ABI Constant * getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize, omp::IdentFlag Flags=omp::IdentFlag(0), unsigned Reserve2Flags=0)
Return an ident_t* encoding the source location SrcLocStr and Flags.
LLVM_ABI FunctionCallee getOrCreateRuntimeFunction(Module &M, omp::RuntimeFunction FnID)
Return the function declaration for the runtime function with FnID.
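A hedged sketch of the usual source-location plumbing built from these entry points (all of which appear in this listing), assuming an OpenMPIRBuilder OMPBuilder whose internal builder is already positioned:

uint32_t SrcLocStrSize;
Constant *SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
Constant *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize);
Value *ThreadID = OMPBuilder.getOrCreateThreadID(Ident); // __kmpc_global_thread_num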
LLVM_ABI InsertPointOrErrorTy createCancel(const LocationDescription &Loc, Value *IfCondition, omp::Directive CanceledDirective)
Generator for 'omp cancel'.
std::function< Expected< Function * >(StringRef FunctionName)> FunctionGenCallback
Functions used to generate a function with the given name.
ReductionGenCBKind
Enum class for the ReductionGen callback type to be used.
LLVM_ABI CanonicalLoopInfo * collapseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, InsertPointTy ComputeIP)
Collapse a loop nest into a single loop.
LLVM_ABI InsertPointOrErrorTy createTask(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, bool Tied=true, Value *Final=nullptr, Value *IfCondition=nullptr, SmallVector< DependData > Dependencies={}, bool Mergeable=false, Value *EventHandle=nullptr, Value *Priority=nullptr)
Generator for '#omp task'.
LLVM_ABI void createTaskyield(const LocationDescription &Loc)
Generator for 'omp taskyield'.
std::function< Error(InsertPointTy CodeGenIP)> FinalizeCallbackTy
Callback type for variable finalization (think destructors).
LLVM_ABI void emitBranch(BasicBlock *Target)
LLVM_ABI Error emitCancelationCheckImpl(Value *CancelFlag, omp::Directive CanceledDirective)
Generate control flow and cleanup for cancellation.
static LLVM_ABI void writeThreadBoundsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
LLVM_ABI void emitTaskwaitImpl(const LocationDescription &Loc)
Generate a taskwait runtime call.
LLVM_ABI Constant * registerTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, Function *OutlinedFunction, StringRef EntryFnName, StringRef EntryFnIDName)
Registers the given function and sets up the attributes of the function. Returns the FunctionID.
LLVM_ABI GlobalVariable * emitKernelExecutionMode(StringRef KernelName, omp::OMPTgtExecModeFlags Mode)
Emit the kernel execution mode.
LLVM_ABI InsertPointOrErrorTy createDistribute(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB)
Generator for #omp distribute
LLVM_ABI void initialize()
Initialize the internal state; this will put structure types and potentially other helpers into the ...
LLVM_ABI void createTargetDeinit(const LocationDescription &Loc, int32_t TeamsReductionDataSize=0, int32_t TeamsReductionBufferLength=1024)
Create a runtime call for kmpc_target_deinit.
LLVM_ABI InsertPointOrErrorTy createTaskgroup(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB)
Generator for the taskgroup construct.
LLVM_ABI InsertPointTy createAtomicWrite(const LocationDescription &Loc, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, InsertPointTy AllocaIP)
Emit atomic write for: X = Expr. Only scalar data types.
LLVM_ABI void loadOffloadInfoMetadata(Module &M)
Loads all the offload entries information from the host IR metadata.
function_ref< MapInfosTy &(InsertPointTy CodeGenIP)> GenMapInfoCallbackTy
Callback type for creating the map infos for the kernel parameters.
LLVM_ABI Error emitOffloadingArrays(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info, CustomMapperCallbackTy CustomMapperCB, bool IsNonContiguous=false, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
LLVM_ABI void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully unroll a loop.
function_ref< Error(InsertPointTy CodeGenIP, Value *IndVar)> LoopBodyGenCallbackTy
Callback type for loop body code generation.
LLVM_ABI InsertPointOrErrorTy emitScanReduction(const LocationDescription &Loc, ArrayRef< llvm::OpenMPIRBuilder::ReductionInfo > ReductionInfos, ScanInfo *ScanRedInfo)
This function performs the scan reduction of the values updated in the input phase.
LLVM_ABI void emitFlush(const LocationDescription &Loc)
Generate a flush runtime call.
static LLVM_ABI std::pair< int32_t, int32_t > readThreadBoundsForKernel(const Triple &T, Function &Kernel)
OpenMPIRBuilderConfig Config
The OpenMPIRBuilder Configuration.
LLVM_ABI CallInst * createOMPInteropDestroy(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_destroy.
LLVM_ABI Error emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen, BodyGenCallbackTy ElseGen, InsertPointTy AllocaIP={})
Emits code for the OpenMP 'if' clause using the specified BodyGenCallbackTy. Here is the logic: if (Cond) { Th...
function_ref< InsertPointOrErrorTy( Argument &Arg, Value *Input, Value *&RetVal, InsertPointTy AllocaIP, InsertPointTy CodeGenIP)> TargetGenArgAccessorsCallbackTy
LLVM_ABI void emitUsed(StringRef Name, ArrayRef< llvm::WeakTrackingVH > List)
Emit the llvm.used metadata.
LLVM_ABI InsertPointOrErrorTy createSingle(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsNowait, ArrayRef< llvm::Value * > CPVars={}, ArrayRef< llvm::Function * > CPFuncs={})
Generator for 'omp single'.
LLVM_ABI InsertPointOrErrorTy createTeams(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, Value *NumTeamsLower=nullptr, Value *NumTeamsUpper=nullptr, Value *ThreadLimit=nullptr, Value *IfExpr=nullptr)
Generator for #omp teams
std::forward_list< CanonicalLoopInfo > LoopInfos
Collection of owned canonical loop objects that eventually need to be free'd.
LLVM_ABI void createTaskwait(const LocationDescription &Loc)
Generator for 'omp taskwait'.
LLVM_ABI CanonicalLoopInfo * createLoopSkeleton(DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore, BasicBlock *PostInsertBefore, const Twine &Name={})
Create the control flow structure of a canonical OpenMP loop.
LLVM_ABI std::string createPlatformSpecificName(ArrayRef< StringRef > Parts) const
Create a name using the platform-specific separators.
LLVM_ABI FunctionCallee createDispatchNextFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_next_* runtime function for the specified size IVSize and sign IVSigned.
static LLVM_ABI void getKernelArgsVector(TargetKernelArgs &KernelArgs, IRBuilderBase &Builder, SmallVector< Value * > &ArgsVector)
Create the kernel args vector used by emitTargetKernel.
function_ref< Error(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)> BodyGenCallbackTy
Callback type for body (=inner region) code generation.
LLVM_ABI void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully or partially unroll a loop.
LLVM_ABI InsertPointOrErrorTy createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable)
Generator for 'omp parallel'.
LLVM_ABI omp::OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position)
Get OMP_MAP_MEMBER_OF flag with extra bits reserved based on the position given.
LLVM_ABI void addAttributes(omp::RuntimeFunction FnID, Function &Fn)
Add attributes known for FnID to Fn.
Module & M
The underlying LLVM-IR module.
StringMap< Constant * > SrcLocStrMap
Map to remember source location strings.
LLVM_ABI void createMapperAllocas(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumOperands, struct MapperAllocas &MapperAllocas)
Create the alloca instructions used in calls to mapper functions.
LLVM_ABI Constant * getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the source location LocStr.
void addOutlineInfo(OutlineInfo &&OI)
Add a new region that will be outlined later.
LLVM_ABI Error emitTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, FunctionGenCallback &GenerateFunctionCallback, bool IsOffloadEntry, Function *&OutlinedFn, Constant *&OutlinedFnID)
Create a unique name for the entry function using the source location information of the current targ...
LLVM_ABI Expected< SmallVector< llvm::CanonicalLoopInfo * > > createCanonicalScanLoops(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, InsertPointTy ComputeIP, const Twine &Name, ScanInfo *ScanRedInfo)
Generator for the control flow structure of OpenMP canonical loops if the parent directive has an ...
LLVM_ABI FunctionCallee createDispatchFiniFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_fini_* runtime function for the specified size IVSize and sign IVSigned.
LLVM_ABI void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor, CanonicalLoopInfo **UnrolledCLI)
Partially unroll a loop.
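For instance, a sketch assuming an OpenMPIRBuilder OMPBuilder, a DebugLoc DL, and a CanonicalLoopInfo *CLI obtained from createCanonicalLoop:

CanonicalLoopInfo *Unrolled = nullptr;
OMPBuilder.unrollLoopPartial(DL, CLI, /*Factor=*/4, &Unrolled);
// If non-null, Unrolled receives the CanonicalLoopInfo of the loop that
// remains after partial unrolling, e.g. for further transformations.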
function_ref< Error(Value *DeviceID, Value *RTLoc, IRBuilderBase::InsertPoint TargetTaskAllocaIP)> TargetTaskBodyCallbackTy
Callback type for generating the bodies of device directives that require outer target tasks (e....
Expected< MapInfosTy & > MapInfosOrErrorTy
LLVM_ABI void emitTaskyieldImpl(const LocationDescription &Loc)
Generate a taskyield runtime call.
LLVM_ABI void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc, Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg, struct MapperAllocas &MapperAllocas, int64_t DeviceID, unsigned NumOperands)
Create the call for the target mapper function.
function_ref< Expected< Function * >(unsigned int)> CustomMapperCallbackTy
LLVM_ABI InsertPointTy createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D, AtomicOrdering AO, omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly)
Emit atomic compare for constructs (only scalar data types); cond-expr-stmt: x = x ordop expr ?
LLVM_ABI InsertPointTy createOrderedDepend(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumLoops, ArrayRef< llvm::Value * > StoreValues, const Twine &Name, bool IsDependSource)
Generator for 'omp ordered depend (source | sink)'.
LLVM_ABI InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr, llvm::IntegerType *IntPtrTy, bool BranchtoEnd=true)
Generate conditional branch and relevant BasicBlocks through which private threads copy the 'copyin' ...
function_ref< InsertPointOrErrorTy( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original, Value &Inner, Value *&ReplVal)> PrivatizeCallbackTy
Callback type for variable privatization (think copy & default constructor).
LLVM_ABI bool isFinalized()
Check whether the finalize function has already run.
function_ref< InsertPointOrErrorTy( InsertPointTy AllocaIP, InsertPointTy CodeGenIP)> TargetBodyGenCallbackTy
SmallVector< FinalizationInfo, 8 > FinalizationStack
The finalization stack made up of finalize callbacks currently in-flight, wrapped into FinalizationIn...
LLVM_ABI std::vector< CanonicalLoopInfo * > tileLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, ArrayRef< Value * > TileSizes)
Tile a loop nest.
LLVM_ABI CallInst * createOMPInteropInit(const LocationDescription &Loc, Value *InteropVar, omp::OMPInteropType InteropType, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_init.
LLVM_ABI void finalize(Function *Fn=nullptr)
Finalize the underlying module, e.g., by outlining regions.
SmallVector< OutlineInfo, 16 > OutlineInfos
Collection of regions that need to be outlined during finalization.
LLVM_ABI Function * getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID)
LLVM_ABI InsertPointTy createTargetInit(const LocationDescription &Loc, const llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs)
The omp target interface.
LLVM_ABI InsertPointOrErrorTy createReductions(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< ReductionInfo > ReductionInfos, ArrayRef< bool > IsByRef, bool IsNoWait=false, bool IsTeamsReduction=false)
Generator for 'omp reduction'.
const Triple T
The target triple of the underlying module.
DenseMap< std::pair< Constant *, uint64_t >, Constant * > IdentMap
Map to remember existing ident_t*.
LLVM_ABI CallInst * createOMPFree(const LocationDescription &Loc, Value *Addr, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_free.
LLVM_ABI FunctionCallee createForStaticInitFunction(unsigned IVSize, bool IVSigned, bool IsGPUDistribute)
Returns __kmpc_for_static_init_* runtime function for the specified size IVSize and sign IVSigned.
LLVM_ABI CallInst * createOMPAlloc(const LocationDescription &Loc, Value *Size, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_Alloc.
LLVM_ABI void emitNonContiguousDescriptor(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info)
Emit an array of struct descriptors to be assigned to the offload args.
LLVM_ABI InsertPointOrErrorTy createSection(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for 'omp section'.
function_ref< InsertPointOrErrorTy(InsertPointTy)> EmitFallbackCallbackTy
Callback function type for functions emitting the host fallback code that is executed when the kernel...
static LLVM_ABI TargetRegionEntryInfo getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack, vfs::FileSystem &VFS, StringRef ParentName="")
Creates a unique info for a target entry when provided a filename and line number from.
LLVM_ABI void emitBlock(BasicBlock *BB, Function *CurFn, bool IsFinished=false)
LLVM_ABI Value * getOrCreateThreadID(Value *Ident)
Return the current thread ID.
LLVM_ABI InsertPointOrErrorTy createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for 'omp master'.
LLVM_ABI InsertPointOrErrorTy createTargetData(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond, TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB, CustomMapperCallbackTy CustomMapperCB, omp::RuntimeFunction *MapperFunc=nullptr, function_ref< InsertPointOrErrorTy(InsertPointTy CodeGenIP, BodyGenTy BodyGenType)> BodyGenCB=nullptr, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr, Value *SrcLocInfo=nullptr)
Generator for 'omp target data'.
CallInst * createRuntimeFunctionCall(FunctionCallee Callee, ArrayRef< Value * > Args, StringRef Name="")
LLVM_ABI InsertPointOrErrorTy emitKernelLaunch(const LocationDescription &Loc, Value *OutlinedFnID, EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args, Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP)
Generate a target region entry call and host fallback call.
StringMap< GlobalVariable *, BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
LLVM_ABI InsertPointOrErrorTy createCancellationPoint(const LocationDescription &Loc, omp::Directive CanceledDirective)
Generator for 'omp cancellation point'.
LLVM_ABI FunctionCallee createDispatchInitFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_init_* runtime function for the specified size IVSize and sign IVSigned.
LLVM_ABI InsertPointOrErrorTy createScan(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< llvm::Value * > ScanVars, ArrayRef< llvm::Type * > ScanVarsType, bool IsInclusive, ScanInfo *ScanRedInfo)
This directive splits and directs the control flow to input phase blocks or scan phase blocks based on...
LLVM_ABI CallInst * createOMPInteropUse(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_use.
IRBuilder<>::InsertPoint InsertPointTy
Type used throughout for insertion points.
LLVM_ABI GlobalVariable * getOrCreateInternalVariable(Type *Ty, const StringRef &Name, std::optional< unsigned > AddressSpace={})
Gets (if a variable with the given name already exists) or creates an internal global variable with the spe...
LLVM_ABI GlobalVariable * createOffloadMapnames(SmallVectorImpl< llvm::Constant * > &Names, std::string VarName)
Create the global variable holding the offload names information.
std::forward_list< ScanInfo > ScanInfos
Collection of owned ScanInfo objects that eventually need to be free'd.
static LLVM_ABI void writeTeamsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
LLVM_ABI Value * calculateCanonicalLoopTripCount(const LocationDescription &Loc, Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, const Twine &Name="loop")
Calculate the trip count of a canonical loop.
LLVM_ABI InsertPointOrErrorTy createBarrier(const LocationDescription &Loc, omp::Directive Kind, bool ForceSimpleCall=false, bool CheckCancelFlag=true)
Emitter methods for OpenMP directives.
LLVM_ABI void setCorrectMemberOfFlag(omp::OpenMPOffloadMappingFlags &Flags, omp::OpenMPOffloadMappingFlags MemberOfFlag)
Given an initial flag set, this function modifies it to contain the passed in MemberOfFlag generated ...
LLVM_ABI Error emitOffloadingArraysAndArgs(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info, TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo, CustomMapperCallbackTy CustomMapperCB, bool IsNonContiguous=false, bool ForEndCall=false, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr)
Allocates memory for and populates the arrays required for offloading (offload_{baseptrs|ptrs|mappers...
LLVM_ABI Constant * getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the default source location.
LLVM_ABI InsertPointOrErrorTy createCritical(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst)
Generator for 'omp critical'.
LLVM_ABI void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size, int32_t Flags, GlobalValue::LinkageTypes, StringRef Name="")
Creates offloading entry for the provided entry ID ID, address Addr, size Size, and flags Flags.
static LLVM_ABI unsigned getOpenMPDefaultSimdAlign(const Triple &TargetTriple, const StringMap< bool > &Features)
Get the default alignment value for given target.
LLVM_ABI unsigned getFlagMemberOffset()
Get the offset of the OMP_MAP_MEMBER_OF field.
LLVM_ABI InsertPointOrErrorTy applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, bool NeedsBarrier, llvm::omp::ScheduleKind SchedKind=llvm::omp::OMP_SCHEDULE_Default, Value *ChunkSize=nullptr, bool HasSimdModifier=false, bool HasMonotonicModifier=false, bool HasNonmonotonicModifier=false, bool HasOrderedClause=false, omp::WorksharingLoopType LoopType=omp::WorksharingLoopType::ForStaticLoop, bool NoLoop=false, bool HasDistSchedule=false, Value *DistScheduleChunkSize=nullptr)
Modifies the canonical loop to be a workshare loop.
LLVM_ABI InsertPointOrErrorTy createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, AtomicOpValue &V, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr, bool IsIgnoreDenormalMode=false, bool IsFineGrainedMemory=false, bool IsRemoteMemory=false)
Emit atomic update for constructs (only scalar data types): V = X; X = X BinOp Expr, ...
LLVM_ABI void createOffloadEntriesAndInfoMetadata(EmitMetadataErrorReportFunctionTy &ErrorReportFunction)
LLVM_ABI void applySimd(CanonicalLoopInfo *Loop, MapVector< Value *, Value * > AlignedVars, Value *IfCond, omp::OrderKind Order, ConstantInt *Simdlen, ConstantInt *Safelen)
Add metadata to simd-ize a loop.
LLVM_ABI InsertPointOrErrorTy createAtomicUpdate(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr, bool IsIgnoreDenormalMode=false, bool IsFineGrainedMemory=false, bool IsRemoteMemory=false)
Emit atomic update for constructs: X = X BinOp Expr, or X = Expr BinOp X. For complex operations: X = ...
std::function< std::tuple< std::string, uint64_t >()> FileIdentifierInfoCallbackTy
bool isLastFinalizationInfoCancellable(omp::Directive DK)
Return true if the last entry in the finalization stack is of kind DK and cancellable.
LLVM_ABI InsertPointTy emitTargetKernel(const LocationDescription &Loc, InsertPointTy AllocaIP, Value *&Return, Value *Ident, Value *DeviceID, Value *NumTeams, Value *NumThreads, Value *HostPtr, ArrayRef< Value * > KernelArgs)
Generate a target region entry call.
LLVM_ABI GlobalVariable * createOffloadMaptypes(SmallVectorImpl< uint64_t > &Mappings, std::string VarName)
Create the global variable holding the offload mappings information.
LLVM_ABI ~OpenMPIRBuilder()
LLVM_ABI CallInst * createCachedThreadPrivate(const LocationDescription &Loc, llvm::Value *Pointer, llvm::ConstantInt *Size, const llvm::Twine &Name=Twine(""))
Create a runtime call for kmpc_threadprivate_cached.
IRBuilder Builder
The LLVM-IR Builder used to create IR.
LLVM_ABI InsertPointOrErrorTy createTarget(const LocationDescription &Loc, bool IsOffloadEntry, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::InsertPointTy CodeGenIP, TargetDataInfo &Info, TargetRegionEntryInfo &EntryInfo, const TargetKernelDefaultAttrs &DefaultAttrs, const TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, SmallVectorImpl< Value * > &Inputs, GenMapInfoCallbackTy GenMapInfoCB, TargetBodyGenCallbackTy BodyGenCB, TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB, CustomMapperCallbackTy CustomMapperCB, const SmallVector< DependData > &Dependencies, bool HasNowait=false, Value *DynCGroupMem=nullptr, omp::OMPDynGroupprivateFallbackType DynCGroupMemFallback=omp::OMPDynGroupprivateFallbackType::Abort)
Generator for 'omp target'.
LLVM_ABI GlobalValue * createGlobalFlag(unsigned Value, StringRef Name)
Create a hidden global flag Name in the module with initial value Value.
LLVM_ABI void emitOffloadingArraysArgument(IRBuilderBase &Builder, OpenMPIRBuilder::TargetDataRTArgs &RTArgs, OpenMPIRBuilder::TargetDataInfo &Info, bool ForEndCall=false)
Emit the arguments to be passed to the runtime library based on the arrays of base pointers,...
LLVM_ABI InsertPointOrErrorTy createMasked(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, Value *Filter)
Generator for 'omp masked'.
LLVM_ABI Expected< CanonicalLoopInfo * > createCanonicalLoop(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *TripCount, const Twine &Name="loop")
Generator for the control flow structure of an OpenMP canonical loop.
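A hedged sketch of the callback-driven usage, assuming an OpenMPIRBuilder OMPBuilder, a LocationDescription Loc, and an i32 trip count TripCount:

auto BodyGen = [&](OpenMPIRBuilder::InsertPointTy CodeGenIP,
                   Value *IV) -> Error {
  // Emit the loop body at CodeGenIP; IV is the canonical induction variable.
  return Error::success();
};
Expected<CanonicalLoopInfo *> LoopOrErr =
    OMPBuilder.createCanonicalLoop(Loc, BodyGen, TripCount);
if (!LoopOrErr)
  report_fatal_error(LoopOrErr.takeError()); // or propagate the error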
function_ref< Expected< InsertPointTy >( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value *DestPtr, Value *SrcPtr)> TaskDupCallbackTy
Callback type for task duplication function code generation.
LLVM_ABI Value * getSizeInBytes(Value *BasePtr)
Computes the size of type in bytes.
LLVM_ABI InsertPointOrErrorTy createReductionsGPU(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< ReductionInfo > ReductionInfos, ArrayRef< bool > IsByRef, bool IsNoWait=false, bool IsTeamsReduction=false, ReductionGenCBKind ReductionGenCBKind=ReductionGenCBKind::MLIR, std::optional< omp::GV > GridValue={}, unsigned ReductionBufNum=1024, Value *SrcLocInfo=nullptr)
Design of OpenMP reductions on the GPU.
LLVM_ABI Expected< Function * > emitUserDefinedMapper(function_ref< MapInfosOrErrorTy(InsertPointTy CodeGenIP, llvm::Value *PtrPHI, llvm::Value *BeginArg)> PrivAndGenMapInfoCB, llvm::Type *ElemTy, StringRef FuncName, CustomMapperCallbackTy CustomMapperCB)
Emit the user-defined mapper function.
LLVM_ABI FunctionCallee createDispatchDeinitFunction()
Returns __kmpc_dispatch_deinit runtime function.
LLVM_ABI void registerTargetGlobalVariable(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage, Type *LlvmPtrTy, Constant *Addr)
Registers a target variable for device or host.
BodyGenTy
Type of BodyGen to use for region codegen.
SmallVector< llvm::Function *, 16 > ConstantAllocaRaiseCandidates
A collection of candidate target functions whose constant allocas will attempt to be raised on a cal...
OffloadEntriesInfoManager OffloadInfoManager
Info manager to keep track of target regions.
static LLVM_ABI std::pair< int32_t, int32_t > readTeamBoundsForKernel(const Triple &T, Function &Kernel)
Read/write the bounds on teams for Kernel.
const std::string ompOffloadInfoName
OMP Offload Info Metadata name string.
Expected< InsertPointTy > InsertPointOrErrorTy
Type used to represent an insertion point or an error value.
LLVM_ABI InsertPointTy createCopyPrivate(const LocationDescription &Loc, llvm::Value *BufSize, llvm::Value *CpyBuf, llvm::Value *CpyFn, llvm::Value *DidIt)
Generator for __kmpc_copyprivate.
LLVM_ABI InsertPointOrErrorTy createSections(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< StorableBodyGenCallbackTy > SectionCBs, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait)
Generator for 'omp sections'.
LLVM_ABI InsertPointOrErrorTy emitTargetTask(TargetTaskBodyCallbackTy TaskBodyCB, Value *DeviceID, Value *RTLoc, OpenMPIRBuilder::InsertPointTy AllocaIP, const SmallVector< llvm::OpenMPIRBuilder::DependData > &Dependencies, const TargetDataRTArgs &RTArgs, bool HasNoWait)
Generate a target-task for the target construct.
std::function< void(EmitMetadataErrorKind, TargetRegionEntryInfo)> EmitMetadataErrorReportFunctionTy
Callback function type.
LLVM_ABI Expected< ScanInfo * > scanInfoInitialize()
Creates a ScanInfo object, allocates and returns the pointer.
LLVM_ABI InsertPointTy createAtomicRead(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOrdering AO, InsertPointTy AllocaIP)
Emit atomic read for: V = X. Only scalar data types.
bool updateToLocation(const LocationDescription &Loc)
Update the internal location to Loc.
LLVM_ABI void createFlush(const LocationDescription &Loc)
Generator for 'omp flush'.
LLVM_ABI Constant * getAddrOfDeclareTargetVar(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, Type *LlvmPtrTy, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage)
Retrieve (or create if non-existent) the address of a declare target variable, used in conjunction wi...
EmitMetadataErrorKind
The kind of errors that can occur when emitting the offload entries and metadata.
@ EMIT_MD_DECLARE_TARGET_ERROR
@ EMIT_MD_GLOBAL_VAR_INDIRECT_ERROR
@ EMIT_MD_GLOBAL_VAR_LINK_ERROR
@ EMIT_MD_TARGET_REGION_ERROR
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Pseudo-analysis pass that exposes the PassInstrumentation to pass managers.
Class to represent pointers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
Analysis pass that exposes the ScalarEvolution for a function.
LLVM_ABI ScalarEvolution run(Function &F, FunctionAnalysisManager &AM)
The main scalar evolution driver.
ScanInfo holds the information to assist in lowering of Scan reduction.
llvm::SmallDenseMap< llvm::Value *, llvm::Value * > * ScanBuffPtrs
Maps the private reduction variable to the pointer of the temporary buffer.
llvm::BasicBlock * OMPScanLoopExit
Exit block of loop body.
llvm::Value * IV
Keeps track of value of iteration variable for input/scan loop to be used for Scan directive lowering...
llvm::BasicBlock * OMPAfterScanBlock
Dominates the body of the loop after the scan directive.
llvm::BasicBlock * OMPScanInit
Block before loop body where scan initializations are done.
llvm::BasicBlock * OMPBeforeScanBlock
Dominates the body of the loop before the scan directive.
llvm::BasicBlock * OMPScanFinish
Block after loop body where scan finalizations are done.
llvm::Value * Span
Stores the span of canonical loop being lowered to be used for temporary buffer allocation or Finaliz...
bool OMPFirstScanLoop
If true, the input phase is being lowered; otherwise, the scan phase is being lowered.
llvm::BasicBlock * OMPScanDispatch
Controls the flow to before or after scan blocks.
A vector that has set insertion semantics.
bool remove_if(UnaryPredicate P)
Remove items from the set vector based on a predicate function.
bool empty() const
Determine if the SetVector is empty or not.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
bool test(unsigned Idx) const
bool all() const
Returns true if all bits are set.
bool any() const
Returns true if any bit is set.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
bool remove_if(UnaryPredicate P)
Remove elements that match the given predicate.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(StringRef RHS)
Append from a StringRef.
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
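A minimal sketch of the small-container idioms above, assuming a Function F:

SmallVector<BasicBlock *, 8> Blocks;
SmallPtrSet<BasicBlock *, 8> Seen;
for (BasicBlock &BB : F)
  if (Seen.insert(&BB).second) // .second is true only on first insertion
    Blocks.push_back(&BB);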
An instruction for storing to memory.
void setAlignment(Align Align)
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
ValueTy lookup(StringRef Key) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
StringRef - Represent a constant reference to a string, i.e.
std::string str() const
str - Get the contents as an std::string.
constexpr bool empty() const
empty - Check if the string is empty.
constexpr size_t size() const
size - Get the string size.
size_t count(char C) const
Return the number of occurrences of C in the string.
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
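A short worked sketch of these StringRef queries; note that a StringRef never owns its data:

StringRef S("omp.loop.exit");
bool HasSuffix = S.ends_with(".exit"); // true
StringRef Prefix = S.drop_back(5);     // "omp.loop"
size_t Dots = S.count('.');            // 2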
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
static LLVM_ABI StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Type * getElementType(unsigned N) const
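A minimal sketch, assuming an LLVMContext Ctx: a literal struct type { i32, ptr } and an element query:

StructType *STy = StructType::get(
    Ctx, {Type::getInt32Ty(Ctx), PointerType::getUnqual(Ctx)});
Type *First = STy->getElementType(0); // i32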
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Analysis pass providing the TargetTransformInfo.
LLVM_ABI Result run(const Function &F, FunctionAnalysisManager &)
TargetTransformInfo Result
Analysis pass providing the TargetLibraryInfo.
Target - Wrapper for Target specific information.
TargetMachine * createTargetMachine(const Triple &TT, StringRef CPU, StringRef Features, const TargetOptions &Options, std::optional< Reloc::Model > RM, std::optional< CodeModel::Model > CM=std::nullopt, CodeGenOptLevel OL=CodeGenOptLevel::Default, bool JIT=false) const
createTargetMachine - Create a target specific machine implementation for the specified Triple.
Triple - Helper class for working with autoconf configuration names.
bool isPPC() const
Tests whether the target is PowerPC (32- or 64-bit LE or BE).
bool isX86() const
Tests whether the target is x86 (32- or 64-bit).
bool isWasm() const
Tests whether the target is wasm (32- and 64-bit).
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI Type * getStructElementType(unsigned N) const
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isVoidTy() const
Return true if this is 'void'.
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
This function has undefined behavior.
Produce an estimate of the unrolled cost of the specified loop.
LLVM_ABI bool canUnroll() const
Whether it is legal to unroll this loop.
uint64_t getRolledLoopSize() const
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVM_ABI void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
LLVM_ABI User * getUniqueUndroppableUser()
Return true if there is exactly one unique user of this value that cannot be dropped (that user can h...
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
iterator_range< use_iterator > uses()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
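A sketch of the use-rewriting helpers, assuming two Values OldV and NewV of the same type (use one variant or the other):

OldV->replaceAllUsesWith(NewV); // unconditionally rewrite every use
// Conditional variant: only rewrite uses whose user is an Instruction.
OldV->replaceUsesWithIf(
    NewV, [](Use &U) { return isa<Instruction>(U.getUser()); });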
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
A raw_ostream that writes to a SmallVector or SmallString.
The virtual file system interface.
llvm::ErrorOr< std::unique_ptr< llvm::MemoryBuffer > > getBufferForFile(const Twine &Name, int64_t FileSize=-1, bool RequiresNullTerminator=true, bool IsVolatile=false, bool IsText=true)
This is a convenience method that opens a file, gets its content and then closes the file.
virtual llvm::ErrorOr< Status > status(const Twine &Path)=0
Get the status of the entry at Path, if one exists.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
initializer< Ty > init(const Ty &Val)
LLVM_ABI GlobalVariable * emitOffloadingEntry(Module &M, object::OffloadKind Kind, Constant *Addr, StringRef Name, uint64_t Size, uint32_t Flags, uint64_t Data, Constant *AuxAddr=nullptr, StringRef SectionName="llvm_offload_entries")
Create an offloading section struct used to register this global at runtime.
OpenMPOffloadMappingFlags
Values for bit flags used to specify the mapping type for offloading.
@ OMP_MAP_PTR_AND_OBJ
The element being mapped is a pointer-pointee pair; both the pointer and the pointee should be mapped...
@ OMP_MAP_MEMBER_OF
The 16 MSBs of the flags indicate whether the entry is member of some struct/class.
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their definition in openmp/runtime/src/kmp...
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
constexpr const GV & getAMDGPUGridValues()
static constexpr GV SPIRVGridValues
For generic SPIR-V GPUs.
OMPDynGroupprivateFallbackType
The fallback types for the dyn_groupprivate clause.
static constexpr GV NVPTXGridValues
For Nvidia GPUs.
Function * Kernel
Summary of a kernel (=entry point for target offloading).
WorksharingLoopType
A type of worksharing loop construct.
OMPAtomicCompareOp
Atomic compare operations. Currently OpenMP only supports ==, >, and <.
NodeAddr< PhiNode * > Phi
friend class Instruction
Iterator for Instructions in a BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
LLVM_ABI BasicBlock * splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, llvm::Twine Suffix=".split")
Like splitBB, but reuses the current block's name for the new name.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
hash_code hash_value(const FixedPointSemantics &Val)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI Expected< std::unique_ptr< Module > > parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, ParserCallbacks Callbacks={})
Read the specified bitcode file, returning the module.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
unsigned getPointerAddressSpace(const Type *T)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
auto successors(const MachineBasicBlock *BB)
testing::Matcher< const detail::ErrorHolder & > Failed()
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
LLVM_ABI BasicBlock * splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch, DebugLoc DL, llvm::Twine Name={})
Split a BasicBlock at an InsertPoint, even if the block is degenerate (missing the terminator).
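A hedged sketch of splitting at the current insertion point, assuming an IRBuilder<> Builder and a DebugLoc DL; the block name "cont" is illustrative:

BasicBlock *Cont = splitBB(Builder.saveIP(), /*CreateBranch=*/true, DL, "cont");
Builder.SetInsertPoint(Cont, Cont->begin()); // keep emitting in the new block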
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
std::string utostr(uint64_t X, bool isNeg=false)
ErrorOr< T > expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected< T > Val)
bool isa_and_nonnull(const Y &Val)
LLVM_ABI bool convertUsersOfConstantsToInstructions(ArrayRef< Constant * > Consts, Function *RestrictToFunc=nullptr, bool RemoveDeadConstants=true, bool IncludeSelf=false)
Replace constant expressions users of the given constants with instructions.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
CodeGenOptLevel
Code generation optimization level.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
LLVM_ABI bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ LLVM_MARK_AS_BITMASK_ENUM
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
@ Mul
Product of integers.
DWARFExpression::Operation Op
LLVM_ABI void remapInstructionsInBlocks(ArrayRef< BasicBlock * > Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New, bool CreateBranch, DebugLoc DL)
Move the instruction after an InsertPoint to the beginning of another BasicBlock.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
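cast<> asserts that the cast is valid; prefer dyn_cast<> when failure is an expected case. BB is a hypothetical block known to end in a branch:
  #include "llvm/IR/Instructions.h"
  #include "llvm/Support/Casting.h"
  auto *Br = llvm::cast<llvm::BranchInst>(BB->getTerminator());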
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
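A hedged sketch; BB and SplitPt are hypothetical. Everything from SplitPt to the end of BB moves to the returned block, and BB gains an unconditional branch to it:
  #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  llvm::BasicBlock *Tail = llvm::SplitBlock(BB, SplitPt, /*DT=*/nullptr,
                                            /*LI=*/nullptr, /*MSSAU=*/nullptr,
                                            "split.tail");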
auto predecessors(const MachineBasicBlock *BB)
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI Constant * ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, ArrayRef< unsigned > Idxs)
Attempt to constant fold an insertvalue instruction with the specified operands and indices.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
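A self-contained sketch of the half-open range:
  #include "llvm/ADT/Sequence.h"
  for (int I : llvm::seq(0, 4))
    (void)I; // visits 0, 1, 2, 3 (End is excluded)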
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks; each deleted block must have no predecessors outside the set being deleted.
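A hedged sketch; Dead stands in for a list of blocks already disconnected from the CFG:
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  llvm::SmallVector<llvm::BasicBlock *, 4> Dead; // filled by earlier analysis
  llvm::DeleteDeadBlocks(Dead);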
bool to_integer(StringRef S, N &Num, unsigned Base=0)
Convert the string S to an integer of the specified type using the radix Base. If Base is 0, the radix is auto-sensed from the string's prefix (e.g. "0x" means hexadecimal).
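A self-contained sketch of the auto-sensing behavior:
  #include "llvm/ADT/StringExtras.h"
  #include "llvm/ADT/StringRef.h"
  unsigned N = 0;
  bool Ok = llvm::to_integer(llvm::StringRef("0x10"), N, /*Base=*/0);
  // Ok == true, N == 16: radix auto-sensed from the "0x" prefix.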
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
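A hedged sketch against the debug-record (non-intrinsic) representation; I is a hypothetical llvm::Instruction:
  #include "llvm/IR/DebugProgramInstruction.h"
  for (llvm::DbgVariableRecord &DVR :
       llvm::filterDbgVars(I.getDbgRecordRange()))
    (void)DVR; // only variable-location records, already downcast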
This struct is a compact representation of a valid (non-zero power of two) alignment.
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
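A hedged sketch; L and AC are a hypothetical Loop* and AssumptionCache* from surrounding analysis:
  #include "llvm/ADT/SmallPtrSet.h"
  #include "llvm/Analysis/CodeMetrics.h"
  llvm::SmallPtrSet<const llvm::Value *, 8> EphValues;
  llvm::CodeMetrics::collectEphemeralValues(L, AC, EphValues);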
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
A struct to pack relevant information while generating atomic Ops.
Error mergeFiniBB(IRBuilderBase &Builder, BasicBlock *ExistingFiniBB)
For cases where there is an unavoidable existing finalization block (e.g.
Expected< BasicBlock * > getFiniBB(IRBuilderBase &Builder)
The basic block to which control should be transferred to implement the FiniCB.
Description of an LLVM-IR insertion point (IP) and a debug/source location (filename, line, column, ...).
MapNonContiguousArrayTy Offsets
MapNonContiguousArrayTy Counts
MapNonContiguousArrayTy Strides
This structure contains combined information generated for mappable clauses, including base pointers, pointers, sizes, map types, user-defined mappers, and non-contiguous information.
MapDeviceInfoArrayTy DevicePointers
MapValuesArrayTy BasePointers
MapValuesArrayTy Pointers
StructNonContiguousInfo NonContigInfo
Helper that contains information about regions we need to outline during finalization.
PostOutlineCBTy PostOutlineCB
LLVM_ABI void collectBlocks(SmallPtrSetImpl< BasicBlock * > &BlockSet, SmallVectorImpl< BasicBlock * > &BlockVector)
Collect all blocks in between EntryBB and ExitBB in both the given vector and set.
SmallVector< Value *, 2 > ExcludeArgsFromAggregate
bool FixUpNonEntryAllocas
BasicBlock * OuterAllocaBB
Information about an OpenMP reduction.
EvalKind EvaluationKind
Reduction evaluation kind - scalar, complex or aggregate.
ReductionGenAtomicCBTy AtomicReductionGen
Callback for generating the atomic reduction body, may be null.
ReductionGenCBTy ReductionGen
Callback for generating the reduction body.
Value * Variable
Reduction variable of pointer type.
Value * PrivateVariable
Thread-private partial reduction variable.
ReductionGenClangCBTy ReductionGenClang
Clang callback for generating the reduction body.
Type * ElementType
Reduction element type; must match the pointee type of the reduction variable.
ReductionGenDataPtrPtrCBTy DataPtrPtrGen
Container for the arguments used to pass data to the runtime library.
Value * SizesArray
The array of sizes passed to the runtime library.
Value * PointersArray
The array of section pointers passed to the runtime library.
Value * MappersArray
The array of user-defined mappers passed to the runtime library.
Value * MapTypesArrayEnd
The array of map types passed to the runtime library for the end of the region, or nullptr if there are no separate map types for the region end.
Value * BasePointersArray
The array of base pointers passed to the runtime library.
Value * MapTypesArray
The array of map types passed to the runtime library for the beginning of the region or for the entire region if there are no separate map types for the region end.
Value * MapNamesArray
The array of original declaration names of mapped pointers sent to the runtime library for debugging.
Data structure that contains the needed information to construct the kernel args vector.
ArrayRef< Value * > NumThreads
The number of threads.
TargetDataRTArgs RTArgs
Arguments passed to the runtime library.
Value * NumIterations
The number of iterations.
Value * DynCGroupMem
The size of the dynamic shared memory.
unsigned NumTargetItems
Number of arguments passed to the runtime library.
bool HasNoWait
True if the kernel has 'no wait' clause.
ArrayRef< Value * > NumTeams
The number of teams.
omp::OMPDynGroupprivateFallbackType DynCGroupMemFallback
The fallback mechanism for the shared memory.
Container to pass the default attributes with which a kernel must be launched, used to set kernel attributes and populate associated static structures.
omp::OMPTgtExecModeFlags ExecFlags
SmallVector< int32_t, 3 > MaxTeams
Container to pass LLVM IR runtime values or constants related to the number of teams and threads with...
Value * DeviceID
Device ID value used in the kernel launch.
SmallVector< Value *, 3 > MaxTeams
Value * MaxThreads
'parallel' construct 'num_threads' clause value, if present and it is an SPMD kernel.
Value * LoopTripCount
Total number of iterations of the SPMD or Generic-SPMD kernel or null if it is a generic kernel.
SmallVector< Value *, 3 > TargetThreadLimit
SmallVector< Value *, 3 > TeamsThreadLimit
Data structure to contain the information needed to uniquely identify a target entry.
static LLVM_ABI void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, StringRef ParentName, unsigned DeviceID, unsigned FileID, unsigned Line, unsigned Count)
static constexpr const char * KernelNamePrefix
The prefix used for kernel names.
static const Target * lookupTarget(StringRef TripleStr, std::string &Error)
lookupTarget - Lookup a target based on a target triple.
Defines various target-specific GPU grid values that must be consistent between host RTL (plugin), device RTL, and clang.